File Coverage

blib/lib/GraphViz2/Marpa.pm
Criterion Covered Total %
statement 33 382 8.6
branch 0 146 0.0
condition 0 14 0.0
subroutine 11 36 30.5
pod 9 10 90.0
total 53 588 9.0


line stmt bran cond sub pod time code
1             package GraphViz2::Marpa;
2              
3 2     2   1728 use strict;
  2         2  
  2         44  
4 2     2   7 use warnings;
  2         2  
  2         55  
5 2     2   6 use warnings qw(FATAL utf8); # Fatalize encoding glitches.
  2         4  
  2         82  
6              
7 2     2   767 use GraphViz2::Marpa::Renderer::Graphviz;
  2         6  
  2         14  
8              
9 2     2   63 use Log::Handler;
  2         4  
  2         19  
10              
11 2     2   971 use Marpa::R2;
  2         188942  
  2         32  
12              
13 2     2   72 use Moo;
  2         3  
  2         15  
14              
15 2     2   1823 use Path::Tiny; # For path().
  2         15771  
  2         106  
16              
17 2     2   1333 use Tree::DAG_Node;
  2         26848  
  2         23  
18              
19 2     2   69 use Types::Standard qw/Any ArrayRef Int HashRef Str/;
  2         2  
  2         19  
20              
21 2     2   1449 use Try::Tiny;
  2         4  
  2         7648  
22              
23             has bnf =>
24             (
25             default => sub{return ''},
26             is => 'rw',
27             isa => Any,
28             required => 0,
29             );
30              
31             has bnf4html =>
32             (
33             default => sub{return ''},
34             is => 'rw',
35             isa => Any,
36             required => 0,
37             );
38              
39             has brace_count =>
40             (
41             default => sub{return 0},
42             is => 'rw',
43             isa => Int,
44             required => 0,
45             );
46              
47             has description =>
48             (
49             default => sub{return ''},
50             is => 'rw',
51             isa => Str,
52             required => 0,
53             );
54              
55             has grammar =>
56             (
57             default => sub {return ''},
58             is => 'rw',
59             isa => Any,
60             required => 0,
61             );
62              
63             has grammar4html =>
64             (
65             default => sub {return ''},
66             is => 'rw',
67             isa => Any,
68             required => 0,
69             );
70              
71             has graph_text =>
72             (
73             default => sub{return ''},
74             is => 'rw',
75             isa => Str,
76             required => 0,
77             );
78              
79             has input_file =>
80             (
81             default => sub{return ''},
82             is => 'rw',
83             isa => Str,
84             required => 0,
85             );
86              
87             has known_events =>
88             (
89             default => sub{return {} },
90             is => 'rw',
91             isa => HashRef,
92             required => 0,
93             );
94              
95             has logger =>
96             (
97             default => sub{return undef},
98             is => 'rw',
99             isa => Any,
100             required => 0,
101             );
102              
103             has maxlevel =>
104             (
105             default => sub{return 'notice'},
106             is => 'rw',
107             isa => Str,
108             required => 0,
109             );
110              
111             has minlevel =>
112             (
113             default => sub{return 'error'},
114             is => 'rw',
115             isa => Str,
116             required => 0,
117             );
118              
119             has output_file =>
120             (
121             default => sub{return ''},
122             is => 'rw',
123             isa => Str,
124             required => 0,
125             );
126              
127             has recce =>
128             (
129             default => sub{return ''},
130             is => 'rw',
131             isa => Any,
132             required => 0,
133             );
134              
135             has recce4html =>
136             (
137             default => sub{return ''},
138             is => 'rw',
139             isa => Any,
140             required => 0,
141             );
142              
143             has renderer =>
144             (
145             default => sub{return ''},
146             is => 'rw',
147             isa => Any,
148             required => 0,
149             );
150              
151             has stack =>
152             (
153             default => sub{return []},
154             is => 'rw',
155             isa => ArrayRef,
156             required => 0,
157             );
158              
159             has trace_terminals =>
160             (
161             default => sub{return 0},
162             is => 'rw',
163             isa => Int,
164             required => 0,
165             );
166              
167             has tree =>
168             (
169             default => sub{return ''},
170             is => 'rw',
171             isa => Any,
172             required => 0,
173             );
174              
175             has uid =>
176             (
177             default => sub{return 0},
178             is => 'rw',
179             isa => Int,
180             required => 0,
181             );
182              
183             our $VERSION = '2.10';
184              
185             # ------------------------------------------------
186              
187             sub BUILD
188             {
189 0     0 0   my($self) = @_;
190              
191 0 0         if (! defined $self -> logger)
192             {
193 0           $self -> logger(Log::Handler -> new);
194 0           $self -> logger -> add
195             (
196             screen =>
197             {
198             maxlevel => $self -> maxlevel,
199             message_layout => '%m',
200             minlevel => $self -> minlevel,
201             }
202             );
203             }
204              
205             # Policy: Event names are always the same as the name of the corresponding lexeme.
206              
207             $self -> bnf
208             (
209             <<'END_OF_GRAMMAR'
210              
211             :default ::= action => [values]
212              
213             lexeme default = latm => 1 # Longest Acceptable Token Match.
214              
215             # Input stuff.
216              
217             :start ::= graph_definition
218              
219             graph_definition ::= prolog_tokens graph_statement
220              
221             # Prolog stuff.
222              
223             prolog_tokens ::= prolog_strict_and_id
224             | prolog_strict_no_id
225             | prolog_no_strict_but_id
226             | prolog_no_strict_no_id
227              
228             prolog_strict_and_id ::= strict_token graph_type graph_id_token
229              
230             prolog_strict_no_id ::= strict_token graph_type
231              
232             prolog_no_strict_but_id ::= graph_type graph_id_token
233              
234             prolog_no_strict_no_id ::= graph_type
235              
236             strict_token ::= strict_literal
237              
238             graph_type ::= digraph_literal
239             | graph_literal
240              
241             graph_id_token ::= node_name
242              
243             # Graph stuff.
244              
245             graph_statement ::= open_brace statement_list close_brace
246              
247             statement_list ::= statement_token*
248              
249             statement_token ::= statement statement_terminator
250              
251             # Statement stuff.
252              
253             statement ::= assignment_statement
254             | node_statement
255             | edge_statement
256             | subgraph_statement
257              
258             statement_terminator ::= semicolon_literal
259             statement_terminator ::=
260              
261             # Assignment stuff.
262              
263             assignment_statements ::= assignment_statement*
264              
265             assignment_statement ::= attribute_name ('=') attribute_value
266              
267             # Node stuff
268              
269             node_statement ::= node_name attribute_statements
270              
271             # Attribute stuff.
272              
273             attribute_statements ::= attribute_statement*
274              
275             attribute_statement ::= open_bracket assignment_statements close_bracket
276              
277             # Edge stuff
278              
279             edge_statement ::= edge_lhs edge_rhs attribute_statements
280              
281             edge_lhs ::= node_statement
282             | subgraph_statement
283              
284             edge_rhs ::= edge_name edge_lhs
285             | edge_name edge_lhs edge_rhs
286              
287             edge_name ::= directed_edge
288             | undirected_edge
289              
290             # Subgraph stuff.
291              
292             subgraph_statement ::= subgraph_sequence attribute_statements
293              
294             subgraph_sequence ::= subgraph_sub_and_id
295             | subgraph_sub_no_id
296             | subgraph_no_sub_no_id
297              
298             subgraph_sub_and_id ::= subgraph_prefix subgraph_id_token graph_statement
299              
300             subgraph_sub_no_id ::= subgraph_prefix graph_statement
301              
302             subgraph_no_sub_no_id ::= graph_statement
303              
304             subgraph_prefix ::= subgraph_literal
305              
306             subgraph_id_token ::= subgraph_id
307              
308             # Lexemes in alphabetical order.
309             # Quoted string handling copied from Marpa::R2's metag.bnf.
310              
311             :lexeme ~ attribute_name pause => before event => attribute_name
312             attribute_name ~ string
313              
314             :lexeme ~ attribute_value pause => before event => attribute_value
315             attribute_value ~ string
316              
317             :lexeme ~ close_brace pause => before event => close_brace
318             close_brace ~ '}'
319              
320             :lexeme ~ close_bracket pause => before event => close_bracket
321             close_bracket ~ ']'
322             close_bracket ~ '];'
323              
324             :lexeme ~ digraph_literal pause => before event => digraph_literal
325             digraph_literal ~ 'digraph':i
326              
327             :lexeme ~ directed_edge pause => before event => directed_edge
328             directed_edge ~ '->'
329              
330             double_quoted_char_set ~ double_quoted_char+
331             double_quoted_char ~ escaped_char
332             | [^\"\x{0A}\x{0B}\x{0C}\x{0D}\x{0085}\x{2028}\x{2029}]
333              
334             escaped_char ~ '\' [[:print:]]
335              
336             # Use ' here just for the UltraEdit syntax hiliter.
337              
338             :lexeme ~ graph_literal pause => before event => graph_literal
339             graph_literal ~ 'graph':i
340              
341             html_quoted_char_set ~ [.]+
342              
343             :lexeme ~ node_name pause => before event => node_name
344             node_name ~ string
345              
346             :lexeme ~ open_brace pause => before event => open_brace
347             open_brace ~ '{'
348              
349             :lexeme ~ open_bracket pause => before event => open_bracket
350             open_bracket ~ '['
351              
352             semicolon_literal ~ ';'
353              
354             :lexeme ~ strict_literal pause => before event => strict_literal
355             strict_literal ~ 'strict':i
356              
357             string ~ [\"] double_quoted_char_set [\"]
358             string ~ '<' html_quoted_char_set '>'
359             string ~ unquoted_char_set
360              
361             :lexeme ~ subgraph_id pause => before event => subgraph_id
362             subgraph_id ~ string
363              
364             :lexeme ~ subgraph_literal pause => before event => subgraph_literal
365             subgraph_literal ~ 'subgraph':i
366              
367             :lexeme ~ undirected_edge pause => before event => undirected_edge
368             undirected_edge ~ '--'
369              
370             # The '=' is necessary for cases like: 'name=value' in node_1 [name=value].
371              
372             unquoted_char_set ~ unquoted_char+
373             unquoted_char ~ escaped_char
374             | [^\s\[\]={}]
375              
376             # Boilerplate.
377              
378             :discard ~ separators
379             separators ~ [;,]
380              
381             :discard ~ whitespace
382             whitespace ~ [\s]+
383              
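384             :discard ~ <C style comment>
385             :discard ~ <Cplusplus style comment>
386             :discard ~ <hash style comment>
387              
388             # C and C++ comment handling copied from MarpaX::Languages::C::AST.
389              
390             <C style comment> ~ '/*' <comment interior> '*/'
391              
392             <comment interior> ~ <optional non stars> <optional star prefixed segments> <optional pre final stars>
393              
394             <optional non stars> ~ [^*]*
395             <optional star prefixed segments> ~ <star prefixed segment>*
396             <star prefixed segment> ~ <stars> [^/*] <optional star free text>
397             <stars> ~ [*]+
398             <optional star free text> ~ [^*]*
399             <optional pre final stars> ~ [*]*
400              
401             <Cplusplus style comment> ~ '//' <Cplusplus comment interior>
402             <Cplusplus comment interior> ~ [^\n]*
403              
404             # Hash comment handling copied from Marpa::R2's metag.bnf.
405              
406             <hash style comment> ~ <terminated hash comment>
407             | <unterminated final hash comment>
408              
409             <terminated hash comment> ~ '#' <hash comment body> <vertical space char>
410              
411             <unterminated final hash comment> ~ '#' <hash comment body>
412              
413             <hash comment body> ~ <hash comment char>*
414              
415             <vertical space char> ~ [\x{0A}\x{0B}\x{0C}\x{0D}\x{2028}\x{2029}]
416              
417             <hash comment char> ~ [^\x{0A}\x{0B}\x{0C}\x{0D}\x{2028}\x{2029}]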
418              
419             END_OF_GRAMMAR
420 0           );
421              
422 0           $self -> grammar
423             (
424             Marpa::R2::Scanless::G -> new
425             ({
426             source => \$self -> bnf
427             })
428             );
429              
430 0           $self -> recce
431             (
432             Marpa::R2::Scanless::R -> new
433             ({
434             grammar => $self -> grammar,
435             ranking_method => 'high_rule_only',
436             trace_terminals => $self -> trace_terminals,
437             })
438             );
439              
440 0           my(%event);
441              
442 0           for my $line (split(/\n/, $self -> bnf) )
443             {
444 0 0         $event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
445             }
446              
447 0           $self -> known_events(\%event);
448              
449             # This grammar was devised by rns (Ruslan Shvedov) for nested, double-quoted strings.
450             # See MarpaX::Demo::SampleScripts and scripts/quoted.strings.05.pl.
451              
452 0           $self -> bnf4html
453             (
454             <<'END_OF_GRAMMAR'
455             :default ::= action => [ values ]
456              
457             lexeme default = latm => 1
458              
459             string ::= '<' quoted '>'
460             quoted ::= item | quoted item
461             item ::= string | unquoted
462              
463             unquoted ~ [^<>]+
464              
465             :discard ~ whitespace
466             whitespace ~ [\s+]
467             END_OF_GRAMMAR
468             );
469              
470 0           $self -> grammar4html
471             (
472             Marpa::R2::Scanless::G -> new
473             ({
474             source => \$self -> bnf4html
475             })
476             );
477              
478             # Since $self -> stack has not been initialized yet,
479             # we can't call _add_daughter() until after this statement.
480              
481 0           $self -> tree(Tree::DAG_Node -> new({name => 'root', attributes => {name => 'root', port => '', type => 'root_literal', uid => $self -> uid, value => 'root'} }));
482 0           $self -> stack([$self -> tree -> root]);
483              
484 0           for my $name (qw/prolog graph/)
485             {
486 0           $self -> _add_daughter($name, {type => "${name}_literal", value => $name});
487             }
488              
489             # The 'prolog' daughter is the parent of all items in the prolog,
490             # so it gets pushed onto the stack.
491             # Later, when 'digraph' or 'graph' is encountered, the 'graph' daughter replaces it.
492              
493 0           my(@daughters) = $self -> tree -> daughters;
494 0           my($index) = 0; # 0 => prolog, 1 => graph.
495 0           my($stack) = $self -> stack;
496              
497 0           push @$stack, $daughters[$index];
498              
499 0           $self -> stack($stack);
500              
501             } # End of BUILD.
502              
503             # ------------------------------------------------
504              
505             sub _add_daughter
506             {
507 0     0     my($self, $name, $attributes) = @_;
508 0           my(@name) = $self -> decode_port_compass($$attributes{value});
509 0           $$attributes{name} = $name[0];
510 0           $$attributes{port} = $name[1];
511 0           $$attributes{uid} = $self -> uid($self -> uid + 1);
512 0           my($node) = Tree::DAG_Node -> new({name => $name, attributes => $attributes});
513 0           my($stack) = $self -> stack;
514              
515 0           $$stack[$#$stack] -> add_daughter($node);
516              
517             } # End of _add_daughter.
518              
519             # ------------------------------------------------
520              
521             sub _check4embedded_separator
522             {
523 0     0     my($self, $lexeme, $pos) = @_;
524              
525             # Separators are [;,].
526             # The grammar allows them in things, like:
527             # o width=.1,height=.1. Accept floats.
528             # o fontsize=24,fontname="Arial". Accept integers.
529             # o color="slateblue",fontsize=24. Accept "...".
530             # o style=filled,color=white. Accept [A-Za-z].
531              
532 0 0         my($numeric) = ($lexeme =~ /^(\d+|\d+\.\d*|\.\d+)[;,]/) ? $1 : undef;
533              
534 0 0 0       if ($numeric || ($lexeme =~ /^(".*"|[A-Za-z]+)[;,]/s) )
535             {
536 0           my($s) = $lexeme;
537 0   0       $lexeme = $numeric || $1;
538 0           $pos = $pos - length($s) + length($lexeme);
539             }
540              
541 0           return ($lexeme, $pos);
542              
543             } # End of _check4embedded_separator.
544              
545             # ------------------------------------------------
546              
547             sub clean_after
548             {
549 0     0 1   my($self, $s) = @_;
550              
551             # The grammar allows things like 'xyz,', so clean them up.
552             # Also, trim spaces and then double-quotes. The reason for doing things in this order
553             # is that the user might have written " X ", so we don't remove the quotes first.
554              
555 0 0         substr($s, -1, 1) = '' if (substr($s, -1, 1) eq ',');
556 0           $s =~ s/^\s+//;
557 0           $s =~ s/\s+$//;
558 0           $s =~ s/"(.*)"/$1/;
559              
560 0           return $s;
561              
562             } # End of clean_after.
563              
564             # ------------------------------------------------
565              
566             sub clean_before
567             {
568 0     0 1   my($self, $s) = @_;
569              
570 0           $s =~ s/\s*;\s*$//;
571 0           $s =~ s/^\s+//;
572 0           $s =~ s/\s+$//;
573 0           $s =~ s/^(<)\s+/$1/;
574 0           $s =~ s/\s+(>)$/$1/;
575              
576 0           return $s;
577              
578             } # End of clean_before.
579              
580             # ------------------------------------------------
581              
582             sub decode_node
583             {
584 0     0 1   my($self, $node) = @_;
585 0           my($attributes) = $node -> attributes;
586              
587             return
588             {
589             id => $node -> name,
590             name => $$attributes{name},
591             port => $$attributes{port},
592             type => $$attributes{type},
593             uid => $$attributes{uid},
594             value => $$attributes{value},
595 0           };
596              
597             } # End of decode_node.
598              
599             # --------------------------------------------------
600              
601             sub decode_port_compass
602             {
603 0     0 1   my($self, $name) = @_;
604              
605             # Remove :port:compass, if any, from name.
606             # But beware Perl-style node names like 'A::Class'.
607             # The (?=.) means there must be something after the last ':',
608             # which means we don't split 'A:' or 'A::'.
609              
610 0           my(@field) = split(/(:(?!:)(?=.))/, $name);
611 0 0         $field[0] = $name if ($#field < 0);
612              
613             # Restore Perl module names:
614             # o A: & : & B to A::B.
615             # o A: & : B: & : & C to A::B::C.
616              
617 0   0       while ( ($field[0] =~ /:$/) && ($#field >= 2) )
618             {
619 0           splice(@field, 0, 3, "$field[0]:$field[2]");
620             }
621              
622             # Restore:
623             # o : & port to :port.
624             # o : & port & : & compass to :port:compass.
625              
626 0 0         splice(@field, 1, $#field, join('', @field[1 .. $#field]) ) if ($#field > 0);
627              
628 0           my(@result);
629              
630 0 0         if ($#field == 0)
631             {
632 0           @result = ($name, '');
633             }
634             else
635             {
636 0           @result = ($field[0], join('', @field[1 .. $#field]) );
637             }
638              
639 0           return @result;
640              
641             } # End of decode_port_compass.
642              
643             # ------------------------------------------------
644              
645             sub decode_tree
646             {
647 0     0 1   my($self, $tree) = @_;
648 0           my($prolog) =
649             {
650             digraph => 'digraph',
651             strict => '',
652             };
653              
654             # Examine the daughters of the prolog to find the digraph/graph and strict attributes.
655              
656 0           my($node_id);
657              
658 0           for my $node ( ($tree -> daughters)[0] -> daughters)
659             {
660 0           $node_id = $self -> decode_node($node);
661 0 0         $$prolog{digraph} = 'graph' if ($$node_id{name} eq 'graph');
662 0 0         $$prolog{strict} = 'strict ' if ($$node_id{name} eq 'strict');
663             }
664              
665 0           return $prolog;
666              
667             } # End of decode_tree.
668              
669             # ------------------------------------------------
670              
671             sub _decode_result
672             {
673 0     0     my($self, $result) = @_;
674 0           my(@worklist) = $result;
675              
676 0           my($obj);
677             my($ref_type);
678 0           my(@stack);
679              
680             do
681 0           {
682 0           $obj = shift @worklist;
683 0           $ref_type = ref $obj;
684              
685 0 0         if ($ref_type eq 'ARRAY')
    0          
    0          
686             {
687 0           unshift @worklist, @$obj;
688             }
689             elsif ($ref_type eq 'HASH')
690             {
691 0           push @stack, {%$obj};
692             }
693             elsif ($ref_type)
694             {
695 0           die "Unsupported object type $ref_type\n";
696             }
697             else
698             {
699 0           push @stack, $obj;
700             }
701              
702             } while (@worklist);
703              
704 0           return join('', @stack);
705              
706             } # End of _decode_result.
707              
708             # ------------------------------------------------
709              
710             sub _dump_stack
711             {
712 0     0     my($self, $caller) = @_;
713              
714 0           $self -> log(info => "\tStack @ $caller");
715              
716 0           my($node_id);
717              
718 0           for my $item (@{$self -> stack})
  0            
719             {
720 0           $node_id = $self -> decode_node($item);
721              
722 0           $self -> log(info => "\tUid: $$node_id{uid}. Id: $$node_id{id}. Name: $$node_id{name}");
723             }
724              
725 0           $self -> log(debug => join("\n", @{$self -> tree -> tree2string}) );
  0            
726              
727             } # End of _dump_stack.
728              
729             # ------------------------------------------------
730              
731             sub hashref2string
732             {
733 0     0 1   my($self, $hashref) = @_;
734 0   0       $hashref ||= {};
735              
736 0           return '{' . join(', ', map{qq|$_ => "$$hashref{$_}"|} sort keys %$hashref) . '}';
  0            
737              
738             } # End of hashref2string.
739              
740             # ------------------------------------------------
741              
742             sub _identify_lexeme
743             {
744 0     0     my($self, $string, $start, $span, $pos, $lexeme) = @_;
745              
746 0           pos($string) = $start + $span;
747 0 0         $string =~ /\G\s*(\S)/ || return;
748 0           my($literal) = $1;
749              
750 0           my($type);
751              
752 0 0         if (substr($lexeme, 0, 1) eq '{')
753             {
754 0           $pos++;
755              
756 0           $span = 1;
757 0           $type = 'open_brace';
758             }
759             else
760             {
761 0 0         $type = ($literal eq '=') ? 'attribute_name' : 'node_name';
762             }
763              
764 0           $self -> log(debug => "Disambiguated lexeme (2 of 2) |$lexeme| as '$type'. pos: $pos");
765              
766 0           return ($type, $span, $pos);
767              
768             } # End of _identify_lexeme.
769              
770             # ------------------------------------------------
771              
772             sub log
773             {
774 0     0 1   my($self, $level, $s) = @_;
775              
776 0 0         $self -> logger -> log($level => $s) if ($self -> logger);
777              
778             } # End of log.
779              
780             # ------------------------------------------------
781              
782             sub next_few_chars
783             {
784 0     0 1   my($self, $s, $offset) = @_;
785 0           $s = substr($s, $offset, 20);
786 0           $s =~ tr/\n/ /;
787 0           $s =~ s/^\s+//;
788 0           $s =~ s/\s+$//;
789              
790 0           return $s;
791              
792             } # End of next_few_chars.
793              
794             # ------------------------------------------------
795              
796             sub _process
797             {
798 0     0     my($self) = @_;
799 0           my($string) = $self -> clean_before($self -> graph_text);
800 0           my($length) = length $string;
801 0           my($format) = '%-20s %5s %5s %5s %-20s %-20s';
802 0           my($last_event) = '';
803 0           my($prolog_token) = qr/^(?:digraph|graph|strict)_literal$/;
804 0           my($pos) = 0;
805 0           my(%class) =
806             (
807             edge => 'class',
808             graph => 'class',
809             node => 'class',
810             );
811              
812 0           $self -> log(debug => "Length of input: $length");
813 0           $self -> log(debug => sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme', 'Comment') );
814              
815 0           my($event_name);
816             my(@fields);
817 0           my($lexeme);
818 0           my($node_name);
819 0           my($original_lexeme);
820 0           my($span, $start, $s, $stack);
821 0           my($temp, $type);
822              
823             # We use read()/lexeme_read()/resume() because we pause at each lexeme.
824             # Also, in read(), we use $pos and $length to avoid reading Ruby Slippers tokens (if any).
825              
826 0           for
827             (
828             $pos = $self -> recce -> read(\$string, $pos, $length);
829             $pos < $length;
830             $pos = $self -> recce -> resume($pos)
831             )
832             {
833 0           ($start, $span) = $self -> recce -> pause_span;
834 0           ($event_name, $span, $pos) = $self -> _validate_event($string, $start, $span, $pos);
835 0           $lexeme = $self -> recce -> literal($start, $span);
836 0           $original_lexeme = $lexeme;
837 0           $pos = $self -> recce -> lexeme_read($event_name);
838              
839 0 0         die "lexeme_read($event_name) rejected lexeme |$lexeme|\n" if (! defined $pos);
840              
841             # Special case.
842             # This 'if' matches the start of the big 'if' just below.
843             # It's here so that the log(debug) code is in the right order.
844              
845 0 0 0       if ( ($event_name eq 'attribute_name') && (substr($lexeme, 0, 1) eq '[') )
846             {
847 0           $temp = '[';
848              
849 0           $self -> log(debug => sprintf($format, 'open_bracket', $start, 1, $pos, $temp, '-') );
850 0           $self -> _process_bracket($temp, 'open_bracket');
851             }
852              
853 0           $self -> log(debug => sprintf($format, $event_name, $start, $span, $pos, $lexeme, '-') );
854              
855 0 0         if ($event_name eq 'attribute_name')
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
856             {
857             # Special cases.
858              
859 0 0         if (substr($lexeme, 0, 1) eq '[')
860             {
861 0           $event_name = 'open_bracket'; # Sets $last_event at the end of the loop.
862 0           substr($lexeme, 0, 1) = '';
863             }
864              
865 0           $fields[0] = $self -> clean_after($lexeme);
866             }
867             elsif ($event_name eq 'attribute_value')
868             {
869 0 0         if ($lexeme =~ /^</)
870             {
871             # Note: We pass in $start and it becomes $pos.
872              
873 0           ($lexeme, $pos) = $self -> _process_html(\$string, $start);
874              
875 0           $self -> _add_daughter('attribute', {type => $fields[0], value => $lexeme});
876             }
877             else
878             {
879             # Special cases.
880             # Handle ']' and '];'.
881              
882 0           $temp = '';
883 0 0         substr($lexeme, -1, 1) = '' if (substr($lexeme, -1, 1) eq ';');
884              
885 0 0         if (substr($lexeme, -1, 1) eq ']')
886             {
887 0           $temp = ']';
888 0           substr($lexeme, -1, 1) = '';
889             }
890              
891 0           ($lexeme, $pos) = $self -> _check4embedded_separator($lexeme, $pos);
892 0           $lexeme = $self -> clean_after($lexeme);
893 0           $s = $self -> next_few_chars($string, $pos);
894              
895 0 0         $self -> log(debug => "Lexeme |$original_lexeme| corrected to be |$lexeme|. pos: $pos. Next few char |$s|") if ($original_lexeme ne $lexeme);
896 0           $self -> _add_daughter('attribute', {type => $fields[0], value => $lexeme});
897              
898 0           @fields = ();
899              
900 0 0         if ($temp)
901             {
902 0           $event_name = 'close_bracket'; # Sets $last_event at the end of the loop.
903              
904 0           $self -> log(debug => sprintf($format, $event_name, $start, 1, $pos, $temp, 'Adjusted event_name') );
905 0           $self -> _process_bracket($temp, $event_name);
906             }
907             }
908             }
909             elsif ($event_name eq 'close_brace')
910             {
911 0           $self -> _process_brace($lexeme, $event_name);
912             }
913             elsif ($event_name eq 'close_bracket')
914             {
915             # Special case.
916              
917 0 0         substr($lexeme, -1, 1) = '' if (substr($lexeme, -1, 1) eq ';');
918 0           $s = $self -> next_few_chars($string, $pos);
919              
920 0 0         $self -> log(debug => "Lexeme |$original_lexeme| corrected to be |$lexeme|. pos: $pos. Next few char |$s|") if ($original_lexeme ne $lexeme);
921 0           $self -> _process_bracket($lexeme, $event_name);
922             }
923             elsif ($event_name eq 'directed_edge')
924             {
925 0           $self -> _add_daughter('edge_id', {type => $event_name, value => $lexeme});
926             }
927             elsif ($event_name eq 'node_name')
928             {
929             # Special cases.
930              
931 0 0         if (substr($lexeme, 0, 1) eq '{')
932             {
933 0           $event_name = 'open_brace';
934 0           $pos -= (length($lexeme) - 1);
935 0           $lexeme = '{';
936              
937 0           $self -> log(debug => sprintf($format, $event_name, $start, $span, $pos, $lexeme, 'Adjusted event_name, lexeme and pos') );
938 0           $self -> _process_brace($lexeme, $event_name);
939              
940 0           next;
941             }
942              
943             # This 'if' is repeated just below.
944              
945 0 0         if (substr($lexeme, -1, 1) eq ';')
946             {
947 0           substr($lexeme, -1, 1) = '';
948              
949 0 0         next if ($lexeme eq '');
950             }
951              
952 0 0         if (substr($lexeme, -1, 1) eq '}')
953             {
954 0           substr($lexeme, -1, 1) = '';
955 0           $pos -= 1;
956             }
957              
958             # This 'if' is repeated just above.
959              
960 0 0         if (substr($lexeme, -1, 1) eq ';')
961             {
962 0           substr($lexeme, -1, 1) = '';
963              
964 0 0         next if ($lexeme eq '');
965             }
966              
967 0 0         $self -> log(debug => "Lexeme |$original_lexeme| corrected to be |$lexeme|") if ($original_lexeme ne $lexeme);
968              
969 0           $lexeme = $self -> clean_after($lexeme);
970              
971 0 0         if ($class{lc $lexeme})
972             {
973 0           $lexeme = lc $lexeme;
974 0           $type = $class{$lexeme};
975             }
976             else
977             {
978 0           $type = 'node_id';
979              
980             # If this node's mother is the 'graph' node, then it's a graph_id.
981              
982 0           $stack = $self -> stack;
983 0 0         $type = 'graph_id' if ($$stack[$#$stack] -> name eq 'graph');
984             }
985              
986 0 0         $self -> log(debug => "|$lexeme| classified as a $type") if ($original_lexeme ne $lexeme);
987 0           $self -> _add_daughter($type, {type => $type, value => $lexeme});
988             }
989             elsif ($event_name eq 'open_brace')
990             {
991 0           $self -> _process_brace($lexeme, $event_name);
992             }
993             elsif ($event_name eq 'open_bracket')
994             {
995 0           $self -> _process_bracket($lexeme, $event_name);
996             }
997             elsif ($event_name =~ $prolog_token)
998             {
999 0           $self -> _process_prolog_token($event_name, $lexeme);
1000             }
1001             elsif ($event_name eq 'subgraph_id')
1002             {
1003 0           $self -> _add_daughter('subgraph_id', {type => $event_name, value => $lexeme});
1004             }
1005             elsif ($event_name eq 'subgraph_literal')
1006             {
1007 0           $self -> _add_daughter('literal', {type => $event_name, value => 'subgraph'});
1008             }
1009             elsif ($event_name eq 'undirected_edge')
1010             {
1011 0           $self -> _add_daughter('edge_id', {type => $event_name, value => $lexeme});
1012             }
1013              
1014             # Step past separators.
1015              
1016 0 0         if (substr($string, $pos, 1) =~ /[;,]/)
1017             {
1018 0           $pos++;
1019             }
1020              
1021 0           $last_event = $event_name;
1022             }
1023              
1024 0 0         if (my $ambiguous_status = $self -> recce -> ambiguous)
1025             {
1026 0           my($terminals) = $self -> recce -> terminals_expected;
1027 0 0         $terminals = ['(None)'] if ($#$terminals < 0);
1028              
1029 0           $self -> log(info => 'Terminals expected: ' . join(', ', @$terminals) );
1030 0           $self -> log(info => "Parse is ambiguous. Status: $ambiguous_status");
1031             }
1032              
1033             # Return a defined value for success and undef for failure.
1034              
1035 0           return $self -> recce -> value;
1036              
1037             } # End of _process.
1038              
1039             # ------------------------------------------------
1040              
1041             sub _process_brace
1042             {
                 # Handle one brace token: keep brace_count in step and push/pop the
                 # stack of tree nodes held in $self -> stack.
                 #
                 # Parameters:
                 #   $name       - The brace text. '{' takes the 'open' branch; any other
                 #                 value takes the 'close' branch.
                 #   $event_name - Stored as the 'type' attribute of the 'literal' node
                 #                 passed to _add_daughter().
                 #
                 # There is no explicit return value.
                 # NOTE(review): _add_daughter() is not visible here; presumably it attaches
                 # the new node to whatever is on top of the stack - confirm.
1043 0     0     my($self, $name, $event_name) = @_;
1044              
1045             # When the 1st '{' is encountered, the 'graph' daughter of the root
1046             # becomes the parent of all other tree nodes, replacing the 'prolog' daughter,
1047             # which has been the parent of 'strict' and 'digraph' or 'graph' up to now.
                 # Note: This test looks only at brace_count, not at $name.
1048              
1049 0 0         if ($self -> brace_count == 0)
1050             {
1051 0           my($stack) = $self -> stack;
1052              
1053 0           pop @$stack;
1054              
1055 0           my(@daughters) = $self -> tree -> daughters;
1056 0           my($index) = 1; # 0 => prolog, 1 => graph.
1057              
1058 0           push @$stack, $daughters[$index];
1059              
1060 0           $self -> stack($stack);
1061             }
1062              
1063             # When a '{' is encountered, the last thing pushed becomes its parent.
1064             # Likewise, when a '}' is encountered, we pop the stack.
1065              
1066 0           my($stack) = $self -> stack;
1067              
1068 0 0         if ($name eq '{')
1069             {
                 # Add the '{' literal first, then push the newest daughter of the
                 # current stack top, so that later nodes hang below it.
1070 0           $self -> brace_count($self -> brace_count + 1);
1071 0           $self -> _add_daughter('literal', {type => $event_name, value => $name});
1072              
1073 0           my(@daughters) = $$stack[$#$stack] -> daughters;
1074              
1075 0           push @$stack, $daughters[$#daughters];
1076              
1077 0           $self -> stack($stack);
1078             }
1079             else
1080             {
                 # Pop before adding the closing literal, so the literal is added
                 # after the stack has returned to the outer level.
1081 0           pop @$stack;
1082              
1083 0           $self -> stack($stack);
1084 0           $self -> _add_daughter('literal', {type => $event_name, value => $name});
1085 0           $self -> brace_count($self -> brace_count - 1);
1086             }
1087              
1088             } # End of _process_brace.
1089              
1090             # ------------------------------------------------
1091              
1092             sub _process_bracket
1093             {
                 # Handle one bracket token by pushing/popping the stack of tree nodes
                 # held in $self -> stack and adding a 'literal' node for the bracket.
                 #
                 # Parameters:
                 #   $name       - The bracket text. '[' takes the 'open' branch; any other
                 #                 value takes the 'close' branch.
                 #   $event_name - Stored as the 'type' attribute of the 'literal' node.
                 #
                 # There is no explicit return value.
1094 0     0     my($self, $name, $event_name) = @_;
1095              
1096             # When a '[' is encountered, the last thing pushed becomes its parent.
1097             # Likewise, if ']' is encountered, we pop the stack.
1098              
1099 0           my($stack) = $self -> stack;
1100              
1101 0 0         if ($name eq '[')
1102             {
                 # Push first, then add the literal: the opposite order to the '{' case
                 # in _process_brace().
                 # NOTE(review): Unlike the other branches, this one does not call
                 # $self -> stack($stack) after the push. Presumably stack() returns the
                 # same arrayref the object holds, making the push visible anyway - confirm.
1103 0           my(@daughters) = $$stack[$#$stack] -> daughters;
1104              
1105 0           push @$stack, $daughters[$#daughters];
1106              
1107 0           $self -> _add_daughter('literal', {type => $event_name, value => $name});
1108             }
1109             else
1110             {
                 # Add the closing literal while the stack is still at the inner level,
                 # then pop.
1111 0           $self -> _add_daughter('literal', {type => $event_name, value => $name});
1112              
1113 0           pop @$stack;
1114              
1115 0           $self -> stack($stack);
1116             }
1117              
1118             } # End of _process_bracket.
1119              
1120             # ------------------------------------------------
1121              
1122             sub _process_html
1123             {
                 # Parse an HTML-like attribute value using a separate recognizer built
                 # from $self -> grammar4html.
                 #
                 # Parameters:
                 #   $stringref - Reference to the whole input string.
                 #   $pos       - Offset within that string at which the HTML text starts.
                 #
                 # Returns the list ($html, $pos), where $pos has been advanced by
                 # length($html). Dies with the stored error if no HTML could be extracted.
1124 0     0     my($self, $stringref, $pos) = @_;
1125              
                 # A new recognizer is created for every call.
1126 0           $self -> recce4html
1127             (
1128             Marpa::R2::Scanless::R -> new
1129             ({
1130             grammar => $self -> grammar4html,
1131             })
1132             );
1133              
1134             # Only the text from $pos to the end of the input is offered to the recognizer.
1135              
1136 0           my($candidate) = substr($$stringref, $pos);
1137              
1138 0           my($error);
1139             my($html);
1140 0           my($value);
1141              
1142             try
1143             {
1144 0     0     $self -> recce4html -> read(\$candidate);
1145              
1146 0           $value = $self -> recce4html -> value;
1147              
1148 0 0         if (defined $value)
1149             {
1150 0           $html = $self -> _decode_result($$value);
1151             }
1152             else
1153             {
1154 0           $error = 'Parse failed';
1155             }
1156             }
1157             catch
1158             {
1159 0     0     $error = $_;
1160              
1161             # But wait! It might be OK after all.
1162             # Actually, this branch always happens, because for valid DOT files,
1163             # there must be something in the input ('lexemes') after the HTML.
                 # NOTE(review): @span is used as (start, length) within $candidate;
                 # confirm against the Marpa::R2 docs for last_completed_span().
1164              
1165 0 0         if ($self -> recce4html -> exhausted)
1166             {
1167 0           my(@span) = $self -> recce4html -> last_completed_span('string');
1168 0           $html = substr($candidate, $span[0], $span[1]);
1169             }
1170 0           };
1171              
                 # $html is still undef if the parse failed and no completed span was recovered.
1172 0 0         if (! defined $html)
1173             {
1174 0           die $error;
1175             }
1176              
                 # NOTE(review): This assumes the HTML starts at offset 0 of $candidate,
                 # i.e. that $span[0] is 0 in the catch branch - confirm.
1177 0           $pos += length($html);
1178              
1179 0           return ($html, $pos);
1180              
1181             } # End of _process_html.
1182              
1183             # ------------------------------------------------
1184              
1185             sub _process_prolog_token
1186             {
                 # Record a prolog token as a 'literal' node and, for any token other than
                 # 'strict_literal', switch the stack top to the root's 'graph' daughter.
                 #
                 # Parameters:
                 #   $event_name - The event name; stored as the node's 'type'.
                 #   $value      - The lexeme; stored lower-cased as the node's 'value'.
                 #
                 # There is no explicit return value.
1187 0     0     my($self, $event_name, $value) = @_;
1188              
1189 0           $self -> _add_daughter('literal', {type => $event_name, value => lc $value});
1190              
1191             # When 'digraph' or 'graph' is encountered, the 'graph' daughter of the root
1192             # becomes the parent of all other tree nodes, replacing the 'prolog' daughter.
                 # Note: The test below is 'anything except strict_literal'.
1193              
1194 0 0         if ($event_name ne 'strict_literal')
1195             {
1196 0           my($stack) = $self -> stack;
1197              
1198 0           pop @$stack;
1199              
1200 0           my(@daughters) = $self -> tree -> daughters;
1201 0           my($index) = 1; # 0 => prolog, 1 => graph.
1202              
1203 0           push @$stack, $daughters[$index];
1204              
1205 0           $self -> stack($stack);
1206             }
1207              
1208             } # End of _process_prolog_token.
1209              
1210             # ------------------------------------------------
1211              
1212             sub run
1213             {
                 # Public entry point: load the graph text, parse it via _process(), and
                 # optionally run a renderer.
                 #
                 # The graph text comes from 'description' if that is true, otherwise from
                 # the file named by 'input_file'. If neither is set, the method dies.
                 #
                 # Returns 0 for success and 1 for failure. Errors thrown by _process() are
                 # caught and logged rather than propagated.
1214 0     0 1   my($self) = @_;
1215              
1216 0 0         if ($self -> description)
    0          
1217             {
1218             # Assume graph is a single line without comments.
1219              
1220 0           $self -> graph_text($self -> description);
1221             }
1222             elsif ($self -> input_file)
1223             {
1224             # Quick removal of whole-line C++ and hash comments.
1225             # In the regexp, # is written as \# just for the Ultraedit syntax hiliter.
1226             # Steps:
1227             # o Read file as a set of lines.
1228             # o Look for trailing \ chars, and combine those lines.
1229             # o Combine all remaining lines with ' '.
                 # Note: Only lines whose very first characters are '#' or '//' are removed.
1230              
1231 0           my(@line) = grep{! m!^(?:\#|//)!} path($self -> input_file) -> lines_utf8;
  0            
1232 0           my($last) = $#line; # Store this separately so we can fiddle $i.
1233 0           my($i) = 0;
1234 0           my($buffer) = '';
1235              
1236 0           my(@out);
1237              
1238             # We don't check the very last line. If it ends with '\\', we definitely want an error.
1239              
1240 0           while ($i < $last)
1241             {
                 # A line ending in a backslash is accumulated in $buffer, minus the backslash.
1242 0 0         if ($line[$i] =~ /(.*)\\$/)
1243             {
1244 0           $buffer .= $1;
1245             }
1246             else
1247             {
                 # Prepend any pending continuation text to this line.
1248 0 0         if (length($buffer) > 0)
1249             {
1250 0           $line[$i] = "$buffer$line[$i]";
1251 0           $buffer = '';
1252             }
1253              
1254 0           push @out, $line[$i];
1255             }
1256              
1257 0           $i++;
1258             }
1259              
                 # NOTE(review): $buffer is not prepended here, so if the second-to-last
                 # line ended with a backslash its accumulated text is dropped.
                 # NOTE(review): If @line is empty, $line[$i] is undef at this point.
1260 0           push @out, $line[$i];
1261              
1262 0           $self -> graph_text(join(' ', @out) );
1263              
1264 0           $self -> log(debug => "After processing this graph:\n|" . $self -> graph_text . '|');
1265             }
1266             else
1267             {
1268 0           die "You must provide a graph using one of -input_file or -description. \n";
1269             }
1270              
1271             # Return 0 for success and 1 for failure.
1272              
1273 0           my($result) = 0;
1274              
1275             try
1276             {
                 # _process() returns a defined value for success and undef for failure.
1277 0 0   0     if (defined (my $value = $self -> _process) )
1278             {
1279 0           $self -> log(info => 'Parsed tree:');
1280 0           $self -> log(info => join("\n", @{$self -> tree -> tree2string}) );
  0            
1281             }
1282             else
1283             {
1284 0           $result = 1;
1285              
1286 0           $self -> log(error => 'Parse failed');
1287             }
1288             }
1289             catch
1290             {
1291 0     0     $result = 1;
1292              
1293 0           $self -> log(error => "Parse failed. Error: $_");
1294 0           };
1295              
1296 0           $self -> log(info => "Parse result: $result (0 is success)");
1297              
1298 0 0         if ($result == 0)
1299             {
1300             # Clean up the stack by popping the root node.
1301              
1302 0           my($stack) = $self -> stack;
1303              
1304 0           pop @$stack;
1305              
1306 0           $self -> stack($stack);
1307 0           $self -> log(debug => 'Brace count: ' . $self -> brace_count . ' (0 is success)');
                 # Note: $#{...} is the last index of the stack, not its element count.
1308 0           $self -> log(debug => 'Stack size: ' . $#{$self -> stack} . ' (0 is success)');
  0            
1309              
1310 0           my($output_file) = $self -> output_file;
1311              
1312 0 0         if ($output_file)
1313             {
                 # Create the default renderer only if the caller did not supply one.
1314 0 0         $self -> renderer
1315             (
1316             GraphViz2::Marpa::Renderer::Graphviz -> new
1317             (
1318             logger => $self -> logger,
1319             maxlevel => $self -> maxlevel,
1320             minlevel => $self -> minlevel,
1321             output_file => $self -> output_file,
1322             tree => $self -> tree,
1323             )
1324             ) if (! $self -> renderer);
1325              
1326 0           $self -> renderer -> run;
1327             }
1328             }
1329             else
1330             {
                 # NOTE(review): The label passed to _dump_stack() mentions _process_brace(),
                 # which looks copied from another call site - confirm it is intended here.
1331 0           $self -> log(info => 'The stack and the tree when we died ...');
1332 0           $self -> _dump_stack('_process_brace() pushed { onto stack');
1333             }
1334              
1335             # Return 0 for success and 1 for failure.
1336              
1337 0           return $result;
1338              
1339             } # End of run.
1340              
1341             # ------------------------------------------------
1342              
1343             sub _validate_event
1344             {
                 # Inspect the events reported by $self -> recce for the current lexeme and
                 # choose a single event name.
                 #
                 # Parameters:
                 #   $string - The input string being parsed.
                 #   $start  - Offset of the lexeme within $string.
                 #   $span   - Length of the lexeme.
                 #   $pos    - Current position; passed through to _identify_lexeme().
                 #
                 # Returns the list ($event_name, $span, $pos). $span and $pos are changed
                 # only when _identify_lexeme() is called.
                 #
                 # Dies if an event name is not in known_events, or if multiple events
                 # cannot be reduced to one.
1345 0     0     my($self, $string, $start, $span, $pos) = @_;
1346 0           my(@event) = @{$self -> recce -> events};
  0            
1347 0           my($event_count) = scalar @event;
                 # Event names are sorted, so the 2-event test below can compare against
                 # one fixed 'name!name' string.
1348 0           my(@event_name) = sort map{$$_[0]} @event;
  0            
1349 0           my($event_name) = $event_name[0]; # Default.
1350 0           my($lexeme) = substr($string, $start, $span);
1351 0           my($line, $column) = $self -> recce -> line_column($start);
1352 0           my($literal) = $self -> next_few_chars($string, $start + $span);
1353 0           my($message) = "Location: ($line, $column). Lexeme: |$lexeme|. Next few chars: |$literal|";
1354 0           $message = "$message. Events: $event_count. Names: ";
1355              
1356 0           $self -> log(debug => $message . join(', ', @event_name) . '.');
1357              
                 # NOTE(review): %event_name is populated here but not read again within
                 # this sub.
1358 0           my(%event_name);
1359              
1360 0           @event_name{@event_name} = (1) x @event_name;
1361              
1362 0           for (@event_name)
1363             {
1364 0 0         die "Unexpected event name '$_'" if (! ${$self -> known_events}{$_});
  0            
1365             }
1366              
1367 0 0         if ($event_count > 1)
1368             {
                 # Lexemes which identify their own event, whatever else was reported.
1369 0           my(%special_case) =
1370             (
1371             '}' => 'close_brace',
1372             ']' => 'close_bracket',
1373             '];' => 'close_bracket',
1374             '->' => 'directed_edge',
1375             '{' => 'open_brace',
1376             '[' => 'open_bracket',
1377             'subgraph' => 'subgraph_literal',
1378             '--' => 'undirected_edge',
1379             );
1380              
1381 0 0         if ($special_case{$lexeme})
    0          
1382             {
1383 0           $event_name = $special_case{$lexeme};
1384              
1385 0           $self -> log(debug => "Disambiguated lexeme (1 of 2) |$lexeme| as '$event_name'");
1386             }
1387             elsif ($event_count == 2)
1388             {
1389             # We can handle ambiguous events when they are 'attribute_name' and 'node_name'.
1390             # 'attribute_name' is followed by '=', and 'node_name' is followed by anything else.
1391             # In fact, 'node_name' may be followed by '[' to indicate the start of its attributes.
1392              
1393 0           $event_name = undef;
1394 0           my($expected) = "$event_name[0]!$event_name[1]";
1395              
1396 0 0         if ($expected eq 'attribute_name!node_name')
1397             {
1398 0           ($event_name, $span, $pos) = $self -> _identify_lexeme($string, $start, $span, $pos, $lexeme);
1399             }
1400              
                 # Still undef if the pair was not the expected one, or if
                 # _identify_lexeme() returned an undefined name.
1401 0 0         if (! defined $event_name)
1402             {
1403 0           die "Cannot identify lexeme as either 'attribute_name' or 'node_name'. \n";
1404             }
1405             }
1406             else
1407             {
1408 0           die "The code only handles 1 event at a time, or the pair ('attribute_name', 'node_name'). \n";
1409             }
1410             }
1411              
1412 0           return ($event_name, $span, $pos);
1413              
1414             } # End of _validate_event.
1415              
1416             # ------------------------------------------------
1417              
1418             1;
1419              
1420             =pod
1421              
1422             =head1 NAME
1423              
1424             C<GraphViz2::Marpa> - A Marpa-based parser for Graphviz C<dot> files
1425              
1426             =head1 Synopsis
1427              
1428             =over 4
1429              
1430             =item o Display help
1431              
1432             perl scripts/g2m.pl -h
1433              
1434             =item o Run the parser
1435              
1436             perl scripts/g2m.pl -input_file data/16.gv
1437             perl scripts/g2m.pl -input_file data/16.gv -max info
1438              
1439             The L discusses the way the parsed data is stored in RAM.
1440              
1441             =item o Run the parser and the default renderer
1442              
1443             perl scripts/g2m.pl -input_file data/16.gv -output_file ./16.gv
1444              
1445             ./16.gv will be the rendered Graphviz C<dot> file.
1446              
1447             See scripts/test.utf8.sh for comparing the output of running the parser, and C<dot>, on all
1448             data/utf8.*.gv files.
1449              
1450             =back
1451              
1452             See also L.
1453              
1454             =head1 Description
1455              
1456             L<GraphViz2::Marpa> provides a L<Marpa::R2>-based parser for L<Graphviz|http://www.graphviz.org/>
1457             graph definitions.
1458              
1459             Demo output: L.
1460              
1461             L.
1462              
1463             Articles:
1464              
1465             =over 4
1466              
1467             =item o Overview
1468              
1469             L
1470              
1471             =item o Building the Grammar
1472              
1473             L
1474              
1475             This module will be re-written, again, now that its BNF has been incorporated into GraphViz2::Marpa,
1476             and patched along the way.
1477              
1478             =back
1479              
1480             =head1 Modules
1481              
1482             =over 4
1483              
1484             =item o L
1485              
1486             The current module, which documents the set of modules.
1487              
1488             It can, optionally, use the default renderer L.
1489              
1490             Accepts a L graph definition and builds a corresponding
1491             data structure representing the parsed graph. It can pass that data to the default renderer,
1492             L, which can then render it to a text file ready to be
1493             input to C<dot>. Such 'round-tripping', as it's called, is the best way to test a renderer.
1494              
1495             See scripts/g2m.pl and scripts/test.utf8.sh.
1496              
1497             =item o L
1498              
1499             The default renderer. Optionally called by the parser.
1500              
1501             =item o L
1502              
1503             Auxiliary code, used to help generate the demo page.
1504              
1505             =item o L
1506              
1507             Auxiliary code, used to help generate the demo page.
1508              
1509             =back
1510              
1511             =head1 Sample Data
1512              
1513             =over 4
1514              
1515             =item o Input files: data/*.gv
1516              
1517             These are valid L graph definition files.
1518              
1519             Some data/*.gv files may contain deliberate mistakes, which may or may not stop production
1520             of output files. They may cause various warning messages to be printed by C when
1521             being rendered.
1522              
1523             See L for details.
1524              
1525             =item o Output files: html/*.svg
1526              
1527             The html/*.svg are L graph definition files output
1528             by scripts/generate.demo.sh.
1529              
1530             The round trip shows that the lex/parse process does not lose information along the way, but
1531             comments are discarded..
1532              
1533             This set, and the set xt/author/html/*.svg just below, are generated by running
1534             scripts/generate.demo.sh. This in turn runs both scripts/generate.svg.sh and
1535             scripts/generate.demo.pl.
1536              
1537             =item o Input files: xt/author/data/*.gv
1538              
1539             As for data/*.gv above, but these files are copied from Graphviz V 2.38.0, and are often quite
1540             complex.
1541              
1542             See find.candidates.pl, below.
1543              
1544             =item o Output files: xt/author/html/*.svg
1545              
1546             As for html/*.svg above.
1547              
1548             =back
1549              
1550             =head1 Scripts
1551              
1552             These are in the scripts/ directory.
1553              
1554             =over 4
1555              
1556             =item o copy.config.pl
1557              
1558             For use by the author. Output:
1559              
1560             Copied config/.htgraphviz2.marpa.conf to /home/ron/.config/Perl/GraphViz2-Marpa
1561              
1562             =item o find.candidates.pl
1563              
1564             For use by the author.
1565              
1566             This scans an unpacked distro of Graphviz V 2.38.0 and finds *.gv matching these criteria:
1567              
1568             =over 4
1569              
1570             =item o In ~/Downloads/Graphviz/graphviz-2.38.0/
1571              
1572             =item o Not too big
1573              
1574             I.e. the file must be < 10,000 bytes in size, otherwise it may take too long to process.
1575              
1576             =item o Not a fake
1577              
1578             Currently, only ~/Downloads/Graphviz/graphviz-2.38.0/tclpkg/gv/META.gv fits this
1579             definition.
1580              
1581             =item o Not already present in xt/author/data
1582              
1583             =back
1584              
1585             Any candidates found have their names printed, for easy one-at-a-time copying from Graphviz and
1586             testing via scripts/test.1.sh.
1587              
1588             =item o find.config.pl
1589              
1590             For use by the author. Output:
1591              
1592             Using: File::HomeDir -> my_dist_config('GraphViz2-Marpa', '.htgraphviz2.marpa.conf'):
1593             Found: /home/ron/.config/Perl/GraphViz2-Marpa/.htgraphviz2.marpa.conf
1594              
1595             =item o g2m.pl
1596              
1597             Runs the parser. Try running with -h.
1598              
1599             =item o g2m.sh
1600              
1601             Simplifies running g2m.pl.
1602              
1603             =item o generate.demo.pl
1604              
1605             See generate.demo.sh.
1606              
1607             =item o generate.demo.sh
1608              
1609             For use by the author. Actions:
1610              
1611             =over
1612              
1613             =item o Runs dot on all data/*.gv files; outputs to html/*.svg
1614              
1615             =item o Runs scripts/generate.demo.pl; outputs to html/index.html
1616              
1617             =item o Copies html/* to various places
1618              
1619             =back
1620              
1621             =item o generate.svg.sh
1622              
1623             Convert all data/*.gv into html/*.svg.
1624              
1625             Used by generate.demo.sh.
1626              
1627             =item o gv2svg.sh
1628              
1629             Converts one data/*.gv file into $DR/Perl-modules/html/graphviz2.marpa/*.svg.
1630              
1631             =item o pod2html.sh
1632              
1633             Converts all *.pm files to *.html, and copies them into my web server's dir structure (in Debian's
1634             RAM disk).
1635              
1636             =item o test.1.sh
1637              
1638             Runs both the parser and C<dot> so I can compare the output.
1639              
1640             =item o test.html.pl
1641              
1642             Uses method perform_1_test() in L, to test the stand-alone BNF used for
1643             HTML-like tables.
1644              
1645             Note: t/test.t also calls perform_1_test().
1646              
1647             =item o test.utf8.sh
1648              
1649             Tests one data/utf8*.gv file more thoroughly than test.1.sh does.
1650              
1651             =back
1652              
1653             =head1 Distributions
1654              
1655             This module is available as a Unix-style distro (*.tgz).
1656              
1657             See L
1658             for help on unpacking and installing distros.
1659              
1660             =head1 Installation
1661              
1662             Install L<GraphViz2::Marpa> as you would for any C<Perl> module:
1663              
1664             Run:
1665              
1666             cpanm GraphViz2::Marpa
1667              
1668             or run:
1669              
1670             sudo cpan GraphViz2::Marpa
1671              
1672             or unpack the distro, and then either:
1673              
1674             perl Build.PL
1675             ./Build
1676             ./Build test
1677             sudo ./Build install
1678              
1679             or:
1680              
1681             perl Makefile.PL
1682             make (or dmake or nmake)
1683             make test
1684             make install
1685              
1686             =head1 Constructor and Initialization
1687              
1688             C<new()> is called as C<< my($g2m) = GraphViz2::Marpa -> new(k1 => v1, k2 => v2, ...) >>.
1689              
1690             It returns a new object of type C<GraphViz2::Marpa>.
1691              
1692             Key-value pairs accepted in the parameter list (see corresponding methods for details
1693             [e.g. L]):
1694              
1695             =over 4
1696              
1697             =item o description => $graphDescription
1698              
1699             Read the L graph definition from the command line.
1700              
1701             You are strongly encouraged to surround this string with '...' to protect it from your shell.
1702              
1703             See also the 'input_file' option to read the description from a file.
1704              
1705             The 'description' option takes precedence over the 'input_file' option.
1706              
1707             Default: ''.
1708              
1709             =item o input_file => $aDotInputFileName
1710              
1711             Read the L graph definition from a file.
1712              
1713             See also the 'description' option to read the graph definition from the command line.
1714              
1715             The 'description' option takes precedence over the 'input_file' option.
1716              
1717             Default: ''.
1718              
1719             See the distro for data/*.gv.
1720              
1721             =item o logger => $aLoggerObject
1722              
1723             Specify a logger compatible with L<Log::Handler>, for the lexer and parser to use.
1724              
1725             Default: A logger of type L<Log::Handler> which writes to the screen.
1726              
1727             To disable logging, just set 'logger' to the empty string (not undef).
1728              
1729             =item o maxlevel => $logOption1
1730              
1731             This option affects L<Log::Handler>.
1732              
1733             See the L<Log::Handler::Levels> docs.
1734              
1735             Default: 'notice'.
1736              
1737             =item o minlevel => $logOption2
1738              
1739             This option affects L<Log::Handler>.
1740              
1741             See the L<Log::Handler::Levels> docs.
1742              
1743             Default: 'error'.
1744              
1745             No lower levels are used.
1746              
1747             =item o output_file => aRenderedDotInputFileName
1748              
1749             Specify the name of a file for the renderer to write.
1750              
1751             That is, write the DOT-style graph definition to a file.
1752              
1753             When this file and the input file are both run through C<dot>, they should produce identical *.svg
1754             files.
1755              
1756             If an output file name is specified, an object of type L<GraphViz2::Marpa::Renderer::Graphviz> is
1757             created and called after the input file has been successfully parsed.
1758              
1759             Default: ''.
1760              
1761             The default means the renderer is not called.
1762              
1763             =item o renderer => aGraphViz2::Marpa::Renderer::Graphviz-compatible object
1764              
1765             Specify a renderer for the parser to use.
1766              
1767             See C<output_file> just above.
1768              
1769             Default: undef.
1770              
1771             If an output file is specified, then an object of type L<GraphViz2::Marpa::Renderer::Graphviz>
1772             is created and its C<run()> method is called.
1773              
1774             =item o trace_terminals => $Boolean
1775              
1776             This allows g2m.pl to control the C<trace_terminals> setting passed to L<Marpa::R2::Scanless::R>.
1777              
1778             =back
1779              
1780             =head1 Methods
1781              
1782             =head2 clean_before($s)
1783              
1784             Clean the given string before passing it to Marpa.
1785              
1786             =head2 clean_after($s)
1787              
1788             Clean the given string before storing it in the tree.
1789              
1790             =head2 decode_port_compass($name)
1791              
1792             Returns a 2-element array for the given DOT node name.
1793              
1794             =over 4
1795              
1796             =item o [0]: The node name without any port+compass suffix
1797              
1798             =item o [1]: The port+compass suffix (prefixed by ':'), or ''
1799              
1800             =back
1801              
1802             =head2 decode_node($node)
1803              
1804             Returns a hashref of the tree node's name and attributes.
1805              
1806             Key => Value pairs:
1807              
1808             =over 4
1809              
1810             =item o id => $node -> name
1811              
1812             This identifies the type of tree node. It has values like 'node_id', 'edge_id', 'literal', etc.
1813             These values come from the grammar.
1814              
1815             =item o name => $$attributes{value}
1816              
1817             This is the name of the tree node. The value comes from the input stream.
1818              
1819             But, if C is 'node_id', then C is the DOT node's name without any port+compass suffix.
1820              
1821             =item o port => The DOT node name's port+compass suffix (prefixed by ':'), or ''
1822              
1823             This value comes from the grammar.
1824              
1825             =item o type => $$attributes{type}
1826              
1827             This has values like 'node_id', 'open_bracket', etc. In fact, these are the names of lexemes.
1828             These values come from the grammar.
1829              
1830             =item o uid => $$attributes{uid}
1831              
1832             This is the unique uid of the tree node.
1833              
1834             =item o value => $$attributes{value}
1835              
1836             This is usually a copy of the C attribute. The value comes from the input stream.
1837              
1838             If the C<type> is 'node_id' then this value will be the DOT node's name including any
1839             port+compass suffix.
1840              
1841             =back
1842              
1843             =head2 decode_tree($tree)
1844              
1845             Returns a hashref of the tree's digraph/graph and strict attributes. These are extracted from the
1846             prolog of the tree, which means $tree must be a whole tree, and not just a node within a whole
1847             tree.
1848              
1849             Key => Value pairs:
1850              
1851             =over 4
1852              
1853             =item o digraph => 'digraph' || 'graph'
1854              
1855             Default: 'digraph'.
1856              
1857             =item o strict => 'strict' || '' (empty string)
1858              
1859             Default: ''.
1860              
1861             =back
1862              
1863             =head2 description([$graph])
1864              
1865             The [] indicate an optional parameter.
1866              
1867             Get or set the L graph definition string.
1868              
1869             The value supplied by the 'description' option takes precedence over the value read from the
1870             'input_file'.
1871              
1872             See also L.
1873              
1874             'description' is a parameter to L. See L for details.
1875              
1876             =head2 hashref2string($h)
1877              
1878             Convert the keys and values of $h to a string, including '{' and '}'.
1879              
1880             Defaults to '{}' if $h is not defined.
1881              
1882             =head2 input_file([$graph_file_name])
1883              
1884             Here, the [] indicate an optional parameter.
1885              
1886             Get or set the name of the file to read the L graph
1887             definition from.
1888              
1889             The value supplied by the 'description' option takes precedence over the value read from the
1890             'input_file'.
1891              
1892             See also the L method.
1893              
1894             'input_file' is a parameter to L. See L for details.
1895              
1896             =head2 log($level, $s)
1897              
1898             If a logger is defined, this logs the message $s at level $level.
1899              
1900             =head2 logger([$logger_object])
1901              
1902             Here, the [] indicate an optional parameter.
1903              
1904             Get or set the logger object.
1905              
1906             To disable logging, just set 'logger' to the empty string (not undef), in the call to L.
1907              
1908             This logger is passed to other modules.
1909              
1910             'logger' is a parameter to L. See L for details.
1911              
1912             =head2 maxlevel([$string])
1913              
1914             Here, the [] indicate an optional parameter.
1915              
1916             Get or set the value used by the logger object.
1917              
1918             This option is only used if an object of type L<Log::Handler> is created.
1919             See L<Log::Handler::Levels>.
1920              
1921             'maxlevel' is a parameter to L. See L for details.
1922              
1923             =head2 minlevel([$string])
1924              
1925             Here, the [] indicate an optional parameter.
1926              
1927             Get or set the value used by the logger object.
1928              
1929             This option is only used if an object of type L<Log::Handler> is created.
1930             See L<Log::Handler::Levels>.
1931              
1932             'minlevel' is a parameter to L. See L for details.
1933              
1934             =head2 new()
1935              
1936             See L for details on the parameters accepted by L.
1937              
1938             =head2 next_few_chars($s, $offset)
1939              
1940             Returns a substring of $s, starting at $offset, for use in progress messages.
1941              
1942             The default string length returned is 20 characters.
1943              
1944             =head2 output_file([$file_name])
1945              
1946             Here, the [] indicate an optional parameter.
1947              
1948             Get or set the name of the file for the renderer to write.
1949              
1950             If an output file name is specified, an object of type L is
1951             created and called after the input file has been successfully parsed.
1952              
1953             'output_file' is a parameter to L. See L for details.
1954              
1955             =head2 renderer([$renderer_object])
1956              
1957             Here, the [] indicate an optional parameter.
1958              
1959             Get or set the renderer object.
1960              
1961             This renderer is called if C is given a value.
1962              
1963             'renderer' is a parameter to L. See L for details.
1964              
1965             =head2 run()
1966              
1967             This is the only method the caller needs to call. All parameters are supplied to L
1968             (or via other methods before C is called).
1969              
1970             See scripts/g2m.pl.
1971              
1972             Returns 0 for success and 1 for failure.
1973              
1974             =head2 trace_terminals([$Boolean])
1975              
1976             Here, the [] indicate an optional parameter.
1977              
1978             Get or set the C option passed to L.
1979              
1980             =head1 FAQ
1981              
1982             =head2 How is the parsed data held in RAM?
1983              
1984             The parsed output is held in a tree managed by L.
1985              
1986             Here and below, the word C<node> (usually) refers to nodes in this tree, not Graphviz-style nodes.
1987              
1988             The root node always looks like this when printed by Tree::DAG_Node's tree2string() method:
1989              
1990             root. Attributes: {node=>"root", port=>"", type=>"root_literal", uid=>"0", value=>"root"}
1991              
1992             Interpretation:
1993              
1994             =over 4
1995              
1996             =item o The node name
1997              
1998             Here, C.
1999              
2000             =item o The node's attributes
2001              
2002             Key fields:
2003              
2004             =over 4
2005              
2006             =item o node
2007              
2008             The name of the DOT node without any port+compass suffix. Here C.
2009              
2010             =item o port
2011              
2012             The port+compass suffix of the DOT node name, if any, else ''. Here the empty string.
2013              
2014             =item o type
2015              
2016             Here, C.
2017              
2018             The type (or name) of the value. The word 'name' is not used to avoid confusion with the name of the
2019             node.
2020              
2021             =item o uid
2022              
2023             A unique integer assigned to each node. Counts up from 0. Not used.
2024              
2025             =item o value
2026              
2027             The value of the node.
2028              
2029             Here, C.
2030              
2031             =back
2032              
2033             =back
2034              
2035             =head2 Can you explain this tree in more detail?
2036              
2037             Sure. Firstly, we examine a sample graph, assuming the module's pre-reqs are installed.
2038             Let's use data/10.gv. Here it is as an
2039             L.
2040              
2041             Run one of these:
2042              
2043             scripts/g2m.sh data/10.gv -max info
2044             perl -Ilib scripts/g2m.pl -input_file data/10.gv -max info
2045              
2046             The former echoes the input file to STDOUT before running the latter.
2047              
2048             Using C<-max notice>, which is the default, produces no output from C.
2049              
2050             This is the input:
2051              
2052             STRICT DiGraph graph_10_01
2053             {
2054             node_10_01_1 [fillcolor = red, style = filled]
2055             node_10_01_2 [fillcolor = green, style = filled]
2056              
2057             node_10_01_1 -> node_10_01_2 [arrowtail = dot, arrowhead = odot]
2058             }
2059              
2060             And this is the output:
2061              
2062             Parsed tree:
2063             root. Attributes: {name => "root", port => "", type => "root_literal", uid => "0", value => "root"}
2064             |--- prolog. Attributes: {name => "prolog", port => "", type => "prolog_literal", uid => "1", value => "prolog"}
2065             | |--- literal. Attributes: {name => "strict", port => "", type => "strict_literal", uid => "3", value => "strict"}
2066             | |--- literal. Attributes: {name => "digraph", port => "", type => "digraph_literal", uid => "4", value => "digraph"}
2067             |--- graph. Attributes: {name => "graph", port => "", type => "graph_literal", uid => "2", value => "graph"}
2068             |--- graph_id. Attributes: {name => "graph_10_01", port => "", type => "graph_id", uid => "5", value => "graph_10_01"}
2069             |--- literal. Attributes: {name => "{", port => "", type => "open_brace", uid => "6", value => "{"}
2070             | |--- node_id. Attributes: {name => "node_10_01_1", port => "", type => "node_id", uid => "7", value => "node_10_01_1"}
2071             | | |--- literal. Attributes: {name => "[", port => "", type => "open_bracket", uid => "8", value => "["}
2072             | | |--- attribute. Attributes: {name => "red", port => "", type => "fillcolor", uid => "9", value => "red"}
2073             | | |--- attribute. Attributes: {name => "filled", port => "", type => "style", uid => "10", value => "filled"}
2074             | | |--- literal. Attributes: {name => "]", port => "", type => "close_bracket", uid => "11", value => "]"}
2075             | |--- node_id. Attributes: {name => "node_10_01_2", port => "", type => "node_id", uid => "12", value => "node_10_01_2"}
2076             | | |--- literal. Attributes: {name => "[", port => "", type => "open_bracket", uid => "13", value => "["}
2077             | | |--- attribute. Attributes: {name => "green", port => "", type => "fillcolor", uid => "14", value => "green"}
2078             | | |--- attribute. Attributes: {name => "filled", port => "", type => "style", uid => "15", value => "filled"}
2079             | | |--- literal. Attributes: {name => "]", port => "", type => "close_bracket", uid => "16", value => "]"}
2080             | |--- node_id. Attributes: {name => "node_10_01_1", port => "", type => "node_id", uid => "17", value => "node_10_01_1"}
2081             | |--- edge_id. Attributes: {name => "->", port => "", type => "directed_edge", uid => "18", value => "->"}
2082             | |--- node_id. Attributes: {name => "node_10_01_2", port => "", type => "node_id", uid => "19", value => "node_10_01_2"}
2083             | |--- literal. Attributes: {name => "[", port => "", type => "open_bracket", uid => "20", value => "["}
2084             | |--- attribute. Attributes: {name => "dot", port => "", type => "arrowtail", uid => "21", value => "dot"}
2085             | |--- attribute. Attributes: {name => "odot", port => "", type => "arrowhead", uid => "22", value => "odot"}
2086             | |--- literal. Attributes: {name => "]", port => "", type => "close_bracket", uid => "23", value => "]"}
2087             |--- literal. Attributes: {name => "}", port => "", type => "close_brace", uid => "24", value => "}"}
2088             Parse result: 0 (0 is success)
2089              
2090             You can see from this output that words special to Graphviz (e.g. STRICT) are accepted no matter
2091             what case they are in. Such tokens are stored in lower-case.
2092              
2093             A more detailed analysis follows.
2094              
2095             The C<root> node has 2 daughters:
2096              
2097             =over 4
2098              
2099             =item o The C sub-tree
2100              
2101             The C node is the root of a sub-tree holding everything before the graph's ID, if any.
2102              
2103             The node is called C, and its hashref of attributes is
2104             C<< {type => "prolog_literal", uid => "1", value => "prolog"} >>.
2105              
2106             It has 1 or 2 daughters. The possibilities are:
2107              
2108             =over 4
2109              
2110             =item o Input: 'digraph ...'
2111              
2112             The 1 daughter is named C, and its attributes are
2113             C<< {type => "digraph_literal", uid => "3", value => "digraph"} >>.
2114              
2115             =item o Input: 'graph ...'
2116              
2117             The 1 daughter is named C, and its attributes are
2118             C<< {type => "graph_literal", uid => "3", value => "graph"} >>.
2119              
2120             =item o Input: 'strict digraph ...'
2121              
2122             The 2 daughters are named C, and their attributes are, respectively,
2123             C<< {type => "strict_literal", uid => "3", value => "strict"} >> and
2124             C<< {type => "digraph_literal", uid => "4", value => "digraph"} >>.
2125              
2126             =item o Input: 'strict graph ...'
2127              
2128             The 2 daughters are named C, and their attributes are, respectively,
2129             C<< {type => "strict_literal", uid => "3", value => "strict"} >> and
2130             C<< {type => "graph_literal", uid => "4", value => "graph"} >>.
2131              
2132             =back
2133              
2134             And yes, the graph ID, if any, is under the C node. The reason for this is that for every
2135             subgraph within the graph, the same structure applies: First the (sub)graph ID, then a literal
2136             '{', then that (sub)graph's details, and finally a literal '}'.
2137              
2138             =item o The 'graph' sub-tree
2139              
2140             The C node is the root of a sub-tree holding everything about the graph, including the graph's
2141             ID, if any.
2142              
2143             The node is called C, and its hashref of attributes is
2144             C<< {type => "graph_literal", uid => "2", value => "graph"} >>.
2145              
2146             The C node has as many daughters, with their own daughters, as is necessary to hold the
2147             output of parsing the remainder of the input.
2148              
2149             In particular, if the input graph has an ID, i.e. the input is of the form 'digraph my_id ...'
2150             (or various versions thereof) then the 1st daughter will be called C, and its attributes
2151             will be C<< {type => "node_id", uid => "5", value => "my_id"} >>.
2152              
2153             Further, the 2nd daughter will be called C<literal>, and its attributes will be
2154             C<< {type => "open_brace", uid => "6", value => "{"} >>. A subsequent daughter will eventually (for a
2155             syntax-error-free input file, of course) also be called C<literal>, and its attributes will be
2156             C<< {type => "close_brace", uid => "#", value => "}"} >>.
2157              
2158             Naturally, if the graph has no ID (i.e. input lacks the 'my_id' token) then the uids will differ
2159             slightly.
2160              
2161             As mentioned, this pattern of optional (sub)graph id followed by a matching pair of '{', '}' nodes,
2162             is used for all graphs and subgraphs.
2163              
2164             In the case the input contains an explicit C, then just before the node representing
2165             'my_id' or '{', there will be another node representing the C token.
2166              
2167             Its name will be C<literal>, and its attributes will be
2168             C<< {type => "subgraph_literal", uid => "#", value => "subgraph"} >>.
2169              
2170             =back
2171              
2172             =head2 How many different names can these nodes have?
2173              
2174             The list of possible node names follows. You should always examine the C and C keys of
2175             the node's attributes to determine the exact nature of the node.
2176              
2177             =over 4
2178              
2179             =item o attribute
2180              
2181             In this case, the node's attributes contain a hashref like
2182             {type => "arrowhead", uid => "33", value => "odiamond"}, meaning the C field holds the type
2183             (i.e. name) of the attribute, and the 'value' field holds the value of the attribute.
2184              
2185             =item o class
2186              
2187             This is used when any of C, C, or C appear at the start of the (sub)graph, and
2188             is the mother of the attributes attached to the class. The C of the attribute will be
2189             C, C, or C.
2190              
2191             The 1st and last daughters will be literals whose attribute values are '[' and ']' respectively,
2192             and the middle daughter(s) will be nodes of type C (as just discussed).
2193              
2194             =item o edge_id
2195              
2196             The C of the attribute will be either '--' or '->'.
2197              
2198             Thus the C of the edge will be the previous daughter (node or subgraph), and the C of
2199             the edge will be the next.
2200              
2201             Samples are:
2202              
2203             n1 -> n2
2204             n1 -> {n2}
2205             {n1} -> n2
2206              
2207             In a L of nodes, the last node in
2208             the chain may have daughters that are the attributes of each edge in the chain. This is how
2209             Graphviz syntax attaches edge attributes to a path. The class C can also be used to provide
2210             attributes for the edge.
2211              
2212             =item o graph
2213              
2214             There is only ever 1 node called C. This tree node is always present.
2215              
2216             =item o graph_id
2217              
2218             There is only ever 1 node called C.
2219              
2220             If present, its mother must be the tree node called C<graph>, in which case it will be the first
2221             daughter of C<graph>.
2222              
2223             But, it will be absent if the graph is unnamed, as in strict digraph /* no name */ {...}.
2224              
2225             =item o literal
2226              
2227             C is the name of some nodes, with the C key in the attributes having one of these
2228             values:
2229              
2230             =over 4
2231              
2232             =item o {
2233              
2234             Indicates the start of a (sub)graph.
2235              
2236             =item o }
2237              
2238             Indicates the end of a (sub)graph.
2239              
2240             =item o [
2241              
2242             This indicates the start of a set of attributes for a specific class, edge or node, or the
2243             edge attributes at the end of a path.
2244              
2245             The 1st and last daughters will be literals whose attribute C keys are '[' and ']'
2246             respectively.
2247              
2248             Between these 2 nodes will be 1 node for each attribute, as seen above with
2249             C<< edge ["color" = "green",] >>.
2250              
2251             Note: Graphviz allows an abbreviated syntax for setting the attributes of a (sub)graph. So, instead
2252             of needing:
2253              
2254             graph [rankdir = LR]
2255              
2256             You can just use:
2257              
2258             rankdir = LR
2259              
2260             In such cases, these attributes are not surrounded by '[' and ']'.
2261              
2262             =item o ]
2263              
2264             See the previous point.
2265              
2266             =item o digraph_literal
2267              
2268             =item o graph_literal
2269              
2270             =item o strict_literal
2271              
2272             =item o subgraph_literal
2273              
2274             =back
2275              
2276             =item o node_id
2277              
2278             The C of the attributes is the name of the graph, a node, or a subgraph.
2279              
2280             Note: A node name can appear more than once in succession, either as a declaration of the node's
2281             existence and then as the tail of an edge, or, as in this fragment of data/56.gv:
2282              
2283             node [shape=rpromoter colorscheme=rdbu5 color=1 style=filled fontcolor=3]; Hef1a; TRE; UAS;
2284             Hef1aLacOid; Hef1aLacOid [label="Hef1a-LacOid"];
2285              
2286             This is a case where tree compression could be done, but isn't done yet.
2287              
2288             =item o prolog
2289              
2290             There is only ever 1 node called C. This tree node is always present.
2291              
2292             =item o root
2293              
2294             There is only ever 1 node called C. This tree node is always present.
2295              
2296             =back
2297              
2298             =head2 How are nodes, ports and compass points represented in the (above) tree?
2299              
2300             Input contains this fragment of data/17.02.gv:
2301              
2302             node_17_02_1:p11 -> node_17_02_2:p22:s
2303             [
2304             arrowhead = "odiamond";
2305             arrowtail = "odot",
2306             color = red
2307             dir = both;
2308             ];
2309              
2310             The output log contains:
2311              
2312             | |--- node_id. Attributes: {node => "node_17_02_1", port => ":p11", type => "node_id", uid => "29", value => "node_17_02_1:p11"}
2313             | |--- edge_id. Attributes: {name => "directed_edge", node => "->", port => "", uid => "30", value => "->"}
2314             | |--- node_id. Attributes: {node => "node_17_02_2", port => ":p22:s", type => "node_id", uid => "31", value => "node_17_02_2:p22:s"}
2315              
2316             You can see the ports and compass points have been incorporated into the C<value> attribute, and
2317             that its value comes from concatenating the values of the C<node> and C<port> attributes.
2318              
2319             See L and L.
2320              
2321             =head2 How are HTML-like labels handled?
2322              
2323             The main grammar (See C<< $self -> bnf >> in the source) is used to hold the definitions of strings
2324             (See C). Thus Marpa, via the main parser C<< $self -> recce >>, is used to identify
2325             all types of strings.
2326              
2327             Then, if the string starts with '<', C<_process_html()> is called, and has a separate grammar
2328             (See C). This in turn uses a separate grammar object (C) and a separate
2329             parser (C). C<_process_html()> traps any I parsing errors, found when lexemes
2330             (text) follows the HTML, and saves the label's value. This method also sets $pos to the first char
2331             after the HTML, so when control returns to the main parser, and the main grammar, the main parser
2332             is not aware of the existence of the HTML, and just keeps on parsing from where the HTML parser
2333             finished.
2334              
2335             =head2 How are comments stored in the tree?
2336              
2337             They aren't stored, they are discarded. And this in turn means rendered C files can't ever
2338             contain them.
2339              
2340             =head2 What is the homepage of Marpa?
2341              
2342             L<http://savage.net.au/Marpa.html>.
2343              
2344             That page has a long list of links.
2345              
2346             =head2 Why do I get error messages like the following?
2347              
2348             Error: :1: syntax error near line 1
2349             context: digraph >>> Graph <<< {
2350              
2351             Graphviz reserves some words as keywords, meaning they can't be used as an ID, e.g. for the
2352             name of the graph.
2353              
2354             So, don't do this:
2355              
2356             strict graph graph{...}
2357             strict graph Graph{...}
2358             strict graph strict{...}
2359             etc...
2360              
2361             Likewise for non-strict graphs, and digraphs. You can however add double-quotes around such
2362             reserved words:
2363              
2364             strict graph "graph"{...}
2365              
2366             Even better, use a more meaningful name for your graph...
2367              
2368             The keywords are: node, edge, graph, digraph, subgraph and strict. Compass points are not keywords.
2369              
2370             See L in the discussion of the syntax of DOT
2371             for details.
2372              
2373             =head2 Does this package support Unicode in the input C file?
2374              
2375             Yes.
2376              
2377             But you are I<strongly encouraged> to put node names using utf8 glyphs in double-quotes, even though
2378             it is not always necessary.
2379              
2380             See xt/author/data/utf8.*.gv and scripts/test.utf8.sh. In particular, see xt/author/data/utf8.01.gv.
2381              
2382             =head2 How can I switch from Marpa::XS to Marpa::PP?
2383              
2384             Don't use either of them. Use L<Marpa::R2>.
2385              
2386             =head2 If I input x.old.gv and output x.new.gv, should these 2 files be identical?
2387              
2388             Yes - at least in the sense that running C<dot> on them will produce the same output files.
2389             This is assuming the default renderer is used.
2390              
2391             See scripts/test.utf8.pl for how to do just that.
2392              
2393             As mentioned just above, comments in input files are discarded, so they can never be in the output
2394             file.
2395              
2396             =head2 How are custom graph attributes handled?
2397              
2398             They are treated like any other attribute. That is, syntax checking is not performed at that level,
2399             but only at the grammatical level. If the construct matches the grammar, this code accepts it.
2400              
2401             See data/32.gv.
2402              
2403             =head2 How are the demo files generated?
2404              
2405             See scripts/generate.demo.sh.
2406              
2407             =head2 How do I run author tests?
2408              
2409             This runs both standard and author tests:
2410              
2411             shell> perl Build.PL; ./Build; ./Build test; ./Build authortest
2412              
2413             There are currently (V 2.00) 91 standard tests, and in xt/author/*.t, 4 pod tests and 355 author
2414             tests. Combined, they take almost 2m 30s to run.
2415              
2416             =head1 See Also
2417              
2418             L<MarpaX::Demo::StringParser>. The significance of this module is that during the re-write of
2419             GraphViz2::Marpa V 1 => 2, the string-handling code was built-up step-by-step in
2420             L<MarpaX::Demo::StringParser>.
2421              
2422             Later, that code was improved within this module, and will be back-ported into
2423             MarpaX::Demo::StringParser. In particular, the technique used in _process_html() really should be
2424             back-ported.
2425              
2426             Also, see L<GraphViz2::Marpa::PathUtils> for 2 ways the tree built by this module can be processed
2427             to provide analysis of the structure of the graph.
2428              
2429             =head1 Machine-Readable Change Log
2430              
2431             The file Changes was converted into Changelog.ini by L<Module::Metadata::Changes>.
2432              
2433             =head1 Version Numbers
2434              
2435             Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
2436              
2437             =head1 Thanks
2438              
2439             Many thanks are due to the people who worked on L<Graphviz|http://www.graphviz.org/>.
2440              
2441             Jeffrey Kegler wrote Marpa and L<Marpa::R2>.
2442              
2443             And thanks to rns (Ruslan Shvedov) for writing the grammar for double-quoted strings used in
2444             L<MarpaX::Demo::StringParser>'s scripts/quoted.strings.02.pl. I adapted it to HTML (see
2445             scripts/quoted.strings.05.pl in that module), and then incorporated the grammar into this module.
2446             For details, search for C, C and C in the source of the current
2447             module.
2448              
2449             =head1 Repository
2450              
2451             L<https://github.com/ronsavage/GraphViz2-Marpa>
2452              
2453             =head1 Support
2454              
2455             Email the author, or log a bug on RT:
2456              
2457             L<https://rt.cpan.org/Public/Dist/Display.html?Name=GraphViz2::Marpa>.
2458              
2459             =head1 Author
2460              
2461             L<GraphViz2::Marpa> was written by Ron Savage I<E<lt>ron@savage.net.auE<gt>> in 2012.
2462              
2463             Marpa's homepage: L<http://savage.net.au/Marpa.html>.
2464              
2465             My homepage: L<http://savage.net.au/>.
2466              
2467             =head1 Copyright
2468              
2469             Australian copyright (c) 2012, Ron Savage.
2470              
2471             All Programs of mine are 'OSI Certified Open Source Software';
2472             you can redistribute them and/or modify them under the terms of
2473             The Perl License, a copy of which is available at:
2474             http://dev.perl.org/licenses/
2475              
2476             =cut