File Coverage

blib/lib/GraphViz2/Marpa.pm

Criterion	Covered	Total	%
statement	33	382	8.6
branch	0	146	0.0
condition	0	14	0.0
subroutine	11	36	30.5
pod	9	10	90.0
total	53	588	9.0

line	stmt	bran	cond	sub	pod	time	code
1							package GraphViz2::Marpa;
2
3	2			2		1728	use strict;
	2					2
	2					44
4	2			2		7	use warnings;
	2					2
	2					55
5	2			2		6	use warnings qw(FATAL utf8); # Fatalize encoding glitches.
	2					4
	2					82
6
7	2			2		767	use GraphViz2::Marpa::Renderer::Graphviz;
	2					6
	2					14
8
9	2			2		63	use Log::Handler;
	2					4
	2					19
10
11	2			2		971	use Marpa::R2;
	2					188942
	2					32
12
13	2			2		72	use Moo;
	2					3
	2					15
14
15	2			2		1823	use Path::Tiny; # For path().
	2					15771
	2					106
16
17	2			2		1333	use Tree::DAG_Node;
	2					26848
	2					23
18
19	2			2		69	use Types::Standard qw/Any ArrayRef Int HashRef Str/;
	2					2
	2					19
20
21	2			2		1449	use Try::Tiny;
	2					4
	2					7648
22
23							has bnf =>
24							(
25							default => sub{return ''},
26							is => 'rw',
27							isa => Any,
28							required => 0,
29							);
30
31							has bnf4html =>
32							(
33							default => sub{return ''},
34							is => 'rw',
35							isa => Any,
36							required => 0,
37							);
38
39							has brace_count =>
40							(
41							default => sub{return 0},
42							is => 'rw',
43							isa => Int,
44							required => 0,
45							);
46
47							has description =>
48							(
49							default => sub{return ''},
50							is => 'rw',
51							isa => Str,
52							required => 0,
53							);
54
55							has grammar =>
56							(
57							default => sub {return ''},
58							is => 'rw',
59							isa => Any,
60							required => 0,
61							);
62
63							has grammar4html =>
64							(
65							default => sub {return ''},
66							is => 'rw',
67							isa => Any,
68							required => 0,
69							);
70
71							has graph_text =>
72							(
73							default => sub{return ''},
74							is => 'rw',
75							isa => Str,
76							required => 0,
77							);
78
79							has input_file =>
80							(
81							default => sub{return ''},
82							is => 'rw',
83							isa => Str,
84							required => 0,
85							);
86
87							has known_events =>
88							(
89							default => sub{return {} },
90							is => 'rw',
91							isa => HashRef,
92							required => 0,
93							);
94
95							has logger =>
96							(
97							default => sub{return undef},
98							is => 'rw',
99							isa => Any,
100							required => 0,
101							);
102
103							has maxlevel =>
104							(
105							default => sub{return 'notice'},
106							is => 'rw',
107							isa => Str,
108							required => 0,
109							);
110
111							has minlevel =>
112							(
113							default => sub{return 'error'},
114							is => 'rw',
115							isa => Str,
116							required => 0,
117							);
118
119							has output_file =>
120							(
121							default => sub{return ''},
122							is => 'rw',
123							isa => Str,
124							required => 0,
125							);
126
127							has recce =>
128							(
129							default => sub{return ''},
130							is => 'rw',
131							isa => Any,
132							required => 0,
133							);
134
135							has recce4html =>
136							(
137							default => sub{return ''},
138							is => 'rw',
139							isa => Any,
140							required => 0,
141							);
142
143							has renderer =>
144							(
145							default => sub{return ''},
146							is => 'rw',
147							isa => Any,
148							required => 0,
149							);
150
151							has stack =>
152							(
153							default => sub{return []},
154							is => 'rw',
155							isa => ArrayRef,
156							required => 0,
157							);
158
159							has trace_terminals =>
160							(
161							default => sub{return 0},
162							is => 'rw',
163							isa => Int,
164							required => 0,
165							);
166
167							has tree =>
168							(
169							default => sub{return ''},
170							is => 'rw',
171							isa => Any,
172							required => 0,
173							);
174
175							has uid =>
176							(
177							default => sub{return 0},
178							is => 'rw',
179							isa => Int,
180							required => 0,
181							);
182
183							our $VERSION = '2.10';
184
185							# ------------------------------------------------
186
187							sub BUILD
188							{
189	0			0	0		my($self) = @_;
190
191	0	0					if (! defined $self -> logger)
192							{
193	0						$self -> logger(Log::Handler -> new);
194	0						$self -> logger -> add
195							(
196							screen =>
197							{
198							maxlevel => $self -> maxlevel,
199							message_layout => '%m',
200							minlevel => $self -> minlevel,
201							}
202							);
203							}
204
205							# Policy: Event names are always the same as the name of the corresponding lexeme.
206
207							$self -> bnf
208							(
209							<<'END_OF_GRAMMAR'
210
211							:default ::= action => [values]
212
213							lexeme default = latm => 1 # Longest Acceptable Token Match.
214
215							# Input stuff.
216
217							:start ::= graph_definition
218
219							graph_definition ::= prolog_tokens graph_statement
220
221							# Prolog stuff.
222
223							prolog_tokens ::= prolog_strict_and_id
224							\| prolog_strict_no_id
225							\| prolog_no_strict_but_id
226							\| prolog_no_strict_no_id
227
228							prolog_strict_and_id ::= strict_token graph_type graph_id_token
229
230							prolog_strict_no_id ::= strict_token graph_type
231
232							prolog_no_strict_but_id ::= graph_type graph_id_token
233
234							prolog_no_strict_no_id ::= graph_type
235
236							strict_token ::= strict_literal
237
238							graph_type ::= digraph_literal
239							\| graph_literal
240
241							graph_id_token ::= node_name
242
243							# Graph stuff.
244
245							graph_statement ::= open_brace statement_list close_brace
246
247							statement_list ::= statement_token*
248
249							statement_token ::= statement statement_terminator
250
251							# Statement stuff.
252
253							statement ::= assignment_statement
254							\| node_statement
255							\| edge_statement
256							\| subgraph_statement
257
258							statement_terminator ::= semicolon_literal
259							statement_terminator ::=
260
261							# Assignment stuff.
262
263							assignment_statements ::= assignment_statement*
264
265							assignment_statement ::= attribute_name ('=') attribute_value
266
267							# Node stuff
268
269							node_statement ::= node_name attribute_statements
270
271							# Attribute stuff.
272
273							attribute_statements ::= attribute_statement*
274
275							attribute_statement ::= open_bracket assignment_statements close_bracket
276
277							# Edge stuff
278
279							edge_statement ::= edge_lhs edge_rhs attribute_statements
280
281							edge_lhs ::= node_statement
282							\| subgraph_statement
283
284							edge_rhs ::= edge_name edge_lhs
285							\| edge_name edge_lhs edge_rhs
286
287							edge_name ::= directed_edge
288							\| undirected_edge
289
290							# Subgraph stuff.
291
292							subgraph_statement ::= subgraph_sequence attribute_statements
293
294							subgraph_sequence ::= subgraph_sub_and_id
295							\| subgraph_sub_no_id
296							\| subgraph_no_sub_no_id
297
298							subgraph_sub_and_id ::= subgraph_prefix subgraph_id_token graph_statement
299
300							subgraph_sub_no_id ::= subgraph_prefix graph_statement
301
302							subgraph_no_sub_no_id ::= graph_statement
303
304							subgraph_prefix ::= subgraph_literal
305
306							subgraph_id_token ::= subgraph_id
307
308							# Lexemes in alphabetical order.
309							# Quoted string handling copied from Marpa::R2's metag.bnf.
310
311							:lexeme ~ attribute_name pause => before event => attribute_name
312							attribute_name ~ string
313
314							:lexeme ~ attribute_value pause => before event => attribute_value
315							attribute_value ~ string
316
317							:lexeme ~ close_brace pause => before event => close_brace
318							close_brace ~ '}'
319
320							:lexeme ~ close_bracket pause => before event => close_bracket
321							close_bracket ~ ']'
322							close_bracket ~ '];'
323
324							:lexeme ~ digraph_literal pause => before event => digraph_literal
325							digraph_literal ~ 'digraph':i
326
327							:lexeme ~ directed_edge pause => before event => directed_edge
328							directed_edge ~ '->'
329
330							double_quoted_char_set ~ double_quoted_char+
331							double_quoted_char ~ escaped_char
332							\| [^\"\x{0A}\x{0B}\x{0C}\x{0D}\x{0085}\x{2028}\x{2029}]
333
334							escaped_char ~ '\' [[:print:]]
335
336							# Use ' here just for the UltraEdit syntax hiliter.
337
338							:lexeme ~ graph_literal pause => before event => graph_literal
339							graph_literal ~ 'graph':i
340
341							html_quoted_char_set ~ [.]+
342
343							:lexeme ~ node_name pause => before event => node_name
344							node_name ~ string
345
346							:lexeme ~ open_brace pause => before event => open_brace
347							open_brace ~ '{'
348
349							:lexeme ~ open_bracket pause => before event => open_bracket
350							open_bracket ~ '['
351
352							semicolon_literal ~ ';'
353
354							:lexeme ~ strict_literal pause => before event => strict_literal
355							strict_literal ~ 'strict':i
356
357							string ~ [\"] double_quoted_char_set [\"]
358							string ~ '<' html_quoted_char_set '>'
359							string ~ unquoted_char_set
360
361							:lexeme ~ subgraph_id pause => before event => subgraph_id
362							subgraph_id ~ string
363
364							:lexeme ~ subgraph_literal pause => before event => subgraph_literal
365							subgraph_literal ~ 'subgraph':i
366
367							:lexeme ~ undirected_edge pause => before event => undirected_edge
368							undirected_edge ~ '--'
369
370							# The '=' is necessary for cases like: 'name=value' in node_1 [name=value].
371
372							unquoted_char_set ~ unquoted_char+
373							unquoted_char ~ escaped_char
374							\| [^\s\[\]={}]
375
376							# Boilerplate.
377
378							:discard ~ separators
379							separators ~ [;,]
380
381							:discard ~ whitespace
382							whitespace ~ [\s]+
383
384							:discard ~
385							:discard ~
386							:discard ~
387
388							# C and C++ comment handling copied from MarpaX::Languages::C::AST.
389
390							~ '/' '/'
391
392							~
393
394							~ [^]
395							~ *
396							~ [^/*]
397							~ [*]+
398							~ [^]
399							~ []
400
401							~ '//'
402							~ [^\n]*
403
404							# Hash comment handling copied from Marpa::R2's metag.bnf.
405
406							~
407							\|
408
409							~ '#'
410
411							~ '#'
412
413							~ *
414
415							~ [\x{0A}\x{0B}\x{0C}\x{0D}\x{2028}\x{2029}]
416
417							~ [^\x{0A}\x{0B}\x{0C}\x{0D}\x{2028}\x{2029}]
418
419							END_OF_GRAMMAR
420	0						);
421
422	0						$self -> grammar
423							(
424							Marpa::R2::Scanless::G -> new
425							({
426							source => \$self -> bnf
427							})
428							);
429
430	0						$self -> recce
431							(
432							Marpa::R2::Scanless::R -> new
433							({
434							grammar => $self -> grammar,
435							ranking_method => 'high_rule_only',
436							trace_terminals => $self -> trace_terminals,
437							})
438							);
439
440	0						my(%event);
441
442	0						for my $line (split(/\n/, $self -> bnf) )
443							{
444	0	0					$event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
445							}
446
447	0						$self -> known_events(\%event);
448
449							# This grammar was devised by rns (Ruslan Shvedov) for nested, double-quoted strings.
450							# See MarpaX::Demo::SampleScipts and scripts/quoted.strings.05.pl.
451
452	0						$self -> bnf4html
453							(
454							<<'END_OF_GRAMMAR'
455							:default ::= action => [ values ]
456
457							lexeme default = latm => 1
458
459							string ::= '<' quoted '>'
460							quoted ::= item \| quoted item
461							item ::= string \| unquoted
462
463							unquoted ~ [^<>]+
464
465							:discard ~ whitespace
466							whitespace ~ [\s+]
467							END_OF_GRAMMAR
468							);
469
470	0						$self -> grammar4html
471							(
472							Marpa::R2::Scanless::G -> new
473							({
474							source => \$self -> bnf4html
475							})
476							);
477
478							# Since $self -> stack has not been initialized yet,
479							# we can't call _add_daughter() until after this statement.
480
481	0						$self -> tree(Tree::DAG_Node -> new({name => 'root', attributes => {name => 'root', port => '', type => 'root_literal', uid => $self -> uid, value => 'root'} }));
482	0						$self -> stack([$self -> tree -> root]);
483
484	0						for my $name (qw/prolog graph/)
485							{
486	0						$self -> _add_daughter($name, {type => "${name}_literal", value => $name});
487							}
488
489							# The 'prolog' daughter is the parent of all items in the prolog,
490							# so it gets pushed onto the stack.
491							# Later, when 'digraph' or 'graph' is encountered, the 'graph' daughter replaces it.
492
493	0						my(@daughters) = $self -> tree -> daughters;
494	0						my($index) = 0; # 0 => prolog, 1 => graph.
495	0						my($stack) = $self -> stack;
496
497	0						push @$stack, $daughters[$index];
498
499	0						$self -> stack($stack);
500
501							} # End of BUILD.
502
503							# ------------------------------------------------
504
505							sub _add_daughter
506							{
507	0			0			my($self, $name, $attributes) = @_;
508	0						my(@name) = $self -> decode_port_compass($$attributes{value});
509	0						$$attributes{name} = $name[0];
510	0						$$attributes{port} = $name[1];
511	0						$$attributes{uid} = $self -> uid($self -> uid + 1);
512	0						my($node) = Tree::DAG_Node -> new({name => $name, attributes => $attributes});
513	0						my($stack) = $self -> stack;
514
515	0						$$stack[$#$stack] -> add_daughter($node);
516
517							} # End of _add_daughter.
518
519							# ------------------------------------------------
520
521							sub _check4embedded_separator
522							{
523	0			0			my($self, $lexeme, $pos) = @_;
524
525							# Separators are [;,].
526							# The grammar allows them in things, like:
527							# o width=.1,height=.1. Accept floats.
528							# o fontsize=24,fontname="Arial". Accept integers.
529							# o color="slateblue",fontsize=24. Accept "...".
530							# o style=filled,color=white. Accent [A-Za-z]
531
532	0	0					my($numeric) = ($lexeme =~ /^(\d+\|\d+\.\d*\|\.\d+)[;,]/) ? $1 : undef;
533
534	0	0	0				if ($numeric \|\| ($lexeme =~ /^(".*"\|[A-Za-z]+)[;,]/s) )
535							{
536	0						my($s) = $lexeme;
537	0		0				$lexeme = $numeric \|\| $1;
538	0						$pos = $pos - length($s) + length($lexeme);
539							}
540
541	0						return ($lexeme, $pos);
542
543							} # End of _check4embedded_separator.
544
545							# ------------------------------------------------
546
547							sub clean_after
548							{
549	0			0	1		my($self, $s) = @_;
550
551							# The grammar allows things like 'xyz,', so clean them up.
552							# Also, trim spaces and then double-quotes. The reason for doing things in this order
553							# is that the user might have written " X ", so we don't remove the quotes first.
554
555	0	0					substr($s, -1, 1) = '' if (substr($s, -1, 1) eq ',');
556	0						$s =~ s/^\s+//;
557	0						$s =~ s/\s+$//;
558	0						$s =~ s/"(.*)"/$1/;
559
560	0						return $s;
561
562							} # End of clean_after.
563
564							# ------------------------------------------------
565
566							sub clean_before
567							{
568	0			0	1		my($self, $s) = @_;
569
570	0						$s =~ s/\s;\s$//;
571	0						$s =~ s/^\s+//;
572	0						$s =~ s/\s+$//;
573	0						$s =~ s/^(<)\s+/$1/;
574	0						$s =~ s/\s+(>)$/$1/;
575
576	0						return $s;
577
578							} # End of clean_before.
579
580							# ------------------------------------------------
581
582							sub decode_node
583							{
584	0			0	1		my($self, $node) = @_;
585	0						my($attributes) = $node -> attributes;
586
587							return
588							{
589							id => $node -> name,
590							name => $$attributes{name},
591							port => $$attributes{port},
592							type => $$attributes{type},
593							uid => $$attributes{uid},
594							value => $$attributes{value},
595	0						};
596
597							} # End of decode_node.
598
599							# --------------------------------------------------
600
601							sub decode_port_compass
602							{
603	0			0	1		my($self, $name) = @_;
604
605							# Remove :port:compass, if any, from name.
606							# But beware Perl-style node names like 'A::Class'.
607							# The (?=.) means there must be something after the last ':',
608							# which means we don't split 'A:' or 'A::'.
609
610	0						my(@field) = split(/(:(?!:)(?=.))/, $name);
611	0	0					$field[0] = $name if ($#field < 0);
612
613							# Restore Perl module names:
614							# o A: & : & B to A::B.
615							# o A: & : B: & : & C to A::B::C.
616
617	0		0				while ( ($field[0] =~ /:$/) && ($#field >= 2) )
618							{
619	0						splice(@field, 0, 3, "$field[0]:$field[2]");
620							}
621
622							# Restore:
623							# o : & port to :port.
624							# o : & port & : & compass to :port:compass.
625
626	0	0					splice(@field, 1, $#field, join('', @field[1 .. $#field]) ) if ($#field > 0);
627
628	0						my(@result);
629
630	0	0					if ($#field == 0)
631							{
632	0						@result = ($name, '');
633							}
634							else
635							{
636	0						@result = ($field[0], join('', @field[1 .. $#field]) );
637							}
638
639	0						return @result;
640
641							} # End of decode_port_compass.
642
643							# ------------------------------------------------
644
645							sub decode_tree
646							{
647	0			0	1		my($self, $tree) = @_;
648	0						my($prolog) =
649							{
650							digraph => 'digraph',
651							strict => '',
652							};
653
654							# Examine the daughters of the prolog to find the digraph/graph and strict attributes.
655
656	0						my($node_id);
657
658	0						for my $node ( ($tree -> daughters)[0] -> daughters)
659							{
660	0						$node_id = $self -> decode_node($node);
661	0	0					$$prolog{digraph} = 'graph' if ($$node_id{name} eq 'graph');
662	0	0					$$prolog{strict} = 'strict ' if ($$node_id{name} eq 'strict');
663							}
664
665	0						return $prolog;
666
667							} # End of decode_tree.
668
669							# ------------------------------------------------
670
671							sub _decode_result
672							{
673	0			0			my($self, $result) = @_;
674	0						my(@worklist) = $result;
675
676	0						my($obj);
677							my($ref_type);
678	0						my(@stack);
679
680							do
681	0						{
682	0						$obj = shift @worklist;
683	0						$ref_type = ref $obj;
684
685	0	0					if ($ref_type eq 'ARRAY')
		0
		0
686							{
687	0						unshift @worklist, @$obj;
688							}
689							elsif ($ref_type eq 'HASH')
690							{
691	0						push @stack, {%$obj};
692							}
693							elsif ($ref_type)
694							{
695	0						die "Unsupported object type $ref_type\n";
696							}
697							else
698							{
699	0						push @stack, $obj;
700							}
701
702							} while (@worklist);
703
704	0						return join('', @stack);
705
706							} # End of _decode_result.
707
708							# ------------------------------------------------
709
710							sub _dump_stack
711							{
712	0			0			my($self, $caller) = @_;
713
714	0						$self -> log(info => "\tStack @ $caller");
715
716	0						my($node_id);
717
718	0						for my $item (@{$self -> stack})
	0
719							{
720	0						$node_id = $self -> decode_node($item);
721
722	0						$self -> log(info => "\tUid: $$node_id{uid}. Id: $$node_id{id}. Name: $$node_id{name}");
723							}
724
725	0						$self -> log(debug => join("\n", @{$self -> tree -> tree2string}) );
	0
726
727							} # End of _dump_stack.
728
729							# ------------------------------------------------
730
731							sub hashref2string
732							{
733	0			0	1		my($self, $hashref) = @_;
734	0		0				$hashref \|\|= {};
735
736	0						return '{' . join(', ', map{qq\|$_ => "$$hashref{$_}"\|} sort keys %$hashref) . '}';
	0
737
738							} # End of hashref2string.
739
740							# ------------------------------------------------
741
742							sub _identify_lexeme
743							{
744	0			0			my($self, $string, $start, $span, $pos, $lexeme) = @_;
745
746	0						pos($string) = $start + $span;
747	0	0					$string =~ /\G\s*(\S)/ \|\| return;
748	0						my($literal) = $1;
749
750	0						my($type);
751
752	0	0					if (substr($lexeme, 0, 1) eq '{')
753							{
754	0						$pos++;
755
756	0						$span = 1;
757	0						$type = 'open_brace';
758							}
759							else
760							{
761	0	0					$type = ($literal eq '=') ? 'attribute_name' : 'node_name';
762							}
763
764	0						$self -> log(debug => "Disambiguated lexeme (2 of 2) \|$lexeme\| as '$type'. pos: $pos");
765
766	0						return ($type, $span, $pos);
767
768							} # End of _identify_lexeme.
769
770							# ------------------------------------------------
771
772							sub log
773							{
774	0			0	1		my($self, $level, $s) = @_;
775
776	0	0					$self -> logger -> log($level => $s) if ($self -> logger);
777
778							} # End of log.
779
780							# ------------------------------------------------
781
782							sub next_few_chars
783							{
784	0			0	1		my($self, $s, $offset) = @_;
785	0						$s = substr($s, $offset, 20);
786	0						$s =~ tr/\n/ /;
787	0						$s =~ s/^\s+//;
788	0						$s =~ s/\s+$//;
789
790	0						return $s;
791
792							} # End of next_few_chars.
793
794							# ------------------------------------------------
795
796							sub _process
797							{
798	0			0			my($self) = @_;
799	0						my($string) = $self -> clean_before($self -> graph_text);
800	0						my($length) = length $string;
801	0						my($format) = '%-20s %5s %5s %5s %-20s %-20s';
802	0						my($last_event) = '';
803	0						my($prolog_token) = qr/^(?:digraph\|graph\|strict)_literal$/;
804	0						my($pos) = 0;
805	0						my(%class) =
806							(
807							edge => 'class',
808							graph => 'class',
809							node => 'class',
810							);
811
812	0						$self -> log(debug => "Length of input: $length");
813	0						$self -> log(debug => sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme', 'Comment') );
814
815	0						my($event_name);
816							my(@fields);
817	0						my($lexeme);
818	0						my($node_name);
819	0						my($original_lexeme);
820	0						my($span, $start, $s, $stack);
821	0						my($temp, $type);
822
823							# We use read()/lexeme_read()/resume() because we pause at each lexeme.
824							# Also, in read(), we use $pos and $length to avoid reading Ruby Slippers tokens (if any).
825
826	0						for
827							(
828							$pos = $self -> recce -> read(\$string, $pos, $length);
829							$pos < $length;
830							$pos = $self -> recce -> resume($pos)
831							)
832							{
833	0						($start, $span) = $self -> recce -> pause_span;
834	0						($event_name, $span, $pos) = $self -> _validate_event($string, $start, $span, $pos);
835	0						$lexeme = $self -> recce -> literal($start, $span);
836	0						$original_lexeme = $lexeme;
837	0						$pos = $self -> recce -> lexeme_read($event_name);
838
839	0	0					die "lexeme_read($event_name) rejected lexeme \|$lexeme\|\n" if (! defined $pos);
840
841							# Special case.
842							# This 'if' matches the start of the big 'if' just below.
843							# It's here so that the log(debug) code is in the right order.
844
845	0	0	0				if ( ($event_name eq 'attribute_name') && (substr($lexeme, 0, 1) eq '[') )
846							{
847	0						$temp = '[';
848
849	0						$self -> log(debug => sprintf($format, 'open_bracket', $start, 1, $pos, $temp, '-') );
850	0						$self -> _process_bracket($temp, 'open_bracket');
851							}
852
853	0						$self -> log(debug => sprintf($format, $event_name, $start, $span, $pos, $lexeme, '-') );
854
855	0	0					if ($event_name eq 'attribute_name')
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
856							{
857							# Special cases.
858
859	0	0					if (substr($lexeme, 0, 1) eq '[')
860							{
861	0						$event_name = 'open_bracket'; # Sets $last_event at the end of the loop.
862	0						substr($lexeme, 0, 1) = '';
863							}
864
865	0						$fields[0] = $self -> clean_after($lexeme);
866							}
867							elsif ($event_name eq 'attribute_value')
868							{
869	0	0					if ($lexeme =~ /^
870							{
871							# Note: We pass in $start and it becomes $pos.
872
873	0						($lexeme, $pos) = $self -> _process_html(\$string, $start);
874
875	0						$self -> _add_daughter('attribute', {type => $fields[0], value => $lexeme});
876							}
877							else
878							{
879							# Special cases.
880							# Handle ']' and '];'.
881
882	0						$temp = '';
883	0	0					substr($lexeme, -1, 1) = '' if (substr($lexeme, -1, 1) eq ';');
884
885	0	0					if (substr($lexeme, -1, 1) eq ']')
886							{
887	0						$temp = ']';
888	0						substr($lexeme, -1, 1) = '';
889							}
890
891	0						($lexeme, $pos) = $self -> _check4embedded_separator($lexeme, $pos);
892	0						$lexeme = $self -> clean_after($lexeme);
893	0						$s = $self -> next_few_chars($string, $pos);
894
895	0	0					$self -> log(debug => "Lexeme \|$original_lexeme\| corrected to be \|$lexeme\|. pos: $pos. Next few char \|$s\|") if ($original_lexeme ne $lexeme);
896	0						$self -> _add_daughter('attribute', {type => $fields[0], value => $lexeme});
897
898	0						@fields = ();
899
900	0	0					if ($temp)
901							{
902	0						$event_name = 'close_bracket'; # Sets $last_event at the end of the loop.
903
904	0						$self -> log(debug => sprintf($format, $event_name, $start, 1, $pos, $temp, 'Adjusted event_name') );
905	0						$self -> _process_bracket($temp, $event_name);
906							}
907							}
908							}
909							elsif ($event_name eq 'close_brace')
910							{
911	0						$self -> _process_brace($lexeme, $event_name);
912							}
913							elsif ($event_name eq 'close_bracket')
914							{
915							# Special case.
916
917	0	0					substr($lexeme, -1, 1) = '' if (substr($lexeme, -1, 1) eq ';');
918	0						$s = $self -> next_few_chars($string, $pos);
919
920	0	0					$self -> log(debug => "Lexeme \|$original_lexeme\| corrected to be \|$lexeme\|. pos: $pos. Next few char \|$s\|") if ($original_lexeme ne $lexeme);
921	0						$self -> _process_bracket($lexeme, $event_name);
922							}
923							elsif ($event_name eq 'directed_edge')
924							{
925	0						$self -> _add_daughter('edge_id', {type => $event_name, value => $lexeme});
926							}
927							elsif ($event_name eq 'node_name')
928							{
929							# Special cases.
930
931	0	0					if (substr($lexeme, 0, 1) eq '{')
932							{
933	0						$event_name = 'open_brace';
934	0						$pos -= (length($lexeme) - 1);
935	0						$lexeme = '{';
936
937	0						$self -> log(debug => sprintf($format, $event_name, $start, $span, $pos, $lexeme, 'Adjusted event_name, lexeme and pos') );
938	0						$self -> _process_brace($lexeme, $event_name);
939
940	0						next;
941							}
942
943							# This 'if' is repeated just below.
944
945	0	0					if (substr($lexeme, -1, 1) eq ';')
946							{
947	0						substr($lexeme, -1, 1) = '';
948
949	0	0					next if ($lexeme eq '');
950							}
951
952	0	0					if (substr($lexeme, -1, 1) eq '}')
953							{
954	0						substr($lexeme, -1, 1) = '';
955	0						$pos -= 1;
956							}
957
958							# This 'if' is repeated just above.
959
960	0	0					if (substr($lexeme, -1, 1) eq ';')
961							{
962	0						substr($lexeme, -1, 1) = '';
963
964	0	0					next if ($lexeme eq '');
965							}
966
967	0	0					$self -> log(debug => "Lexeme \|$original_lexeme\| corrected to be \|$lexeme\|") if ($original_lexeme ne $lexeme);
968
969	0						$lexeme = $self -> clean_after($lexeme);
970
971	0	0					if ($class{lc $lexeme})
972							{
973	0						$lexeme = lc $lexeme;
974	0						$type = $class{$lexeme};
975							}
976							else
977							{
978	0						$type = 'node_id';
979
980							# If this node's mother is the 'graph' node, then it's a graph_id.
981
982	0						$stack = $self -> stack;
983	0	0					$type = 'graph_id' if ($$stack[$#$stack] -> name eq 'graph');
984							}
985
986	0	0					$self -> log(debug => "\|$lexeme\| classified as a $type") if ($original_lexeme ne $lexeme);
987	0						$self -> _add_daughter($type, {type => $type, value => $lexeme});
988							}
989							elsif ($event_name eq 'open_brace')
990							{
991	0						$self -> _process_brace($lexeme, $event_name);
992							}
993							elsif ($event_name eq 'open_bracket')
994							{
995	0						$self -> _process_bracket($lexeme, $event_name);
996							}
997							elsif ($event_name =~ $prolog_token)
998							{
999	0						$self -> _process_prolog_token($event_name, $lexeme);
1000							}
1001							elsif ($event_name eq 'subgraph_id')
1002							{
1003	0						$self -> _add_daughter('subgraph_id', {type => $event_name, value => $lexeme});
1004							}
1005							elsif ($event_name eq 'subgraph_literal')
1006							{
1007	0						$self -> _add_daughter('literal', {type => $event_name, value => 'subgraph'});
1008							}
1009							elsif ($event_name eq 'undirected_edge')
1010							{
1011	0						$self -> _add_daughter('edge_id', {type => $event_name, value => $lexeme});
1012							}
1013
1014							# Step past separators.
1015
1016	0	0					if (substr($string, $pos, 1) =~ /[;,]/)
1017							{
1018	0						$pos++;
1019							}
1020
1021	0						$last_event = $event_name;
1022							}
1023
1024	0	0					if (my $ambiguous_status = $self -> recce -> ambiguous)
1025							{
1026	0						my($terminals) = $self -> recce -> terminals_expected;
1027	0	0					$terminals = ['(None)'] if ($#$terminals < 0);
1028
1029	0						$self -> log(info => 'Terminals expected: ' . join(', ', @$terminals) );
1030	0						$self -> log(info => "Parse is ambiguous. Status: $ambiguous_status");
1031							}
1032
1033							# Return a defined value for success and undef for failure.
1034
1035	0						return $self -> recce -> value;
1036
1037							} # End of _process.
1038
1039							# ------------------------------------------------
1040
1041							sub _process_brace
1042							{
1043	0			0			my($self, $name, $event_name) = @_;
1044
1045							# When the 1st '{' is encountered, the 'graph' daughter of the root
1046							# becomes the parent of all other tree nodes, replacing the 'prolog' daughter,
1047							# which has been the parent of 'strict' and 'digraph' or graph' up to now.
1048
1049	0	0					if ($self -> brace_count == 0)
1050							{
1051	0						my($stack) = $self -> stack;
1052
1053	0						pop @$stack;
1054
1055	0						my(@daughters) = $self -> tree -> daughters;
1056	0						my($index) = 1; # 0 => prolog, 1 => graph.
1057
1058	0						push @$stack, $daughters[$index];
1059
1060	0						$self -> stack($stack);
1061							}
1062
1063							# When a '{' is encountered, the last thing pushed becomes it's parent.
1064							# Likewise, when a '}' is encountered, we pop the stack.
1065
1066	0						my($stack) = $self -> stack;
1067
1068	0	0					if ($name eq '{')
1069							{
1070	0						$self -> brace_count($self -> brace_count + 1);
1071	0						$self -> _add_daughter('literal', {type => $event_name, value => $name});
1072
1073	0						my(@daughters) = $$stack[$#$stack] -> daughters;
1074
1075	0						push @$stack, $daughters[$#daughters];
1076
1077	0						$self -> stack($stack);
1078							}
1079							else
1080							{
1081	0						pop @$stack;
1082
1083	0						$self -> stack($stack);
1084	0						$self -> _add_daughter('literal', {type => $event_name, value => $name});
1085	0						$self -> brace_count($self -> brace_count - 1);
1086							}
1087
1088							} # End of _process_brace.
1089
1090							# ------------------------------------------------
1091
1092							sub _process_bracket
1093							{
1094	0			0			my($self, $name, $event_name) = @_;
1095
1096							# When a '[' is encountered, the last thing pushed becomes it's parent.
1097							# Likewise, if ']' is encountered, we pop the stack.
1098
1099	0						my($stack) = $self -> stack;
1100
1101	0	0					if ($name eq '[')
1102							{
1103	0						my(@daughters) = $$stack[$#$stack] -> daughters;
1104
1105	0						push @$stack, $daughters[$#daughters];
1106
1107	0						$self -> _add_daughter('literal', {type => $event_name, value => $name});
1108							}
1109							else
1110							{
1111	0						$self -> _add_daughter('literal', {type => $event_name, value => $name});
1112
1113	0						pop @$stack;
1114
1115	0						$self -> stack($stack);
1116							}
1117
1118							} # End of _process_bracket.
1119
1120							# ------------------------------------------------
1121
1122							sub _process_html
1123							{
1124	0			0			my($self, $stringref, $pos) = @_;
1125
1126	0						$self -> recce4html
1127							(
1128							Marpa::R2::Scanless::R -> new
1129							({
1130							grammar => $self -> grammar4html,
1131							})
1132							);
1133
1134							# Returns ($html, $pos), with $pos advanced past the HTML; dies with the error on failure.
1135
1136	0						my($candidate) = substr($$stringref, $pos);
1137
1138	0						my($error);
1139							my($html);
1140	0						my($value);
1141
1142							try
1143							{
1144	0			0			$self -> recce4html -> read(\$candidate);
1145
1146	0						$value = $self -> recce4html -> value;
1147
1148	0	0					if (defined $value)
1149							{
1150	0						$html = $self -> _decode_result($$value);
1151							}
1152							else
1153							{
1154	0						$error = 'Parse failed';
1155							}
1156							}
1157							catch
1158							{
1159	0			0			$error = $_;
1160
1161							# But wait! It might be OK after all.
1162							# Actually, this branch always happens, because for valid DOT files,
1163							# there must be something in the input ('lexemes') after the HTML.
1164
1165	0	0					if ($self -> recce4html -> exhausted)
1166							{
1167	0						my(@span) = $self -> recce4html -> last_completed_span('string');
1168	0						$html = substr($candidate, $span[0], $span[1]);
1169							}
1170	0						};
1171
1172	0	0					if (! defined $html)
1173							{
1174	0						die $error;
1175							}
1176
1177	0						$pos += length($html);
1178
1179	0						return ($html, $pos);
1180
1181							} # End of _process_html.
1182
1183							# ------------------------------------------------
1184
1185							sub _process_prolog_token
1186							{
1187	0			0			my($self, $event_name, $value) = @_;
1188
1189	0						$self -> _add_daughter('literal', {type => $event_name, value => lc $value});
1190
1191							# When 'digraph' or 'graph' is encountered, the 'graph' daughter of the root
1192							# becomes the parent of all other tree nodes, replacing the 'prolog' daughter.
1193
1194	0	0					if ($event_name ne 'strict_literal')
1195							{
1196	0						my($stack) = $self -> stack;
1197
1198	0						pop @$stack;
1199
1200	0						my(@daughters) = $self -> tree -> daughters;
1201	0						my($index) = 1; # 0 => prolog, 1 => graph.
1202
1203	0						push @$stack, $daughters[$index];
1204
1205	0						$self -> stack($stack);
1206							}
1207
1208							} # End of _process_prolog_token.
1209
1210							# ------------------------------------------------
1211
1212							sub run
1213							{
1214	0			0	1		my($self) = @_;
1215
1216	0	0					if ($self -> description)
		0
1217							{
1218							# Assume graph is a single line without comments.
1219
1220	0						$self -> graph_text($self -> description);
1221							}
1222							elsif ($self -> input_file)
1223							{
1224							# Quick removal of whole-line C++ and hash comments.
1225							# In the regexp, # is written as \# just for the Ultraedit syntax hiliter.
1226							# Steps:
1227							# o Read file as a set of lines.
1228							# o Look for trailing \ chars, and combine those lines.
1229							# o Combine all remaining lines with ' '.
1230
1231	0						my(@line) = grep{! m!^(?:\#\|//)!} path($self -> input_file) -> lines_utf8;
	0
1232	0						my($last) = $#line; # Store this separately so we can fiddle $i.
1233	0						my($i) = 0;
1234	0						my($buffer) = '';
1235
1236	0						my(@out);
1237
1238							# We don't check the very last line. If it ends with '\\', we definitely want an error.
1239
1240	0						while ($i < $last)
1241							{
1242	0	0					if ($line[$i] =~ /(.*)\\$/)
1243							{
1244	0						$buffer .= $1;
1245							}
1246							else
1247							{
1248	0	0					if (length($buffer) > 0)
1249							{
1250	0						$line[$i] = "$buffer$line[$i]";
1251	0						$buffer = '';
1252							}
1253
1254	0						push @out, $line[$i];
1255							}
1256
1257	0						$i++;
1258							}
1259
1260	0						push @out, $line[$i];
1261
1262	0						$self -> graph_text(join(' ', @out) );
1263
1264	0						$self -> log(debug => "After processing this graph:\n\|" . $self -> graph_text . '\|');
1265							}
1266							else
1267							{
1268	0						die "You must provide a graph using one of -input_file or -description. \n";
1269							}
1270
1271							# Return 0 for success and 1 for failure.
1272
1273	0						my($result) = 0;
1274
1275							try
1276							{
1277	0	0		0			if (defined (my $value = $self -> _process) )
1278							{
1279	0						$self -> log(info => 'Parsed tree:');
1280	0						$self -> log(info => join("\n", @{$self -> tree -> tree2string}) );
	0
1281							}
1282							else
1283							{
1284	0						$result = 1;
1285
1286	0						$self -> log(error => 'Parse failed');
1287							}
1288							}
1289							catch
1290							{
1291	0			0			$result = 1;
1292
1293	0						$self -> log(error => "Parse failed. Error: $_");
1294	0						};
1295
1296	0						$self -> log(info => "Parse result: $result (0 is success)");
1297
1298	0	0					if ($result == 0)
1299							{
1300							# Clean up the stack by popping the root node.
1301
1302	0						my($stack) = $self -> stack;
1303
1304	0						pop @$stack;
1305
1306	0						$self -> stack($stack);
1307	0						$self -> log(debug => 'Brace count: ' . $self -> brace_count . ' (0 is success)');
1308	0						$self -> log(debug => 'Stack size: ' . $#{$self -> stack} . ' (0 is success)');
	0
1309
1310	0						my($output_file) = $self -> output_file;
1311
1312	0	0					if ($output_file)
1313							{
1314	0	0					$self -> renderer
1315							(
1316							GraphViz2::Marpa::Renderer::Graphviz -> new
1317							(
1318							logger => $self -> logger,
1319							maxlevel => $self -> maxlevel,
1320							minlevel => $self -> minlevel,
1321							output_file => $self -> output_file,
1322							tree => $self -> tree,
1323							)
1324							) if (! $self -> renderer);
1325
1326	0						$self -> renderer -> run;
1327							}
1328							}
1329							else
1330							{
1331	0						$self -> log(info => 'The stack and the tree when we died ...');
1332	0						$self -> _dump_stack('_process_brace() pushed { onto stack');
1333							}
1334
1335							# Return 0 for success and 1 for failure.
1336
1337	0						return $result;
1338
1339							} # End of run.
1340
1341							# ------------------------------------------------
1342
1343							sub _validate_event
1344							{
1345	0			0			my($self, $string, $start, $span, $pos) = @_;
1346	0						my(@event) = @{$self -> recce -> events};
	0
1347	0						my($event_count) = scalar @event;
1348	0						my(@event_name) = sort map{$$_[0]} @event;
	0
1349	0						my($event_name) = $event_name[0]; # Default.
1350	0						my($lexeme) = substr($string, $start, $span);
1351	0						my($line, $column) = $self -> recce -> line_column($start);
1352	0						my($literal) = $self -> next_few_chars($string, $start + $span);
1353	0						my($message) = "Location: ($line, $column). Lexeme: \|$lexeme\|. Next few chars: \|$literal\|";
1354	0						$message = "$message. Events: $event_count. Names: ";
1355
1356	0						$self -> log(debug => $message . join(', ', @event_name) . '.');
1357
1358	0						my(%event_name);
1359
1360	0						@event_name{@event_name} = (1) x @event_name;
1361
1362	0						for (@event_name)
1363							{
1364	0	0					die "Unexpected event name '$_'" if (! ${$self -> known_events}{$_});
	0
1365							}
1366
1367	0	0					if ($event_count > 1)
1368							{
1369	0						my(%special_case) =
1370							(
1371							'}' => 'close_brace',
1372							']' => 'close_bracket',
1373							'];' => 'close_bracket',
1374							'->' => 'directed_edge',
1375							'{' => 'open_brace',
1376							'[' => 'open_bracket',
1377							'subgraph' => 'subgraph_literal',
1378							'--' => 'undirected_edge',
1379							);
1380
1381	0	0					if ($special_case{$lexeme})
		0
1382							{
1383	0						$event_name = $special_case{$lexeme};
1384
1385	0						$self -> log(debug => "Disambiguated lexeme (1 of 2) \|$lexeme\| as '$event_name'");
1386							}
1387							elsif ($event_count == 2)
1388							{
1389							# We can handle ambiguous events when they are 'attribute_name' and 'node_name'.
1390							# 'attribute_name' is followed by '=', and 'node_name' is followed by anything else.
1391							# In fact, 'node_name' may be followed by '[' to indicate the start of its attributes.
1392
1393	0						$event_name = undef;
1394	0						my($expected) = "$event_name[0]!$event_name[1]";
1395
1396	0	0					if ($expected eq 'attribute_name!node_name')
1397							{
1398	0						($event_name, $span, $pos) = $self -> _identify_lexeme($string, $start, $span, $pos, $lexeme);
1399							}
1400
1401	0	0					if (! defined $event_name)
1402							{
1403	0						die "Cannot identify lexeme as either 'attribute_name' or 'node_name'. \n";
1404							}
1405							}
1406							else
1407							{
1408	0						die "The code only handles 1 event at a time, or the pair ('attribute_name', 'node_name'). \n";
1409							}
1410							}
1411
1412	0						return ($event_name, $span, $pos);
1413
1414							} # End of _validate_event.
1415
1416							# ------------------------------------------------
1417
1418							1;
1419
1420							=pod
1421
1422							=head1 NAME
1423
1424							C<GraphViz2::Marpa> - A Marpa-based parser for Graphviz C<dot> files
1425
1426							=head1 Synopsis
1427
1428							=over 4
1429
1430							=item o Display help
1431
1432							perl scripts/g2m.pl -h
1433
1434							=item o Run the parser
1435
1436							perl scripts/g2m.pl -input_file data/16.gv
1437							perl scripts/g2m.pl -input_file data/16.gv -max info
1438
1439							The L</FAQ> discusses the way the parsed data is stored in RAM.
1440
1441							=item o Run the parser and the default renderer
1442
1443							perl scripts/g2m.pl -input_file data/16.gv -output_file ./16.gv
1444
1445							./16.gv will be the rendered Graphviz C file.
1446
1447							See scripts/test.utf8.sh for comparing the output of running the parser, and C, on all
1448							data/utf8.*.gv files.
1449
1450							=back
1451
1452							See also L.
1453
1454							=head1 Description
1455
1456							L<GraphViz2::Marpa> provides a L<Marpa::R2>-based parser for L<Graphviz|http://www.graphviz.org/>
1457							graph definitions.
1458
1459							Demo output: L.
1460
1461							L.
1462
1463							Articles:
1464
1465							=over 4
1466
1467							=item o Overview
1468
1469							L
1470
1471							=item o Building the Grammar
1472
1473							L
1474
1475							This module will be re-written, again, now that its BNF has been incorporated into GraphViz2::Marpa,
1476							and patched along the way.
1477
1478							=back
1479
1480							=head1 Modules
1481
1482							=over 4
1483
1484							=item o L
1485
1486							The current module, which documents the set of modules.
1487
1488							It can, optionally, use the default renderer L.
1489
1490							Accepts a L graph definition and builds a corresponding
1491							data structure representing the parsed graph. It can pass that data to the default renderer,
1492							L, which can then render it to a text file ready to be
1493							input to C. Such 'round-tripping', as it's called, is the best way to test a renderer.
1494
1495							See scripts/g2m.pl and scripts/test.utf8.sh.
1496
1497							=item o L
1498
1499							The default renderer. Optionally called by the parser.
1500
1501							=item o L
1502
1503							Auxiliary code, used to help generate the demo page.
1504
1505							=item o L
1506
1507							Auxiliary code, used to help generate the demo page.
1508
1509							=back
1510
1511							=head1 Sample Data
1512
1513							=over 4
1514
1515							=item o Input files: data/*.gv
1516
1517							These are valid L graph definition files.
1518
1519							Some data/*.gv files may contain deliberate mistakes, which may or may not stop production
1520							of output files. They may cause various warning messages to be printed by C when
1521							being rendered.
1522
1523							See L for details.
1524
1525							=item o Output files: html/*.svg
1526
1527							The html/*.svg are L graph definition files output
1528							by scripts/generate.demo.sh.
1529
1530							The round trip shows that the lex/parse process does not lose information along the way, but
1531							comments are discarded.
1532
1533							This set, and the set xt/author/html/*.svg just below, are generated by running
1534							scripts/generate.demo.sh. This in turn runs both scripts/generate.svg.sh and
1535							scripts/generate.demo.pl.
1536
1537							=item o Input files: xt/author/data/*.gv
1538
1539							As for data/*.gv above, but these files are copied from Graphviz V 2.38.0, and are often quite
1540							complex.
1541
1542							See find.candidates.pl, below.
1543
1544							=item o Output files: xt/author/html/*.svg
1545
1546							As for html/*.svg above.
1547
1548							=back
1549
1550							=head1 Scripts
1551
1552							These are in the scripts/ directory.
1553
1554							=over 4
1555
1556							=item o copy.config.pl
1557
1558							For use by the author. Output:
1559
1560							Copied config/.htgraphviz2.marpa.conf to /home/ron/.config/Perl/GraphViz2-Marpa
1561
1562							=item o find.candidates.pl
1563
1564							For use by the author.
1565
1566							This scans an unpacked distro of Graphviz V 2.38.0 and finds *.gv matching these criteria:
1567
1568							=over 4
1569
1570							=item o In ~/Downloads/Graphviz/graphviz-2.38.0/
1571
1572							=item o Not too big
1573
1574							I.e. the file must be < 10,000 bytes in size, otherwise it may take too long to process.
1575
1576							=item o Not a fake
1577
1578							Currently, only ~/Downloads/Graphviz/graphviz-2.38.0/tclpkg/gv/META.gv fits this
1579							definition.
1580
1581							=item o Not already present in xt/author/data
1582
1583							=back
1584
1585							Any candidates found have their names printed, for easy one-at-a-time copying from Graphviz and
1586							testing via scripts/test.1.sh.
1587
1588							=item o find.config.pl
1589
1590							For use by the author. Output:
1591
1592							Using: File::HomeDir -> my_dist_config('GraphViz2-Marpa', '.htgraphviz2.marpa.conf'):
1593							Found: /home/ron/.config/Perl/GraphViz2-Marpa/.htgraphviz2.marpa.conf
1594
1595							=item o g2m.pl
1596
1597							Runs the parser. Try running with -h.
1598
1599							=item o g2m.sh
1600
1601							Simplifies running g2m.pl.
1602
1603							=item o generate.demo.pl
1604
1605							See generate.demo.sh.
1606
1607							=item o generate.demo.sh
1608
1609							For use by the author. Actions:
1610
1611							=over
1612
1613							=item o Runs dot on all data/*.gv files; outputs to html/*.svg
1614
1615							=item o Runs scripts/generate.demo.pl; outputs to html/index.html
1616
1617							=item o Copies html/* to various places
1618
1619							=back
1620
1621							=item o generate.svg.sh
1622
1623							Convert all data/*.gv into html/*.svg.
1624
1625							Used by generate.demo.sh.
1626
1627							=item o gv2svg.sh
1628
1629							Converts one data/*.gv file into $DR/Perl-modules/html/graphviz2.marpa/*.svg.
1630
1631							=item o pod2html.sh
1632
1633							Converts all .pm files to .html, and copies them into my web server's dir structure (in Debian's
1634							RAM disk).
1635
1636							=item o test.1.sh
1637
1638							Runs both the parser and C so I can compare the output.
1639
1640							=item o test.html.pl
1641
1642							Uses method perform_1_test() in L, to test the stand-alone BNF used for
1643							HTML-like tables.
1644
1645							Note: t/test.t also calls perform_1_test().
1646
1647							=item o test.utf8.sh
1648
1649							Tests one data/utf8*.gv file more thoroughly than test.1.sh does.
1650
1651							=back
1652
1653							=head1 Distributions
1654
1655							This module is available as a Unix-style distro (*.tgz).
1656
1657							See L
1658							for help on unpacking and installing distros.
1659
1660							=head1 Installation
1661
1662							Install L as you would for any C module:
1663
1664							Run:
1665
1666							cpanm GraphViz2::Marpa
1667
1668							or run:
1669
1670							sudo cpan GraphViz2::Marpa
1671
1672							or unpack the distro, and then either:
1673
1674							perl Build.PL
1675							./Build
1676							./Build test
1677							sudo ./Build install
1678
1679							or:
1680
1681							perl Makefile.PL
1682							make (or dmake or nmake)
1683							make test
1684							make install
1685
1686							=head1 Constructor and Initialization
1687
1688							C is called as C<< my($g2m) = GraphViz2::Marpa -> new(k1 => v1, k2 => v2, ...) >>.
1689
1690							It returns a new object of type C.
1691
1692							Key-value pairs accepted in the parameter list (see corresponding methods for details
1693							[e.g. L]):
1694
1695							=over 4
1696
1697							=item o description => $graphDescription
1698
1699							Read the L graph definition from the command line.
1700
1701							You are strongly encouraged to surround this string with '...' to protect it from your shell.
1702
1703							See also the 'input_file' option to read the description from a file.
1704
1705							The 'description' option takes precedence over the 'input_file' option.
1706
1707							Default: ''.
1708
1709							=item o input_file => $aDotInputFileName
1710
1711							Read the L graph definition from a file.
1712
1713							See also the 'description' option to read the graph definition from the command line.
1714
1715							The 'description' option takes precedence over the 'input_file' option.
1716
1717							Default: ''.
1718
1719							See the distro for data/*.gv.
1720
1721							=item o logger => $aLoggerObject
1722
1723							Specify a logger compatible with L, for the lexer and parser to use.
1724
1725							Default: A logger of type L which writes to the screen.
1726
1727							To disable logging, just set 'logger' to the empty string (not undef).
1728
1729							=item o maxlevel => $logOption1
1730
1731							This option affects L.
1732
1733							See the L docs.
1734
1735							Default: 'notice'.
1736
1737							=item o minlevel => $logOption2
1738
1739							This option affects L.
1740
1741							See the L docs.
1742
1743							Default: 'error'.
1744
1745							No lower levels are used.
1746
1747							=item o output_file => aRenderedDotInputFileName
1748
1749							Specify the name of a file for the renderer to write.
1750
1751							That is, write the DOT-style graph definition to a file.
1752
1753							When this file and the input file are both run thru C, they should produce identical *.svg
1754							files.
1755
1756							If an output file name is specified, an object of type L is
1757							created and called after the input file has been successfully parsed.
1758
1759							Default: ''.
1760
1761							The default means the renderer is not called.
1762
1763							=item o renderer => aGraphViz2::Marpa::Renderer::Graphviz-compatible object
1764
1765							Specify a renderer for the parser to use.
1766
1767							See C just above.
1768
1769							Default: undef.
1770
1771							If an output file is specified, then an object of type L
1772							is created and its C method is called.
1773
1774							=item o trace_terminals => $Boolean
1775
1776							This allows g2m.pl to control the C setting passed to L.
1777
1778							=back
1779
1780							=head1 Methods
1781
1782							=head2 clean_before($s)
1783
1784							Clean the given string before passing it to Marpa.
1785
1786							=head2 clean_after($s)
1787
1788							Clean the given string before storing it in the tree.
1789
1790							=head2 decode_port_compass($name)
1791
1792							Returns a 2-element array for the given DOT node name.
1793
1794							=over 4
1795
1796							=item o [0]: The node name without any port+compass suffix
1797
1798							=item o [1]: The port+compass suffix (prefixed by ':'), or ''
1799
1800							=back
1801
1802							=head2 decode_node($node)
1803
1804							Returns a hashref of the tree node's name and attributes.
1805
1806							Key => Value pairs:
1807
1808							=over 4
1809
1810							=item o id => $node -> name
1811
1812							This identifies the type of tree node. It has values like 'node_id', 'edge_id', 'literal', etc.
1813							These values come from the grammar.
1814
1815							=item o name => $$attributes{value}
1816
1817							This is the name of the tree node. The value comes from the input stream.
1818
1819							But, if C<id> is 'node_id', then C<name> is the DOT node's name without any port+compass suffix.
1820
1821							=item o port => The DOT node name's port+compass suffix (prefixed by ':'), or ''
1822
1823							This value comes from the grammar.
1824
1825							=item o type => $$attributes{type}
1826
1827							This has values like 'node_id', 'open_bracket', etc. In fact, these are the names of lexemes.
1828							These values come from the grammar.
1829
1830							=item o uid => $$attributes{uid}
1831
1832							This is the unique uid of the tree node.
1833
1834							=item o value => $$attributes{value}
1835
1836							This is usually a copy of the C<name> attribute. The value comes from the input stream.
1837
1838							If the C<id> is 'node_id' then this value will be the DOT node's name including any
1839							port+compass suffix.
1840
1841							=back
1842
1843							=head2 decode_tree($tree)
1844
1845							Returns a hashref of the tree's digraph/graph and strict attributes. These are extracted from the
1846							prolog of the tree, which means $tree must be a whole tree, and not just a node within a whole
1847							tree.
1848
1849							Key => Value pairs:
1850
1851							=over 4
1852
1853							=item o digraph => 'digraph' \|\| 'graph'
1854
1855							Default: 'digraph'.
1856
1857							=item o strict => 'strict' \|\| '' (empty string)
1858
1859							Default: ''.
1860
1861							=back
1862
1863							=head2 description([$graph])
1864
1865							The [] indicate an optional parameter.
1866
1867							Get or set the L graph definition string.
1868
1869							The value supplied by the 'description' option takes precedence over the value read from the
1870							'input_file'.
1871
1872							See also L.
1873
1874							'description' is a parameter to L. See L for details.
1875
1876							=head2 hashref2string($h)
1877
1878							Convert the keys and values of $h to a string, including '{' and '}'.
1879
1880							Defaults to '{}' if $h is not defined.
1881
1882							=head2 input_file([$graph_file_name])
1883
1884							Here, the [] indicate an optional parameter.
1885
1886							Get or set the name of the file to read the L graph
1887							definition from.
1888
1889							The value supplied by the 'description' option takes precedence over the value read from the
1890							'input_file'.
1891
1892							See also the L method.
1893
1894							'input_file' is a parameter to L. See L for details.
1895
1896							=head2 log($level, $s)
1897
1898							If a logger is defined, this logs the message $s at level $level.
1899
1900							=head2 logger([$logger_object])
1901
1902							Here, the [] indicate an optional parameter.
1903
1904							Get or set the logger object.
1905
1906							To disable logging, just set 'logger' to the empty string (not undef), in the call to L.
1907
1908							This logger is passed to other modules.
1909
1910							'logger' is a parameter to L. See L for details.
1911
1912							=head2 maxlevel([$string])
1913
1914							Here, the [] indicate an optional parameter.
1915
1916							Get or set the value used by the logger object.
1917
1918							This option is only used if an object of type L is created.
1919							See L.
1920
1921							'maxlevel' is a parameter to L. See L for details.
1922
1923							=head2 minlevel([$string])
1924
1925							Here, the [] indicate an optional parameter.
1926
1927							Get or set the value used by the logger object.
1928
1929							This option is only used if an object of type L is created.
1930							See L.
1931
1932							'minlevel' is a parameter to L. See L for details.
1933
1934							=head2 new()
1935
1936							See L for details on the parameters accepted by L.
1937
1938							=head2 next_few_chars($s, $offset)
1939
1940							Returns a substring of $s, starting at $offset, for use in progress messages.
1941
1942							The default string length returned is 20 characters.
1943
1944							=head2 output_file([$file_name])
1945
1946							Here, the [] indicate an optional parameter.
1947
1948							Get or set the name of the file for the renderer to write.
1949
1950							If an output file name is specified, an object of type L is
1951							created and called after the input file has been successfully parsed.
1952
1953							'output_file' is a parameter to L. See L for details.
1954
1955							=head2 renderer([$renderer_object])
1956
1957							Here, the [] indicate an optional parameter.
1958
1959							Get or set the renderer object.
1960
1961							This renderer is called if C is given a value.
1962
1963							'renderer' is a parameter to L. See L for details.
1964
1965							=head2 run()
1966
1967							This is the only method the caller needs to call. All parameters are supplied to L
1968							(or via other methods before C is called).
1969
1970							See scripts/g2m.pl.
1971
1972							Returns 0 for success and 1 for failure.
1973
1974							=head2 trace_terminals([$Boolean])
1975
1976							Here, the [] indicate an optional parameter.
1977
1978							Get or set the C option passed to L.
1979
1980							=head1 FAQ
1981
1982							=head2 How is the parsed data held in RAM?
1983
1984							The parsed output is held in a tree managed by L.
1985
1986							Here and below, the word C<node> (usually) refers to nodes in this tree, not Graphviz-style nodes.
1987
1988							The root node always looks like this when printed by Tree::DAG_Node's tree2string() method:
1989
1990							root. Attributes: {node=>"root", port=>"", type=>"root_literal", uid=>"0", value=>"root"}
1991
1992							Interpretation:
1993
1994							=over 4
1995
1996							=item o The node name
1997
1998							Here, C.
1999
2000							=item o The node's attributes
2001
2002							Key fields:
2003
2004							=over 4
2005
2006							=item o node
2007
2008							The name of the DOT node without any port+compass suffix. Here C.
2009
2010							=item o port
2011
2012							The port+compass suffix of the DOT node name, if any, else ''. Here the empty string.
2013
2014							=item o type
2015
2016							Here, C.
2017
2018							The type (or name) of the value. The word 'name' is not used to avoid confusion with the name of the
2019							node.
2020
2021							=item o uid
2022
2023							A unique integer assigned to each node. Counts up from 0. Not used.
2024
2025							=item o value
2026
2027							The value of the node.
2028
2029							Here, C.
2030
2031							=back
2032
2033							=back
2034
2035							=head2 Can you explain this tree in more detail?
2036
2037							Sure. Firstly, we examine a sample graph, assuming the module's pre-reqs are installed.
2038							Let's use data/10.gv. Here it is as an
2039							L.
2040
2041							Run one of these:
2042
2043							scripts/g2m.sh data/10.gv -max info
2044							perl -Ilib scripts/g2m.pl -input_file data/10.gv -max info
2045
2046							The former echos the input file to STDOUT before running the latter.
2047
2048							Using C<-max notice>, which is the default, produces no output from C.
2049
2050							This is the input:
2051
2052							STRICT DiGraph graph_10_01
2053							{
2054							node_10_01_1 [fillcolor = red, style = filled]
2055							node_10_01_2 [fillcolor = green, style = filled]
2056
2057							node_10_01_1 -> node_10_01_2 [arrowtail = dot, arrowhead = odot]
2058							}
2059
2060							And this is the output:
2061
2062							Parsed tree:
2063							root. Attributes: {name => "root", port => "", type => "root_literal", uid => "0", value => "root"}
2064							\|--- prolog. Attributes: {name => "prolog", port => "", type => "prolog_literal", uid => "1", value => "prolog"}
2065							\| \|--- literal. Attributes: {name => "strict", port => "", type => "strict_literal", uid => "3", value => "strict"}
2066							\| \|--- literal. Attributes: {name => "digraph", port => "", type => "digraph_literal", uid => "4", value => "digraph"}
2067							\|--- graph. Attributes: {name => "graph", port => "", type => "graph_literal", uid => "2", value => "graph"}
2068							\|--- graph_id. Attributes: {name => "graph_10_01", port => "", type => "graph_id", uid => "5", value => "graph_10_01"}
2069							\|--- literal. Attributes: {name => "{", port => "", type => "open_brace", uid => "6", value => "{"}
2070							\| \|--- node_id. Attributes: {name => "node_10_01_1", port => "", type => "node_id", uid => "7", value => "node_10_01_1"}
2071							\| \| \|--- literal. Attributes: {name => "[", port => "", type => "open_bracket", uid => "8", value => "["}
2072							\| \| \|--- attribute. Attributes: {name => "red", port => "", type => "fillcolor", uid => "9", value => "red"}
2073							\| \| \|--- attribute. Attributes: {name => "filled", port => "", type => "style", uid => "10", value => "filled"}
2074							\| \| \|--- literal. Attributes: {name => "]", port => "", type => "close_bracket", uid => "11", value => "]"}
2075							\| \|--- node_id. Attributes: {name => "node_10_01_2", port => "", type => "node_id", uid => "12", value => "node_10_01_2"}
2076							\| \| \|--- literal. Attributes: {name => "[", port => "", type => "open_bracket", uid => "13", value => "["}
2077							\| \| \|--- attribute. Attributes: {name => "green", port => "", type => "fillcolor", uid => "14", value => "green"}
2078							\| \| \|--- attribute. Attributes: {name => "filled", port => "", type => "style", uid => "15", value => "filled"}
2079							\| \| \|--- literal. Attributes: {name => "]", port => "", type => "close_bracket", uid => "16", value => "]"}
2080							\| \|--- node_id. Attributes: {name => "node_10_01_1", port => "", type => "node_id", uid => "17", value => "node_10_01_1"}
2081							\| \|--- edge_id. Attributes: {name => "->", port => "", type => "directed_edge", uid => "18", value => "->"}
2082							\| \|--- node_id. Attributes: {name => "node_10_01_2", port => "", type => "node_id", uid => "19", value => "node_10_01_2"}
2083							\| \|--- literal. Attributes: {name => "[", port => "", type => "open_bracket", uid => "20", value => "["}
2084							\| \|--- attribute. Attributes: {name => "dot", port => "", type => "arrowtail", uid => "21", value => "dot"}
2085							\| \|--- attribute. Attributes: {name => "odot", port => "", type => "arrowhead", uid => "22", value => "odot"}
2086							\| \|--- literal. Attributes: {name => "]", port => "", type => "close_bracket", uid => "23", value => "]"}
2087							\|--- literal. Attributes: {name => "}", port => "", type => "close_brace", uid => "24", value => "}"}
2088							Parse result: 0 (0 is success)
2089
2090							You can see from this output that words special to Graphviz (e.g. STRICT) are accepted no matter
2091							what case they are in. Such tokens are stored in lower-case.
2092
2093							A more detailed analysis follows.
2094
2095							The C node has 2 daughters:
2096
2097							=over 4
2098
2099							=item o The C sub-tree
2100
2101							The C node is the root of a sub-tree holding everything before the graph's ID, if any.
2102
2103							The node is called C, and its hashref of attributes is
2104							C<< {type => "prolog_literal", uid => "1", value => "prolog"} >>.
2105
2106							It has 1 or 2 daughters. The possibilities are:
2107
2108							=over 4
2109
2110							=item o Input: 'digraph ...'
2111
2112							The 1 daughter is named C, and its attributes are
2113							C<< {type => "digraph_literal", uid => "3", value => "digraph"} >>.
2114
2115							=item o Input: 'graph ...'
2116
2117							The 1 daughter is named C, and its attributes are
2118							C<< {type => "graph_literal", uid => "3", value => "graph"} >>.
2119
2120							=item o Input: 'strict digraph ...'
2121
2122							The 2 daughters are named C, and their attributes are, respectively,
2123							C<< {type => "strict_literal", uid => "3", value => "strict"} >> and
2124							C<< {type => "digraph_literal", uid => "4", value => "digraph"} >>.
2125
2126							=item o Input: 'strict graph ...'
2127
2128							The 2 daughters are named C, and their attributes are, respectively,
2129							C<< {type => "strict_literal", uid => "3", value => "strict"} >> and
2130							C<< {type => "graph_literal", uid => "4", value => "graph"} >>.
2131
2132							=back
2133
2134							And yes, the graph ID, if any, is under the C node. The reason for this is that for every
2135							subgraph within the graph, the same structure applies: First the (sub)graph ID, then a literal
2136							'{', then that (sub)graph's details, and finally a literal '}'.
2137
2138							=item o The 'graph' sub-tree
2139
2140							The C node is the root of a sub-tree holding everything about the graph, including the graph's
2141							ID, if any.
2142
2143							The node is called C<graph>, and its hashref of attributes is
2144							C<< {type => "graph_literal", uid => "2", value => "graph"} >>.
2145
2146							The C node has as many daughters, with their own daughters, as is necessary to hold the
2147							output of parsing the remainder of the input.
2148
2149							In particular, if the input graph has an ID, i.e. the input is of the form 'digraph my_id ...'
2150							(or various versions thereof) then the 1st daughter will be called C, and its attributes
2151							will be C<< {type => "node_id", uid => "5", value => "my_id"} >>.
2152
2153							Further, the 2nd daughter will be called C<literal>, and its attributes will be
2154							C<< {type => "open_brace", uid => "6", value => "{"} >>. A subsequent daughter will eventually (for a
2155							syntax-error-free input file, of course) also be called C<literal>, and its attributes will be
2156							C<< {type => "close_brace", uid => "#", value => "}"} >>.
2157
2158							Naturally, if the graph has no ID (i.e. input lacks the 'my_id' token) then the uids will differ
2159							slightly.
2160
2161							As mentioned, this pattern of optional (sub)graph id followed by a matching pair of '{', '}' nodes,
2162							is used for all graphs and subgraphs.
2163
2164							In the case the input contains an explicit C, then just before the node representing
2165							'my_id' or '{', there will be another node representing the C token.
2166
2167							Its name will be C<literal>, and its attributes will be
2168							C<< {type => "subgraph_literal", uid => "#", value => "subgraph"} >>.
2169
2170							=back
2171
2172							=head2 How many different names can these nodes have?
2173
2174							The list of possible node names follows. You should always examine the C and C keys of
2175							the node's attributes to determine the exact nature of the node.
2176
2177							=over 4
2178
2179							=item o attribute
2180
2181							In this case, the node's attributes contain a hashref like
2182							{type => "arrowhead", uid => "33", value => "odiamond"}, meaning the C field holds the type
2183							(i.e. name) of the attribute, and the 'value' field holds the value of the attribute.
2184
2185							=item o class
2186
2187							This is used when any of C, C, or C appear at the start of the (sub)graph, and
2188							is the mother of the attributes attached to the class. The C of the attribute will be
2189							C, C, or C.
2190
2191							The 1st and last daughters will be literals whose attribute values are '[' and ']' respectively,
2192							and the middle daughter(s) will be nodes of type C (as just discussed).
2193
2194							=item o edge_id
2195
2196							The C of the attribute will be either '--' or '->'.
2197
2198							Thus the C of the edge will be the previous daughter (node or subgraph), and the C of
2199							the edge will be the next.
2200
2201							Samples are:
2202
2203							n1 -> n2
2204							n1 -> {n2}
2205							{n1} -> n2
2206
2207							In a L of nodes, the last node in
2208							the chain may have daughters that are the attributes of each edge in the chain. This is how
2209							Graphviz syntax attaches edge attributes to a path. The class C can also be used to provide
2210							attributes for the edge.
2211
2212							=item o graph
2213
2214							There is only ever 1 node called C<graph>. This tree node is always present.
2215
2216							=item o graph_id
2217
2218							There is only ever 1 node called C<graph_id>.
2219
2220							If present, its mother must be the tree node called C<graph>, in which case it will be the first
2221							daughter of C<graph>.
2222
2223							But, it will be absent if the graph is unnamed, as in strict digraph /* no name */ {...}.
2224
2225							=item o literal
2226
2227							C is the name of some nodes, with the C key in the attributes having one of these
2228							values:
2229
2230							=over 4
2231
2232							=item o {
2233
2234							Indicates the start of a (sub)graph.
2235
2236							=item o }
2237
2238							Indicates the end of a (sub)graph.
2239
2240							=item o [
2241
2242							This indicates the start of a set of attributes for a specific class, edge or node, or the
2243							edge attributes at the end of a path.
2244
2245							The 1st and last daughters will be literals whose attribute C keys are '[' and ']'
2246							respectively.
2247
2248							Between these 2 nodes will be 1 node for each attribute, as seen above with
2249							C<< edge ["color" = "green",] >>.
2250
2251							Note: Graphviz allows an abbreviated syntax for setting the attributes of a (sub)graph. So, instead
2252							of needing:
2253
2254							graph [rankdir = LR]
2255
2256							You can just use:
2257
2258							rankdir = LR
2259
2260							In such cases, these attributes are not surrounded by '[' and ']'.
2261
2262							=item o ]
2263
2264							See the previous point.
2265
2266							=item o digraph_literal
2267
2268							=item o graph_literal
2269
2270							=item o strict_literal
2271
2272							=item o subgraph_literal
2273
2274							=back
2275
2276							=item o node_id
2277
2278							The C of the attributes is the name of the graph, a node, or a subgraph.
2279
2280							Note: A node name can appear more than once in succession, either as a declaration of the node's
2281							existence and then as the tail of an edge, or, as in this fragment of data/56.gv:
2282
2283							node [shape=rpromoter colorscheme=rdbu5 color=1 style=filled fontcolor=3]; Hef1a; TRE; UAS;
2284							Hef1aLacOid; Hef1aLacOid [label="Hef1a-LacOid"];
2285
2286							This is a case where tree compression could be done, but isn't done yet.
2287
2288							=item o prolog
2289
2290							There is only ever 1 node called C<prolog>. This tree node is always present.
2291
2292							=item o root
2293
2294							There is only ever 1 node called C<root>. This tree node is always present.
2295
2296							=back
2297
2298							=head2 How are nodes, ports and compass points represented in the (above) tree?
2299
2300							Input contains this fragment of data/17.02.gv:
2301
2302							node_17_02_1:p11 -> node_17_02_2:p22:s
2303							[
2304							arrowhead = "odiamond";
2305							arrowtail = "odot",
2306							color = red
2307							dir = both;
2308							];
2309
2310							The output log contains:
2311
2312							\| \|--- node_id. Attributes: {node => "node_17_02_1", port => ":p11", type => "node_id", uid => "29", value => "node_17_02_1:p11"}
2313							\| \|--- edge_id. Attributes: {name => "directed_edge", node => "->", port => "", uid => "30", value => "->"}
2314							\| \|--- node_id. Attributes: {node => "node_17_02_2", port => ":p22:s", type => "node_id", uid => "31", value => "node_17_02_2:p22:s"}
2315
2316							You can see the ports and compass points have been incorporated into the C<value> attribute, and
2317							that its value comes from concatenating the values of the C<node> and C<port> attributes.
2318
2319							See L and L.
2320
2321							=head2 How are HTML-like labels handled
2322
2323							The main grammar (See C<< $self -> bnf >> in the source) is used to hold the definitions of strings
2324							(See C). Thus Marpa, via the main parser C<< $self -> recce >>, is used to identify
2325							all types of strings.
2326
2327							Then, if the string starts with '<', C<_process_html()> is called, and has a separate grammar
2328							(See C). This in turn uses a separate grammar object (C) and a separate
2329							parser (C). C<_process_html()> traps any I parsing errors, found when lexemes
2330							(text) follow the HTML, and saves the label's value. This method also sets $pos to the first char
2331							after the HTML, so when control returns to the main parser, and the main grammar, the main parser
2332							is not aware of the existence of the HTML, and just keeps on parsing from where the HTML parser
2333							finished.
2334
2335							=head2 How are comments stored in the tree?
2336
2337							They aren't stored, they are discarded. And this in turn means rendered C files can't ever
2338							contain them.
2339
2340							=head2 What is the homepage of Marpa?
2341
2342							L.
2343
2344							That page has a long list of links.
2345
2346							=head2 Why do I get error messages like the following?
2347
2348							Error: :1: syntax error near line 1
2349							context: digraph >>> Graph <<< {
2350
2351							Graphviz reserves some words as keywords, meaning they can't be used as an ID, e.g. for the
2352							name of the graph.
2353
2354							So, don't do this:
2355
2356							strict graph graph{...}
2357							strict graph Graph{...}
2358							strict graph strict{...}
2359							etc...
2360
2361							Likewise for non-strict graphs, and digraphs. You can however add double-quotes around such
2362							reserved words:
2363
2364							strict graph "graph"{...}
2365
2366							Even better, use a more meaningful name for your graph...
2367
2368							The keywords are: node, edge, graph, digraph, subgraph and strict. Compass points are not keywords.
2369
2370							See L in the discussion of the syntax of DOT
2371							for details.
2372
2373							=head2 Does this package support Unicode in the input C file?
2374
2375							Yes.
2376
2377							But you are I<strongly encouraged> to put node names using utf8 glyphs in double-quotes, even though
2378							it is not always necessary.
2379
2380							See xt/author/data/utf8.*.gv and scripts/test.utf8.sh. In particular, see xt/author/data/utf8.01.gv.
2381
2382							=head2 How can I switch from Marpa::XS to Marpa::PP?
2383
2384							Don't use either of them. Use L<Marpa::R2>.
2385
2386							=head2 If I input x.old.gv and output x.new.gv, should these 2 files be identical?
2387
2388							Yes - at least in the sense that running C on them will produce the same output files.
2389							This is assuming the default renderer is used.
2390
2391							See scripts/test.utf8.pl for how to do just that.
2392
2393							As mentioned just above, comments in input files are discarded, so they can never be in the output
2394							file.
2395
2396							=head2 How are custom graph attributes handled?
2397
2398							They are treated like any other attribute. That is, syntax checking is not performed at that level,
2399							but only at the grammatical level. If the construct matches the grammar, this code accepts it.
2400
2401							See data/32.gv.
2402
2403							=head2 How are the demo files generated?
2404
2405							See scripts/generate.demo.sh.
2406
2407							=head2 How do I run author tests?
2408
2409							This runs both standard and author tests:
2410
2411							shell> perl Build.PL; ./Build; ./Build test; ./Build authortest
2412
2413							There are currently (V 2.00) 91 standard tests, and in xt/author/*.t, 4 pod tests and 355 author
2414							tests. Combined, they take almost 2m 30s to run.
2415
2416							=head1 See Also
2417
2418							L. The significance of this module is that during the re-write of
2419							GraphViz2::Marpa V 1 => 2, the string-handling code was built-up step-by-step in
2420							L.
2421
2422							Later, that code was improved within this module, and will be back-ported into
2423							Marpa::Demo::StringParser. In particular the technique used in _process_html() really should be
2424							back-ported.
2425
2426							Also, see L for 2 ways the tree built by this module can be processed
2427							to provide analysis of the structure of the graph.
2428
2429							=head1 Machine-Readable Change Log
2430
2431							The file Changes was converted into Changelog.ini by L.
2432
2433							=head1 Version Numbers
2434
2435							Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
2436
2437							=head1 Thanks
2438
2439							Many thanks are due to the people who worked on L.
2440
2441							Jeffrey Kegler wrote Marpa and L.
2442
2443							And thanks to rns (Ruslan Shvedov) for writing the grammar for double-quoted strings used in
2444							L's scripts/quoted.strings.02.pl. I adapted it to HTML (see
2445							scripts/quoted.strings.05.pl in that module), and then incorporated the grammar into this module.
2446							For details, search for C, C and C in the source of the current
2447							module.
2448
2449							=head1 Repository
2450
2451							L
2452
2453							=head1 Support
2454
2455							Email the author, or log a bug on RT:
2456
2457							L.
2458
2459							=head1 Author
2460
2461							L<GraphViz2::Marpa> was written by Ron Savage I<E<lt>ron@savage.net.auE<gt>> in 2012.
2462
2463							Marpa's homepage: .
2464
2465							My homepage: L.
2466
2467							=head1 Copyright
2468
2469							Australian copyright (c) 2012, Ron Savage.
2470
2471							All Programs of mine are 'OSI Certified Open Source Software';
2472							you can redistribute them and/or modify them under the terms of
2473							The Perl License, a copy of which is available at:
2474							http://dev.perl.org/licenses/
2475
2476							=cut