File Coverage

blib/lib/MarpaX/Demo/StringParser.pm

Criterion	Covered	Total	%
statement	201	250	80.4
branch	50	74	67.5
condition	7	15	46.6
subroutine	28	30	93.3
pod	4	5	80.0
total	290	374	77.5

line	stmt	bran	cond	sub	pod	time	code
1							package MarpaX::Demo::StringParser;
2
3	1			1		61055	use strict;
	1					2
	1					38
4	1			1		5	use utf8;
	1					2
	1					8
5	1			1		17	use warnings;
	1					5
	1					35
6	1			1		3	use warnings qw(FATAL utf8); # Fatalize encoding glitches.
	1					2
	1					36
7
8	1			1		631	use File::Slurp; # For read_file().
	1					11978
	1					79
9
10	1			1		1024	use Log::Handler;
	1					34507
	1					8
11
12	1			1		676	use Marpa::R2;
	1					132905
	1					17
13
14	1			1		956	use Moo;
	1					12412
	1					7
15
16	1			1		2138	use Set::Array;
	1					11707
	1					25
17
18	1			1		781	use Text::CSV;
	1					10965
	1					9
19
20	1			1		1309	use Tree::DAG_Node;
	1					16783
	1					24
21
22	1			1		1070	use Types::Standard qw/Any ArrayRef HashRef Int Str/;
	1					99144
	1					28
23
24	1			1		1743	use Try::Tiny;
	1					19
	1					5309
25
26							has bnf =>
27							(
28							default => sub{return ''},
29							is => 'rw',
30							isa => Any,
31							required => 0,
32							);
33
34							has description =>
35							(
36							default => sub{return ''},
37							is => 'rw',
38							isa => Str,
39							required => 0,
40							);
41
42							has grammar =>
43							(
44							default => sub{return ''},
45							is => 'rw',
46							isa => Any, # 'Marpa::R2::Scanless::G'.
47							required => 0,
48							);
49
50							has graph_text =>
51							(
52							default => sub{return ''},
53							is => 'rw',
54							isa => Str,
55							required => 0,
56							);
57
58							has input_file =>
59							(
60							default => sub{return ''},
61							is => 'rw',
62							isa => Str,
63							required => 0,
64							);
65
66							has logger =>
67							(
68							default => sub{return undef},
69							is => 'rw',
70							isa => Any,
71							required => 0,
72							);
73
74							has maxlevel =>
75							(
76							default => sub{return 'notice'},
77							is => 'rw',
78							isa => Str,
79							required => 0,
80							);
81
82							has minlevel =>
83							(
84							default => sub{return 'error'},
85							is => 'rw',
86							isa => Str,
87							required => 0,
88							);
89
90							has known_events =>
91							(
92							default => sub{return {} },
93							is => 'rw',
94							isa => HashRef,
95							required => 0,
96							);
97
98							has recce =>
99							(
100							default => sub{return ''},
101							is => 'rw',
102							isa => Any, # 'Marpa::R2::Scanless::R'.
103							required => 0,
104							);
105
106							has stack =>
107							(
108							default => sub{return []},
109							is => 'rw',
110							isa => ArrayRef,
111							required => 0,
112							);
113
114							has tree =>
115							(
116							default => sub{return ''},
117							is => 'rw',
118							isa => Any,
119							required => 0,
120							);
121
122							has uid =>
123							(
124							default => sub{return 0},
125							is => 'rw',
126							isa => Int,
127							required => 0,
128							);
129
130							our $VERSION = '2.03';
131
132							# --------------------------------------------------
133							# For accepted and rejected by Marpa, see
134							# Marpa-R2-2.094000/lib/Marpa/R2/meta/metag.bnf.
135
136							sub BUILD
137							{
138	6			6	0	318	my($self) = @_;
139
140	6	50				170	if (! defined $self -> logger)
141							{
142	6					1200	$self -> logger(Log::Handler -> new);
143	6					1319	$self -> logger -> add
144							(
145							screen =>
146							{
147							alias => 'logger',
148							maxlevel => $self -> maxlevel,
149							message_layout => '%m',
150							minlevel => $self -> minlevel,
151							}
152							);
153							}
154
155							# Policy: Event names are always the same as the name of the corresponding lexeme.
156
157							$self -> bnf
158							(
159							<<'END_OF_GRAMMAR'
160
161							:default ::= action => [values]
162
163							lexeme default = latm => 1 # Longest Acceptable Token Match.
164
165							:start ::= graph_grammar
166
167							graph_grammar ::= graph_definition
168
169							# Graph stuff.
170
171							graph_definition ::= node_definition
172							\| edge_definition
173							# Node stuff
174
175							node_definition ::= node_statement
176							\| node_statement graph_definition
177
178							node_statement ::= node_name_token
179							\| node_name_token attribute_definition
180							\| node_statement (',') node_statement
181
182							node_name_token ::= start_node end_node # Allow for the anonymous node.
183							\| start_node node_name end_node
184
185							# Edge stuff
186
187							edge_definition ::= edge_statement
188							\| edge_statement graph_definition
189
190							edge_statement ::= edge_name
191							\| edge_name attribute_definition
192							\| edge_statement (',') edge_statement
193
194							edge_name ::= directed_edge
195							\| undirected_edge
196
197							# Attribute stuff.
198
199							attribute_definition ::= attribute_statement+
200
201							attribute_statement ::= start_attributes string_token_set end_attributes
202
203							string_token_set ::= string_token_pair+
204
205							string_token_pair ::= literal_label
206							\| attribute_name (':') attribute_value
207
208							# Lexemes in alphabetical order.
209
210							:lexeme ~ attribute_name pause => before event => attribute_name
211
212							attribute_name ~ string_char_set+
213
214							:lexeme ~ attribute_value pause => before event => attribute_value
215
216							attribute_value ~ string_char_set+
217
218							:lexeme ~ directed_edge pause => before event => directed_edge priority => 2
219							directed_edge ~ '->'
220
221							:lexeme ~ end_attributes pause => before event => end_attributes priority => 1
222							end_attributes ~ '}'
223
224							:lexeme ~ end_node pause => before event => end_node priority => 1
225							end_node ~ ']'
226
227							escaped_char ~ '\' [[:print:]]
228
229							# Use ' here just for the UltraEdit syntax hiliter.
230
231							:lexeme ~ literal_label pause => before event => literal_label priority => 1
232							literal_label ~ 'label'
233
234							:lexeme ~ node_name pause => before event => node_name
235
236							node_name ~ string_char_set+
237
238							:lexeme ~ start_attributes pause => before event => start_attributes
239							start_attributes ~ '{'
240
241							:lexeme ~ start_node pause => before event => start_node
242							start_node ~ '['
243
244							string_char_set ~ escaped_char
245							\| [^;:}\]] # Neither a separator [;:] nor a terminator [}\]].
246
247							:lexeme ~ undirected_edge pause => before event => undirected_edge priority => 2
248							undirected_edge ~ '--'
249
250							# Boilerplate.
251
252							:discard ~ whitespace
253							whitespace ~ [\s]+
254
255							END_OF_GRAMMAR
256	6					17361	);
257
258	6					1497	$self -> grammar
259							(
260							Marpa::R2::Scanless::G -> new
261							({
262							source => \$self -> bnf
263							})
264							);
265
266	6					1099902	$self -> recce
267							(
268							Marpa::R2::Scanless::R -> new
269							({
270							grammar => $self -> grammar,
271							})
272							);
273
274	6					6682	my(%event);
275
276	6					258	for my $line (split(/\n/, $self -> bnf) )
277							{
278	564	100				3341	$event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
279							}
280
281	6					326	$self -> known_events(\%event);
282
283							# Since $self -> tree has not been initialized yet,
284							# we can't call our _add_daughter() until after this statement.
285
286	6					1291	$self -> tree(Tree::DAG_Node -> new({name => 'root', attributes => {uid => 0} }));
287	6					2178	$self -> stack([$self -> tree -> root]);
288
289							# This cut-down version of Graph::Easy::Marpa has no prolog (unlike Graph::Marpa).
290							# So, all tokens in the input are descended from the 'graph' node.
291
292	6					1489	for my $name (qw/prolog graph/)
293							{
294	12					789	$self -> _add_daughter($name, {});
295							}
296
297							# The 'prolog' daughter is the parent of all items in the prolog, but is not used here.
298							# It is used in GraphViz2::Marpa;
299							# The 'graph' daughter gets pushed onto the stack because in this module's grammar,
300							# all items belong to the graph.
301
302	6					1780	my(@daughters) = $self -> tree -> daughters;
303	6					134	my($index) = 1; # 0 => prolog, 1 => graph.
304	6					175	my($stack) = $self -> stack;
305
306	6					63	push @$stack, $daughters[$index];
307
308	6					298	$self -> stack($stack);
309
310							} # End of BUILD.
311
312							# ------------------------------------------------
313
314							sub _add_daughter
315							{
316	62			62		164	my($self, $name, $attributes) = @_;
317	62					2594	$$attributes{uid} = $self -> uid($self -> uid + 1);
318	62					4477	my($node) = Tree::DAG_Node -> new({name => $name, attributes => $attributes});
319	62					9330	my($stack) = $self -> stack;
320
321	62					982	$$stack[$#$stack] -> add_daughter($node);
322
323							} # End of _add_daughter.
324
325							# --------------------------------------------------
326
327							sub clean_after
328							{
329	42			42	1	287	my($self, $s) = @_;
330
331	42					408	$s =~ s/^\s+//;
332	42					184	$s =~ s/\s+$//;
333	42					229	$s =~ s/^([\"\'])(.*)\1$/$2/; # The backslashes are just for the UltraEdit syntax hiliter.
334
335	42					188	return $s;
336
337							} # End of clean_after.
338
339							# --------------------------------------------------
340
341							sub clean_before
342							{
343	6			6	1	78	my($self, $s) = @_;
344
345	6					117	$s =~ s/\s;\s$//;
346	6					23	$s =~ s/^\s+//;
347	6					325	$s =~ s/\s+$//;
348	6					96	$s =~ s/^(<)\s+/$1/;
349	6					28	$s =~ s/\s+(>)$/$1/;
350
351	6					24	return $s;
352
353							} # End of clean_before.
354
355							# --------------------------------------------------
356
357							sub log
358							{
359	92			92	1	19570	my($self, $level, $s) = @_;
360
361	92	50				3826	$self -> logger -> log($level => $s) if ($self -> logger);
362
363							} # End of log.
364
365							# --------------------------------------------------
366
367							sub _process
368							{
369	6			6		16	my($self) = @_;
370	6					292	my($string) = $self -> clean_before($self -> graph_text);
371	6					27	my($length) = length $string;
372	6					18	my($last_event) = '';
373	6					14	my($format) = '%-20s %5s %5s %5s %-s';
374
375	6					103	$self -> log(debug => sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme') );
376
377							# We use read()/lexeme_read()/resume() because we pause at each lexeme.
378
379	6					692	my($event_name);
380							my(@fields);
381	0					0	my($lexeme, $literal);
382	0					0	my($span, $start);
383
384	6					205	for
385							(
386							my $pos = $self -> recce -> read(\$string);
387							$pos < $length;
388							$pos = $self -> recce -> resume($pos)
389							)
390							{
391	74					40094	$event_name = $self -> _validate_event;
392	74					2760	($start, $span) = $self -> recce -> pause_span;
393	74					3537	$pos = $self -> recce -> lexeme_read($event_name);
394	74					15747	$literal = substr($string, $start, $pos - $start);
395	74					16371	$lexeme = $self -> recce -> literal($start, $span);
396
397	74					3728	$self -> log(debug => sprintf($format, $event_name, $start, $span, $pos, $lexeme) );
398
399	74	100				8056	if ($event_name eq 'attribute_name')
		100
		100
		100
		100
		100
		100
		100
		50
		0
400							{
401	11					54	$fields[0] = $self -> clean_after($literal);
402							}
403							elsif ($event_name eq 'attribute_value')
404							{
405	11					58	$literal = $self -> clean_after($literal);
406
407	11					108	$self -> _add_daughter($fields[0], {value => $literal});
408
409	11					1720	@fields = ();
410
411							# Skip the separator.
412
413	11		100			209	while ( ($pos < (length($string) - 1) ) && (substr($string, $pos, 1) =~ /[\s;]/) ) { $pos++ };
	18					136
414							}
415							elsif ($event_name eq 'directed_edge')
416							{
417	5					26	$self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
418							}
419							elsif ($event_name eq 'end_attributes')
420							{
421	9					46	$self -> _process_brace($literal);
422							}
423							elsif ($event_name eq 'end_node')
424							{
425							# Is this the anonymous node?
426
427	7	100				44	if ($last_event eq 'start_node')
428							{
429	1					10	$self -> _add_daughter('node_id', {value => ''});
430							}
431							}
432							elsif ($event_name eq 'literal_label')
433							{
434	9					30	push @fields, $literal;
435
436	9					296	$pos = $self -> _process_label($self -> recce, \@fields, $string, $length, $pos);
437	9					31	@fields = ();
438							}
439							elsif ($event_name eq 'node_name')
440							{
441	6					37	$literal = $self -> clean_after($literal);
442
443	6					53	$self -> _add_daughter('node_id', {value => $literal});
444							}
445							elsif ($event_name eq 'start_attributes')
446							{
447	9					39	$self -> _process_brace($literal);
448							}
449							elsif ($event_name eq 'start_node')
450							{
451							# Do nothing.
452							}
453							elsif ($event_name eq 'undirected_edge')
454							{
455	0					0	$self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
456							}
457
458	74					7348	$last_event = $event_name;
459							}
460
461	6	50				1815	if ($self -> recce -> ambiguity_metric > 1)
462							{
463	0					0	$self -> log(notice => 'Ambiguous parse');
464							}
465
466	6	50				1060	if (my $ambiguous_status = $self -> recce -> ambiguous)
467							{
468	0					0	$self -> log(notice => "Parse is ambiguous: $ambiguous_status.");
469							}
470
471							# Return a defined value for success and undef for failure.
472
473	6					387	return $self -> recce -> value;
474
475							} # End of _process.
476
477							# --------------------------------------------------
478
479							sub _process_brace
480							{
481	18			18		52	my($self, $name) = @_;
482
483							# When a '{' is encountered, the last thing pushed becomes it's parent.
484							# Likewise, if '}' is encountered, we pop the stack.
485
486	18					634	my($stack) = $self -> stack;
487
488	18	100				197	if ($name eq '{')
489							{
490	9					1195	my(@daughters) = $$stack[$#$stack] -> daughters;
491
492	9					145	push @$stack, $daughters[$#daughters];
493
494	9					66	$self -> _process_token('literal', $name);
495							}
496							else
497							{
498	9					58	$self -> _process_token('literal', $name);
499
500	9					1328	pop @$stack;
501
502	9					311	$self -> stack($stack);
503							}
504
505							} # End of _process_brace.
506
507							# ------------------------------------------------
508
509							sub _process_html
510							{
511	1			1		5	my($self, $recce, $fields, $string, $length, $pos) = @_;
512
513	1					4	my($bracket_count) = 0;
514	1					5	my($open_bracket) = '<';
515	1					6	my($close_bracket) = '>';
516	1					3	my($previous_char) = '';
517	1					3	my($label) = '';
518
519	1					3	my($char);
520
521	1					7	while ($pos < $length)
522							{
523	68					80	$char = substr($string, $pos, 1);
524	68					73	$label .= $char;
525
526	68	50				341	if ($previous_char eq '\\')
		100
		100
527							{
528							}
529							elsif ($char eq $open_bracket)
530							{
531	7					10	$bracket_count++;
532							}
533							elsif ($char eq $close_bracket)
534							{
535	7					10	$bracket_count--;
536
537	7	100				19	if ($bracket_count == 0)
538							{
539	1					4	$pos++;
540
541	1					4	last;
542							}
543							}
544
545	67					82	$previous_char = $char;
546
547	67					112	$pos++;
548							}
549
550	1					10	$label = $self -> clean_after($label);
551
552	1	50	33			32	if ( ($label =~ /^$/) )
553							{
554	0					0	my($line, $column) = $recce -> line_column;
555
556	0					0	die "Mismatched <> in HTML !$label! at (line, column) = ($line, $column)\n";
557							}
558
559	1					5	push @$fields, $label;
560
561	1					7	return $self -> _skip_separator($string, $length, $pos, ';');
562
563							} # End of _process_html.
564
565							# ------------------------------------------------
566
567							sub _process_label
568							{
569	9			9		117	my($self, $recce, $fields, $string, $length, $pos) = @_;
570
571	9					45	$pos = $self -> _skip_separator($string, $length, $pos, ':');
572
573	9	50				36	return $pos if ($pos >= $length);
574
575	9					35	my($char) = substr($string, $pos, 1);
576
577	9	50				68	if ($char eq "'")
		50
		100
578							{
579	0					0	$pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, "'");
580							}
581							elsif ($char eq '"')
582							{
583	0					0	$pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, '"');
584							}
585							elsif ($char eq '<')
586							{
587	1					11	$pos = $self -> _process_html($recce, $fields, $string, $length, $pos);
588							}
589							else
590							{
591	8					44	$pos = $self -> _process_unquoted($recce, $fields, $string, $length, $pos);
592							}
593
594	9					53	for (my $i = 0; $i < $#$fields; $i += 2)
595							{
596	9					95	$self -> _add_daughter($$fields[$i], {value => $$fields[$i + 1]});
597							}
598
599	9					2275	return $pos;
600
601							} # End of _process_label.
602
603							# ------------------------------------------------
604
605							sub _process_quotes
606							{
607	0			0		0	my($self, $recce, $fields, $string, $length, $pos, $terminator) = @_;
608
609	0					0	my($previous_char) = '';
610	0					0	my($label) = '';
611	0					0	my($quote_count) = 0;
612
613	0					0	my($char);
614
615	0					0	while ($pos < $length)
616							{
617	0					0	$char = substr($string, $pos, 1);
618
619	0	0	0			0	if ( ($previous_char ne '\\') && ($char eq $terminator) )
620							{
621	0					0	$quote_count++;
622
623	0	0				0	if ($quote_count == 2)
624							{
625	0					0	$label .= $char;
626
627	0					0	$pos++;
628
629	0					0	last;
630							}
631							}
632
633	0					0	$label .= $char;
634	0					0	$previous_char = $char;
635
636	0					0	$pos++;
637							}
638
639							# Don't call clean_after, since it removes the ' and " we are about to check.
640
641	0					0	$label =~ s/^\s+//;
642	0					0	$label =~ s/\s+$//;
643
644	0	0	0			0	if ( ($label =~ /^['"]/) && ($label !~ /^(['"]).*\1$/) )
645							{
646							# Use ' and " here just for the UltraEdit syntax hiliter.
647
648	0					0	my($line, $column) = $recce -> line_column;
649
650	0					0	die "Mismatched quotes in label !$label! at (line, column) = ($line, $column)\n";
651							}
652
653	0					0	$label = $self -> clean_after($label);
654
655	0					0	push @$fields, $label;
656
657	0					0	$self -> log(debug => "_process_quotes(). Label !$label!");
658
659	0					0	return $self -> _skip_separator($string, $length, $pos, ';');
660
661							} # End of _process_quotes.
662
663							# --------------------------------------------------
664
665							sub _process_token
666							{
667	18			18		52	my($self, $name, $value) = @_;
668
669	18					140	$self -> _add_daughter($name, {value => $value});
670
671							} # End of _process_token.
672
673							# ------------------------------------------------
674
675							sub _process_unquoted
676							{
677	8			8		25	my($self, $recce, $fields, $string, $length, $pos) = @_;
678	8					52	my($re) = qr/[;}]/;
679
680	8	50				89	if (substr($string, $pos, 1) =~ $re)
681							{
682	0					0	push @$fields, '';
683
684	0					0	return $pos;
685							}
686
687	8					24	my($previous_char) = '';
688	8					20	my($label) = '';
689	8					15	my($quote_count) = 0;
690
691	8					13	my($char);
692
693	8					30	while ($pos < $length)
694							{
695	99					159	$char = substr($string, $pos, 1);
696
697	99	100	100			600	last if ( ($previous_char ne '\\') && ($char =~ $re) );
698
699	91					104	$label .= $char;
700	91					201	$previous_char = $char;
701
702	91					163	$pos++;
703							}
704
705	8					42	$label = $self -> clean_after($label);
706
707	8					28	push @$fields, $label;
708
709	8					35	return $self -> _skip_separator($string, $length, $pos, ';');
710
711							} # End of _process_unquoted.
712
713							# --------------------------------------------------
714
715							sub run
716							{
717	6			6	1	32407	my($self) = @_;
718
719	6	50				363	if ($self -> description)
		50
720							{
721							# Assume graph is a single line without comments.
722
723	0					0	$self -> graph_text($self -> description);
724							}
725							elsif ($self -> input_file)
726							{
727							# Quick removal of whole-line C++ and hash comments.
728
729	6					2791	$self -> graph_text(join(' ', grep{! m!^(?:#\|//)!} read_file($self -> input_file, binmode => ':encoding(utf-8)') ) );
	19					23198
730							}
731							else
732							{
733	0					0	die "Error: You must provide a graph using one of -input_file or -description\n";
734							}
735
736							# Return 0 for success and 1 for failure.
737
738	6					1275	my($result) = 0;
739
740							try
741							{
742	6	50		6		533	if (defined (my $value = $self -> _process) )
743							{
744	6					76251	$self -> log(info => join("\n", @{$self -> tree -> tree2string}) );
	6					1781
745							}
746							else
747							{
748	0					0	$result = 1;
749
750	0					0	$self -> log(error => 'Parse failed');
751							}
752							}
753							catch
754							{
755	0			0		0	$result = 1;
756
757	0					0	$self -> log(error => "Parse failed. Error: $_");
758	6					126	};
759
760	6					10914	$self -> log(info => "Parse result: $result (0 is success)");
761
762							# Return 0 for success and 1 for failure.
763
764	6					154198	return $result;
765
766							} # End of run.
767
768							# ------------------------------------------------
769
770							sub _skip_separator
771							{
772	18			18		46	my($self, $string, $length, $pos, $separator) = @_;
773	18					926	my($re) = qr/[\s$separator]/;
774
775	18					133	my($char);
776
777	18					167	while ($pos < $length - 1)
778							{
779	42					218	$char = substr($string, $pos, 1);
780
781	42	100				422	last if ($char !~ $re);
782
783	26					65	$pos++;
784							}
785
786	18					210	return $pos;
787
788							} # End of _skip_separator.
789
790							# ------------------------------------------------
791
792							sub _validate_event
793							{
794	74			74		261	my($self) = @_;
795	74					123	my(@event) = @{$self -> recce -> events};
	74					6508
796	74					1572	my($event_count) = scalar @event;
797
798	74	50				371	if ($event_count > 1)
799							{
800	0					0	$self -> log(error => "Events triggered: $event_count (should be 1). Names: " . join(', ', map{${$_}[0]} @event) . '.');
	0					0
	0					0
801
802	0					0	die "The code only handles 1 event at a time\n";
803							}
804
805	74					109	my($event_name) = ${$event[0]}[0];
	74					419
806
807	74	50				109	if (! ${$self -> known_events}{$event_name})
	74					2705
808							{
809	0					0	my($msg) = "Unexpected event name '$event_name'";
810
811	0					0	$self -> log(error => $msg);
812
813	0					0	die "$msg\n";
814							}
815
816	74					1174	return $event_name;
817
818							} # End of _validate_event.
819
820							# --------------------------------------------------
821
822							1;
823
824							=pod
825
826							=head1 NAME
827
828							L - A Marpa-based parser for the DASH language
829
830							=head1 Synopsis
831
832							Typical usage:
833
834							perl -Ilib scripts/parse.pl -de '[node]{color:blue; label: "Node name"}' -max info
835							perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
836
837							You can use scripts/parse.sh to simplify this process, but it assumes you're input file is in data/:
838
839							scripts/parse.sh node.04 -max info
840
841							See L for sample
842							input and output.
843
844							Also, see L
845							based on this module.
846
847							=head1 Description
848
849							This module implements a parser for L (below), a wrapper language around Graphviz's
850							L. That is, the module is a pre-processor for the
851							DOT language.
852
853							Specifically, this module demonstrates how to use L's capabilities to have Marpa
854							repeatedly pass control back to code in your own module, during the parse, to handle those cases
855							where you don't want Marpa's default processing to occur.
856
857							This allows the code to deal with the classic case of where you wish to preserve whitespace in some
858							contexts, but also want Marpa to discard whitespace in all other contexts.
859
860							DASH is easier to use than DOT, which means the user can specify graphs very simply, without having
861							to learn DOT.
862
863							The DASH language is actually a cut-down version of the language used by L. For a full
864							explanation of the Graph::Easy language, see L.
865
866							The wrapper is parsed into a tree of tokens managed by L.
867
868							If requested by the user, the tree is passed to the default renderer
869							L. Various options allow the user to control the output, as
870							an SVG (PNG, ...) image, and to save the DOT version of the graph.
871
872							In the past, the code in this module was part of Graph::Easy::Marpa, but that latter module has
873							been deleted from CPAN, and all it's new code and features, together with bug fixes, is in the
874							current module.
875
876							Note that this module's usage of Marpa's adverbs I and I should be regarded as an
877							intermediate/advanced technique. For people just beginning to use Marpa, use of the I adverb
878							is the recommended technique.
879
880							The article mentioned above discusses important issues regarding the timing sequence of I
881							and I.
882
883							All this assumes a relatively recent version of Marpa, one in which its Scanless interface (SLIF)
884							is implemented. I'm currently (2014-10-10) using L V 2.096000.
885
886							Lastly, the parser and renderer will be incorporated into the next major release (V 2.00) of
887							L, which parses DOT files.
888
889							=head1 Installation
890
891							Install L as you would for any C module:
892
893							Run:
894
895							cpanm MarpaX::Demo::StringParser
896
897							or run:
898
899							sudo cpan MarpaX::Demo::StringParser
900
901							or unpack the distro, and then either:
902
903							perl Build.PL
904							./Build
905							./Build test
906							sudo ./Build install
907
908							or:
909
910							perl Makefile.PL
911							make (or dmake or nmake)
912							make test
913							make install
914
915							=head1 Scripts Shipped with this Module
916
917							All scripts are shipped in the scripts/ directory.
918
919							=over 4
920
921							=item o copy.config.pl
922
923							This is for use by the author. It just copies the config file out of the distro, so the script
924							generate.index.pl (which uses HTML template stuff) can find it.
925
926							=item o find.config.pl
927
928							This cross-checks the output of copy.config.pl.
929
930							=item o dash2svg.pl
931
932							Converts all data/.dash files into the corresponding html/.svg files.
933
934							Used by generate.demo.sh.
935
936							=item o generate.demo.sh
937
938							This generates all the SVG files for the data/*.dash files, and then generates html/index.html.
939
940							And then it copies the demo output to my dev web server's doc root, where I can cross-check it.
941
942							=item o generate.index.pl
943
944							This constructs a web page containing all the html/*.svg files.
945
946							=item o parse.pl
947
948							This runs a parse on a single input file. Run 'parse.pl -h' for details.
949
950							=item o parse.sh
951
952							This simplifies running parse.pl.
953
954							=item o pod2html.sh
955
956							This converts all lib/.pm files into their corresponding .html versions, for proof-reading and
957							uploading to my real web site.
958
959							=item o render.pl
960
961							This runs a parse on a single input file, and coverts the output into an SVG file. Run 'render.pl -h'
962							for details.
963
964							=item o render.sh
965
966							This simplifies running render.pl.
967
968							=back
969
970							=head1 Constructor and Initialization
971
972							C is called as C<< my($parser) = MarpaX::Demo::StringParser -> new(k1 => v1, k2 => v2, ...) >>.
973
974							It returns a new object of type C.
975
976							Key-value pairs accepted in the parameter list (see corresponding methods for details
977							[e.g. description($graph)]):
978
979							=over 4
980
981							=item o description => '[node.1]->[node.2]'
982
983							Specify a string for the graph definition.
984
985							You are strongly encouraged to surround this string with '...' to protect it from your shell if using
986							this module directly from the command line.
987
988							See also the I key which reads the graph from a file.
989
990							The I key takes precedence over the I key.
991
992							Default: ''.
993
994							=item o input_file => $graph_file_name
995
996							Read the graph definition from this file.
997
998							See also the I key to read the graph from the command line.
999
1000							The whole file is slurped in as a single graph.
1001
1002							The first lines of the file can start with /^\s*#/, and will be discarded as comments.
1003
1004							The I key takes precedence over the I key.
1005
1006							Default: ''.
1007
1008							=item o logger => $logger_object
1009
1010							Specify a logger object.
1011
1012							To disable logging, just set logger to the empty string.
1013
1014							Default: An object of type L.
1015
1016							=item o maxlevel => $level
1017
1018							This option is only used if this module creates an object of type L.
1019
1020							See L.
1021
1022							Default: 'notice'. A typical choice is 'info' or 'debug'.
1023
1024							=item o minlevel => $level
1025
1026							This option is only used if this module creates an object of type L.
1027
1028							See L.
1029
1030							Default: 'error'.
1031
1032							No lower levels are used.
1033
1034							=back
1035
1036							=head1 Methods
1037
1038							=head2 clean_before($s)
1039
1040							Cleans the input string before the next step in the parse process.
1041
1042							Typically only ever called once.
1043
1044							Returns the cleaned string.
1045
1046							=head2 clean_after($s)
1047
1048							Cleans the input string after each step in the parse process.
1049
1050							Typically called many times, once on each output token.
1051
1052							Returns the cleaned string.
1053
1054							=head2 description([$graph])
1055
1056							Here, the [] indicate an optional parameter.
1057
1058							Gets or sets the graph string to be parsed.
1059
1060							See also the L method.
1061
1062							The value supplied to the description() method takes precedence over the value read from the input file.
1063
1064							Also, I is an option to new().
1065
1066							=head2 graph_text([$graph])
1067
1068							Here, the [] indicate an optional parameter.
1069
1070							Returns the value of the graph definition string, from either the command line or a file.
1071
1072							=head2 input_file([$graph_file_name])
1073
1074							Here, the [] indicate an optional parameter.
1075
1076							Gets or sets the name of the file to read the graph definition from.
1077
1078							See also the L method.
1079
1080							The whole file is slurped in as a single graph.
1081
1082							The first few lines of the file can start with /^\s*#/, and will be discarded as comments.
1083
1084							The value supplied to the description() method takes precedence over the value read from the input file.
1085
1086							Also, I is an option to new().
1087
1088							=head2 log($level, $s)
1089
1090							Calls $self -> logger -> log($level => $s) if ($self -> logger).
1091
1092							=head2 run()
1093
1094							This is the only method the caller needs to call. All parameters are supplied to new().
1095
1096							Returns 0 for success and 1 for failure.
1097
1098							=head2 recce()
1099
1100							Returns an object of type L.
1101
1102							=head2 tree()
1103
1104							Returns an object of type L.
1105
1106							=head1 DASH Syntax
1107
1108							See L for sample
1109							input and output.
1110
1111							The examples in the following sections are almost all taken from data/*.dash, in the distro.
1112
1113							=head2 Graphs in DASH
1114
1115							1: A graph definition may continue over multiple lines.
1116							2: Lines beginning with either '#' or '//' are discarded as comments.
1117							3: A node name or an edge name must never be split over multiple lines.
1118							4: Attributes may be split over lines, but do not split either the name or value of the
1119							attribute over multiple lines.
1120							Note: Attribute values can contain various escaped characters, e.g. \n.
1121							5: A graph may start or end with an edge, and even have contiguous edges.
1122							See data/edge.06.dash (or the demo page). Graphviz does not allow any of these
1123							possibilities, so the default renderer fabricates anonymous nodes and inserts them where
1124							they will satisfy the requirements of Graphviz.
1125
1126							Examples:
1127
1128							1: A graph split over 10 lines:
1129							[node.1] {label: "n 1"}
1130							-> {label: 'e 1'}
1131							-> {label: e 2}
1132							[] {label: n 2}
1133							-> {label : e 3}
1134							[node.3] {label: "n 3"}
1135							-> {label: 'e 4'},
1136							-> {label: e 5}
1137							[] {label: n 2}
1138							-> {label : e 6}
1139							2: A graph split over 14 lines:
1140							->
1141							->
1142
1143							[node]
1144							[node] ->
1145							-> {label: Start} -> {color: red} [node.1] {color: green} -> [node.2]
1146							[node.1] [node.2] [node.3]
1147
1148							[]
1149							[node.1]
1150							[node 1]
1151							['node.2']
1152							["node.3"]
1153							[ From here ] -> [ To there ]
1154
1155							=head2 Nodes in DASH
1156
1157							Node names:
1158
1159							1: Are delimited by '[' and ']'.
1160							2: May be quoted with " or '.
1161							3: Allow escaped characters, using '\'.
1162							4: Allow internal spaces, even if not quoted.
1163							5: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1164							This is called 'Daisy-chaining'.
1165
1166							See L for the origin of this term.
1167
1168							Examples:
1169
1170							1: The anonymous node: []
1171							2: The anonymous node, with attributes (explained below): []{color:red}
1172							3: A named node: [Marpa]
1173							4: Juxtaposed nodes: [Perl][Marpa] or [Perl] [Marpa] or [Perl], [Marpa]
1174							5: A named node with an internal space: [Perl 6]
1175							6: A named node with attributes: [node.1]{label: A and B}
1176							7: A named node with spaces: [ node.1 ]
1177							These spaces are discarded.
1178							8: A named node with attributes, with spaces: [ node.1 ] { label : ' A Z ' }
1179							The spaces around 'node.1' are discarded.
1180							The spaces around ' A Z ' are discarded.
1181							The spaces inside ' A Z ' are preserved (because of the quotes).
1182							Double-quotes act in the same way.
1183							9: A named node with attributes, with spaces:
1184							[ node.1 ] { label : Flight Path from Melbourne to London }
1185							Space preservation is as above.
1186							10: A named node with escaped characters: [\[node\]]
1187							The '[' and ']' chars are preserved.
1188							11: A named node with [] in name: [[ \]]
1189							However, since '[' and ']' delimit node names, you are I advised to escape such
1190							characters.
1191							12: A named node with quotes, spaces, and escaped chars: [" a \' b \" c"]
1192							13: A complete graph:
1193							[node.1]
1194							-> {arrowhead: odot; arrowtail: ediamond; color: green; dir: both; label: A 1; penwidth: 1}
1195							-> {color: blue; label: B 2; penwidth: 3}
1196							-> {arrowhead: box; arrowtail: invdot; color: maroon; dir: both; label: C 3; penwidth: 5}
1197							[] {label: 'Some node'}
1198							-> [node.2]
1199
1200							=head2 Edges in DASH
1201
1202							Edge names:
1203
1204							1: Are '->'
1205							This is part of a directed graph.
1206							2: Or '--'
1207							This is part of an undirected graph.
1208							3: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1209							This is called 'Daisy-chaining'.
1210
1211							See L for the origin of this term.
1212
1213							It makes no sense to combine '->' and '--' in a single graph, because Graphviz will automatically
1214							reject such input. In other words, directed and undirected graphs are mutually exclusive.
1215
1216							So, if any edge in your graph is undirected (you use '--'), then every edge must use '--' and the
1217							same for '->'.
1218
1219							Examples:
1220
1221							1: An edge with attributes: -> {color:cornflowerblue; label: This edge's color is blueish ;}
1222							2: Juxtaposed edges without any spacing and without attributes: ------
1223							3: Juxtaposed edges (without comma) with attributes:
1224							-- {color: cornflowerblue; label: Top row\nBottom row}
1225							-- {color:red; label: Edges use cornflowerblue and red}
1226							4: An edge with attributes, with some escaped characters:
1227							-> {color:cornflowerblue; label: Use various escaped chars (\' \" \< \>) in label}
1228
1229							=head2 Attributes in DASH
1230
1231							Attributes:
1232
1233							1: Are delimited by '{' and '}'.
1234							2: Consist of a C and a C, separated by ':'.
1235							3: Are separated by ';'.
1236							4: The DOT language defines a set of escape characters acceptable in such a C.
1237							5: Allow quotes and whitespace as per node names.
1238							This must be true because the same non-Marpa parsers are used for both.
1239							6: Attribute values can be HTML-like. See the Graphviz docs for why we say 'HTML-like' and
1240							not HTML. See data/table.*.ge for examples.
1241
1242							See L for details.
1243
1244							Examples:
1245
1246							1: -- {color: cornflowerblue; label: Top row\nBottom row}
1247							Note the use of '\n' in the value of the label.
1248
1249							=head1 FAQ
1250
1251							=head2 What is the grammar parsed by this module?
1252
1253							See L just above.
1254
1255							=head2 How is the parsed graph stored in RAM?
1256
1257							Items are stored in a tree managed by L.
1258
1259							The sample code in the L will display a tree:
1260
1261							perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
1262
1263							Output:
1264
1265							root. Attributes: {uid => "0"}
1266							\|---prolog. Attributes: {uid => "1"}
1267							\|---graph. Attributes: {uid => "2"}
1268							\|---node_id. Attributes: {uid => "3", value => "node.1"}
1269							\| \|---literal. Attributes: {uid => "4", value => "{"}
1270							\| \|---label. Attributes: {uid => "5", value => "A and B"}
1271							\| \|---literal. Attributes: {uid => "6", value => "}"}
1272							\|---node_id. Attributes: {uid => "7", value => "node.2"}
1273							\|---literal. Attributes: {uid => "8", value => "{"}
1274							\|---label. Attributes: {uid => "9", value => "A or B"}
1275							\|---literal. Attributes: {uid => "10", value => "}"}
1276							Parse result: 0 (0 is success)
1277
1278							See also the next question.
1279
1280							=head2 What is the structure of the tree of parsed tokens?
1281
1282							From the previous answer, you can see the root has 2 daughters, with the 'prolog' daughter not
1283							currently used. It is used by L.
1284
1285							The 'graph' daughter (sub-tree) is what's processed by the default rendering engine
1286							L to convert the tree (i.e. the input file) into a DOT file
1287							and into an image.
1288
1289							=head2 Does this module handle utf8?
1290
1291							Yes. See the last sample on L.
1292
1293							=head2 Why doesn't the parser handle my HTML-style labels?
1294
1295							Traps for young players:
1296
1297							=over 4
1298
1299							=item o The component must include the '/'
1300
1301							=back
1302
1303							=head2 Why do I get error messages like the following?
1304
1305							Error: :1: syntax error near line 1
1306							context: digraph >>> Graph <<< {
1307
1308							Graphviz reserves some words as keywords, meaning they can't be used as an ID, e.g. for the name of the graph.
1309							So, don't do this:
1310
1311							strict graph graph{...}
1312							strict graph Graph{...}
1313							strict graph strict{...}
1314							etc...
1315
1316							Likewise for non-strict graphs, and digraphs. You can however add double-quotes around such reserved words:
1317
1318							strict graph "graph"{...}
1319
1320							Even better, use a more meaningful name for your graph...
1321
1322							The keywords are: node, edge, graph, digraph, subgraph and strict. Compass points are not keywords.
1323
1324							See L in the discussion of the syntax of DOT
1325							for details.
1326
1327							=head2 What is the homepage of Marpa?
1328
1329							L.
1330
1331							=head2 How do I reconcile Marpa's approach with classic lexing and parsing?
1332
1333							I've included in a recent article a section called
1334							L
1335							which is aimed at helping us think about this issue.
1336
1337							=head2 How did you generate the html/*.svg files?
1338
1339							With a private script which uses L V 2.00. This script is not shipped
1340							in order to avoid a dependency on that module. Also, another private script which validates Build.PL and
1341							Makefile.PL would complain about the missing dependency.
1342
1343							See L for details.
1344
1345							=head1 Machine-Readable Change Log
1346
1347							The file Changes was converted into Changelog.ini by L.
1348
1349							=head1 Version Numbers
1350
1351							Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1352
1353							=head1 Repository
1354
1355							L
1356
1357							=head1 Support
1358
1359							Email the author, or log a bug on RT:
1360
1361							L.
1362
1363							=head1 Author
1364
1365							L was written by Ron Savage Iron@savage.net.auE> in 2013.
1366
1367							Marpa's homepage: .
1368
1369							My homepage: L.
1370
1371							=head1 Copyright
1372
1373							Australian copyright (c) 2013, Ron Savage.
1374
1375							All Programs of mine are 'OSI Certified Open Source Software';
1376							you can redistribute them and/or modify them under the terms of
1377							The Artistic License, a copy of which is available at:
1378							http://www.opensource.org/licenses/index.html
1379
1380							=cut