File Coverage

blib/lib/MarpaX/Demo/StringParser.pm

Criterion	Covered	Total	%
statement	201	253	79.4
branch	50	74	67.5
condition	7	15	46.6
subroutine	28	31	90.3
pod	4	5	80.0
total	290	378	76.7

line	stmt	bran	cond	sub	pod	time	code
1							package MarpaX::Demo::StringParser;
2
3	1			1		55471	use strict;
	1					2
	1					31
4	1			1		4	use utf8;
	1					2
	1					8
5	1			1		16	use warnings;
	1					6
	1					32
6	1			1		3	use warnings qw(FATAL utf8); # Fatalize encoding glitches.
	1					2
	1					33
7
8	1			1		553	use File::Slurp; # For read_file().
	1					9979
	1					95
9
10	1			1		809	use Log::Handler;
	1					28850
	1					9
11
12	1			1		599	use Marpa::R2;
	1					109343
	1					23
13
14	1			1		996	use Moo;
	1					17232
	1					10
15
16	1			1		3219	use Set::Array;
	1					16801
	1					29
17
18	1			1		872	use Text::CSV;
	1					11007
	1					7
19
20	1			1		976	use Tree::DAG_Node;
	1					17425
	1					134
21
22	1			1		1085	use Types::Standard qw/Any ArrayRef HashRef Int Str/;
	1					74556
	1					23
23
24	1			1		1548	use Try::Tiny;
	1					14
	1					4214
25
26							has bnf =>
27							(
28							default => sub{return ''},
29							is => 'rw',
30							isa => Any,
31							required => 0,
32							);
33
34							has description =>
35							(
36							default => sub{return ''},
37							is => 'rw',
38							isa => Str,
39							required => 0,
40							);
41
42							has grammar =>
43							(
44							default => sub{return ''},
45							is => 'rw',
46							isa => Any, # 'Marpa::R2::Scanless::G'.
47							required => 0,
48							);
49
50							has graph_text =>
51							(
52							default => sub{return ''},
53							is => 'rw',
54							isa => Str,
55							required => 0,
56							);
57
58							has input_file =>
59							(
60							default => sub{return ''},
61							is => 'rw',
62							isa => Str,
63							required => 0,
64							);
65
66							has logger =>
67							(
68							default => sub{return undef},
69							is => 'rw',
70							isa => Any,
71							required => 0,
72							);
73
74							has maxlevel =>
75							(
76							default => sub{return 'notice'},
77							is => 'rw',
78							isa => Str,
79							required => 0,
80							);
81
82							has minlevel =>
83							(
84							default => sub{return 'error'},
85							is => 'rw',
86							isa => Str,
87							required => 0,
88							);
89
90							has known_events =>
91							(
92							default => sub{return {} },
93							is => 'rw',
94							isa => HashRef,
95							required => 0,
96							);
97
98							has recce =>
99							(
100							default => sub{return ''},
101							is => 'rw',
102							isa => Any, # 'Marpa::R2::Scanless::R'.
103							required => 0,
104							);
105
106							has stack =>
107							(
108							default => sub{return []},
109							is => 'rw',
110							isa => ArrayRef,
111							required => 0,
112							);
113
114							has tree =>
115							(
116							default => sub{return ''},
117							is => 'rw',
118							isa => Any,
119							required => 0,
120							);
121
122							has uid =>
123							(
124							default => sub{return 0},
125							is => 'rw',
126							isa => Int,
127							required => 0,
128							);
129
130							our $VERSION = '2.02';
131
132							# --------------------------------------------------
133							# For accepted and rejected by Marpa, see
134							# Marpa-R2-2.094000/lib/Marpa/R2/meta/metag.bnf.
135
136							sub BUILD
137							{
138	6			6	0	248	my($self) = @_;
139
140	6	50				157	if (! defined $self -> logger)
141							{
142	6					1254	$self -> logger(Log::Handler -> new);
143	6					1302	$self -> logger -> add
144							(
145							screen =>
146							{
147							alias => 'logger',
148							maxlevel => $self -> maxlevel,
149							message_layout => '%m',
150							minlevel => $self -> minlevel,
151							}
152							);
153							}
154
155							# Policy: Event names are always the same as the name of the corresponding lexeme.
156
157							$self -> bnf
158							(
159							<<'END_OF_GRAMMAR'
160
161							:default ::= action => [values]
162
163							lexeme default = latm => 1 # Longest Acceptable Token Match.
164
165							:start ::= graph_grammar
166
167							graph_grammar ::= graph_definition
168
169							# Graph stuff.
170
171							graph_definition ::= node_definition
172							\| edge_definition
173							# Node stuff
174
175							node_definition ::= node_statement
176							\| node_statement graph_definition
177
178							node_statement ::= node_name_token
179							\| node_name_token attribute_definition
180							\| node_statement (',') node_statement
181
182							node_name_token ::= start_node end_node # Allow for the anonymous node.
183							\| start_node node_name end_node
184
185							# Edge stuff
186
187							edge_definition ::= edge_statement
188							\| edge_statement graph_definition
189
190							edge_statement ::= edge_name
191							\| edge_name attribute_definition
192							\| edge_statement (',') edge_statement
193
194							edge_name ::= directed_edge
195							\| undirected_edge
196
197							# Attribute stuff.
198
199							attribute_definition ::= attribute_statement+
200
201							attribute_statement ::= start_attributes string_token_set end_attributes
202
203							string_token_set ::= string_token_pair+
204
205							string_token_pair ::= literal_label
206							\| attribute_name (':') attribute_value
207
208							# Lexemes in alphabetical order.
209
210							:lexeme ~ attribute_name pause => before event => attribute_name
211
212							attribute_name ~ string_char_set+
213
214							:lexeme ~ attribute_value pause => before event => attribute_value
215
216							attribute_value ~ string_char_set+
217
218							:lexeme ~ directed_edge pause => before event => directed_edge priority => 2
219							directed_edge ~ '->'
220
221							:lexeme ~ end_attributes pause => before event => end_attributes priority => 1
222							end_attributes ~ '}'
223
224							:lexeme ~ end_node pause => before event => end_node priority => 1
225							end_node ~ ']'
226
227							escaped_char ~ '\' [[:print:]]
228
229							# Use ' here just for the UltraEdit syntax hiliter.
230
231							:lexeme ~ literal_label pause => before event => literal_label priority => 1
232							literal_label ~ 'label'
233
234							:lexeme ~ node_name pause => before event => node_name
235
236							node_name ~ string_char_set+
237
238							:lexeme ~ start_attributes pause => before event => start_attributes
239							start_attributes ~ '{'
240
241							:lexeme ~ start_node pause => before event => start_node
242							start_node ~ '['
243
244							string_char_set ~ escaped_char
245							\| [^;:}\]] # Neither a separator [;:] nor a terminator [}\]].
246
247							:lexeme ~ undirected_edge pause => before event => undirected_edge priority => 2
248							undirected_edge ~ '--'
249
250							# Lexemes in alphabetical order.
251
252							# Boilerplate.
253
254							:discard ~ whitespace
255							whitespace ~ [\s]+
256
257							END_OF_GRAMMAR
258	6					23063	);
259
260	6					2544	$self -> grammar
261							(
262							Marpa::R2::Scanless::G -> new
263							({
264							source => \$self -> bnf
265							})
266
267							);
268	6					1114757	$self -> recce
269							(
270							Marpa::R2::Scanless::R -> new
271							({
272							grammar => $self -> grammar,
273							})
274							);
275
276	6					5544	my(%event);
277
278	6					208	for my $line (split(/\n/, $self -> bnf) )
279							{
280	576	100				2097	$event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
281							}
282
283	6					272	$self -> known_events(\%event);
284
285							# Since $self -> tree has not been initialized yet,
286							# we can't call our _add_daughter() until after this statement.
287
288	6					1242	$self -> tree(Tree::DAG_Node -> new({name => 'root', attributes => {uid => 0} }));
289	6					3648	$self -> stack([$self -> tree -> root]);
290
291							# This cut-down version of Graph::Easy::Marpa has no prolog (unlike Graph::Marpa).
292							# So, all tokens in the input are descended from the 'graph' node.
293
294	6					1483	for my $name (qw/prolog graph/)
295							{
296	12					615	$self -> _add_daughter($name, {});
297							}
298
299							# The 'prolog' daughter is the parent of all items in the prolog, but is not used here.
300							# It is used in GraphViz2::Marpa;
301							# The 'graph' daughter gets pushed onto the stack because in this module's grammar,
302							# all items belong to the graph.
303
304	6					584	my(@daughters) = $self -> tree -> daughters;
305	6					110	my($index) = 1; # 0 => prolog, 1 => graph.
306	6					241	my($stack) = $self -> stack;
307
308	6					96	push @$stack, $daughters[1];
309
310	6					192	$self -> stack($stack);
311
312							} # End of BUILD.
313
314							# ------------------------------------------------
315
316							sub _add_daughter
317							{
318	62			62		155	my($self, $name, $attributes) = @_;
319	62					2446	$$attributes{uid} = $self -> uid($self -> uid + 1);
320	62					4223	my($node) = Tree::DAG_Node -> new({name => $name, attributes => $attributes});
321	62					6510	my($stack) = $self -> stack;
322
323	62					914	$$stack[$#$stack] -> add_daughter($node);
324
325							} # End of _add_daughter.
326
327							# --------------------------------------------------
328
329							sub clean_after
330							{
331	42			42	1	91	my($self, $s) = @_;
332
333	42					171	$s =~ s/^\s+//;
334	42					143	$s =~ s/\s+$//;
335	42					103	$s =~ s/^([\"\'])(.*)\1$/$2/; # The backslashes are just for the UltraEdit syntax hiliter.
336
337	42					149	return $s;
338
339							} # End of clean_after.
340
341							# --------------------------------------------------
342
343							sub clean_before
344							{
345	6			6	1	69	my($self, $s) = @_;
346
347	6					112	$s =~ s/\s;\s$//;
348	6					22	$s =~ s/^\s+//;
349	6					70	$s =~ s/\s+$//;
350	6					23	$s =~ s/^(<)\s+/$1/;
351	6					69	$s =~ s/\s+(>)$/$1/;
352
353	6					28	return $s;
354
355							} # End of clean_before.
356
357							# --------------------------------------------------
358
359							sub _get_graph_from_command_line
360							{
361	0			0		0	my($self) = @_;
362
363	0					0	$self -> graph_text($self -> description);
364
365							} # End of _get_graph_from_command_line.
366
367							# --------------------------------------------------
368
369							sub log
370							{
371	92			92	1	14257	my($self, $level, $s) = @_;
372
373	92	50				5278	$self -> logger -> log($level => $s) if ($self -> logger);
374
375							} # End of log.
376
377							# --------------------------------------------------
378
379							sub _process
380							{
381	6			6		17	my($self) = @_;
382	6					272	my($string) = $self -> clean_before($self -> graph_text);
383	6					28	my($length) = length $string;
384	6					20	my($last_event) = '';
385	6					20	my($format) = '%-20s %5s %5s %-s';
386
387							# We use read()/lexeme_read()/resume() because we pause at each lexeme.
388
389	6					13	my($event_name, $edge);
390	0					0	my(@fields);
391	0					0	my($lexeme, $literal);
392	0					0	my($span, $start);
393
394	6					88	$self -> log(debug => sprintf($format, 'Event', 'Start', 'Span', 'Lexeme') );
395
396	6					712	for
397							(
398							my $pos = $self -> recce -> read(\$string);
399							$pos < $length;
400							$pos = $self -> recce -> resume($pos)
401							)
402							{
403	74					45436	$event_name = $self -> _validate_event;
404	74					2489	($start, $span) = $self -> recce -> pause_span;
405	74					4075	$pos = $self -> recce -> lexeme_read($event_name);
406	74					10029	$literal = substr($string, $start, $pos - $start);
407	74					2626	$lexeme = $self -> recce -> literal($start, $span);
408
409	74					2321	$self -> log(debug => sprintf($format, $event_name, $start, $span, $lexeme) );
410
411	74	100				37303	if ($event_name eq 'attribute_name')
		100
		100
		100
		100
		100
		100
		100
		50
		0
412							{
413	11					215	$fields[0] = $self -> clean_after($literal);
414							}
415							elsif ($event_name eq 'attribute_value')
416							{
417	11					47	$literal = $self -> clean_after($literal);
418
419	11					157	$self -> _add_daughter($fields[0], {value => $literal});
420
421	11					1409	@fields = ();
422
423							# Skip the separator.
424
425	11		100			183	while ( ($pos < (length($string) - 1) ) && (substr($string, $pos, 1) =~ /[\s;]/) ) { $pos++ };
	18					125
426							}
427							elsif ($event_name eq 'directed_edge')
428							{
429	5					24	$self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
430							}
431							elsif ($event_name eq 'end_attributes')
432							{
433	9					37	$self -> _process_brace($literal);
434							}
435							elsif ($event_name eq 'end_node')
436							{
437							# Is this the anonymous node?
438
439	7	100				35	if ($last_event eq 'start_node')
440							{
441	1					11	$self -> _add_daughter('node_id', {value => ''});
442							}
443							}
444							elsif ($event_name eq 'literal_label')
445							{
446	9					34	push @fields, $literal;
447
448	9					262	$pos = $self -> _process_label($self -> recce, \@fields, $string, $length, $pos);
449	9					33	@fields = ();
450							}
451							elsif ($event_name eq 'node_name')
452							{
453	6					38	$literal = $self -> clean_after($literal);
454
455	6					50	$self -> _add_daughter('node_id', {value => $literal});
456							}
457							elsif ($event_name eq 'start_attributes')
458							{
459	9					35	$self -> _process_brace($literal);
460							}
461							elsif ($event_name eq 'start_node')
462							{
463							# Do nothing.
464							}
465							elsif ($event_name eq 'undirected_edge')
466							{
467	0					0	$self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
468							}
469
470	74					5604	$last_event = $event_name;
471							}
472
473	6	50				580	if ($self -> recce -> ambiguity_metric > 1)
474							{
475	0					0	$self -> log(notice => 'Ambiguous parse');
476							}
477
478	6	50				5917	if (my $ambiguous_status = $self -> recce -> ambiguous)
479							{
480	0					0	$self -> log(notice => "Parse is ambiguous: $ambiguous_status.");
481							}
482
483							# Return a defined value for success and undef for failure.
484
485	6					434	return $self -> recce -> value;
486
487							} # End of _process.
488
489							# --------------------------------------------------
490
491							sub _process_brace
492							{
493	18			18		47	my($self, $name) = @_;
494
495							# When a '{' is encountered, the last thing pushed becomes it's parent.
496							# Likewise, if '}' is encountered, we pop the stack.
497
498	18					529	my($stack) = $self -> stack;
499
500	18	100				337	if ($name eq '{')
501							{
502	9					62	my(@daughters) = $$stack[$#$stack] -> daughters;
503
504	9					151	push @$stack, $daughters[$#daughters];
505
506	9					39	$self -> _process_token('literal', $name);
507							}
508							else
509							{
510	9					49	$self -> _process_token('literal', $name);
511
512	9					2735	pop @$stack;
513
514	9					297	$self -> stack($stack);
515							}
516
517							} # End of _process_brace.
518
519							# ------------------------------------------------
520
521							sub _process_html
522							{
523	1			1		5	my($self, $recce, $fields, $string, $length, $pos) = @_;
524
525	1					4	my($bracket_count) = 0;
526	1					3	my($open_bracket) = '<';
527	1					3	my($close_bracket) = '>';
528	1					4	my($previous_char) = '';
529	1					3	my($label) = '';
530
531	1					4	my($char);
532
533	1					7	while ($pos < $length)
534							{
535	68					379	$char = substr($string, $pos, 1);
536	68					66	$label .= $char;
537
538	68	50				205	if ($previous_char eq '\\')
		100
		100
539							{
540							}
541							elsif ($char eq $open_bracket)
542							{
543	7					7	$bracket_count++;
544							}
545							elsif ($char eq $close_bracket)
546							{
547	7					7	$bracket_count--;
548
549	7	100				19	if ($bracket_count == 0)
550							{
551	1					4	$pos++;
552
553	1					3	last;
554							}
555							}
556
557	67					634	$previous_char = $char;
558
559	67					99	$pos++;
560							}
561
562	1					8	$label = $self -> clean_after($label);
563
564	1	50	33			27	if ( ($label =~ /^$/) )
565							{
566	0					0	my($line, $column) = $recce -> line_column;
567
568	0					0	die "Mismatched <> in HTML !$label! at (line, column) = ($line, $column)\n";
569							}
570
571	1					4	push @$fields, $label;
572
573	1					7	return $self -> _skip_separator($string, $length, $pos, ';');
574
575							} # End of _process_html.
576
577							# ------------------------------------------------
578
579							sub _process_label
580							{
581	9			9		101	my($self, $recce, $fields, $string, $length, $pos) = @_;
582
583	9					46	$pos = $self -> _skip_separator($string, $length, $pos, ':');
584
585	9	50				34	return $pos if ($pos >= $length);
586
587	9					34	my($char) = substr($string, $pos, 1);
588
589	9	50				205	if ($char eq "'")
		50
		100
590							{
591	0					0	$pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, "'");
592							}
593							elsif ($char eq '"')
594							{
595	0					0	$pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, '"');
596							}
597							elsif ($char eq '<')
598							{
599	1					9	$pos = $self -> _process_html($recce, $fields, $string, $length, $pos);
600							}
601							else
602							{
603	8					43	$pos = $self -> _process_unquoted($recce, $fields, $string, $length, $pos);
604							}
605
606	9					52	for (my $i = 0; $i < $#$fields; $i += 2)
607							{
608	9					87	$self -> _add_daughter($$fields[$i], {value => $$fields[$i + 1]});
609							}
610
611	9					1058	return $pos;
612
613							} # End of _process_label.
614
615							# ------------------------------------------------
616
617							sub _process_quotes
618							{
619	0			0		0	my($self, $recce, $fields, $string, $length, $pos, $terminator) = @_;
620
621	0					0	my($previous_char) = '';
622	0					0	my($label) = '';
623	0					0	my($quote_count) = 0;
624
625	0					0	my($char);
626
627	0					0	while ($pos < $length)
628							{
629	0					0	$char = substr($string, $pos, 1);
630
631	0	0	0			0	if ( ($previous_char ne '\\') && ($char eq $terminator) )
632							{
633	0					0	$quote_count++;
634
635	0	0				0	if ($quote_count == 2)
636							{
637	0					0	$label .= $char;
638
639	0					0	$pos++;
640
641	0					0	last;
642							}
643							}
644
645	0					0	$label .= $char;
646	0					0	$previous_char = $char;
647
648	0					0	$pos++;
649							}
650
651							# Don't call clean_after, since it removes the ' and " we are about to check.
652
653	0					0	$label =~ s/^\s+//;
654	0					0	$label =~ s/\s+$//;
655
656	0	0	0			0	if ( ($label =~ /^['"]/) && ($label !~ /^(['"]).*\1$/) )
657							{
658							# Use ' and " here just for the UltraEdit syntax hiliter.
659
660	0					0	my($line, $column) = $recce -> line_column;
661
662	0					0	die "Mismatched quotes in label !$label! at (line, column) = ($line, $column)\n";
663							}
664
665	0					0	$label = $self -> clean_after($label);
666
667	0					0	push @$fields, $label;
668
669	0					0	$self -> log(debug => "_process_quotes(). Label !$label!");
670
671	0					0	return $self -> _skip_separator($string, $length, $pos, ';');
672
673							} # End of _process_quotes.
674
675							# --------------------------------------------------
676
677							sub _process_token
678							{
679	18			18		44	my($self, $name, $value) = @_;
680
681	18					107	$self -> _add_daughter($name, {value => $value});
682
683							} # End of _process_token.
684
685							# ------------------------------------------------
686
687							sub _process_unquoted
688							{
689	8			8		21	my($self, $recce, $fields, $string, $length, $pos) = @_;
690	8					45	my($re) = qr/[;}]/;
691
692	8	50				72	if (substr($string, $pos, 1) =~ $re)
693							{
694	0					0	push @$fields, '';
695
696	0					0	return $pos;
697							}
698
699	8					24	my($previous_char) = '';
700	8					22	my($label) = '';
701	8					17	my($quote_count) = 0;
702
703	8					17	my($char);
704
705	8					29	while ($pos < $length)
706							{
707	99					142	$char = substr($string, $pos, 1);
708
709	99	100	100			2825	last if ( ($previous_char ne '\\') && ($char =~ $re) );
710
711	91					116	$label .= $char;
712	91					112	$previous_char = $char;
713
714	91					156	$pos++;
715							}
716
717	8					37	$label = $self -> clean_after($label);
718
719	8					24	push @$fields, $label;
720
721	8					31	return $self -> _skip_separator($string, $length, $pos, ';');
722
723							} # End of _process_unquoted.
724
725							# --------------------------------------------------
726
727							sub run
728							{
729	6			6	1	16827	my($self) = @_;
730
731	6	50				225	if ($self -> description)
		50
732							{
733	0					0	$self -> _get_graph_from_command_line;
734							}
735							elsif ($self -> input_file)
736							{
737	6					2322	$self -> graph_text(join(' ', grep{! m!^(?:#\|//)!} read_file($self -> input_file, binmode => ':encoding(utf-8)') ) );
	19					38592
738							}
739							else
740							{
741	0					0	die "Error: You must provide a graph using one of -input_file or -description\n";
742							}
743
744							# Return 0 for success and 1 for failure.
745
746	6					1591	my($result) = 0;
747
748							try
749							{
750	6	50		6		444	if (defined (my $value = $self -> _process) )
751							{
752	6					69128	$self -> log(info => join("\n", @{$self -> tree -> tree2string}) );
	6					381
753							}
754							else
755							{
756	0					0	$result = 1;
757
758	0					0	$self -> log(error => 'Parse failed');
759							}
760							}
761							catch
762							{
763	0			0		0	$result = 1;
764
765	0					0	$self -> log(error => "Parse failed. Error: $_");
766	6					196	};
767
768							# Return 0 for success and 1 for failure.
769
770	6					3810	$self -> log(info => "Parse result: $result (0 is success)");
771
772	6					1302	return $result;
773
774							} # End of run.
775
776							# ------------------------------------------------
777
778							sub _skip_separator
779							{
780	18			18		45	my($self, $string, $length, $pos, $separator) = @_;
781	18					856	my($re) = qr/[\s$separator]/;
782
783	18					36	my($char);
784
785	18					74	while ($pos < $length - 1)
786							{
787	42					113	$char = substr($string, $pos, 1);
788
789	42	100				291	last if ($char !~ $re);
790
791	26					57	$pos++;
792							}
793
794	18					93	return $pos;
795
796							} # End of _skip_separator.
797
798							# ------------------------------------------------
799
800							sub _validate_event
801							{
802	74			74		138	my($self) = @_;
803	74					94	my(@event) = @{$self -> recce -> events};
	74					2793
804	74					1128	my($event_count) = scalar @event;
805
806	74	50				328	if ($event_count > 1)
807							{
808	0					0	$self -> log(error => "Events triggered: $event_count (should be 1). Names: " . join(', ', map{${$_}[0]} @event) . '.');
	0					0
	0					0
809
810	0					0	die "The code only handles 1 event at a time\n";
811							}
812
813	74					1209	my($event_name) = ${$event[0]}[0];
	74					186
814
815	74	50				85	if (! ${$self -> known_events}{$event_name})
	74					2179
816							{
817	0					0	my($msg) = "Unexpected event name '$event_name'";
818
819	0					0	$self -> log(error => $msg);
820
821	0					0	die "$msg\n";
822							}
823
824	74					1328	return $event_name;
825
826							} # End of _validate_event.
827
828							# --------------------------------------------------
829
830							1;
831
832							=pod
833
834							=head1 NAME
835
836							L - A Marpa-based parser for the DASH language
837
838							=head1 Synopsis
839
840							Typical usage:
841
842							perl -Ilib scripts/parse.pl -de '[node]{color:blue; label: "Node name"}' -max info
843							perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
844
845							You can use scripts/parse.sh to simplify this process, but it assumes you're input file is in data/:
846
847							scripts/parse.sh node.04 -max info
848
849							See L for sample
850							input and output.
851
852							Also, see L
853							based on this module.
854
855							=head1 Description
856
857							This module implements a parser for L (below), a wrapper language around Graphviz's
858							L. That is, the module is a pre-processor for the
859							DOT language.
860
861							Specifically, this module demonstrates how to use L's capabilities to have Marpa
862							repeatedly pass control back to code in your own module, during the parse, to handle those cases
863							where you don't want Marpa's default processing to occur.
864
865							This allows the code to deal with the classic case of where you wish to preserve whitespace in some
866							contexts, but also want Marpa to discard whitespace in all other contexts.
867
868							DASH is easier to use than DOT, which means the user can specify graphs very simply, without having
869							to learn DOT.
870
871							The DASH language is actually a cut-down version of the language used by L. For a full
872							explanation of the Graph::Easy language, see L.
873
874							The wrapper is parsed into a tree of tokens managed by L.
875
876							If requested by the user, the tree is passed to the default renderer
877							L. Various options allow the user to control the output, as
878							an SVG (PNG, ...) image, and to save the DOT version of the graph.
879
880							In the past, the code in this module was part of Graph::Easy::Marpa, but that latter module has
881							been deleted from CPAN, and all it's new code and features, together with bug fixes, is in the
882							current module.
883
884							Note that this module's usage of Marpa's adverbs I and I should be regarded as an
885							intermediate/advanced technique. For people just beginning to use Marpa, use of the I adverb
886							is the recommended technique.
887
888							The article mentioned above discusses important issues regarding the timing sequence of I
889							and I.
890
891							All this assumes a relatively recent version of Marpa, one in which its Scanless interface (SLIF)
892							is implemented. I'm currently (2014-10-10) using L V 2.096000.
893
894							Lastly, the parser and renderer will be incorporated into the next major release (V 2.00) of
895							L, which parses DOT files.
896
897							=head1 Installation
898
899							Install L as you would for any C module:
900
901							Run:
902
903							cpanm MarpaX::Demo::StringParser
904
905							or run:
906
907							sudo cpan MarpaX::Demo::StringParser
908
909							or unpack the distro, and then either:
910
911							perl Build.PL
912							./Build
913							./Build test
914							sudo ./Build install
915
916							or:
917
918							perl Makefile.PL
919							make (or dmake or nmake)
920							make test
921							make install
922
923							=head1 Scripts Shipped with this Module
924
925							All scripts are shipped in the scripts/ directory.
926
927							=over 4
928
929							=item o copy.config.pl
930
931							This is for use by the author. It just copies the config file out of the distro, so the script
932							generate.index.pl (which uses HTML template stuff) can find it.
933
934							=item o find.config.pl
935
936							This cross-checks the output of copy.config.pl.
937
938							=item o dash2svg.pl
939
940							Converts all data/.dash files into the corresponding html/.svg files.
941
942							Used by generate.demo.sh.
943
944							=item o generate.demo.sh
945
946							This generates all the SVG files for the data/*.dash files, and then generates html/index.html.
947
948							And then it copies the demo output to my dev web server's doc root, where I can cross-check it.
949
950							=item o generate.index.pl
951
952							This constructs a web page containing all the html/*.svg files.
953
954							=item o parse.pl
955
956							This runs a parse on a single input file. Run 'parse.pl -h' for details.
957
958							=item o parse.sh
959
960							This simplifies running parse.pl.
961
962							=item o pod2html.sh
963
964							This converts all lib/.pm files into their corresponding .html versions, for proof-reading and
965							uploading to my real web site.
966
967							=item o render.pl
968
969							This runs a parse on a single input file, and coverts the output into an SVG file. Run 'render.pl -h'
970							for details.
971
972							=item o render.sh
973
974							This simplifies running render.pl.
975
976							=back
977
978							=head1 Constructor and Initialization
979
980							C is called as C<< my($parser) = MarpaX::Demo::StringParser -> new(k1 => v1, k2 => v2, ...) >>.
981
982							It returns a new object of type C.
983
984							Key-value pairs accepted in the parameter list (see corresponding methods for details
985							[e.g. description($graph)]):
986
987							=over 4
988
989							=item o description => '[node.1]->[node.2]'
990
991							Specify a string for the graph definition.
992
993							You are strongly encouraged to surround this string with '...' to protect it from your shell if using
994							this module directly from the command line.
995
996							See also the I key which reads the graph from a file.
997
998							The I key takes precedence over the I key.
999
1000							Default: ''.
1001
1002							=item o input_file => $graph_file_name
1003
1004							Read the graph definition from this file.
1005
1006							See also the I key to read the graph from the command line.
1007
1008							The whole file is slurped in as a single graph.
1009
1010							The first lines of the file can start with /^\s*#/, and will be discarded as comments.
1011
1012							The I key takes precedence over the I key.
1013
1014							Default: ''.
1015
1016							=item o logger => $logger_object
1017
1018							Specify a logger object.
1019
1020							To disable logging, just set logger to the empty string.
1021
1022							Default: An object of type L.
1023
1024							=item o maxlevel => $level
1025
1026							This option is only used if this module creates an object of type L.
1027
1028							See L.
1029
1030							Default: 'notice'. A typical choice is 'info' or 'debug'.
1031
1032							=item o minlevel => $level
1033
1034							This option is only used if this module creates an object of type L.
1035
1036							See L.
1037
1038							Default: 'error'.
1039
1040							No lower levels are used.
1041
1042							=back
1043
1044							=head1 Methods
1045
1046							=head2 clean_before($s)
1047
1048							Cleans the input string before the next step in the parse process.
1049
1050							Typically only ever called once.
1051
1052							Returns the cleaned string.
1053
1054							=head2 clean_after($s)
1055
1056							Cleans the input string after each step in the parse process.
1057
1058							Typically called many times, once on each output token.
1059
1060							Returns the cleaned string.
1061
1062							=head2 description([$graph])
1063
1064							Here, the [] indicate an optional parameter.
1065
1066							Gets or sets the graph string to be parsed.
1067
1068							See also the L method.
1069
1070							The value supplied to the description() method takes precedence over the value read from the input file.
1071
1072							Also, I is an option to new().
1073
1074							=head2 graph_text([$graph])
1075
1076							Here, the [] indicate an optional parameter.
1077
1078							Returns the value of the graph definition string, from either the command line or a file.
1079
1080							=head2 input_file([$graph_file_name])
1081
1082							Here, the [] indicate an optional parameter.
1083
1084							Gets or sets the name of the file to read the graph definition from.
1085
1086							See also the L method.
1087
1088							The whole file is slurped in as a single graph.
1089
1090							The first few lines of the file can start with /^\s*#/, and will be discarded as comments.
1091
1092							The value supplied to the description() method takes precedence over the value read from the input file.
1093
1094							Also, I is an option to new().
1095
1096							=head2 log($level, $s)
1097
1098							Calls $self -> logger -> log($level => $s) if ($self -> logger).
1099
1100							=head2 run()
1101
1102							This is the only method the caller needs to call. All parameters are supplied to new().
1103
1104							Returns 0 for success and 1 for failure.
1105
1106							=head2 recce()
1107
1108							Returns an object of type L.
1109
1110							=head2 tree()
1111
1112							Returns an object of type L.
1113
1114							=head1 DASH Syntax
1115
1116							See L for sample
1117							input and output.
1118
1119							The examples in the following sections are almost all taken from data/*.dash, in the distro.
1120
1121							=head2 Graphs in DASH
1122
1123							1: A graph definition may continue over multiple lines.
1124							2: Lines beginning with either '#' or '//' are discarded as comments.
1125							3: A node name or an edge name must never be split over multiple lines.
1126							4: Attributes may be split over lines, but do not split either the name or value of the
1127							attribute over multiple lines.
1128							Note: Attribute values can contain various escaped characters, e.g. \n.
1129							5: A graph may start or end with an edge, and even have contiguous edges.
1130							See data/edge.06.dash (or the demo page). Graphviz does not allow any of these
1131							possibilities, so the default renderer fabricates anonymous nodes and inserts them where
1132							they will satisfy the requirements of Graphviz.
1133
1134							Examples:
1135
1136							1: A graph split over 10 lines:
1137							[node.1] {label: "n 1"}
1138							-> {label: 'e 1'}
1139							-> {label: e 2}
1140							[] {label: n 2}
1141							-> {label : e 3}
1142							[node.3] {label: "n 3"}
1143							-> {label: 'e 4'},
1144							-> {label: e 5}
1145							[] {label: n 2}
1146							-> {label : e 6}
1147							2: A graph split over 14 lines:
1148							->
1149							->
1150
1151							[node]
1152							[node] ->
1153							-> {label: Start} -> {color: red} [node.1] {color: green} -> [node.2]
1154							[node.1] [node.2] [node.3]
1155
1156							[]
1157							[node.1]
1158							[node 1]
1159							['node.2']
1160							["node.3"]
1161							[ From here ] -> [ To there ]
1162
1163							=head2 Nodes in DASH
1164
1165							Node names:
1166
1167							1: Are delimited by '[' and ']'.
1168							2: May be quoted with " or '.
1169							3: Allow escaped characters, using '\'.
1170							4: Allow internal spaces, even if not quoted.
1171							5: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1172							This is called 'Daisy-chaining'.
1173
1174							See L for the origin of this term.
1175
1176							Examples:
1177
1178							1: The anonymous node: []
1179							2: The anonymous node, with attributes (explained below): []{color:red}
1180							3: A named node: [Marpa]
1181							4: Juxtaposed nodes: [Perl][Marpa] or [Perl] [Marpa] or [Perl], [Marpa]
1182							5: A named node with an internal space: [Perl 6]
1183							6: A named node with attributes: [node.1]{label: A and B}
1184							7: A named node with spaces: [ node.1 ]
1185							These spaces are discarded.
1186							8: A named node with attributes, with spaces: [ node.1 ] { label : ' A Z ' }
1187							The spaces around 'node.1' are discarded.
1188							The spaces around ' A Z ' are discarded.
1189							The spaces inside ' A Z ' are preserved (because of the quotes).
1190							Double-quotes act in the same way.
1191							9: A named node with attributes, with spaces:
1192							[ node.1 ] { label : Flight Path from Melbourne to London }
1193							Space preservation is as above.
1194							10: A named node with escaped characters: [\[node\]]
1195							The '[' and ']' chars are preserved.
1196							11: A named node with [] in name: [[ \]]
1197							However, since '[' and ']' delimit node names, you are I advised to escape such
1198							characters.
1199							12: A named node with quotes, spaces, and escaped chars: [" a \' b \" c"]
1200							13: A complete graph:
1201							[node.1]
1202							-> {arrowhead: odot; arrowtail: ediamond; color: green; dir: both; label: A 1; penwidth: 1}
1203							-> {color: blue; label: B 2; penwidth: 3}
1204							-> {arrowhead: box; arrowtail: invdot; color: maroon; dir: both; label: C 3; penwidth: 5}
1205							[] {label: 'Some node'}
1206							-> [node.2]
1207
1208							=head2 Edges in DASH
1209
1210							Edge names:
1211
1212							1: Are '->'
1213							This is part of a directed graph.
1214							2: Or '--'
1215							This is part of an undirected graph.
1216							3: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1217							This is called 'Daisy-chaining'.
1218
1219							See L for the origin of this term.
1220
1221							It makes no sense to combine '->' and '--' in a single graph, because Graphviz will automatically
1222							reject such input. In other words, directed and undirected graphs are mutually exclusive.
1223
1224							So, if any edge in your graph is undirected (you use '--'), then every edge must use '--' and the
1225							same for '->'.
1226
1227							Examples:
1228
1229							1: An edge with attributes: -> {color:cornflowerblue; label: This edge's color is blueish ;}
1230							2: Juxtaposed edges without any spacing and without attributes: ------
1231							3: Juxtaposed edges (without comma) with attributes:
1232							-- {color: cornflowerblue; label: Top row\nBottom row}
1233							-- {color:red; label: Edges use cornflowerblue and red}
1234							4: An edge with attributes, with some escaped characters:
1235							-> {color:cornflowerblue; label: Use various escaped chars (\' \" \< \>) in label}
1236
1237							=head2 Attributes in DASH
1238
1239							Attributes:
1240
1241							1: Are delimited by '{' and '}'.
1242							2: Consist of a C and a C, separated by ':'.
1243							3: Are separated by ';'.
1244							4: The DOT language defines a set of escape characters acceptable in such a C.
1245							5: Allow quotes and whitespace as per node names.
1246							This must be true because the same non-Marpa parsers are used for both.
1247							6: Attribute values can be HTML-like. See the Graphviz docs for why we say 'HTML-like' and
1248							not HTML. See data/table.*.ge for examples.
1249
1250							See L for details.
1251
1252							Examples:
1253
1254							1: -- {color: cornflowerblue; label: Top row\nBottom row}
1255							Note the use of '\n' in the value of the label.
1256
1257							=head1 FAQ
1258
1259							=head2 What is the grammar parsed by this module?
1260
1261							See L just above.
1262
1263							=head2 How is the parsed graph stored in RAM?
1264
1265							Items are stored in a tree managed by L.
1266
1267							The sample code in the L will display a tree:
1268
1269							perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
1270
1271							Output:
1272
1273							root. Attributes: {uid => "0"}
1274							\|---prolog. Attributes: {uid => "1"}
1275							\|---graph. Attributes: {uid => "2"}
1276							\|---node_id. Attributes: {uid => "3", value => "node.1"}
1277							\| \|---literal. Attributes: {uid => "4", value => "{"}
1278							\| \|---label. Attributes: {uid => "5", value => "A and B"}
1279							\| \|---literal. Attributes: {uid => "6", value => "}"}
1280							\|---node_id. Attributes: {uid => "7", value => "node.2"}
1281							\|---literal. Attributes: {uid => "8", value => "{"}
1282							\|---label. Attributes: {uid => "9", value => "A or B"}
1283							\|---literal. Attributes: {uid => "10", value => "}"}
1284							Parse result: 0 (0 is success)
1285
1286							See also the next question.
1287
1288							=head2 What is the structure of the tree of parsed tokens?
1289
1290							From the previous answer, you can see the root has 2 daughters, with the 'prolog' daughter not
1291							currently used. It is used by L.
1292
1293							The 'graph' daughter (sub-tree) is what's processed by the default rendering engine
1294							L to convert the tree (i.e. the input file) into a DOT file
1295							and into an image.
1296
1297							=head2 Does this module handle utf8?
1298
1299							Yes. See the last sample on L.
1300
1301							=head2 Why doesn't the parser handle my HTML-style labels?
1302
1303							Traps for young players:
1304
1305							=over 4
1306
1307							=item o The component must include the '/'
1308
1309							=back
1310
1311							=head2 Why do I get error messages like the following?
1312
1313							Error: :1: syntax error near line 1
1314							context: digraph >>> Graph <<< {
1315
1316							Graphviz reserves some words as keywords, meaning they can't be used as an ID, e.g. for the name of the graph.
1317							So, don't do this:
1318
1319							strict graph graph{...}
1320							strict graph Graph{...}
1321							strict graph strict{...}
1322							etc...
1323
1324							Likewise for non-strict graphs, and digraphs. You can however add double-quotes around such reserved words:
1325
1326							strict graph "graph"{...}
1327
1328							Even better, use a more meaningful name for your graph...
1329
1330							The keywords are: node, edge, graph, digraph, subgraph and strict. Compass points are not keywords.
1331
1332							See L in the discussion of the syntax of DOT
1333							for details.
1334
1335							=head2 What is the homepage of Marpa?
1336
1337							L.
1338
1339							=head2 How do I reconcile Marpa's approach with classic lexing and parsing?
1340
1341							I've included in a recent article a section called
1342							L
1343							which is aimed at helping us think about this issue.
1344
1345							=head2 How did you generate the html/*.svg files?
1346
1347							With a private script which uses L V 2.00. This script is not shipped
1348							in order to avoid a dependency on that module. Also, another private script which validates Build.PL and
1349							Makefile.PL would complain about the missing dependency.
1350
1351							See L for details.
1352
1353							=head1 Machine-Readable Change Log
1354
1355							The file Changes was converted into Changelog.ini by L.
1356
1357							=head1 Version Numbers
1358
1359							Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1360
1361							=head1 Repository
1362
1363							L
1364
1365							=head1 Support
1366
1367							Email the author, or log a bug on RT:
1368
1369							L.
1370
1371							=head1 Author
1372
1373							L was written by Ron Savage Iron@savage.net.auE> in 2013.
1374
1375							Home page: L.
1376
1377							=head1 Copyright
1378
1379							Australian copyright (c) 2013, Ron Savage.
1380
1381							All Programs of mine are 'OSI Certified Open Source Software';
1382							you can redistribute them and/or modify them under the terms of
1383							The Artistic License, a copy of which is available at:
1384							http://www.opensource.org/licenses/index.html
1385
1386							=cut