File Coverage

blib/lib/Text/Balanced/Marpa.pm

Criterion	Covered	Total	%
statement	19	21	90.4
branch			n/a
condition			n/a
subroutine	7	7	100.0
pod			n/a
total	26	28	92.8

line	stmt	sub	time	code
1				package Text::Balanced::Marpa;
2
3	16	16	355501	use strict;
	16		36
	16		726
4	16	16	8590	use utf8;
	16		136
	16		92
5	16	16	571	use warnings;
	16		35
	16		675
6	16	16	82	use warnings qw(FATAL utf8); # Fatalize encoding glitches.
	16		26
	16		845
7	16	16	7494	use open qw(:std :utf8); # Undeclared streams in UTF-8.
	16		14989
	16		93
8
9	16		177	use Const::Exporter constants =>
10				[
11				nothing_is_fatal => 0, # The default.
12				print_errors => 1,
13				print_warnings => 2,
14				print_debugs => 4,
15				overlap_is_fatal => 8,
16				nesting_is_fatal => 16,
17				ambiguity_is_fatal => 32,
18				exhaustion_is_fatal => 64,
19	16	16	12297	];
	16		264632
20
21	16	16	34230	use Marpa::R2;
	0
	0
22
23				use Moo;
24
25				use Tree;
26
27				use Types::Standard qw/Any ArrayRef HashRef Int ScalarRef Str/;
28
29				use Try::Tiny;
30
31				has bnf =>
32				(
33				default => sub{return ''},
34				is => 'rw',
35				isa => Any,
36				required => 0,
37				);
38
39				has close =>
40				(
41				default => sub{return []},
42				is => 'rw',
43				isa => ArrayRef,
44				required => 0,
45				);
46
47				has delimiter_action =>
48				(
49				default => sub{return {} },
50				is => 'rw',
51				isa => HashRef,
52				required => 0,
53				);
54
55				has delimiter_frequency =>
56				(
57				default => sub{return {} },
58				is => 'rw',
59				isa => HashRef,
60				required => 0,
61				);
62
63				has delimiter_stack =>
64				(
65				default => sub{return []},
66				is => 'rw',
67				isa => ArrayRef,
68				required => 0,
69				);
70
71				has error_message =>
72				(
73				default => sub{return ''},
74				is => 'rw',
75				isa => Str,
76				required => 0,
77				);
78
79				has error_number =>
80				(
81				default => sub{return 0},
82				is => 'rw',
83				isa => Int,
84				required => 0,
85				);
86
87				has escape_char =>
88				(
89				default => sub{return '\\'},
90				is => 'rw',
91				isa => Str,
92				required => 0,
93				);
94
95				has grammar =>
96				(
97				default => sub {return ''},
98				is => 'rw',
99				isa => Any,
100				required => 0,
101				);
102
103				has known_events =>
104				(
105				default => sub{return {} },
106				is => 'rw',
107				isa => HashRef,
108				required => 0,
109				);
110
111				has length =>
112				(
113				default => sub{return 0},
114				is => 'rw',
115				isa => Int,
116				required => 0,
117				);
118
119				has matching_delimiter =>
120				(
121				default => sub{return {} },
122				is => 'rw',
123				isa => HashRef,
124				required => 0,
125				);
126
127				has next_few_limit =>
128				(
129				default => sub{return 20},
130				is => 'rw',
131				isa => Int,
132				required => 0,
133				);
134
135				has node_stack =>
136				(
137				default => sub{return []},
138				is => 'rw',
139				isa => ArrayRef,
140				required => 0,
141				);
142
143				has open =>
144				(
145				default => sub{return []},
146				is => 'rw',
147				isa => ArrayRef,
148				required => 0,
149				);
150
151				has options =>
152				(
153				default => sub{return 0},
154				is => 'rw',
155				isa => Int,
156				required => 0,
157				);
158
159				has pos =>
160				(
161				default => sub{return 0},
162				is => 'rw',
163				isa => Int,
164				required => 0,
165				);
166
167				has recce =>
168				(
169				default => sub{return ''},
170				is => 'rw',
171				isa => Any,
172				required => 0,
173				);
174
175				has tree =>
176				(
177				default => sub{return ''},
178				is => 'rw',
179				isa => Any,
180				required => 0,
181				);
182
183				has text =>
184				(
185				default => sub{return \''}, # Use ' in comment for UltraEdit.
186				is => 'rw',
187				isa => ScalarRef[Str],
188				required => 0,
189				);
190
191				has uid =>
192				(
193				default => sub{return 0},
194				is => 'rw',
195				isa => Int,
196				required => 0,
197				);
198
199				our $VERSION = '1.07';
200
201				# ------------------------------------------------
202
203				sub BUILD
204				{
205				my($self) = @_;
206
207				# Policy: Event names are always the same as the name of the corresponding lexeme.
208				#
209				# Note: Tokens of the form '_xxx_' are replaced just below, with values returned
210				# by the call to validate_open_close().
211
212				my($bnf) = <<'END_OF_GRAMMAR';
213
214				:default ::= action => [values]
215
216				lexeme default = latm => 1
217
218				:start ::= input_text
219
220				input_text ::= input_string*
221
222				input_string ::= quoted_text
223				\| unquoted_text
224
225				quoted_text ::= open_delim input_text close_delim
226
227				unquoted_text ::= text
228
229				# Lexemes in alphabetical order.
230
231				delimiter_char ~ [_delimiter_]
232
233				:lexeme ~ close_delim pause => before event => close_delim
234				_close_
235
236				escaped_char ~ '_escape_char_' delimiter_char # Use ' in comment for UltraEdit.
237
238				# Warning: Do not add '+' to this set, even though it speeds up things.
239				# The problem is that the set then gobbles up any '\', so the following
240				# character is no longer recognized as being escaped.
241				# Trapping the exception then generated would be possible.
242
243				non_quote_char ~ [^_delimiter_] # Use " in comment for UltraEdit.
244
245				:lexeme ~ open_delim pause => before event => open_delim
246				_open_
247
248				:lexeme ~ text pause => before event => text
249				text ~ escaped_char
250				\| non_quote_char
251				END_OF_GRAMMAR
252
253				my($hashref) = $self -> _validate_open_close;
254				$bnf =~ s/_open_/$$hashref{_open_}/;
255				$bnf =~ s/_close_/$$hashref{_close_}/;
256				$bnf =~ s/_delimiter_/$$hashref{_delimiter_}/g;
257				my($escape_char) = $self -> escape_char;
258
259				if ($escape_char eq "'")
260				{
261				my($message) = 'Single-quote is forbidden as an escape character';
262
263				$self -> error_message($message);
264				$self -> error_number(7);
265
266				# This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
267
268				die "Error: $message\n";
269				}
270
271				$bnf =~ s/_escape_char_/$escape_char/g;
272
273				$self -> bnf($bnf);
274				$self -> grammar
275				(
276				Marpa::R2::Scanless::G -> new
277				({
278				source => \$self -> bnf
279				})
280				);
281
282				# This hash does not contain the key "'exhausted" because the exhaustion
283				# event is everywhere handled explicitly. Yes, it has a leading quote.
284
285				my(%event);
286
287				for my $line (split(/\n/, $self -> bnf) )
288				{
289				$event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
290				}
291
292				$self -> known_events(\%event);
293
294				} # End of BUILD.
295
296				# ------------------------------------------------
297
298				sub _add_daughter
299				{
300				my($self, $name, $attributes) = @_;
301				$attributes = {%$attributes, uid => $self -> uid($self -> uid + 1)};
302				my($stack) = $self -> node_stack;
303				my($node) = Tree -> new($name);
304
305				$node -> meta($attributes);
306
307				$$stack[$#$stack] -> add_child({}, $node);
308
309				} # End of _add_daughter.
310
311				# ------------------------------------------------
312
313				sub next_few_chars
314				{
315				my($self, $stringref, $offset) = @_;
316				my($s) = substr($$stringref, $offset, $self -> next_few_limit);
317				$s =~ tr/\n/ /;
318				$s =~ s/^\s+//;
319				$s =~ s/\s+$//;
320
321				return $s;
322
323				} # End of next_few_chars.
324
325				# ------------------------------------------------
326
327				sub parse
328				{
329				my($self, %opts) = @_;
330
331				# Emulate parts of new(), which makes things a bit earier for the caller.
332
333				$self -> options($opts{options}) if (defined $opts{options});
334				$self -> text($opts{text}) if (defined $opts{text});
335				$self -> pos($opts{pos}) if (defined $opts{pos});
336				$self -> length($opts{length}) if (defined $opts{length});
337
338				$self -> recce
339				(
340				Marpa::R2::Scanless::R -> new
341				({
342				exhaustion => 'event',
343				grammar => $self -> grammar,
344				ranking_method => 'high_rule_only',
345				})
346				);
347
348				# Since $self -> node_stack has not been initialized yet,
349				# we can't call _add_daughter() until after this statement.
350
351				$self -> uid(0);
352				$self -> tree(Tree -> new('root') );
353				$self -> tree -> meta({text => '', uid => $self -> uid});
354				$self -> node_stack([$self -> tree -> root]);
355
356				# Return 0 for success and 1 for failure.
357
358				my($result) = 0;
359
360				my($message);
361
362				try
363				{
364				if (defined (my $value = $self -> _process) )
365				{
366				}
367				else
368				{
369				$result = 1;
370
371				print "Error: Parse failed\n" if ($self -> options & print_errors);
372				}
373				}
374				catch
375				{
376				$result = 1;
377
378				print "Error: Parse failed. ${_}" if ($self -> options & print_errors);
379				};
380
381				# Return 0 for success and 1 for failure.
382
383				return $result;
384
385				} # End of parse.
386
387				# ------------------------------------------------
388
389				sub _pop_node_stack
390				{
391				my($self) = @_;
392				my($stack) = $self -> node_stack;
393
394				pop @$stack;
395
396				$self -> node_stack($stack);
397
398				} # End of _pop_node_stack.
399
400				# ------------------------------------------------
401
402				sub _process
403				{
404				my($self) = @_;
405				my($stringref) = $self -> text \|\| \''; # Allow for undef. Use ' in comment for UltraEdit.
406				my($pos) = $self -> pos;
407				my($first_pos) = $pos;
408				my($total_length) = length($$stringref);
409				my($length) = $self -> length \|\| $total_length;
410				my($text) = '';
411				my($format) = "%-20s %5s %5s %5s %-20s %-20s\n";
412				my($last_event) = '';
413				my($matching_delimiter) = $self -> matching_delimiter;
414
415				if ($self -> options & print_debugs)
416				{
417				print "Length of input: $length. Input \|$$stringref\|\n";
418				print sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme', 'Comment');
419				}
420
421				my($delimiter_frequency, $delimiter_stack);
422				my($event_name);
423				my($lexeme);
424				my($message);
425				my($original_lexeme);
426				my($span, $start);
427				my($tos);
428
429				# We use read()/lexeme_read()/resume() because we pause at each lexeme.
430				# Also, in read(), we use $pos and $length to avoid reading Ruby Slippers tokens (if any).
431				# For the latter, see scripts/match.parentheses.02.pl in MarpaX::Demo::SampleScripts.
432
433				for
434				(
435				$pos = $self -> recce -> read($stringref, $pos, $length);
436				($pos < $total_length) && ( ($pos - $first_pos) <= $length);
437				$pos = $self -> recce -> resume($pos)
438				)
439				{
440				$delimiter_frequency = $self -> delimiter_frequency;
441				$delimiter_stack = $self -> delimiter_stack;
442				($start, $span) = $self -> recce -> pause_span;
443				($event_name, $span, $pos) = $self -> _validate_event($stringref, $start, $span, $pos, $delimiter_frequency);
444
445				# If the input is exhausted, we exit immediately so we don't try to use
446				# the values of $start, $span or $pos. They are ignored upon exit.
447
448				last if ($event_name eq "'exhausted"); # Yes, it has a leading quote.
449
450				$lexeme = $self -> recce -> literal($start, $span);
451				$original_lexeme = $lexeme;
452				$pos = $self -> recce -> lexeme_read($event_name);
453
454				die "lexeme_read($event_name) rejected lexeme \|$lexeme\|\n" if (! defined $pos);
455
456				print sprintf($format, $event_name, $start, $span, $pos, $lexeme, '-') if ($self -> options & print_debugs);
457
458				if ($event_name ne 'text')
459				{
460				$self -> _save_text($text);
461
462				$text = '';
463				}
464
465				if ($event_name eq 'close_delim')
466				{
467				$$delimiter_frequency{$lexeme}--;
468
469				$self -> delimiter_frequency($delimiter_frequency);
470
471				$tos = pop @$delimiter_stack;
472
473				$self -> delimiter_stack($delimiter_stack);
474
475				# If the top of the delimiter stack is not the lexeme corresponding to the
476				# opening delimiter of the current closing delimiter, then there's an error.
477
478				if ($$matching_delimiter{$$tos{lexeme} } ne $lexeme)
479				{
480				$message = "Last open delimiter: $$tos{lexeme}. Unexpected closing delimiter: $lexeme";
481
482				$self -> error_message($message);
483				$self -> error_number(1);
484
485				# This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
486
487				die "$message\n" if ($self -> options & overlap_is_fatal);
488
489				# If we did not die, then it's a warning message.
490
491				$self -> error_number(-1);
492
493				print "Warning: $message\n" if ($self -> options & print_warnings);
494				}
495
496				$self -> _pop_node_stack;
497				$self -> _add_daughter('close', {text => $lexeme});
498				}
499				elsif ($event_name eq 'open_delim')
500				{
501				$$delimiter_frequency{$$matching_delimiter{$lexeme} }++;
502
503				# If the top of the delimiter stack reaches 2, then there's an error.
504				# Unlike mismatched delimiters (just above), this is never gets a warning.
505
506				if ($$delimiter_frequency{$$matching_delimiter{$lexeme} } > 1)
507				{
508				$message = "Opened delimiter $lexeme again before closing previous one";
509
510				$self -> error_message($message);
511				$self -> error_number(2);
512
513				# This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
514
515				die "$message\n" if ($self -> options & nesting_is_fatal);
516
517				# If we did not die, then it's a warning message.
518
519				$self -> error_number(-2);
520
521				print "Warning: $message\n" if ($self -> options & print_warnings);
522				}
523
524				push @$delimiter_stack,
525				{
526				count => $$delimiter_frequency{$$matching_delimiter{$lexeme} },
527				lexeme => $lexeme,
528				};
529
530				$self -> delimiter_frequency($delimiter_frequency);
531				$self -> delimiter_stack($delimiter_stack);
532				$self -> _add_daughter('open', {text => $lexeme});
533				$self -> _push_node_stack;
534				}
535				elsif ($event_name eq 'text')
536				{
537				$text .= $lexeme;
538				}
539
540				$last_event = $event_name;
541				}
542
543				# Mop up any left-over chars.
544
545				$self -> _save_text($text);
546
547				if ($self -> recce -> exhausted)
548				{
549				$message = 'Parse exhausted';
550
551				$self -> error_message($message);
552				$self -> error_number(6);
553
554				if ($self -> options & exhaustion_is_fatal)
555				{
556				# This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
557
558				die "$message\n";
559				}
560				else
561				{
562				$self -> error_number(-6);
563
564				print "Warning: $message\n" if ($self -> options & print_warnings);
565				}
566				}
567				elsif (my $status = $self -> recce -> ambiguous)
568				{
569				my($terminals) = $self -> recce -> terminals_expected;
570				$terminals = ['(None)'] if ($#$terminals < 0);
571				$message = "Ambiguous parse. Status: $status. Terminals expected: " . join(', ', @$terminals);
572
573				$self -> error_message($message);
574				$self -> error_number(3);
575
576				if ($self -> options & ambiguity_is_fatal)
577				{
578				# This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
579
580				die "$message\n";
581				}
582				elsif ($self -> options & print_warnings)
583				{
584				$self -> error_number(-3);
585
586				print "Warning: $message\n";
587				}
588				}
589
590				# Return a defined value for success and undef for failure.
591
592				return $self -> recce -> value;
593
594				} # End of _process.
595
596				# ------------------------------------------------
597
598				sub _push_node_stack
599				{
600				my($self) = @_;
601				my($stack) = $self -> node_stack;
602				my(@daughters) = $$stack[$#$stack] -> children;
603
604				push @$stack, $daughters[$#daughters];
605
606				$self -> node_stack($stack);
607
608				} # End of _push_node_stack.
609
610				# ------------------------------------------------
611
612				sub _save_text
613				{
614				my($self, $text) = @_;
615
616				$self -> _add_daughter('text', {text => $text}) if (length($text) );
617
618				return '';
619
620				} # End of _save_text.
621
622				# ------------------------------------------------
623
624				sub _validate_event
625				{
626				my($self, $stringref, $start, $span, $pos, $delimiter_frequency) = @_;
627				my(@event) = @{$self -> recce -> events};
628				my($event_count) = scalar @event;
629				my(@event_name) = sort map{$$_[0]} @event;
630				my($event_name) = $event_name[0]; # Default.
631
632				# If the input is exhausted, we return immediately so we don't try to use
633				# the values of $start, $span or $pos. They are ignored upon return.
634
635				if ($event_name eq "'exhausted") # Yes, it has a leading quote.
636				{
637				return ($event_name, $span, $pos);
638				}
639
640				my($lexeme) = substr($$stringref, $start, $span);
641				my($line, $column) = $self -> recce -> line_column($start);
642				my($literal) = $self -> next_few_chars($stringref, $start + $span);
643				my($message) = "Location: ($line, $column). Lexeme: \|$lexeme\|. Next few chars: \|$literal\|";
644				$message = "$message. Events: $event_count. Names: ";
645
646				print $message, join(', ', @event_name), "\n" if ($self -> options & print_debugs);
647
648				my(%event_name);
649
650				@event_name{@event_name} = (1) x @event_name;
651
652				for (@event_name)
653				{
654				if (! ${$self -> known_events}{$_})
655				{
656				$message = "Unexpected event name '$_'";
657
658				$self -> error_message($message);
659				$self -> error_number(10);
660
661				# This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
662
663				die "$message\n";
664				}
665				}
666
667				if ($event_count > 1)
668				{
669				# We get here for single and double quotes because an open s. or d. quote is
670				# indistinguishable from a close s. or d. quote, and that leads to ambiguity.
671
672				if ( ($lexeme =~ /["']/) && (join(', ', @event_name) eq 'close_delim, open_delim') ) # ".
673				{
674				# At the time _validate_event() is called, the quote count has not yet been bumped.
675				# If this is the 1st quote, then it's an open_delim.
676				# If this is the 2nd quote, them it's a close delim.
677
678				if ($$delimiter_frequency{$lexeme} % 2 == 0)
679				{
680				$event_name = 'open_delim';
681
682				print "Disambiguated lexeme \|$lexeme\| as '$event_name'\n" if ($self -> options & print_debugs);
683				}
684				else
685				{
686				$event_name = 'close_delim';
687
688				print "Disambiguated lexeme \|$lexeme\| as '$event_name'\n" if ($self -> options & print_debugs);
689				}
690				}
691				else
692				{
693				$message = join(', ', @event_name);
694				$message = "The code does not handle these events simultaneously: $message";
695
696				$self -> error_message($message);
697				$self -> error_number(11);
698
699				# This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
700
701				die "$message\n";
702				}
703				}
704
705				return ($event_name, $span, $pos);
706
707				} # End of _validate_event.
708
709				# ------------------------------------------------
710
711				sub _validate_open_close
712				{
713				my($self) = @_;
714				my($open) = $self -> open;
715				my($close) = $self -> close;
716
717				my($message);
718
719				if ( ($#$open < 0) \|\| ($#$close < 0) )
720				{
721				$message = 'There must be at least 1 pair of open/close delimiters';
722
723				$self -> error_message($message);
724				$self -> error_number(8);
725
726				# This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
727
728				die "Error: $message\n";
729				}
730
731				if ($#$open != $#$close)
732				{
733				$message = 'The # of open delimiters must match the # of close delimiters';
734
735				$self -> error_message($message);
736				$self -> error_number(9);
737
738				# This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
739
740				die "Error: $message\n";
741				}
742
743				my(%substitute) = (_close_ => '', _delimiter_ => '', _open_ => '');
744				my($matching_delimiter) = {};
745				my(%seen) = (close => {}, open => {});
746
747				my($close_quote);
748				my(%delimiter_action, %delimiter_frequency);
749				my($open_quote);
750				my($prefix, %prefix);
751
752				for my $i (0 .. $#$open)
753				{
754				if ( ($$open[$i] =~ /\\/) \|\| ($$close[$i] =~ /\\/) )
755				{
756				$message = 'Backslash is forbidden as a delimiter character';
757
758				$self -> error_message($message);
759				$self -> error_number(4);
760
761				# This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
762
763				die "Error: $message\n";
764				}
765
766				if ( ( (length($$open[$i]) > 1) && ($$open[$i] =~ /'/) ) \|\| ( (length($$close[$i]) > 1) && ($$close[$i] =~ /'/) ) )
767				{
768				$message = 'Single-quotes are forbidden in multi-character delimiters';
769
770				$self -> error_message($message);
771				$self -> error_number(5);
772
773				# This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
774
775				die "Error: $message\n";
776				}
777
778				$seen{open}{$$open[$i]} = 0 if (! $seen{open}{$$open[$i]});
779				$seen{close}{$$close[$i]} = 0 if (! $seen{close}{$$close[$i]});
780
781				$seen{open}{$$open[$i]}++;
782				$seen{close}{$$close[$i]}++;
783
784				$delimiter_action{$$open[$i]} = 'open';
785				$delimiter_action{$$close[$i]} = 'close';
786				$$matching_delimiter{$$open[$i]} = $$close[$i];
787				$delimiter_frequency{$$open[$i]} = 0;
788				$delimiter_frequency{$$close[$i]} = 0;
789
790				if (length($$open[$i]) == 1)
791				{
792				$open_quote = $$open[$i] eq '[' ? "[\\$$open[$i]]" : "[$$open[$i]]";
793				}
794				else
795				{
796				# This fails if length > 1 and open contains a single quote.
797
798				$open_quote = "'$$open[$i]'";
799				}
800
801				if (length($$close[$i]) == 1)
802				{
803				$close_quote = $$close[$i] eq ']' ? "[\\$$close[$i]]" : "[$$close[$i]]";
804				}
805				else
806				{
807				# This fails if length > 1 and close contains a single quote.
808
809				$close_quote = "'$$close[$i]'";
810				}
811
812				$substitute{_open_} .= "open_delim\t\t\t\~ $open_quote\n" if ($seen{open}{$$open[$i]} <= 1);
813				$substitute{_close_} .= "close_delim\t\t\t\~ $close_quote\n" if ($seen{close}{$$close[$i]} <= 1);
814				$prefix = substr($$open[$i], 0, 1);
815				$prefix = "\\$prefix" if ($prefix =~ /[\[\]]/);
816				$prefix{$prefix} = 0 if (! $prefix{$prefix});
817
818				$prefix{$prefix}++;
819
820				$substitute{_delimiter_} .= $prefix if ($prefix{$prefix} == 1);
821				$prefix = substr($$close[$i], 0, 1);
822				$prefix = "\\$prefix" if ($prefix =~ /[\[\]]/);
823				$prefix{$prefix} = 0 if (! $prefix{$prefix});
824
825				$prefix{$prefix}++;
826
827				$substitute{_delimiter_} .= $prefix if ($prefix{$prefix} == 1);
828				}
829
830				$self -> delimiter_action(\%delimiter_action);
831				$self -> delimiter_frequency(\%delimiter_frequency);
832				$self -> matching_delimiter($matching_delimiter);
833
834				return \%substitute;
835
836				} # End of _validate_open_close.
837
838				# ------------------------------------------------
839
840				1;
841
842				=pod
843
844				=head1 NAME
845
846				C - Extract delimited text sequences from strings
847
848				=head1 Synopsis
849
850				#!/usr/bin/env perl
851
852				use strict;
853				use warnings;
854
855				use Text::Balanced::Marpa ':constants';
856
857				# -----------
858
859				my($count) = 0;
860				my($parser) = Text::Balanced::Marpa -> new
861				(
862				open => ['<:' ,'[%'],
863				close => [':>', '%]'],
864				options => nesting_is_fatal \| print_warnings,
865				);
866				my(@text) =
867				(
868				q\|<: a :>\|,
869				q\|a [% b <: c :> d %] e\|,
870				q\|a <: b <: c :> d :> e\|, # nesting_is_fatal triggers an error here.
871				);
872
873				my($result);
874
875				for my $text (@text)
876				{
877				$count++;
878
879				print "Parsing \|$text\|\n";
880
881				$result = $parser -> parse(text => \$text);
882
883				print join("\n", @{$parser -> tree -> tree2string}), "\n";
884				print "Parse result: $result (0 is success)\n";
885
886				if ($count == 3)
887				{
888				print "Deliberate error: Failed to parse \|$text\|\n";
889				print 'Error number: ', $parser -> error_number, '. Error message: ',
890				$parser -> error_message, "\n";
891				}
892
893				print '-' x 50, "\n";
894				}
895
896				See scripts/synopsis.pl.
897
898				This is the printout of synopsis.pl:
899
900				Parsing \|<: a :>\|
901				Parsed text:
902				root. Attributes: {}
903				\|--- open. Attributes: {text => "<:"}
904				\| \|--- string. Attributes: {text => " a "}
905				\|--- close. Attributes: {text => ":>"}
906				Parse result: 0 (0 is success)
907				--------------------------------------------------
908				Parsing \|a [% b <: c :> d %] e\|
909				Parsed text:
910				root. Attributes: {}
911				\|--- string. Attributes: {text => "a "}
912				\|--- open. Attributes: {text => "[%"}
913				\| \|--- string. Attributes: {text => " b "}
914				\| \|--- open. Attributes: {text => "<:"}
915				\| \| \|--- string. Attributes: {text => " c "}
916				\| \|--- close. Attributes: {text => ":>"}
917				\| \|--- string. Attributes: {text => " d "}
918				\|--- close. Attributes: {text => "%]"}
919				\|--- string. Attributes: {text => " e"}
920				Parse result: 0 (0 is success)
921				--------------------------------------------------
922				Parsing \|a <: b <: c :> d :> e\|
923				Error: Parse failed. Opened delimiter <: again before closing previous one
924				Text parsed so far:
925				root. Attributes: {}
926				\|--- string. Attributes: {text => "a "}
927				\|--- open. Attributes: {text => "<:"}
928				\|--- string. Attributes: {text => " b "}
929				Parse result: 1 (0 is success)
930				Deliberate error: Failed to parse \|a <: b <: c :> d :> e\|
931				Error number: 2. Error message: Opened delimiter <: again before closing previous one
932				--------------------------------------------------
933
934				See also scripts/tiny.pl and scripts/traverse.pl.
935
936				=head1 Description
937
938				L provides a L-based parser for extracting delimited text
939				sequences from strings. The text outside and inside the delimiters, and delimiters themselves, are
940				all stored as nodes in a tree managed by L.
941
942				Nested strings, with the same or different delimiters, are stored as daughters of the nodes which
943				hold the delimiters.
944
945				This module is a companion to L. The differences are discussed in the L
946				below.
947
948				See the L for various topics, including:
949
950				=over 4
951
952				=item o UFT8 handling
953
954				See t/utf8.t.
955
956				=item o Escaping delimiters within the text
957
958				See t/escapes.t.
959
960				=item o Options to make nested and/or overlapped delimiters fatal errors
961
962				See t/colons.t.
963
964				=item o Using delimiters which are part of another delimiter
965
966				See t/escapes.t and t/perl.delimiters.
967
968				=item o Processing the tree-structured output
969
970				See scripts/traverse.pl.
971
972				=item o Emulating L's use of '<:' and ':>
973
974				See t/colons.t and t/percents.t.
975
976				=item o Implementing a really trivial HTML parser
977
978				See t/html.t.
979
980				In the same vein, see t/angle.brackets.t, for code where the delimiters are just '<' and '>'.
981
982				=item o Handling multiple sets of delimiters
983
984				See t/multiple.delimiters.t.
985
986				=item o Skipping (leading) characters in the input string
987
988				See t/skip.prefix.t.
989
990				=item o Implementing hard-to-read text strings as delimiters
991
992				See t/silly.delimiters.
993
994				=back
995
996				=head1 Distributions
997
998				This module is available as a Unix-style distro (*.tgz).
999
1000				See L
1001				for help on unpacking and installing distros.
1002
1003				=head1 Installation
1004
1005				Install L as you would any C module:
1006
1007				Run:
1008
1009				cpanm Text::Balanced::Marpa
1010
1011				or run:
1012
1013				sudo cpan Text::Balanced::Marpa
1014
1015				or unpack the distro, and then either:
1016
1017				perl Build.PL
1018				./Build
1019				./Build test
1020				sudo ./Build install
1021
1022				or:
1023
1024				perl Makefile.PL
1025				make (or dmake or nmake)
1026				make test
1027				make install
1028
1029				=head1 Constructor and Initialization
1030
1031				C is called as C<< my($parser) = Text::Balanced::Marpa -> new(k1 => v1, k2 => v2, ...) >>.
1032
1033				It returns a new object of type C.
1034
1035				Key-value pairs accepted in the parameter list (see corresponding methods for details
1036				[e.g. L]):
1037
1038				=over 4
1039
1040				=item o close => $arrayref
1041
1042				An arrayref of strings, each one a closing delimiter.
1043
1044				The # of elements must match the # of elements in the 'open' arrayref.
1045
1046				See the L for details and warnings.
1047
1048				A value for this option is mandatory.
1049
1050				Default: None.
1051
1052				=item o length => $integer
1053
1054				The maxiumum length of the input string to process.
1055
1056				This parameter works in conjunction with the C parameter.
1057
1058				C can also be used as a key in the hash passed to L.
1059
1060				See the L for details.
1061
1062				Default: Calls Perl's length() function on the input string.
1063
1064				=item o next_few_limit => $integer
1065
1066				This controls how many characters are printed when displaying 'the next few chars'.
1067
1068				It only affects debug output.
1069
1070				Default: 20.
1071
1072				=item o open => $arrayref
1073
1074				An arrayref of strings, each one an opening delimiter.
1075
1076				The # of elements must match the # of elements in the 'open' arrayref.
1077
1078				See the L for details and warnings.
1079
1080				A value for this option is mandatory.
1081
1082				Default: None.
1083
1084				=item o options => $bit_string
1085
1086				This allows you to turn on various options.
1087
1088				C can also be used as a key in the hash passed to L.
1089
1090				Default: 0 (nothing is fatal).
1091
1092				See the L for details.
1093
1094				=item o pos => $integer
1095
1096				The offset within the input string at which to start processing.
1097
1098				This parameter works in conjunction with the C parameter.
1099
1100				C can also be used as a key in the hash passed to L.
1101
1102				See the L for details.
1103
1104				Note: The first character in the input string is at pos == 0.
1105
1106				Default: 0.
1107
1108				=item o text => $stringref
1109
1110				This is a reference to the string to be parsed. A stringref is used to avoid copying what could
1111				potentially be a very long string.
1112
1113				C can also be used as a key in the hash passed to L.
1114
1115				Default: \''.
1116
1117				=back
1118
1119				=head1 Methods
1120
1121				=head2 bnf()
1122
1123				Returns a string containing the grammar constructed based on user input.
1124
1125				=head2 close()
1126
1127				Get the arrayref of closing delimiters.
1128
1129				See also L.
1130
1131				See the L for details and warnings.
1132
1133				'close' is a parameter to L. See L for details.
1134
1135				=head2 delimiter_action()
1136
1137				Returns a hashref, where the keys are delimiters and the values are either 'open' or 'close'.
1138
1139				=head2 delimiter_frequency()
1140
1141				Returns a hashref where the keys are opening and closing delimiters, and the values are the # of
1142				times each delimiter appears in the input stream.
1143
1144				The value is incremented for each opening delimiter and decremented for each closing delimiter.
1145
1146				=head2 error_message()
1147
1148				Returns the last error or warning message set.
1149
1150				Error messages always start with 'Error: '. Messages never end with "\n".
1151
1152				Parsing error strings is not a good idea, ever though this module's format for them is fixed.
1153
1154				See L.
1155
1156				=head2 error_number()
1157
1158				Returns the last error or warning number set.
1159
1160				Warnings have values < 0, and errors have values > 0.
1161
1162				If the value is > 0, the message has the prefix 'Error: ', and if the value is < 0, it has the
1163				prefix 'Warning: '. If this is not the case, it's a reportable bug.
1164
1165				Possible values for error_number() and error_message():
1166
1167				=over 4
1168
1169				=item o 0 => ""
1170
1171				This is the default value.
1172
1173				=item o 1/-1 => "Last open delimiter: $lexeme_1. Unexpected closing delimiter: $lexeme_2"
1174
1175				If L returns 1, it's an error, and if it returns -1 it's a warning.
1176
1177				You can set the option C to make it fatal.
1178
1179				=item o 2/-2 => "Opened delimiter $lexeme again before closing previous one"
1180
1181				If L returns 2, it's an error, and if it returns -2 it's a warning.
1182
1183				You can set the option C to make it fatal.
1184
1185				=item o 3/-3 => "Ambiguous parse. Status: $status. Terminals expected: a, b, ..."
1186
1187				This message is only produced when the parse is ambiguous.
1188
1189				If L returns 3, it's an error, and if it returns -3 it's a warning.
1190
1191				You can set the option C to make it fatal.
1192
1193				=item o 4 => "Backslash is forbidden as a delimiter character"
1194
1195				This preempts some types of sabotage.
1196
1197				This message can never be just a warning message.
1198
1199				=item o 5 => "Single-quotes are forbidden in multi-character delimiters"
1200
1201				This limitation is due to the syntax of
1202				L.
1203
1204				This message can never be just a warning message.
1205
1206				=item o 6/-6 => "Parse exhausted"
1207
1208				If L returns 6, it's an error, and if it returns -6 it's a warning.
1209
1210				You can set the option C to make it fatal.
1211
1212				=item o 7 => 'Single-quote is forbidden as an escape character'
1213
1214				This limitation is due to the syntax of
1215				L.
1216
1217				This message can never be just a warning message.
1218
1219				=item o 8 => "There must be at least 1 pair of open/close delimiters"
1220
1221				This message can never be just a warning message.
1222
1223				=item o 9 => "The # of open delimiters must match the # of close delimiters"
1224
1225				This message can never be just a warning message.
1226
1227				=item o 10 => "Unexpected event name 'xyz'"
1228
1229				Marpa has trigged an event and it's name is not in the hash of event names derived from the BNF.
1230
1231				This message can never be just a warning message.
1232
1233				=item o 11 => "The code does not handle these events simultaneously: a, b, ..."
1234
1235				The code is written to handle single events at a time, or in rare cases, 2 events at the same time.
1236				But here, multiple events have been triggered and the code cannot handle the given combination.
1237
1238				This message can never be just a warning message.
1239
1240				=back
1241
1242				See L.
1243
1244				=head2 escape_char()
1245
1246				Get the escape char.
1247
1248				=head2 known_events()
1249
1250				Returns a hashref where the keys are event names and the values are 1.
1251
1252				=head2 length([$integer])
1253
1254				Here, the [] indicate an optional parameter.
1255
1256				Get or set the length of the input string to process.
1257
1258				See also the L and L.
1259
1260				'length' is a parameter to L. See L for details.
1261
1262				=head2 matching_delimiter()
1263
1264				Returns a hashref where the keys are opening delimiters and the values are the corresponding closing
1265				delimiters.
1266
1267				=head2 new()
1268
1269				See L for details on the parameters accepted by L.
1270
1271				=head2 next_few_chars($stringref, $offset)
1272
1273				Returns a substring of $s, starting at $offset, for use in debug messages.
1274
1275				See L.
1276
1277				=head2 next_few_limit([$integer])
1278
1279				Here, the [] indicate an optional parameter.
1280
1281				Get or set the number of characters called 'the next few chars', which are printed during debugging.
1282
1283				'next_few_limit' is a parameter to L. See L for details.
1284
1285				=head2 open()
1286
1287				Get the arrayref of opening delimiters.
1288
1289				See also L.
1290
1291				See the L for details and warnings.
1292
1293				'open' is a parameter to L. See L for details.
1294
1295				=head2 options([$bit_string])
1296
1297				Here, the [] indicate an optional parameter.
1298
1299				Get or set the option flags.
1300
1301				For typical usage, see scripts/synopsis.pl.
1302
1303				See the L for details.
1304
1305				'options' is a parameter to L. See L for details.
1306
1307				=head2 parse([%hash])
1308
1309				Here, the [] indicate an optional parameter.
1310
1311				This is the only method the user needs to call. All data can be supplied when calling L.
1312
1313				You can of course call other methods (e.g. L ) after calling L but
1314				before calling C.
1315
1316				The optional hash takes these ($key => $value) pairs (exactly the same as for L):
1317
1318				=over 4
1319
1320				=item o length => $integer
1321
1322				=item o options => $bit_string
1323
1324				=item o pos => $integer
1325
1326				=item o text => $stringref
1327
1328				=back
1329
1330				Note: If a value is passed to C, it takes precedence over any value with the same
1331				key passed to L, and over any value previously passed to the method whose name is $key.
1332				Further, the value passed to C is always passed to the corresponding method (i.e. whose
1333				name is $key), meaning any subsequent call to that method returns the value passed to C.
1334
1335				See scripts/samples.pl.
1336
1337				Returns 0 for success and 1 for failure.
1338
1339				If the value is 1, you should call L to find out what happened.
1340
1341				=head2 pos([$integer])
1342
1343				Here, the [] indicate an optional parameter.
1344
1345				Get or set the offset within the input string at which to start processing.
1346
1347				See also the L and L.
1348
1349				'pos' is a parameter to L. See L for details.
1350
1351				=head2 text([$stringref])
1352
1353				Here, the [] indicate an optional parameter.
1354
1355				Get or set a reference to the string to be parsed.
1356
1357				'text' is a parameter to L. See L for details.
1358
1359				=head2 tree()
1360
1361				Returns an object of type L, which holds the parsed data.
1362
1363				Obviously, it only makes sense to call C after calling C.
1364
1365				See scripts/traverse.pl for sample code which processes this tree's nodes.
1366
1367				=head1 FAQ
1368
1369				=head2 What are the differences between Text::Balanced::Marpa and Text::Delimited::Marpa?
1370
1371				I think this is shown most clearly by getting the 2 modules to process the same string. So,
1372				using this as input:
1373
1374				'a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k'
1375
1376				Output from Text::Balanced::Marpa's scripts/tiny.pl:
1377
1378				(# 2) \| 1 2 3 4 5 6 7 8 9
1379				\|0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
1380				Parsing \|Skip me ->a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k\|. pos: 10. length: 42
1381				Parse result: 0 (0 is success)
1382				root. Attributes: {text => "", uid => "0"}
1383				\|--- text. Attributes: {text => "a ", uid => "1"}
1384				\|--- open. Attributes: {text => "<:", uid => "2"}
1385				\| \|--- text. Attributes: {text => "b ", uid => "3"}
1386				\| \|--- open. Attributes: {text => "<:", uid => "4"}
1387				\| \| \|--- text. Attributes: {text => "c", uid => "5"}
1388				\| \|--- close. Attributes: {text => ":>", uid => "6"}
1389				\| \|--- text. Attributes: {text => " d", uid => "7"}
1390				\|--- close. Attributes: {text => ":>", uid => "8"}
1391				\|--- text. Attributes: {text => " e ", uid => "9"}
1392				\|--- open. Attributes: {text => "<:", uid => "10"}
1393				\| \|--- text. Attributes: {text => "f ", uid => "11"}
1394				\| \|--- open. Attributes: {text => "<:", uid => "12"}
1395				\| \| \|--- text. Attributes: {text => " g ", uid => "13"}
1396				\| \| \|--- open. Attributes: {text => "<:", uid => "14"}
1397				\| \| \| \|--- text. Attributes: {text => "h", uid => "15"}
1398				\| \| \|--- close. Attributes: {text => ":>", uid => "16"}
1399				\| \| \|--- text. Attributes: {text => " i", uid => "17"}
1400				\| \|--- close. Attributes: {text => ":>", uid => "18"}
1401				\| \|--- text. Attributes: {text => " j", uid => "19"}
1402				\|--- close. Attributes: {text => ":>", uid => "20"}
1403				\|--- text. Attributes: {text => " k", uid => "21"}
1404
1405				Output from Text::Delimited::Marpa's scripts/tiny.pl:
1406
1407				(# 2) \| 1 2 3 4 5 6 7 8 9
1408				\|0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
1409				Parsing \|Skip me ->a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k\|. pos: 10. length: 42
1410				Parse result: 0 (0 is success)
1411				root. Attributes: {end => "0", length => "0", start => "0", text => "", uid => "0"}
1412				\|--- span. Attributes: {end => "22", length => "9", start => "14", text => "b <:c:> d", uid => "1"}
1413				\| \|--- span. Attributes: {end => "18", length => "1", start => "18", text => "c", uid => "2"}
1414				\|--- span. Attributes: {end => "47", length => "18", start => "30", text => "f <: g <:h:> i:> j", uid => "3"}
1415				\|--- span. Attributes: {end => "43", length => "10", start => "34", text => " g <:h:> i", uid => "4"}
1416				\|--- span. Attributes: {end => "39", length => "1", start => "39", text => "h", uid => "5"}
1417
1418				Another example, using the same input string, but manually processing the tree nodes.
1419				Parent-daughter relationships are here represented by indentation.
1420
1421				Output from Text::Balanced::Marpa's scripts/traverse.pl:
1422
1423				\| 1 2 3 4 5
1424				\|012345678901234567890123456789012345678901234567890
1425				Parsing \|a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k\|.
1426				Span Text
1427				1 \|a \|
1428				2 \|<:\|
1429				3 \|b \|
1430				4 \|<:\|
1431				5 \|c\|
1432				6 \|:>\|
1433				7 \| d\|
1434				8 \|:>\|
1435				9 \| e \|
1436				10 \|<:\|
1437				11 \|f \|
1438				12 \|<:\|
1439				13 \| g \|
1440				14 \|<:\|
1441				15 \|h\|
1442				16 \|:>\|
1443				17 \| i\|
1444				18 \|:>\|
1445				19 \| j\|
1446				20 \|:>\|
1447				21 \| k\|
1448
1449				Output from Text::Delimited::Marpa's scripts/traverse.pl:
1450
1451				\| 1 2 3 4 5
1452				\|012345678901234567890123456789012345678901234567890
1453				Parsing \|a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k\|.
1454				Span Start End Length Text
1455				1 4 12 9 \|b <:c:> d\|
1456				2 8 8 1 \|c\|
1457				3 20 37 18 \|f <: g <:h:> i:> j\|
1458				4 24 33 10 \| g <:h:> i\|
1459				5 29 29 1 \|h\|
1460
1461				=head2 Where are the error messages and numbers described?
1462
1463				See L and L.
1464
1465				=head2 How do I escape delimiters?
1466
1467				By backslash-escaping the first character of all open and close delimiters which appear in the
1468				text.
1469
1470				As an example, if the delimiters are '<:' and ':>', this means you have to escape I the '<'
1471				chars and I the colons in the text.
1472
1473				The backslash is preserved in the output.
1474
1475				If you don't want to use backslash for escaping, or can't, you can pass a different escape character
1476				to L.
1477
1478				See t/escapes.t.
1479
1480				=head2 How do the length and pos parameters to new() work?
1481
1482				The recognizer - an object of type Marpa::R2::Scanless::R - is called in a loop, like this:
1483
1484				for
1485				(
1486				$pos = $self -> recce -> read($stringref, $pos, $length);
1487				$pos < $length;
1488				$pos = $self -> recce -> resume($pos)
1489				)
1490
1491				L and L can be used to initialize $pos and $length.
1492
1493				Note: The first character in the input string is at pos == 0.
1494
1495				See L for details.
1496
1497				=head2 Does this package support Unicode/UTF8?
1498
1499				Yes. See t/escapes.t, t/multiple.quotes.t and t/utf8.t.
1500
1501				=head2 Does this package handler Perl delimiters (e.g. q\|..\|, qq\|..\|, qr/../, qw/../)?
1502
1503				See t/perl.delimiters.t.
1504
1505				=head2 Warning: Calling mutators after calling new()
1506
1507				The only mutator which works after calling new() is L.
1508
1509				In particular, you can't call L, L or L after calling L.
1510				This is because parameters passed to C are interpolated into the grammar before parsing
1511				begins. And that's why the docs for those methods all say 'Get the...' and not 'Get and set the...'.
1512
1513				To make the code work, you would have to manually call _validate_open_close(). But even then
1514				a lot of things would have to be re-initialized to give the code any hope of working.
1515
1516				=head2 What is the format of the 'open' and 'close' parameters to new()?
1517
1518				Each of these parameters takes an arrayref as a value.
1519
1520				The # of elements in the 2 arrayrefs must be the same.
1521
1522				The 1st element in the 'open' arrayref is the 1st user-chosen opening delimiter, and the 1st
1523				element in the 'close' arrayref must be the corresponding closing delimiter.
1524
1525				It is possible to use a delimiter which is part of another delimiter.
1526
1527				See scripts/samples.pl. It uses both '<' and '<:' as opening delimiters and their corresponding
1528				closing delimiters are '>' and ':>'. Neat, huh?
1529
1530				=head2 What are the possible values for the 'options' parameter to new()?
1531
1532				Firstly, to make these constants available, you must say:
1533
1534				use Text::Balanced::Marpa ':constants';
1535
1536				Secondly, more detail on errors and warnings can be found at L.
1537
1538				Thirdly, for usage of these option flags, see t/angle.brackets.t, t/colons.t, t/escapes.t,
1539				t/multiple.quotes.t, t/percents.t and scripts/samples.pl.
1540
1541				Now the flags themselves:
1542
1543				=over 4
1544
1545				=item o nothing_is_fatal
1546
1547				This is the default.
1548
1549				C has the value of 0.
1550
1551				=item o print_errors
1552
1553				Print errors if this flag is set.
1554
1555				C has the value of 1.
1556
1557				=item o print_warnings
1558
1559				Print various warnings if this flag is set:
1560
1561				=over 4
1562
1563				=item o The ambiguity status and terminals expected, if the parse is ambiguous
1564
1565				=item o See L for other warnings which might be printed
1566
1567				Ambiguity is not, in and of itself, an error. But see the C option, below.
1568
1569				=back
1570
1571				It's tempting to call this option C, but Perl already has C, so I didn't.
1572
1573				C has the value of 2.
1574
1575				=item o print_debugs
1576
1577				Print extra stuff if this flag is set.
1578
1579				C has the value of 4.
1580
1581				=item o overlap_is_fatal
1582
1583				This means overlapping delimiters cause a fatal error.
1584
1585				So, setting C means '{Bold [Italic}]' would be a fatal error.
1586
1587				I use this example since it gives me the opportunity to warn you, this will I do what you want
1588				if you try to use the delimiters of '<' and '>' for HTML. That is, 'Bold Italic' is
1589				not an error because what overlap are '' and '' BUT THEY ARE NOT TAGS. The tags are '<' and
1590				'>', ok? See also t/html.t.
1591
1592				C has the value of 8.
1593
1594				=item o nesting_is_fatal
1595
1596				This means nesting of identical opening delimiters is fatal.
1597
1598				So, using C means 'a <: b <: c :> d :> e' would be a fatal error.
1599
1600				C has the value of 16.
1601
1602				=item o ambiguity_is_fatal
1603
1604				This makes L return 3 rather than -3.
1605
1606				C has the value of 32.
1607
1608				=item o exhaustion_is_fatal
1609
1610				This makes L return 6 rather than -6.
1611
1612				C has the value of 64.
1613
1614				=back
1615
1616				=head2 How do I print the tree built by the parser?
1617
1618				See L.
1619
1620				=head2 How do I make use of the tree built by the parser?
1621
1622				See scripts/traverse.pl. It is a copy of t/html.t with tree-walking code instead of test code.
1623
1624				=head2 How is the parsed data held in RAM?
1625
1626				The parsed output is held in a tree managed by L.
1627
1628				The tree always has a root node, which has nothing to do with the input data. So, even an empty
1629				imput string will produce a tree with 1 node. This root has an empty hashref associated with it.
1630
1631				Nodes have a name and a hashref of attributes.
1632
1633				The name indicates the type of node. Names are one of these literals:
1634
1635				=over 4
1636
1637				=item o close
1638
1639				=item o open
1640
1641				=item o root
1642
1643				=item o text
1644
1645				=back
1646
1647				For 'open' and 'close', the delimiter is given by the value of the 'text' key in the hashref.
1648
1649				The (key => value) pairs in the hashref are:
1650
1651				=over 4
1652
1653				=item o text => $string
1654
1655				If the node name is 'open' or 'close', $string is the delimiter.
1656
1657				If the node name is 'text', $string is the verbatim text from the document.
1658
1659				Verbatim means, for example, that backslashes in the input are preserved.
1660
1661				=back
1662
1663				Try:
1664
1665				perl -Ilib scripts/samples.pl info
1666
1667				=head2 How is HTML/XML handled?
1668
1669				The tree does not preserve the nested nature of HTML/XML.
1670
1671				Post-processing (valid) HTML could easily generate another view of the data.
1672
1673				But anyway, to get perfect HTML you'd be grabbing the output of L, right?
1674
1675				See scripts/traverse.pl and t/html.t for a trivial HTML parser.
1676
1677				=head2 What is the homepage of Marpa?
1678
1679				L.
1680
1681				That page has a long list of links.
1682
1683				=head2 How do I run author tests?
1684
1685				This runs both standard and author tests:
1686
1687				shell> perl Build.PL; ./Build; ./Build authortest
1688
1689				=head1 TODO
1690
1691				=over 4
1692
1693				=item o Advanced error reporting
1694
1695				See L.
1696
1697				Perhaps this could be a sub-class?
1698
1699				=item o I8N support for error messages
1700
1701				=item o An explicit test program for parse exhaustion
1702
1703				=back
1704
1705				=head1 See Also
1706
1707				L.
1708
1709				L and L.
1710
1711				L.
1712
1713				L - for various usages of L, but not of this module.
1714
1715				=head1 Machine-Readable Change Log
1716
1717				The file Changes was converted into Changelog.ini by L.
1718
1719				=head1 Version Numbers
1720
1721				Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1722
1723				=head1 Thanks
1724
1725				Thanks to Jeffrey Kegler, who wrote Marpa and L.
1726
1727				And thanks to rns (Ruslan Shvedov) for writing the grammar for double-quoted strings used in
1728				L's scripts/quoted.strings.02.pl. I adapted it to HTML (see
1729				scripts/quoted.strings.05.pl in that module), and then incorporated the grammar into
1730				L, and - after more extensions - into this module.
1731
1732				Lastly, thanks to Robert Rothenberg for L, a module which works the same way
1733				Perl does.
1734
1735				=head1 Repository
1736
1737				L
1738
1739				=head1 Support
1740
1741				Email the author, or log a bug on RT:
1742
1743				L.
1744
1745				=head1 Author
1746
1747				L was written by Ron Savage Iron@savage.net.auE> in 2014.
1748
1749				Marpa's homepage: L.
1750
1751				My homepage: L.
1752
1753				=head1 Copyright
1754
1755				Australian copyright (c) 2014, Ron Savage.
1756
1757				All Programs of mine are 'OSI Certified Open Source Software';
1758				you can redistribute them and/or modify them under the terms of
1759				The Artistic License 2.0, a copy of which is available at:
1760				http://opensource.org/licenses/alphabetical.
1761
1762				=cut