File Coverage

blib/lib/Text/Balanced/Marpa.pm
Criterion Covered Total %
statement 19 21 90.4
branch n/a
condition n/a
subroutine 7 7 100.0
pod n/a
total 26 28 92.8


line stmt bran cond sub pod time code
1             package Text::Balanced::Marpa;
2              
3 16     16   355501 use strict;
  16         36  
  16         726  
4 16     16   8590 use utf8;
  16         136  
  16         92  
5 16     16   571 use warnings;
  16         35  
  16         675  
6 16     16   82 use warnings qw(FATAL utf8); # Fatalize encoding glitches.
  16         26  
  16         845  
7 16     16   7494 use open qw(:std :utf8); # Undeclared streams in UTF-8.
  16         14989  
  16         93  
8              
9 16         177 use Const::Exporter constants =>
10             [
11             nothing_is_fatal => 0, # The default.
12             print_errors => 1,
13             print_warnings => 2,
14             print_debugs => 4,
15             overlap_is_fatal => 8,
16             nesting_is_fatal => 16,
17             ambiguity_is_fatal => 32,
18             exhaustion_is_fatal => 64,
19 16     16   12297 ];
  16         264632  
20              
21 16     16   34230 use Marpa::R2;
  0            
  0            
22              
23             use Moo;
24              
25             use Tree;
26              
27             use Types::Standard qw/Any ArrayRef HashRef Int ScalarRef Str/;
28              
29             use Try::Tiny;
30              
31             has bnf =>
32             (
33             default => sub{return ''},
34             is => 'rw',
35             isa => Any,
36             required => 0,
37             );
38              
39             has close =>
40             (
41             default => sub{return []},
42             is => 'rw',
43             isa => ArrayRef,
44             required => 0,
45             );
46              
47             has delimiter_action =>
48             (
49             default => sub{return {} },
50             is => 'rw',
51             isa => HashRef,
52             required => 0,
53             );
54              
55             has delimiter_frequency =>
56             (
57             default => sub{return {} },
58             is => 'rw',
59             isa => HashRef,
60             required => 0,
61             );
62              
63             has delimiter_stack =>
64             (
65             default => sub{return []},
66             is => 'rw',
67             isa => ArrayRef,
68             required => 0,
69             );
70              
71             has error_message =>
72             (
73             default => sub{return ''},
74             is => 'rw',
75             isa => Str,
76             required => 0,
77             );
78              
79             has error_number =>
80             (
81             default => sub{return 0},
82             is => 'rw',
83             isa => Int,
84             required => 0,
85             );
86              
87             has escape_char =>
88             (
89             default => sub{return '\\'},
90             is => 'rw',
91             isa => Str,
92             required => 0,
93             );
94              
95             has grammar =>
96             (
97             default => sub {return ''},
98             is => 'rw',
99             isa => Any,
100             required => 0,
101             );
102              
103             has known_events =>
104             (
105             default => sub{return {} },
106             is => 'rw',
107             isa => HashRef,
108             required => 0,
109             );
110              
111             has length =>
112             (
113             default => sub{return 0},
114             is => 'rw',
115             isa => Int,
116             required => 0,
117             );
118              
119             has matching_delimiter =>
120             (
121             default => sub{return {} },
122             is => 'rw',
123             isa => HashRef,
124             required => 0,
125             );
126              
127             has next_few_limit =>
128             (
129             default => sub{return 20},
130             is => 'rw',
131             isa => Int,
132             required => 0,
133             );
134              
135             has node_stack =>
136             (
137             default => sub{return []},
138             is => 'rw',
139             isa => ArrayRef,
140             required => 0,
141             );
142              
143             has open =>
144             (
145             default => sub{return []},
146             is => 'rw',
147             isa => ArrayRef,
148             required => 0,
149             );
150              
151             has options =>
152             (
153             default => sub{return 0},
154             is => 'rw',
155             isa => Int,
156             required => 0,
157             );
158              
159             has pos =>
160             (
161             default => sub{return 0},
162             is => 'rw',
163             isa => Int,
164             required => 0,
165             );
166              
167             has recce =>
168             (
169             default => sub{return ''},
170             is => 'rw',
171             isa => Any,
172             required => 0,
173             );
174              
175             has tree =>
176             (
177             default => sub{return ''},
178             is => 'rw',
179             isa => Any,
180             required => 0,
181             );
182              
183             has text =>
184             (
185             default => sub{return \''}, # Use ' in comment for UltraEdit.
186             is => 'rw',
187             isa => ScalarRef[Str],
188             required => 0,
189             );
190              
191             has uid =>
192             (
193             default => sub{return 0},
194             is => 'rw',
195             isa => Int,
196             required => 0,
197             );
198              
199             our $VERSION = '1.07';
200              
201             # ------------------------------------------------
202              
203             sub BUILD
204             {
205             my($self) = @_;
206              
207             # Policy: Event names are always the same as the name of the corresponding lexeme.
208             #
209             # Note: Tokens of the form '_xxx_' are replaced just below, with values returned
210             # by the call to validate_open_close().
211              
212             my($bnf) = <<'END_OF_GRAMMAR';
213              
214             :default ::= action => [values]
215              
216             lexeme default = latm => 1
217              
218             :start ::= input_text
219              
220             input_text ::= input_string*
221              
222             input_string ::= quoted_text
223             | unquoted_text
224              
225             quoted_text ::= open_delim input_text close_delim
226              
227             unquoted_text ::= text
228              
229             # Lexemes in alphabetical order.
230              
231             delimiter_char ~ [_delimiter_]
232              
233             :lexeme ~ close_delim pause => before event => close_delim
234             _close_
235              
236             escaped_char ~ '_escape_char_' delimiter_char # Use ' in comment for UltraEdit.
237              
238             # Warning: Do not add '+' to this set, even though it speeds up things.
239             # The problem is that the set then gobbles up any '\', so the following
240             # character is no longer recognized as being escaped.
241             # Trapping the exception then generated would be possible.
242              
243             non_quote_char ~ [^_delimiter_] # Use " in comment for UltraEdit.
244              
245             :lexeme ~ open_delim pause => before event => open_delim
246             _open_
247              
248             :lexeme ~ text pause => before event => text
249             text ~ escaped_char
250             | non_quote_char
251             END_OF_GRAMMAR
252              
253             my($hashref) = $self -> _validate_open_close;
254             $bnf =~ s/_open_/$$hashref{_open_}/;
255             $bnf =~ s/_close_/$$hashref{_close_}/;
256             $bnf =~ s/_delimiter_/$$hashref{_delimiter_}/g;
257             my($escape_char) = $self -> escape_char;
258              
259             if ($escape_char eq "'")
260             {
261             my($message) = 'Single-quote is forbidden as an escape character';
262              
263             $self -> error_message($message);
264             $self -> error_number(7);
265              
266             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
267              
268             die "Error: $message\n";
269             }
270              
271             $bnf =~ s/_escape_char_/$escape_char/g;
272              
273             $self -> bnf($bnf);
274             $self -> grammar
275             (
276             Marpa::R2::Scanless::G -> new
277             ({
278             source => \$self -> bnf
279             })
280             );
281              
282             # This hash does not contain the key "'exhausted" because the exhaustion
283             # event is everywhere handled explicitly. Yes, it has a leading quote.
284              
285             my(%event);
286              
287             for my $line (split(/\n/, $self -> bnf) )
288             {
289             $event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
290             }
291              
292             $self -> known_events(\%event);
293              
294             } # End of BUILD.
295              
296             # ------------------------------------------------
297              
298             sub _add_daughter
299             {
300             my($self, $name, $attributes) = @_;
301             $attributes = {%$attributes, uid => $self -> uid($self -> uid + 1)};
302             my($stack) = $self -> node_stack;
303             my($node) = Tree -> new($name);
304              
305             $node -> meta($attributes);
306              
307             $$stack[$#$stack] -> add_child({}, $node);
308              
309             } # End of _add_daughter.
310              
311             # ------------------------------------------------
312              
313             sub next_few_chars
314             {
315             my($self, $stringref, $offset) = @_;
316             my($s) = substr($$stringref, $offset, $self -> next_few_limit);
317             $s =~ tr/\n/ /;
318             $s =~ s/^\s+//;
319             $s =~ s/\s+$//;
320              
321             return $s;
322              
323             } # End of next_few_chars.
324              
325             # ------------------------------------------------
326              
327             sub parse
328             {
329             my($self, %opts) = @_;
330              
331             # Emulate parts of new(), which makes things a bit earier for the caller.
332              
333             $self -> options($opts{options}) if (defined $opts{options});
334             $self -> text($opts{text}) if (defined $opts{text});
335             $self -> pos($opts{pos}) if (defined $opts{pos});
336             $self -> length($opts{length}) if (defined $opts{length});
337              
338             $self -> recce
339             (
340             Marpa::R2::Scanless::R -> new
341             ({
342             exhaustion => 'event',
343             grammar => $self -> grammar,
344             ranking_method => 'high_rule_only',
345             })
346             );
347              
348             # Since $self -> node_stack has not been initialized yet,
349             # we can't call _add_daughter() until after this statement.
350              
351             $self -> uid(0);
352             $self -> tree(Tree -> new('root') );
353             $self -> tree -> meta({text => '', uid => $self -> uid});
354             $self -> node_stack([$self -> tree -> root]);
355              
356             # Return 0 for success and 1 for failure.
357              
358             my($result) = 0;
359              
360             my($message);
361              
362             try
363             {
364             if (defined (my $value = $self -> _process) )
365             {
366             }
367             else
368             {
369             $result = 1;
370              
371             print "Error: Parse failed\n" if ($self -> options & print_errors);
372             }
373             }
374             catch
375             {
376             $result = 1;
377              
378             print "Error: Parse failed. ${_}" if ($self -> options & print_errors);
379             };
380              
381             # Return 0 for success and 1 for failure.
382              
383             return $result;
384              
385             } # End of parse.
386              
387             # ------------------------------------------------
388              
389             sub _pop_node_stack
390             {
391             my($self) = @_;
392             my($stack) = $self -> node_stack;
393              
394             pop @$stack;
395              
396             $self -> node_stack($stack);
397              
398             } # End of _pop_node_stack.
399              
400             # ------------------------------------------------
401              
402             sub _process
403             {
404             my($self) = @_;
405             my($stringref) = $self -> text || \''; # Allow for undef. Use ' in comment for UltraEdit.
406             my($pos) = $self -> pos;
407             my($first_pos) = $pos;
408             my($total_length) = length($$stringref);
409             my($length) = $self -> length || $total_length;
410             my($text) = '';
411             my($format) = "%-20s %5s %5s %5s %-20s %-20s\n";
412             my($last_event) = '';
413             my($matching_delimiter) = $self -> matching_delimiter;
414              
415             if ($self -> options & print_debugs)
416             {
417             print "Length of input: $length. Input |$$stringref|\n";
418             print sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme', 'Comment');
419             }
420              
421             my($delimiter_frequency, $delimiter_stack);
422             my($event_name);
423             my($lexeme);
424             my($message);
425             my($original_lexeme);
426             my($span, $start);
427             my($tos);
428              
429             # We use read()/lexeme_read()/resume() because we pause at each lexeme.
430             # Also, in read(), we use $pos and $length to avoid reading Ruby Slippers tokens (if any).
431             # For the latter, see scripts/match.parentheses.02.pl in MarpaX::Demo::SampleScripts.
432              
433             for
434             (
435             $pos = $self -> recce -> read($stringref, $pos, $length);
436             ($pos < $total_length) && ( ($pos - $first_pos) <= $length);
437             $pos = $self -> recce -> resume($pos)
438             )
439             {
440             $delimiter_frequency = $self -> delimiter_frequency;
441             $delimiter_stack = $self -> delimiter_stack;
442             ($start, $span) = $self -> recce -> pause_span;
443             ($event_name, $span, $pos) = $self -> _validate_event($stringref, $start, $span, $pos, $delimiter_frequency);
444              
445             # If the input is exhausted, we exit immediately so we don't try to use
446             # the values of $start, $span or $pos. They are ignored upon exit.
447              
448             last if ($event_name eq "'exhausted"); # Yes, it has a leading quote.
449              
450             $lexeme = $self -> recce -> literal($start, $span);
451             $original_lexeme = $lexeme;
452             $pos = $self -> recce -> lexeme_read($event_name);
453              
454             die "lexeme_read($event_name) rejected lexeme |$lexeme|\n" if (! defined $pos);
455              
456             print sprintf($format, $event_name, $start, $span, $pos, $lexeme, '-') if ($self -> options & print_debugs);
457              
458             if ($event_name ne 'text')
459             {
460             $self -> _save_text($text);
461              
462             $text = '';
463             }
464              
465             if ($event_name eq 'close_delim')
466             {
467             $$delimiter_frequency{$lexeme}--;
468              
469             $self -> delimiter_frequency($delimiter_frequency);
470              
471             $tos = pop @$delimiter_stack;
472              
473             $self -> delimiter_stack($delimiter_stack);
474              
475             # If the top of the delimiter stack is not the lexeme corresponding to the
476             # opening delimiter of the current closing delimiter, then there's an error.
477              
478             if ($$matching_delimiter{$$tos{lexeme} } ne $lexeme)
479             {
480             $message = "Last open delimiter: $$tos{lexeme}. Unexpected closing delimiter: $lexeme";
481              
482             $self -> error_message($message);
483             $self -> error_number(1);
484              
485             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
486              
487             die "$message\n" if ($self -> options & overlap_is_fatal);
488              
489             # If we did not die, then it's a warning message.
490              
491             $self -> error_number(-1);
492              
493             print "Warning: $message\n" if ($self -> options & print_warnings);
494             }
495              
496             $self -> _pop_node_stack;
497             $self -> _add_daughter('close', {text => $lexeme});
498             }
499             elsif ($event_name eq 'open_delim')
500             {
501             $$delimiter_frequency{$$matching_delimiter{$lexeme} }++;
502              
503             # If the top of the delimiter stack reaches 2, then there's an error.
504             # Unlike mismatched delimiters (just above), this is never gets a warning.
505              
506             if ($$delimiter_frequency{$$matching_delimiter{$lexeme} } > 1)
507             {
508             $message = "Opened delimiter $lexeme again before closing previous one";
509              
510             $self -> error_message($message);
511             $self -> error_number(2);
512              
513             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
514              
515             die "$message\n" if ($self -> options & nesting_is_fatal);
516              
517             # If we did not die, then it's a warning message.
518              
519             $self -> error_number(-2);
520              
521             print "Warning: $message\n" if ($self -> options & print_warnings);
522             }
523              
524             push @$delimiter_stack,
525             {
526             count => $$delimiter_frequency{$$matching_delimiter{$lexeme} },
527             lexeme => $lexeme,
528             };
529              
530             $self -> delimiter_frequency($delimiter_frequency);
531             $self -> delimiter_stack($delimiter_stack);
532             $self -> _add_daughter('open', {text => $lexeme});
533             $self -> _push_node_stack;
534             }
535             elsif ($event_name eq 'text')
536             {
537             $text .= $lexeme;
538             }
539              
540             $last_event = $event_name;
541             }
542              
543             # Mop up any left-over chars.
544              
545             $self -> _save_text($text);
546              
547             if ($self -> recce -> exhausted)
548             {
549             $message = 'Parse exhausted';
550              
551             $self -> error_message($message);
552             $self -> error_number(6);
553              
554             if ($self -> options & exhaustion_is_fatal)
555             {
556             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
557              
558             die "$message\n";
559             }
560             else
561             {
562             $self -> error_number(-6);
563              
564             print "Warning: $message\n" if ($self -> options & print_warnings);
565             }
566             }
567             elsif (my $status = $self -> recce -> ambiguous)
568             {
569             my($terminals) = $self -> recce -> terminals_expected;
570             $terminals = ['(None)'] if ($#$terminals < 0);
571             $message = "Ambiguous parse. Status: $status. Terminals expected: " . join(', ', @$terminals);
572              
573             $self -> error_message($message);
574             $self -> error_number(3);
575              
576             if ($self -> options & ambiguity_is_fatal)
577             {
578             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
579              
580             die "$message\n";
581             }
582             elsif ($self -> options & print_warnings)
583             {
584             $self -> error_number(-3);
585              
586             print "Warning: $message\n";
587             }
588             }
589              
590             # Return a defined value for success and undef for failure.
591              
592             return $self -> recce -> value;
593              
594             } # End of _process.
595              
596             # ------------------------------------------------
597              
598             sub _push_node_stack
599             {
600             my($self) = @_;
601             my($stack) = $self -> node_stack;
602             my(@daughters) = $$stack[$#$stack] -> children;
603              
604             push @$stack, $daughters[$#daughters];
605              
606             $self -> node_stack($stack);
607              
608             } # End of _push_node_stack.
609              
610             # ------------------------------------------------
611              
612             sub _save_text
613             {
614             my($self, $text) = @_;
615              
616             $self -> _add_daughter('text', {text => $text}) if (length($text) );
617              
618             return '';
619              
620             } # End of _save_text.
621              
622             # ------------------------------------------------
623              
624             sub _validate_event
625             {
626             my($self, $stringref, $start, $span, $pos, $delimiter_frequency) = @_;
627             my(@event) = @{$self -> recce -> events};
628             my($event_count) = scalar @event;
629             my(@event_name) = sort map{$$_[0]} @event;
630             my($event_name) = $event_name[0]; # Default.
631              
632             # If the input is exhausted, we return immediately so we don't try to use
633             # the values of $start, $span or $pos. They are ignored upon return.
634              
635             if ($event_name eq "'exhausted") # Yes, it has a leading quote.
636             {
637             return ($event_name, $span, $pos);
638             }
639              
640             my($lexeme) = substr($$stringref, $start, $span);
641             my($line, $column) = $self -> recce -> line_column($start);
642             my($literal) = $self -> next_few_chars($stringref, $start + $span);
643             my($message) = "Location: ($line, $column). Lexeme: |$lexeme|. Next few chars: |$literal|";
644             $message = "$message. Events: $event_count. Names: ";
645              
646             print $message, join(', ', @event_name), "\n" if ($self -> options & print_debugs);
647              
648             my(%event_name);
649              
650             @event_name{@event_name} = (1) x @event_name;
651              
652             for (@event_name)
653             {
654             if (! ${$self -> known_events}{$_})
655             {
656             $message = "Unexpected event name '$_'";
657              
658             $self -> error_message($message);
659             $self -> error_number(10);
660              
661             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
662              
663             die "$message\n";
664             }
665             }
666              
667             if ($event_count > 1)
668             {
669             # We get here for single and double quotes because an open s. or d. quote is
670             # indistinguishable from a close s. or d. quote, and that leads to ambiguity.
671              
672             if ( ($lexeme =~ /["']/) && (join(', ', @event_name) eq 'close_delim, open_delim') ) # ".
673             {
674             # At the time _validate_event() is called, the quote count has not yet been bumped.
675             # If this is the 1st quote, then it's an open_delim.
676             # If this is the 2nd quote, them it's a close delim.
677              
678             if ($$delimiter_frequency{$lexeme} % 2 == 0)
679             {
680             $event_name = 'open_delim';
681              
682             print "Disambiguated lexeme |$lexeme| as '$event_name'\n" if ($self -> options & print_debugs);
683             }
684             else
685             {
686             $event_name = 'close_delim';
687              
688             print "Disambiguated lexeme |$lexeme| as '$event_name'\n" if ($self -> options & print_debugs);
689             }
690             }
691             else
692             {
693             $message = join(', ', @event_name);
694             $message = "The code does not handle these events simultaneously: $message";
695              
696             $self -> error_message($message);
697             $self -> error_number(11);
698              
699             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
700              
701             die "$message\n";
702             }
703             }
704              
705             return ($event_name, $span, $pos);
706              
707             } # End of _validate_event.
708              
709             # ------------------------------------------------
710              
711             sub _validate_open_close
712             {
713             my($self) = @_;
714             my($open) = $self -> open;
715             my($close) = $self -> close;
716              
717             my($message);
718              
719             if ( ($#$open < 0) || ($#$close < 0) )
720             {
721             $message = 'There must be at least 1 pair of open/close delimiters';
722              
723             $self -> error_message($message);
724             $self -> error_number(8);
725              
726             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
727              
728             die "Error: $message\n";
729             }
730              
731             if ($#$open != $#$close)
732             {
733             $message = 'The # of open delimiters must match the # of close delimiters';
734              
735             $self -> error_message($message);
736             $self -> error_number(9);
737              
738             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
739              
740             die "Error: $message\n";
741             }
742              
743             my(%substitute) = (_close_ => '', _delimiter_ => '', _open_ => '');
744             my($matching_delimiter) = {};
745             my(%seen) = (close => {}, open => {});
746              
747             my($close_quote);
748             my(%delimiter_action, %delimiter_frequency);
749             my($open_quote);
750             my($prefix, %prefix);
751              
752             for my $i (0 .. $#$open)
753             {
754             if ( ($$open[$i] =~ /\\/) || ($$close[$i] =~ /\\/) )
755             {
756             $message = 'Backslash is forbidden as a delimiter character';
757              
758             $self -> error_message($message);
759             $self -> error_number(4);
760              
761             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
762              
763             die "Error: $message\n";
764             }
765              
766             if ( ( (length($$open[$i]) > 1) && ($$open[$i] =~ /'/) ) || ( (length($$close[$i]) > 1) && ($$close[$i] =~ /'/) ) )
767             {
768             $message = 'Single-quotes are forbidden in multi-character delimiters';
769              
770             $self -> error_message($message);
771             $self -> error_number(5);
772              
773             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
774              
775             die "Error: $message\n";
776             }
777              
778             $seen{open}{$$open[$i]} = 0 if (! $seen{open}{$$open[$i]});
779             $seen{close}{$$close[$i]} = 0 if (! $seen{close}{$$close[$i]});
780              
781             $seen{open}{$$open[$i]}++;
782             $seen{close}{$$close[$i]}++;
783              
784             $delimiter_action{$$open[$i]} = 'open';
785             $delimiter_action{$$close[$i]} = 'close';
786             $$matching_delimiter{$$open[$i]} = $$close[$i];
787             $delimiter_frequency{$$open[$i]} = 0;
788             $delimiter_frequency{$$close[$i]} = 0;
789              
790             if (length($$open[$i]) == 1)
791             {
792             $open_quote = $$open[$i] eq '[' ? "[\\$$open[$i]]" : "[$$open[$i]]";
793             }
794             else
795             {
796             # This fails if length > 1 and open contains a single quote.
797              
798             $open_quote = "'$$open[$i]'";
799             }
800              
801             if (length($$close[$i]) == 1)
802             {
803             $close_quote = $$close[$i] eq ']' ? "[\\$$close[$i]]" : "[$$close[$i]]";
804             }
805             else
806             {
807             # This fails if length > 1 and close contains a single quote.
808              
809             $close_quote = "'$$close[$i]'";
810             }
811              
812             $substitute{_open_} .= "open_delim\t\t\t\~ $open_quote\n" if ($seen{open}{$$open[$i]} <= 1);
813             $substitute{_close_} .= "close_delim\t\t\t\~ $close_quote\n" if ($seen{close}{$$close[$i]} <= 1);
814             $prefix = substr($$open[$i], 0, 1);
815             $prefix = "\\$prefix" if ($prefix =~ /[\[\]]/);
816             $prefix{$prefix} = 0 if (! $prefix{$prefix});
817              
818             $prefix{$prefix}++;
819              
820             $substitute{_delimiter_} .= $prefix if ($prefix{$prefix} == 1);
821             $prefix = substr($$close[$i], 0, 1);
822             $prefix = "\\$prefix" if ($prefix =~ /[\[\]]/);
823             $prefix{$prefix} = 0 if (! $prefix{$prefix});
824              
825             $prefix{$prefix}++;
826              
827             $substitute{_delimiter_} .= $prefix if ($prefix{$prefix} == 1);
828             }
829              
830             $self -> delimiter_action(\%delimiter_action);
831             $self -> delimiter_frequency(\%delimiter_frequency);
832             $self -> matching_delimiter($matching_delimiter);
833              
834             return \%substitute;
835              
836             } # End of _validate_open_close.
837              
838             # ------------------------------------------------
839              
840             1;
841              
842             =pod
843              
844             =head1 NAME
845              
846             C - Extract delimited text sequences from strings
847              
848             =head1 Synopsis
849              
850             #!/usr/bin/env perl
851              
852             use strict;
853             use warnings;
854              
855             use Text::Balanced::Marpa ':constants';
856              
857             # -----------
858              
859             my($count) = 0;
860             my($parser) = Text::Balanced::Marpa -> new
861             (
862             open => ['<:' ,'[%'],
863             close => [':>', '%]'],
864             options => nesting_is_fatal | print_warnings,
865             );
866             my(@text) =
867             (
868             q|<: a :>|,
869             q|a [% b <: c :> d %] e|,
870             q|a <: b <: c :> d :> e|, # nesting_is_fatal triggers an error here.
871             );
872              
873             my($result);
874              
875             for my $text (@text)
876             {
877             $count++;
878              
879             print "Parsing |$text|\n";
880              
881             $result = $parser -> parse(text => \$text);
882              
883             print join("\n", @{$parser -> tree -> tree2string}), "\n";
884             print "Parse result: $result (0 is success)\n";
885              
886             if ($count == 3)
887             {
888             print "Deliberate error: Failed to parse |$text|\n";
889             print 'Error number: ', $parser -> error_number, '. Error message: ',
890             $parser -> error_message, "\n";
891             }
892              
893             print '-' x 50, "\n";
894             }
895              
896             See scripts/synopsis.pl.
897              
898             This is the printout of synopsis.pl:
899              
900             Parsing |<: a :>|
901             Parsed text:
902             root. Attributes: {}
903             |--- open. Attributes: {text => "<:"}
904             | |--- string. Attributes: {text => " a "}
905             |--- close. Attributes: {text => ":>"}
906             Parse result: 0 (0 is success)
907             --------------------------------------------------
908             Parsing |a [% b <: c :> d %] e|
909             Parsed text:
910             root. Attributes: {}
911             |--- string. Attributes: {text => "a "}
912             |--- open. Attributes: {text => "[%"}
913             | |--- string. Attributes: {text => " b "}
914             | |--- open. Attributes: {text => "<:"}
915             | | |--- string. Attributes: {text => " c "}
916             | |--- close. Attributes: {text => ":>"}
917             | |--- string. Attributes: {text => " d "}
918             |--- close. Attributes: {text => "%]"}
919             |--- string. Attributes: {text => " e"}
920             Parse result: 0 (0 is success)
921             --------------------------------------------------
922             Parsing |a <: b <: c :> d :> e|
923             Error: Parse failed. Opened delimiter <: again before closing previous one
924             Text parsed so far:
925             root. Attributes: {}
926             |--- string. Attributes: {text => "a "}
927             |--- open. Attributes: {text => "<:"}
928             |--- string. Attributes: {text => " b "}
929             Parse result: 1 (0 is success)
930             Deliberate error: Failed to parse |a <: b <: c :> d :> e|
931             Error number: 2. Error message: Opened delimiter <: again before closing previous one
932             --------------------------------------------------
933              
934             See also scripts/tiny.pl and scripts/traverse.pl.
935              
936             =head1 Description
937              
938             L provides a L-based parser for extracting delimited text
939             sequences from strings. The text outside and inside the delimiters, and delimiters themselves, are
940             all stored as nodes in a tree managed by L.
941              
942             Nested strings, with the same or different delimiters, are stored as daughters of the nodes which
943             hold the delimiters.
944              
945             This module is a companion to L. The differences are discussed in the L
946             below.
947              
948             See the L for various topics, including:
949              
950             =over 4
951              
952             =item o UFT8 handling
953              
954             See t/utf8.t.
955              
956             =item o Escaping delimiters within the text
957              
958             See t/escapes.t.
959              
960             =item o Options to make nested and/or overlapped delimiters fatal errors
961              
962             See t/colons.t.
963              
964             =item o Using delimiters which are part of another delimiter
965              
966             See t/escapes.t and t/perl.delimiters.
967              
968             =item o Processing the tree-structured output
969              
970             See scripts/traverse.pl.
971              
972             =item o Emulating L's use of '<:' and ':>
973              
974             See t/colons.t and t/percents.t.
975              
976             =item o Implementing a really trivial HTML parser
977              
978             See t/html.t.
979              
980             In the same vein, see t/angle.brackets.t, for code where the delimiters are just '<' and '>'.
981              
982             =item o Handling multiple sets of delimiters
983              
984             See t/multiple.delimiters.t.
985              
986             =item o Skipping (leading) characters in the input string
987              
988             See t/skip.prefix.t.
989              
990             =item o Implementing hard-to-read text strings as delimiters
991              
992             See t/silly.delimiters.
993              
994             =back
995              
996             =head1 Distributions
997              
998             This module is available as a Unix-style distro (*.tgz).
999              
1000             See L
1001             for help on unpacking and installing distros.
1002              
1003             =head1 Installation
1004              
1005             Install L as you would any C module:
1006              
1007             Run:
1008              
1009             cpanm Text::Balanced::Marpa
1010              
1011             or run:
1012              
1013             sudo cpan Text::Balanced::Marpa
1014              
1015             or unpack the distro, and then either:
1016              
1017             perl Build.PL
1018             ./Build
1019             ./Build test
1020             sudo ./Build install
1021              
1022             or:
1023              
1024             perl Makefile.PL
1025             make (or dmake or nmake)
1026             make test
1027             make install
1028              
1029             =head1 Constructor and Initialization
1030              
1031             C is called as C<< my($parser) = Text::Balanced::Marpa -> new(k1 => v1, k2 => v2, ...) >>.
1032              
1033             It returns a new object of type C.
1034              
1035             Key-value pairs accepted in the parameter list (see corresponding methods for details
1036             [e.g. L]):
1037              
1038             =over 4
1039              
1040             =item o close => $arrayref
1041              
1042             An arrayref of strings, each one a closing delimiter.
1043              
1044             The # of elements must match the # of elements in the 'open' arrayref.
1045              
1046             See the L for details and warnings.
1047              
1048             A value for this option is mandatory.
1049              
1050             Default: None.
1051              
1052             =item o length => $integer
1053              
1054             The maxiumum length of the input string to process.
1055              
1056             This parameter works in conjunction with the C parameter.
1057              
1058             C can also be used as a key in the hash passed to L.
1059              
1060             See the L for details.
1061              
1062             Default: Calls Perl's length() function on the input string.
1063              
1064             =item o next_few_limit => $integer
1065              
1066             This controls how many characters are printed when displaying 'the next few chars'.
1067              
1068             It only affects debug output.
1069              
1070             Default: 20.
1071              
1072             =item o open => $arrayref
1073              
1074             An arrayref of strings, each one an opening delimiter.
1075              
1076             The # of elements must match the # of elements in the 'open' arrayref.
1077              
1078             See the L for details and warnings.
1079              
1080             A value for this option is mandatory.
1081              
1082             Default: None.
1083              
1084             =item o options => $bit_string
1085              
1086             This allows you to turn on various options.
1087              
1088             C can also be used as a key in the hash passed to L.
1089              
1090             Default: 0 (nothing is fatal).
1091              
1092             See the L for details.
1093              
1094             =item o pos => $integer
1095              
1096             The offset within the input string at which to start processing.
1097              
1098             This parameter works in conjunction with the C parameter.
1099              
1100             C can also be used as a key in the hash passed to L.
1101              
1102             See the L for details.
1103              
1104             Note: The first character in the input string is at pos == 0.
1105              
1106             Default: 0.
1107              
1108             =item o text => $stringref
1109              
1110             This is a reference to the string to be parsed. A stringref is used to avoid copying what could
1111             potentially be a very long string.
1112              
1113             C can also be used as a key in the hash passed to L.
1114              
1115             Default: \''.
1116              
1117             =back
1118              
1119             =head1 Methods
1120              
1121             =head2 bnf()
1122              
1123             Returns a string containing the grammar constructed based on user input.
1124              
1125             =head2 close()
1126              
1127             Get the arrayref of closing delimiters.
1128              
1129             See also L.
1130              
1131             See the L for details and warnings.
1132              
1133             'close' is a parameter to L. See L for details.
1134              
1135             =head2 delimiter_action()
1136              
1137             Returns a hashref, where the keys are delimiters and the values are either 'open' or 'close'.
1138              
1139             =head2 delimiter_frequency()
1140              
1141             Returns a hashref where the keys are opening and closing delimiters, and the values are the # of
1142             times each delimiter appears in the input stream.
1143              
1144             The value is incremented for each opening delimiter and decremented for each closing delimiter.
1145              
1146             =head2 error_message()
1147              
1148             Returns the last error or warning message set.
1149              
1150             Error messages always start with 'Error: '. Messages never end with "\n".
1151              
1152             Parsing error strings is not a good idea, ever though this module's format for them is fixed.
1153              
1154             See L.
1155              
1156             =head2 error_number()
1157              
1158             Returns the last error or warning number set.
1159              
1160             Warnings have values < 0, and errors have values > 0.
1161              
1162             If the value is > 0, the message has the prefix 'Error: ', and if the value is < 0, it has the
1163             prefix 'Warning: '. If this is not the case, it's a reportable bug.
1164              
1165             Possible values for error_number() and error_message():
1166              
1167             =over 4
1168              
1169             =item o 0 => ""
1170              
1171             This is the default value.
1172              
1173             =item o 1/-1 => "Last open delimiter: $lexeme_1. Unexpected closing delimiter: $lexeme_2"
1174              
1175             If L returns 1, it's an error, and if it returns -1 it's a warning.
1176              
1177             You can set the option C to make it fatal.
1178              
1179             =item o 2/-2 => "Opened delimiter $lexeme again before closing previous one"
1180              
1181             If L returns 2, it's an error, and if it returns -2 it's a warning.
1182              
1183             You can set the option C to make it fatal.
1184              
1185             =item o 3/-3 => "Ambiguous parse. Status: $status. Terminals expected: a, b, ..."
1186              
1187             This message is only produced when the parse is ambiguous.
1188              
1189             If L returns 3, it's an error, and if it returns -3 it's a warning.
1190              
1191             You can set the option C to make it fatal.
1192              
1193             =item o 4 => "Backslash is forbidden as a delimiter character"
1194              
1195             This preempts some types of sabotage.
1196              
1197             This message can never be just a warning message.
1198              
1199             =item o 5 => "Single-quotes are forbidden in multi-character delimiters"
1200              
1201             This limitation is due to the syntax of
1202             L.
1203              
1204             This message can never be just a warning message.
1205              
1206             =item o 6/-6 => "Parse exhausted"
1207              
1208             If L returns 6, it's an error, and if it returns -6 it's a warning.
1209              
1210             You can set the option C to make it fatal.
1211              
1212             =item o 7 => 'Single-quote is forbidden as an escape character'
1213              
1214             This limitation is due to the syntax of
1215             L.
1216              
1217             This message can never be just a warning message.
1218              
1219             =item o 8 => "There must be at least 1 pair of open/close delimiters"
1220              
1221             This message can never be just a warning message.
1222              
1223             =item o 9 => "The # of open delimiters must match the # of close delimiters"
1224              
1225             This message can never be just a warning message.
1226              
1227             =item o 10 => "Unexpected event name 'xyz'"
1228              
1229             Marpa has trigged an event and it's name is not in the hash of event names derived from the BNF.
1230              
1231             This message can never be just a warning message.
1232              
1233             =item o 11 => "The code does not handle these events simultaneously: a, b, ..."
1234              
1235             The code is written to handle single events at a time, or in rare cases, 2 events at the same time.
1236             But here, multiple events have been triggered and the code cannot handle the given combination.
1237              
1238             This message can never be just a warning message.
1239              
1240             =back
1241              
1242             See L.
1243              
1244             =head2 escape_char()
1245              
1246             Get the escape char.
1247              
1248             =head2 known_events()
1249              
1250             Returns a hashref where the keys are event names and the values are 1.
1251              
1252             =head2 length([$integer])
1253              
1254             Here, the [] indicate an optional parameter.
1255              
1256             Get or set the length of the input string to process.
1257              
1258             See also the L and L.
1259              
1260             'length' is a parameter to L. See L for details.
1261              
1262             =head2 matching_delimiter()
1263              
1264             Returns a hashref where the keys are opening delimiters and the values are the corresponding closing
1265             delimiters.
1266              
1267             =head2 new()
1268              
1269             See L for details on the parameters accepted by L.
1270              
1271             =head2 next_few_chars($stringref, $offset)
1272              
1273             Returns a substring of $s, starting at $offset, for use in debug messages.
1274              
1275             See L.
1276              
1277             =head2 next_few_limit([$integer])
1278              
1279             Here, the [] indicate an optional parameter.
1280              
1281             Get or set the number of characters called 'the next few chars', which are printed during debugging.
1282              
1283             'next_few_limit' is a parameter to L. See L for details.
1284              
1285             =head2 open()
1286              
1287             Get the arrayref of opening delimiters.
1288              
1289             See also L.
1290              
1291             See the L for details and warnings.
1292              
1293             'open' is a parameter to L. See L for details.
1294              
1295             =head2 options([$bit_string])
1296              
1297             Here, the [] indicate an optional parameter.
1298              
1299             Get or set the option flags.
1300              
1301             For typical usage, see scripts/synopsis.pl.
1302              
1303             See the L for details.
1304              
1305             'options' is a parameter to L. See L for details.
1306              
1307             =head2 parse([%hash])
1308              
1309             Here, the [] indicate an optional parameter.
1310              
1311             This is the only method the user needs to call. All data can be supplied when calling L.
1312              
1313             You can of course call other methods (e.g. L ) after calling L but
1314             before calling C.
1315              
1316             The optional hash takes these ($key => $value) pairs (exactly the same as for L):
1317              
1318             =over 4
1319              
1320             =item o length => $integer
1321              
1322             =item o options => $bit_string
1323              
1324             =item o pos => $integer
1325              
1326             =item o text => $stringref
1327              
1328             =back
1329              
1330             Note: If a value is passed to C, it takes precedence over any value with the same
1331             key passed to L, and over any value previously passed to the method whose name is $key.
1332             Further, the value passed to C is always passed to the corresponding method (i.e. whose
1333             name is $key), meaning any subsequent call to that method returns the value passed to C.
1334              
1335             See scripts/samples.pl.
1336              
1337             Returns 0 for success and 1 for failure.
1338              
1339             If the value is 1, you should call L to find out what happened.
1340              
1341             =head2 pos([$integer])
1342              
1343             Here, the [] indicate an optional parameter.
1344              
1345             Get or set the offset within the input string at which to start processing.
1346              
1347             See also the L and L.
1348              
1349             'pos' is a parameter to L. See L for details.
1350              
1351             =head2 text([$stringref])
1352              
1353             Here, the [] indicate an optional parameter.
1354              
1355             Get or set a reference to the string to be parsed.
1356              
1357             'text' is a parameter to L. See L for details.
1358              
1359             =head2 tree()
1360              
1361             Returns an object of type L, which holds the parsed data.
1362              
1363             Obviously, it only makes sense to call C after calling C.
1364              
1365             See scripts/traverse.pl for sample code which processes this tree's nodes.
1366              
1367             =head1 FAQ
1368              
1369             =head2 What are the differences between Text::Balanced::Marpa and Text::Delimited::Marpa?
1370              
1371             I think this is shown most clearly by getting the 2 modules to process the same string. So,
1372             using this as input:
1373              
1374             'a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k'
1375              
1376             Output from Text::Balanced::Marpa's scripts/tiny.pl:
1377              
1378             (# 2) | 1 2 3 4 5 6 7 8 9
1379             |0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
1380             Parsing |Skip me ->a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|. pos: 10. length: 42
1381             Parse result: 0 (0 is success)
1382             root. Attributes: {text => "", uid => "0"}
1383             |--- text. Attributes: {text => "a ", uid => "1"}
1384             |--- open. Attributes: {text => "<:", uid => "2"}
1385             | |--- text. Attributes: {text => "b ", uid => "3"}
1386             | |--- open. Attributes: {text => "<:", uid => "4"}
1387             | | |--- text. Attributes: {text => "c", uid => "5"}
1388             | |--- close. Attributes: {text => ":>", uid => "6"}
1389             | |--- text. Attributes: {text => " d", uid => "7"}
1390             |--- close. Attributes: {text => ":>", uid => "8"}
1391             |--- text. Attributes: {text => " e ", uid => "9"}
1392             |--- open. Attributes: {text => "<:", uid => "10"}
1393             | |--- text. Attributes: {text => "f ", uid => "11"}
1394             | |--- open. Attributes: {text => "<:", uid => "12"}
1395             | | |--- text. Attributes: {text => " g ", uid => "13"}
1396             | | |--- open. Attributes: {text => "<:", uid => "14"}
1397             | | | |--- text. Attributes: {text => "h", uid => "15"}
1398             | | |--- close. Attributes: {text => ":>", uid => "16"}
1399             | | |--- text. Attributes: {text => " i", uid => "17"}
1400             | |--- close. Attributes: {text => ":>", uid => "18"}
1401             | |--- text. Attributes: {text => " j", uid => "19"}
1402             |--- close. Attributes: {text => ":>", uid => "20"}
1403             |--- text. Attributes: {text => " k", uid => "21"}
1404              
1405             Output from Text::Delimited::Marpa's scripts/tiny.pl:
1406              
1407             (# 2) | 1 2 3 4 5 6 7 8 9
1408             |0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
1409             Parsing |Skip me ->a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|. pos: 10. length: 42
1410             Parse result: 0 (0 is success)
1411             root. Attributes: {end => "0", length => "0", start => "0", text => "", uid => "0"}
1412             |--- span. Attributes: {end => "22", length => "9", start => "14", text => "b <:c:> d", uid => "1"}
1413             | |--- span. Attributes: {end => "18", length => "1", start => "18", text => "c", uid => "2"}
1414             |--- span. Attributes: {end => "47", length => "18", start => "30", text => "f <: g <:h:> i:> j", uid => "3"}
1415             |--- span. Attributes: {end => "43", length => "10", start => "34", text => " g <:h:> i", uid => "4"}
1416             |--- span. Attributes: {end => "39", length => "1", start => "39", text => "h", uid => "5"}
1417              
1418             Another example, using the same input string, but manually processing the tree nodes.
1419             Parent-daughter relationships are here represented by indentation.
1420              
1421             Output from Text::Balanced::Marpa's scripts/traverse.pl:
1422              
1423             | 1 2 3 4 5
1424             |012345678901234567890123456789012345678901234567890
1425             Parsing |a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|.
1426             Span Text
1427             1 |a |
1428             2 |<:|
1429             3 |b |
1430             4 |<:|
1431             5 |c|
1432             6 |:>|
1433             7 | d|
1434             8 |:>|
1435             9 | e |
1436             10 |<:|
1437             11 |f |
1438             12 |<:|
1439             13 | g |
1440             14 |<:|
1441             15 |h|
1442             16 |:>|
1443             17 | i|
1444             18 |:>|
1445             19 | j|
1446             20 |:>|
1447             21 | k|
1448              
1449             Output from Text::Delimited::Marpa's scripts/traverse.pl:
1450              
1451             | 1 2 3 4 5
1452             |012345678901234567890123456789012345678901234567890
1453             Parsing |a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|.
1454             Span Start End Length Text
1455             1 4 12 9 |b <:c:> d|
1456             2 8 8 1 |c|
1457             3 20 37 18 |f <: g <:h:> i:> j|
1458             4 24 33 10 | g <:h:> i|
1459             5 29 29 1 |h|
1460              
1461             =head2 Where are the error messages and numbers described?
1462              
1463             See L and L.
1464              
1465             =head2 How do I escape delimiters?
1466              
1467             By backslash-escaping the first character of all open and close delimiters which appear in the
1468             text.
1469              
1470             As an example, if the delimiters are '<:' and ':>', this means you have to escape I the '<'
1471             chars and I the colons in the text.
1472              
1473             The backslash is preserved in the output.
1474              
1475             If you don't want to use backslash for escaping, or can't, you can pass a different escape character
1476             to L.
1477              
1478             See t/escapes.t.
1479              
1480             =head2 How do the length and pos parameters to new() work?
1481              
1482             The recognizer - an object of type Marpa::R2::Scanless::R - is called in a loop, like this:
1483              
1484             for
1485             (
1486             $pos = $self -> recce -> read($stringref, $pos, $length);
1487             $pos < $length;
1488             $pos = $self -> recce -> resume($pos)
1489             )
1490              
1491             L and L can be used to initialize $pos and $length.
1492              
1493             Note: The first character in the input string is at pos == 0.
1494              
1495             See L for details.
1496              
1497             =head2 Does this package support Unicode/UTF8?
1498              
1499             Yes. See t/escapes.t, t/multiple.quotes.t and t/utf8.t.
1500              
1501             =head2 Does this package handler Perl delimiters (e.g. q|..|, qq|..|, qr/../, qw/../)?
1502              
1503             See t/perl.delimiters.t.
1504              
1505             =head2 Warning: Calling mutators after calling new()
1506              
1507             The only mutator which works after calling new() is L.
1508              
1509             In particular, you can't call L, L or L after calling L.
1510             This is because parameters passed to C are interpolated into the grammar before parsing
1511             begins. And that's why the docs for those methods all say 'Get the...' and not 'Get and set the...'.
1512              
1513             To make the code work, you would have to manually call _validate_open_close(). But even then
1514             a lot of things would have to be re-initialized to give the code any hope of working.
1515              
1516             =head2 What is the format of the 'open' and 'close' parameters to new()?
1517              
1518             Each of these parameters takes an arrayref as a value.
1519              
1520             The # of elements in the 2 arrayrefs must be the same.
1521              
1522             The 1st element in the 'open' arrayref is the 1st user-chosen opening delimiter, and the 1st
1523             element in the 'close' arrayref must be the corresponding closing delimiter.
1524              
1525             It is possible to use a delimiter which is part of another delimiter.
1526              
1527             See scripts/samples.pl. It uses both '<' and '<:' as opening delimiters and their corresponding
1528             closing delimiters are '>' and ':>'. Neat, huh?
1529              
1530             =head2 What are the possible values for the 'options' parameter to new()?
1531              
1532             Firstly, to make these constants available, you must say:
1533              
1534             use Text::Balanced::Marpa ':constants';
1535              
1536             Secondly, more detail on errors and warnings can be found at L.
1537              
1538             Thirdly, for usage of these option flags, see t/angle.brackets.t, t/colons.t, t/escapes.t,
1539             t/multiple.quotes.t, t/percents.t and scripts/samples.pl.
1540              
1541             Now the flags themselves:
1542              
1543             =over 4
1544              
1545             =item o nothing_is_fatal
1546              
1547             This is the default.
1548              
1549             C has the value of 0.
1550              
1551             =item o print_errors
1552              
1553             Print errors if this flag is set.
1554              
1555             C has the value of 1.
1556              
1557             =item o print_warnings
1558              
1559             Print various warnings if this flag is set:
1560              
1561             =over 4
1562              
1563             =item o The ambiguity status and terminals expected, if the parse is ambiguous
1564              
1565             =item o See L for other warnings which might be printed
1566              
1567             Ambiguity is not, in and of itself, an error. But see the C option, below.
1568              
1569             =back
1570              
1571             It's tempting to call this option C, but Perl already has C, so I didn't.
1572              
1573             C has the value of 2.
1574              
1575             =item o print_debugs
1576              
1577             Print extra stuff if this flag is set.
1578              
1579             C has the value of 4.
1580              
1581             =item o overlap_is_fatal
1582              
1583             This means overlapping delimiters cause a fatal error.
1584              
1585             So, setting C means '{Bold [Italic}]' would be a fatal error.
1586              
1587             I use this example since it gives me the opportunity to warn you, this will I do what you want
1588             if you try to use the delimiters of '<' and '>' for HTML. That is, 'Bold Italic' is
1589             not an error because what overlap are '' and '' BUT THEY ARE NOT TAGS. The tags are '<' and
1590             '>', ok? See also t/html.t.
1591              
1592             C has the value of 8.
1593              
1594             =item o nesting_is_fatal
1595              
1596             This means nesting of identical opening delimiters is fatal.
1597              
1598             So, using C means 'a <: b <: c :> d :> e' would be a fatal error.
1599              
1600             C has the value of 16.
1601              
1602             =item o ambiguity_is_fatal
1603              
1604             This makes L return 3 rather than -3.
1605              
1606             C has the value of 32.
1607              
1608             =item o exhaustion_is_fatal
1609              
1610             This makes L return 6 rather than -6.
1611              
1612             C has the value of 64.
1613              
1614             =back
1615              
1616             =head2 How do I print the tree built by the parser?
1617              
1618             See L.
1619              
1620             =head2 How do I make use of the tree built by the parser?
1621              
1622             See scripts/traverse.pl. It is a copy of t/html.t with tree-walking code instead of test code.
1623              
1624             =head2 How is the parsed data held in RAM?
1625              
1626             The parsed output is held in a tree managed by L.
1627              
1628             The tree always has a root node, which has nothing to do with the input data. So, even an empty
1629             imput string will produce a tree with 1 node. This root has an empty hashref associated with it.
1630              
1631             Nodes have a name and a hashref of attributes.
1632              
1633             The name indicates the type of node. Names are one of these literals:
1634              
1635             =over 4
1636              
1637             =item o close
1638              
1639             =item o open
1640              
1641             =item o root
1642              
1643             =item o text
1644              
1645             =back
1646              
1647             For 'open' and 'close', the delimiter is given by the value of the 'text' key in the hashref.
1648              
1649             The (key => value) pairs in the hashref are:
1650              
1651             =over 4
1652              
1653             =item o text => $string
1654              
1655             If the node name is 'open' or 'close', $string is the delimiter.
1656              
1657             If the node name is 'text', $string is the verbatim text from the document.
1658              
1659             Verbatim means, for example, that backslashes in the input are preserved.
1660              
1661             =back
1662              
1663             Try:
1664              
1665             perl -Ilib scripts/samples.pl info
1666              
1667             =head2 How is HTML/XML handled?
1668              
1669             The tree does not preserve the nested nature of HTML/XML.
1670              
1671             Post-processing (valid) HTML could easily generate another view of the data.
1672              
1673             But anyway, to get perfect HTML you'd be grabbing the output of L, right?
1674              
1675             See scripts/traverse.pl and t/html.t for a trivial HTML parser.
1676              
1677             =head2 What is the homepage of Marpa?
1678              
1679             L.
1680              
1681             That page has a long list of links.
1682              
1683             =head2 How do I run author tests?
1684              
1685             This runs both standard and author tests:
1686              
1687             shell> perl Build.PL; ./Build; ./Build authortest
1688              
1689             =head1 TODO
1690              
1691             =over 4
1692              
1693             =item o Advanced error reporting
1694              
1695             See L.
1696              
1697             Perhaps this could be a sub-class?
1698              
1699             =item o I8N support for error messages
1700              
1701             =item o An explicit test program for parse exhaustion
1702              
1703             =back
1704              
1705             =head1 See Also
1706              
1707             L.
1708              
1709             L and L.
1710              
1711             L.
1712              
1713             L - for various usages of L, but not of this module.
1714              
1715             =head1 Machine-Readable Change Log
1716              
1717             The file Changes was converted into Changelog.ini by L.
1718              
1719             =head1 Version Numbers
1720              
1721             Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1722              
1723             =head1 Thanks
1724              
1725             Thanks to Jeffrey Kegler, who wrote Marpa and L.
1726              
1727             And thanks to rns (Ruslan Shvedov) for writing the grammar for double-quoted strings used in
1728             L's scripts/quoted.strings.02.pl. I adapted it to HTML (see
1729             scripts/quoted.strings.05.pl in that module), and then incorporated the grammar into
1730             L, and - after more extensions - into this module.
1731              
1732             Lastly, thanks to Robert Rothenberg for L, a module which works the same way
1733             Perl does.
1734              
1735             =head1 Repository
1736              
1737             L
1738              
1739             =head1 Support
1740              
1741             Email the author, or log a bug on RT:
1742              
1743             L.
1744              
1745             =head1 Author
1746              
1747             L was written by Ron Savage Iron@savage.net.auE> in 2014.
1748              
1749             Marpa's homepage: L.
1750              
1751             My homepage: L.
1752              
1753             =head1 Copyright
1754              
1755             Australian copyright (c) 2014, Ron Savage.
1756              
1757             All Programs of mine are 'OSI Certified Open Source Software';
1758             you can redistribute them and/or modify them under the terms of
1759             The Artistic License 2.0, a copy of which is available at:
1760             http://opensource.org/licenses/alphabetical.
1761              
1762             =cut