File Coverage

blib/lib/Text/Delimited/Marpa.pm
Criterion Covered Total %
statement 220 281 78.2
branch 68 114 59.6
condition 14 26 53.8
subroutine 23 23 100.0
pod 2 3 66.6
total 327 447 73.1


line stmt bran cond sub pod time code
1             package Text::Delimited::Marpa;
2              
3 14     14   225215 use strict;
  14         27  
  14         482  
4 14     14   6477 use utf8;
  14         101  
  14         62  
5 14     14   438 use warnings;
  14         22  
  14         488  
6 14     14   55 use warnings qw(FATAL utf8); # Fatalize encoding glitches.
  14         22  
  14         584  
7 14     14   5582 use open qw(:std :utf8); # Undeclared streams in UTF-8.
  14         10729  
  14         62  
8              
9 14         125 use Const::Exporter constants =>
10             [
11             nothing_is_fatal => 0, # The default.
12             print_errors => 1,
13             print_warnings => 2,
14             print_debugs => 4,
15             mismatch_is_fatal => 8,
16             ambiguity_is_fatal => 16,
17             exhaustion_is_fatal => 32,
18 14     14   8480 ];
  14         188619  
19              
20 14     14   11400 use Marpa::R2;
  14         1728334  
  14         681  
21              
22 14     14   8186 use Moo;
  14         114745  
  14         100  
23              
24 14     14   25711 use Tree;
  14         67642  
  14         493  
25              
26 14     14   8729 use Types::Standard qw/Any ArrayRef HashRef Int ScalarRef Str/;
  14         771022  
  14         183  
27              
28 14     14   14550 use Try::Tiny;
  14         24  
  14         51701  
29              
30             has bnf =>
31             (
32             default => sub{return ''},
33             is => 'rw',
34             isa => Any,
35             required => 0,
36             );
37              
38             has close =>
39             (
40             default => sub{return ''},
41             is => 'rw',
42             isa => Str,
43             required => 0,
44             );
45              
46             has delimiter_action =>
47             (
48             default => sub{return {} },
49             is => 'rw',
50             isa => HashRef,
51             required => 0,
52             );
53              
54             has delimiter_frequency =>
55             (
56             default => sub{return {} },
57             is => 'rw',
58             isa => HashRef,
59             required => 0,
60             );
61              
62             has delimiter_stack =>
63             (
64             default => sub{return []},
65             is => 'rw',
66             isa => ArrayRef,
67             required => 0,
68             );
69              
70             has error_message =>
71             (
72             default => sub{return ''},
73             is => 'rw',
74             isa => Str,
75             required => 0,
76             );
77              
78             has error_number =>
79             (
80             default => sub{return 0},
81             is => 'rw',
82             isa => Int,
83             required => 0,
84             );
85              
86             has escape_char =>
87             (
88             default => sub{return '\\'},
89             is => 'rw',
90             isa => Str,
91             required => 0,
92             );
93              
94             has grammar =>
95             (
96             default => sub {return ''},
97             is => 'rw',
98             isa => Any,
99             required => 0,
100             );
101              
102             has known_events =>
103             (
104             default => sub{return {} },
105             is => 'rw',
106             isa => HashRef,
107             required => 0,
108             );
109              
110             has length =>
111             (
112             default => sub{return 0},
113             is => 'rw',
114             isa => Int,
115             required => 0,
116             );
117              
118             has matching_delimiter =>
119             (
120             default => sub{return {} },
121             is => 'rw',
122             isa => HashRef,
123             required => 0,
124             );
125              
126             has next_few_limit =>
127             (
128             default => sub{return 20},
129             is => 'rw',
130             isa => Int,
131             required => 0,
132             );
133              
134             has node_stack =>
135             (
136             default => sub{return []},
137             is => 'rw',
138             isa => ArrayRef,
139             required => 0,
140             );
141              
142             has open =>
143             (
144             default => sub{return ''},
145             is => 'rw',
146             isa => Str,
147             required => 0,
148             );
149              
150             has options =>
151             (
152             default => sub{return 0},
153             is => 'rw',
154             isa => Int,
155             required => 0,
156             );
157              
158             has pos =>
159             (
160             default => sub{return 0},
161             is => 'rw',
162             isa => Int,
163             required => 0,
164             );
165              
166             has recce =>
167             (
168             default => sub{return ''},
169             is => 'rw',
170             isa => Any,
171             required => 0,
172             );
173              
174             has tree =>
175             (
176             default => sub{return ''},
177             is => 'rw',
178             isa => Any,
179             required => 0,
180             );
181              
182             has text =>
183             (
184             default => sub{return \''}, # Use ' in comment for UltraEdit.
185             is => 'rw',
186             isa => ScalarRef[Str],
187             required => 0,
188             );
189              
190             has uid =>
191             (
192             default => sub{return 0},
193             is => 'rw',
194             isa => Int,
195             required => 0,
196             );
197              
198             our $VERSION = '1.00';
199              
200             # ------------------------------------------------
201              
202             sub BUILD
203             {
204 14     14 0 284 my($self) = @_;
205              
206             # Policy: Event names are always the same as the name of the corresponding lexeme.
207             #
208             # Note: Tokens of the form '_xxx_' are replaced just below, with values returned
209             # by the call to validate_open_close().
210              
211 14         34 my($bnf) = <<'END_OF_GRAMMAR';
212              
213             :default ::= action => [values]
214              
215             lexeme default = latm => 1
216              
217             :start ::= input_text
218              
219             input_text ::= input_string*
220              
221             input_string ::= quoted_text
222             | unquoted_text
223              
224             quoted_text ::= open_delim input_text close_delim
225              
226             unquoted_text ::= text
227              
228             # Lexemes in alphabetical order.
229              
230             delimiter_char ~ [_delimiter_]
231              
232             :lexeme ~ close_delim pause => before event => close_delim
233             _close_
234              
235             escaped_char ~ '_escape_char_' delimiter_char # Use ' in comment for UltraEdit.
236              
237             # Warning: Do not add '+' to this set, even though it speeds up things.
238             # The problem is that the set then gobbles up any '\', so the following
239             # character is no longer recognized as being escaped.
240             # Trapping the exception then generated would be possible.
241              
242             non_quote_char ~ [^_delimiter_] # Use " in comment for UltraEdit.
243              
244             :lexeme ~ open_delim pause => before event => open_delim
245             _open_
246              
247             :lexeme ~ text pause => before event => text
248             text ~ escaped_char
249             | non_quote_char
250             END_OF_GRAMMAR
251              
252 14         63 my($hashref) = $self -> _validate_open_close;
253 14         196 $bnf =~ s/_open_/$$hashref{_open_}/;
254 14         153 $bnf =~ s/_close_/$$hashref{_close_}/;
255 14         167 $bnf =~ s/_delimiter_/$$hashref{_delimiter_}/g;
256 14         71 my($escape_char) = $self -> escape_char;
257              
258 14 50       7288 if ($escape_char eq "'")
259             {
260 0         0 my($message) = 'Single-quote is forbidden as an escape character';
261              
262 0         0 $self -> error_message($message);
263 0         0 $self -> error_number(7);
264              
265             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
266              
267 0         0 die "Error: $message\n";
268             }
269              
270 14         160 $bnf =~ s/_escape_char_/$escape_char/g;
271              
272 14         71 $self -> bnf($bnf);
273 14         7493 $self -> grammar
274             (
275             Marpa::R2::Scanless::G -> new
276             ({
277             source => \$self -> bnf
278             })
279             );
280              
281             # This hash does not contain the key "'exhausted" because the exhaustion
282             # event is everywhere handled explicitly. Yes, it has a leading quote.
283              
284 14         1433147 my(%event);
285              
286 14         305 for my $line (split(/\n/, $self -> bnf) )
287             {
288 560 100       1331 $event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
289             }
290              
291 14         105 $self -> known_events(\%event);
292              
293             } # End of BUILD.
294              
295             # ------------------------------------------------
296              
297             sub _add_daughter
298             {
299 65     65   102 my($self, $name, $attributes) = @_;
300 65         1258 $attributes = {%$attributes, uid => $self -> uid($self -> uid + 1)};
301 65         2845 my($stack) = $self -> node_stack;
302 65         520 my($node) = Tree -> new($name);
303              
304 65         2452 $node -> meta($attributes);
305              
306 65         896 $$stack[$#$stack] -> add_child({}, $node);
307              
308             } # End of _add_daughter.
309              
310             # ------------------------------------------------
311              
312             sub next_few_chars
313             {
314 897     897 1 884 my($self, $stringref, $offset) = @_;
315 897         13154 my($s) = substr($$stringref, $offset, $self -> next_few_limit);
316 897         11285 $s =~ tr/\n/ /;
317 897         1804 $s =~ s/^\s+//;
318 897         1728 $s =~ s/\s+$//;
319              
320 897         1368 return $s;
321              
322             } # End of next_few_chars.
323              
324             # ------------------------------------------------
325              
326             sub parse
327             {
328 62     62 1 29110 my($self, %opts) = @_;
329              
330             # Emulate parts of new(), which makes things a bit earier for the caller.
331              
332 62 50       229 $self -> options($opts{options}) if (defined $opts{options});
333 62 50       1379 $self -> text($opts{text}) if (defined $opts{text});
334 62 50       8344 $self -> pos($opts{pos}) if (defined $opts{pos});
335 62 50       180 $self -> length($opts{length}) if (defined $opts{length});
336              
337 62         1168 $self -> recce
338             (
339             Marpa::R2::Scanless::R -> new
340             ({
341             exhaustion => 'event',
342             grammar => $self -> grammar,
343             })
344             );
345              
346             # Since $self -> node_stack has not been initialized yet,
347             # we can't call _add_daughter() until after this statement.
348              
349 62         25881 $self -> uid(0);
350 62         8306 $self -> tree(Tree -> new('root') );
351 62         11917 $self -> tree -> meta({end => 0, length => 0, start => 0, text => '', uid => $self -> uid});
352 62         3517 $self -> node_stack([$self -> tree -> root]);
353 62         9622 $self -> delimiter_stack([]);
354              
355             # Return 0 for success and 1 for failure.
356              
357 62         7625 my($result) = 0;
358              
359 62         76 my($message);
360              
361             try
362             {
363 62 50   62   2273 if (defined (my $value = $self -> _process) )
364             {
365 61         78552 $self -> _post_process;
366             }
367             else
368             {
369 0         0 $result = 1;
370              
371 0 0       0 print "Error: Parse failed\n" if ($self -> options & print_errors);
372             }
373             }
374             catch
375             {
376 1     1   14 $result = 1;
377              
378 1 50       17 print "Error: Parse failed. ${_}" if ($self -> options & print_errors);
379 62         628 };
380              
381             # Return 0 for success and 1 for failure.
382              
383 62         1798 return $result;
384              
385             } # End of parse.
386              
387             # ------------------------------------------------
388              
389             sub _pop_node_stack
390             {
391 1     1   1 my($self) = @_;
392 1         16 my($stack) = $self -> node_stack;
393              
394 1         38 pop @$stack;
395              
396 1         21 $self -> node_stack($stack);
397              
398             } # End of _pop_node_stack.
399              
400             # ------------------------------------------------
401              
402             sub _process
403             {
404 62     62   84 my($self) = @_;
405 62   50     1231 my($stringref) = $self -> text || \''; # Allow for undef. Use ' in comment for UltraEdit.
406 62         1162 my($pos) = $self -> pos;
407 62         6570 my($first_pos) = $pos;
408 62         136 my($total_length) = length($$stringref);
409 62   100     805 my($length) = $self -> length || $total_length;
410 62         6982 my($text) = '';
411 62         97 my($format) = "%-20s %5s %5s %5s %-20s %-20s\n";
412 62         91 my($last_event) = '';
413 62         1034 my($matching_delimiter) = $self -> matching_delimiter;
414              
415 62 50       1024 if ($self -> options & print_debugs)
416             {
417 0         0 print "Length of input: $length. Input |$$stringref|\n";
418 0         0 print sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme', 'Comment');
419             }
420              
421 62         7236 my($delimiter_frequency, $delimiter_stack);
422 0         0 my($event_name);
423 0         0 my($lexeme);
424 0         0 my($message);
425 0         0 my($original_lexeme);
426 0         0 my($span, $start);
427 0         0 my($tos);
428              
429             # We use read()/lexeme_read()/resume() because we pause at each lexeme.
430             # Also, in read(), we use $pos and $length to avoid reading Ruby Slippers tokens (if any).
431             # For the latter, see scripts/match.parentheses.02.pl in MarpaX::Demo::SampleScripts.
432              
433 62   66     1013 for
434             (
435             $pos = $self -> recce -> read($stringref, $pos, $length);
436             ($pos < $total_length) && ( ($pos - $first_pos) <= $length);
437             $pos = $self -> recce -> resume($pos)
438             )
439             {
440 897         70991 $delimiter_frequency = $self -> delimiter_frequency;
441 897         23252 $delimiter_stack = $self -> delimiter_stack;
442 897         15112 ($start, $span) = $self -> recce -> pause_span;
443 897         7962 ($event_name, $span, $pos) = $self -> _validate_event($stringref, $start, $span, $pos, $delimiter_frequency);
444              
445             # If the input is exhausted, we exit immediately so we don't try to use
446             # the values of $start, $span or $pos. They are ignored upon exit.
447              
448 897 50       1798 last if ($event_name eq "'exhausted"); # Yes, it has a leading quote.
449              
450 897         13503 $lexeme = $self -> recce -> literal($start, $span);
451 897         7698 $original_lexeme = $lexeme;
452 897         13192 $pos = $self -> recce -> lexeme_read($event_name);
453              
454 897 50       33811 die "lexeme_read($event_name) rejected lexeme |$lexeme|\n" if (! defined $pos);
455              
456 897 50       15617 print sprintf($format, $event_name, $start, $span, $pos, $lexeme, '-') if ($self -> options & print_debugs);
457              
458 897 100       7549 if ($event_name eq 'close_delim')
    100          
    50          
459             {
460 67         377 push @$delimiter_stack,
461             {
462             event_name => $event_name,
463             frequency => $$delimiter_frequency{$lexeme},
464             lexeme => $lexeme,
465             offset => $start - 1, # Do not use length($lexeme)!
466             };
467              
468 67         112 $$delimiter_frequency{$lexeme}--;
469              
470 67         1070 $self -> delimiter_frequency($delimiter_frequency);
471 67         2228 $self -> delimiter_stack($delimiter_stack);
472             }
473             elsif ($event_name eq 'open_delim')
474             {
475             # The reason for using the matching delimiter here, is that problems arise when
476             # the caller is using a mixture of symmetrical delimiters (open, close) = (", ")
477             # and non-matching ones (open, close) = (<:, :>). The problem becomes visible in
478             # the test 'if ($$delimiter_frequency{$lexeme} != 0)' in the loop just below.
479              
480 68         190 $$delimiter_frequency{$$matching_delimiter{$lexeme} }++;
481              
482 68         389 push @$delimiter_stack,
483             {
484             event_name => $event_name,
485             frequency => $$delimiter_frequency{$$matching_delimiter{$lexeme} },
486             lexeme => $lexeme,
487             offset => $start + length($lexeme),
488             };
489              
490 68         1103 $self -> delimiter_frequency($delimiter_frequency);
491 68         2217 $self -> delimiter_stack($delimiter_stack);
492             }
493             elsif ($event_name eq 'text')
494             {
495 762         917 $text .= $lexeme;
496             }
497              
498 897         15310 $last_event = $event_name;
499             }
500              
501             # Cross-check the # of open/close delimiter pairs.
502              
503 62         1645 for $lexeme (keys %$delimiter_frequency)
504             {
505 41 100       168 if ($$delimiter_frequency{$lexeme} != 0)
506             {
507 1         5 $message = "The # of open delimiters ($lexeme) does not match the # of close delimiters. Left over: $$delimiter_frequency{$lexeme}";
508              
509 1         6 $self -> error_message($message);
510 1         633 $self -> error_number(1);
511              
512 1 50       530 if ($self -> options & $self -> options & mismatch_is_fatal)
513             {
514             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
515              
516 1         23 die "$message\n";
517             }
518             else
519             {
520 0         0 $self -> error_number(-1);
521              
522 0 0       0 print "Warning: $message\n" if ($self -> options & print_warnings);
523             }
524             }
525             }
526              
527 61 50       1089 if ($self -> recce -> exhausted)
    50          
528             {
529 0         0 $message = 'Parse exhausted';
530              
531 0         0 $self -> error_message($message);
532 0         0 $self -> error_number(6);
533              
534 0 0       0 if ($self -> options & exhaustion_is_fatal)
535             {
536             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
537              
538 0         0 die "$message\n";
539             }
540             else
541             {
542 0         0 $self -> error_number(-6);
543              
544 0 0       0 print "Warning: $message\n" if ($self -> options & print_warnings);
545             }
546             }
547             elsif (my $status = $self -> recce -> ambiguous)
548             {
549 0         0 my($terminals) = $self -> recce -> terminals_expected;
550 0 0       0 $terminals = ['(None)'] if ($#$terminals < 0);
551 0         0 $message = "Ambiguous parse. Status: $status. Terminals expected: " . join(', ', @$terminals);
552              
553 0         0 $self -> error_message($message);
554 0         0 $self -> error_number(3);
555              
556 0 0       0 if ($self -> options & ambiguity_is_fatal)
    0          
557             {
558             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
559              
560 0         0 die "$message\n";
561             }
562             elsif ($self -> options & print_warnings)
563             {
564 0         0 $self -> error_number(-3);
565              
566 0         0 print "Warning: $message\n";
567             }
568             }
569              
570             # Return a defined value for success and undef for failure.
571              
572 61         7766 return $self -> recce -> value;
573              
574             } # End of _process.
575              
576             # ------------------------------------------------
577              
578             sub _post_process
579             {
580 61     61   105 my($self) = @_;
581              
582             # We scan the stack, looking for (open, close) delimiter events.
583              
584 61         1515 my($stack) = $self -> delimiter_stack;
585 61         1294 my($stringref) = $self -> text;
586 61         327 my($offset) = 0;
587 61         113 my($span_count) = 0;
588              
589 61         85 my($end_item);
590             my($j);
591 0         0 my($node_stack);
592 0         0 my($start_item, $span, @span);
593 0         0 my($text);
594              
595 61         294 for (my $i = 0; $i <= $#$stack; $i++)
596             {
597 130         181 $start_item = $$stack[$i];
598              
599             # Ignore everything but the next open delimiter event.
600              
601 130 100       502 next if ($$start_item{event_name} ne 'open_delim');
602              
603 65         81 $j = $i + 1;
604              
605 65         151 while ($j <= $#$stack)
606             {
607 77         104 $end_item = $$stack[$j];
608              
609 77         81 $j++;
610              
611             # Ignore everything but the corresponding close delimiter event.
612              
613 77 100       180 next if ($$end_item{event_name} ne 'close_delim');
614 71 100       162 next if ($$start_item{frequency} != $$end_item{frequency});
615              
616 65 100       187 if ($#span < 0)
    100          
617             {
618             # First entry.
619             }
620             elsif ($$start_item{offset} <= $span[$#span])
621             {
622             # This start delimiter is within the span of the current delimiter pair,
623             # so this span must be a child of the previous span.
624              
625 6         25 $self -> _push_node_stack;
626             }
627             else
628             {
629             # This start delimiter is after the span of the current delimiter pair,
630             # so it's a new span, and is a sibling of the previous (just-closed) span.
631              
632 21         23 pop @span;
633              
634             # We only pop the node stack if it hash anything besides the root in it.
635              
636 21         446 $node_stack = $self -> node_stack;
637              
638 21 100       138 $self -> _pop_node_stack if ($#$node_stack > 0);
639             }
640              
641 65         247 $span = $$end_item{offset} - $$start_item{offset} + 1;
642 65         142 $text = substr($$stringref, $$start_item{offset}, $span);
643              
644 65         392 $self -> _add_daughter('span', {end => $$end_item{offset}, length => $span, start => $$start_item{offset}, text => $text});
645              
646 65         10231 push @span, $$end_item{offset};
647              
648 65         208 last;
649             }
650             }
651              
652             } # End of _post_process.
653              
654             # ------------------------------------------------
655              
656             sub _push_node_stack
657             {
658 6     6   12 my($self) = @_;
659 6         148 my($stack) = $self -> node_stack;
660 6         49 my(@daughters) = $$stack[$#$stack] -> children;
661              
662 6         145 push @$stack, $daughters[$#daughters];
663              
664 6         114 $self -> node_stack($stack);
665              
666             } # End of _push_node_stack.
667              
668              
669             # ------------------------------------------------
670              
671             sub _validate_event
672             {
673 897     897   1090 my($self, $stringref, $start, $span, $pos, $delimiter_frequency) = @_;
674 897         733 my(@event) = @{$self -> recce -> events};
  897         12882  
675 897         6020 my($event_count) = scalar @event;
676 897         1167 my(@event_name) = sort map{$$_[0]} @event;
  905         2488  
677 897         1078 my($event_name) = $event_name[0]; # Default.
678              
679             # If the input is exhausted, we return immediately so we don't try to use
680             # the values of $start, $span or $pos. They are ignored upon return.
681              
682 897 50       1968 if ($event_name eq "'exhausted") # Yes, it has a leading quote.
683             {
684 0         0 return ($event_name, $span, $pos);
685             }
686              
687 897         1662 my($lexeme) = substr($$stringref, $start, $span);
688 897         13744 my($line, $column) = $self -> recce -> line_column($start);
689 897         8411 my($literal) = $self -> next_few_chars($stringref, $start + $span);
690 897         2196 my($message) = "Location: ($line, $column). Lexeme: |$lexeme|. Next few chars: |$literal|";
691 897         1246 $message = "$message. Events: $event_count. Names: ";
692              
693 897 50       13286 print $message, join(', ', @event_name), "\n" if ($self -> options & print_debugs);
694              
695 897         5676 my(%event_name);
696              
697 897         2113 @event_name{@event_name} = (1) x @event_name;
698              
699 897         1245 for (@event_name)
700             {
701 905 50       795 if (! ${$self -> known_events}{$_})
  905         13639  
702             {
703 0         0 $message = "Unexpected event name '$_'";
704              
705 0         0 $self -> error_message($message);
706 0         0 $self -> error_number(10);
707              
708             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
709              
710 0         0 die "$message\n";
711             }
712             }
713              
714 897 100       6099 if ($event_count > 1)
715             {
716             # We get here for single and double quotes because an open s. or d. quote is
717             # indistinguishable from a close s. or d. quote, and that leads to ambiguity.
718              
719 8 50 33     67 if ( ($lexeme =~ /["']/) && (join(', ', @event_name) eq 'close_delim, open_delim') ) # ".
720             {
721             # At the time _validate_event() is called, the quote count has not yet been bumped.
722             # If this is the 1st quote, then it's an open_delim.
723             # If this is the 2nd quote, them it's a close delim.
724              
725 8 50       29 if ($$delimiter_frequency{$lexeme} % 2 == 0)
726             {
727 0         0 $event_name = 'open_delim';
728              
729 0 0       0 print "Disambiguated lexeme |$lexeme| as '$event_name'\n" if ($self -> options & print_debugs);
730             }
731             else
732             {
733 8         15 $event_name = 'close_delim';
734              
735 8 50       129 print "Disambiguated lexeme |$lexeme| as '$event_name'\n" if ($self -> options & print_debugs);
736             }
737             }
738             else
739             {
740 0         0 $message = join(', ', @event_name);
741 0         0 $message = "The code does not handle these events simultaneously: $message";
742              
743 0         0 $self -> error_message($message);
744 0         0 $self -> error_number(11);
745              
746             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
747              
748 0         0 die "$message\n";
749             }
750             }
751              
752 897         3117 return ($event_name, $span, $pos);
753              
754             } # End of _validate_event.
755              
756             # ------------------------------------------------
757              
758             sub _validate_open_close
759             {
760 14     14   27 my($self) = @_;
761 14         55 my($open) = $self -> open;
762 14         7713 my($close) = $self -> close;
763              
764 14         7154 my($message);
765              
766 14 50 33     130 if ( ($open eq '') || ($close eq '') )
767             {
768 0         0 $message = 'You must specify a pair of open/close delimiters';
769              
770 0         0 $self -> error_message($message);
771 0         0 $self -> error_number(8);
772              
773             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
774              
775 0         0 die "Error: $message\n";
776             }
777              
778 14         75 my(%substitute) = (_close_ => '', _delimiter_ => '', _open_ => '');
779 14         32 my($matching_delimiter) = {};
780 14         52 my(%seen) = (close => {}, open => {});
781              
782 14         24 my($close_quote);
783             my(%delimiter_action);
784 0         0 my($open_quote);
785 0         0 my($prefix, %prefix);
786              
787 14 50 33     143 if ( ($open =~ /\\/) || ($close =~ /\\/) )
788             {
789 0         0 $message = 'Backslash is forbidden as a delimiter character';
790              
791 0         0 $self -> error_message($message);
792 0         0 $self -> error_number(4);
793              
794             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
795              
796 0         0 die "Error: $message\n";
797             }
798              
799 14 50 66     148 if ( ( (length($open) > 1) && ($open =~ /'/) ) || ( (length($close) > 1) && ($close =~ /'/) ) )
      66        
      33        
800             {
801 0         0 $message = 'Single-quotes are forbidden in multi-character delimiters';
802              
803 0         0 $self -> error_message($message);
804 0         0 $self -> error_number(5);
805              
806             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
807              
808 0         0 die "Error: $message\n";
809             }
810              
811 14 50       80 $seen{open}{$open} = 0 if (! $seen{open}{$open});
812 14 50       63 $seen{close}{$close} = 0 if (! $seen{close}{$close});
813              
814 14         28 $seen{open}{$open}++;
815 14         24 $seen{close}{$close}++;
816              
817 14         23 $delimiter_action{$open} = 'open';
818 14         27 $delimiter_action{$close} = 'close';
819 14         23 $$matching_delimiter{$open} = $close;
820              
821 14 100       57 if (length($open) == 1)
822             {
823 9 100       37 $open_quote = $open eq '[' ? "[\\$open]" : "[$open]";
824             }
825             else
826             {
827             # This fails if length > 1 and open contains a single quote.
828              
829 5         14 $open_quote = "'$open'";
830             }
831              
832 14 100       54 if (length($close) == 1)
833             {
834 10 100       41 $close_quote = $close eq ']' ? "[\\$close]" : "[$close]";
835             }
836             else
837             {
838             # This fails if length > 1 and close contains a single quote.
839              
840 4         11 $close_quote = "'$close'";
841             }
842              
843 14 50       84 $substitute{_open_} .= "open_delim\t\t\t\~ $open_quote\n" if ($seen{open}{$open} <= 1);
844 14 50       78 $substitute{_close_} .= "close_delim\t\t\t\~ $close_quote\n" if ($seen{close}{$close} <= 1);
845 14         44 $prefix = substr($open, 0, 1);
846 14 100       73 $prefix = "\\$prefix" if ($prefix =~ /[\[\]]/);
847 14 50       51 $prefix{$prefix} = 0 if (! $prefix{$prefix});
848              
849 14         24 $prefix{$prefix}++;
850              
851 14 50       55 $substitute{_delimiter_} .= $prefix if ($prefix{$prefix} == 1);
852 14         34 $prefix = substr($close, 0, 1);
853 14 100       49 $prefix = "\\$prefix" if ($prefix =~ /[\[\]]/);
854 14 100       45 $prefix{$prefix} = 0 if (! $prefix{$prefix});
855              
856 14         24 $prefix{$prefix}++;
857              
858 14 100       52 $substitute{_delimiter_} .= $prefix if ($prefix{$prefix} == 1);
859              
860 14         85 $self -> delimiter_action(\%delimiter_action);
861 14         7652 $self -> matching_delimiter($matching_delimiter);
862              
863 14         7223 return \%substitute;
864              
865             } # End of _validate_open_close.
866              
867             # ------------------------------------------------
868              
869             1;
870              
871             =pod
872              
873             =head1 NAME
874              
875             C - Extract delimited text sequences from strings
876              
877             =head1 Synopsis
878              
879             This is scripts/synopsis.pl:
880              
881             #!/usr/bin/env perl
882              
883             use strict;
884             use warnings;
885              
886             use Text::Delimited::Marpa ':constants';
887              
888             # -----------
889              
890             my(%count) = (fail => 0, success => 0, total => 0);
891             my($parser) = Text::Delimited::Marpa -> new
892             (
893             open => '/*',
894             close => '*/',
895             options => print_errors | print_warnings | mismatch_is_fatal,
896             );
897             my(@text) =
898             (
899             q|Start /* One /* Two /* Three */ Four */ Five */ Finish|,
900             );
901              
902             my($result);
903             my($text);
904              
905             for my $i (0 .. $#text)
906             {
907             $count{total}++;
908              
909             $text = $text[$i];
910              
911             print "Parsing |$text|. pos: ", $parser -> pos, '. length: ', $parser -> length, "\n";
912              
913             $result = $parser -> parse(text => \$text);
914              
915             print "Parse result: $result (0 is success)\n";
916              
917             if ($result == 0)
918             {
919             $count{success}++;
920              
921             print join("\n", @{$parser -> tree -> tree2string}), "\n";
922             }
923             }
924              
925             $count{fail} = $count{total} - $count{success};
926              
927             print "\n";
928             print 'Statistics: ', join(', ', map{"$_ => $count{$_}"} sort keys %count), ". \n";
929              
930             This is the output of synopsis.pl:
931              
932             Parsing |Start /* One /* Two /* Three */ Four */ Five */ Finish|. pos: 0. length: 0
933             Parse result: 0 (0 is success)
934             root. Attributes: {end => "0", length => "0", start => "0", text => "", uid => "0"}
935             |--- span. Attributes: {end => "44", length => "37", start => "8", text => " One /* Two /* Three */ Four */ Five ", uid => "1"}
936             |--- span. Attributes: {end => "36", length => "22", start => "15", text => " Two /* Three */ Four ", uid => "2"}
937             |--- span. Attributes: {end => "28", length => "7", start => "22", text => " Three ", uid => "3"}
938              
939             Statistics: fail => 0, success => 1, total => 1.
940              
941             See also scripts/tiny.pl and scripts/traverse.pl.
942              
943             =head1 Description
944              
945             L provides a L-based parser for extracting delimited text
946             sequences from strings. The text between the delimiters is stored as nodes in a tree managed by
947             L. The delimiters, and the text outside the delimiters, is not saved in the tree.
948              
949             Nested strings with the same delimiters are saved as daughters of their enclosing strings' tree
950             nodes. As you can see from the output just above, this nesting process is repeated as many times as
951             the delimiters themselves are nested.
952              
953             You can ignore the nested, delimited, strings by just processing the daughters of the tree's root
954             node.
955              
956             This module is a companion to L. The differences are discussed in the L
957             below.
958              
959             See the L for various topics, including:
960              
961             =over 4
962              
963             =item o UFT8 handling
964              
965             See t/utf8.t.
966              
967             =item o Escaping delimiters within the text
968              
969             See t/escapes.t.
970              
971             =item o Options to make mismatched delimiters fatal errors
972              
973             See t/escapes.t and t/perl.delimiters.
974              
975             =item o Processing the tree-structured output
976              
977             See scripts/traverse.pl.
978              
979             =item o Emulating L's use of '<:' and ':>
980              
981             See t/colons.t and t/percents.t.
982              
983             =item o Skipping (leading) characters in the input string
984              
985             See t/skip.prefix.t.
986              
987             =item o Implementing hard-to-read text strings as delimiters
988              
989             See t/silly.delimiters.
990              
991             =back
992              
993             =head1 Distributions
994              
995             This module is available as a Unix-style distro (*.tgz).
996              
997             See L
998             for help on unpacking and installing distros.
999              
1000             =head1 Installation
1001              
1002             Install L as you would any C module:
1003              
1004             Run:
1005              
1006             cpanm Text::Delimited::Marpa
1007              
1008             or run:
1009              
1010             sudo cpan Text::Delimited::Marpa
1011              
1012             or unpack the distro, and then either:
1013              
1014             perl Build.PL
1015             ./Build
1016             ./Build test
1017             sudo ./Build install
1018              
1019             or:
1020              
1021             perl Makefile.PL
1022             make (or dmake or nmake)
1023             make test
1024             make install
1025              
1026             =head1 Constructor and Initialization
1027              
1028             C is called as C<< my($parser) = Text::Delimited::Marpa -> new(k1 => v1, k2 => v2, ...) >>.
1029              
1030             It returns a new object of type C.
1031              
1032             Key-value pairs accepted in the parameter list (see corresponding methods for details
1033             [e.g. L]):
1034              
1035             =over 4
1036              
1037             =item o close => $string
1038              
1039             The closing delimiter.
1040              
1041             A value for this option is mandatory.
1042              
1043             Default: None.
1044              
1045             =item o length => $integer
1046              
1047             The maxiumum length of the input string to process.
1048              
1049             This parameter works in conjunction with the C parameter.
1050              
1051             C can also be used as a key in the hash passed to L.
1052              
1053             See the L for details.
1054              
1055             Default: Calls Perl's length() function on the input string.
1056              
1057             =item o next_few_limit => $integer
1058              
1059             This controls how many characters are printed when displaying 'the next few chars'.
1060              
1061             It only affects debug output.
1062              
1063             Default: 20.
1064              
1065             =item o open => $string
1066              
1067             The opening delimiter.
1068              
1069             See the L for details and warnings.
1070              
1071             A value for this option is mandatory.
1072              
1073             Default: None.
1074              
1075             =item o options => $bit_string
1076              
1077             This allows you to turn on various options.
1078              
1079             C can also be used as a key in the hash passed to L.
1080              
1081             Default: 0 (nothing is fatal).
1082              
1083             See the L for details.
1084              
1085             =item o pos => $integer
1086              
1087             The offset within the input string at which to start processing.
1088              
1089             This parameter works in conjunction with the C parameter.
1090              
1091             C can also be used as a key in the hash passed to L.
1092              
1093             See the L for details.
1094              
1095             Note: The first character in the input string is at pos == 0.
1096              
1097             Default: 0.
1098              
1099             =item o text => $stringref
1100              
1101             This is a reference to the string to be parsed. A stringref is used to avoid copying what could
1102             potentially be a very long string.
1103              
1104             C can also be used as a key in the hash passed to L.
1105              
1106             Default: \''.
1107              
1108             =back
1109              
1110             =head1 Methods
1111              
1112             =head2 bnf()
1113              
1114             Returns a string containing the grammar constructed based on user input.
1115              
1116             =head2 close()
1117              
1118             Get the closing delimiter.
1119              
1120             See also L.
1121              
1122             See the L for details and warnings.
1123              
1124             'close' is a parameter to L. See L for details.
1125              
1126             =head2 delimiter_action()
1127              
1128             Returns a hashref, where the keys are delimiters and the values are either 'open' or 'close'.
1129              
1130             =head2 error_message()
1131              
1132             Returns the last error or warning message set.
1133              
1134             Error messages always start with 'Error: '. Messages never end with "\n".
1135              
1136             Parsing error strings is not a good idea, ever though this module's format for them is fixed.
1137              
1138             See L.
1139              
1140             =head2 error_number()
1141              
1142             Returns the last error or warning number set.
1143              
1144             Warnings have values < 0, and errors have values > 0.
1145              
1146             If the value is > 0, the message has the prefix 'Error: ', and if the value is < 0, it has the
1147             prefix 'Warning: '. If this is not the case, it's a reportable bug.
1148              
1149             Possible values for error_number() and error_message():
1150              
1151             =over 4
1152              
1153             =item o 0 => ""
1154              
1155             This is the default value.
1156              
1157             =item o 1/-1 => "The # of open delimiters ($lexeme) does not match the # of close delimiters. Left over: $integer"
1158              
1159             If L returns 1, it's an error, and if it returns -1 it's a warning.
1160              
1161             You can set the option C to make it fatal.
1162              
1163             =item o 2/-2 => (Not used)
1164              
1165             =item o 3/-3 => "Ambiguous parse. Status: $status. Terminals expected: a, b, ..."
1166              
1167             This message is only produced when the parse is ambiguous.
1168              
1169             If L returns 3, it's an error, and if it returns -3 it's a warning.
1170              
1171             You can set the option C to make it fatal.
1172              
1173             =item o 4 => "Backslash is forbidden as a delimiter character"
1174              
1175             This preempts some types of sabotage.
1176              
1177             This message can never be just a warning message.
1178              
1179             =item o 5 => "Single-quotes are forbidden in multi-character delimiters"
1180              
1181             This limitation is due to the syntax of
1182             L.
1183              
1184             This message can never be just a warning message.
1185              
1186             =item o 6/-6 => "Parse exhausted"
1187              
1188             If L returns 6, it's an error, and if it returns -6 it's a warning.
1189              
1190             You can set the option C to make it fatal.
1191              
1192             =item o 7 => 'Single-quote is forbidden as an escape character'
1193              
1194             This limitation is due to the syntax of
1195             L.
1196              
1197             This message can never be just a warning message.
1198              
1199             =item o 8 => "There must be at least 1 pair of open/close delimiters"
1200              
1201             This message can never be just a warning message.
1202              
1203             =item o 10 => "Unexpected event name 'xyz'"
1204              
1205             Marpa has trigged an event and it's name is not in the hash of event names derived from the BNF.
1206              
1207             This message can never be just a warning message.
1208              
1209             =item o 11 => "The code does not handle these events simultaneously: a, b, ..."
1210              
1211             The code is written to handle single events at a time, or in rare cases, 2 events at the same time.
1212             But here, multiple events have been triggered and the code cannot handle the given combination.
1213              
1214             This message can never be just a warning message.
1215              
1216             =back
1217              
1218             See L.
1219              
1220             =head2 escape_char()
1221              
1222             Get the escape char.
1223              
1224             =head2 known_events()
1225              
1226             Returns a hashref where the keys are event names and the values are 1.
1227              
1228             =head2 length([$integer])
1229              
1230             Here, the [] indicate an optional parameter.
1231              
1232             Get or set the length of the input string to process.
1233              
1234             See also the L and L.
1235              
1236             'length' is a parameter to L. See L for details.
1237              
1238             =head2 matching_delimiter()
1239              
1240             Returns a hashref where the keys are opening delimiters and the values are the corresponding closing
1241             delimiters.
1242              
1243             =head2 new()
1244              
1245             See L for details on the parameters accepted by L.
1246              
1247             =head2 next_few_chars($stringref, $offset)
1248              
1249             Returns a substring of $s, starting at $offset, for use in debug messages.
1250              
1251             See L.
1252              
1253             =head2 next_few_limit([$integer])
1254              
1255             Here, the [] indicate an optional parameter.
1256              
1257             Get or set the number of characters called 'the next few chars', which are printed during debugging.
1258              
1259             'next_few_limit' is a parameter to L. See L for details.
1260              
1261             =head2 open()
1262              
1263             Get the opening delimiter.
1264              
1265             See also L.
1266              
1267             See the L for details and warnings.
1268              
1269             'open' is a parameter to L. See L for details.
1270              
1271             =head2 options([$bit_string])
1272              
1273             Here, the [] indicate an optional parameter.
1274              
1275             Get or set the option flags.
1276              
1277             For typical usage, see scripts/synopsis.pl.
1278              
1279             See the L for details.
1280              
1281             'options' is a parameter to L. See L for details.
1282              
1283             =head2 parse([%hash])
1284              
1285             Here, the [] indicate an optional parameter.
1286              
1287             This is the only method the user needs to call. All data can be supplied when calling L.
1288              
1289             You can of course call other methods (e.g. L ) after calling L but
1290             before calling C.
1291              
1292             The optional hash takes these ($key => $value) pairs (exactly the same as for L):
1293              
1294             =over 4
1295              
1296             =item o length => $integer
1297              
1298             =item o options => $bit_string
1299              
1300             =item o pos => $integer
1301              
1302             =item o text => $stringref
1303              
1304             =back
1305              
1306             Note: If a value is passed to C, it takes precedence over any value with the same
1307             key passed to L, and over any value previously passed to the method whose name is $key.
1308             Further, the value passed to C is always passed to the corresponding method (i.e. whose
1309             name is $key), meaning any subsequent call to that method returns the value passed to C.
1310              
1311             Returns 0 for success and 1 for failure.
1312              
1313             If the value is 1, you should call L to find out what happened.
1314              
1315             =head2 pos([$integer])
1316              
1317             Here, the [] indicate an optional parameter.
1318              
1319             Get or set the offset within the input string at which to start processing.
1320              
1321             See also the L and L.
1322              
1323             'pos' is a parameter to L. See L for details.
1324              
1325             =head2 text([$stringref])
1326              
1327             Here, the [] indicate an optional parameter.
1328              
1329             Get or set a reference to the string to be parsed.
1330              
1331             'text' is a parameter to L. See L for details.
1332              
1333             =head2 tree()
1334              
1335             Returns an object of type L, which holds the parsed data.
1336              
1337             Obviously, it only makes sense to call C after calling C.
1338              
1339             See scripts/traverse.pl for sample code which processes this tree's nodes.
1340              
1341             =head1 FAQ
1342              
1343             =head2 What are the differences between Text::Balanced::Marpa and Text::Delimited::Marpa?
1344              
1345             I think this is shown most clearly by getting the 2 modules to process the same string. So,
1346             using this as input:
1347              
1348             'a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k'
1349              
1350             Output from Text::Balanced::Marpa's scripts/tiny.pl:
1351              
1352             (# 2) | 1 2 3 4 5 6 7 8 9
1353             |0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
1354             Parsing |Skip me ->a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|. pos: 10. length: 42
1355             Parse result: 0 (0 is success)
1356             root. Attributes: {text => "", uid => "0"}
1357             |--- text. Attributes: {text => "a ", uid => "1"}
1358             |--- open. Attributes: {text => "<:", uid => "2"}
1359             | |--- text. Attributes: {text => "b ", uid => "3"}
1360             | |--- open. Attributes: {text => "<:", uid => "4"}
1361             | | |--- text. Attributes: {text => "c", uid => "5"}
1362             | |--- close. Attributes: {text => ":>", uid => "6"}
1363             | |--- text. Attributes: {text => " d", uid => "7"}
1364             |--- close. Attributes: {text => ":>", uid => "8"}
1365             |--- text. Attributes: {text => " e ", uid => "9"}
1366             |--- open. Attributes: {text => "<:", uid => "10"}
1367             | |--- text. Attributes: {text => "f ", uid => "11"}
1368             | |--- open. Attributes: {text => "<:", uid => "12"}
1369             | | |--- text. Attributes: {text => " g ", uid => "13"}
1370             | | |--- open. Attributes: {text => "<:", uid => "14"}
1371             | | | |--- text. Attributes: {text => "h", uid => "15"}
1372             | | |--- close. Attributes: {text => ":>", uid => "16"}
1373             | | |--- text. Attributes: {text => " i", uid => "17"}
1374             | |--- close. Attributes: {text => ":>", uid => "18"}
1375             | |--- text. Attributes: {text => " j", uid => "19"}
1376             |--- close. Attributes: {text => ":>", uid => "20"}
1377             |--- text. Attributes: {text => " k", uid => "21"}
1378              
1379             Output from Text::Delimited::Marpa's scripts/tiny.pl:
1380              
1381             (# 2) | 1 2 3 4 5 6 7 8 9
1382             |0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
1383             Parsing |Skip me ->a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|. pos: 10. length: 42
1384             Parse result: 0 (0 is success)
1385             root. Attributes: {end => "0", length => "0", start => "0", text => "", uid => "0"}
1386             |--- span. Attributes: {end => "22", length => "9", start => "14", text => "b <:c:> d", uid => "1"}
1387             | |--- span. Attributes: {end => "18", length => "1", start => "18", text => "c", uid => "2"}
1388             |--- span. Attributes: {end => "47", length => "18", start => "30", text => "f <: g <:h:> i:> j", uid => "3"}
1389             |--- span. Attributes: {end => "43", length => "10", start => "34", text => " g <:h:> i", uid => "4"}
1390             |--- span. Attributes: {end => "39", length => "1", start => "39", text => "h", uid => "5"}
1391              
1392             Another example, using the same input string, but manually processing the tree nodes.
1393             Parent-daughter relationships are here represented by indentation.
1394              
1395             Output from Text::Balanced::Marpa's scripts/traverse.pl:
1396              
1397             | 1 2 3 4 5
1398             |012345678901234567890123456789012345678901234567890
1399             Parsing |a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|.
1400             Span Text
1401             1 |a |
1402             2 |<:|
1403             3 |b |
1404             4 |<:|
1405             5 |c|
1406             6 |:>|
1407             7 | d|
1408             8 |:>|
1409             9 | e |
1410             10 |<:|
1411             11 |f |
1412             12 |<:|
1413             13 | g |
1414             14 |<:|
1415             15 |h|
1416             16 |:>|
1417             17 | i|
1418             18 |:>|
1419             19 | j|
1420             20 |:>|
1421             21 | k|
1422              
1423             Output from Text::Delimited::Marpa's scripts/traverse.pl:
1424              
1425             | 1 2 3 4 5
1426             |012345678901234567890123456789012345678901234567890
1427             Parsing |a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|.
1428             Span Start End Length Text
1429             1 4 12 9 |b <:c:> d|
1430             2 8 8 1 |c|
1431             3 20 37 18 |f <: g <:h:> i:> j|
1432             4 24 33 10 | g <:h:> i|
1433             5 29 29 1 |h|
1434              
1435             =head2 How do I ignore embedded strings which have the same delimiters as their containing strings?
1436              
1437             You can ignore the nested, delimited, strings by just processing the daughters of the tree's root
1438             node.
1439              
1440             =head2 Where are the error messages and numbers described?
1441              
1442             See L and L.
1443              
1444             =head2 How do I escape delimiters?
1445              
1446             By backslash-escaping the first character of all open and close delimiters which appear in the
1447             text.
1448              
1449             As an example, if the delimiters are '<:' and ':>', this means you have to escape I the '<'
1450             chars and I the colons in the text.
1451              
1452             The backslash is preserved in the output.
1453              
1454             If you don't want to use backslash for escaping, or can't, you can pass a different escape character
1455             to L.
1456              
1457             See t/escapes.t.
1458              
1459             =head2 How do the length and pos parameters to new() work?
1460              
1461             The recognizer - an object of type Marpa::R2::Scanless::R - is called in a loop, like this:
1462              
1463             for
1464             (
1465             $pos = $self -> recce -> read($stringref, $pos, $length);
1466             $pos < $length;
1467             $pos = $self -> recce -> resume($pos)
1468             )
1469              
1470             L and L can be used to initialize $pos and $length.
1471              
1472             Note: The first character in the input string is at pos == 0.
1473              
1474             See L for details.
1475              
1476             =head2 Does this package support Unicode/UTF8?
1477              
1478             Yes. See t/escapes.t and t/utf8.t.
1479              
1480             =head2 Does this package handler Perl delimiters (e.g. q|..|, qq|..|, qr/../, qw/../)?
1481              
1482             See t/perl.delimiters.t.
1483              
1484             =head2 Warning: Calling mutators after calling new()
1485              
1486             The only mutator which works after calling new() is L.
1487              
1488             In particular, you can't call L, L or L after calling L.
1489             This is because parameters passed to C are interpolated into the grammar before parsing
1490             begins. And that's why the docs for those methods all say 'Get the...' and not 'Get and set the...'.
1491              
1492             To make the code work, you would have to manually call _validate_open_close(). But even then
1493             a lot of things would have to be re-initialized to give the code any hope of working.
1494              
1495             =head2 What is the format of the 'open' and 'close' parameters to new()?
1496              
1497             Each of these parameters takes a string as a value, and these are used as the opening and closing
1498             delimiter pair.
1499              
1500             See scripts/synopsis.pl and scripts/tiny.pl.
1501              
1502             =head2 What are the possible values for the 'options' parameter to new()?
1503              
1504             Firstly, to make these constants available, you must say:
1505              
1506             use Text::Delimited::Marpa ':constants';
1507              
1508             Secondly, more detail on errors and warnings can be found at L.
1509              
1510             Thirdly, for usage of these option flags, see t/angle.brackets.t, t/colons.t, t/escapes.t,
1511             t/percents.t and scripts/tiny.pl.
1512              
1513             Now the flags themselves:
1514              
1515             =over 4
1516              
1517             =item o nothing_is_fatal
1518              
1519             This is the default.
1520              
1521             C has the value of 0.
1522              
1523             =item o print_errors
1524              
1525             Print errors if this flag is set.
1526              
1527             C has the value of 1.
1528              
1529             =item o print_warnings
1530              
1531             Print various warnings if this flag is set:
1532              
1533             =over 4
1534              
1535             =item o The ambiguity status and terminals expected, if the parse is ambiguous
1536              
1537             =item o See L for other warnings which might be printed
1538              
1539             Ambiguity is not, in and of itself, an error. But see the C option, below.
1540              
1541             =back
1542              
1543             It's tempting to call this option C, but Perl already has C, so I didn't.
1544              
1545             C has the value of 2.
1546              
1547             =item o print_debugs
1548              
1549             Print extra stuff if this flag is set.
1550              
1551             C has the value of 4.
1552              
1553             =item o mismatch_is_fatal
1554              
1555             This means a fatal error occurs when the number of open delimiters does not match the number of
1556             close delimiters.
1557              
1558             C has the value of 8.
1559              
1560             =item o ambiguity_is_fatal
1561              
1562             This makes L return 3 rather than -3.
1563              
1564             C has the value of 16.
1565              
1566             =item o exhaustion_is_fatal
1567              
1568             This makes L return 6 rather than -6.
1569              
1570             C has the value of 32.
1571              
1572             =back
1573              
1574             =head2 How do I print the tree built by the parser?
1575              
1576             See L.
1577              
1578             =head2 How do I make use of the tree built by the parser?
1579              
1580             See scripts/traverse.pl.
1581              
1582             =head2 How is the parsed data held in RAM?
1583              
1584             The parsed output is held in a tree managed by L.
1585              
1586             The tree always has a root node, which has nothing to do with the input data. So, even an empty
1587             imput string will produce a tree with 1 node. This root has an empty hashref associated with it.
1588              
1589             Nodes have a name and a hashref of attributes.
1590              
1591             The name indicates the type of node. Names are one of these literals:
1592              
1593             =over 4
1594              
1595             =item o root
1596              
1597             =item o span
1598              
1599             =back
1600              
1601             The (key => value) pairs in the hashref are:
1602              
1603             =over 4
1604              
1605             =item o end => $integer
1606              
1607             The offset into the original stringref at which the current span of text ends.
1608              
1609             =item o length => $integer
1610              
1611             The number of characters in the current span.
1612              
1613             =item o start => $integer
1614              
1615             The offset into the original stringref at which the current span of text starts.
1616              
1617             =item o text => $string
1618              
1619             If the node name is 'text', $string is the verbatim text from the document.
1620              
1621             Verbatim means, for example, that backslashes in the input are preserved.
1622              
1623             =back
1624              
1625             =head2 What is the homepage of Marpa?
1626              
1627             L.
1628              
1629             That page has a long list of links.
1630              
1631             =head2 How do I run author tests?
1632              
1633             This runs both standard and author tests:
1634              
1635             shell> perl Build.PL; ./Build; ./Build authortest
1636              
1637             =head1 TODO
1638              
1639             =over 4
1640              
1641             =item o Advanced error reporting
1642              
1643             See L.
1644              
1645             Perhaps this could be a sub-class?
1646              
1647             =item o I8N support for error messages
1648              
1649             =item o An explicit test program for parse exhaustion
1650              
1651             =back
1652              
1653             =head1 See Also
1654              
1655             L.
1656              
1657             L and L.
1658              
1659             =head1 Machine-Readable Change Log
1660              
1661             The file Changes was converted into Changelog.ini by L.
1662              
1663             =head1 Version Numbers
1664              
1665             Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1666              
1667             =head1 Thanks
1668              
1669             Thanks to Jeffrey Kegler, who wrote Marpa and L.
1670              
1671             And thanks to rns (Ruslan Shvedov) for writing the grammar for double-quoted strings used in
1672             L's scripts/quoted.strings.02.pl. I adapted it to HTML (see
1673             scripts/quoted.strings.05.pl in that module), and then incorporated the grammar into
1674             L, and - after more extensions - into this module.
1675              
1676             Lastly, thanks to Robert Rothenberg for L, a module which works the same way
1677             Perl does.
1678              
1679             =head1 Repository
1680              
1681             L
1682              
1683             =head1 Support
1684              
1685             Email the author, or log a bug on RT:
1686              
1687             L.
1688              
1689             =head1 Author
1690              
1691             L was written by Ron Savage Iron@savage.net.auE> in 2014.
1692              
1693             Marpa's homepage: L.
1694              
1695             My homepage: L.
1696              
1697             =head1 Copyright
1698              
1699             Australian copyright (c) 2015, Ron Savage.
1700              
1701             All Programs of mine are 'OSI Certified Open Source Software';
1702             you can redistribute them and/or modify them under the terms of
1703             The Artistic License 2.0, a copy of which is available at:
1704             http://opensource.org/licenses/alphabetical.
1705              
1706             =cut