File Coverage

blib/lib/Regexp/Parsertron.pm
Criterion Covered Total %
statement 214 245 87.3
branch 74 102 72.5
condition 7 12 58.3
subroutine 27 33 81.8
pod 11 13 84.6
total 333 405 82.2


line stmt bran cond sub pod time code
1             package Regexp::Parsertron;
2              
3 6     6   47496 use strict;
  6         45  
  6         221  
4 6     6   29 use warnings;
  6         11  
  6         164  
5             #use warnings qw(FATAL utf8); # Fatalize encoding glitches.
6              
7 6     6   2581 use Data::Section::Simple 'get_data_section';
  6         3830  
  6         358  
8              
9 6     6   2513 use Marpa::R2;
  6         877369  
  6         305  
10              
11 6     6   3178 use Moo;
  6         74409  
  6         34  
12              
13 6     6   11225 use Scalar::Does '-constants'; # For does().
  6         676124  
  6         94  
14              
15 6     6   18731 use Tree;
  6         35051  
  6         210  
16              
17 6     6   45 use Try::Tiny;
  6         15  
  6         362  
18              
19 6     6   41 use Types::Standard qw/Any Int Str/;
  6         18  
  6         49  
20              
21             has ambiguous =>
22             (
23             default => sub{return 0},
24             is => 'rw',
25             isa => Int,
26             required => 0,
27             );
28              
29             has bnf =>
30             (
31             default => sub{return ''},
32             is => 'rw',
33             isa => Any,
34             required => 0,
35             );
36              
37             has current_node =>
38             (
39             default => sub{return ''},
40             is => 'rw',
41             isa => Any,
42             required => 0,
43             );
44              
45             has grammar =>
46             (
47             default => sub {return ''},
48             is => 'rw',
49             isa => Any,
50             required => 0,
51             );
52              
53             has re =>
54             (
55             default => sub {return ''},
56             is => 'rw',
57             isa => Any,
58             required => 0,
59             );
60              
61             has recce =>
62             (
63             default => sub{return ''},
64             is => 'rw',
65             isa => Any,
66             required => 0,
67             );
68              
69             has test_count =>
70             (
71             default => sub{return 0},
72             is => 'rw',
73             isa => Int,
74             required => 0,
75             );
76              
77             has tree =>
78             (
79             default => sub{return Tree -> new('Root')},
80             is => 'rw',
81             isa => Any,
82             required => 0,
83             );
84              
85             has uid =>
86             (
87             default => sub {return 0},
88             is => 'rw',
89             isa => Int,
90             required => 0,
91             );
92              
93             has verbose =>
94             (
95             default => sub {return 0},
96             is => 'rw',
97             isa => Int,
98             required => 0,
99             );
100              
101             has warning_str =>
102             (
103             default => sub {return ''},
104             is => 'rw',
105             isa => Str,
106             required => 0,
107             );
108              
109             our $VERSION = '1.05';
110              
111             # ------------------------------------------------
112              
113             sub BUILD
114             {
115 5     5 0 132 my($self) = @_;
116 5         33 my($bnf) = get_data_section('V 5.20');
117              
118 5         7632 $self -> bnf($bnf);
119 5         259 $self -> grammar
120             (
121             Marpa::R2::Scanless::G -> new
122             ({
123             source => \$self -> bnf
124             })
125             );
126 5         2480815 $self -> reset;
127              
128             } # End of BUILD.
129              
130             # ------------------------------------------------
131              
132             sub append
133             {
134 2     2 1 27 my($self, %opts) = @_;
135              
136 2         9 for my $param (qw/text uid/)
137             {
138             # The \n stops Perl printing the line number.
139              
140 4 50       20 die "Method append() takes a hash with these keys: text, uid\n" if (! defined($opts{$param}) );
141             }
142              
143 2         8 my($meta);
144             my($uid);
145              
146 2         48 for my $node ($self -> tree -> traverse)
147             {
148 14 100       301 next if ($node -> is_root);
149              
150 12         97 $meta = $node -> meta;
151 12         114 $uid = $$meta{uid};
152              
153 12 100       33 if ($opts{uid} == $uid)
154             {
155 2         11 $$meta{text} .= $opts{text};
156             }
157             }
158              
159             } # End of append.
160              
161             # ------------------------------------------------
162              
163             sub _add_daughter
164             {
165 14841     14841   33377 my($self, $event_name, $attributes) = @_;
166 14841         277597 $$attributes{uid} = $self -> uid($self -> uid + 1);
167 14841         450403 my($node) = Tree -> new($event_name);
168              
169 14841         622484 $node -> meta($attributes);
170              
171 14841 100       209151 if ($event_name =~ /^close_(?:bracket|parenthesis)$/)
172             {
173 3089         59098 $self -> current_node($self -> current_node -> parent);
174             }
175              
176 14841         396737 $self -> current_node -> add_child($node);
177              
178 14841 100 100     4015796 if ( ($event_name =~ /^open_(?:bracket|parenthesis)$/) || ($event_name =~ /_prefix$/) )
179             {
180 3308         67147 $self -> current_node($node);
181             }
182              
183             } # End of _add_daughter.
184              
185             # ------------------------------------------------
186              
187             sub as_string
188             {
189 872     872 1 9051 my($self) = @_;
190 872         2492 my($string) = '';
191              
192 872         1719 my($meta);
193              
194 872         18609 for my $node ($self -> tree -> traverse)
195             {
196 8382 100       150286 next if ($node -> is_root);
197              
198 7510         54686 $meta = $node -> meta;
199 7510         49296 $string .= $$meta{text};
200             }
201              
202 872         3621 return $string;
203              
204             } # End of as_string.
205              
206             # ------------------------------------------------
207              
208             sub find
209             {
210 2     2 1 314 my($self, $target) = @_;
211              
212             # The \n stops Perl printing the line number.
213              
214 2 50       11 die "Method find() takes a defined value as the parameter\n" if (! defined $target);
215              
216 2         6 my(@found);
217             my($meta);
218              
219 2         48 for my $node ($self -> tree -> traverse)
220             {
221 26 100       407 next if ($node -> is_root);
222              
223 24         179 $meta = $node -> meta;
224              
225 24 100       177 if (index($$meta{text}, $target) >= 0)
226             {
227 3         9 push @found, $$meta{uid};
228             }
229             }
230              
231 2         11 return [@found];
232              
233             } # End of find.
234              
235             # ------------------------------------------------
236              
237             sub get
238             {
239 7     7 1 1635 my($self, $wanted_uid) = @_;
240 7         156 my($max_uid) = $self -> uid;
241              
242 7 50 33     173 if (! defined($wanted_uid) || ($wanted_uid < 1) || ($wanted_uid > $self -> uid) )
      33        
243             {
244             # The \n stops Perl printing the line number.
245              
246 0         0 die "Method get() takes a uid parameter in the range 1 .. $max_uid\n";
247             }
248              
249 7         86 my($meta);
250             my($text);
251 7         0 my($uid);
252              
253 7         119 for my $node ($self -> tree -> traverse)
254             {
255 49 100       811 next if ($node -> is_root);
256              
257 42         293 $meta = $node -> meta;
258 42         238 $uid = $$meta{uid};
259              
260 42 100       80 if ($wanted_uid == $uid)
261             {
262 7         14 $text = $$meta{text};
263             }
264             }
265              
266 7         21 return $text;
267              
268             } # End of get.
269              
270             # ------------------------------------------------
271              
272             sub _next_few_chars
273             {
274 14833     14833   36521 my($self, $stringref, $offset) = @_;
275 14833         33415 my($s) = substr($$stringref, $offset, 20);
276 14833         29549 $s =~ tr/\n/ /;
277 14833         34631 $s =~ s/^\s+//;
278 14833         28397 $s =~ s/\s+$//;
279              
280 14833         32797 return $s;
281              
282             } # End of _next_few_chars.
283              
284             # ------------------------------------------------
285              
286             sub parse
287             {
288 1525     1525 1 3833082 my($self, %opts) = @_;
289              
290             # Emulate parts of new(), which makes things a bit earier for the caller.
291              
292 1525 50       47139 $self -> re($opts{re}) if (defined $opts{re});
293 1525 50       55208 $self -> verbose($opts{verbose}) if (defined $opts{verbose});
294 1525         29324 $self -> warning_str('');
295              
296 1525         67023 $self -> recce
297             (
298             Marpa::R2::Scanless::R -> new
299             ({
300             exhaustion => 'event',
301             grammar => $self -> grammar,
302             })
303             );
304              
305             # Return 0 for success and 1 for failure.
306              
307 1525         699690 my($result) = 0;
308              
309 1525         3483 my($message);
310              
311             try
312             {
313 1525 100   1525   65288 if (defined (my $value = $self -> _process) )
314             {
315 874 50       23915125 $self -> print_cooked_tree if ($self -> verbose > 1);
316             }
317             else
318             {
319 551         6079 $result = 1;
320              
321 551 50       8794 if ($self -> ambiguous)
322             {
323 551         7836 die "\n";
324             }
325             else
326             {
327             # The \n stops Perl printing the line number.
328              
329 0         0 die "Error: Marpa parse failed.\n";
330             }
331             }
332             }
333             catch
334             {
335             # The \n stops Perl printing the line number.
336              
337 651     651   82996 die "$_\n";
338 1525         14135 };
339              
340             # Return 0 for success and 1 for failure.
341              
342 874         38320 return $result;
343              
344             } # End of parse.
345              
346             # ------------------------------------------------
347              
348             sub prepend
349             {
350 2     2 1 617 my($self, %opts) = @_;
351              
352 2         7 for my $param (qw/text uid/)
353             {
354             # The \n stops Perl printing the line number.
355              
356 4 50       14 die "Method append() takes a hash with these keys: text, uid\n" if (! defined($opts{$param}) );
357             }
358              
359 2         4 my($meta);
360             my($uid);
361              
362 2         49 for my $node ($self -> tree -> traverse)
363             {
364 14 100       242 next if ($node -> is_root);
365              
366 12         91 $meta = $node -> meta;
367 12         72 $uid = $$meta{uid};
368              
369 12 100       46 if ($opts{uid} == $uid)
370             {
371 2         8 $$meta{text} = "$opts{text}$$meta{text}";
372             }
373             }
374              
375             } # End of prepend.
376              
377             # ------------------------------------------------
378              
379             sub _process
380             {
381 1525     1525   3968 my($self) = @_;
382 1525         35194 my($raw_re) = $self -> re;
383 1525         38726 my($test_count) = $self -> test_count($self -> test_count + 1);
384              
385             # This line is 'print', not 'say'!
386              
387 1525 50       80523 print "Test count: $test_count. Parsing (in qr/.../ form): " if ($self -> verbose);
388              
389 1525         14918 my($string_re) = $self -> _string2re($raw_re);
390              
391 1525 50       6541 if ($string_re eq '')
392             {
393 0 0       0 print "\n" if ($self -> verbose);
394              
395 0         0 return undef;
396             }
397              
398 1525 50       31900 print "'$string_re'. \n" if ($self -> verbose);
399              
400 1525 50       41577 if ($self -> verbose > 1)
401             {
402 0         0 my($format) = "%-10s %-5s %-20s %-6s %-30s %s \n";
403              
404 0         0 print sprintf($format, ' Location', 'Width', 'Lexeme', 'Events', 'Names', 'Next few chars');
405              
406             }
407              
408 1525         12700 my($ref_re) = \"$string_re"; # Use " in comment for UltraEdit.
409 1525         3995 my($length) = length($string_re);
410              
411 1525         7572 my($child);
412             my($event_name);
413 1525         0 my($lexeme);
414 1525         0 my($pos);
415 1525         0 my($span, $start);
416              
417             # We use read()/lexeme_read()/resume() because we pause at each lexeme.
418              
419 1525         34569 for
420             (
421             $pos = $self -> recce -> read($ref_re);
422             ($pos < $length);
423             $pos = $self -> recce -> resume($pos)
424             )
425             {
426 14843         1972274 ($start, $span) = $self -> recce -> pause_span;
427 14843         194072 ($event_name, $span, $pos) = $self -> _validate_event($ref_re, $start, $span, $pos,);
428              
429             # If the input is exhausted, we exit immediately so we don't try to use
430             # the values of $start, $span or $pos. They are ignored upon exit.
431              
432 14843 100       33447 last if ($event_name eq "'exhausted"); # Yes, it has a leading quote.
433              
434 14833         230965 $lexeme = $self -> recce -> literal($start, $span);
435 14833         381233 $pos = $self -> recce -> lexeme_read($event_name);
436              
437             # The \n stops Perl printing the line number.
438              
439 14833 50       926461 die "Marpa lexeme_read($event_name) rejected lexeme '$lexeme'\n" if (! defined $pos);
440              
441 14833         55741 $self -> _add_daughter($event_name, {text => $lexeme});
442             }
443              
444 1425         43409 my($message);
445              
446 1425 100       24606 if (my $status = $self -> recce -> ambiguous)
    100          
447             {
448 551         12543286 $self -> ambiguous(1);
449              
450 551         30192 my($terminals) = $self -> recce -> terminals_expected;
451 551 100       22669 $terminals = ['(None)'] if ($#$terminals < 0); # Next line deliberately omits '.' after $status, so output lines up.
452 551         2817 $message = "Marpa error. Parse ambiguous. Status: ${status}Terminals expected: " . join(', ', @$terminals);
453              
454 551         75883 print "$message\n";
455              
456 551         3647 $message = ''; # To stop it being stored just below, and to stop it being printed again.
457             }
458             elsif ($self -> recce -> exhausted)
459             {
460             # Special case. Sigh. I need to patch the BNF to do this. TODO.
461              
462 860 100 66     119911 if ( ($pos + 1 == $length) && (substr($string_re, $pos, 1) eq ')') )
463             {
464 8         38 $self -> _add_daughter('close_parenthesis', {text => ')'});
465             }
466              
467             # See https://metacpan.org/pod/distribution/Marpa-R2/pod/Exhaustion.pod#Exhaustion
468             # for why this code is exhaustion-loving. This is not an error. See docs for details.
469              
470 860 50       15375 $message = 'Marpa parse exhausted' if ($self -> verbose > 1);
471             }
472              
473 1425 50       12953 if ($message)
474             {
475 0         0 $self -> warning_str($message);
476              
477 0 0       0 print "$message\n" if ($self -> verbose);
478             }
479              
480 1425 50       28221 $self -> print_raw_tree if ($self -> verbose);
481              
482             # Return a defined value for success and undef for failure.
483             # Note: value() can return undef.
484              
485 1425 100       32635 return $self -> ambiguous ? undef : $self -> recce -> value;
486              
487             } # End of _process.
488              
489             # ------------------------------------------------
490              
491             sub print_cooked_tree
492             {
493 0     0 1 0 my($self) = @_;
494 0         0 my($format) = "%-30s %3s %s \n";
495              
496 0         0 print sprintf($format, 'Name', 'Uid', 'Text');
497 0         0 print sprintf($format, '----', '---', '----');
498              
499 0         0 my($meta);
500              
501 0         0 for my $node ($self -> tree -> traverse)
502             {
503 0 0       0 next if ($node -> is_root);
504              
505 0         0 $meta = $node -> meta;
506              
507 0         0 print sprintf($format, $node -> value, $$meta{uid}, $$meta{text});
508             }
509              
510             } # End of print_cooked_tree.
511              
512             # ------------------------------------------------
513              
514             sub print_raw_tree
515             {
516 0     0 1 0 my($self) = @_;
517              
518 0         0 print map("$_\n", @{$self -> tree -> tree2string});
  0         0  
519              
520             } # End of print_raw_tree.
521              
522             # ------------------------------------------------
523              
524             sub reset
525             {
526 1528     1528 1 861591 my($self) = @_;
527              
528 1528         36882 $self -> ambiguous(0);
529 1528         52485 $self -> tree(Tree -> new('Root') );
530 1528         170980 $self -> tree -> meta({text => 'Root', uid => 0});
531 1528         56272 $self -> current_node($self -> tree);
532 1528         143846 $self -> uid(0);
533 1528         69495 $self -> warning_str('');
534              
535             } # End of reset.
536              
537             # ------------------------------------------------
538              
539             sub search
540             {
541 2     2 1 62 my($self, $target) = @_;
542              
543             # The \n stops Perl printing the line number.
544              
545 2 50       10 die "Method search() takes a defined value as the parameter\n" if (! defined $target);
546              
547 2         9 my($re) = $self -> _string2re($target);
548              
549 2         6 my(@found);
550             my($meta);
551              
552 2         49 for my $node ($self -> tree -> traverse)
553             {
554 38 100       628 next if ($node -> is_root);
555              
556 36         264 $meta = $node -> meta;
557              
558 36 100       296 if ($$meta{text} =~ $re)
559             {
560 4         12 push @found, $$meta{uid};
561             }
562             }
563              
564 2         10 return [@found];
565              
566             } # End of search.
567              
568             # ------------------------------------------------
569              
570             sub set
571             {
572 1     1 1 312 my($self, %opts) = @_;
573              
574 1         4 for my $param (qw/text uid/)
575             {
576             # The \n stops Perl printing the line number.
577              
578 2 50       9 die "Method set() takes a hash with these keys: text, uid\n" if (! defined($opts{$param}) );
579             }
580              
581 1         3 my($meta);
582             my($uid);
583              
584 1         26 for my $node ($self -> tree -> traverse)
585             {
586 7 100       120 next if ($node -> is_root);
587              
588 6         44 $meta = $node -> meta;
589 6         37 $uid = $$meta{uid};
590              
591 6 100       16 if ($opts{uid} == $uid)
592             {
593 1         3 $$meta{text} = $opts{text};
594             }
595             }
596              
597             } # End of set.
598              
599             # ------------------------------------------------
600              
601             sub _string2re
602             {
603 1527     1527   5365 my($self, $raw_re) = @_;
604              
605 1527         2859 my($re);
606              
607             try
608             {
609 1527 100   1527   62403 $re = does($raw_re, 'Regexp') ? $raw_re : qr/$raw_re/;
610             }
611             catch
612             {
613             # The \n stops Perl printing the line number with 'die'.
614              
615 0     0   0 die "Error: Perl cannot convert $raw_re into qr/.../ form\n";
616 1527         11843 };
617              
618 1527         78146 return $re;
619              
620             } # End of _string2re.
621              
622             # ------------------------------------------------
623              
624             sub validate
625             {
626 0     0 0 0 my($self) = @_;
627 0         0 my($re) = $self -> as_string;
628              
629 0         0 my($result);
630              
631             try
632             {
633 0 0   0   0 $result = ('x' =~ $re) ? 0 : 0; # Use any test to force Perl to process the Regexp.
634             }
635             catch
636             {
637 0     0   0 $result = 1; # Failure.
638 0         0 };
639              
640             # Return 0 for success and 1 for failure.
641              
642 0         0 return $result;
643              
644             } # End of validate.
645              
646             # ------------------------------------------------
647              
648             sub _validate_event
649             {
650 14843     14843   29732 my($self, $stringref, $start, $span, $pos) = @_;
651 14843         20062 my(@event) = @{$self -> recce -> events};
  14843         237974  
652 14843         153582 my($event_count) = scalar @event;
653 14843         28809 my(@event_names) = sort map{$$_[0]} @event;
  15027         50907  
654 14843         27200 my($event_name) = $event_names[0]; # Default.
655              
656             # Handle some special cases.
657              
658 14843 100       37300 if ($event_count > 1)
659             {
660 184         580 my($event_list) = join(', ', @event_names);
661              
662 184 100       855 if ($event_list eq 'caret, string')
    100          
    50          
663             {
664 121         251 $event_count = 1;
665 121         212 $event_name = 'caret';
666 121         400 @event_names = $event_name;
667 121         247 $pos = $start;
668 121         255 $span = 1;
669             }
670             elsif ($event_list eq 'query, string')
671             {
672 23         44 $event_count = 1;
673 23         53 $event_name = 'query';
674 23         59 @event_names = $event_name;
675 23         46 $pos = $start;
676 23         39 $span = 1;
677             }
678             elsif ($event_list eq 'string, vertical_bar')
679             {
680 40         84 $event_count = 1;
681 40         78 $event_name = 'vertical_bar';
682 40         95 @event_names = $event_name;
683 40         98 $pos = $start;
684 40         74 $span = 1;
685             }
686             else
687             {
688             #$self -> print_cooked_tree;
689              
690             # The \n stops Perl printing the line number.
691              
692 0         0 die "event_count: $event_count. " . $event_list . "\n";
693             }
694             }
695              
696             # If the input is exhausted, we return immediately so we don't try to use
697             # the values of $start, $span or $pos. They are ignored upon return.
698              
699 14843 100       34049 if ($event_name eq "'exhausted") # Yes, it has a leading quote.
700             {
701 10         47 return ($event_name, $span, $pos);
702             }
703              
704 14833         36842 my($lexeme) = substr($$stringref, $start, $span);
705 14833         242367 my($line, $column) = $self -> recce -> line_column($start);
706 14833         207548 my($literal) = $self -> _next_few_chars($stringref, $start + $span);
707 14833         54294 my($message) = "Location: ($line, $column). Lexeme: $lexeme. Events: $event_count. Names: ";
708 14833         31289 my($name_list) = join(', ', @event_names);
709 14833         26408 $message .= ". Next few chars: $literal";
710              
711 14833 50       253529 if ($self -> verbose > 1)
712             {
713 0         0 my($format) = "%4d, %4d %5d %-20s %6d %-30s %s \n";
714              
715 0         0 print sprintf($format, $line, $column, length($lexeme), $lexeme, $event_count, $name_list, $literal);
716              
717             }
718              
719 14833         118753 return ($event_name, $span, $pos);
720              
721             } # End of _validate_event.
722              
723             # ------------------------------------------------
724              
725             1;
726              
727             =pod
728              
729             =head1 NAME
730              
731             C - Parse a Perl regexp into a data structure of type L
732              
733             Warning: Development version. See L for details.
734              
735             =head1 Synopsis
736              
737             =head2 Sample Code
738              
739             This is scripts/synopsis.pl:
740              
741             #!/usr/bin/env perl
742              
743             use v5.10;
744             use strict;
745             use warnings;
746              
747             use Regexp::Parsertron;
748              
749             # ---------------------
750              
751             my($re) = qr/Perl|JavaScript/i;
752             my($parser) = Regexp::Parsertron -> new(verbose => 1);
753              
754             # Return 0 for success and 1 for failure.
755              
756             my($result) = $parser -> parse(re => $re);
757             my($node_id) = 5; # Obtained from displaying and inspecting the tree.
758              
759             print "Calling append(text => '|C++', uid => $node_id) \n";
760              
761             $parser -> append(text => '|C++', uid => $node_id);
762             $parser -> print_raw_tree;
763             $parser -> print_cooked_tree;
764              
765             my($as_string) = $parser -> as_string;
766              
767             print "Original: $re. Result: $result (0 is success) \n";
768             print "as_string(): $as_string \n";
769              
770             $result = $parser -> validate;
771              
772             print "validate(): Result: $result (0 is success) \n";
773              
774             # Return 0 for success and 1 for failure.
775              
776             $parser -> reset;
777             $parser -> verbose(0);
778              
779             $re = qr/Perl|JavaScript|(?:Flub|BCPL)/i;
780             $result = $parser -> parse(re => $re);
781              
782             print "\nAdd complexity to the regexp by parsing a new regexp \n";
783              
784             $parser -> print_raw_tree;
785              
786             And its output:
787              
788             Test count: 1. Parsing (in qr/.../ form): '(?^i:Perl|JavaScript)'.
789             Root. Attributes: {text => "Root", uid => "0"}
790             |--- open_parenthesis. Attributes: {text => "(", uid => "1"}
791             | |--- query_caret. Attributes: {text => "?^", uid => "2"}
792             | |--- flag_set. Attributes: {text => "i", uid => "3"}
793             | |--- colon. Attributes: {text => ":", uid => "4"}
794             | |--- string. Attributes: {text => "Perl|JavaScript", uid => "5"}
795             |--- close_parenthesis. Attributes: {text => ")", uid => "6"}
796              
797             Calling append(text => '|C++', uid => 5)
798             Root. Attributes: {text => "Root", uid => "0"}
799             |--- open_parenthesis. Attributes: {text => "(", uid => "1"}
800             | |--- query_caret. Attributes: {text => "?^", uid => "2"}
801             | |--- flag_set. Attributes: {text => "i", uid => "3"}
802             | |--- colon. Attributes: {text => ":", uid => "4"}
803             | |--- string. Attributes: {text => "Perl|JavaScript|C++", uid => "5"}
804             |--- close_parenthesis. Attributes: {text => ")", uid => "6"}
805              
806             Name Uid Text
807             ---- --- ----
808             open_parenthesis 1 (
809             query_caret 2 ?^
810             flag_set 3 i
811             colon 4 :
812             string 5 Perl|JavaScript|C++
813             close_parenthesis 6 )
814             Original: (?^i:Perl|JavaScript). Result: 0 (0 is success)
815             as_string(): (?^i:Perl|JavaScript|C++)
816             validate(): Result: 0 (0 is success)
817              
818             Adding complexity to the regexp by parsing a new regexp:
819             Root. Attributes: {text => "Root", uid => "0"}
820             |--- open_parenthesis. Attributes: {text => "(", uid => "1"}
821             | |--- query_caret. Attributes: {text => "?^", uid => "2"}
822             | |--- flag_set. Attributes: {text => "i", uid => "3"}
823             | |--- colon. Attributes: {text => ":", uid => "4"}
824             | |--- string. Attributes: {text => "Perl|JavaScript|", uid => "5"}
825             | |--- colon_prefix. Attributes: {text => "(?:", uid => "6"}
826             | | |--- string. Attributes: {text => "Flub|BCPL", uid => "7"}
827             | |--- close_parenthesis. Attributes: {text => ")", uid => "8"}
828             |--- close_parenthesis. Attributes: {text => ")", uid => "9"}
829              
830              
831             Note: The 1st tree is printed due to verbose => 1 in the call to L, while the 2nd
832             is due to the call to L. The columnar output is due to the call to
833             L.
834              
835             =head2 Tutorial
836              
837             =over 4
838              
839             =item o Start with a simple program and a simple regexp
840              
841             This code, scripts/tutorial.pl, is a cut-down version of scripts/synopsis.pl:
842              
843             #!/usr/bin/env perl
844              
845             use v5.10;
846             use strict;
847             use warnings;
848              
849             use Regexp::Parsertron;
850              
851             # ---------------------
852              
853             my($re) = qr/Perl|JavaScript/i;
854             my($parser) = Regexp::Parsertron -> new(verbose => 1);
855              
856             # Return 0 for success and 1 for failure.
857              
858             my($result) = $parser -> parse(re => $re);
859              
860             print "Original: $re. Result: $result. (0 is success) \n";
861              
862             Running it outputs:
863              
864             Test count: 1. Parsing (in qr/.../ form): '(?^i:Perl|JavaScript)'.
865             Root. Attributes: {text => "Root", uid => "0"}
866             |--- open_parenthesis. Attributes: {text => "(", uid => "1"}
867             | |--- query_caret. Attributes: {text => "?^", uid => "2"}
868             | |--- flag_set. Attributes: {text => "i", uid => "3"}
869             | |--- colon. Attributes: {text => ":", uid => "4"}
870             | |--- string. Attributes: {text => "Perl|JavaScript", uid => "5"}
871             |--- close_parenthesis. Attributes: {text => ")", uid => "6"}
872              
873             Original: (?^i:Perl|JavaScript). Result: 0. (0 is success)
874              
875             =item o Examine the tree and determine which nodes you wish to edit
876              
877             The nodes are uniquely identified by their uids.
878              
879             =item o Proceed as does scripts/synopsis.pl
880              
881             Add these lines to the end of the tutorial code, and re-run:
882              
883             my($node_id) = 5; # Obtained from displaying and inspecting the tree.
884              
885             $parser -> append(text => '|C++', uid => $node_id);
886             $parser -> print_raw_tree;
887              
888             The extra output, showing the change to node uid == 5, is:
889              
890             Root. Attributes: {text => "Root", uid => "0"}
891             |--- open_parenthesis. Attributes: {text => "(", uid => "1"}
892             | |--- query_caret. Attributes: {text => "?^", uid => "2"}
893             | |--- flag_set. Attributes: {text => "i", uid => "3"}
894             | |--- colon. Attributes: {text => ":", uid => "4"}
895             | |--- string. Attributes: {text => "Perl|JavaScript|C++", uid => "5"}
896             |--- close_parenthesis. Attributes: {text => ")", uid => "6"}
897              
898             =item o Test also with L and L
899              
900             See t/get.set.t for sample code.
901              
902             =item o Since everything works, make a cup of tea
903              
904             =back
905              
906             =head2 The Edit Methods
907              
908             The I simply means any one or more of these methods, which can all change the text of
909             a node:
910              
911             =over 4
912              
913             =item o L
914              
915             =item o L
916              
917             =item o L
918              
919             =back
920              
921             The edit methods are exercised in t/get.set.t, as well as scripts/synopsis.pl (above).
922              
923             =head1 Description
924              
925             Parses a regexp into a tree object managed by the L module, and provides various methods for
926             updating and retrieving that tree's contents.
927              
928             This module uses L and L.
929              
930             =head1 Distributions
931              
932             This module is available as a Unix-style distro (*.tgz).
933              
934             See L
935             for help on unpacking and installing distros.
936              
937             =head1 Installation
938              
939             Install C as you would any C module:
940              
941             Run:
942              
943             cpanm Regexp::Parsertron
944              
945             or run:
946              
947             sudo cpan Regexp::Parsertron
948              
949             or unpack the distro, and then use:
950              
951             perl Makefile.PL
952             make (or dmake or nmake)
953             make test
954             make install
955              
956             =head1 Constructor and Initialization
957              
958             C is called as C<< my($parser) = Regexp::Parsertron -> new(k1 => v1, k2 => v2, ...) >>.
959              
960             It returns a new object of type C.
961              
962             Key-value pairs accepted in the parameter list (see corresponding methods for details
963             [e.g. L]):
964              
965             =over 4
966              
967             =item o re => $regexp
968              
969             The C method of L is called to see what C is. If it's already of the
970             form C, then it's processed as is, but if it's not, then it's transformed using C.
971              
972             Warning: Currently, the input is expected to have been pre-processed by Perl via qr/$regexp/.
973              
974             Default: ''.
975              
976             =item o verbose => $integer
977              
978             Takes values 0, 1 or 2, which print more and more progress reports.
979              
980             Used for debugging.
981              
982             Default: 0 (print nothing).
983              
984             =back
985              
986             =head1 Methods
987              
988             =head2 append(%opts)
989              
990             Append some text to the text of a node.
991              
992             %opts is a hash with these (key => value) pairs:
993              
994             =over 4
995              
996             =item o text => $string
997              
998             The text to append.
999              
1000             =item o uid => $uid
1001              
1002             The uid of the node to update.
1003              
1004             =back
1005              
1006             The code calls C if %opts does not have these 2 keys, or if either value is undef.
1007              
1008             See scripts/synopsis.pl for sample code.
1009              
1010             Note: Calling C never changes the uids of nodes, so repeated calling of C with
1011             the same C will apply more and more updates to the same node.
1012              
1013             See also L, L and t/get.set.t.
1014              
1015             =head2 as_string()
1016              
1017             Returns the parsed regexp as a string. The string contains all edits applied with
1018             L.
1019              
1020             =head2 find($target)
1021              
1022             Returns an arrayref of node uids whose text contains the given string.
1023              
1024             If the arrayref is empty, there were no matches.
1025              
1026             The Perl function C is used here to test for $target being a substring of the text
1027             associated with each node.
1028              
1029             The code calls C if $target is undef.
1030              
1031             See t/get.set.t for sample usage of C.
1032              
1033             See L for a regexp-based test. See also L.
1034              
1035             =head2 get($uid)
1036              
1037             Get the text of the node with the given $uid.
1038              
1039             The code calls C if $uid is undef, or outside the range 1 .. $self -> uid. The latter value
1040             is the highest uid so far assigned to any node.
1041              
1042             Returns undef if the given $uid is not found.
1043              
1044             See also L.
1045              
1046             =head2 new([%opts])
1047              
1048             Here, '[]' indicate an optional parameter.
1049              
1050             See L for details on the parameters accepted by L.
1051              
1052             =head2 parse([%opts])
1053              
1054             Here, '[]' indicate an optional parameter.
1055              
1056             Parses the regexp supplied with the parameter C in the call to L or in the call to
1057             L, or in the call to C<< parse(re => $regexp) >> itself. The latter takes precedence.
1058              
1059             The hash C<%opts> takes the same (key => value) pairs as L does.
1060              
1061             See L for details.
1062              
1063             =head2 prepend(%opts)
1064              
1065             Prepend some text to the text of a node.
1066              
1067             %opts is a hash with these (key => value) pairs:
1068              
1069             =over 4
1070              
1071             =item o text => $string
1072              
1073             The text to prepend.
1074              
1075             =item o uid => $uid
1076              
1077             The uid of the node to update.
1078              
1079             =back
1080              
1081             The code calls C if %opts does not have these 2 keys, or if either value is undef.
1082              
1083             Note: Calling C never changes the uids of nodes, so repeated calling of C with
1084             the same C will apply more and more updates to the same node.
1085              
1086             See also L, L, and t/get.set.t.
1087              
1088             =head2 print_cooked_tree()
1089              
1090             Prints, in a pretty format, the tree built from parsing.
1091              
1092             See the for sample output.
1093              
1094             See also L.
1095              
1096             =head2 print_raw_tree()
1097              
1098             Prints, in a simple format, the tree built from parsing.
1099              
1100             See the for sample output.
1101              
1102             See also L.
1103              
1104             =head2 re([$regexp])
1105              
1106             Here, '[]' indicate an optional parameter.
1107              
1108             Gets or sets the regexp to be processed.
1109              
1110             Note: C is a parameter to L.
1111              
1112             =head2 reset()
1113              
1114             Resets various internal things, except test_count.
1115              
1116             Used basically for debugging.
1117              
1118             =head2 search($target)
1119              
1120             Returns an arrayref of node uids whose text contains the given string.
1121              
1122             If the arrayref is empty, there were no matches.
1123              
1124             $target is converted to a regexp if a simple string is passed in.
1125              
1126             The code calls C if $target is undef.
1127              
1128             See t/search.t for sample usage of C.
1129              
1130             See L for a non-regexp search. See also L.
1131              
1132             =head2 set(%opts)
1133              
1134             Set the text of a node to $opt{text}.
1135              
1136             %opts is a hash with these (key => value) pairs:
1137              
1138             =over 4
1139              
1140             =item o text => $string
1141              
1142             The text to use to overwrite the text of the node.
1143              
1144             =item o uid => $uid
1145              
1146             The uid of the node to update.
1147              
1148             =back
1149              
1150             The code calls C if %opts does not have these 2 keys, or if either value is undef.
1151              
1152             See also L and L.
1153              
1154             =head2 tree()
1155              
1156             Returns an object of type L. Ignore the root node.
1157              
1158             Each node's C method returns a hashref of information about the node. See the
1159             L for details.
1160              
1161             See also the source code for L and L for ideas on how to
1162             use this object.
1163              
1164             =head2 uid()
1165              
1166             Returns the last-used uid.
1167              
1168             Each node in the tree is given a uid, which allows methods like L to work.
1169              
1170             =head2 verbose([$integer])
1171              
1172             Here, '[]' indicate an optional parameter.
1173              
1174             Gets or sets the verbosity level, within the range 0 .. 2. Higher numbers print more progress
1175             reports.
1176              
1177             Used basically for debugging.
1178              
1179             Note: C is a parameter to L.
1180              
1181             =head2 warning_str()
1182              
1183             Returns the last Marpa warning.
1184              
1185             In short, Marpa will always report 'Marpa parse exhausted' in warning_str() if the parse is not
1186             ambiguous, but do not worry - I.
1187              
1188             See L and
1189             L.
1190              
1191             =head1 FAQ
1192              
1193             =head2 Can I add a subtree to the tree?
1194              
1195             Not yet.
1196              
1197             There is a private method, C<_add_daughter()>, which I could make public, if I felt it was safe to
1198             do so.
1199              
1200             =head2 Why does the BNF not accept an empty regexp?
1201              
1202             Simple answer: Changing the BNF to handle this creates a massive problem elsewhere in the BNF.
1203              
1204             Complex answer:
1205              
1206             The BNF contains this countable rule to allow patterns to be juxtaposed without '|', say, to
1207             separate them:
1208              
1209             global_sequence ::= pattern_type+
1210              
1211             And in turn (further toward the leaves of the tree of BNF), I then use:
1212              
1213             pattern_sequence ::= pattern_set+
1214              
1215             To allow an empty regexp would mean changing this rule to:
1216              
1217             pattern_sequence ::= pattern_set*
1218              
1219             But that makes this rule nullable, and Marpa rejects the C rule on the grounds that
1220             a countable rule is not allowed to be nullable. ATM I cannot see a way of
1221             rewriting the rules to avoid this problem. But I'm hopeful such a rewrite is possible.
1222              
1223             =head2 Why does the code sometimes not store '|' - as in qr/(Perl|JavaScript/) - in its own node?
1224              
1225             It could be done by, for example, splitting such a string into three nodes, 'Perl', '|',
1226             'Javascript'. But does that offer any benefit?
1227              
1228             It makes processing by the user more complex because then if they wish to edit the list of
1229             alternatives, they might have to edit two or three nodes instead of one. Here, editing means perhaps
1230             replacing any existing string with the empty string.
1231              
1232             Further, to extend the list of alternatives, the user will be confused by not being sure if they
1233             should change 'Javascript' to 'Javascript|C' or if they have to add two nodes, containing '|' and
1234             'C'. And ATM adding nodes is contraindicated!
1235              
1236             Despite this, when the input stream triggers two events, C and C,
1237             simultaneously because the '|' is at the start of a string, special code in the private method
1238             C<_validate_event()> does put '|' in its own node. IOW the BNF does not do the work, which is really
1239             what I would prefer.
1240              
1241             =head2 Does this module ever use \Q...\E to quote regexp metacharacters?
1242              
1243             No.
1244              
1245             =head2 What is the format of the nodes in the tree built by this module?
1246              
1247             Each node's C is the name of the Marpa-style event which was triggered by detection of
1248             some C within the regexp.
1249              
1250             Each node's C method returns a hashref with these (key => value) pairs:
1251              
1252             =over 4
1253              
1254             =item o text => $string
1255              
1256             This is the text within the regexp which triggered the event just mentioned.
1257              
1258             =item o uid => $integer
1259              
1260             This is the unique id of the 'current' node.
1261              
1262             This C is often used by you to specify which node to work on.
1263              
1264             See t/get.set.t and t/simple.t for sample code.
1265              
1266             The code never changes the uid of a node.
1267              
1268             =back
1269              
1270             See also the source code for L and L for ideas on how to
1271             use the tree.
1272              
1273             See the L for sample code and a report after parsing a tiny regexp.
1274              
1275             =head2 Does the root node in the tree ever hold useful information?
1276              
1277             No. Always ignore it.
1278              
1279             =head2 Why does the BNF never use the lexeme adverb C?
1280              
1281             Because with Marpa::R2 the priority is only used when lexemes are the same length.
1282              
1283             L.
1284              
1285             =head2 Does this module interpret regexps in any way?
1286              
1287             No. You have to run your own Perl code to do that. This module just parses them into a data
1288             structure.
1289              
1290             And that really means this module does not match the regexp against anything. If I appear to do that
1291             while debugging new code, you can't rely on that appearing in production versions of the module.
1292              
1293             =head2 Does this module rewrite regexps?
1294              
1295             No, unless you call one of L.
1296              
1297             =head2 Does this module handle both Perl 5 and Perl 6?
1298              
1299             No. It will only handle Perl 5 syntax.
1300              
1301             =head2 Does this module handle regexps for various versions of Perl5?
1302              
1303             Not yet. Version-dependent regexp syntax will be supported for recent versions of Perl. This is
1304             done by having tokens within the BNF which are replaced at start-up time with version-dependent
1305             details.
1306              
1307             There are no such tokens at the moment.
1308              
1309             All debugging is done assuming the regexp syntax as documented online. See L for the
1310             urls in question.
1311              
1312             =head2 So which version of Perl is supported?
1313              
1314             The code is expected to work for Perls back to V 5.14.0, which is when stringification of regexps
1315             changed. See L below for more.
1316              
1317             I'm (2018-01-14) using Perl V 5.20.2 and making the BNF match the Perl regexp docs listed in
1318             L below.
1319              
1320             The program t/perl-5.21.11.t reads the file 'xt/author/re_tests' which I copied from the source code
1321             of Perl V 5.21.11. This test is the one which currently provides 858 passing tests out of the 1027
1322             tests which pass for me using prove -lv t.
1323              
1324             =head2 Could Perl and this module generate different parses of the same regexp?
1325              
1326             Absolutely! There is no escape from this fact simply because the code used in each program bears no
1327             relationship to the code in the other one.
1328              
1329             The real question is: How do we make the code in each program accept and reject exactly the same
1330             regexps as the code in the other program. I think trial-and-error is all we have available to us for
1331             dealing with this issue.
1332              
1333             =head2 After calling parse(), warning_str() contains the string '... Parse ambiguous ...'
1334              
1335             This is almost certainly an error with the BNF, although of course it may be an error with an
1336             exceptionally-badly formed regexp.
1337              
1338             See examples/ambiguous.pl and
1339             L.
1340              
1341             See examples/commit.pl and
1342             L.
1343              
1344             In such cases the code dies, as of V 1.04.
1345              
1346             Please report it via L, and
1347             include the regexp in the report. Thanx!
1348              
1349             =head2 Is this a (Marpa) exhaustion-hating or exhaustion-loving app?
1350              
1351             Exhaustion-loving.
1352              
1353             See L
1354              
1355             =head2 Will this code be modified to run under Marpa::R3 when the latter is stable?
1356              
1357             Yes.
1358              
1359             =head2 What is the purpose of this module?
1360              
1361             =over 4
1362              
1363             =item o To provide a stand-alone parser for regexps
1364              
1365             =item o To help me learn more about regexps
1366              
1367             =item o To become, I hope, a replacement for the horrendously complex L
1368              
1369             =back
1370              
1371             =head2 Who crafted the BNF?
1372              
1373             I did.
1374              
1375             =head1 Scripts
1376              
1377             This diagram indicates the flow of logic from script to script:
1378              
1379             xt/author/re_tests
1380             |
1381             V
1382             xt/author/generate.tests.pl
1383             |
1384             V
1385             xt/authors/perl-5.21.11.tests
1386             |
1387             V
1388             perl -Ilib t/perl-5.21.11.t > xt/author/perl-5.21.11.log 2>&1
1389              
1390             If xt/author/perl-5.21.11.log only contains lines starting with 'ok', then all Perl and Marpa
1391             errors have been hidden, so t/perl-5.21.11.t is ready to live in t/. Before that time it lives in
1392             xt/author/.
1393              
1394             =head1 TODO
1395              
1396             =over 4
1397              
1398             =item o How to best define 'code' in the BNF.
1399              
1400             =item o I could traverse the tree and store a pointer to each node in an array
1401              
1402             This would mean fast access to nodes in random order. But is there any point? Yes, it would speed up
1403             various methods. Specifically, any module which calls C on the tree object would
1404             benefit.
1405              
1406             =item o Allow users to add nodes and hence subtrees to the tree
1407              
1408             =back
1409              
1410             =head1 References
1411              
1412             L. Mastering Lookahead and Lookbehind.
1413              
1414             L. PCRE - Perl Compatible Regular Expressions.
1415              
1416             L. This is the definitive document.
1417              
1418             L.
1419              
1420             L. Samples with commentary.
1421              
1422             L
1423              
1424             L
1425              
1426             L
1427              
1428             L. This is when stringification
1429             changed to return (?^...) rather than (?-xism...).
1430              
1431             L
1432              
1433             L. Regular Expression
1434             Inconsistencies With Unicode.
1435              
1436             L
1437              
1438             L
1439              
1440             L
1441              
1442             =head1 See Also
1443              
1444             L
1445              
1446             L
1447              
1448             L
1449              
1450             L
1451              
1452             L
1453              
1454             L
1455              
1456             L
1457              
1458             L
1459              
1460             L
1461              
1462             L. This is vaguely a version of L.
1463              
1464             L
1465              
1466             L
1467              
1468             And many others...
1469              
1470             =head1 Machine-Readable Change Log
1471              
1472             The file Changes was converted into Changelog.ini by L.
1473              
1474             =head1 Version Numbers
1475              
1476             Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1477              
1478             =head1 CPAN Tester Results
1479              
1480             L
1481              
1482             =head1 Repository
1483              
1484             L
1485              
1486             =head1 Support
1487              
1488             Email the author, or log a bug on RT:
1489              
1490             L.
1491              
1492             =head1 Author
1493              
1494             L was written by Ron Savage Iron@savage.net.auE> in 2011.
1495              
1496             Marpa's homepage: L.
1497              
1498             L.
1499              
1500             =head1 Copyright
1501              
1502             Australian copyright (c) 2016, Ron Savage.
1503              
1504             All Programs of mine are 'OSI Certified Open Source Software';
1505             you can redistribute them and/or modify them under the terms of
1506             The Artistic License 2.0, a copy of which is available at:
1507             http://opensource.org/licenses/alphabetical.
1508              
1509             =cut
1510              
1511             __DATA__