line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Regexp::Parsertron; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
483
|
use re 'eval'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
49
|
|
4
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
17
|
|
5
|
1
|
|
|
1
|
|
2
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
21
|
|
6
|
|
|
|
|
|
|
#use warnings qw(FATAL utf8); # Fatalize encoding glitches. |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
376
|
use Data::Section::Simple 'get_data_section'; |
|
1
|
|
|
|
|
403
|
|
|
1
|
|
|
|
|
47
|
|
9
|
|
|
|
|
|
|
|
10
|
1
|
|
|
1
|
|
379
|
use Marpa::R2; |
|
1
|
|
|
|
|
102421
|
|
|
1
|
|
|
|
|
41
|
|
11
|
|
|
|
|
|
|
|
12
|
1
|
|
|
1
|
|
481
|
use Moo; |
|
1
|
|
|
|
|
9290
|
|
|
1
|
|
|
|
|
3
|
|
13
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
1368
|
use Scalar::Does '-constants'; # For does(). |
|
1
|
|
|
|
|
73021
|
|
|
1
|
|
|
|
|
10
|
|
15
|
|
|
|
|
|
|
|
16
|
1
|
|
|
1
|
|
2003
|
use Tree; |
|
1
|
|
|
|
|
4004
|
|
|
1
|
|
|
|
|
24
|
|
17
|
|
|
|
|
|
|
|
18
|
1
|
|
|
1
|
|
5
|
use Try::Tiny; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
49
|
|
19
|
|
|
|
|
|
|
|
20
|
1
|
|
|
1
|
|
4
|
use Types::Standard qw/Any Int Str/; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
has bnf => |
23
|
|
|
|
|
|
|
( |
24
|
|
|
|
|
|
|
default => sub{return ''}, |
25
|
|
|
|
|
|
|
is => 'rw', |
26
|
|
|
|
|
|
|
isa => Any, |
27
|
|
|
|
|
|
|
required => 0, |
28
|
|
|
|
|
|
|
); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
has current_node => |
31
|
|
|
|
|
|
|
( |
32
|
|
|
|
|
|
|
default => sub{return ''}, |
33
|
|
|
|
|
|
|
is => 'rw', |
34
|
|
|
|
|
|
|
isa => Any, |
35
|
|
|
|
|
|
|
required => 0, |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
has error_str => |
39
|
|
|
|
|
|
|
( |
40
|
|
|
|
|
|
|
default => sub {return ''}, |
41
|
|
|
|
|
|
|
is => 'rw', |
42
|
|
|
|
|
|
|
isa => Str, |
43
|
|
|
|
|
|
|
required => 0, |
44
|
|
|
|
|
|
|
); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
has grammar => |
47
|
|
|
|
|
|
|
( |
48
|
|
|
|
|
|
|
default => sub {return ''}, |
49
|
|
|
|
|
|
|
is => 'rw', |
50
|
|
|
|
|
|
|
isa => Any, |
51
|
|
|
|
|
|
|
required => 0, |
52
|
|
|
|
|
|
|
); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
has marpa_error_count => |
55
|
|
|
|
|
|
|
( |
56
|
|
|
|
|
|
|
default => sub{return 0}, |
57
|
|
|
|
|
|
|
is => 'rw', |
58
|
|
|
|
|
|
|
isa => Int, |
59
|
|
|
|
|
|
|
required => 0, |
60
|
|
|
|
|
|
|
); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
has perl_error_count => |
63
|
|
|
|
|
|
|
( |
64
|
|
|
|
|
|
|
default => sub{return 0}, |
65
|
|
|
|
|
|
|
is => 'rw', |
66
|
|
|
|
|
|
|
isa => Int, |
67
|
|
|
|
|
|
|
required => 0, |
68
|
|
|
|
|
|
|
); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
has re => |
71
|
|
|
|
|
|
|
( |
72
|
|
|
|
|
|
|
default => sub {return ''}, |
73
|
|
|
|
|
|
|
is => 'rw', |
74
|
|
|
|
|
|
|
isa => Any, |
75
|
|
|
|
|
|
|
required => 0, |
76
|
|
|
|
|
|
|
); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
has recce => |
79
|
|
|
|
|
|
|
( |
80
|
|
|
|
|
|
|
default => sub{return ''}, |
81
|
|
|
|
|
|
|
is => 'rw', |
82
|
|
|
|
|
|
|
isa => Any, |
83
|
|
|
|
|
|
|
required => 0, |
84
|
|
|
|
|
|
|
); |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
has test_count => |
87
|
|
|
|
|
|
|
( |
88
|
|
|
|
|
|
|
default => sub{return 0}, |
89
|
|
|
|
|
|
|
is => 'rw', |
90
|
|
|
|
|
|
|
isa => Int, |
91
|
|
|
|
|
|
|
required => 0, |
92
|
|
|
|
|
|
|
); |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
has tree => |
95
|
|
|
|
|
|
|
( |
96
|
|
|
|
|
|
|
default => sub{return Tree -> new('Root')}, |
97
|
|
|
|
|
|
|
is => 'rw', |
98
|
|
|
|
|
|
|
isa => Any, |
99
|
|
|
|
|
|
|
required => 0, |
100
|
|
|
|
|
|
|
); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
has uid => |
103
|
|
|
|
|
|
|
( |
104
|
|
|
|
|
|
|
default => sub {return 0}, |
105
|
|
|
|
|
|
|
is => 'rw', |
106
|
|
|
|
|
|
|
isa => Int, |
107
|
|
|
|
|
|
|
required => 0, |
108
|
|
|
|
|
|
|
); |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
has verbose => |
111
|
|
|
|
|
|
|
( |
112
|
|
|
|
|
|
|
default => sub {return 0}, |
113
|
|
|
|
|
|
|
is => 'rw', |
114
|
|
|
|
|
|
|
isa => Int, |
115
|
|
|
|
|
|
|
required => 0, |
116
|
|
|
|
|
|
|
); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
our $VERSION = '0.51'; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# ------------------------------------------------ |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub BUILD |
123
|
|
|
|
|
|
|
{ |
124
|
1
|
|
|
1
|
0
|
21
|
my($self) = @_; |
125
|
1
|
|
|
|
|
6
|
my($bnf) = get_data_section('V 5.20'); |
126
|
|
|
|
|
|
|
|
127
|
1
|
|
|
|
|
611
|
$self -> bnf($bnf); |
128
|
1
|
|
|
|
|
482
|
$self -> grammar |
129
|
|
|
|
|
|
|
( |
130
|
|
|
|
|
|
|
Marpa::R2::Scanless::G -> new |
131
|
|
|
|
|
|
|
({ |
132
|
|
|
|
|
|
|
source => \$self -> bnf |
133
|
|
|
|
|
|
|
}) |
134
|
|
|
|
|
|
|
); |
135
|
1
|
|
|
|
|
157130
|
$self -> reset; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
} # End of BUILD. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# ------------------------------------------------ |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub add |
142
|
|
|
|
|
|
|
{ |
143
|
1
|
|
|
1
|
1
|
8
|
my($self, %opts) = @_; |
144
|
|
|
|
|
|
|
|
145
|
1
|
|
|
|
|
2
|
for my $param (qw/text uid/) |
146
|
|
|
|
|
|
|
{ |
147
|
2
|
50
|
|
|
|
7
|
die "Method add() takes a hash with these keys: text, uid\n" if (! defined($opts{$param}) ); |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
1
|
|
|
|
|
1
|
my($meta); |
151
|
|
|
|
|
|
|
my($uid); |
152
|
|
|
|
|
|
|
|
153
|
1
|
|
|
|
|
18
|
for my $node ($self -> tree -> traverse) |
154
|
|
|
|
|
|
|
{ |
155
|
8
|
100
|
|
|
|
73
|
next if ($node -> is_root); |
156
|
|
|
|
|
|
|
|
157
|
7
|
|
|
|
|
32
|
$meta = $node -> meta; |
158
|
7
|
|
|
|
|
21
|
$uid = $$meta{uid}; |
159
|
|
|
|
|
|
|
|
160
|
7
|
100
|
|
|
|
13
|
if ($opts{uid} == $uid) |
161
|
|
|
|
|
|
|
{ |
162
|
1
|
|
|
|
|
3
|
$$meta{text} .= $opts{text}; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
} # End of add. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# ------------------------------------------------ |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub _add_daughter |
171
|
|
|
|
|
|
|
{ |
172
|
92
|
|
|
92
|
|
108
|
my($self, $event_name, $attributes) = @_; |
173
|
92
|
|
|
|
|
1229
|
$$attributes{uid} = $self -> uid($self -> uid + 1); |
174
|
92
|
|
|
|
|
1882
|
my($node) = Tree -> new($event_name); |
175
|
|
|
|
|
|
|
|
176
|
92
|
|
|
|
|
2351
|
$node -> meta($attributes); |
177
|
|
|
|
|
|
|
|
178
|
92
|
50
|
|
|
|
2179
|
print "Adding $event_name to tree. \n" if ($self -> verbose > 1); |
179
|
|
|
|
|
|
|
|
180
|
92
|
100
|
|
|
|
481
|
if ($event_name =~ /^close_(?:bracket|parenthesis)$/) |
181
|
|
|
|
|
|
|
{ |
182
|
21
|
|
|
|
|
292
|
$self -> current_node($self -> current_node -> parent); |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
92
|
|
|
|
|
1804
|
$self -> current_node -> add_child($node); |
186
|
|
|
|
|
|
|
|
187
|
92
|
100
|
|
|
|
13250
|
if ($event_name =~ /^open_(?:bracket|parenthesis)$/) |
188
|
|
|
|
|
|
|
{ |
189
|
21
|
|
|
|
|
352
|
$self -> current_node($node); |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
} # End of _add_daughter. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# ------------------------------------------------ |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
sub as_re |
197
|
|
|
|
|
|
|
{ |
198
|
0
|
|
|
0
|
1
|
0
|
my($self) = @_; |
199
|
0
|
|
|
|
|
0
|
my($string) = $self -> as_string; |
200
|
0
|
|
|
|
|
0
|
my($index) = index($string, '/'); |
201
|
|
|
|
|
|
|
|
202
|
0
|
0
|
|
|
|
0
|
if ($index >= 0) |
203
|
|
|
|
|
|
|
{ |
204
|
0
|
|
|
|
|
0
|
$string = substr($string, $index); |
205
|
0
|
|
|
|
|
0
|
substr($string, -1) = ''; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
0
|
|
|
|
|
0
|
return $string; |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
} # End of as_re. |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# ------------------------------------------------ |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
sub as_string |
215
|
|
|
|
|
|
|
{ |
216
|
12
|
|
|
12
|
1
|
83
|
my($self) = @_; |
217
|
12
|
|
|
|
|
18
|
my($string) = ''; |
218
|
|
|
|
|
|
|
|
219
|
12
|
|
|
|
|
15
|
my($meta); |
220
|
|
|
|
|
|
|
|
221
|
12
|
|
|
|
|
218
|
for my $node ($self -> tree -> traverse) |
222
|
|
|
|
|
|
|
{ |
223
|
104
|
100
|
|
|
|
1078
|
next if ($node -> is_root); |
224
|
|
|
|
|
|
|
|
225
|
92
|
|
|
|
|
402
|
$meta = $node -> meta; |
226
|
92
|
|
|
|
|
289
|
$string .= $$meta{text}; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
12
|
|
|
|
|
26
|
return $string; |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
} # End of as_string. |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# ------------------------------------------------ |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
sub cooked_tree |
236
|
|
|
|
|
|
|
{ |
237
|
0
|
|
|
0
|
1
|
0
|
my($self) = @_; |
238
|
0
|
|
|
|
|
0
|
my($format) = "%-30s %3s %s\n"; |
239
|
|
|
|
|
|
|
|
240
|
0
|
|
|
|
|
0
|
print sprintf($format, 'Name', 'Uid', 'Text'); |
241
|
0
|
|
|
|
|
0
|
print sprintf($format, '----', '---', '----'); |
242
|
|
|
|
|
|
|
|
243
|
0
|
|
|
|
|
0
|
my($meta); |
244
|
|
|
|
|
|
|
|
245
|
0
|
|
|
|
|
0
|
for my $node ($self -> tree -> traverse) |
246
|
|
|
|
|
|
|
{ |
247
|
0
|
0
|
|
|
|
0
|
next if ($node -> is_root); |
248
|
|
|
|
|
|
|
|
249
|
0
|
|
|
|
|
0
|
$meta = $node -> meta; |
250
|
|
|
|
|
|
|
|
251
|
0
|
|
|
|
|
0
|
print sprintf($format, $node -> value, $$meta{uid}, $$meta{text}); |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
} # End of cooked_tree. |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# ------------------------------------------------ |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub _next_few_chars |
259
|
|
|
|
|
|
|
{ |
260
|
92
|
|
|
92
|
|
91
|
my($self, $stringref, $offset) = @_; |
261
|
92
|
|
|
|
|
133
|
my($s) = substr($$stringref, $offset, 20); |
262
|
92
|
|
|
|
|
125
|
$s =~ tr/\n/ /; |
263
|
92
|
|
|
|
|
140
|
$s =~ s/^\s+//; |
264
|
92
|
|
|
|
|
119
|
$s =~ s/\s+$//; |
265
|
|
|
|
|
|
|
|
266
|
92
|
|
|
|
|
152
|
return $s; |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
} # End of _next_few_chars. |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# ------------------------------------------------ |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
sub parse |
273
|
|
|
|
|
|
|
{ |
274
|
12
|
|
|
12
|
1
|
666
|
my($self, %opts) = @_; |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# Emulate parts of new(), which makes things a bit earier for the caller. |
277
|
|
|
|
|
|
|
|
278
|
12
|
|
|
|
|
166
|
$self -> error_str(''); |
279
|
12
|
50
|
|
|
|
796
|
$self -> re($opts{re}) if (defined $opts{re}); |
280
|
12
|
50
|
|
|
|
613
|
$self -> verbose($opts{verbose}) if (defined $opts{verbose}); |
281
|
|
|
|
|
|
|
|
282
|
12
|
|
|
|
|
182
|
$self -> recce |
283
|
|
|
|
|
|
|
( |
284
|
|
|
|
|
|
|
Marpa::R2::Scanless::R -> new |
285
|
|
|
|
|
|
|
({ |
286
|
|
|
|
|
|
|
exhaustion => 'event', |
287
|
|
|
|
|
|
|
grammar => $self -> grammar, |
288
|
|
|
|
|
|
|
ranking_method => 'high_rule_only', |
289
|
|
|
|
|
|
|
}) |
290
|
|
|
|
|
|
|
); |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# Return 0 for success and 1 for failure. |
293
|
|
|
|
|
|
|
|
294
|
12
|
|
|
|
|
3741
|
my($result) = 0; |
295
|
|
|
|
|
|
|
|
296
|
12
|
|
|
|
|
15
|
my($message); |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
try |
299
|
|
|
|
|
|
|
{ |
300
|
12
|
50
|
|
12
|
|
374
|
if (defined (my $value = $self -> _process) ) |
301
|
|
|
|
|
|
|
{ |
302
|
12
|
50
|
|
|
|
103156
|
$self -> cooked_tree if ($self -> verbose > 1); |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
else |
305
|
|
|
|
|
|
|
{ |
306
|
0
|
|
|
|
|
0
|
$result = 1; |
307
|
|
|
|
|
|
|
|
308
|
0
|
0
|
|
|
|
0
|
$self -> error_str('Error: Parse failed') if (! $self -> error_str); |
309
|
|
|
|
|
|
|
|
310
|
0
|
0
|
0
|
|
|
0
|
print '1 Error str: ', $self -> error_str, "\n" if ($self -> verbose && $self -> error_str); |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
catch |
314
|
|
|
|
|
|
|
{ |
315
|
0
|
|
|
0
|
|
0
|
$result = 1; |
316
|
|
|
|
|
|
|
|
317
|
0
|
|
|
|
|
0
|
$self -> marpa_error_count($self -> marpa_error_count + 1); |
318
|
0
|
|
|
|
|
0
|
$self -> error_str("Error: Parse failed. $_"); |
319
|
|
|
|
|
|
|
|
320
|
0
|
0
|
0
|
|
|
0
|
print '2 Error str: ', $self -> error_str, "\n" if ($self -> verbose && $self -> error_str); |
321
|
12
|
|
|
|
|
143
|
}; |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# Return 0 for success and 1 for failure. |
324
|
|
|
|
|
|
|
|
325
|
12
|
|
|
|
|
379
|
return $result; |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
} # End of parse. |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# ------------------------------------------------ |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
sub _process |
332
|
|
|
|
|
|
|
{ |
333
|
12
|
|
|
12
|
|
15
|
my($self) = @_; |
334
|
12
|
|
|
|
|
209
|
my($raw_re) = $self -> re; |
335
|
12
|
|
|
|
|
202
|
my($test_count) = $self -> test_count($self -> test_count + 1); |
336
|
|
|
|
|
|
|
|
337
|
12
|
50
|
|
|
|
831
|
print "Test count: $test_count. Parsing '$raw_re' => (qr/.../) => " if ($self -> verbose); |
338
|
|
|
|
|
|
|
|
339
|
12
|
|
|
|
|
495
|
my($string_re) = $self -> _string2re($raw_re); |
340
|
|
|
|
|
|
|
|
341
|
12
|
50
|
|
|
|
31
|
if ($string_re eq '') |
342
|
|
|
|
|
|
|
{ |
343
|
0
|
0
|
|
|
|
0
|
print "\n" if ($self -> verbose); |
344
|
|
|
|
|
|
|
|
345
|
0
|
|
|
|
|
0
|
return undef; |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
12
|
50
|
|
|
|
205
|
print "'$string_re'. \n" if ($self -> verbose); |
349
|
|
|
|
|
|
|
|
350
|
12
|
|
|
|
|
72
|
my($ref_re) = \"$string_re"; # Use " in comment for UltraEdit. |
351
|
12
|
|
|
|
|
17
|
my($length) = length($string_re); |
352
|
|
|
|
|
|
|
|
353
|
12
|
|
|
|
|
14
|
my($child); |
354
|
|
|
|
|
|
|
my($event_name); |
355
|
0
|
|
|
|
|
0
|
my($lexeme); |
356
|
0
|
|
|
|
|
0
|
my($pos); |
357
|
0
|
|
|
|
|
0
|
my($span, $start); |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# We use read()/lexeme_read()/resume() because we pause at each lexeme. |
360
|
|
|
|
|
|
|
|
361
|
12
|
|
|
|
|
160
|
for |
362
|
|
|
|
|
|
|
( |
363
|
|
|
|
|
|
|
$pos = $self -> recce -> read($ref_re); |
364
|
|
|
|
|
|
|
($pos < $length); |
365
|
|
|
|
|
|
|
$pos = $self -> recce -> resume($pos) |
366
|
|
|
|
|
|
|
) |
367
|
|
|
|
|
|
|
{ |
368
|
92
|
|
|
|
|
10732
|
($start, $span) = $self -> recce -> pause_span; |
369
|
92
|
|
|
|
|
776
|
($event_name, $span, $pos) = $self -> _validate_event($ref_re, $start, $span, $pos,); |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
# If the input is exhausted, we exit immediately so we don't try to use |
372
|
|
|
|
|
|
|
# the values of $start, $span or $pos. They are ignored upon exit. |
373
|
|
|
|
|
|
|
|
374
|
92
|
50
|
|
|
|
170
|
last if ($event_name eq "'exhausted"); # Yes, it has a leading quote. |
375
|
|
|
|
|
|
|
|
376
|
92
|
|
|
|
|
1169
|
$lexeme = $self -> recce -> literal($start, $span); |
377
|
92
|
|
|
|
|
1788
|
$pos = $self -> recce -> lexeme_read($event_name); |
378
|
|
|
|
|
|
|
|
379
|
92
|
50
|
|
|
|
3301
|
die "lexeme_read($event_name) rejected lexeme |$lexeme|\n" if (! defined $pos); |
380
|
|
|
|
|
|
|
|
381
|
92
|
50
|
|
|
|
1408
|
print "event_name: $event_name. lexeme: $lexeme. \n" if ($self -> verbose > 1); |
382
|
|
|
|
|
|
|
|
383
|
92
|
|
|
|
|
546
|
$self -> _add_daughter($event_name, {text => $lexeme}); |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
12
|
|
|
|
|
211
|
my($message); |
387
|
|
|
|
|
|
|
|
388
|
12
|
50
|
|
|
|
165
|
if ($self -> recce -> exhausted) |
|
|
0
|
|
|
|
|
|
389
|
|
|
|
|
|
|
{ |
390
|
|
|
|
|
|
|
# See https://metacpan.org/pod/distribution/Marpa-R2/pod/Exhaustion.pod#Exhaustion |
391
|
|
|
|
|
|
|
# for why this code is exhaustion-loving. |
392
|
|
|
|
|
|
|
|
393
|
12
|
|
|
|
|
175
|
$message = 'Parse exhausted'; |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
#print "Warning: $message\n"; |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
elsif (my $status = $self -> recce -> ambiguous) |
398
|
|
|
|
|
|
|
{ |
399
|
0
|
|
|
|
|
0
|
my($terminals) = $self -> recce -> terminals_expected; |
400
|
0
|
0
|
|
|
|
0
|
$terminals = ['(None)'] if ($#$terminals < 0); |
401
|
0
|
|
|
|
|
0
|
$message = "Ambiguous parse. Status: $status. Terminals expected: " . join(', ', @$terminals); |
402
|
|
|
|
|
|
|
|
403
|
0
|
|
|
|
|
0
|
print "Warning: $message\n"; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
12
|
50
|
|
|
|
167
|
$self -> raw_tree if ($self -> verbose); |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
# Return a defined value for success and undef for failure. |
409
|
|
|
|
|
|
|
|
410
|
12
|
|
|
|
|
184
|
return $self -> recce -> value; |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
} # End of _process. |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# ------------------------------------------------ |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub raw_tree |
417
|
|
|
|
|
|
|
{ |
418
|
0
|
|
|
0
|
1
|
0
|
my($self) = @_; |
419
|
|
|
|
|
|
|
|
420
|
0
|
|
|
|
|
0
|
print map("$_\n", @{$self -> tree -> tree2string}); |
|
0
|
|
|
|
|
0
|
|
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
} # End of raw_tree. |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# ------------------------------------------------ |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
sub reset |
427
|
|
|
|
|
|
|
{ |
428
|
13
|
|
|
13
|
1
|
5410
|
my($self) = @_; |
429
|
|
|
|
|
|
|
|
430
|
13
|
|
|
|
|
57
|
$self -> tree(Tree -> new('Root') ); |
431
|
13
|
|
|
|
|
1414
|
$self -> tree -> meta({text => 'Root', uid => 0}); |
432
|
13
|
|
|
|
|
384
|
$self -> current_node($self -> tree); |
433
|
13
|
|
|
|
|
1199
|
$self -> marpa_error_count(0); |
434
|
13
|
|
|
|
|
871
|
$self -> perl_error_count(0); |
435
|
13
|
|
|
|
|
856
|
$self -> uid(0); |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
} # End of reset. |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# ------------------------------------------------ |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
sub _string2re |
442
|
|
|
|
|
|
|
{ |
443
|
12
|
|
|
12
|
|
18
|
my($self, $candidate) = @_; |
444
|
|
|
|
|
|
|
|
445
|
12
|
|
|
|
|
16
|
my($re); |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
try |
448
|
|
|
|
|
|
|
{ |
449
|
12
|
50
|
|
12
|
|
308
|
$re = does($candidate, 'Regexp') ? $candidate : qr/$candidate/; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
catch |
452
|
|
|
|
|
|
|
{ |
453
|
0
|
|
|
0
|
|
0
|
$re = ''; |
454
|
|
|
|
|
|
|
|
455
|
0
|
|
|
|
|
0
|
$self -> perl_error_count($self -> perl_error_count + 1); |
456
|
0
|
|
|
|
|
0
|
$self -> error_str($self -> test_count . ": Perl cannot convert $candidate into qr/.../ form"); |
457
|
12
|
|
|
|
|
73
|
}; |
458
|
|
|
|
|
|
|
|
459
|
12
|
|
|
|
|
1130
|
return $re; |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
} # End of _string2re. |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
# ------------------------------------------------ |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
sub _validate_event |
466
|
|
|
|
|
|
|
{ |
467
|
92
|
|
|
92
|
|
111
|
my($self, $stringref, $start, $span, $pos) = @_; |
468
|
92
|
|
|
|
|
74
|
my(@event) = @{$self -> recce -> events}; |
|
92
|
|
|
|
|
1213
|
|
469
|
92
|
|
|
|
|
601
|
my($event_count) = scalar @event; |
470
|
92
|
|
|
|
|
124
|
my(@event_name) = sort map{$$_[0]} @event; |
|
93
|
|
|
|
|
229
|
|
471
|
92
|
|
|
|
|
109
|
my($event_name) = $event_name[0]; # Default. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
# If the input is exhausted, we return immediately so we don't try to use |
474
|
|
|
|
|
|
|
# the values of $start, $span or $pos. They are ignored upon return. |
475
|
|
|
|
|
|
|
|
476
|
92
|
50
|
|
|
|
145
|
if ($event_name eq "'exhausted") # Yes, it has a leading quote. |
477
|
|
|
|
|
|
|
{ |
478
|
0
|
|
|
|
|
0
|
return ($event_name, $span, $pos); |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
|
481
|
92
|
|
|
|
|
134
|
my($lexeme) = substr($$stringref, $start, $span); |
482
|
92
|
|
|
|
|
1294
|
my($line, $column) = $self -> recce -> line_column($start); |
483
|
92
|
|
|
|
|
776
|
my($literal) = $self -> _next_few_chars($stringref, $start + $span); |
484
|
92
|
|
|
|
|
214
|
my($message) = "Location: ($line, $column). Lexeme: |$lexeme|. Next few chars: |$literal|"; |
485
|
92
|
|
|
|
|
130
|
$message = "$message. Events: $event_count. Names: "; |
486
|
|
|
|
|
|
|
|
487
|
92
|
50
|
|
|
|
1266
|
print $message, join(', ', @event_name), "\n" if ($self -> verbose > 1); |
488
|
|
|
|
|
|
|
|
489
|
92
|
|
|
|
|
526
|
return ($event_name, $span, $pos); |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
} # End of _validate_event. |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
# ------------------------------------------------ |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
1; |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=pod |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=head1 NAME |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
C - Parse a Perl regexp into a data structure of type L |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Warning: Development version. See L for details. |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=head1 Synopsis |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
This is scripts/synopsis.pl: |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
#!/usr/bin/env perl |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
use strict; |
512
|
|
|
|
|
|
|
use warnings; |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
use Regexp::Parsertron; |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
# --------------------- |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
my($re) = qr/Perl|JavaScript/i; |
519
|
|
|
|
|
|
|
my($parser) = Regexp::Parsertron -> new(verbose => 1); |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
# Return 0 for success and 1 for failure. |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
my($result) = $parser -> parse(re => $re); |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
print "Calling add(text => '|C++', uid => 6)\n"; |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
$parser -> add(text => '|C++', uid => 6); |
528
|
|
|
|
|
|
|
$parser -> raw_tree; |
529
|
|
|
|
|
|
|
$parser -> cooked_tree; |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
my($as_string) = $parser -> as_string; |
532
|
|
|
|
|
|
|
my($as_re) = $parser -> as_re; |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
print "Original: $re. Result: $result. (0 is success)\n"; |
535
|
|
|
|
|
|
|
print "as_string: $as_string\n"; |
536
|
|
|
|
|
|
|
print "as_re: $as_re\n"; |
537
|
|
|
|
|
|
|
print 'Perl error count: ', $parser -> perl_error_count, "\n"; |
538
|
|
|
|
|
|
|
print 'Marpa error count: ', $parser -> marpa_error_count, "\n"; |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
my($target) = 'C++'; |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
if ($target =~ $as_re) |
543
|
|
|
|
|
|
|
{ |
544
|
|
|
|
|
|
|
print "Matches $target (without using \Q...\E)\n"; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
else |
547
|
|
|
|
|
|
|
{ |
548
|
|
|
|
|
|
|
print "Doesn't match $target\n"; |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
And its output: |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
Test count: 1. Parsing '(?^i:Perl|JavaScript)' => (qr/.../) => '(?^i:Perl|JavaScript)'. |
554
|
|
|
|
|
|
|
Root. Attributes: {text => "Root", uid => "0"} |
555
|
|
|
|
|
|
|
|--- open_parenthesis. Attributes: {text => "(", uid => "1"} |
556
|
|
|
|
|
|
|
| |--- question_mark. Attributes: {text => "?", uid => "2"} |
557
|
|
|
|
|
|
|
| |--- caret. Attributes: {text => "^", uid => "3"} |
558
|
|
|
|
|
|
|
| |--- flag_set. Attributes: {text => "i", uid => "4"} |
559
|
|
|
|
|
|
|
| |--- colon. Attributes: {text => ":", uid => "5"} |
560
|
|
|
|
|
|
|
| |--- character_set. Attributes: {text => "Perl|JavaScript", uid => "6"} |
561
|
|
|
|
|
|
|
|--- close_parenthesis. Attributes: {text => ")", uid => "7"} |
562
|
|
|
|
|
|
|
Calling add(text => '|C++', uid => 6) |
563
|
|
|
|
|
|
|
Root. Attributes: {text => "Root", uid => "0"} |
564
|
|
|
|
|
|
|
|--- open_parenthesis. Attributes: {text => "(", uid => "1"} |
565
|
|
|
|
|
|
|
| |--- question_mark. Attributes: {text => "?", uid => "2"} |
566
|
|
|
|
|
|
|
| |--- caret. Attributes: {text => "^", uid => "3"} |
567
|
|
|
|
|
|
|
| |--- flag_set. Attributes: {text => "i", uid => "4"} |
568
|
|
|
|
|
|
|
| |--- colon. Attributes: {text => ":", uid => "5"} |
569
|
|
|
|
|
|
|
| |--- character_set. Attributes: {text => "Perl|JavaScript|C++", uid => "6"} |
570
|
|
|
|
|
|
|
|--- close_parenthesis. Attributes: {text => ")", uid => "7"} |
571
|
|
|
|
|
|
|
Name Uid Text |
572
|
|
|
|
|
|
|
---- --- ---- |
573
|
|
|
|
|
|
|
open_parenthesis 1 ( |
574
|
|
|
|
|
|
|
question_mark 2 ? |
575
|
|
|
|
|
|
|
caret 3 ^ |
576
|
|
|
|
|
|
|
flag_set 4 i |
577
|
|
|
|
|
|
|
colon 5 : |
578
|
|
|
|
|
|
|
character_set 6 Perl|JavaScript|C++ |
579
|
|
|
|
|
|
|
close_parenthesis 7 ) |
580
|
|
|
|
|
|
|
Original: (?^i:Perl|JavaScript). Result: 0. (0 is success) |
581
|
|
|
|
|
|
|
as_string: (?^i:Perl|JavaScript|C++) |
582
|
|
|
|
|
|
|
as_re: (?^i:Perl|JavaScript|C++) |
583
|
|
|
|
|
|
|
Perl error count: 0 |
584
|
|
|
|
|
|
|
Marpa error count: 0 |
585
|
|
|
|
|
|
|
Matches C++ (without using \Q...\E) |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=head1 Description |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
Parses a regexp into a tree object managed by the L module, and provides various methods for |
590
|
|
|
|
|
|
|
updating and retrieving that tree's contents. |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
Warning: Development version. See L for details. |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
This module uses L. |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=head1 Distributions |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
This module is available as a Unix-style distro (*.tgz). |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
See L |
601
|
|
|
|
|
|
|
for help on unpacking and installing distros. |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=head1 Installation |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
Install L as you would any C module: |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
Run: |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
cpanm Regexp::Parsertron |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
or run: |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
sudo cpan Regexp::Parsertron |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
or unpack the distro, and then use: |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
perl Makefile.PL |
618
|
|
|
|
|
|
|
make (or dmake or nmake) |
619
|
|
|
|
|
|
|
make test |
620
|
|
|
|
|
|
|
make install |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=head1 Constructor and Initialization |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
C is called as C<< my($parser) = Regexp::Parsertron -> new(k1 => v1, k2 => v2, ...) >>. |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
It returns a new object of type C. |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
Key-value pairs accepted in the parameter list (see corresponding methods for details |
629
|
|
|
|
|
|
|
[e.g. L]): |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=over 4 |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=item o re => $regexp |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
The C method of L is called to see what C is. If it's already of the |
636
|
|
|
|
|
|
|
form C, then it's processed as is, but if it's not, then it's transformed using C. |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
Warning: Currently, the input is expected to have been pre-processed by Perl via qr/$regexp/. |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
Default: ''. |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
=item o verbose => $integer |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
Takes values 0, 1 or 2, which print more and more progress reports. |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
Used for debugging. |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
Default: 0 (print nothing). |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=back |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=head1 Methods |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=head2 add(%opts) |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
Add a string to the text of a node. |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
%opts is a hash with these (key => value) pairs: |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
=over 4 |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=item o text => $string |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
The text to add. |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=item o uid => $uid |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
The uid of the node to update. |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
=back |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
See scripts/simple.pl for sample code. |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
Note: Calling C never changes the uids of nodes, so repeated calling of C with the |
675
|
|
|
|
|
|
|
same C will apply more and more updates to the same node. |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=head2 as_re() |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
Returns the parsed regexp as a string matching what Perl would return from qr/.../. |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
=head2 as_string() |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
Returns the parsed regexp as a string. |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=head2 cooked_tree() |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
Prints, in a pretty format, the tree built from parsing. |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
See also L. |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
=head2 error_str() |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
Returns the last error, as a string. |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
Errors will be in 1 of 2 categories: |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=over 4 |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=item o Perl errors |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
These arise when Perl cannot interpret the string form of the regexp supplied by you, when the code |
702
|
|
|
|
|
|
|
checks it using qr/$re/. |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
=item o Marpa errors |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
These arise when the BNF within the module is such that the string form of the regexp cannot be |
707
|
|
|
|
|
|
|
parsed by Marpa. |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
If you can use the regexp in Perl code, then you should never get this error. In other words, if |
711
|
|
|
|
|
|
|
Perl accepts the regexp and the module does not, then the BNF in this module is wrong (barring bugs |
712
|
|
|
|
|
|
|
in Perl of course). |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
=back |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
See also L and L. |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=head2 marpa_error_count() |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
Returns an integer count of errors detected by Marpa. This value should always be 0. |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
See also L. |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
Used basically for debugging. |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=head2 new([%opts]) |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
Here, '[]' indicate an optional parameter. |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
See L for details on the parameters accepted by L. |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
=head2 parse([%opts]) |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
Here, '[]' indicate an optional parameter. |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
Parses the regexp supplied with the parameter C in the call to L or in the call to |
737
|
|
|
|
|
|
|
L, or in the call to C<< parse(re => $regexp) >> itself. The latter takes precedence. |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
The hash C<%opts> takes the same (key => value) pairs as L does. |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
See L for details. |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
=head2 perl_error_count() |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
Returns an integer count of errors detected by perl. This value should always be 0. |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
See also L. |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
Used basically for debugging. |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
=head2 raw_tree() |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
Prints, in a simple format, the tree built from parsing. |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
See also L. |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
=head2 re([$regexp]) |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
Here, '[]' indicate an optional parameter. |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
Gets or sets the regexp to be processed. |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
Note: C is a parameter to L. |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
=head2 reset() |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
Resets various internal thingys, except test_count. |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
Used basically for debugging. |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=head2 tree() |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
Returns an object of type L. Ignore the root node. |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
Each node's C method returns a hashref of information about the node. See the L for |
776
|
|
|
|
|
|
|
details. |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
See also the source code for L and L for ideas on how to use this |
779
|
|
|
|
|
|
|
object. |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
=head2 uid() |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
Returns the last-used uid. |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
Each node in the tree is given a uid, which allows methods like L to work. |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
=head2 verbose([$integer]) |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
Here, '[]' indicate an optional parameter. |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
Gets or sets the verbosity level, within the range 0 .. 2. Higher numbers print more progress |
792
|
|
|
|
|
|
|
reports. |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
Used basically for debugging. |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
Note: C is a parameter to L. |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=head1 FAQ |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
=head2 What is the format of the nodes in the tree build by this module? |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
Each node's C method returns a hashref with these (key => value) pairs: |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
=over 4 |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
=item o name => $string |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
This is the name of the Marpa-style event which was triggered by detection of some C within |
809
|
|
|
|
|
|
|
the regexp. |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
=item o text => $string |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
This is the text within the regexp which triggered the event just mentioned. |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
=back |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
See also the source code for L and L for ideas on how to use this |
818
|
|
|
|
|
|
|
object. |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
See the L for sample code and a report after parsing a tiny regexp. |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=head2 What is the purpose of this module? |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=over 4 |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
=item o To provide a stand-alone parser for regexps |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=item o To help me learn more about regexps |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
=item o To become, I hope, a replacement for the horrendously complex L |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=back |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
=head2 Does this module interpret regexps in any way? |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
No. You have to run your own Perl code to do that. This module just parses them into a data |
837
|
|
|
|
|
|
|
structure. |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
And that really means this module does not match the regexp against anything. If I appear to do that |
840
|
|
|
|
|
|
|
while debugging new code, you can't rely on that appearing in production versions of the module. |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
=head2 Does this module re-write regexps? |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
Yes, on a small scale so far. See scripts/simple.pl for sample code. The source of this program |
845
|
|
|
|
|
|
|
and its output are given in the L. |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
=head2 Does this module handle both Perl5 and Perl6? |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
Initially, it will only handle Perl5 syntax. |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=head2 Does this module handle various versions of regexps (i.e., of Perl5)? |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
Yes, version-dependent regexp syntax will be supported for recent versions of Perl. This is done by |
854
|
|
|
|
|
|
|
having tokens within the BNF which are replaced at start-up time with version-dependent details. |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
There are no such tokens at the moment. |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
All debugging is done assuming the regexp syntax as documented online. See L for the |
859
|
|
|
|
|
|
|
urls in question. |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
=head2 Is this an exhaustion-hating or exhaustion-loving app? |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
Exhaustion-loving. |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
In short, Marpa will always report 'Parse exhausted', but I. |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
See L |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
=head1 References |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
L. This is the definitive document. |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
L. Samples with commentary. |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
L |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
L |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
L |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
L |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
=head1 See Also |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
L |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
L |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
L |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
L |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
L |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
L |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
L |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
L |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
L. This is vaguely a version of L. |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
L |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
L |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
And many others... |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
=head1 Machine-Readable Change Log |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
The file Changes was converted into Changelog.ini by L. |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
=head1 Version Numbers |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions. |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
=head1 Repository |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
L |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
=head1 Support |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
Email the author, or log a bug on RT: |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
L. |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=head1 Author |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
L was written by Ron Savage Iron@savage.net.auE> in 2011. |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
Marpa's homepage: L. |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
My homepage: L. |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
=head1 Copyright |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
Australian copyright (c) 2016, Ron Savage. |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
All Programs of mine are 'OSI Certified Open Source Software'; |
940
|
|
|
|
|
|
|
you can redistribute them and/or modify them under the terms of |
941
|
|
|
|
|
|
|
The Artistic License 2.0, a copy of which is available at: |
942
|
|
|
|
|
|
|
http://opensource.org/licenses/alphabetical. |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
=cut |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
__DATA__ |