line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Regexp::Parsertron; |
2
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
48414
|
use strict; |
|
6
|
|
|
|
|
35
|
|
|
6
|
|
|
|
|
132
|
|
4
|
6
|
|
|
6
|
|
24
|
use warnings; |
|
6
|
|
|
|
|
8
|
|
|
6
|
|
|
|
|
130
|
|
5
|
|
|
|
|
|
|
#use warnings qw(FATAL utf8); # Fatalize encoding glitches. |
6
|
|
|
|
|
|
|
|
7
|
6
|
|
|
6
|
|
2213
|
use Data::Section::Simple 'get_data_section'; |
|
6
|
|
|
|
|
2934
|
|
|
6
|
|
|
|
|
281
|
|
8
|
|
|
|
|
|
|
|
9
|
6
|
|
|
6
|
|
2130
|
use Marpa::R2; |
|
6
|
|
|
|
|
737094
|
|
|
6
|
|
|
|
|
216
|
|
10
|
|
|
|
|
|
|
|
11
|
6
|
|
|
6
|
|
2592
|
use Moo; |
|
6
|
|
|
|
|
59700
|
|
|
6
|
|
|
|
|
23
|
|
12
|
|
|
|
|
|
|
|
13
|
6
|
|
|
6
|
|
9336
|
use Scalar::Does '-constants'; # For does(). |
|
6
|
|
|
|
|
536415
|
|
|
6
|
|
|
|
|
61
|
|
14
|
|
|
|
|
|
|
|
15
|
6
|
|
|
6
|
|
14304
|
use Tree; |
|
6
|
|
|
|
|
27697
|
|
|
6
|
|
|
|
|
162
|
|
16
|
|
|
|
|
|
|
|
17
|
6
|
|
|
6
|
|
33
|
use Try::Tiny; |
|
6
|
|
|
|
|
10
|
|
|
6
|
|
|
|
|
276
|
|
18
|
|
|
|
|
|
|
|
19
|
6
|
|
|
6
|
|
30
|
use Types::Standard qw/Any Int Str/; |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
37
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
has bnf => |
22
|
|
|
|
|
|
|
( |
23
|
|
|
|
|
|
|
default => sub{return ''}, |
24
|
|
|
|
|
|
|
is => 'rw', |
25
|
|
|
|
|
|
|
isa => Any, |
26
|
|
|
|
|
|
|
required => 0, |
27
|
|
|
|
|
|
|
); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
has current_node => |
30
|
|
|
|
|
|
|
( |
31
|
|
|
|
|
|
|
default => sub{return ''}, |
32
|
|
|
|
|
|
|
is => 'rw', |
33
|
|
|
|
|
|
|
isa => Any, |
34
|
|
|
|
|
|
|
required => 0, |
35
|
|
|
|
|
|
|
); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
has grammar => |
38
|
|
|
|
|
|
|
( |
39
|
|
|
|
|
|
|
default => sub {return ''}, |
40
|
|
|
|
|
|
|
is => 'rw', |
41
|
|
|
|
|
|
|
isa => Any, |
42
|
|
|
|
|
|
|
required => 0, |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
has re => |
46
|
|
|
|
|
|
|
( |
47
|
|
|
|
|
|
|
default => sub {return ''}, |
48
|
|
|
|
|
|
|
is => 'rw', |
49
|
|
|
|
|
|
|
isa => Any, |
50
|
|
|
|
|
|
|
required => 0, |
51
|
|
|
|
|
|
|
); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
has recce => |
54
|
|
|
|
|
|
|
( |
55
|
|
|
|
|
|
|
default => sub{return ''}, |
56
|
|
|
|
|
|
|
is => 'rw', |
57
|
|
|
|
|
|
|
isa => Any, |
58
|
|
|
|
|
|
|
required => 0, |
59
|
|
|
|
|
|
|
); |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
has test_count => |
62
|
|
|
|
|
|
|
( |
63
|
|
|
|
|
|
|
default => sub{return 0}, |
64
|
|
|
|
|
|
|
is => 'rw', |
65
|
|
|
|
|
|
|
isa => Int, |
66
|
|
|
|
|
|
|
required => 0, |
67
|
|
|
|
|
|
|
); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
has tree => |
70
|
|
|
|
|
|
|
( |
71
|
|
|
|
|
|
|
default => sub{return Tree -> new('Root')}, |
72
|
|
|
|
|
|
|
is => 'rw', |
73
|
|
|
|
|
|
|
isa => Any, |
74
|
|
|
|
|
|
|
required => 0, |
75
|
|
|
|
|
|
|
); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
has uid => |
78
|
|
|
|
|
|
|
( |
79
|
|
|
|
|
|
|
default => sub {return 0}, |
80
|
|
|
|
|
|
|
is => 'rw', |
81
|
|
|
|
|
|
|
isa => Int, |
82
|
|
|
|
|
|
|
required => 0, |
83
|
|
|
|
|
|
|
); |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
has verbose => |
86
|
|
|
|
|
|
|
( |
87
|
|
|
|
|
|
|
default => sub {return 0}, |
88
|
|
|
|
|
|
|
is => 'rw', |
89
|
|
|
|
|
|
|
isa => Int, |
90
|
|
|
|
|
|
|
required => 0, |
91
|
|
|
|
|
|
|
); |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
has warning_str => |
94
|
|
|
|
|
|
|
( |
95
|
|
|
|
|
|
|
default => sub {return ''}, |
96
|
|
|
|
|
|
|
is => 'rw', |
97
|
|
|
|
|
|
|
isa => Str, |
98
|
|
|
|
|
|
|
required => 0, |
99
|
|
|
|
|
|
|
); |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
our $VERSION = '1.03'; |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# ------------------------------------------------ |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub BUILD |
106
|
|
|
|
|
|
|
{ |
107
|
5
|
|
|
5
|
0
|
99
|
my($self) = @_; |
108
|
5
|
|
|
|
|
23
|
my($bnf) = get_data_section('V 5.20'); |
109
|
|
|
|
|
|
|
|
110
|
5
|
|
|
|
|
5924
|
$self -> bnf($bnf); |
111
|
5
|
|
|
|
|
200
|
$self -> grammar |
112
|
|
|
|
|
|
|
( |
113
|
|
|
|
|
|
|
Marpa::R2::Scanless::G -> new |
114
|
|
|
|
|
|
|
({ |
115
|
|
|
|
|
|
|
source => \$self -> bnf |
116
|
|
|
|
|
|
|
}) |
117
|
|
|
|
|
|
|
); |
118
|
5
|
|
|
|
|
2017600
|
$self -> reset; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
} # End of BUILD. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# ------------------------------------------------ |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub append |
125
|
|
|
|
|
|
|
{ |
126
|
2
|
|
|
2
|
1
|
23
|
my($self, %opts) = @_; |
127
|
|
|
|
|
|
|
|
128
|
2
|
|
|
|
|
6
|
for my $param (qw/text uid/) |
129
|
|
|
|
|
|
|
{ |
130
|
4
|
50
|
|
|
|
19
|
die "Method append() takes a hash with these keys: text, uid\n" if (! defined($opts{$param}) ); |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
2
|
|
|
|
|
5
|
my($meta); |
134
|
|
|
|
|
|
|
my($uid); |
135
|
|
|
|
|
|
|
|
136
|
2
|
|
|
|
|
40
|
for my $node ($self -> tree -> traverse) |
137
|
|
|
|
|
|
|
{ |
138
|
14
|
100
|
|
|
|
268
|
next if ($node -> is_root); |
139
|
|
|
|
|
|
|
|
140
|
12
|
|
|
|
|
75
|
$meta = $node -> meta; |
141
|
12
|
|
|
|
|
67
|
$uid = $$meta{uid}; |
142
|
|
|
|
|
|
|
|
143
|
12
|
100
|
|
|
|
25
|
if ($opts{uid} == $uid) |
144
|
|
|
|
|
|
|
{ |
145
|
2
|
|
|
|
|
7
|
$$meta{text} .= $opts{text}; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
} # End of append. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# ------------------------------------------------ |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub _add_daughter |
154
|
|
|
|
|
|
|
{ |
155
|
14841
|
|
|
14841
|
|
26962
|
my($self, $event_name, $attributes) = @_; |
156
|
14841
|
|
|
|
|
227402
|
$$attributes{uid} = $self -> uid($self -> uid + 1); |
157
|
14841
|
|
|
|
|
384243
|
my($node) = Tree -> new($event_name); |
158
|
|
|
|
|
|
|
|
159
|
14841
|
|
|
|
|
531561
|
$node -> meta($attributes); |
160
|
|
|
|
|
|
|
|
161
|
14841
|
100
|
|
|
|
175833
|
if ($event_name =~ /^close_(?:bracket|parenthesis)$/) |
162
|
|
|
|
|
|
|
{ |
163
|
3089
|
|
|
|
|
48998
|
$self -> current_node($self -> current_node -> parent); |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
14841
|
|
|
|
|
322566
|
$self -> current_node -> add_child($node); |
167
|
|
|
|
|
|
|
|
168
|
14841
|
100
|
100
|
|
|
3322057
|
if ( ($event_name =~ /^open_(?:bracket|parenthesis)$/) || ($event_name =~ /_prefix$/) ) |
169
|
|
|
|
|
|
|
{ |
170
|
3308
|
|
|
|
|
54622
|
$self -> current_node($node); |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
} # End of _add_daughter. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# ------------------------------------------------ |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub as_string |
178
|
|
|
|
|
|
|
{ |
179
|
872
|
|
|
872
|
1
|
6560
|
my($self) = @_; |
180
|
872
|
|
|
|
|
1889
|
my($string) = ''; |
181
|
|
|
|
|
|
|
|
182
|
872
|
|
|
|
|
1576
|
my($meta); |
183
|
|
|
|
|
|
|
|
184
|
872
|
|
|
|
|
14877
|
for my $node ($self -> tree -> traverse) |
185
|
|
|
|
|
|
|
{ |
186
|
8382
|
100
|
|
|
|
123145
|
next if ($node -> is_root); |
187
|
|
|
|
|
|
|
|
188
|
7510
|
|
|
|
|
45367
|
$meta = $node -> meta; |
189
|
7510
|
|
|
|
|
41029
|
$string .= $$meta{text}; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
872
|
|
|
|
|
2515
|
return $string; |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
} # End of as_string. |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# ------------------------------------------------ |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
sub find |
199
|
|
|
|
|
|
|
{ |
200
|
2
|
|
|
2
|
1
|
260
|
my($self, $target) = @_; |
201
|
|
|
|
|
|
|
|
202
|
2
|
50
|
|
|
|
7
|
die "Method find() takes a defined value as the parameter\n" if (! defined $target); |
203
|
|
|
|
|
|
|
|
204
|
2
|
|
|
|
|
7
|
my(@found); |
205
|
|
|
|
|
|
|
my($meta); |
206
|
|
|
|
|
|
|
|
207
|
2
|
|
|
|
|
37
|
for my $node ($self -> tree -> traverse) |
208
|
|
|
|
|
|
|
{ |
209
|
26
|
100
|
|
|
|
319
|
next if ($node -> is_root); |
210
|
|
|
|
|
|
|
|
211
|
24
|
|
|
|
|
139
|
$meta = $node -> meta; |
212
|
|
|
|
|
|
|
|
213
|
24
|
100
|
|
|
|
128
|
if (index($$meta{text}, $target) >= 0) |
214
|
|
|
|
|
|
|
{ |
215
|
3
|
|
|
|
|
6
|
push @found, $$meta{uid}; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
2
|
|
|
|
|
8
|
return [@found]; |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
} # End of find. |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# ------------------------------------------------ |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub get |
226
|
|
|
|
|
|
|
{ |
227
|
7
|
|
|
7
|
1
|
1333
|
my($self, $wanted_uid) = @_; |
228
|
7
|
|
|
|
|
128
|
my($max_uid) = $self -> uid; |
229
|
|
|
|
|
|
|
|
230
|
7
|
50
|
33
|
|
|
145
|
if (! defined($wanted_uid) || ($wanted_uid < 1) || ($wanted_uid > $self -> uid) ) |
|
|
|
33
|
|
|
|
|
231
|
|
|
|
|
|
|
{ |
232
|
0
|
|
|
|
|
0
|
die "Method get() takes a uid parameter in the range 1 .. $max_uid\n"; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
7
|
|
|
|
|
46
|
my($meta); |
236
|
|
|
|
|
|
|
my($text); |
237
|
7
|
|
|
|
|
0
|
my($uid); |
238
|
|
|
|
|
|
|
|
239
|
7
|
|
|
|
|
92
|
for my $node ($self -> tree -> traverse) |
240
|
|
|
|
|
|
|
{ |
241
|
49
|
100
|
|
|
|
697
|
next if ($node -> is_root); |
242
|
|
|
|
|
|
|
|
243
|
42
|
|
|
|
|
257
|
$meta = $node -> meta; |
244
|
42
|
|
|
|
|
205
|
$uid = $$meta{uid}; |
245
|
|
|
|
|
|
|
|
246
|
42
|
100
|
|
|
|
66
|
if ($wanted_uid == $uid) |
247
|
|
|
|
|
|
|
{ |
248
|
7
|
|
|
|
|
13
|
$text = $$meta{text}; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
7
|
|
|
|
|
19
|
return $text; |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
} # End of get. |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# ------------------------------------------------ |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub _next_few_chars |
259
|
|
|
|
|
|
|
{ |
260
|
14833
|
|
|
14833
|
|
23862
|
my($self, $stringref, $offset) = @_; |
261
|
14833
|
|
|
|
|
29350
|
my($s) = substr($$stringref, $offset, 20); |
262
|
14833
|
|
|
|
|
25988
|
$s =~ tr/\n/ /; |
263
|
14833
|
|
|
|
|
29711
|
$s =~ s/^\s+//; |
264
|
14833
|
|
|
|
|
25330
|
$s =~ s/\s+$//; |
265
|
|
|
|
|
|
|
|
266
|
14833
|
|
|
|
|
29830
|
return $s; |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
} # End of _next_few_chars. |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# ------------------------------------------------ |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
sub parse |
273
|
|
|
|
|
|
|
{ |
274
|
1525
|
|
|
1525
|
1
|
3171860
|
my($self, %opts) = @_; |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# Emulate parts of new(), which makes things a bit earier for the caller. |
277
|
|
|
|
|
|
|
|
278
|
1525
|
50
|
|
|
|
38236
|
$self -> re($opts{re}) if (defined $opts{re}); |
279
|
1525
|
50
|
|
|
|
41976
|
$self -> verbose($opts{verbose}) if (defined $opts{verbose}); |
280
|
1525
|
|
|
|
|
23900
|
$self -> warning_str(''); |
281
|
|
|
|
|
|
|
|
282
|
1525
|
|
|
|
|
54109
|
$self -> recce |
283
|
|
|
|
|
|
|
( |
284
|
|
|
|
|
|
|
Marpa::R2::Scanless::R -> new |
285
|
|
|
|
|
|
|
({ |
286
|
|
|
|
|
|
|
exhaustion => 'event', |
287
|
|
|
|
|
|
|
grammar => $self -> grammar, |
288
|
|
|
|
|
|
|
}) |
289
|
|
|
|
|
|
|
); |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# Return 0 for success and 1 for failure. |
292
|
|
|
|
|
|
|
|
293
|
1525
|
|
|
|
|
540029
|
my($result) = 0; |
294
|
|
|
|
|
|
|
|
295
|
1525
|
|
|
|
|
2957
|
my($message); |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
try |
298
|
|
|
|
|
|
|
{ |
299
|
1525
|
100
|
|
1525
|
|
53844
|
if (defined (my $value = $self -> _process) ) |
300
|
|
|
|
|
|
|
{ |
301
|
874
|
50
|
|
|
|
19573008
|
$self -> print_cooked_tree if ($self -> verbose > 1); |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
else |
304
|
|
|
|
|
|
|
{ |
305
|
10
|
|
|
|
|
581
|
$result = 1; |
306
|
|
|
|
|
|
|
|
307
|
10
|
|
|
|
|
20
|
my($message) = 'Error: Marpa parse failed. '; |
308
|
|
|
|
|
|
|
|
309
|
10
|
50
|
|
|
|
147
|
print $message, "\n" if ($self -> verbose); |
310
|
|
|
|
|
|
|
|
311
|
10
|
|
|
|
|
117
|
die $message; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
catch |
315
|
|
|
|
|
|
|
{ |
316
|
651
|
|
|
651
|
|
190842
|
die $_; |
317
|
1525
|
|
|
|
|
10522
|
}; |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# Return 0 for success and 1 for failure. |
320
|
|
|
|
|
|
|
|
321
|
874
|
|
|
|
|
27834
|
return $result; |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
} # End of parse. |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
# ------------------------------------------------ |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
sub prepend |
328
|
|
|
|
|
|
|
{ |
329
|
2
|
|
|
2
|
1
|
504
|
my($self, %opts) = @_; |
330
|
|
|
|
|
|
|
|
331
|
2
|
|
|
|
|
5
|
for my $param (qw/text uid/) |
332
|
|
|
|
|
|
|
{ |
333
|
4
|
50
|
|
|
|
11
|
die "Method append() takes a hash with these keys: text, uid\n" if (! defined($opts{$param}) ); |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
2
|
|
|
|
|
5
|
my($meta); |
337
|
|
|
|
|
|
|
my($uid); |
338
|
|
|
|
|
|
|
|
339
|
2
|
|
|
|
|
39
|
for my $node ($self -> tree -> traverse) |
340
|
|
|
|
|
|
|
{ |
341
|
14
|
100
|
|
|
|
203
|
next if ($node -> is_root); |
342
|
|
|
|
|
|
|
|
343
|
12
|
|
|
|
|
75
|
$meta = $node -> meta; |
344
|
12
|
|
|
|
|
57
|
$uid = $$meta{uid}; |
345
|
|
|
|
|
|
|
|
346
|
12
|
100
|
|
|
|
25
|
if ($opts{uid} == $uid) |
347
|
|
|
|
|
|
|
{ |
348
|
2
|
|
|
|
|
5
|
$$meta{text} = "$opts{text}$$meta{text}"; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
} # End of prepend. |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
# ------------------------------------------------ |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub _process |
357
|
|
|
|
|
|
|
{ |
358
|
1525
|
|
|
1525
|
|
3067
|
my($self) = @_; |
359
|
1525
|
|
|
|
|
27752
|
my($raw_re) = $self -> re; |
360
|
1525
|
|
|
|
|
30046
|
my($test_count) = $self -> test_count($self -> test_count + 1); |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# This line is 'print', not 'say'! |
363
|
|
|
|
|
|
|
|
364
|
1525
|
50
|
|
|
|
62858
|
print "Test count: $test_count. Parsing (in qr/.../ form): " if ($self -> verbose); |
365
|
|
|
|
|
|
|
|
366
|
1525
|
|
|
|
|
11500
|
my($string_re) = $self -> _string2re($raw_re); |
367
|
|
|
|
|
|
|
|
368
|
1525
|
50
|
|
|
|
4773
|
if ($string_re eq '') |
369
|
|
|
|
|
|
|
{ |
370
|
0
|
0
|
|
|
|
0
|
print "\n" if ($self -> verbose); |
371
|
|
|
|
|
|
|
|
372
|
0
|
|
|
|
|
0
|
return undef; |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
1525
|
50
|
|
|
|
26143
|
print "'$string_re'. \n" if ($self -> verbose); |
376
|
|
|
|
|
|
|
|
377
|
1525
|
50
|
|
|
|
25865
|
if ($self -> verbose > 1) |
378
|
|
|
|
|
|
|
{ |
379
|
0
|
|
|
|
|
0
|
my($format) = "%-10s %-5s %-20s %-6s %-30s %s \n"; |
380
|
|
|
|
|
|
|
|
381
|
0
|
|
|
|
|
0
|
print sprintf($format, ' Location', 'Width', 'Lexeme', 'Events', 'Names', 'Next few chars'); |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
|
385
|
1525
|
|
|
|
|
10412
|
my($ref_re) = \"$string_re"; # Use " in comment for UltraEdit. |
386
|
1525
|
|
|
|
|
3556
|
my($length) = length($string_re); |
387
|
|
|
|
|
|
|
|
388
|
1525
|
|
|
|
|
6968
|
my($child); |
389
|
|
|
|
|
|
|
my($event_name); |
390
|
1525
|
|
|
|
|
0
|
my($lexeme); |
391
|
1525
|
|
|
|
|
0
|
my($pos); |
392
|
1525
|
|
|
|
|
0
|
my($span, $start); |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
# We use read()/lexeme_read()/resume() because we pause at each lexeme. |
395
|
|
|
|
|
|
|
|
396
|
1525
|
|
|
|
|
20517
|
for |
397
|
|
|
|
|
|
|
( |
398
|
|
|
|
|
|
|
$pos = $self -> recce -> read($ref_re); |
399
|
|
|
|
|
|
|
($pos < $length); |
400
|
|
|
|
|
|
|
$pos = $self -> recce -> resume($pos) |
401
|
|
|
|
|
|
|
) |
402
|
|
|
|
|
|
|
{ |
403
|
14843
|
|
|
|
|
1621671
|
($start, $span) = $self -> recce -> pause_span; |
404
|
14843
|
|
|
|
|
165271
|
($event_name, $span, $pos) = $self -> _validate_event($ref_re, $start, $span, $pos,); |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# If the input is exhausted, we exit immediately so we don't try to use |
407
|
|
|
|
|
|
|
# the values of $start, $span or $pos. They are ignored upon exit. |
408
|
|
|
|
|
|
|
|
409
|
14843
|
100
|
|
|
|
29192
|
last if ($event_name eq "'exhausted"); # Yes, it has a leading quote. |
410
|
|
|
|
|
|
|
|
411
|
14833
|
|
|
|
|
182145
|
$lexeme = $self -> recce -> literal($start, $span); |
412
|
14833
|
|
|
|
|
316321
|
$pos = $self -> recce -> lexeme_read($event_name); |
413
|
|
|
|
|
|
|
|
414
|
14833
|
50
|
|
|
|
774466
|
die "Marpa lexeme_read($event_name) rejected lexeme '$lexeme'\n" if (! defined $pos); |
415
|
|
|
|
|
|
|
|
416
|
14833
|
|
|
|
|
49941
|
$self -> _add_daughter($event_name, {text => $lexeme}); |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
1425
|
|
|
|
|
37934
|
my($message); |
420
|
|
|
|
|
|
|
|
421
|
1425
|
100
|
|
|
|
20564
|
if (my $status = $self -> recce -> ambiguous) |
|
|
100
|
|
|
|
|
|
422
|
|
|
|
|
|
|
{ |
423
|
551
|
|
|
|
|
10611213
|
my($terminals) = $self -> recce -> terminals_expected; |
424
|
551
|
100
|
|
|
|
20368
|
$terminals = ['(None)'] if ($#$terminals < 0); |
425
|
551
|
|
|
|
|
2639
|
$message = "Marpa warning. Parse ambiguous. Status: $status. Terminals expected: " . join(', ', @$terminals); |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
elsif ($self -> recce -> exhausted) |
428
|
|
|
|
|
|
|
{ |
429
|
|
|
|
|
|
|
# Special case. Sigh. I need to patch the BNF to do this. TODO. |
430
|
|
|
|
|
|
|
|
431
|
860
|
100
|
66
|
|
|
95026
|
if ( ($pos + 1 == $length) && (substr($string_re, $pos, 1) eq ')') ) |
432
|
|
|
|
|
|
|
{ |
433
|
8
|
|
|
|
|
35
|
$self -> _add_daughter('close_parenthesis', {text => ')'}); |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# See https://metacpan.org/pod/distribution/Marpa-R2/pod/Exhaustion.pod#Exhaustion |
437
|
|
|
|
|
|
|
# for why this code is exhaustion-loving. |
438
|
|
|
|
|
|
|
|
439
|
860
|
50
|
|
|
|
12047
|
$message = 'Marpa parse exhausted' if ($self -> verbose > 1); |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
1425
|
100
|
|
|
|
9414
|
if ($message) |
443
|
|
|
|
|
|
|
{ |
444
|
551
|
|
|
|
|
9483
|
$self -> warning_str($message); |
445
|
|
|
|
|
|
|
|
446
|
551
|
50
|
|
|
|
23470
|
print $message, "\n" if ($self -> verbose); |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
|
449
|
1425
|
50
|
|
|
|
20977
|
$self -> print_raw_tree if ($self -> verbose); |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# Return a defined value for success and undef for failure. |
452
|
|
|
|
|
|
|
|
453
|
1425
|
|
|
|
|
22798
|
return $self -> recce -> value; |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
} # End of _process. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
# ------------------------------------------------ |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
sub print_cooked_tree |
460
|
|
|
|
|
|
|
{ |
461
|
0
|
|
|
0
|
1
|
0
|
my($self) = @_; |
462
|
0
|
|
|
|
|
0
|
my($format) = "%-30s %3s %s \n"; |
463
|
|
|
|
|
|
|
|
464
|
0
|
|
|
|
|
0
|
print sprintf($format, 'Name', 'Uid', 'Text'); |
465
|
0
|
|
|
|
|
0
|
print sprintf($format, '----', '---', '----'); |
466
|
|
|
|
|
|
|
|
467
|
0
|
|
|
|
|
0
|
my($meta); |
468
|
|
|
|
|
|
|
|
469
|
0
|
|
|
|
|
0
|
for my $node ($self -> tree -> traverse) |
470
|
|
|
|
|
|
|
{ |
471
|
0
|
0
|
|
|
|
0
|
next if ($node -> is_root); |
472
|
|
|
|
|
|
|
|
473
|
0
|
|
|
|
|
0
|
$meta = $node -> meta; |
474
|
|
|
|
|
|
|
|
475
|
0
|
|
|
|
|
0
|
print sprintf($format, $node -> value, $$meta{uid}, $$meta{text}); |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
} # End of print_cooked_tree. |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
# ------------------------------------------------ |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
sub print_raw_tree |
483
|
|
|
|
|
|
|
{ |
484
|
0
|
|
|
0
|
1
|
0
|
my($self) = @_; |
485
|
|
|
|
|
|
|
|
486
|
0
|
|
|
|
|
0
|
print map("$_\n", @{$self -> tree -> tree2string}); |
|
0
|
|
|
|
|
0
|
|
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
} # End of print_raw_tree. |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# ------------------------------------------------ |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub reset |
493
|
|
|
|
|
|
|
{ |
494
|
1528
|
|
|
1528
|
1
|
574402
|
my($self) = @_; |
495
|
|
|
|
|
|
|
|
496
|
1528
|
|
|
|
|
6859
|
$self -> tree(Tree -> new('Root') ); |
497
|
1528
|
|
|
|
|
146397
|
$self -> tree -> meta({text => 'Root', uid => 0}); |
498
|
1528
|
|
|
|
|
44977
|
$self -> current_node($self -> tree); |
499
|
1528
|
|
|
|
|
115087
|
$self -> uid(0); |
500
|
1528
|
|
|
|
|
56407
|
$self -> warning_str(''); |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
} # End of reset. |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
# ------------------------------------------------ |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
sub search |
507
|
|
|
|
|
|
|
{ |
508
|
2
|
|
|
2
|
1
|
20
|
my($self, $target) = @_; |
509
|
|
|
|
|
|
|
|
510
|
2
|
50
|
|
|
|
8
|
die "Method search() takes a defined value as the parameter\n" if (! defined $target); |
511
|
|
|
|
|
|
|
|
512
|
2
|
|
|
|
|
6
|
my($re) = $self -> _string2re($target); |
513
|
|
|
|
|
|
|
|
514
|
2
|
|
|
|
|
32
|
my(@found); |
515
|
|
|
|
|
|
|
my($meta); |
516
|
|
|
|
|
|
|
|
517
|
2
|
|
|
|
|
40
|
for my $node ($self -> tree -> traverse) |
518
|
|
|
|
|
|
|
{ |
519
|
38
|
100
|
|
|
|
481
|
next if ($node -> is_root); |
520
|
|
|
|
|
|
|
|
521
|
36
|
|
|
|
|
201
|
$meta = $node -> meta; |
522
|
|
|
|
|
|
|
|
523
|
36
|
100
|
|
|
|
233
|
if ($$meta{text} =~ $re) |
524
|
|
|
|
|
|
|
{ |
525
|
4
|
|
|
|
|
9
|
push @found, $$meta{uid}; |
526
|
|
|
|
|
|
|
} |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
2
|
|
|
|
|
8
|
return [@found]; |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
} # End of search. |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# ------------------------------------------------ |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
sub set |
536
|
|
|
|
|
|
|
{ |
537
|
1
|
|
|
1
|
1
|
274
|
my($self, %opts) = @_; |
538
|
|
|
|
|
|
|
|
539
|
1
|
|
|
|
|
17
|
for my $param (qw/text uid/) |
540
|
|
|
|
|
|
|
{ |
541
|
2
|
50
|
|
|
|
8
|
die "Method set() takes a hash with these keys: text, uid\n" if (! defined($opts{$param}) ); |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
1
|
|
|
|
|
2
|
my($meta); |
545
|
|
|
|
|
|
|
my($uid); |
546
|
|
|
|
|
|
|
|
547
|
1
|
|
|
|
|
22
|
for my $node ($self -> tree -> traverse) |
548
|
|
|
|
|
|
|
{ |
549
|
7
|
100
|
|
|
|
101
|
next if ($node -> is_root); |
550
|
|
|
|
|
|
|
|
551
|
6
|
|
|
|
|
38
|
$meta = $node -> meta; |
552
|
6
|
|
|
|
|
30
|
$uid = $$meta{uid}; |
553
|
|
|
|
|
|
|
|
554
|
6
|
100
|
|
|
|
15
|
if ($opts{uid} == $uid) |
555
|
|
|
|
|
|
|
{ |
556
|
1
|
|
|
|
|
2
|
$$meta{text} = $opts{text}; |
557
|
|
|
|
|
|
|
} |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
} # End of set. |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# ------------------------------------------------ |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
sub _string2re |
565
|
|
|
|
|
|
|
{ |
566
|
1527
|
|
|
1527
|
|
3948
|
my($self, $raw_re) = @_; |
567
|
|
|
|
|
|
|
|
568
|
1527
|
|
|
|
|
2519
|
my($re); |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
try |
571
|
|
|
|
|
|
|
{ |
572
|
1527
|
100
|
|
1527
|
|
50741
|
$re = does($raw_re, 'Regexp') ? $raw_re : qr/$raw_re/; |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
catch |
575
|
|
|
|
|
|
|
{ |
576
|
0
|
|
|
0
|
|
0
|
my($message) = "Error: Perl cannot convert $raw_re into qr/.../ form"; |
577
|
|
|
|
|
|
|
|
578
|
0
|
0
|
|
|
|
0
|
print $message, "\n" if ($self -> verbose); |
579
|
|
|
|
|
|
|
|
580
|
0
|
|
|
|
|
0
|
die $message; |
581
|
1527
|
|
|
|
|
9300
|
}; |
582
|
|
|
|
|
|
|
|
583
|
1527
|
|
|
|
|
62317
|
return $re; |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
} # End of _string2re. |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
# ------------------------------------------------ |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
sub validate |
590
|
|
|
|
|
|
|
{ |
591
|
0
|
|
|
0
|
0
|
0
|
my($self) = @_; |
592
|
0
|
|
|
|
|
0
|
my($re) = $self -> as_string; |
593
|
|
|
|
|
|
|
|
594
|
0
|
|
|
|
|
0
|
my($result); |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
try |
597
|
|
|
|
|
|
|
{ |
598
|
0
|
0
|
|
0
|
|
0
|
$result = ('x' =~ $re) ? 0 : 0; # Use any test to force Perl to process the Regexp. |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
catch |
601
|
|
|
|
|
|
|
{ |
602
|
0
|
|
|
0
|
|
0
|
$result = 1; # Failure. |
603
|
0
|
|
|
|
|
0
|
}; |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
# Return 0 for success and 1 for failure. |
606
|
|
|
|
|
|
|
|
607
|
0
|
|
|
|
|
0
|
return $result; |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
} # End of validate. |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
# ------------------------------------------------ |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
sub _validate_event |
614
|
|
|
|
|
|
|
{ |
615
|
14843
|
|
|
14843
|
|
26465
|
my($self, $stringref, $start, $span, $pos) = @_; |
616
|
14843
|
|
|
|
|
20228
|
my(@event) = @{$self -> recce -> events}; |
|
14843
|
|
|
|
|
187679
|
|
617
|
14843
|
|
|
|
|
125406
|
my($event_count) = scalar @event; |
618
|
14843
|
|
|
|
|
25146
|
my(@event_names) = sort map{$$_[0]} @event; |
|
15027
|
|
|
|
|
40743
|
|
619
|
14843
|
|
|
|
|
24471
|
my($event_name) = $event_names[0]; # Default. |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
# Handle some special cases. |
622
|
|
|
|
|
|
|
|
623
|
14843
|
100
|
|
|
|
28755
|
if ($event_count > 1) |
624
|
|
|
|
|
|
|
{ |
625
|
184
|
|
|
|
|
523
|
my($event_list) = join(', ', @event_names); |
626
|
|
|
|
|
|
|
|
627
|
184
|
100
|
|
|
|
734
|
if ($event_list eq 'caret, string') |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
628
|
|
|
|
|
|
|
{ |
629
|
121
|
|
|
|
|
188
|
$event_count = 1; |
630
|
121
|
|
|
|
|
212
|
$event_name = 'caret'; |
631
|
121
|
|
|
|
|
267
|
@event_names = $event_name; |
632
|
121
|
|
|
|
|
212
|
$pos = $start; |
633
|
121
|
|
|
|
|
235
|
$span = 1; |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
elsif ($event_list eq 'query, string') |
636
|
|
|
|
|
|
|
{ |
637
|
23
|
|
|
|
|
44
|
$event_count = 1; |
638
|
23
|
|
|
|
|
62
|
$event_name = 'query'; |
639
|
23
|
|
|
|
|
50
|
@event_names = $event_name; |
640
|
23
|
|
|
|
|
34
|
$pos = $start; |
641
|
23
|
|
|
|
|
35
|
$span = 1; |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
elsif ($event_list eq 'string, vertical_bar') |
644
|
|
|
|
|
|
|
{ |
645
|
40
|
|
|
|
|
72
|
$event_count = 1; |
646
|
40
|
|
|
|
|
58
|
$event_name = 'vertical_bar'; |
647
|
40
|
|
|
|
|
74
|
@event_names = $event_name; |
648
|
40
|
|
|
|
|
58
|
$pos = $start; |
649
|
40
|
|
|
|
|
59
|
$span = 1; |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
else |
652
|
|
|
|
|
|
|
{ |
653
|
|
|
|
|
|
|
#$self -> print_cooked_tree; |
654
|
|
|
|
|
|
|
|
655
|
0
|
|
|
|
|
0
|
die "event_count: $event_count. " . $event_list; |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
# If the input is exhausted, we return immediately so we don't try to use |
660
|
|
|
|
|
|
|
# the values of $start, $span or $pos. They are ignored upon return. |
661
|
|
|
|
|
|
|
|
662
|
14843
|
100
|
|
|
|
26544
|
if ($event_name eq "'exhausted") # Yes, it has a leading quote. |
663
|
|
|
|
|
|
|
{ |
664
|
10
|
|
|
|
|
31
|
return ($event_name, $span, $pos); |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
14833
|
|
|
|
|
31949
|
my($lexeme) = substr($$stringref, $start, $span); |
668
|
14833
|
|
|
|
|
194927
|
my($line, $column) = $self -> recce -> line_column($start); |
669
|
14833
|
|
|
|
|
182210
|
my($literal) = $self -> _next_few_chars($stringref, $start + $span); |
670
|
14833
|
|
|
|
|
40832
|
my($message) = "Location: ($line, $column). Lexeme: $lexeme. Events: $event_count. Names: "; |
671
|
14833
|
|
|
|
|
27848
|
my($name_list) = join(', ', @event_names); |
672
|
14833
|
|
|
|
|
21081
|
$message .= ". Next few chars: $literal"; |
673
|
|
|
|
|
|
|
|
674
|
14833
|
50
|
|
|
|
206085
|
if ($self -> verbose > 1) |
675
|
|
|
|
|
|
|
{ |
676
|
0
|
|
|
|
|
0
|
my($format) = "%4d, %4d %5d %-20s %6d %-30s %s \n"; |
677
|
|
|
|
|
|
|
|
678
|
0
|
|
|
|
|
0
|
print sprintf($format, $line, $column, length($lexeme), $lexeme, $event_count, $name_list, $literal); |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
} |
681
|
|
|
|
|
|
|
|
682
|
14833
|
|
|
|
|
97421
|
return ($event_name, $span, $pos); |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
} # End of _validate_event. |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
# ------------------------------------------------ |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
1; |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=pod |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=head1 NAME |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
C - Parse a Perl regexp into a data structure of type L |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
Warning: Development version. See L for details. |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=head1 Synopsis |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
=head2 Sample Code |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
This is scripts/synopsis.pl: |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
#!/usr/bin/env perl |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
use v5.10; |
707
|
|
|
|
|
|
|
use strict; |
708
|
|
|
|
|
|
|
use warnings; |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
use Regexp::Parsertron; |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
# --------------------- |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
my($re) = qr/Perl|JavaScript/i; |
715
|
|
|
|
|
|
|
my($parser) = Regexp::Parsertron -> new(verbose => 1); |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
# Return 0 for success and 1 for failure. |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
my($result) = $parser -> parse(re => $re); |
720
|
|
|
|
|
|
|
my($node_id) = 5; # Obtained from displaying and inspecting the tree. |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
print "Calling append(text => '|C++', uid => $node_id) \n"; |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
$parser -> append(text => '|C++', uid => $node_id); |
725
|
|
|
|
|
|
|
$parser -> print_raw_tree; |
726
|
|
|
|
|
|
|
$parser -> print_cooked_tree; |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
my($as_string) = $parser -> as_string; |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
print "Original: $re. Result: $result (0 is success) \n"; |
731
|
|
|
|
|
|
|
print "as_string(): $as_string \n"; |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
$result = $parser -> validate; |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
print "validate(): Result: $result (0 is success) \n"; |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
# Return 0 for success and 1 for failure. |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
$parser -> reset; |
740
|
|
|
|
|
|
|
$parser -> verbose(0); |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
$re = qr/Perl|JavaScript|(?:Flub|BCPL)/i; |
743
|
|
|
|
|
|
|
$result = $parser -> parse(re => $re); |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
print "\nAdd complexity to the regexp by parsing a new regexp \n"; |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
$parser -> print_raw_tree; |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
And its output: |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
Test count: 1. Parsing (in qr/.../ form): '(?^i:Perl|JavaScript)'. |
752
|
|
|
|
|
|
|
Root. Attributes: {text => "Root", uid => "0"} |
753
|
|
|
|
|
|
|
|--- open_parenthesis. Attributes: {text => "(", uid => "1"} |
754
|
|
|
|
|
|
|
| |--- query_caret. Attributes: {text => "?^", uid => "2"} |
755
|
|
|
|
|
|
|
| |--- flag_set. Attributes: {text => "i", uid => "3"} |
756
|
|
|
|
|
|
|
| |--- colon. Attributes: {text => ":", uid => "4"} |
757
|
|
|
|
|
|
|
| |--- string. Attributes: {text => "Perl|JavaScript", uid => "5"} |
758
|
|
|
|
|
|
|
|--- close_parenthesis. Attributes: {text => ")", uid => "6"} |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
Calling append(text => '|C++', uid => 5) |
761
|
|
|
|
|
|
|
Root. Attributes: {text => "Root", uid => "0"} |
762
|
|
|
|
|
|
|
|--- open_parenthesis. Attributes: {text => "(", uid => "1"} |
763
|
|
|
|
|
|
|
| |--- query_caret. Attributes: {text => "?^", uid => "2"} |
764
|
|
|
|
|
|
|
| |--- flag_set. Attributes: {text => "i", uid => "3"} |
765
|
|
|
|
|
|
|
| |--- colon. Attributes: {text => ":", uid => "4"} |
766
|
|
|
|
|
|
|
| |--- string. Attributes: {text => "Perl|JavaScript|C++", uid => "5"} |
767
|
|
|
|
|
|
|
|--- close_parenthesis. Attributes: {text => ")", uid => "6"} |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
Name Uid Text |
770
|
|
|
|
|
|
|
---- --- ---- |
771
|
|
|
|
|
|
|
open_parenthesis 1 ( |
772
|
|
|
|
|
|
|
query_caret 2 ?^ |
773
|
|
|
|
|
|
|
flag_set 3 i |
774
|
|
|
|
|
|
|
colon 4 : |
775
|
|
|
|
|
|
|
string 5 Perl|JavaScript|C++ |
776
|
|
|
|
|
|
|
close_parenthesis 6 ) |
777
|
|
|
|
|
|
|
Original: (?^i:Perl|JavaScript). Result: 0 (0 is success) |
778
|
|
|
|
|
|
|
as_string(): (?^i:Perl|JavaScript|C++) |
779
|
|
|
|
|
|
|
validate(): Result: 0 (0 is success) |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
Adding complexity to the regexp by parsing a new regexp: |
782
|
|
|
|
|
|
|
Root. Attributes: {text => "Root", uid => "0"} |
783
|
|
|
|
|
|
|
|--- open_parenthesis. Attributes: {text => "(", uid => "1"} |
784
|
|
|
|
|
|
|
| |--- query_caret. Attributes: {text => "?^", uid => "2"} |
785
|
|
|
|
|
|
|
| |--- flag_set. Attributes: {text => "i", uid => "3"} |
786
|
|
|
|
|
|
|
| |--- colon. Attributes: {text => ":", uid => "4"} |
787
|
|
|
|
|
|
|
| |--- string. Attributes: {text => "Perl|JavaScript|", uid => "5"} |
788
|
|
|
|
|
|
|
| |--- colon_prefix. Attributes: {text => "(?:", uid => "6"} |
789
|
|
|
|
|
|
|
| | |--- string. Attributes: {text => "Flub|BCPL", uid => "7"} |
790
|
|
|
|
|
|
|
| |--- close_parenthesis. Attributes: {text => ")", uid => "8"} |
791
|
|
|
|
|
|
|
|--- close_parenthesis. Attributes: {text => ")", uid => "9"} |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
Note: The 1st tree is printed due to verbose => 1 in the call to L, while the 2nd |
795
|
|
|
|
|
|
|
is due to the call to L. The columnar output is due to the call to |
796
|
|
|
|
|
|
|
L. |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=head2 Tutorial |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
=over 4 |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=item o Start with a simple program and a simple regexp |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
This code, scripts/tutorial.pl, is a cut-down version of scripts/synopsis.pl: |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
#!/usr/bin/env perl |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
use v5.10; |
809
|
|
|
|
|
|
|
use strict; |
810
|
|
|
|
|
|
|
use warnings; |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
use Regexp::Parsertron; |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
# --------------------- |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
my($re) = qr/Perl|JavaScript/i; |
817
|
|
|
|
|
|
|
my($parser) = Regexp::Parsertron -> new(verbose => 1); |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
# Return 0 for success and 1 for failure. |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
my($result) = $parser -> parse(re => $re); |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
print "Original: $re. Result: $result. (0 is success) \n"; |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
Running it outputs: |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Test count: 1. Parsing (in qr/.../ form): '(?^i:Perl|JavaScript)'. |
828
|
|
|
|
|
|
|
Root. Attributes: {text => "Root", uid => "0"} |
829
|
|
|
|
|
|
|
|--- open_parenthesis. Attributes: {text => "(", uid => "1"} |
830
|
|
|
|
|
|
|
| |--- query_caret. Attributes: {text => "?^", uid => "2"} |
831
|
|
|
|
|
|
|
| |--- flag_set. Attributes: {text => "i", uid => "3"} |
832
|
|
|
|
|
|
|
| |--- colon. Attributes: {text => ":", uid => "4"} |
833
|
|
|
|
|
|
|
| |--- string. Attributes: {text => "Perl|JavaScript", uid => "5"} |
834
|
|
|
|
|
|
|
|--- close_parenthesis. Attributes: {text => ")", uid => "6"} |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
Original: (?^i:Perl|JavaScript). Result: 0. (0 is success) |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
=item o Examine the tree and determine which nodes you wish to edit |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
The nodes are uniquely identified by their uids. |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
=item o Proceed as does scripts/synopsis.pl |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
Add these lines to the end of the tutorial code, and re-run: |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
my($node_id) = 5; # Obtained from displaying and inspecting the tree. |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
$parser -> append(text => '|C++', uid => $node_id); |
849
|
|
|
|
|
|
|
$parser -> print_raw_tree; |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
The extra output, showing the change to node uid == 5, is: |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
Root. Attributes: {text => "Root", uid => "0"} |
854
|
|
|
|
|
|
|
|--- open_parenthesis. Attributes: {text => "(", uid => "1"} |
855
|
|
|
|
|
|
|
| |--- query_caret. Attributes: {text => "?^", uid => "2"} |
856
|
|
|
|
|
|
|
| |--- flag_set. Attributes: {text => "i", uid => "3"} |
857
|
|
|
|
|
|
|
| |--- colon. Attributes: {text => ":", uid => "4"} |
858
|
|
|
|
|
|
|
| |--- string. Attributes: {text => "Perl|JavaScript|C++", uid => "5"} |
859
|
|
|
|
|
|
|
|--- close_parenthesis. Attributes: {text => ")", uid => "6"} |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
=item o Test also with L and L |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
See t/get.set.t for sample code. |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
=item o Since everything works, make a cup of tea |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
=back |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
=head2 The Edit Methods |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
The I simply means any one or more of these methods, which can all change the text of |
872
|
|
|
|
|
|
|
a node: |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
=over 4 |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
=item o L |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
=item o L |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
=item o L |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
=back |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
The edit methods are exercised in t/get.set.t, as well as scripts/synopsis.pl (above). |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
=head1 Description |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
Parses a regexp into a tree object managed by the L module, and provides various methods for |
889
|
|
|
|
|
|
|
updating and retrieving that tree's contents. |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
This module uses L and L. |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
=head1 Distributions |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
This module is available as a Unix-style distro (*.tgz). |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
See L |
898
|
|
|
|
|
|
|
for help on unpacking and installing distros. |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
=head1 Installation |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
Install C as you would any C module: |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
Run: |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
cpanm Regexp::Parsertron |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
or run: |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
sudo cpan Regexp::Parsertron |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
or unpack the distro, and then use: |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
perl Makefile.PL |
915
|
|
|
|
|
|
|
make (or dmake or nmake) |
916
|
|
|
|
|
|
|
make test |
917
|
|
|
|
|
|
|
make install |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
=head1 Constructor and Initialization |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
C is called as C<< my($parser) = Regexp::Parsertron -> new(k1 => v1, k2 => v2, ...) >>. |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
It returns a new object of type C. |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
Key-value pairs accepted in the parameter list (see corresponding methods for details |
926
|
|
|
|
|
|
|
[e.g. L]): |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
=over 4 |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
=item o re => $regexp |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
The C method of L is called to see what C is. If it's already of the |
933
|
|
|
|
|
|
|
form C, then it's processed as is, but if it's not, then it's transformed using C. |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
Warning: Currently, the input is expected to have been pre-processed by Perl via qr/$regexp/. |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
Default: ''. |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=item o verbose => $integer |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
Takes values 0, 1 or 2, which print more and more progress reports. |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
Used for debugging. |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
Default: 0 (print nothing). |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
=back |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
=head1 Methods |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
=head2 append(%opts) |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
Append some text to the text of a node. |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
%opts is a hash with these (key => value) pairs: |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
=over 4 |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=item o text => $string |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
The text to append. |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
=item o uid => $uid |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
The uid of the node to update. |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
=back |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
The code calls C if %opts does not have these 2 keys, or if either value is undef. |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
See scripts/synopsis.pl for sample code. |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
Note: Calling C never changes the uids of nodes, so repeated calling of C with |
974
|
|
|
|
|
|
|
the same C will apply more and more updates to the same node. |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
See also L, L and t/get.set.t. |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
=head2 as_string() |
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
Returns the parsed regexp as a string. The string contains all edits applied with |
981
|
|
|
|
|
|
|
L. |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
=head2 find($target) |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
Returns an arrayref of node uids whose text contains the given string. |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
If the arrayref is empty, there were no matches. |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
The Perl function C is used here to test for $target being a substring of the text |
990
|
|
|
|
|
|
|
associated with each node. |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
The code calls C if $target is undef. |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
See t/get.set.t for sample usage of C. |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
See L for a regexp-based test. See also L. |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
=head2 get($uid) |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
Get the text of the node with the given $uid. |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
The code calls C if $uid is undef, or outside the range 1 .. $self -> uid. The latter value |
1003
|
|
|
|
|
|
|
is the highest uid so far assigned to any node. |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
Returns undef if the given $uid is not found. |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
See also L. |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
=head2 new([%opts]) |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
Here, '[]' indicate an optional parameter. |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
See L for details on the parameters accepted by L. |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
=head2 parse([%opts]) |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
Here, '[]' indicate an optional parameter. |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
Parses the regexp supplied with the parameter C in the call to L or in the call to |
1020
|
|
|
|
|
|
|
L, or in the call to C<< parse(re => $regexp) >> itself. The latter takes precedence. |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
The hash C<%opts> takes the same (key => value) pairs as L does. |
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
See L for details. |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
=head2 prepend(%opts) |
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
Prepend some text to the text of a node. |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
%opts is a hash with these (key => value) pairs: |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
=over 4 |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
=item o text => $string |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
The text to prepend. |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
=item o uid => $uid |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
The uid of the node to update. |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
=back |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
The code calls C if %opts does not have these 2 keys, or if either value is undef. |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
Note: Calling C never changes the uids of nodes, so repeated calling of C with |
1047
|
|
|
|
|
|
|
the same C will apply more and more updates to the same node. |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
See also L, L, and t/get.set.t. |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
=head2 print_cooked_tree() |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
Prints, in a pretty format, the tree built from parsing. |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
See the for sample output. |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
See also L. |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
=head2 print_raw_tree() |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
Prints, in a simple format, the tree built from parsing. |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
See the for sample output. |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
See also L. |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
=head2 re([$regexp]) |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
Here, '[]' indicate an optional parameter. |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
Gets or sets the regexp to be processed. |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
Note: C is a parameter to L. |
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
=head2 reset() |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
Resets various internal things, except test_count. |
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
Used basically for debugging. |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
=head2 search($target) |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
Returns an arrayref of node uids whose text contains the given string. |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
If the arrayref is empty, there were no matches. |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
$target is converted to a regexp if a simple string is passed in. |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
The code calls C if $target is undef. |
1090
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
See t/search.t for sample usage of C. |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
See L for a non-regexp search. See also L. |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
=head2 set(%opts) |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
Set the text of a node to $opt{text}. |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
%opts is a hash with these (key => value) pairs: |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
=over 4 |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
=item o text => $string |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
The text to use to overwrite the text of the node. |
1106
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
=item o uid => $uid |
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
The uid of the node to update. |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
=back |
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
The code calls C if %opts does not have these 2 keys, or if either value is undef. |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
See also L and L. |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
=head2 tree() |
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
Returns an object of type L. Ignore the root node. |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
Each node's C method returns a hashref of information about the node. See the |
1122
|
|
|
|
|
|
|
L for details. |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
See also the source code for L and L for ideas on how to |
1125
|
|
|
|
|
|
|
use this object. |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
=head2 uid() |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
Returns the last-used uid. |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
Each node in the tree is given a uid, which allows methods like L to work. |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
=head2 verbose([$integer]) |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
Here, '[]' indicate an optional parameter. |
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
Gets or sets the verbosity level, within the range 0 .. 2. Higher numbers print more progress |
1138
|
|
|
|
|
|
|
reports. |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
Used basically for debugging. |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
Note: C is a parameter to L. |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
=head2 warning_str() |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
Returns the last Marpa warning. |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
In short, Marpa will always report 'Marpa parse exhausted' in warning_str() if the parse is not |
1149
|
|
|
|
|
|
|
ambiguous, but do not worry - I. |
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
See L and |
1152
|
|
|
|
|
|
|
L. |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
=head1 FAQ |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
=head2 Can I add a subtree to the tree? |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
Not yet. |
1159
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
There is a private method, C<_add_daughter()>, which I could make public, if I felt it was safe to |
1161
|
|
|
|
|
|
|
do so. |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
=head2 Why does the BNF not accept an empty regexp? |
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
Simple answer: Changing the BNF to handle this creates a massive problem elsewhere in the BNF. |
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
Complex answer: |
1168
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
The BNF contains this countable rule to allow patterns to be juxtaposed without '|', say, to |
1170
|
|
|
|
|
|
|
separate them: |
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
global_sequence ::= pattern_type+ |
1173
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
And in turn (further toward the leaves of the tree of BNF), I then use: |
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
pattern_sequence ::= pattern_set+ |
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
To allow an empty regexp would mean changing this rule to: |
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
pattern_sequence ::= pattern_set* |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
But that makes this rule nullable, and Marpa rejects the C rule on the grounds that |
1183
|
|
|
|
|
|
|
a countable rule is not allowed to be nullable. ATM I cannot see a way of |
1184
|
|
|
|
|
|
|
rewriting the rules to avoid this problem. But I'm hopeful such a rewrite is possible. |
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
=head2 Why does the code sometimes not store '|' - as in qr/(Perl|JavaScript/) - in its own node? |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
It could be done by, for example, splitting such a string into three nodes, 'Perl', '|', |
1189
|
|
|
|
|
|
|
'Javascript'. But does that offer any benefit? |
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
It makes processing by the user more complex because then if they wish to edit the list of |
1192
|
|
|
|
|
|
|
alternatives, they might have to edit two or three nodes instead of one. Here, editing means perhaps |
1193
|
|
|
|
|
|
|
replacing any existing string with the empty string. |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
Further, to extend the list of alternatives, the user will be confused by not being sure if they |
1196
|
|
|
|
|
|
|
should change 'Javascript' to 'Javascript|C' or if they have to add two nodes, containing '|' and |
1197
|
|
|
|
|
|
|
'C'. And ATM adding nodes is contraindicated! |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
Despite this, when the input stream triggers two events, C and C, |
1200
|
|
|
|
|
|
|
simultaneously because the '|' is at the start of a string, special code in the private method |
1201
|
|
|
|
|
|
|
C<_validate_event()> does put '|' in its own node. IOW the BNF does not do the work, which is really |
1202
|
|
|
|
|
|
|
what I would prefer. |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
=head2 Does this module ever use \Q...\E to quote regexp metacharacters? |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
No. |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
=head2 What is the format of the nodes in the tree built by this module? |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
Each node's C is the name of the Marpa-style event which was triggered by detection of |
1211
|
|
|
|
|
|
|
some C within the regexp. |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
Each node's C method returns a hashref with these (key => value) pairs: |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
=over 4 |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
=item o text => $string |
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
This is the text within the regexp which triggered the event just mentioned. |
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
=item o uid => $integer |
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
This is the unique id of the 'current' node. |
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
This C is often used by you to specify which node to work on. |
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
See t/get.set.t and t/simple.t for sample code. |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
The code never changes the uid of a node. |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
=back |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
See also the source code for L and L for ideas on how to |
1234
|
|
|
|
|
|
|
use the tree. |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
See the L for sample code and a report after parsing a tiny regexp. |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
=head2 Does the root node in the tree ever hold useful information? |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
No. Always ignore it. |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
=head2 Why does the BNF never use the lexeme adverb C? |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
Because with Marpa::R2 the priority is only used when lexemes are the same length. |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
L. |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
=head2 Does this module interpret regexps in any way? |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
No. You have to run your own Perl code to do that. This module just parses them into a data |
1251
|
|
|
|
|
|
|
structure. |
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
And that really means this module does not match the regexp against anything. If I appear to do that |
1254
|
|
|
|
|
|
|
while debugging new code, you can't rely on that appearing in production versions of the module. |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
=head2 Does this module rewrite regexps? |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
No, unless you call one of L. |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
=head2 Does this module handle both Perl 5 and Perl 6? |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
No. It will only handle Perl 5 syntax. |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
=head2 Does this module handle regexps for various versions of Perl5? |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
Not yet. Version-dependent regexp syntax will be supported for recent versions of Perl. This is |
1267
|
|
|
|
|
|
|
done by having tokens within the BNF which are replaced at start-up time with version-dependent |
1268
|
|
|
|
|
|
|
details. |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
There are no such tokens at the moment. |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
All debugging is done assuming the regexp syntax as documented online. See L for the |
1273
|
|
|
|
|
|
|
urls in question. |
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
=head2 So which version of Perl is supported? |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
The code is expected to work for Perls back to V 5.14.0, which is when stringification of regexps |
1278
|
|
|
|
|
|
|
changed. See L below for more. |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
I'm (2018-01-14) using Perl V 5.20.2 and making the BNF match the Perl regexp docs listed in |
1281
|
|
|
|
|
|
|
L below. |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
The program t/perl-5.21.11.t reads the file 'xt/author/re_tests' which I copied from the source code |
1284
|
|
|
|
|
|
|
of Perl V 5.21.11. This test is the one which currently provides 858 passing tests out of the 1027 |
1285
|
|
|
|
|
|
|
tests which pass for me using prove -lv t. |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
=head2 Could Perl and this module generate different parses of the same regexp? |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
Absolutely! There is no escape from this fact simply because the code used in each program bears no |
1290
|
|
|
|
|
|
|
relationship to the code in the other one. |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
The real question is: How do we make the code in each program accept and reject exactly the same |
1293
|
|
|
|
|
|
|
regexps as the code in the other program. I think trial-and-error is all we have available to us for |
1294
|
|
|
|
|
|
|
dealing with this issue. |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
=head2 After calling parse(), warning_str() contains the string '... Parse ambiguous ...' |
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
This is almost certainly a error with the BNF, although of course it may be an error will an |
1299
|
|
|
|
|
|
|
exceptionally-badly formed regexp. |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
Report it via L, and please |
1302
|
|
|
|
|
|
|
include the regexp in the report. Thanx! |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
=head2 Is this a (Marpa) exhaustion-hating or exhaustion-loving app? |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
Exhaustion-loving. |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
See L |
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
=head2 Will this code be modified to run under Marpa::R3 when the latter is stable? |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
Yes. |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
=head2 What is the purpose of this module? |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
=over 4 |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
=item o To provide a stand-alone parser for regexps |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
=item o To help me learn more about regexps |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
=item o To become, I hope, a replacement for the horrendously complex L |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
=back |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
=head2 Who crafted the BNF? |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
I did. |
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
=head1 Scripts |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
This diagram indicates the flow of logic from script to script: |
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
xt/author/re_tests |
1335
|
|
|
|
|
|
|
| |
1336
|
|
|
|
|
|
|
V |
1337
|
|
|
|
|
|
|
xt/author/generate.tests.pl |
1338
|
|
|
|
|
|
|
| |
1339
|
|
|
|
|
|
|
V |
1340
|
|
|
|
|
|
|
xt/authors/perl-5.21.11.tests |
1341
|
|
|
|
|
|
|
| |
1342
|
|
|
|
|
|
|
V |
1343
|
|
|
|
|
|
|
perl -Ilib t/perl-5.21.11.t > xt/author/perl-5.21.11.log 2>&1 |
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
If xt/author/perl-5.21.11.log only contains lines starting with 'ok', then all Perl and Marpa |
1346
|
|
|
|
|
|
|
errors have been hidden, so t/perl-5.21.11.t is ready to live in t/. Before that time it lives in |
1347
|
|
|
|
|
|
|
xt/author/. |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
=head1 TODO |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
=over 4 |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
=item o How to best define 'code' in the BNF. |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
=item o I could traverse the tree and store a pointer to each node in an array |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
This would mean fast access to nodes in random order. But is there any point? Yes, it would speed up |
1358
|
|
|
|
|
|
|
various methods. Specifically, any module which calls C on the tree object would |
1359
|
|
|
|
|
|
|
benefit. |
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
=item o Allow users to add nodes and hence subtrees to the tree |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
=back |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
=head1 References |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
L. PCRE - Perl Compatible Regular Expressions. |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
L. This is the definitive document. |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
L. |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
L. Samples with commentary. |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
L |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
L |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
L |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
L. This is when stringification |
1382
|
|
|
|
|
|
|
changed to return (?^...) rather than (?-xism...). |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
L |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
L. Regular Expression |
1387
|
|
|
|
|
|
|
Inconsistencies With Unicode. |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
L |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
L |
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
L |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
=head1 See Also |
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
L |
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
L |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
L |
1402
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
L |
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
L |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
L |
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
L |
1410
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
L |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
L |
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
L. This is vaguely a version of L. |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
L |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
L |
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
And many others... |
1422
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
=head1 Machine-Readable Change Log |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
The file Changes was converted into Changelog.ini by L. |
1426
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
=head1 Version Numbers |
1428
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions. |
1430
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
=head1 CPAN Tester Results |
1432
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
L |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
=head1 Repository |
1436
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
L |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
=head1 Support |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
Email the author, or log a bug on RT: |
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
L. |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
=head1 Author |
1446
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
L was written by Ron Savage Iron@savage.net.auE> in 2011. |
1448
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
Marpa's homepage: L. |
1450
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
L. |
1452
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
=head1 Copyright |
1454
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
Australian copyright (c) 2016, Ron Savage. |
1456
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
All Programs of mine are 'OSI Certified Open Source Software'; |
1458
|
|
|
|
|
|
|
you can redistribute them and/or modify them under the terms of |
1459
|
|
|
|
|
|
|
The Artistic License 2.0, a copy of which is available at: |
1460
|
|
|
|
|
|
|
http://opensource.org/licenses/alphabetical. |
1461
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
=cut |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
__DATA__ |