line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Perl6::Rules; |
2
|
41
|
|
|
41
|
|
171531
|
use 5.008_003; |
|
41
|
|
|
|
|
352
|
|
|
41
|
|
|
|
|
2135
|
|
3
|
41
|
|
|
41
|
|
226
|
use re 'eval'; |
|
41
|
|
|
|
|
113
|
|
|
41
|
|
|
|
|
2287
|
|
4
|
41
|
|
|
41
|
|
232
|
use Carp; |
|
41
|
|
|
|
|
68
|
|
|
41
|
|
|
|
|
5573
|
|
5
|
|
|
|
|
|
|
# use strict; |
6
|
41
|
|
|
41
|
|
52352
|
use charnames ':full'; |
|
41
|
|
|
|
|
2684928
|
|
|
41
|
|
|
|
|
459
|
|
7
|
41
|
|
|
41
|
|
13635
|
use utf8; |
|
41
|
|
|
|
|
162
|
|
|
41
|
|
|
|
|
566
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# Support for these properties on interpolated hashes... |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our (%keymatch, %valuematch); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Are we debugging or translating? |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our ($debug, $translate) = (0,0); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# Did we get an error? |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $bad = 0; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Turn off special $0 magic |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
*0 = \ my $zero; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Reset state package variables |
28
|
|
|
|
|
|
|
# (only used during filtering so no need for reentrancy) |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
our (@p5pat, $raw, %mod, @mark, @capindex, %caps, $nextcapindex, $rulename); |
31
|
|
|
|
|
|
|
our $rule_id = 2; |
32
|
|
|
|
|
|
|
our @rules = qr{(?{die "No successful rule\n"})(?!)}; |
33
|
|
|
|
|
|
|
our $prior = 0; |
34
|
|
|
|
|
|
|
our $has_prior; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub new { |
37
|
0
|
|
|
0
|
|
|
@p5pat = ""; |
38
|
0
|
|
|
|
|
|
$raw = ""; |
39
|
0
|
|
|
|
|
|
%mod = (pre=>{}, post=>{}); |
40
|
0
|
|
|
|
|
|
@mark = (); |
41
|
0
|
|
|
|
|
|
@capindex = (); |
42
|
0
|
|
|
|
|
|
%caps = (); |
43
|
0
|
|
|
|
|
|
$nextcapindex = 1; |
44
|
0
|
|
|
|
|
|
$rulename = ""; |
45
|
0
|
|
|
|
|
|
$has_prior = 0; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# Use this sub instead of C, so as to allow multiple error |
50
|
|
|
|
|
|
|
# messages before dying... |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub error { |
53
|
0
|
|
|
0
|
|
|
print {*STDERR} @_, "\n"; |
|
0
|
|
|
|
|
|
|
54
|
0
|
|
|
|
|
|
$bad++; |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Print debug info only if debugging... |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub debug { |
61
|
0
|
0
|
|
0
|
|
|
return unless $debug; |
62
|
0
|
0
|
|
|
|
|
my $mesg = join "", map { defined $_ ? $_ : ''} @_; |
|
0
|
|
|
|
|
|
|
63
|
0
|
0
|
|
|
|
|
$mesg .= "\n" unless substr($mesg,-1) eq "\n"; |
64
|
0
|
|
|
|
|
|
print STDERR $mesg; |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub add_debug { |
68
|
0
|
|
|
0
|
|
|
my $what = quotemeta $_[1]; |
69
|
0
|
0
|
|
|
|
|
$_[0] = "(?:(?{print STDERR qq{Trying $what...\\n}})" |
70
|
|
|
|
|
|
|
. $_[0] |
71
|
|
|
|
|
|
|
. "(?{print STDERR qq{...Matched $what\\n}}))" |
72
|
|
|
|
|
|
|
if $debug==1; # Higher value indicates inside codeblock or assertion. |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Remember left delimiter and predict right delimiter... |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
our ($ldel, $rdel); |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
sub delim { |
80
|
0
|
|
|
0
|
|
|
($ldel, $rdel) = ($^N)x2; |
81
|
0
|
|
|
|
|
|
$rdel =~ tr/{[(}])>/; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Record another component of the raw Perl 6 pattern |
86
|
|
|
|
|
|
|
# and add its back-translation to the Perl 5 pattern... |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub add { |
89
|
0
|
|
|
0
|
|
|
my ($p6pat, $p5pat) = @_; |
90
|
0
|
|
|
|
|
|
debug "Translating /$p6pat/ back to /$p5pat/\n"; |
91
|
0
|
|
|
|
|
|
$raw .= $p5pat; |
92
|
0
|
|
|
|
|
|
add_debug $p5pat, "/$p6pat/"; |
93
|
0
|
|
|
|
|
|
push @p5pat, $p5pat; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Convert internal vars in subscripts from P6 to P5 syntax... |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub translate_vars_internal { |
100
|
0
|
|
|
0
|
|
|
my ($src) = @_; |
101
|
0
|
|
|
|
|
|
my ($next, $newsrc); |
102
|
0
|
|
|
|
|
|
while (1) { |
103
|
0
|
|
|
|
|
|
$next = index($src,'$?'); |
104
|
0
|
0
|
|
|
|
|
$next = index($src,'@?') if $next < 0; |
105
|
0
|
0
|
|
|
|
|
$next = index($src,'%?') if $next < 0; |
106
|
0
|
0
|
|
|
|
|
if ($next < 0) { |
107
|
0
|
|
|
|
|
|
$newsrc .= $src; |
108
|
0
|
|
|
|
|
|
last; |
109
|
|
|
|
|
|
|
} |
110
|
0
|
|
|
|
|
|
$newsrc .= substr($src,0,$next,""); # shift off and delete |
111
|
0
|
|
|
|
|
|
$next=2; |
112
|
0
|
|
|
|
|
|
while ($next < length $src) { |
113
|
0
|
|
|
|
|
|
my $char = lc substr($src,$next,1); |
114
|
0
|
0
|
0
|
|
|
|
last unless $char ge 'a' && $char le 'z' |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
115
|
|
|
|
|
|
|
|| $char ge '0' && $char le '9' |
116
|
|
|
|
|
|
|
|| $char eq '_'; |
117
|
0
|
|
|
|
|
|
$next++; |
118
|
|
|
|
|
|
|
} |
119
|
0
|
|
|
|
|
|
my $var = substr($src,0,$next,""); # shift off and delete |
120
|
0
|
|
|
|
|
|
my $follow1 = substr($src,0,1); |
121
|
0
|
|
|
|
|
|
my $follow2 = substr($src,0,2); |
122
|
0
|
0
|
|
|
|
|
if (substr($var,0,2) eq '$?') { |
|
|
0
|
|
|
|
|
|
123
|
0
|
0
|
0
|
|
|
|
if ($follow2 eq '.{' || $follow2 eq '.[') { #}] |
124
|
0
|
|
|
|
|
|
substr($src,0,1,""); # delete optional dot |
125
|
|
|
|
|
|
|
} |
126
|
0
|
|
|
|
|
|
$newsrc .= "\$Perl6::Rules::d0{'$var'}"; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
elsif (substr($var,0,2) eq '@?') { |
129
|
0
|
0
|
|
|
|
|
if ($follow1 eq '[') { #] |
|
|
0
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
$newsrc .= "\$Perl6::Rules::d0{'$var'}"; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
elsif ($follow2 eq '.[') { #] |
133
|
0
|
|
|
|
|
|
$newsrc .= "\$Perl6::Rules::d0{'$var'}"; |
134
|
0
|
|
|
|
|
|
substr($src,0,1,""); # delete optional dot |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
else { |
137
|
0
|
|
|
|
|
|
$newsrc .= "\@{\$Perl6::Rules::d0{'$var'}}"; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
else { # %?var |
141
|
0
|
0
|
|
|
|
|
if ($follow1 eq '{') { #} |
|
|
0
|
|
|
|
|
|
142
|
0
|
|
|
|
|
|
$newsrc .= "\$Perl6::Rules::d0{'$var'}"; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
elsif ($follow2 eq '.{') { #} |
145
|
0
|
|
|
|
|
|
$newsrc .= "\$Perl6::Rules::d0{'$var'}"; |
146
|
0
|
|
|
|
|
|
substr($src,0,1,""); # delete optional dot |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
else { |
149
|
0
|
|
|
|
|
|
$newsrc .= "\%{\$Perl6::Rules::d0{'$var'}}"; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
} |
153
|
0
|
|
|
|
|
|
return $newsrc; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Record that next component of the raw Perl 6 pattern is an |
157
|
|
|
|
|
|
|
# interpolated scalar, and add its back-translation to the Perl 5 pattern. |
158
|
|
|
|
|
|
|
# This sub handles translation of both /$as_literal/ and /<$as_rules>/ |
159
|
|
|
|
|
|
|
# scalar interpolations... |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub addscalar_external { |
162
|
0
|
|
|
0
|
|
|
my ($var, $quotemeta) = @_; |
163
|
0
|
|
0
|
|
|
|
$quotemeta ||= ""; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Record original... |
166
|
0
|
|
|
|
|
|
$raw .= $var; |
167
|
|
|
|
|
|
|
|
168
|
0
|
|
|
|
|
|
my $subscripts=""; |
169
|
0
|
|
|
|
|
|
my ($p5pat, $subscriptpos); |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Translate Perl6ish @array[...] or %hash{...} |
172
|
|
|
|
|
|
|
# to Perl5ish $array[...]/$hash{...}... |
173
|
0
|
|
|
|
|
|
my $special = substr($var,0,1); |
174
|
0
|
0
|
0
|
|
|
|
if ($special eq '@') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
175
|
0
|
|
|
|
|
|
$subscriptpos = index($var,".["); |
176
|
0
|
0
|
|
|
|
|
$subscriptpos = index($var,"[") unless $subscriptpos > 0; |
177
|
0
|
|
|
|
|
|
$subscripts = translate_vars_internal substr($var,$subscriptpos); |
178
|
0
|
|
|
|
|
|
substr($var,$subscriptpos) = ""; |
179
|
0
|
0
|
|
|
|
|
$p5pat = $subscripts ? '$' . substr($var,1) . $subscripts : $var; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
elsif ($special eq '%') { |
182
|
0
|
|
|
|
|
|
$subscriptpos = index($var,".{"); |
183
|
0
|
0
|
|
|
|
|
$subscriptpos = index($var,"{") unless $subscriptpos > 0; |
184
|
0
|
|
|
|
|
|
$subscripts = translate_vars_internal substr($var,$subscriptpos); |
185
|
0
|
|
|
|
|
|
substr($var,$subscriptpos) = ""; |
186
|
0
|
0
|
|
|
|
|
$p5pat = $subscripts ? '$' . substr($var,1) . $subscripts : $var; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Translate Perl6ish $ref.{...} to Perl5ish $ref->[...] ... |
190
|
|
|
|
|
|
|
elsif (($special = index($var,'.')) > 0) { |
191
|
0
|
|
|
|
|
|
$p5pat = substr($var,0,$special) |
192
|
|
|
|
|
|
|
. "->" |
193
|
|
|
|
|
|
|
. translate_vars_internal(substr($var,$special+1)); |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# Translate Perl6ish $ref{...} or $ref[...] |
197
|
|
|
|
|
|
|
# to Perl5ish $ref->{...} or $ref->[...] |
198
|
|
|
|
|
|
|
elsif (($special = index($var,'[')) > 0 or |
199
|
|
|
|
|
|
|
($special = index($var,'{')) > 0 ) { |
200
|
0
|
|
|
|
|
|
$p5pat = substr($var,0,$special) |
201
|
|
|
|
|
|
|
. "->" |
202
|
|
|
|
|
|
|
. translate_vars_internal(substr($var,$special)); |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
else { |
205
|
0
|
|
|
|
|
|
$p5pat = $var; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# Quotemeta interpolation as required... |
209
|
0
|
0
|
|
|
|
|
$p5pat = $quotemeta eq 'rw' ? $p5pat : qq{(??{$quotemeta $p5pat})}; |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# Insert debugging code if appropriate... |
212
|
0
|
|
|
|
|
|
add_debug $p5pat, "/$var/"; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# Add back-translation... |
215
|
0
|
0
|
|
|
|
|
my $what = $quotemeta ? "literal" : "pattern"; |
216
|
0
|
|
|
|
|
|
debug "Adding $var (as $what interpolation)\n"; |
217
|
|
|
|
|
|
|
|
218
|
0
|
|
|
|
|
|
push @p5pat, $p5pat; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub addscalar_internal { |
222
|
0
|
|
|
0
|
|
|
my ($var, $quotemeta) = @_; |
223
|
0
|
|
0
|
|
|
|
$quotemeta ||= ""; |
224
|
|
|
|
|
|
|
|
225
|
0
|
|
|
|
|
|
my $subscripts=""; |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Record original... |
228
|
0
|
|
|
|
|
|
$raw .= $var; |
229
|
|
|
|
|
|
|
|
230
|
0
|
|
|
|
|
|
my ($p5pat, $subscriptpos); |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Translate Perl6ish @?array[...] or %?hash{...} |
233
|
0
|
|
|
|
|
|
my $special = substr($var,0,2); |
234
|
0
|
0
|
0
|
|
|
|
if ($special eq '@?') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
235
|
0
|
|
|
|
|
|
$subscriptpos = index($var,".["); |
236
|
0
|
0
|
|
|
|
|
$subscriptpos = index($var,"[") unless $subscriptpos > 0; |
237
|
0
|
|
|
|
|
|
$subscripts = translate_vars_internal substr($var,$subscriptpos); |
238
|
0
|
|
|
|
|
|
substr($var,$subscriptpos) = ""; |
239
|
0
|
0
|
|
|
|
|
$p5pat = $subscripts ? "\$Perl6::Rules::d0{q{$var}}->$subscripts" |
240
|
|
|
|
|
|
|
: "\@{\$Perl6::Rules::d0{q{$var}}}"; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
elsif ($special eq '%?') { |
243
|
0
|
|
|
|
|
|
$subscriptpos = index($var,".{"); |
244
|
0
|
0
|
|
|
|
|
$subscriptpos = index($var,"{") unless $subscriptpos > 0; |
245
|
0
|
|
|
|
|
|
$subscripts = translate_vars_internal substr($var,$subscriptpos); |
246
|
0
|
|
|
|
|
|
substr($var,$subscriptpos) = ""; |
247
|
0
|
0
|
|
|
|
|
$p5pat = $subscripts ? "\$Perl6::Rules::d0{q{$var}}->$subscripts" |
248
|
|
|
|
|
|
|
: "\%{\$Perl6::Rules::d0{q{$var}}}"; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# Translate Perl6ish $ref.{...} to Perl5ish $ref->{...} ... |
252
|
|
|
|
|
|
|
elsif (($special = index($var,'.')) > 0) { |
253
|
0
|
|
|
|
|
|
$subscripts = substr($var,$special+1); |
254
|
0
|
|
|
|
|
|
$subscripts = translate_vars_internal $subscripts; |
255
|
0
|
|
|
|
|
|
substr($var,$special) = ""; |
256
|
0
|
|
|
|
|
|
$p5pat = "\$Perl6::Rules::d0{q{$var}}->$subscripts"; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# Translate Perl6ish $ref{...} or $ref[...] |
260
|
|
|
|
|
|
|
# to Perl5ish $ref->{...} or $ref->[...] |
261
|
|
|
|
|
|
|
elsif (($special = index($var,'[')) > 0 or |
262
|
|
|
|
|
|
|
($special = index($var,'{')) > 0 ) { |
263
|
0
|
|
|
|
|
|
$subscripts = translate_vars_internal substr($var,$special); |
264
|
0
|
|
|
|
|
|
substr($var,$special) = ""; |
265
|
0
|
|
|
|
|
|
$p5pat = "\$Perl6::Rules::d0{q{$var}}->$subscripts"; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
else { |
268
|
0
|
|
|
|
|
|
$p5pat = "\$Perl6::Rules::d0{q{$var}}"; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# Quotemeta interpolation as required... |
272
|
0
|
0
|
|
|
|
|
$p5pat = $quotemeta eq 'rw' ? $p5pat : qq{(??{$quotemeta $p5pat})}; |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# Insert debugging code if appropriate... |
275
|
0
|
|
|
|
|
|
add_debug $p5pat, "/$var/"; |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# Add back-translation... |
278
|
0
|
0
|
|
|
|
|
my $what = $quotemeta ? "literal" : "pattern"; |
279
|
0
|
|
|
|
|
|
debug "Adding $var (as $what interpolation)\n"; |
280
|
|
|
|
|
|
|
|
281
|
0
|
|
|
|
|
|
push @p5pat, $p5pat; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# Record that next component of the raw Perl 6 pattern is an |
286
|
|
|
|
|
|
|
# interpolated array, and add its back-translation to the Perl 5 pattern. |
287
|
|
|
|
|
|
|
# This sub handles translation of both /@::as_literals/ and /<@::as_rules>/ |
288
|
|
|
|
|
|
|
# array interpolations... |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
sub addarray_external { |
291
|
0
|
|
|
0
|
|
|
my ($var, $quotemeta) = @_; |
292
|
0
|
0
|
|
|
|
|
$quotemeta = $quotemeta ? "map $quotemeta," : "map \$_,"; |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# Record original... |
295
|
0
|
|
|
|
|
|
$raw .= $var; |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# Expand elements of variable, conjoining with ORs... |
298
|
0
|
|
|
|
|
|
my $p5pat = qq{(??{join q{|}, $quotemeta $var})}; |
299
|
0
|
|
|
|
|
|
add_debug $p5pat, "/$var/"; |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
# Insert debugging code if requested... |
302
|
0
|
0
|
|
|
|
|
my $what = $quotemeta eq 'map quotemeta,' ? "literal" : "pattern"; |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# Add back-translation... |
305
|
0
|
|
|
|
|
|
debug "Adding $var (as $what interpolation)\n"; |
306
|
0
|
|
|
|
|
|
push @p5pat, $p5pat; |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
sub addarray_internal { |
310
|
0
|
|
|
0
|
|
|
my ($var, $quotemeta) = @_; |
311
|
0
|
0
|
|
|
|
|
$quotemeta = $quotemeta ? "map $quotemeta," : "map \$_,"; |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
# Record original... |
314
|
0
|
|
|
|
|
|
$raw .= $var; |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
# Expand elements of variable, conjoining with ORs... |
317
|
0
|
|
|
|
|
|
my $p5pat = qq{(??{join q{|}, $quotemeta \@{\$Perl6::Rules::d0{q{$var}}}})}; |
318
|
0
|
|
|
|
|
|
add_debug $p5pat, "/$var/"; |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# Insert debugging code if requested... |
321
|
0
|
0
|
|
|
|
|
my $what = $quotemeta eq 'map quotemeta,' ? "literal" : "pattern"; |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# Add back-translation... |
324
|
0
|
|
|
|
|
|
debug "Adding $var (as $what interpolation)\n"; |
325
|
0
|
|
|
|
|
|
push @p5pat, $p5pat; |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
sub addhash_internal { |
330
|
0
|
|
|
0
|
|
|
my ($var, $quotemeta) = @_; |
331
|
0
|
|
|
|
|
|
my $hashaccess = "getcap_name(q{$var})->"; |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# Construct value matcher... |
334
|
|
|
|
|
|
|
# (Properties on internal hashes are not supported, so no |
335
|
|
|
|
|
|
|
# need to test for keymatch or valuematch) |
336
|
0
|
|
|
|
|
|
my $valhandler = # Match one of the keys... |
337
|
|
|
|
|
|
|
'(??{exists ' . $hashaccess . '{$^R} ? ' |
338
|
|
|
|
|
|
|
# and match against the corresponding value, or fail |
339
|
|
|
|
|
|
|
. $hashaccess . '{$^R} : "(?!)"})' |
340
|
|
|
|
|
|
|
; |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# Construct the Perl 5 equivalent... |
343
|
|
|
|
|
|
|
# Match any of the hash's keys (or its keymatch property)... |
344
|
0
|
|
|
|
|
|
my $p5pat = '(?> ((?> \w+ )) (?{$^N})' |
345
|
|
|
|
|
|
|
# Then match the corresponding value (or valuematch property)... |
346
|
|
|
|
|
|
|
. $valhandler . ')'; |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# Insert debugging info if requested... |
349
|
0
|
0
|
|
|
|
|
my $what = $quotemeta ? "literal" : "pattern"; |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
# Add back-translation... |
352
|
0
|
|
|
|
|
|
debug "Adding $var (as $what interpolation)\n"; |
353
|
0
|
|
|
|
|
|
add_debug $p5pat, "/$var/"; |
354
|
0
|
|
|
|
|
|
push @p5pat, $p5pat; |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
sub addhash_external { |
358
|
0
|
|
|
0
|
|
|
my ($var, $quotemeta) = @_; |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# Record original... |
361
|
0
|
|
|
|
|
|
$raw .= $var; |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Convert to Perl5 access syntax... |
364
|
0
|
|
|
|
|
|
my $hashaccess = '$'.substr($var,1); |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# Construct value matcher... |
367
|
0
|
0
|
|
|
|
|
my $valhandler = $quotemeta |
368
|
|
|
|
|
|
|
# If hash has properties controlling interpolation... |
369
|
|
|
|
|
|
|
? '(??{exists ' . $hashaccess . '{$^N} ? "" : "(?!)"})' |
370
|
|
|
|
|
|
|
# Get the specified value matcher and match with it... |
371
|
|
|
|
|
|
|
. '((?> (??{Perl6::Rules::valuematch(\\' . $var. ')}) ))' |
372
|
|
|
|
|
|
|
# Fail if no such specified value matcher or if it fails... |
373
|
|
|
|
|
|
|
. '(??{!defined($^R)||' . $hashaccess . '{$^R} eq $^N ? "" : "(?!)"})' |
374
|
|
|
|
|
|
|
# Otherwise do normal hash interpolation... |
375
|
|
|
|
|
|
|
# Match one of the keys... |
376
|
|
|
|
|
|
|
: '(??{exists ' . $hashaccess . '{$^R} ? ' |
377
|
|
|
|
|
|
|
# and match against the corresponding value, or fail |
378
|
|
|
|
|
|
|
. $hashaccess . '{$^R} : "(?!)"})' |
379
|
|
|
|
|
|
|
; |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
# Construct the Perl 5 equivalent... |
382
|
|
|
|
|
|
|
# Match any of the hash's keys (or its keymatch property)... |
383
|
0
|
|
|
|
|
|
my $p5pat = '(?> ((?> (??{Perl6::Rules::keymatch(\\' . $var |
384
|
|
|
|
|
|
|
. ')}) )) (?{$^N})' |
385
|
|
|
|
|
|
|
# Then match the corresponding value (or valuematch property)... |
386
|
|
|
|
|
|
|
. $valhandler . ')'; |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# Insert debugging info if requested... |
389
|
0
|
0
|
|
|
|
|
my $what = $quotemeta ? "literal" : "pattern"; |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# Add back-translation... |
392
|
0
|
|
|
|
|
|
debug "Adding $var (as $what interpolation)\n"; |
393
|
0
|
|
|
|
|
|
add_debug $p5pat, "/$var/"; |
394
|
0
|
|
|
|
|
|
push @p5pat, $p5pat; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
sub keymatch { |
398
|
0
|
|
|
0
|
|
|
my ($hashref) = @_; |
399
|
0
|
0
|
|
|
|
|
my $keymatch = exists $keymatch{$hashref} |
400
|
|
|
|
|
|
|
? eval $keymatch{$hashref} |
401
|
|
|
|
|
|
|
: qr{\w+}; |
402
|
0
|
|
|
|
|
|
debug "Matching hash key with: $keymatch\n"; |
403
|
0
|
|
|
|
|
|
return $keymatch; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub valuematch { |
407
|
0
|
|
|
0
|
|
|
my ($hashref) = @_; |
408
|
0
|
0
|
|
|
|
|
return qr{(?{undef $^R})} unless exists $valuematch{$hashref}; |
409
|
0
|
|
|
|
|
|
my $valuematch = eval $valuematch{$hashref}; |
410
|
0
|
|
|
|
|
|
debug "Matching hash value with: $valuematch\n"; |
411
|
0
|
|
|
|
|
|
return $valuematch; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
our $wordsp = '(?:(?<=\w)(?:\s+(?=\w)|\s*(?=\W|\z))|(?<=\W)\s*|\A\s*)'; |
415
|
|
|
|
|
|
|
sub wordspace { |
416
|
0
|
0
|
|
0
|
|
|
return unless $mod{words}; |
417
|
0
|
|
|
|
|
|
debug "Inserting space matching /$raw <-- HERE/\n"; |
418
|
0
|
|
|
|
|
|
my $pat = '(??{$Perl6::Rules::wordsp})'; |
419
|
0
|
|
|
|
|
|
add_debug $pat, "Skipping whitespace"; |
420
|
0
|
|
|
|
|
|
push @p5pat, "(?:$pat)"; |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
my @failure; |
424
|
0
|
|
|
0
|
|
|
sub fallible { push @failure, 0 } |
425
|
41
|
|
|
41
|
|
230137
|
sub fail { no warnings; debug "Failed"; $failure[-1] = 1; last FALLIBLE } |
|
41
|
|
|
0
|
|
121
|
|
|
41
|
|
|
|
|
8391
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
426
|
0
|
0
|
|
0
|
|
|
sub failed { pop(@failure) ? '(?!)' : '' } |
427
|
|
|
|
|
|
|
|
428
|
41
|
|
|
41
|
|
262
|
{ no strict 'refs'; *{caller()."::fail"} = \&fail; } |
|
41
|
|
|
|
|
94
|
|
|
41
|
|
|
|
|
1147280
|
|
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub codeblock { |
431
|
0
|
|
|
0
|
|
|
my ($type, $nested) = @_; |
432
|
0
|
|
|
|
|
|
debug "Closed $type block"; |
433
|
0
|
|
|
|
|
|
$raw .= $type; |
434
|
0
|
|
|
|
|
|
my $mark = pop @mark; |
435
|
0
|
|
|
|
|
|
my $code = join "", splice @p5pat, $mark, @p5pat-$mark; |
436
|
0
|
0
|
|
|
|
|
if ($nested) { |
437
|
0
|
|
|
|
|
|
push @p5pat, "{$code}"; |
438
|
0
|
|
|
|
|
|
return; |
439
|
|
|
|
|
|
|
} |
440
|
0
|
0
|
|
|
|
|
$code = "do{$code}||fail" if $type eq ')>'; |
441
|
0
|
|
|
|
|
|
debug "Translated $type block to '$code'"; |
442
|
0
|
|
|
|
|
|
$code = "(??{Perl6::Rules::fallible;FALLIBLE:{$code}Perl6::Rules::failed})"; |
443
|
0
|
|
|
|
|
|
add_debug $code, "codeblock"; |
444
|
0
|
|
|
|
|
|
push @p5pat, $code; |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
sub alternative { |
448
|
0
|
|
|
0
|
|
|
$raw .= '|'; |
449
|
0
|
|
|
|
|
|
push @p5pat, '|'; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
sub subrule { |
453
|
0
|
|
|
0
|
|
|
my ($subrule, $repeat, $capture, $negate) = @_; |
454
|
0
|
|
|
|
|
|
my $p6pat = "$subrule$repeat"; |
455
|
0
|
0
|
|
|
|
|
$p6pat = "($p6pat)" if $capture; |
456
|
0
|
|
|
|
|
|
$raw .= $p6pat; |
457
|
0
|
|
0
|
|
|
|
$negate ||= ""; |
458
|
|
|
|
|
|
|
|
459
|
0
|
|
|
|
|
|
my ($classname, $rulename); |
460
|
0
|
0
|
|
|
|
|
if ((my $index = index($subrule,'.'))>0) { |
461
|
0
|
|
|
|
|
|
$classname = substr($subrule,0,$index); |
462
|
0
|
|
|
|
|
|
$rulename = substr($subrule,$index+1); |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
else { |
465
|
0
|
|
|
|
|
|
$classname = '__PACKAGE__'; |
466
|
0
|
|
|
|
|
|
$rulename = $subrule; |
467
|
|
|
|
|
|
|
} |
468
|
0
|
|
|
|
|
|
my $callrule; |
469
|
0
|
0
|
|
|
|
|
if ($rulename eq 'prior') { |
|
|
0
|
|
|
|
|
|
470
|
0
|
0
|
|
|
|
|
error "Don't know how to match inverted <-prior>" if $negate eq '-'; |
471
|
0
|
|
|
|
|
|
$callrule = '(??{$Perl6::Rules::rules[$Perl6::Rules::prior]||"(?!)"})'; |
472
|
0
|
|
|
|
|
|
$has_prior = 1; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
elsif ($rulename eq 'self') { |
475
|
0
|
0
|
|
|
|
|
error "Don't know how to match inverted <-self>" if $negate eq '-'; |
476
|
0
|
|
|
|
|
|
$callrule = "(?>(??{\$Perl6::Rules::rules[$rule_id]}))"; |
477
|
|
|
|
|
|
|
} |
478
|
|
|
|
|
|
|
else { |
479
|
0
|
0
|
|
|
|
|
$callrule = $negate eq "-" |
480
|
|
|
|
|
|
|
? "(??{$classname->can(q{$rulename})?$classname->$rulename(invert=>1):Perl6::Rules::stdrule(q{$subrule},1)})" |
481
|
|
|
|
|
|
|
: "(??{$classname->can(q{$rulename})?$classname->$rulename():Perl6::Rules::stdrule(q{$subrule})})"; |
482
|
|
|
|
|
|
|
} |
483
|
0
|
0
|
|
|
|
|
$repeat = trans_repeat($repeat) if $repeat; |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
# Save and restore match variables... |
486
|
|
|
|
|
|
|
|
487
|
0
|
|
|
|
|
|
$callrule = "(?:(?{local \%Perl6::Rules::d0 = \%Perl6::Rules::d0;local \@Perl6::Rules::d0 = \@Perl6::Rules::d0;Perl6::Rules::save_d0})$callrule(?{Perl6::Rules::restore_d0}))"; |
488
|
|
|
|
|
|
|
|
489
|
0
|
|
|
|
|
|
my $storage = "\$Perl6::Rules::d0{q{\$?$rulename}}"; |
490
|
0
|
0
|
0
|
|
|
|
if ($repeat && $capture) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
491
|
0
|
|
|
|
|
|
push @p5pat, "(?{local $storage = []})(?:$callrule(?{Perl6::Rules::apparray([$storage], \$0)}))$repeat"; |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
elsif ($repeat) { |
494
|
0
|
|
|
|
|
|
push @p5pat, "$callrule$repeat"; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
elsif ($capture) { |
497
|
0
|
|
|
|
|
|
push @p5pat, "($callrule)(?{local $storage = $storage; $storage = \$0})"; |
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
else { |
500
|
0
|
|
|
|
|
|
push @p5pat, $callrule; |
501
|
|
|
|
|
|
|
} |
502
|
0
|
0
|
|
|
|
|
if ($negate eq '!') { |
503
|
0
|
|
|
|
|
|
$p5pat[-1] = "(?!$p5pat[-1])"; |
504
|
|
|
|
|
|
|
} |
505
|
0
|
|
|
|
|
|
add_debug $p5pat[-1], "/$p6pat/"; |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
my %sigil = ( '$'=>1, '%'=>1, '@'=>1 ); |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
sub trans_repeat { |
511
|
0
|
|
|
0
|
|
|
my ($rep) = @_; |
512
|
0
|
0
|
|
|
|
|
return "" unless $rep; |
513
|
0
|
|
|
|
|
|
my ($from, $to); |
514
|
0
|
0
|
|
|
|
|
if (substr($rep,0,1) eq '<') { |
515
|
0
|
|
|
|
|
|
my $comma = index $rep, ","; |
516
|
0
|
0
|
|
|
|
|
if ($comma > 0) { |
517
|
0
|
|
|
|
|
|
$from = substr($rep,1,$comma-1); |
518
|
0
|
|
|
|
|
|
chop($to = substr($rep,$comma+1)); |
519
|
0
|
|
|
|
|
|
error "The use of variables in repetitions is not yet supported: <$from,$to>" |
520
|
0
|
0
|
|
|
|
|
if grep { $sigil{substr($_,0,1)} } $from, $to; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
else { |
523
|
0
|
|
|
|
|
|
chop($from = substr($rep,1)); |
524
|
0
|
|
|
|
|
|
$to = $from; |
525
|
0
|
0
|
|
|
|
|
error "The use of variables in repetitions is not yet supported: <$from>" |
526
|
|
|
|
|
|
|
if $sigil{substr($from,0,1)}; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
#SHOULD BE: $from = "(??{$from})" if substr($from,0,1) eq '$'; |
529
|
|
|
|
|
|
|
#SHOULD BE: $to = "(??{$to})" if substr($to,0,1) eq '$'; |
530
|
0
|
|
|
|
|
|
$rep = "{$from,$to}"; |
531
|
|
|
|
|
|
|
} |
532
|
0
|
|
|
|
|
|
return $rep; |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
sub nobacktrack { |
536
|
0
|
|
|
0
|
|
|
$p5pat[-1] = "(?>$p5pat[-1])"; |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
sub repeat { |
540
|
0
|
|
|
0
|
|
|
$raw .= $^N; |
541
|
0
|
|
|
|
|
|
my $rep = trans_repeat($^N); |
542
|
0
|
0
|
0
|
|
|
|
$p5pat[-1] = "(?:$p5pat[-1])" if length($p5pat[-1]||"") > 1; |
543
|
0
|
|
|
|
|
|
$p5pat[-1] .= $rep; |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
sub setcapindex { |
547
|
0
|
|
|
0
|
|
|
$nextcapindex = shift; |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
sub capmark { |
551
|
0
|
|
|
0
|
|
|
my ($name) = @_; |
552
|
0
|
0
|
|
|
|
|
my $sigil = @capindex ? substr($capindex[-1],0,1) : ""; |
553
|
0
|
0
|
0
|
|
|
|
push @capindex, ($name ? $name : $nextcapindex++) |
|
|
0
|
0
|
|
|
|
|
554
|
|
|
|
|
|
|
unless $sigil && ($sigil eq '@' || $sigil eq '%'); |
555
|
0
|
|
|
|
|
|
push @mark, scalar @p5pat; |
556
|
0
|
0
|
|
|
|
|
$raw .= "$name := " if $name; |
557
|
0
|
|
|
|
|
|
$raw .= '('; |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
sub mark { |
561
|
0
|
|
|
0
|
|
|
my ($type) = @_; |
562
|
0
|
|
|
|
|
|
debug "Opening $type block\n"; |
563
|
0
|
|
|
|
|
|
push @mark, scalar @p5pat; |
564
|
0
|
|
|
|
|
|
$raw .= $type; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
sub lookaround { |
568
|
0
|
|
|
0
|
|
|
my ($type) = @_; |
569
|
0
|
|
|
|
|
|
my $mark = pop @mark; |
570
|
0
|
|
|
|
|
|
splice @p5pat, $mark, @p5pat-$mark, "$type@p5pat[$mark..$#p5pat])"; |
571
|
0
|
|
|
|
|
|
$raw .= '>'; |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
sub make_charset { |
575
|
0
|
|
|
0
|
|
|
$raw .= '>'; |
576
|
0
|
|
|
|
|
|
my $mark = pop @mark; |
577
|
0
|
|
|
|
|
|
my (@neg, @pos); |
578
|
0
|
|
|
|
|
|
for (splice @p5pat, $mark, @p5pat-$mark) { |
579
|
0
|
|
|
|
|
|
push @neg, @{$_->[0]}; |
|
0
|
|
|
|
|
|
|
580
|
0
|
|
|
|
|
|
push @pos, @{$_->[1]}; |
|
0
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
} |
582
|
0
|
|
|
|
|
|
push @p5pat, '(?:'; |
583
|
0
|
0
|
|
|
|
|
$p5pat[-1] .= join "", map "(?!$_)", @neg if @neg; |
584
|
0
|
0
|
|
|
|
|
$p5pat[-1] .= @pos ? '(?:' . join('|', @pos) . ')' : '(?s:.)'; |
585
|
0
|
|
|
|
|
|
$p5pat[-1] .= ')'; |
586
|
0
|
|
|
|
|
|
debug "Emitted char class: /$p5pat[-1]/\n"; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
sub capture_internal { |
590
|
0
|
0
|
|
0
|
|
|
return unless @mark; # Implies unbalanced closing paren |
591
|
0
|
|
|
|
|
|
my $index = $capindex[-1]; |
592
|
0
|
|
|
|
|
|
my $sigil = substr($index,0,1); |
593
|
0
|
|
|
|
|
|
my $subsigil = substr($index,1,1); |
594
|
0
|
0
|
|
|
|
|
goto &capture_external if $subsigil ne '?'; |
595
|
0
|
|
|
|
|
|
$raw .= ')'; |
596
|
0
|
|
|
|
|
|
my $mark = pop @mark; |
597
|
0
|
0
|
0
|
|
|
|
pop @capindex unless $sigil eq '@' || $sigil eq '%'; |
598
|
0
|
|
|
|
|
|
my $action; |
599
|
0
|
|
|
|
|
|
my $storage = "\$Perl6::Rules::d0{q{$index}}"; |
600
|
0
|
0
|
|
|
|
|
if ($sigil eq '$') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
601
|
0
|
|
|
|
|
|
$action = "local $storage = \$^N"; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
elsif ($sigil eq '@') { |
604
|
0
|
|
|
|
|
|
$action = "local $storage = $storage; Perl6::Rules::apparray($storage, \$^N)"; |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
elsif ($sigil eq '%') { |
607
|
0
|
|
|
|
|
|
$action = "local $storage = $storage; Perl6::Rules::apphash($storage, \$^N)"; |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
else { |
610
|
0
|
|
|
|
|
|
$action = "local \$Perl6::Rules::d0[$index] = \$^N"; |
611
|
|
|
|
|
|
|
} |
612
|
0
|
|
|
|
|
|
splice @p5pat, $mark, @p5pat-$mark, "(@p5pat[$mark..$#p5pat])(?{$action})"; |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# Translate \c[...] and \C[...] back to Perl5ish \N{...} and [^\N{...}] |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
my %trans = ( |
618
|
|
|
|
|
|
|
'\c' => sub { '(?-x:\N{'.$_.'})' }, |
619
|
|
|
|
|
|
|
'\C' => sub { '(?s-x:(?!\N{'.$_.'}).)' }, |
620
|
|
|
|
|
|
|
'\x' => sub { sprintf '\x{%0.4s}', $_ }, |
621
|
|
|
|
|
|
|
'\X' => sub { sprintf '[^\x{%0.4s}]', $_ }, |
622
|
|
|
|
|
|
|
'\0' => sub { sprintf '\x{%0.4x}', oct $_ }, |
623
|
|
|
|
|
|
|
); |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub transchars { |
626
|
0
|
|
|
0
|
|
|
my ($pat) = @_; |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
# Positive blocky escapes... |
629
|
0
|
|
|
|
|
|
for my $esc (qw(\c \x \0)) { |
630
|
0
|
|
|
|
|
|
my $esclen = length($esc)+1; |
631
|
0
|
|
|
|
|
|
while (1) { |
632
|
0
|
|
|
|
|
|
my $from = index($pat, $esc.'['); |
633
|
0
|
0
|
|
|
|
|
last unless $from >= 0; |
634
|
0
|
|
|
|
|
|
my $to = index(substr($pat,$from+$esclen),']'); |
635
|
0
|
0
|
0
|
|
|
|
error("Empty $esc\[...]") and last if $to == 0; |
636
|
0
|
|
|
|
|
|
substr($pat,$from,$to+$esclen+1) = |
637
|
0
|
|
|
|
|
|
join "", map { $trans{$esc}() } |
638
|
0
|
|
|
|
|
|
map { split ';', $_ } |
639
|
0
|
|
|
|
|
|
map { split '; ', $_ } |
640
|
|
|
|
|
|
|
substr($pat,$from+$esclen,$to); |
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
# Negative blocky escapes... |
645
|
0
|
|
|
|
|
|
for my $esc (qw(\C \X)) { |
646
|
0
|
|
|
|
|
|
my $esclen = length($esc)+1; |
647
|
0
|
|
|
|
|
|
while (1) { |
648
|
0
|
|
|
|
|
|
my $from = index($pat, $esc.'['); |
649
|
0
|
0
|
|
|
|
|
last unless $from >= 0; |
650
|
0
|
|
|
|
|
|
my $to = index(substr($pat,$from+$esclen),']'); |
651
|
0
|
0
|
0
|
|
|
|
error("Empty $esc\[...]") and last if $to == 0; |
652
|
0
|
|
|
|
|
|
my @chars = map { split ';', $_ } |
|
0
|
|
|
|
|
|
|
653
|
0
|
|
|
|
|
|
map { split '; ', $_ } |
654
|
|
|
|
|
|
|
substr($pat,$from+$esclen,$to); |
655
|
0
|
0
|
|
|
|
|
my $after = @chars > 1 |
656
|
|
|
|
|
|
|
? join("",map $trans{lc $esc}(), @chars[1..$#chars]) |
657
|
|
|
|
|
|
|
: ""; |
658
|
0
|
|
|
|
|
|
substr($pat,$from,$to+$esclen+1) = |
659
|
0
|
|
|
|
|
|
"(?:" . do{local $_=$chars[0]; $trans{$esc}()} . $after . ")"; |
|
0
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
} |
662
|
0
|
|
|
|
|
|
return $pat; |
663
|
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
my ($cs_excl, $cs_incl) = (0,1); |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub transcharset { |
668
|
0
|
|
|
0
|
|
|
my ($pat, $sign) = @_; |
669
|
0
|
0
|
|
|
|
|
my $neg = $sign eq '-' ? 1 : 0; |
670
|
|
|
|
|
|
|
|
671
|
0
|
|
|
|
|
|
my @result = ([],[]); |
672
|
|
|
|
|
|
|
|
673
|
0
|
0
|
|
|
|
|
for my $esc ($neg ? qw(\C \X) : qw(\c \x \0)) { |
674
|
0
|
|
|
|
|
|
my $esclen = length($esc)+1; |
675
|
0
|
|
|
|
|
|
while (1) { |
676
|
0
|
|
|
|
|
|
my $from = index($pat, $esc.'['); |
677
|
0
|
0
|
|
|
|
|
last unless $from >= 0; |
678
|
0
|
|
|
|
|
|
my $to = index(substr($pat,$from+$esclen),']'); |
679
|
0
|
0
|
0
|
|
|
|
error("Empty $esc\[...]") and last if $to == 0; |
680
|
0
|
|
|
|
|
|
push @{$result[$cs_incl]}, |
|
0
|
|
|
|
|
|
|
681
|
0
|
|
|
|
|
|
join "", map { $trans{lc $esc}() } |
682
|
0
|
|
|
|
|
|
map { split ';', $_ } |
683
|
0
|
|
|
|
|
|
map { split '; ', $_ } |
684
|
|
|
|
|
|
|
substr($pat,$from+$esclen,$to); |
685
|
0
|
|
|
|
|
|
substr($pat,$from,$to+$esclen+1) = ""; |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
0
|
0
|
|
|
|
|
for my $esc ($neg ? qw(\c \x \0) : qw(\C \X)) { |
690
|
0
|
|
|
|
|
|
my $esclen = length($esc)+1; |
691
|
0
|
|
|
|
|
|
while (1) { |
692
|
0
|
|
|
|
|
|
my $from = index($pat, $esc.'['); |
693
|
0
|
0
|
|
|
|
|
last unless $from >= 0; |
694
|
0
|
|
|
|
|
|
my $to = index(substr($pat,$from+$esclen),']'); |
695
|
0
|
0
|
0
|
|
|
|
error("Empty $esc\[...]") and last if $to == 0; |
696
|
0
|
|
|
|
|
|
push @{$result[$cs_excl]}, |
|
0
|
|
|
|
|
|
|
697
|
0
|
|
|
|
|
|
join "", map { $trans{lc $esc}() } |
698
|
0
|
|
|
|
|
|
map { split ';', $_ } |
699
|
0
|
|
|
|
|
|
map { split '; ', $_ } |
700
|
|
|
|
|
|
|
substr($pat,$from+$esclen,$to); |
701
|
0
|
|
|
|
|
|
substr($pat,$from,$to+$esclen+1) = ""; |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
} |
704
|
|
|
|
|
|
|
|
705
|
0
|
0
|
|
|
|
|
unshift @{$result[$neg ? $cs_excl : $cs_incl]}, $pat unless $pat eq '[]'; |
|
0
|
0
|
|
|
|
|
|
706
|
0
|
|
|
|
|
|
return \@result; |
707
|
|
|
|
|
|
|
} |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
# Handle declaration of (...) captures |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
sub capture_external { |
713
|
0
|
0
|
|
0
|
|
|
return unless @mark; # Implies unbalanced closing paren |
714
|
0
|
|
|
|
|
|
$raw .= ')'; |
715
|
0
|
|
|
|
|
|
my $mark = pop @mark; |
716
|
0
|
|
|
|
|
|
my $index = $capindex[-1]; |
717
|
0
|
|
|
|
|
|
my $sigil = substr($index,0,1); |
718
|
0
|
0
|
0
|
|
|
|
pop @capindex unless $sigil eq '@' || $sigil eq '%'; |
719
|
0
|
|
|
|
|
|
my $action; |
720
|
0
|
0
|
|
|
|
|
if ($sigil eq '$') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
721
|
0
|
|
|
|
|
|
$action = "\$Perl6::Rules::exvar{scalar}{q{$index}}||=\\$index;" |
722
|
|
|
|
|
|
|
. "local $index = \$^N"; |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
elsif ($sigil eq '@') { |
725
|
0
|
|
|
|
|
|
$action = "\$Perl6::Rules::exvar{array}{q{$index}}||=\\$index;" |
726
|
|
|
|
|
|
|
. "local $index=$index; Perl6::Rules::apparray(\\$index, \$^N)"; |
727
|
|
|
|
|
|
|
} |
728
|
|
|
|
|
|
|
elsif ($sigil eq '%') { |
729
|
0
|
|
|
|
|
|
$action = "\$Perl6::Rules::exvar{hash}{q{$index}}||=\\$index;" |
730
|
|
|
|
|
|
|
. "local $index=$index; Perl6::Rules::apphash(\\$index, \$^N)"; |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
else { |
733
|
0
|
|
|
|
|
|
$action = "local \$Perl6::Rules::d0[$index] = \$^N"; |
734
|
|
|
|
|
|
|
} |
735
|
0
|
|
|
|
|
|
splice @p5pat, $mark, @p5pat-$mark, "(@p5pat[$mark..$#p5pat])(?{$action})"; |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
my %nametype = ( |
739
|
|
|
|
|
|
|
'@' => 'array', |
740
|
|
|
|
|
|
|
'%' => 'hash', |
741
|
|
|
|
|
|
|
'$' => 'scalar', |
742
|
|
|
|
|
|
|
); |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
sub caparraymark_external { |
745
|
0
|
|
|
0
|
|
|
my ($name) = @_; |
746
|
0
|
|
|
|
|
|
debug "Setting $name as capture target"; |
747
|
0
|
|
|
|
|
|
$raw .= "$name := ["; |
748
|
0
|
|
|
|
|
|
push @capindex, $name; |
749
|
0
|
|
|
|
|
|
push @mark, scalar @p5pat; |
750
|
0
|
|
|
|
|
|
my $sigil = substr($name,0,1); |
751
|
0
|
|
|
|
|
|
my $save = "\$Perl6::Rules::exvar{$nametype{$sigil}}{q{$name}}||=\\$name"; |
752
|
0
|
|
|
|
|
|
push @p5pat, "(?{$save;local $name = ($name,undef)})"; |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
sub caparraymark_internal { |
756
|
0
|
|
|
0
|
|
|
my ($name) = @_; |
757
|
0
|
|
|
|
|
|
debug "Setting $name as capture target"; |
758
|
0
|
|
|
|
|
|
$raw .= "$name := ["; |
759
|
0
|
|
|
|
|
|
push @capindex, $name; |
760
|
0
|
|
|
|
|
|
push @mark, scalar @p5pat; |
761
|
0
|
|
|
|
|
|
my $storage = "\$Perl6::Rules::d0{q{$name}}"; |
762
|
0
|
|
|
|
|
|
push @p5pat, "(?{local $storage = [\@{$storage},undef]})"; |
763
|
|
|
|
|
|
|
} |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
sub endcaparraymark { |
766
|
0
|
0
|
|
0
|
|
|
return unless @mark; # Implies unbalanced closing paren |
767
|
0
|
|
|
|
|
|
$raw .= ']'; |
768
|
0
|
|
|
|
|
|
my $mark = pop @mark; |
769
|
0
|
|
|
|
|
|
my $name = pop @capindex; |
770
|
0
|
|
|
|
|
|
splice @p5pat, $mark, @p5pat-$mark, "(?:@p5pat[$mark..$#p5pat])"; |
771
|
|
|
|
|
|
|
} |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
sub caphashmark_external { |
774
|
0
|
|
|
0
|
|
|
my ($name) = @_; |
775
|
0
|
|
|
|
|
|
debug "Setting $name as capture target"; |
776
|
0
|
|
|
|
|
|
$raw .= "$name := ["; |
777
|
0
|
|
|
|
|
|
push @capindex, $name; |
778
|
0
|
|
|
|
|
|
push @mark, scalar @p5pat; |
779
|
0
|
|
|
|
|
|
my $sigil = substr($name,0,1); |
780
|
0
|
|
|
|
|
|
my $save = "\$Perl6::Rules::exvar{$nametype{$sigil}}{q{$name}}||=\\$name"; |
781
|
0
|
|
|
|
|
|
push @p5pat, "(?{$save;local $name = ($name, ''=>undef)})"; |
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
sub caphashmark_internal { |
785
|
0
|
|
|
0
|
|
|
my ($name) = @_; |
786
|
0
|
|
|
|
|
|
debug "Setting $name as capture target"; |
787
|
0
|
|
|
|
|
|
$raw .= "$name := ["; |
788
|
0
|
|
|
|
|
|
push @capindex, $name; |
789
|
0
|
|
|
|
|
|
push @mark, scalar @p5pat; |
790
|
0
|
|
|
|
|
|
my $storage = "\$Perl6::Rules::d0{q{$name}}"; |
791
|
0
|
|
|
|
|
|
push @p5pat, "(?{local $storage = {\%{$storage}, ''=>undef}})"; |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
sub endcaphashmark { |
795
|
0
|
0
|
|
0
|
|
|
return unless @mark; # Implies unbalanced closing paren |
796
|
0
|
|
|
|
|
|
$raw .= ']'; |
797
|
0
|
|
|
|
|
|
my $mark = pop @mark; |
798
|
0
|
|
|
|
|
|
my $name = pop @capindex; |
799
|
0
|
|
|
|
|
|
splice @p5pat, $mark, @p5pat-$mark, "(?:@p5pat[$mark..$#p5pat])"; |
800
|
|
|
|
|
|
|
} |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
sub noncapture { |
803
|
0
|
0
|
|
0
|
|
|
return unless @mark; # Implies unbalanced closing square bracket |
804
|
0
|
|
|
|
|
|
$raw .= ']'; |
805
|
0
|
|
|
|
|
|
debug "Closing non-capturing block\n"; |
806
|
0
|
|
|
|
|
|
my $mark = pop @mark; |
807
|
0
|
|
|
|
|
|
splice @p5pat, $mark, @p5pat-$mark, "(?:@p5pat[$mark..$#p5pat])"; |
808
|
0
|
|
|
|
|
|
add_debug $p5pat[-1], "non-capturing block"; |
809
|
|
|
|
|
|
|
} |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
sub rulename { |
812
|
0
|
|
|
0
|
|
|
$rulename = $^N; |
813
|
|
|
|
|
|
|
} |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
sub empty { |
816
|
0
|
|
|
0
|
|
|
error "Empty pattern not allowed (use // or //)"; |
817
|
|
|
|
|
|
|
} |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
sub badrep { |
820
|
0
|
|
|
0
|
|
|
error "Invalid quantifier: /$raw$^N <--HERE .../"; |
821
|
0
|
|
|
|
|
|
$raw .= $^N; |
822
|
|
|
|
|
|
|
} |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
sub invalid { |
825
|
0
|
|
|
0
|
|
|
error "Invalid Perl 6 pattern (perhaps mismatched brackets?): $_[0]"; |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
# Parser starts here... |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
our $comment = q{[#][^\n]*}; # Comment |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
our $ws = qr{(?:$comment|\s)+}; # WhiteSpace |
833
|
|
|
|
|
|
|
our $ows = qr{(?:$comment|\s)*}; # Optional WhiteSpace |
834
|
|
|
|
|
|
|
our $ident = qr{(?>[^\W\d]\w*)}; # Identifier |
835
|
|
|
|
|
|
|
our $int = qr{(?:\d+|\$$ident)}; # Integer for range |
836
|
|
|
|
|
|
|
our $rrep = qr{<$int(?:,$int?)?>}; # Range repeater |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
our $qualident = qr{(?> $ident? (?: :: $ident)+ )}x; # Qualified identifier |
839
|
|
|
|
|
|
|
our $callident = qr{(?> $ident (::$ident)* (?:\.$ident)? )}x; |
840
|
|
|
|
|
|
|
# Qualified Perl6 call |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
our $rep = qq{((?:[*+?]|$rrep)[?]?)}; |
843
|
|
|
|
|
|
|
our $orep = qq{((?:[*+?]|$rrep)?[?]?)}; |
844
|
|
|
|
|
|
|
our $stdrep = qq{$orep (?{repeat})}; # Optional repeater |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
our $sliteral = q{[-\w~`!=;"',]}; # Literal for a Slash-delimited pattern |
847
|
|
|
|
|
|
|
our $bliteral = q{[-\w~`!=;"',/]}; # Literal for a Bracketed pattern |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
our $bracket = q{\[|\]|\{|\}|\<|\>|\(|\)}; # Any bracket |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
our $squareblock = qr{ \[ (?: (?> (?:[^][]|\\.)+ ) | (??{$squareblock}) )* \] }x; |
852
|
|
|
|
|
|
|
our $braceblock = qr{ \{ (?: (?> (?:[^{}]|\\.)+ ) | (??{$braceblock}) )* \} }x; |
853
|
|
|
|
|
|
|
our $parenblock = qr{ \( (?: (?> (?:[^()]|\\.)+ ) | (??{$parenblock}) )* \) }x; |
854
|
|
|
|
|
|
|
our $angleblock = qr{ \< (?: (?> (?:[^<>]|\\.)+ ) | (??{$angleblock}) )* \> }x; |
855
|
|
|
|
|
|
|
our $slashblock = qr{ (?> (?:[^/]|\\.)* ) / }x; |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
our $delimblock = qr{ $braceblock |
858
|
|
|
|
|
|
|
| $squareblock |
859
|
|
|
|
|
|
|
| $parenblock |
860
|
|
|
|
|
|
|
| $angleblock |
861
|
|
|
|
|
|
|
}x; |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
our $charset = qr{ \[ \]? (?:\\[cCxX]\[ [^]]* \]|\\.|[^]])* \] }xs; |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
our $mods = qr{ (?> (?: : \w+ (?: $parenblock? ) )* ) }x; |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
# These would be preferrable, but are not reliable... |
868
|
|
|
|
|
|
|
# our $newline = qr{(?:\015\012?|[\012\x85\x2028])}; |
869
|
|
|
|
|
|
|
# our $notnewline = qr{(?:(?!\015\012?|[\012\x85\x2028])[\s\S])}; |
870
|
|
|
|
|
|
|
# So we cheat with these instead... |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
our $newline = qr{\n}; |
873
|
|
|
|
|
|
|
our $notnewline = qr{[^\n]}; |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
our %ews = ( # Explicit WhiteSpace |
876
|
|
|
|
|
|
|
q{\h} => q{[ \t\r]}, |
877
|
|
|
|
|
|
|
q{\v} => $newline, |
878
|
|
|
|
|
|
|
q{\s} => q{(?:\015\012?|[\s\012\x85\x2028])}, |
879
|
|
|
|
|
|
|
q{} => q{(?:\s+)}, |
880
|
|
|
|
|
|
|
q{} => q{[ ]}, |
881
|
|
|
|
|
|
|
); |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
our $ews = '(?:' . join('|',map quotemeta, keys %ews) . ')'; |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
# Internal scalars... |
886
|
|
|
|
|
|
|
our $i_scalar = qr{ (?: \$ \? $ident |
887
|
|
|
|
|
|
|
| \@ \? $ident $squareblock |
888
|
|
|
|
|
|
|
| \% \? $ident $braceblock |
889
|
|
|
|
|
|
|
) |
890
|
|
|
|
|
|
|
(?: \.? (?>$squareblock|$braceblock) )* |
891
|
|
|
|
|
|
|
}x; |
892
|
|
|
|
|
|
|
our $i_array = qr{ \@ \? $ident }x; |
893
|
|
|
|
|
|
|
our $i_hash = qr{ \% \? $ident }x; |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
# External scalars... |
896
|
|
|
|
|
|
|
our $e_scalar = qr{ (?: \$ $qualident |
897
|
|
|
|
|
|
|
| \@ $qualident $squareblock |
898
|
|
|
|
|
|
|
| \% $qualident $braceblock |
899
|
|
|
|
|
|
|
) |
900
|
|
|
|
|
|
|
(?: (?:\.)? (?>$squareblock|$braceblock) )* |
901
|
|
|
|
|
|
|
}x; |
902
|
|
|
|
|
|
|
our $e_array = qr{ \@ $qualident }x; |
903
|
|
|
|
|
|
|
our $e_hash = qr{ \% $qualident }x; |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
# Unqualified externals not allowed... |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
our $bad_var = qr{ [\$\@%] $ident (?! :) }x; |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
our $codeblock = qr{ |
910
|
|
|
|
|
|
|
(?{$debug = 2 if $debug}) |
911
|
|
|
|
|
|
|
(\{) (?{mark($^N)}) |
912
|
|
|
|
|
|
|
(?> |
913
|
|
|
|
|
|
|
(?: ($i_scalar) (?{addscalar_internal $^N, 'rw'}) |
914
|
|
|
|
|
|
|
| ($e_scalar) (?{addscalar_external $^N, 'rw'}) |
915
|
|
|
|
|
|
|
| ($i_array) (?{addarray_internal $^N, 'rw'}) |
916
|
|
|
|
|
|
|
| ($e_array) (?{addarray_external $^N, 'rw'}) |
917
|
|
|
|
|
|
|
| ($i_hash) (?{addhash_internal $^N, 'rw'}) |
918
|
|
|
|
|
|
|
| ($e_hash) (?{addhash_external $^N, 'rw'}) |
919
|
|
|
|
|
|
|
| \$ (\d+) (?{add '$'.$^N, "\$Perl6::Rules::d0[$^N]"}) |
920
|
|
|
|
|
|
|
| ((?:\$\^\w+|[^{}\$]|\\[{}\$])+) (?{add $^N, $^N}) |
921
|
|
|
|
|
|
|
| (??{$nestedcodeblock}) |
922
|
|
|
|
|
|
|
)* |
923
|
|
|
|
|
|
|
) |
924
|
|
|
|
|
|
|
(?{$debug = 1 if $debug}) |
925
|
|
|
|
|
|
|
(\}) (?{codeblock($^N)}) |
926
|
|
|
|
|
|
|
| |
927
|
|
|
|
|
|
|
(??{$debug=1 if $debug;'(?!)'}) |
928
|
|
|
|
|
|
|
}x; |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
our $nestedcodeblock = qr{ |
931
|
|
|
|
|
|
|
(\{) (?{mark($^N)}) |
932
|
|
|
|
|
|
|
(?> |
933
|
|
|
|
|
|
|
(?: ($i_scalar) (?{addscalar_internal $^N, 'rw'}) |
934
|
|
|
|
|
|
|
| ($e_scalar) (?{addscalar_external $^N, 'rw'}) |
935
|
|
|
|
|
|
|
| ($i_array) (?{addarray_internal $^N, 'rw'}) |
936
|
|
|
|
|
|
|
| ($e_array) (?{addarray_external $^N, 'rw'}) |
937
|
|
|
|
|
|
|
| ($i_hash) (?{addhash_internal $^N, 'rw'}) |
938
|
|
|
|
|
|
|
| ($e_hash) (?{addhash_external $^N, 'rw'}) |
939
|
|
|
|
|
|
|
| ($bad_var) (?{error("Can't use unqualified variable ($^N)")}) |
940
|
|
|
|
|
|
|
| \$ (\d+) (?{add '$'.$^N, "\$Perl6::Rules::d0[$^N]"}) |
941
|
|
|
|
|
|
|
| ((?:\$\^\w+|[^{}\$]|\\[{}\$])+) (?{add $^N, $^N}) |
942
|
|
|
|
|
|
|
| (??{$nestedcodeblock}) |
943
|
|
|
|
|
|
|
)* |
944
|
|
|
|
|
|
|
) |
945
|
|
|
|
|
|
|
(\}) (?{codeblock($^N,'nested')}) |
946
|
|
|
|
|
|
|
}x; |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
our $assertblock = qr[ |
949
|
|
|
|
|
|
|
(?{$debug=2 if $debug}) |
950
|
|
|
|
|
|
|
(<\() (?{mark($^N)}) |
951
|
|
|
|
|
|
|
(?> |
952
|
|
|
|
|
|
|
(?: ($i_scalar) (?{addscalar_internal $^N, 'rw'}) |
953
|
|
|
|
|
|
|
| ($e_scalar) (?{addscalar_external $^N, 'rw'}) |
954
|
|
|
|
|
|
|
| ($i_array) (?{addarray_internal $^N, 'rw'}) |
955
|
|
|
|
|
|
|
| ($e_array) (?{addarray_external $^N, 'rw'}) |
956
|
|
|
|
|
|
|
| ($i_hash) (?{addhash_internal $^N, 'rw'}) |
957
|
|
|
|
|
|
|
| ($e_hash) (?{addhash_external $^N, 'rw'}) |
958
|
|
|
|
|
|
|
| ($bad_var) (?{error("Can't use unqualified variable ($^N)")}) |
959
|
|
|
|
|
|
|
| \$ (\d+) (?{add '$'.$^N, "\$Perl6::Rules::d0[$^N]"}) |
960
|
|
|
|
|
|
|
| ((?:\$\^\w+|[^{\)\$]|\\[{\)\$])+) (?{add $^N, $^N}) |
961
|
|
|
|
|
|
|
| (??{$nestedcodeblock}) |
962
|
|
|
|
|
|
|
)* |
963
|
|
|
|
|
|
|
) |
964
|
|
|
|
|
|
|
(?{$debug=1 if $debug}) |
965
|
|
|
|
|
|
|
(\)>) (?{codeblock($^N)}) |
966
|
|
|
|
|
|
|
| |
967
|
|
|
|
|
|
|
(??{$debug=1 if $debug;'(?!)'}) |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
]x; |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
our $bspat = qr{ # Bracketed and Slashed patterns |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
# Explicit whitespace (possibly repeated)... |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
$ows ($ews) |
977
|
|
|
|
|
|
|
(?{add $^N, $ews{$^N}}) |
978
|
|
|
|
|
|
|
$stdrep $ows |
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
# Actual whitespace (insert :words spacing if in appropriate mode)... |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
| $ws |
983
|
|
|
|
|
|
|
(?{wordspace}) |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
# Backreference as literal (interpolated $1, $2, etc.)... |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
| \$ (\d+) |
988
|
|
|
|
|
|
|
(?{add '$'.$^N, "(??{quotemeta \$Perl6::Rules::d0[$^N]})"}) |
989
|
|
|
|
|
|
|
$stdrep |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
# Interpolated variable as literal... |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
| ($i_scalar) (?{ addscalar_internal $^N, 'quotemeta' }) $stdrep |
994
|
|
|
|
|
|
|
| ($i_array) (?{ addarray_internal $^N, 'quotemeta' }) $stdrep |
995
|
|
|
|
|
|
|
| ($i_hash) (?{ addhash_internal $^N, 'quotemeta' }) $stdrep |
996
|
|
|
|
|
|
|
| ($e_scalar) (?{ addscalar_external $^N, 'quotemeta' }) $stdrep |
997
|
|
|
|
|
|
|
| ($e_array) (?{ addarray_external $^N, 'quotemeta' }) $stdrep |
998
|
|
|
|
|
|
|
| ($e_hash) (?{ addhash_external $^N, 'quotemeta' }) $stdrep |
999
|
|
|
|
|
|
|
| ($bad_var) (?{error("Can't use unqualified variable ($^N)")}) |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
# Character class... |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
| < (?: ([+-]?) (?{$^N||""}) ($charset) |
1004
|
|
|
|
|
|
|
(?{mark ""; add "<$^R.$^N", transcharset($^N, $^R)}) |
1005
|
|
|
|
|
|
|
| ([+-]?) (?{$^N||""}) < ([-!]? $ident) > |
1006
|
|
|
|
|
|
|
(?{mark ""; add "<$^R.$^N", getprop($^N, $^R)}) |
1007
|
|
|
|
|
|
|
) |
1008
|
|
|
|
|
|
|
(?: ([+-]?) (?{$^N||""}) ($charset) |
1009
|
|
|
|
|
|
|
(?{add "$^R.$^N", transcharset($^N, $^R)}) |
1010
|
|
|
|
|
|
|
| ([+-]?) (?{$^N||""}) < ([-!]? $ident) > |
1011
|
|
|
|
|
|
|
(?{add "$^R.$^N", getprop($^N, $^R)}) |
1012
|
|
|
|
|
|
|
)* |
1013
|
|
|
|
|
|
|
> |
1014
|
|
|
|
|
|
|
(?{make_charset}) |
1015
|
|
|
|
|
|
|
$stdrep |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
# <(...)> assertion block... |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
| $assertblock |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
# Backreference as pattern (interpolated <$1>, <$2>, etc.)... |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
| <(\$ \d+)> |
1024
|
|
|
|
|
|
|
(?{add "<$^N>", error("Cannot interpolate $^N as pattern")}) |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
# Interpolate variable as pattern... |
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
| <($i_scalar)> (?{addscalar_internal $^N}) $stdrep |
1029
|
|
|
|
|
|
|
| <($i_array)> (?{addarray_internal $^N}) $stdrep |
1030
|
|
|
|
|
|
|
| <($i_hash)> (?{addhash_internal $^N}) $stdrep |
1031
|
|
|
|
|
|
|
| <($e_scalar)> (?{addscalar_external $^N}) $stdrep |
1032
|
|
|
|
|
|
|
| <($e_array)> (?{addarray_external $^N}) $stdrep |
1033
|
|
|
|
|
|
|
| <($e_hash)> (?{addhash_external $^N}) $stdrep |
1034
|
|
|
|
|
|
|
| <($bad_var)> (?{error("Can't use unqualified variable (<$^N>)")}) |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
# Code block as action... |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
| $codeblock |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
# Code block as interpolated pattern... |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
| <($braceblock)> |
1043
|
|
|
|
|
|
|
(?{add $^N, "(??{Perl6::Rules::ispat do$^N})"}) |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
# Literal in <'...'> format... |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
| <' ( [^'\\]* (\\. [^'\\])* ) '> |
1048
|
|
|
|
|
|
|
(?{add "<'$^N'>", "\Q$^N\E"}) |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
# Match any Unicode character, regardless of :uN level... |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
| (< \. >) |
1053
|
|
|
|
|
|
|
(?{add $^N, '(?:\X)'}) |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
# Match newline or anything-but-newline... |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
| \\n |
1058
|
|
|
|
|
|
|
(?{add '\n', $newline}) $stdrep |
1059
|
|
|
|
|
|
|
| \\N |
1060
|
|
|
|
|
|
|
(?{add '\N', $notnewline}) $stdrep |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
# Quotemeta's literal (\Q[...])... |
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
| \\Q ( $squareblock ) |
1065
|
|
|
|
|
|
|
(?{add "\\Q$^N", quotemeta substr($^N,1,-1)}) $stdrep |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
# Named and numbered characters (\c[...], \C[...], \x[...], \0[...], etc)... |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
| ( \\[cCxX0] $squareblock |
1070
|
|
|
|
|
|
|
| \\[xX][0-9A-Fa-f]+ |
1071
|
|
|
|
|
|
|
| \\0[0-7]+ |
1072
|
|
|
|
|
|
|
) |
1073
|
|
|
|
|
|
|
(?{add $^N, transchars($^N)}) $stdrep |
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
| (\\[cCxX0] \[) (?{$^N}) ((?>.*)) |
1076
|
|
|
|
|
|
|
(?{error "Untermimated $^R...] escape: $^R$^N"}) |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
# Literal dot... |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
| (\\.) |
1081
|
|
|
|
|
|
|
(?{add $^N, $^N}) $stdrep |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
# Backtracking limiter... |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
| : (?=\s|\z) |
1086
|
|
|
|
|
|
|
(?{nobacktrack}) |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
# Lexical insensitivity... |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
| :i |
1091
|
|
|
|
|
|
|
(?{add ":i", '(?i)'}) |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
# Continuation marker... |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
| :c (?{add '\G'}) |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
# Other lexical flags (NOT YET IMPLEMENTED)... |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
| :(u0|u1|u2|u3|w|p5) |
1100
|
|
|
|
|
|
|
(?{error "In-pattern :$^N not yet implemented"}) |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
# Match any character... |
1103
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
| \. |
1105
|
|
|
|
|
|
|
(?{add '.', '[\s\S]'}) $stdrep |
1106
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
# Start of line marker... |
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
| \^\^ |
1110
|
|
|
|
|
|
|
(?{add '^^', '(?:(?<=\n)|(?<=\A))'}) |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
# End of line marker... |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
| \$\$ |
1115
|
|
|
|
|
|
|
(?{add '$$', '(?:(?<=\n)|(?=\z))'}) |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
# Start of string marker... |
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
| \^ |
1120
|
|
|
|
|
|
|
(?{add '^', '\A'}) |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
# End of string marker... |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
| \$ |
1125
|
|
|
|
|
|
|
(?{add '$', '\z'}) |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
# Non-capturing subrule or property... |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
| < ($callident) > |
1130
|
|
|
|
|
|
|
(?{subrule($^N,"")}) $stdrep |
1131
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
| < - ($callident) > |
1133
|
|
|
|
|
|
|
(?{subrule($^N,"","","-")}) $stdrep |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
| < ! ($callident) > |
1136
|
|
|
|
|
|
|
(?{subrule($^N,"","","!")}) $stdrep |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
# Capturing subrule... |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
| < \? ($callident) > |
1141
|
|
|
|
|
|
|
(?{$Perl6::Rules::srname=$^N}) |
1142
|
|
|
|
|
|
|
$ows $rep |
1143
|
|
|
|
|
|
|
(?{ subrule($Perl6::Rules::srname, $^N, "cap")}) |
1144
|
|
|
|
|
|
|
| < \? ($callident) > |
1145
|
|
|
|
|
|
|
(?{ subrule($^N, "", "cap")}) |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
# Alternative marker... |
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
| \| |
1150
|
|
|
|
|
|
|
(?{alternative}) |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
# Comment... |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
| $comment |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
# Unattached repetition marker... |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
| ($orep) |
1159
|
|
|
|
|
|
|
(?{$^N&&badrep()}) |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
}x; |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
our $spat = qr{ # Slash-delimited pattern |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
# A literal character... |
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
(?: ($sliteral) |
1169
|
|
|
|
|
|
|
(?{add $^N, $^N}) $stdrep |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
# Lookahead and lookbehind... |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
| (
|
1174
|
|
|
|
|
|
|
(?{mark $^N}) |
1175
|
|
|
|
|
|
|
(??{$spat}) > |
1176
|
|
|
|
|
|
|
(?{lookaround('(?=')}) |
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
| (
|
1179
|
|
|
|
|
|
|
(?{mark $^N}) |
1180
|
|
|
|
|
|
|
(??{$spat}) > |
1181
|
|
|
|
|
|
|
(?{lookaround('(?!')}) |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
| (
|
1184
|
|
|
|
|
|
|
(?{mark $^N}) |
1185
|
|
|
|
|
|
|
(??{$spat}) > |
1186
|
|
|
|
|
|
|
(?{lookaround('(?<=')}) |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
| (
|
1189
|
|
|
|
|
|
|
(?{mark $^N}) |
1190
|
|
|
|
|
|
|
(??{$spat}) > |
1191
|
|
|
|
|
|
|
(?{lookaround('(?
|
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
# Named capture... |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
| \$ (\d+) $ows := $ows \( |
1196
|
|
|
|
|
|
|
(?{setcapindex $^N; capmark}) |
1197
|
|
|
|
|
|
|
(??{$spat}) \) |
1198
|
|
|
|
|
|
|
(?{capture_internal $^R}) |
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
| ($i_scalar) $ows := $ows \( |
1201
|
|
|
|
|
|
|
(?{capmark $^N}) |
1202
|
|
|
|
|
|
|
(??{$spat}) \) |
1203
|
|
|
|
|
|
|
(?{capture_internal $^R}) |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
| ($e_scalar) $ows := $ows \( |
1206
|
|
|
|
|
|
|
(?{capmark $^N}) |
1207
|
|
|
|
|
|
|
(??{$spat}) \) |
1208
|
|
|
|
|
|
|
(?{capture_external $^R}) |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
| ($i_array) $ows := $ows \( |
1211
|
|
|
|
|
|
|
(?{caparraymark_internal $^N; capmark;}) |
1212
|
|
|
|
|
|
|
(??{$spat}) \) |
1213
|
|
|
|
|
|
|
(?{capture_internal; endcaparraymark;}) |
1214
|
|
|
|
|
|
|
$stdrep |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
| ($e_array) $ows := $ows \( |
1217
|
|
|
|
|
|
|
(?{caparraymark_external $^N; capmark;}) |
1218
|
|
|
|
|
|
|
(??{$spat}) \) |
1219
|
|
|
|
|
|
|
(?{capture_external; endcaparraymark;}) |
1220
|
|
|
|
|
|
|
$stdrep |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
| ($i_array) $ows := $ows \[ |
1223
|
|
|
|
|
|
|
(?{caparraymark_internal $^N}) |
1224
|
|
|
|
|
|
|
(??{$spat}) \] |
1225
|
|
|
|
|
|
|
(?{endcaparraymark}) |
1226
|
|
|
|
|
|
|
$stdrep |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
| ($e_array) $ows := $ows \[ |
1229
|
|
|
|
|
|
|
(?{caparraymark_external $^N}) |
1230
|
|
|
|
|
|
|
(??{$spat}) \] |
1231
|
|
|
|
|
|
|
(?{endcaparraymark}) |
1232
|
|
|
|
|
|
|
$stdrep |
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
| ($i_hash) $ows := $ows \( |
1235
|
|
|
|
|
|
|
(?{caphashmark_internal $^N; capmark;}) |
1236
|
|
|
|
|
|
|
(??{$spat}) \) |
1237
|
|
|
|
|
|
|
(?{capture_internal; endcaphashmark;}) |
1238
|
|
|
|
|
|
|
$stdrep |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
| ($e_hash) $ows := $ows \( |
1241
|
|
|
|
|
|
|
(?{caphashmark_external $^N; capmark;}) |
1242
|
|
|
|
|
|
|
(??{$spat}) \) |
1243
|
|
|
|
|
|
|
(?{capture_external; endcaphashmark;}) |
1244
|
|
|
|
|
|
|
$stdrep |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
| ($i_hash) $ows := $ows \[ |
1247
|
|
|
|
|
|
|
(?{caphashmark_internal $^N}) |
1248
|
|
|
|
|
|
|
(??{$spat}) \] |
1249
|
|
|
|
|
|
|
(?{endcaphashmark}) |
1250
|
|
|
|
|
|
|
$stdrep |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
| ($e_hash) $ows := $ows \[ |
1253
|
|
|
|
|
|
|
(?{caphashmark_external $^N}) |
1254
|
|
|
|
|
|
|
(??{$spat}) \] |
1255
|
|
|
|
|
|
|
(?{endcaphashmark}) |
1256
|
|
|
|
|
|
|
$stdrep |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
# Other non-delimiter-specific constructs (as defined by $bspat above)... |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
| $bspat |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
# Nameless capture... |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
| \( |
1265
|
|
|
|
|
|
|
(?{capmark}) |
1266
|
|
|
|
|
|
|
(??{$spat}) \) |
1267
|
|
|
|
|
|
|
(?{capture_internal}) |
1268
|
|
|
|
|
|
|
$stdrep |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
# Non-capturing group... |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
| \[ |
1273
|
|
|
|
|
|
|
(?{mark '['}) |
1274
|
|
|
|
|
|
|
(??{$spat}) \] |
1275
|
|
|
|
|
|
|
(?{noncapture}) |
1276
|
|
|
|
|
|
|
$stdrep |
1277
|
|
|
|
|
|
|
)+ |
1278
|
|
|
|
|
|
|
}x; |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
our $bpat = qr{ # Bracketed pattern |
1281
|
|
|
|
|
|
|
(?: |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
# A literal character... |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
($bliteral) (?{add $^N, $^N}) $stdrep |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
# Lookahead and lookbehind... |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
| (
|
1290
|
|
|
|
|
|
|
(?{mark $^N}) |
1291
|
|
|
|
|
|
|
(??{$bpat}) > |
1292
|
|
|
|
|
|
|
(?{lookaround('(?=')}) |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
| (
|
1295
|
|
|
|
|
|
|
(?{mark $^N}) |
1296
|
|
|
|
|
|
|
(??{$bpat}) > |
1297
|
|
|
|
|
|
|
(?{lookaround('(?!')}) |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
| (
|
1300
|
|
|
|
|
|
|
(?{mark $^N}) |
1301
|
|
|
|
|
|
|
(??{$bpat}) > |
1302
|
|
|
|
|
|
|
(?{lookaround('(?<=')}) |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
| (
|
1305
|
|
|
|
|
|
|
(?{mark $^N}) |
1306
|
|
|
|
|
|
|
(??{$bpat}) > |
1307
|
|
|
|
|
|
|
(?{lookaround('(?
|
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
# Named capture... |
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
| \$ (\d+) $ows := $ows \( |
1312
|
|
|
|
|
|
|
(?{setcapindex $^N; capmark}) |
1313
|
|
|
|
|
|
|
(??{$bpat}) \) |
1314
|
|
|
|
|
|
|
(?{capture_internal $^R}) |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
| ($i_scalar) $ows := $ows \( |
1317
|
|
|
|
|
|
|
(?{capmark $^N}) |
1318
|
|
|
|
|
|
|
(??{$bpat}) \) |
1319
|
|
|
|
|
|
|
(?{capture_internal $^R}) |
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
| ($e_scalar) $ows := $ows \( |
1322
|
|
|
|
|
|
|
(?{capmark $^N}) |
1323
|
|
|
|
|
|
|
(??{$bpat}) \) |
1324
|
|
|
|
|
|
|
(?{capture_external $^R}) |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
| ($i_array) $ows := $ows \( |
1327
|
|
|
|
|
|
|
(?{caparraymark_internal $^N; capmark;}) |
1328
|
|
|
|
|
|
|
(??{$bpat}) \) |
1329
|
|
|
|
|
|
|
(?{capture_internal; endcaparraymark;}) |
1330
|
|
|
|
|
|
|
$stdrep |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
| ($e_array) $ows := $ows \( |
1333
|
|
|
|
|
|
|
(?{caparraymark_external $^N; capmark;}) |
1334
|
|
|
|
|
|
|
(??{$bpat}) \) |
1335
|
|
|
|
|
|
|
(?{capture_external; endcaparraymark;}) |
1336
|
|
|
|
|
|
|
$stdrep |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
| ($i_array) $ows := $ows \[ |
1339
|
|
|
|
|
|
|
(?{caparraymark_internal $^N}) |
1340
|
|
|
|
|
|
|
(??{$bpat}) \] |
1341
|
|
|
|
|
|
|
(?{endcaparraymark}) |
1342
|
|
|
|
|
|
|
$stdrep |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
| ($e_array) $ows := $ows \[ |
1345
|
|
|
|
|
|
|
(?{caparraymark_external $^N}) |
1346
|
|
|
|
|
|
|
(??{$bpat}) \] |
1347
|
|
|
|
|
|
|
(?{endcaparraymark}) |
1348
|
|
|
|
|
|
|
$stdrep |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
# Other non-delimiter-specific constructs (as defined by $bspat above)... |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
| $bspat |
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
# Nameless capture... |
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
| \( |
1357
|
|
|
|
|
|
|
(?{capmark}) |
1358
|
|
|
|
|
|
|
(??{$bpat}) \) |
1359
|
|
|
|
|
|
|
(?{capture_internal}) |
1360
|
|
|
|
|
|
|
$stdrep |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
# Non-capturing group... |
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
| \[ |
1365
|
|
|
|
|
|
|
(?{mark '['}) |
1366
|
|
|
|
|
|
|
(??{$bpat}) \] |
1367
|
|
|
|
|
|
|
(?{noncapture}) |
1368
|
|
|
|
|
|
|
$stdrep |
1369
|
|
|
|
|
|
|
)+ |
1370
|
|
|
|
|
|
|
}x; |
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
our $rx = qr{ |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
# Nameless rule requires {...} delimiters... |
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
\b rule $ows ($mods) (?{mods}) $ows |
1377
|
|
|
|
|
|
|
(?> (\{) (?{delim}) (?: $ows \} (?{empty})|$bpat \} ) |
1378
|
|
|
|
|
|
|
| ( (?:\{|\[|\<|/) .*) (?{invalid('rx'.$^N)}) |
1379
|
|
|
|
|
|
|
) |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
# Nameless rx allows /.../ or any bracket delimiters... |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
| \b rx $ows ($mods) (?{mods}) $ows |
1384
|
|
|
|
|
|
|
(?> (\{) (?{delim}) (?: $ows \} (?{empty})|$bpat \} ) |
1385
|
|
|
|
|
|
|
| (\[) (?{delim}) (?: $ows \] (?{empty})|$bpat \] ) |
1386
|
|
|
|
|
|
|
| (\<) (?{delim}) (?: $ows \> (?{empty})|$bpat \> ) |
1387
|
|
|
|
|
|
|
| (/) (?{delim}) (?: $ows / (?{empty})|$spat / ) |
1388
|
|
|
|
|
|
|
| ( (?:\{|\[|\<|/) .*) (?{invalid('rx'.$^N)}) |
1389
|
|
|
|
|
|
|
) |
1390
|
|
|
|
|
|
|
}x; |
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
our $match = qr{ |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
# Match construct allows /.../ or any bracket delimiters... |
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
\b m \b $ows ($mods) (?{mods}) $ows |
1398
|
|
|
|
|
|
|
(?> (\{) (?{delim}) (?: $ows \} (?{empty})|$bpat \} ) |
1399
|
|
|
|
|
|
|
| (\[) (?{delim}) (?: $ows \] (?{empty})|$bpat \] ) |
1400
|
|
|
|
|
|
|
| (\<) (?{delim}) (?: $ows \> (?{empty})|$bpat \> ) |
1401
|
|
|
|
|
|
|
| (/) (?{delim}) (?: $ows / (?{empty})|$spat / ) |
1402
|
|
|
|
|
|
|
| ( (?:\{|\[|\<|/) .*) (?{invalid('m'.$^N)}) |
1403
|
|
|
|
|
|
|
) |
1404
|
|
|
|
|
|
|
}x; |
1405
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
our $subst = qr{ |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
# Substitution construct allows /.../ or any bracket delimiters... |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
\b s $ows ($mods) (?{mods}) $ows |
1411
|
|
|
|
|
|
|
(?> (\{) (?{delim}) (?: $ows \} (?{empty})|$bpat \} ) ($delimblock) |
1412
|
|
|
|
|
|
|
| (\[) (?{delim}) (?: $ows \] (?{empty})|$bpat \] ) ($delimblock) |
1413
|
|
|
|
|
|
|
| (\<) (?{delim}) (?: $ows \> (?{empty})|$bpat \> ) ($delimblock) |
1414
|
|
|
|
|
|
|
| (/) (?{delim}) (?: $ows / (?{empty})|$spat / ) ($slashblock|$delimblock) |
1415
|
|
|
|
|
|
|
| ( (?:\{|\[|\<|/) .*) (?{invalid('s'.$^N)}) |
1416
|
|
|
|
|
|
|
) |
1417
|
|
|
|
|
|
|
}x; |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
# Unimplemented modifiers... |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
my %unimpl; |
1423
|
|
|
|
|
|
|
@unimpl{qw(u0 u1 u2 u3 once p5 perl5)} = (); |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
# Limit interpolated rules to precompiled regexes |
1426
|
|
|
|
|
|
|
# (at least, until recursive rule translation can be made to work)... |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
sub ispat { |
1429
|
0
|
|
|
0
|
|
|
my ($pat) = @_; |
1430
|
0
|
0
|
0
|
|
|
|
warn qq{Cannot interpolate raw string "$pat" as pattern\n} and exit(1) |
|
|
|
0
|
|
|
|
|
1431
|
|
|
|
|
|
|
unless (ref($pat)||"") eq 'Regexp'; |
1432
|
0
|
|
|
|
|
|
return $pat; |
1433
|
|
|
|
|
|
|
} |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
sub arepat { |
1436
|
0
|
|
|
0
|
|
|
ispat($_) for @_; |
1437
|
|
|
|
|
|
|
} |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
# Extract and translate rule modifiers |
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
sub mods { |
1443
|
0
|
|
|
0
|
|
|
my @mods = split ":", $^N; |
1444
|
0
|
|
|
|
|
|
while (defined(my $mod = shift @mods)) { |
1445
|
0
|
0
|
0
|
|
|
|
if ($mod eq 'words' || $mod eq 'w') { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
1446
|
0
|
|
|
|
|
|
debug "Found skip-whitespace modifier (:$mod)\n"; |
1447
|
0
|
|
|
|
|
|
$mod{words} = 1; |
1448
|
|
|
|
|
|
|
} |
1449
|
|
|
|
|
|
|
elsif ($mod eq 'globally' || $mod eq 'g') { |
1450
|
0
|
|
|
|
|
|
debug "Found match-all modifier (:$mod)\n"; |
1451
|
0
|
|
|
|
|
|
$mod{post}{g} = 1; |
1452
|
|
|
|
|
|
|
} |
1453
|
|
|
|
|
|
|
elsif ($mod eq 'exhaustive' || $mod eq 'e') { |
1454
|
0
|
|
|
|
|
|
debug "Found match-every-way modifier (:$mod)\n"; |
1455
|
0
|
|
|
|
|
|
$mod{exhaust} = 1; |
1456
|
0
|
0
|
|
|
|
|
error "Can't specify both :exhaustive and :overlap on the same rule" |
1457
|
|
|
|
|
|
|
if $mod{overlap}; |
1458
|
|
|
|
|
|
|
} |
1459
|
|
|
|
|
|
|
elsif ($mod eq 'overlap' || $mod eq 'o') { |
1460
|
0
|
|
|
|
|
|
debug "Found match-overlapping modifier (:$mod)\n"; |
1461
|
0
|
|
|
|
|
|
$mod{overlap} = 1; |
1462
|
0
|
0
|
|
|
|
|
error "Can't specify both :exhaustive and :overlap on the same rule" |
1463
|
|
|
|
|
|
|
if $mod{exhaust}; |
1464
|
|
|
|
|
|
|
} |
1465
|
|
|
|
|
|
|
elsif ($mod eq 'ignore' || $mod eq 'i') { |
1466
|
0
|
|
|
|
|
|
debug "Found ignore-case modifier (:$mod)\n"; |
1467
|
0
|
|
|
|
|
|
$mod{post}{i} = 1; |
1468
|
|
|
|
|
|
|
} |
1469
|
|
|
|
|
|
|
elsif ($mod eq 'cont' || $mod eq 'c') { |
1470
|
0
|
|
|
|
|
|
debug "Found match continuation modifier (:$mod)\n"; |
1471
|
0
|
|
|
|
|
|
$mod{pre}{'\G'} = 1; |
1472
|
0
|
|
|
|
|
|
$mod{post}{gc} = 1; |
1473
|
|
|
|
|
|
|
} |
1474
|
|
|
|
|
|
|
elsif (length $mod > 5 && substr($mod,0,4) eq 'nth(' && substr($mod,-1) eq ')') { |
1475
|
0
|
|
|
|
|
|
$mod{nth} = substr($mod,4,-1); |
1476
|
0
|
|
|
|
|
|
debug "Found $mod{nth}th repetition modifier (:$mod)\n"; |
1477
|
|
|
|
|
|
|
} |
1478
|
|
|
|
|
|
|
elsif ( length $mod > 2 && substr($mod,0,1) ne 'n' && |
1479
|
|
|
|
|
|
|
(substr($mod,-2) eq 'th' || substr($mod,-2) eq 'st' |
1480
|
|
|
|
|
|
|
|| substr($mod,-2) eq 'nd' || substr($mod,-2) eq 'rd') |
1481
|
|
|
|
|
|
|
) { |
1482
|
0
|
|
|
|
|
|
$mod{nth} = substr($mod,0,-2); |
1483
|
0
|
|
|
|
|
|
debug "Found $mod{nth}th repetition modifier (:$mod)\n"; |
1484
|
|
|
|
|
|
|
} |
1485
|
|
|
|
|
|
|
elsif (length $mod > 3 && substr($mod,0,2) eq 'x(' && substr($mod,-1) eq ')' ) { |
1486
|
0
|
|
|
|
|
|
$mod{rep} = substr($mod,2,-1); |
1487
|
0
|
|
|
|
|
|
debug "Found $mod{rep}-times repetition modifier (:$mod)\n"; |
1488
|
|
|
|
|
|
|
} |
1489
|
|
|
|
|
|
|
elsif (length $mod > 1 && substr($mod,-1) eq 'x' ) { |
1490
|
0
|
|
|
|
|
|
$mod{rep} = substr($mod,0,-1); |
1491
|
0
|
|
|
|
|
|
debug "Found $mod{rep}-times repetition modifier (:$mod)\n"; |
1492
|
|
|
|
|
|
|
} |
1493
|
|
|
|
|
|
|
elsif (substr($mod,0,3) eq "nth" or substr($mod,0,1) eq "x") { |
1494
|
0
|
|
|
|
|
|
error "Missing parens after :$mod modifier"; |
1495
|
|
|
|
|
|
|
} |
1496
|
|
|
|
|
|
|
elsif (exists $unimpl{$mod}) { |
1497
|
0
|
|
|
|
|
|
error "The :$mod modifier is not currently implemented"; |
1498
|
|
|
|
|
|
|
} |
1499
|
|
|
|
|
|
|
elsif (length($mod) > 1) { |
1500
|
0
|
|
|
|
|
|
debug "Unknown modifier (:$mod). Trying to split.\n"; |
1501
|
0
|
|
|
|
|
|
unshift @mods, substr($mod,-1,1,"") while length $mod; |
1502
|
|
|
|
|
|
|
} |
1503
|
|
|
|
|
|
|
elsif (length $mod) { |
1504
|
0
|
|
|
|
|
|
error "Unknown modifier (:$mod)" |
1505
|
|
|
|
|
|
|
} |
1506
|
|
|
|
|
|
|
} |
1507
|
|
|
|
|
|
|
} |
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
# STD RULES AND PROPERTIES |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
my %stdrules = ( |
1512
|
|
|
|
|
|
|
alpha => qr{[^\W\d_]}, |
1513
|
|
|
|
|
|
|
-alpha => qr{[\W\d_]}, |
1514
|
|
|
|
|
|
|
space => qr{\s}, |
1515
|
|
|
|
|
|
|
-space => qr{\S}, |
1516
|
|
|
|
|
|
|
digit => qr{\d}, |
1517
|
|
|
|
|
|
|
-digit => qr{\D}, |
1518
|
|
|
|
|
|
|
alnum => qr{[^\W_]}, |
1519
|
|
|
|
|
|
|
-alnum => qr{[\W_]}, |
1520
|
|
|
|
|
|
|
ascii => qr{\p{InBasicLatin}}, |
1521
|
|
|
|
|
|
|
-ascii => qr{\P{InBasicLatin}}, |
1522
|
|
|
|
|
|
|
blank => qr{[ \t\r]}, |
1523
|
|
|
|
|
|
|
-blank => qr{[^ \t\r]}, |
1524
|
|
|
|
|
|
|
cntrl => qr{[[:cntrl:]]}, |
1525
|
|
|
|
|
|
|
-cntrl => qr{[^[:cntrl:]]}, |
1526
|
|
|
|
|
|
|
ctrl => qr{[[:cntrl:]]}, |
1527
|
|
|
|
|
|
|
-ctrl => qr{[^[:cntrl:]]}, |
1528
|
|
|
|
|
|
|
graph => qr{[[:graph:]]}, |
1529
|
|
|
|
|
|
|
-graph => qr{[^[:graph:]]}, |
1530
|
|
|
|
|
|
|
lower => qr{\p{Ll}}, |
1531
|
|
|
|
|
|
|
-lower => qr{\P{Ll}}, |
1532
|
|
|
|
|
|
|
print => qr{[[:print:]]}, |
1533
|
|
|
|
|
|
|
-print => qr{[^[:print:]]}, |
1534
|
|
|
|
|
|
|
punct => qr{\p{P}}, |
1535
|
|
|
|
|
|
|
-punct => qr{\P{P}}, |
1536
|
|
|
|
|
|
|
upper => qr{\p{Lu}}, |
1537
|
|
|
|
|
|
|
-upper => qr{\P{Lu}}, |
1538
|
|
|
|
|
|
|
word => qr{\w}, |
1539
|
|
|
|
|
|
|
-word => qr{\W}, |
1540
|
|
|
|
|
|
|
xdigit => qr{\p{HexDigit}}, |
1541
|
|
|
|
|
|
|
-xdigit => qr{\P{HexDigit}}, |
1542
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
ident => qr{[^\W\d]\w*}, |
1544
|
|
|
|
|
|
|
null => qr{(?=)}, |
1545
|
|
|
|
|
|
|
); |
1546
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
# Locate back-translation for a std rule (like )... |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
sub stdrule { |
1550
|
0
|
|
|
0
|
|
|
my ($name, $invert) = @_; |
1551
|
0
|
0
|
|
|
|
|
my $iname = $invert ? -$name : $name; |
1552
|
0
|
|
|
|
|
|
my $stdrule = $stdrules{$iname}; |
1553
|
0
|
0
|
|
|
|
|
if (defined $stdrule) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1554
|
0
|
|
|
|
|
|
debug "Matching <$iname> with subrule /$stdrule/\n"; |
1555
|
0
|
|
|
|
|
|
return qr{($stdrule) (?{$0=$^N})}x; |
|
0
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
} |
1557
|
|
|
|
|
|
|
elsif (index($name,'.') >= 0) { |
1558
|
0
|
|
|
|
|
|
die "Cannot match unknown named rule: <$iname>\n"; |
1559
|
|
|
|
|
|
|
} |
1560
|
|
|
|
|
|
|
elsif ($invert) { |
1561
|
0
|
0
|
|
|
|
|
$name = 'L&' if $name eq 'Lr'; |
1562
|
0
|
|
|
|
|
|
debug "Matching <$iname> with property \\P{$name}\n"; |
1563
|
0
|
|
|
|
|
|
return qr{\P{$name}}; |
1564
|
|
|
|
|
|
|
} |
1565
|
|
|
|
|
|
|
else { |
1566
|
0
|
0
|
|
|
|
|
$name = 'L&' if $name eq 'Lr'; |
1567
|
0
|
|
|
|
|
|
debug "Matching <$iname> with property \\p{$name}\n"; |
1568
|
0
|
|
|
|
|
|
return qr{\p{$name}}; |
1569
|
|
|
|
|
|
|
} |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
# Locate back-translation for a std properties in a charset |
1573
|
|
|
|
|
|
|
# such as: <+>... |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
my %stdprop = ( |
1576
|
|
|
|
|
|
|
alpha => '[^\W\d_]', |
1577
|
|
|
|
|
|
|
digit => '\d', |
1578
|
|
|
|
|
|
|
space => '\s', |
1579
|
|
|
|
|
|
|
alnum => '[^\W_]', |
1580
|
|
|
|
|
|
|
ascii => '\p{InBasicLatin}', |
1581
|
|
|
|
|
|
|
blank => '[ \t\r]', |
1582
|
|
|
|
|
|
|
cntrl => '[[:cntrl:]]', |
1583
|
|
|
|
|
|
|
ctrl => '[[:cntrl:]]', |
1584
|
|
|
|
|
|
|
graph => '[[:graph:]]', |
1585
|
|
|
|
|
|
|
lower => '\p{Ll}', |
1586
|
|
|
|
|
|
|
print => '[[:print:]]', |
1587
|
|
|
|
|
|
|
punct => '\p{P}', |
1588
|
|
|
|
|
|
|
upper => '\p{Lu}', |
1589
|
|
|
|
|
|
|
word => '\w', |
1590
|
|
|
|
|
|
|
xdigit => '\p{HexDigit}', |
1591
|
|
|
|
|
|
|
); |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
sub getprop { |
1594
|
0
|
|
|
0
|
|
|
my ($name, $sign) = @_; |
1595
|
0
|
0
|
|
|
|
|
my $neg = $sign eq '-' ? 1 : 0; |
1596
|
0
|
|
|
|
|
|
my $insign = substr($name,0,1); |
1597
|
0
|
|
|
|
|
|
my @result = ([],[]); |
1598
|
|
|
|
|
|
|
|
1599
|
0
|
0
|
|
|
|
|
if ($insign eq '-') { |
|
|
0
|
|
|
|
|
|
1600
|
0
|
|
|
|
|
|
substr($name,0,1) = ""; |
1601
|
0
|
|
|
|
|
|
$neg = 1-$neg; |
1602
|
|
|
|
|
|
|
} |
1603
|
|
|
|
|
|
|
elsif ($insign eq '!') { |
1604
|
0
|
|
|
|
|
|
substr($name,0,1) = ""; |
1605
|
0
|
|
|
|
|
|
error "Can't use negative lookahead inside character class. " |
1606
|
|
|
|
|
|
|
. "Did you mean <-$name>?"; |
1607
|
0
|
|
|
|
|
|
return \@result; |
1608
|
|
|
|
|
|
|
} |
1609
|
|
|
|
|
|
|
|
1610
|
0
|
0
|
|
|
|
|
$name = 'L&' if $name eq 'Lr'; |
1611
|
0
|
0
|
0
|
|
|
|
$result[$neg ? $cs_excl : $cs_incl][0] = $stdprop{$name} || "\\p{$name}"; |
1612
|
0
|
|
|
|
|
|
return \@result; |
1613
|
|
|
|
|
|
|
} |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
# Filter source to translate rules... |
1617
|
|
|
|
|
|
|
|
1618
|
41
|
|
|
41
|
|
418945
|
use Filter::Simple; our @CARP_NOT = 'Filter::Simple'; |
|
41
|
|
|
|
|
1867440
|
|
|
41
|
|
|
|
|
417
|
|
1619
|
|
|
|
|
|
|
FILTER { |
1620
|
|
|
|
|
|
|
return unless $_; |
1621
|
|
|
|
|
|
|
$debug = 1 if grep /-debug\b/, @_; |
1622
|
|
|
|
|
|
|
$translate = 1 if grep /-translate\b/, @_; |
1623
|
|
|
|
|
|
|
$bad = 0; |
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
# Convert Perl6ish traits to Perl5ish attributes... |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
s[ is $ws (keymatch|valuematch) $ows \( $ows (?:rx|rule) $ows ] |
1628
|
|
|
|
|
|
|
[ :\u$1(]gx; |
1629
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
# Convert Perl6ish grammars to Perl5ish packages |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
s[grammar $ws ($qualident|$ident) (?: $ws is $ws ($qualident|$ident))? $ows \{ ] |
1633
|
|
|
|
|
|
|
[ $2 ? "{ package $1; use base '$2'; no warnings 'regexp';" |
1634
|
|
|
|
|
|
|
: "{ package $1; no warnings 'regexp';" |
1635
|
|
|
|
|
|
|
]gex; |
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
# Convert Perl6ish named rules to Perl5ish named subroutines... |
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
s[(?{new}) \b rule $ws ($ident) (?{rulename}) |
1640
|
|
|
|
|
|
|
$ows ($mods?) (?{mods}) |
1641
|
|
|
|
|
|
|
$ows (\{) (?{delim}) $bpat \} |
1642
|
|
|
|
|
|
|
] |
1643
|
|
|
|
|
|
|
[to_rule()]gex; |
1644
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
# Convert Perl6ish unnamed rules and rx's to Perl5ish qr's... |
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
s[(?{new})($rx)] |
1648
|
|
|
|
|
|
|
[to_qms('qr')]gex; |
1649
|
|
|
|
|
|
|
|
1650
|
|
|
|
|
|
|
# Convert Perl6ish matches to Perl5ish matches... |
1651
|
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
|
s[(?{new})$match] |
1653
|
|
|
|
|
|
|
[to_qms('m')]gex; |
1654
|
|
|
|
|
|
|
|
1655
|
|
|
|
|
|
|
# Convert Perl6ish substitutions to Perl5ish substitutions... |
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
s[(?{new})$subst] |
1658
|
|
|
|
|
|
|
[to_qms('s',undef,$^N)]gex; |
1659
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
# Convert references to $1, $2, $3, etc. to $0->[1], $0->[2], $0->[3], etc. |
1661
|
|
|
|
|
|
|
|
1662
|
|
|
|
|
|
|
our $curlies = qr/\{ (?: (?> [^}{]+) | (??{$curlies}))* \}/x; |
1663
|
|
|
|
|
|
|
our $parens = qr/\( (?: (?> [^)(]+) | (??{$parens }))* \)/x; |
1664
|
|
|
|
|
|
|
our $squares = qr/\[ (?: (?> [^][]+) | (??{$squares}))* \]/x; |
1665
|
|
|
|
|
|
|
our $angles = qr/\< (?: (?> [^><]+) | (??{$angles }))* \>/x; |
1666
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
my $non_interp_str = qr{ |
1668
|
|
|
|
|
|
|
' [^'\\]* (?: \\. [^'\\]* )* ' |
1669
|
|
|
|
|
|
|
| q (?: [wxr]? \s* ' [^'\\]* (?: \\. [^'\\]* )* ' |
1670
|
|
|
|
|
|
|
| w? \s* / [^/\\]* (?: \\. [^/\\]* )* / |
1671
|
|
|
|
|
|
|
| w? \s* ! [^!\\]* (?: \\. [^!\\]* )* ! |
1672
|
|
|
|
|
|
|
| w? \s* \# [^#\\]* (?: \\. [^#\\]* )* \# |
1673
|
|
|
|
|
|
|
| w? \s* \| [^|\\]* (?: \\. [^|\\]* )* \| |
1674
|
|
|
|
|
|
|
| w? \s* (?: $curlies | $parens | $squares | $angles) |
1675
|
|
|
|
|
|
|
) |
1676
|
|
|
|
|
|
|
}xs; |
1677
|
|
|
|
|
|
|
|
1678
|
|
|
|
|
|
|
s/ ( $non_interp_str ) |
1679
|
|
|
|
|
|
|
| (?[$2]"/gesx; |
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
# Fail if any errors were detected in any of that... |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
if ($bad) { |
1684
|
|
|
|
|
|
|
warn "Fatal error", ($bad>1?"s":""), " in one or more Perl 6 rules\n"; |
1685
|
|
|
|
|
|
|
exit($bad); |
1686
|
|
|
|
|
|
|
} |
1687
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
# Turn of regex warnings in the resulting filtered code |
1689
|
|
|
|
|
|
|
# ('cos we did some wicked evil things in those back-translated regexes)... |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
$_ = "no warnings 'regexp';use re 'eval';$_"; |
1692
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
# Show the resulting source if we're translating |
1694
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
if ($translate) { |
1696
|
|
|
|
|
|
|
warn "Source translated to:\n__START__\n" if $debug; |
1697
|
|
|
|
|
|
|
print; |
1698
|
|
|
|
|
|
|
print "\n__END__\n" if $debug; |
1699
|
|
|
|
|
|
|
exit unless $debug; |
1700
|
|
|
|
|
|
|
} |
1701
|
|
|
|
|
|
|
}; |
1702
|
|
|
|
|
|
|
|
1703
|
|
|
|
|
|
|
# Implement the C and C traits on hashes... |
1704
|
|
|
|
|
|
|
|
1705
|
41
|
|
|
41
|
|
170989
|
use Attribute::Handlers; |
|
41
|
|
|
|
|
313120
|
|
|
41
|
|
|
|
|
374
|
|
1706
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
sub UNIVERSAL::Keymatch :ATTR(HASH,RAWDATA) { |
1708
|
0
|
|
|
0
|
0
|
0
|
my ($package, $symbol, $referent, $attr, $data) = @_; |
1709
|
|
|
|
|
|
|
|
1710
|
|
|
|
|
|
|
# Prepend the anonymous pattern marker, if necessary... |
1711
|
0
|
|
|
|
|
0
|
$data =~ s/^\s*(?!rx)/rx/; |
1712
|
|
|
|
|
|
|
|
1713
|
|
|
|
|
|
|
# Back-translate the resulting pattern... |
1714
|
0
|
0
|
|
|
|
0
|
$data =~ s[(?{new})($rx)][to_qms('qr')]gex |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1715
|
|
|
|
|
|
|
or croak 'Usage: %var : Keymatch(/pattern/)'; |
1716
|
|
|
|
|
|
|
|
1717
|
|
|
|
|
|
|
# Cache the result... |
1718
|
0
|
|
|
|
|
0
|
$keymatch{$referent} = $data; |
1719
|
41
|
|
|
41
|
|
13098
|
} |
|
41
|
|
|
|
|
96
|
|
|
41
|
|
|
|
|
239
|
|
1720
|
|
|
|
|
|
|
|
1721
|
|
|
|
|
|
|
sub UNIVERSAL::Valuematch :ATTR(HASH,RAWDATA) { |
1722
|
0
|
|
|
0
|
0
|
0
|
my ($package, $symbol, $referent, $attr, $data) = @_; |
1723
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
# Prepend the anonymous pattern marker, if necessary... |
1725
|
0
|
|
|
|
|
0
|
$data =~ s/^\s*(?!rx)/rx/; |
1726
|
|
|
|
|
|
|
|
1727
|
|
|
|
|
|
|
# Back-translate the resulting pattern... |
1728
|
0
|
0
|
|
|
|
0
|
$data =~ s[(?{new})($rx)][to_qms('qr')]gex |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1729
|
|
|
|
|
|
|
or croak 'Usage: %var : Valuematch(/pattern/)'; |
1730
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
# Cache the result... |
1732
|
0
|
|
|
|
|
0
|
$valuematch{$referent} = $data; |
1733
|
41
|
|
|
41
|
|
33782
|
} |
|
41
|
|
|
|
|
88
|
|
|
41
|
|
|
|
|
201
|
|
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
# Build a named Perl5ish subroutine/method that |
1736
|
|
|
|
|
|
|
# implements a named Perl6ish rule... |
1737
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
sub to_rule { |
1739
|
0
|
|
|
0
|
|
|
local $" = ""; |
1740
|
0
|
|
|
|
|
|
my $trans = "sub $rulename { ". to_qms('qr',$rulename) . " }"; |
1741
|
0
|
|
|
|
|
|
return $trans; |
1742
|
|
|
|
|
|
|
} |
1743
|
|
|
|
|
|
|
|
1744
|
|
|
|
|
|
|
# Wrap the back-translated elements of a Perl6ish pattern |
1745
|
|
|
|
|
|
|
# in the necessary Perl5ish regex magic... |
1746
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
sub to_qms { |
1748
|
0
|
|
|
0
|
|
|
my ($type,$name,$repl) = @_; |
1749
|
0
|
|
|
|
|
|
local $" = ""; |
1750
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
# /gc flag not possible on Perl5ish qr's... |
1752
|
|
|
|
|
|
|
|
1753
|
0
|
0
|
|
|
|
|
delete $mod{post}{gc} if $type eq "qr"; |
1754
|
|
|
|
|
|
|
|
1755
|
|
|
|
|
|
|
# Default to a temporary name, if Perl6ish pattern is unnamed... |
1756
|
|
|
|
|
|
|
|
1757
|
0
|
|
0
|
|
|
|
$name ||= "anon_$type"; |
1758
|
|
|
|
|
|
|
|
1759
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
# Prepare "decorations" for the back-translation... |
1762
|
|
|
|
|
|
|
|
1763
|
0
|
0
|
|
|
|
|
my $priorness = $has_prior ? "" : "\$Perl6::Rules::prior = $rule_id;"; |
1764
|
|
|
|
|
|
|
|
1765
|
0
|
0
|
0
|
|
|
|
my $repcontrol = |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
exists $mod{nth} && ($mod{post}{g} || $mod{post}{gc}) ? |
1767
|
|
|
|
|
|
|
q[)(??{++$Perl6::Rules::count%].$mod{nth}.q[==0?'':'(?!)'})] |
1768
|
|
|
|
|
|
|
: exists $mod{nth} ? |
1769
|
|
|
|
|
|
|
q[)(??{++$Perl6::Rules::count==].$mod{nth}.q[?'':'(?!)'})] |
1770
|
|
|
|
|
|
|
: exists $mod{rep} && $type eq 'm' ? |
1771
|
|
|
|
|
|
|
(exists $mod{post}{gc} |
1772
|
|
|
|
|
|
|
? q[){].$mod{rep}.q[}] |
1773
|
|
|
|
|
|
|
: q[(?s:.*?)){].$mod{rep}.q[}] |
1774
|
|
|
|
|
|
|
) |
1775
|
|
|
|
|
|
|
: exists $mod{rep} && $type eq 's' ? |
1776
|
|
|
|
|
|
|
q[)(??{++$Perl6::Rules::count<=].$mod{rep}.q[?'':'(?!)'})] |
1777
|
|
|
|
|
|
|
: q{}; |
1778
|
|
|
|
|
|
|
|
1779
|
0
|
0
|
0
|
|
|
|
if (exists $mod{rep} && ($mod{post}{g} || $mod{post}{gc})) { |
|
|
|
0
|
|
|
|
|
1780
|
0
|
|
|
|
|
|
error "Can't specify both :globally and :x($mod{rep}) on the same rule"; |
1781
|
|
|
|
|
|
|
} |
1782
|
0
|
0
|
0
|
|
|
|
$mod{post}{g} = 1 if $repcontrol && $type eq 's' && !$mod{post}{gc}; |
|
|
|
0
|
|
|
|
|
1783
|
|
|
|
|
|
|
|
1784
|
0
|
0
|
0
|
|
|
|
my $repprecontrol = !$repcontrol ? '' |
|
|
0
|
|
|
|
|
|
1785
|
|
|
|
|
|
|
: exists $mod{rep} && $type eq 'm' ? '(' |
1786
|
|
|
|
|
|
|
: '(?>'; |
1787
|
0
|
0
|
0
|
|
|
|
my $reppostcontrolcode = $repcontrol && $type eq 'm' |
1788
|
|
|
|
|
|
|
? '$Perl6::Rules::count=0;' : ''; |
1789
|
0
|
0
|
|
|
|
|
my $reppostcontrolpat = $repcontrol ? '|\z(?{$Perl6::Rules::count=0})(?!)' : ''; |
1790
|
|
|
|
|
|
|
|
1791
|
0
|
0
|
0
|
|
|
|
error('The :nth modifier can only be used with m/.../ or s/.../.../') |
|
|
|
0
|
|
|
|
|
1792
|
|
|
|
|
|
|
if exists $mod{nth} && $type ne 'm' && $type ne 's'; |
1793
|
0
|
0
|
0
|
|
|
|
error('The :x modifier can only be used with m/.../ or s/.../.../') |
|
|
|
0
|
|
|
|
|
1794
|
|
|
|
|
|
|
if exists $mod{rep} && $type ne 'm' && $type ne 's'; |
1795
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
# "pre" modifiers are things like \G that go at the start of the |
1797
|
|
|
|
|
|
|
# back-translated Perl5ish regex. |
1798
|
|
|
|
|
|
|
# "post" modifiers are things like /gimsox that go after the |
1799
|
|
|
|
|
|
|
# back-translated Perl5ish regex. |
1800
|
|
|
|
|
|
|
|
1801
|
0
|
|
|
|
|
|
my $pre = join "", keys %{$mod{pre}}; |
|
0
|
|
|
|
|
|
|
1802
|
0
|
|
|
|
|
|
my $post = join "", keys %{$mod{post}}; |
|
0
|
|
|
|
|
|
|
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
|
1805
|
|
|
|
|
|
|
# Decorate the back-translation and cache... |
1806
|
|
|
|
|
|
|
|
1807
|
0
|
0
|
0
|
|
|
|
my $setup = ($mod{exhaust} || $mod{overlap}) |
1808
|
|
|
|
|
|
|
? ';eval q{@0=()}if!$Perl6::Rules::comb++;' |
1809
|
|
|
|
|
|
|
: ""; |
1810
|
0
|
0
|
|
|
|
|
my $cleanup = $mod{exhaust} ? "(?{Perl6::Rules::Match::_success('next')})(?!)|\\z(??{if(\@0){Perl6::Rules::Match::_success('done');$reppostcontrolcode$priorness}else{${reppostcontrolcode}Perl6::Rules::Match::_failure}\@0?'':'(?!)'})" |
|
|
0
|
|
|
|
|
|
1811
|
|
|
|
|
|
|
: $mod{overlap} ? "(?{Perl6::Rules::Match::_success('next','overlap')})(?!)|\\z(??{if(\@0){Perl6::Rules::Match::_success('done');$reppostcontrolcode$priorness}else{${reppostcontrolcode}Perl6::Rules::Match::_failure}\@0?'':'(?!)'})" |
1812
|
|
|
|
|
|
|
: "(?{Perl6::Rules::Match::_success();$reppostcontrolcode$priorness})$reppostcontrolpat|(?{Perl6::Rules::Match::_failure})(?!)"; |
1813
|
|
|
|
|
|
|
|
1814
|
0
|
|
|
|
|
|
my $trans = "(?xm)$repprecontrol(?{local (\@Perl6::Rules::d0,\%Perl6::Rules::d0);local\$Perl6::Rules::startpos=pos;$setup})($pre(?:@p5pat))$repcontrol$cleanup"; |
1815
|
|
|
|
|
|
|
|
1816
|
41
|
0
|
0
|
41
|
|
72095
|
$rules[$rule_id] = do { no warnings 'regexp'; qr{$trans}; } |
|
41
|
|
|
|
|
101
|
|
|
41
|
|
|
|
|
136390
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1817
|
|
|
|
|
|
|
unless $translate || $bad; |
1818
|
0
|
|
|
|
|
|
$rule_id++; |
1819
|
|
|
|
|
|
|
|
1820
|
0
|
|
|
|
|
|
$trans = "$type$ldel$trans$rdel"; |
1821
|
|
|
|
|
|
|
|
1822
|
|
|
|
|
|
|
# If there's a replacement string (i.e. it's a substitution)... |
1823
|
|
|
|
|
|
|
|
1824
|
0
|
0
|
|
|
|
|
if ($repl) { |
1825
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
# Find the start and end of the (single) interpolator... |
1827
|
|
|
|
|
|
|
# (This ought to be done with a nested substitution, in which |
1828
|
|
|
|
|
|
|
# case we could allow multiple interpolators) |
1829
|
|
|
|
|
|
|
|
1830
|
0
|
|
|
|
|
|
my $from_s = index($repl, '$('); |
1831
|
0
|
|
|
|
|
|
my $from_l = index($repl, '@('); |
1832
|
0
|
|
|
|
|
|
my $to = rindex($repl, ')'); |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
# Back-translate the interpolator, if any... |
1835
|
|
|
|
|
|
|
|
1836
|
0
|
0
|
0
|
|
|
|
if ($from_s>=0 && $to>=0) { |
|
|
0
|
0
|
|
|
|
|
1837
|
0
|
|
|
|
|
|
my $len = $to-$from_s+1; |
1838
|
0
|
|
|
|
|
|
substr($repl,$from_s,$len) = |
1839
|
|
|
|
|
|
|
'@{[scalar(' . substr($repl,$from_s+2,$len-3) . ')]}'; |
1840
|
|
|
|
|
|
|
} |
1841
|
|
|
|
|
|
|
elsif ($from_l>=0 && $to>=0) { |
1842
|
0
|
|
|
|
|
|
my $len = $to-$from_l+1; |
1843
|
0
|
|
|
|
|
|
substr($repl,$from_l,$len) = |
1844
|
|
|
|
|
|
|
'@{[' . substr($repl,$from_l+2,$len-3) . ']}'; |
1845
|
|
|
|
|
|
|
} |
1846
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
# Append the replacement string to the back-translated regex... |
1848
|
|
|
|
|
|
|
|
1849
|
0
|
|
|
|
|
|
$trans .= $repl; |
1850
|
|
|
|
|
|
|
} |
1851
|
|
|
|
|
|
|
|
1852
|
|
|
|
|
|
|
# Return the back-translated regex, appending any flags... |
1853
|
|
|
|
|
|
|
|
1854
|
0
|
|
|
|
|
|
return "$trans$post"; |
1855
|
|
|
|
|
|
|
} |
1856
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
|
1858
|
|
|
|
|
|
|
# Append multiple captures to the same array, changing |
1859
|
|
|
|
|
|
|
# value type as necessary |
1860
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
sub apparray { |
1862
|
0
|
|
|
0
|
|
|
my ($arrayref, $newval) = @_; |
1863
|
0
|
0
|
0
|
|
|
|
$arrayref = $_[0] = [undef] unless $arrayref and @$arrayref; # EXPERIMENTAL |
1864
|
0
|
|
|
|
|
|
debug "Appending ", $newval, " to array"; |
1865
|
0
|
|
|
|
|
|
for ($arrayref->[-1]) { |
1866
|
0
|
0
|
|
|
|
|
if (!defined) { $_ = $newval } |
|
0
|
0
|
|
|
|
|
|
1867
|
0
|
|
|
|
|
|
elsif (ref ne 'ARRAY') { $_ = [ $_, $newval ] } |
1868
|
0
|
|
|
|
|
|
else { push @$_, $newval } |
1869
|
|
|
|
|
|
|
} |
1870
|
|
|
|
|
|
|
} |
1871
|
|
|
|
|
|
|
|
1872
|
|
|
|
|
|
|
# Create entry and subsequently assign multiple captures to the same hash |
1873
|
|
|
|
|
|
|
# element, changing value type as necessary |
1874
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
sub apphash { |
1876
|
0
|
|
|
0
|
|
|
my ($hashref, $newval) = @_; |
1877
|
0
|
|
|
|
|
|
debug "Appending ", $newval, " to hash"; |
1878
|
0
|
0
|
|
|
|
|
if (!defined $hashref->{""}) { # "Appending" a key |
1879
|
|
|
|
|
|
|
# ($hash{""} stores current "focus" key |
1880
|
0
|
|
|
|
|
|
$hashref->{""} = $newval; |
1881
|
0
|
|
|
|
|
|
$hashref->{$newval} = undef; |
1882
|
|
|
|
|
|
|
} |
1883
|
|
|
|
|
|
|
else { # Appending a value |
1884
|
0
|
|
|
|
|
|
for ($hashref->{$hashref->{""}}) { |
1885
|
0
|
0
|
|
|
|
|
if (!defined) { $_ = $newval } |
|
0
|
0
|
|
|
|
|
|
1886
|
0
|
|
|
|
|
|
elsif (ref ne 'ARRAY') { $_ = [ $_, $newval ] } |
1887
|
0
|
|
|
|
|
|
else { push @$_, $newval } |
1888
|
|
|
|
|
|
|
} |
1889
|
|
|
|
|
|
|
} |
1890
|
|
|
|
|
|
|
} |
1891
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
# Save a restore internal variable sets on subrules |
1894
|
|
|
|
|
|
|
# (because localized variables "bleed" into qr/.../s that |
1895
|
|
|
|
|
|
|
# are interpolated via (??{...}) |
1896
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
my @d0_stack; |
1898
|
|
|
|
|
|
|
our ($startpos, $lastpos, @d0, %d0); |
1899
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
sub save_d0 { |
1901
|
0
|
|
|
0
|
|
|
debug "Saving internal state (", 0+@d0_stack, ")"; |
1902
|
0
|
|
|
|
|
|
push @d0_stack, [$startpos, $lastpos,\@d0, \%d0]; |
1903
|
|
|
|
|
|
|
} |
1904
|
|
|
|
|
|
|
|
1905
|
|
|
|
|
|
|
sub restore_d0 { |
1906
|
0
|
|
|
0
|
|
|
($startpos, $lastpos, *d0, *d0) = @{pop @d0_stack}; |
|
0
|
|
|
|
|
|
|
1907
|
0
|
|
|
|
|
|
debug "Restoring internal state (", 0+@d0_stack, ")"; |
1908
|
|
|
|
|
|
|
} |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
package Perl6::Rules::Match; |
1911
|
|
|
|
|
|
|
|
1912
|
|
|
|
|
|
|
my %data; |
1913
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
sub _failure { |
1915
|
0
|
|
|
0
|
|
|
$0 = bless \(my $anon); |
1916
|
0
|
|
|
|
|
|
$data{overload::StrVal $0} = { b=>0, a=>[], h=>{} }; |
1917
|
|
|
|
|
|
|
} |
1918
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
sub _success { |
1920
|
0
|
|
|
0
|
|
|
my ($exhaust,$overlap) = (@_,"",""); |
1921
|
0
|
0
|
0
|
|
|
|
return if $overlap && $exhaust ne 'done' && $Perl6::Rules::startpos <= $Perl6::Rules::lastpos; |
|
|
|
0
|
|
|
|
|
1922
|
0
|
0
|
|
|
|
|
$Perl6::Rules::d0[0] = $^N unless defined $Perl6::Rules::d0[0]; |
1923
|
0
|
|
|
|
|
|
my %hash; |
1924
|
0
|
|
|
|
|
|
for my $var (keys %Perl6::Rules::d0) { |
1925
|
0
|
|
|
|
|
|
my $sigil = substr($var,0,1); |
1926
|
0
|
|
|
|
|
|
my $subsigil = substr($var,1,1); |
1927
|
0
|
0
|
0
|
|
|
|
if ($sigil eq '$') { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1928
|
0
|
0
|
|
|
|
|
if ($subsigil eq '?') { # Internal scalar -> cut off sigils |
1929
|
0
|
|
|
|
|
|
$Perl6::Rules::d0{substr($var,2)} = |
1930
|
|
|
|
|
|
|
delete $Perl6::Rules::d0{$var}; |
1931
|
|
|
|
|
|
|
} |
1932
|
|
|
|
|
|
|
else { # External scalar -> assign |
1933
|
0
|
|
|
|
|
|
eval "$var = q{$Perl6::Rules::d0{$var}}"; |
1934
|
|
|
|
|
|
|
} |
1935
|
|
|
|
|
|
|
} |
1936
|
|
|
|
|
|
|
elsif ($sigil eq '@' && $subsigil eq '?') { |
1937
|
0
|
|
|
|
|
|
@{$Perl6::Rules::d0{$var}} = grep defined, @{$Perl6::Rules::d0{$var}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1938
|
|
|
|
|
|
|
} |
1939
|
|
|
|
|
|
|
elsif ($sigil eq '%' && $subsigil eq '?') { |
1940
|
0
|
|
|
|
|
|
delete $Perl6::Rules::d0{$var}{''}; |
1941
|
|
|
|
|
|
|
} |
1942
|
|
|
|
|
|
|
elsif ($subsigil ne '?') { # External array or hash -> assign |
1943
|
0
|
|
|
|
|
|
eval "$var = q{$Perl6::Rules::d0{$var}}"; |
1944
|
|
|
|
|
|
|
} |
1945
|
|
|
|
|
|
|
} |
1946
|
0
|
|
|
|
|
|
while (my ($exvar,$exvarref) = each %{$Perl6::Rules::exvar{scalar}}) { |
|
0
|
|
|
|
|
|
|
1947
|
0
|
|
|
|
|
|
$$exvarref = eval $exvar; |
1948
|
|
|
|
|
|
|
} |
1949
|
0
|
|
|
|
|
|
while (my ($exvar,$exvarref) = each %{$Perl6::Rules::exvar{hash}}) { |
|
0
|
|
|
|
|
|
|
1950
|
0
|
|
|
|
|
|
%$exvarref = eval $exvar; |
1951
|
0
|
|
|
|
|
|
delete $exvarref->{''}; |
1952
|
|
|
|
|
|
|
} |
1953
|
0
|
|
|
|
|
|
while (my ($exvar,$exvarref) = each %{$Perl6::Rules::exvar{array}}) { |
|
0
|
|
|
|
|
|
|
1954
|
0
|
|
|
|
|
|
@$exvarref = eval $exvar; |
1955
|
|
|
|
|
|
|
} |
1956
|
0
|
|
|
|
|
|
$0 = bless \(my $anon); |
1957
|
0
|
0
|
|
|
|
|
if ($exhaust ne 'done') { |
1958
|
0
|
|
|
|
|
|
$data{overload::StrVal $0} = { |
1959
|
|
|
|
|
|
|
b=>1, |
1960
|
|
|
|
|
|
|
a=>[@Perl6::Rules::d0], |
1961
|
|
|
|
|
|
|
h=>{%Perl6::Rules::d0}, |
1962
|
|
|
|
|
|
|
p=>$Perl6::Rules::startpos, |
1963
|
|
|
|
|
|
|
}; |
1964
|
0
|
0
|
|
|
|
|
if ($exhaust) { |
1965
|
0
|
|
|
|
|
|
push @0, $0; |
1966
|
0
|
|
|
|
|
|
Perl6::Rules::debug "Adding alternative match: ", $0; |
1967
|
0
|
|
|
|
|
|
@Perl6::Rules::d0 = (); |
1968
|
0
|
|
|
|
|
|
%Perl6::Rules::d0 = (); |
1969
|
0
|
|
|
|
|
|
$Perl6::Rules::lastpos = $Perl6::Rules::startpos; |
1970
|
|
|
|
|
|
|
} |
1971
|
|
|
|
|
|
|
} |
1972
|
|
|
|
|
|
|
else { |
1973
|
0
|
|
|
|
|
|
$data{overload::StrVal $0} = { |
1974
|
|
|
|
|
|
|
b=>1, a=>\@0, h=>{}, p=>$Perl6::Rules::startpos, |
1975
|
|
|
|
|
|
|
}; |
1976
|
0
|
|
|
|
|
|
Perl6::Rules::debug "Finalizing alternative matches: ", 0+@0; |
1977
|
0
|
|
|
|
|
|
$Perl6::Rules::comb=0; |
1978
|
0
|
|
|
|
|
|
$Perl6::Rules::lastpos=-1; |
1979
|
|
|
|
|
|
|
} |
1980
|
|
|
|
|
|
|
} |
1981
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
use overload |
1983
|
0
|
|
|
0
|
|
0
|
q{bool} => sub { $data{overload::StrVal $_[0]}{b} }, |
1984
|
0
|
|
|
0
|
|
0
|
q{""} => sub { $data{overload::StrVal $_[0]}{a}[0] }, |
1985
|
0
|
|
|
0
|
|
0
|
q{@{}} => sub { $data{overload::StrVal $_[0]}{a} }, |
1986
|
0
|
|
|
0
|
|
0
|
q{%{}} => sub { $data{overload::StrVal $_[0]}{h} }, |
1987
|
41
|
|
|
|
|
793
|
fallback => 1, |
1988
|
41
|
|
|
41
|
|
567
|
; |
|
41
|
|
|
|
|
89
|
|
1989
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
sub _flatten { |
1991
|
0
|
|
|
0
|
|
|
my ($val) = @_; |
1992
|
0
|
0
|
|
|
|
|
if (ref $val eq 'Perl6::Rules::Match') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1993
|
0
|
0
|
|
|
|
|
return _flatten({ |
1994
|
|
|
|
|
|
|
'SCALAR'.($val?'(true)':'(false)') => "$val", |
1995
|
|
|
|
|
|
|
ARRAY => \@$val, |
1996
|
|
|
|
|
|
|
HASH => \%$val, |
1997
|
|
|
|
|
|
|
POS => $val->pos, |
1998
|
|
|
|
|
|
|
}); |
1999
|
|
|
|
|
|
|
} |
2000
|
|
|
|
|
|
|
elsif (ref $val eq 'ARRAY') { |
2001
|
0
|
|
|
|
|
|
return [ map { _flatten($_) } @$val ]; |
|
0
|
|
|
|
|
|
|
2002
|
|
|
|
|
|
|
} |
2003
|
|
|
|
|
|
|
elsif (ref $val eq 'HASH') { |
2004
|
0
|
|
|
|
|
|
return { map { ($_=>_flatten($val->{$_})) } keys %$val }; |
|
0
|
|
|
|
|
|
|
2005
|
|
|
|
|
|
|
} |
2006
|
|
|
|
|
|
|
elsif (!defined $val) { |
2007
|
0
|
|
|
|
|
|
return ""; |
2008
|
|
|
|
|
|
|
} |
2009
|
|
|
|
|
|
|
else { |
2010
|
0
|
|
|
|
|
|
return $val; |
2011
|
|
|
|
|
|
|
} |
2012
|
|
|
|
|
|
|
} |
2013
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
sub pos { |
2015
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
2016
|
0
|
|
|
|
|
|
return $data{overload::StrVal $self}{p}; |
2017
|
|
|
|
|
|
|
} |
2018
|
|
|
|
|
|
|
|
2019
|
|
|
|
|
|
|
sub dump { |
2020
|
|
|
|
|
|
|
my ($self,$level) = @_; |
2021
|
41
|
|
|
41
|
|
148424
|
use YAML; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2022
|
|
|
|
|
|
|
local ($YAML::UseAliases, $YAML::UseHeader) |
2023
|
|
|
|
|
|
|
= 0, 0; |
2024
|
|
|
|
|
|
|
my $dump = Dump _flatten($self); |
2025
|
|
|
|
|
|
|
return $dump if defined wantarray; |
2026
|
|
|
|
|
|
|
print $dump; |
2027
|
|
|
|
|
|
|
} |
2028
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
# Placate the mysterious regex gods... |
2030
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
$Perl6::Rules::lastpos=-1; |
2032
|
|
|
|
|
|
|
$Perl6::Rules::startpos=-1; |
2033
|
|
|
|
|
|
|
Perl6::Rules::save_d0; |
2034
|
|
|
|
|
|
|
_success(); |
2035
|
|
|
|
|
|
|
Perl6::Rules::restore_d0; |
2036
|
|
|
|
|
|
|
|
2037
|
|
|
|
|
|
|
1; |
2038
|
|
|
|
|
|
|
|
2039
|
|
|
|
|
|
|
__END__ |