line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Copyright (C) 1997-2001 Damian Conway. All rights reserved. |
2
|
|
|
|
|
|
|
# Copyright (C) 2009 Adam Kennedy. |
3
|
|
|
|
|
|
|
# Copyright (C) 2015, 2022 Steve Hay and other contributors. All rights |
4
|
|
|
|
|
|
|
# reserved. |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# This module is free software; you can redistribute it and/or modify it under |
7
|
|
|
|
|
|
|
# the same terms as Perl itself, i.e. under the terms of either the GNU General |
8
|
|
|
|
|
|
|
# Public License or the Artistic License, as specified in the F file. |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package Text::Balanced; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS. |
13
|
|
|
|
|
|
|
# FOR FULL DOCUMENTATION SEE Balanced.pod |
14
|
|
|
|
|
|
|
|
15
|
9
|
|
|
9
|
|
402425
|
use 5.008001; |
|
9
|
|
|
|
|
99
|
|
16
|
9
|
|
|
9
|
|
44
|
use strict; |
|
9
|
|
|
|
|
23
|
|
|
9
|
|
|
|
|
178
|
|
17
|
9
|
|
|
9
|
|
38
|
use Exporter (); |
|
9
|
|
|
|
|
14
|
|
|
9
|
|
|
|
|
197
|
|
18
|
|
|
|
|
|
|
|
19
|
9
|
|
|
9
|
|
48
|
use vars qw { $VERSION @ISA %EXPORT_TAGS }; |
|
9
|
|
|
|
|
33
|
|
|
9
|
|
|
|
|
950
|
|
20
|
|
|
|
|
|
|
BEGIN { |
21
|
9
|
|
|
9
|
|
30
|
$VERSION = '2.05'; |
22
|
9
|
|
|
|
|
132
|
@ISA = 'Exporter'; |
23
|
9
|
|
|
|
|
22304
|
%EXPORT_TAGS = ( |
24
|
|
|
|
|
|
|
ALL => [ qw{ |
25
|
|
|
|
|
|
|
&extract_delimited |
26
|
|
|
|
|
|
|
&extract_bracketed |
27
|
|
|
|
|
|
|
&extract_quotelike |
28
|
|
|
|
|
|
|
&extract_codeblock |
29
|
|
|
|
|
|
|
&extract_variable |
30
|
|
|
|
|
|
|
&extract_tagged |
31
|
|
|
|
|
|
|
&extract_multiple |
32
|
|
|
|
|
|
|
&gen_delimited_pat |
33
|
|
|
|
|
|
|
&gen_extract_tagged |
34
|
|
|
|
|
|
|
&delimited_pat |
35
|
|
|
|
|
|
|
} ], |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Exporter::export_ok_tags('ALL'); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
our $RE_PREREGEX_PAT = qr#( |
42
|
|
|
|
|
|
|
[!=]~ |
43
|
|
|
|
|
|
|
| split|grep|map |
44
|
|
|
|
|
|
|
)#x; |
45
|
|
|
|
|
|
|
our $RE_EXPR_PAT = qr#( |
46
|
|
|
|
|
|
|
(?:\*\*|&&|\|\||<<|>>|//|[-+*x%^&|.])=? |
47
|
|
|
|
|
|
|
| /(?:[^/]) |
48
|
|
|
|
|
|
|
| =(?!>) |
49
|
|
|
|
|
|
|
| return |
50
|
|
|
|
|
|
|
| [\(\[] |
51
|
|
|
|
|
|
|
)#x; |
52
|
|
|
|
|
|
|
our $RE_NUM = qr/\s*[+\-.0-9][+\-.0-9e]*/i; # numerical constant |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
our %ref2slashvalid; # is quotelike /.../ pattern valid here for given textref? |
55
|
|
|
|
|
|
|
our %ref2qmarkvalid; # is quotelike ?...? pattern valid here for given textref? |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# HANDLE RETURN VALUES IN VARIOUS CONTEXTS |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
sub _failmsg { |
60
|
2598
|
|
|
2598
|
|
4373
|
my ($message, $pos) = @_; |
61
|
2598
|
|
|
|
|
6528
|
$@ = bless { |
62
|
|
|
|
|
|
|
error => $message, |
63
|
|
|
|
|
|
|
pos => $pos, |
64
|
|
|
|
|
|
|
}, 'Text::Balanced::ErrorMsg'; |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub _fail { |
68
|
890
|
|
|
890
|
|
1297
|
my ($wantarray, $textref, $message, $pos) = @_; |
69
|
890
|
100
|
|
|
|
1246
|
_failmsg $message, $pos if $message; |
70
|
890
|
100
|
|
|
|
2586
|
return (undef, $$textref, undef) if $wantarray; |
71
|
20
|
|
|
|
|
129
|
return; |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub _succeed { |
75
|
506
|
|
|
506
|
|
646
|
$@ = undef; |
76
|
506
|
|
|
|
|
1244
|
my ($wantarray,$textref) = splice @_, 0, 2; |
77
|
506
|
100
|
|
|
|
1080
|
my ($extrapos, $extralen) = @_ > 18 |
78
|
|
|
|
|
|
|
? splice(@_, -2, 2) |
79
|
|
|
|
|
|
|
: (0, 0); |
80
|
506
|
|
|
|
|
1161
|
my ($startlen, $oppos) = @_[5,6]; |
81
|
506
|
|
|
|
|
630
|
my $remainderpos = $_[2]; |
82
|
506
|
100
|
|
|
|
712
|
if ( $wantarray ) { |
83
|
289
|
|
|
|
|
312
|
my @res; |
84
|
289
|
|
|
|
|
640
|
while (my ($from, $len) = splice @_, 0, 2) { |
85
|
1424
|
|
|
|
|
3261
|
push @res, substr($$textref, $from, $len); |
86
|
|
|
|
|
|
|
} |
87
|
289
|
100
|
|
|
|
451
|
if ( $extralen ) { # CORRECT FILLET |
88
|
13
|
|
|
|
|
27
|
my $extra = substr($res[0], $extrapos-$oppos, $extralen, "\n"); |
89
|
13
|
|
|
|
|
24
|
$res[1] = "$extra$res[1]"; |
90
|
13
|
|
|
|
|
15
|
eval { substr($$textref,$remainderpos,0) = $extra; |
|
13
|
|
|
|
|
32
|
|
91
|
12
|
|
|
|
|
19
|
substr($$textref,$extrapos,$extralen,"\n")} ; |
92
|
|
|
|
|
|
|
#REARRANGE HERE DOC AND FILLET IF POSSIBLE |
93
|
13
|
|
|
|
|
28
|
pos($$textref) = $remainderpos-$extralen+1; # RESET \G |
94
|
|
|
|
|
|
|
} else { |
95
|
276
|
|
|
|
|
481
|
pos($$textref) = $remainderpos; # RESET \G |
96
|
|
|
|
|
|
|
} |
97
|
289
|
|
|
|
|
2544
|
return @res; |
98
|
|
|
|
|
|
|
} else { |
99
|
217
|
|
|
|
|
402
|
my $match = substr($$textref,$_[0],$_[1]); |
100
|
217
|
100
|
|
|
|
384
|
substr($match,$extrapos-$_[0]-$startlen,$extralen,"") if $extralen; |
101
|
217
|
100
|
|
|
|
372
|
my $extra = $extralen |
102
|
|
|
|
|
|
|
? substr($$textref, $extrapos, $extralen)."\n" : ""; |
103
|
217
|
|
|
|
|
290
|
eval {substr($$textref,$_[4],$_[1]+$_[5])=$extra} ; #CHOP OUT PREFIX & MATCH, IF POSSIBLE |
|
217
|
|
|
|
|
569
|
|
104
|
217
|
|
|
|
|
371
|
pos($$textref) = $_[4]; # RESET \G |
105
|
217
|
|
|
|
|
1799
|
return $match; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING |
110
|
|
|
|
|
|
|
## no critic (Subroutines::ProhibitSubroutinePrototypes) |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub gen_delimited_pat($;$) # ($delimiters;$escapes) |
113
|
|
|
|
|
|
|
{ |
114
|
59
|
|
|
59
|
1
|
109
|
my ($dels, $escs) = @_; |
115
|
59
|
50
|
|
|
|
189
|
return "" unless $dels =~ /\S/; |
116
|
59
|
100
|
|
|
|
119
|
$escs = '\\' unless $escs; |
117
|
59
|
|
|
|
|
151
|
$escs .= substr($escs,-1) x (length($dels)-length($escs)); |
118
|
59
|
|
|
|
|
82
|
my @pat = (); |
119
|
59
|
|
|
|
|
64
|
my $i; |
120
|
59
|
|
|
|
|
131
|
for ($i=0; $i
|
121
|
|
|
|
|
|
|
{ |
122
|
161
|
|
|
|
|
235
|
my $del = quotemeta substr($dels,$i,1); |
123
|
161
|
|
|
|
|
192
|
my $esc = quotemeta substr($escs,$i,1); |
124
|
161
|
100
|
|
|
|
242
|
if ($del eq $esc) |
125
|
|
|
|
|
|
|
{ |
126
|
24
|
|
|
|
|
80
|
push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del"; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
else |
129
|
|
|
|
|
|
|
{ |
130
|
137
|
|
|
|
|
380
|
push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del"; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
} |
133
|
59
|
|
|
|
|
135
|
my $pat = join '|', @pat; |
134
|
59
|
|
|
|
|
159
|
return "(?:$pat)"; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
*delimited_pat = \&gen_delimited_pat; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# THE EXTRACTION FUNCTIONS |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub extract_delimited (;$$$$) |
142
|
|
|
|
|
|
|
{ |
143
|
50
|
50
|
|
50
|
1
|
49879
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
144
|
50
|
100
|
|
|
|
199
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
145
|
50
|
|
|
|
|
80
|
my $wantarray = wantarray; |
146
|
50
|
100
|
|
|
|
84
|
my $del = defined $_[1] ? $_[1] : qq{\'\"\`}; |
147
|
50
|
100
|
|
|
|
80
|
my $pre = defined $_[2] ? $_[2] : '\s*'; |
148
|
50
|
100
|
|
|
|
68
|
my $esc = defined $_[3] ? $_[3] : qq{\\}; |
149
|
50
|
|
|
|
|
94
|
my $pat = gen_delimited_pat($del, $esc); |
150
|
50
|
|
100
|
|
|
161
|
my $startpos = pos $$textref || 0; |
151
|
50
|
100
|
|
|
|
540
|
return _fail($wantarray, $textref, "Not a delimited pattern", 0) |
152
|
|
|
|
|
|
|
unless $$textref =~ m/\G($pre)($pat)/gc; |
153
|
43
|
|
|
|
|
89
|
my $prelen = length($1); |
154
|
43
|
|
|
|
|
53
|
my $matchpos = $startpos+$prelen; |
155
|
43
|
|
|
|
|
60
|
my $endpos = pos $$textref; |
156
|
43
|
|
|
|
|
93
|
return _succeed $wantarray, $textref, |
157
|
|
|
|
|
|
|
$matchpos, $endpos-$matchpos, # MATCH |
158
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, # REMAINDER |
159
|
|
|
|
|
|
|
$startpos, $prelen; # PREFIX |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
my %eb_delim_cache; |
163
|
|
|
|
|
|
|
sub _eb_delims { |
164
|
51
|
|
|
51
|
|
76
|
my ($ldel_orig) = @_; |
165
|
51
|
100
|
|
|
|
88
|
return @{ $eb_delim_cache{$ldel_orig} } if $eb_delim_cache{$ldel_orig}; |
|
45
|
|
|
|
|
102
|
|
166
|
6
|
|
|
|
|
10
|
my $qdel = ""; |
167
|
6
|
|
|
|
|
7
|
my $quotelike; |
168
|
6
|
|
|
|
|
9
|
my $ldel = $ldel_orig; |
169
|
6
|
50
|
|
|
|
16
|
$ldel =~ s/'//g and $qdel .= q{'}; |
170
|
6
|
100
|
|
|
|
17
|
$ldel =~ s/"//g and $qdel .= q{"}; |
171
|
6
|
100
|
|
|
|
16
|
$ldel =~ s/`//g and $qdel .= q{`}; |
172
|
6
|
100
|
|
|
|
15
|
$ldel =~ s/q//g and $quotelike = 1; |
173
|
6
|
|
|
|
|
10
|
$ldel =~ tr/[](){}<>\0-\377/[[(({{<
|
174
|
6
|
|
|
|
|
10
|
my $rdel = $ldel; |
175
|
6
|
50
|
|
|
|
12
|
return @{ $eb_delim_cache{$ldel_orig} = [] } unless $rdel =~ tr/[({])}>/; |
|
0
|
|
|
|
|
0
|
|
176
|
6
|
|
|
|
|
18
|
my $posbug = pos; |
177
|
6
|
|
|
|
|
19
|
$ldel = join('|', map { quotemeta $_ } split('', $ldel)); |
|
12
|
|
|
|
|
29
|
|
178
|
6
|
|
|
|
|
15
|
$rdel = join('|', map { quotemeta $_ } split('', $rdel)); |
|
12
|
|
|
|
|
19
|
|
179
|
6
|
|
|
|
|
15
|
pos = $posbug; |
180
|
6
|
|
66
|
|
|
9
|
@{ $eb_delim_cache{$ldel_orig} = [ |
|
6
|
|
|
|
|
183
|
|
181
|
|
|
|
|
|
|
qr/\G($ldel)/, $qdel && qr/\G([$qdel])/, $quotelike, qr/\G($rdel)/ |
182
|
|
|
|
|
|
|
] }; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
sub extract_bracketed (;$$$) |
185
|
|
|
|
|
|
|
{ |
186
|
51
|
50
|
|
51
|
1
|
9264
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
187
|
51
|
100
|
|
|
|
116
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
188
|
51
|
100
|
|
|
|
88
|
my $ldel = defined $_[1] ? $_[1] : '{([<'; |
189
|
51
|
50
|
|
|
|
131
|
my $pre = defined $_[2] ? qr/\G$_[2]/ : qr/\G\s*/; |
190
|
51
|
|
|
|
|
74
|
my $wantarray = wantarray; |
191
|
51
|
|
|
|
|
73
|
my @ret = _eb_delims($ldel); |
192
|
51
|
50
|
|
|
|
89
|
unless (@ret) |
193
|
|
|
|
|
|
|
{ |
194
|
0
|
|
|
|
|
0
|
return _fail $wantarray, $textref, |
195
|
|
|
|
|
|
|
"Did not find a suitable bracket in delimiter: \"$_[1]\"", |
196
|
|
|
|
|
|
|
0; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
51
|
|
100
|
|
|
108
|
my $startpos = pos $$textref || 0; |
200
|
51
|
|
|
|
|
74
|
my @match = _match_bracketed($textref, $pre, @ret); |
201
|
|
|
|
|
|
|
|
202
|
51
|
100
|
|
|
|
98
|
return _fail ($wantarray, $textref) unless @match; |
203
|
|
|
|
|
|
|
|
204
|
18
|
|
|
|
|
63
|
return _succeed ( $wantarray, $textref, |
205
|
|
|
|
|
|
|
$match[2], $match[5]+2, # MATCH |
206
|
|
|
|
|
|
|
@match[8,9], # REMAINDER |
207
|
|
|
|
|
|
|
@match[0,1], # PREFIX |
208
|
|
|
|
|
|
|
); |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub _match_bracketed # $textref, $pre, $ldel, $qdel, $quotelike, $rdel |
212
|
|
|
|
|
|
|
{ |
213
|
93
|
|
|
93
|
|
167
|
my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_; |
214
|
93
|
|
100
|
|
|
274
|
my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0); |
215
|
93
|
50
|
|
|
|
390
|
unless ($$textref =~ m/$pre/gc) |
216
|
|
|
|
|
|
|
{ |
217
|
0
|
|
|
|
|
0
|
_failmsg "Did not find prefix: /$pre/", $startpos; |
218
|
0
|
|
|
|
|
0
|
return; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
93
|
|
|
|
|
131
|
$ldelpos = pos $$textref; |
222
|
|
|
|
|
|
|
|
223
|
93
|
100
|
|
|
|
296
|
unless ($$textref =~ m/$ldel/gc) |
224
|
|
|
|
|
|
|
{ |
225
|
31
|
|
|
|
|
90
|
_failmsg "Did not find opening bracket after prefix: \"$pre\"", |
226
|
|
|
|
|
|
|
pos $$textref; |
227
|
31
|
|
|
|
|
69
|
pos $$textref = $startpos; |
228
|
31
|
|
|
|
|
66
|
return; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
62
|
|
|
|
|
163
|
my @nesting = ( $1 ); |
232
|
62
|
|
|
|
|
80
|
my $textlen = length $$textref; |
233
|
62
|
|
|
|
|
112
|
while (pos $$textref < $textlen) |
234
|
|
|
|
|
|
|
{ |
235
|
636
|
100
|
|
|
|
945
|
next if $$textref =~ m/\G\\./gcs; |
236
|
|
|
|
|
|
|
|
237
|
628
|
100
|
100
|
|
|
2933
|
if ($$textref =~ m/$ldel/gc) |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
238
|
|
|
|
|
|
|
{ |
239
|
18
|
|
|
|
|
44
|
push @nesting, $1; |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
elsif ($$textref =~ m/$rdel/gc) |
242
|
|
|
|
|
|
|
{ |
243
|
78
|
|
|
|
|
168
|
my ($found, $brackettype) = ($1, $1); |
244
|
78
|
50
|
|
|
|
139
|
if ($#nesting < 0) |
245
|
|
|
|
|
|
|
{ |
246
|
0
|
|
|
|
|
0
|
_failmsg "Unmatched closing bracket: \"$found\"", |
247
|
|
|
|
|
|
|
pos $$textref; |
248
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
249
|
0
|
|
|
|
|
0
|
return; |
250
|
|
|
|
|
|
|
} |
251
|
78
|
|
|
|
|
118
|
my $expected = pop(@nesting); |
252
|
78
|
|
|
|
|
110
|
$expected =~ tr/({[)}]>/; |
253
|
78
|
50
|
|
|
|
122
|
if ($expected ne $brackettype) |
254
|
|
|
|
|
|
|
{ |
255
|
0
|
|
|
|
|
0
|
_failmsg qq{Mismatched closing bracket: expected "$expected" but found "$found"}, |
256
|
|
|
|
|
|
|
pos $$textref; |
257
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
258
|
0
|
|
|
|
|
0
|
return; |
259
|
|
|
|
|
|
|
} |
260
|
78
|
100
|
|
|
|
161
|
last if $#nesting < 0; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
elsif ($qdel && $$textref =~ m/$qdel/gc) |
263
|
|
|
|
|
|
|
{ |
264
|
10
|
50
|
|
|
|
214
|
$$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gsc and next; |
265
|
0
|
|
|
|
|
0
|
_failmsg "Unmatched embedded quote ($1)", |
266
|
|
|
|
|
|
|
pos $$textref; |
267
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
268
|
0
|
|
|
|
|
0
|
return; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
elsif ($quotelike && _match_quotelike($textref,qr/\G()/,$ref2slashvalid{$textref},$ref2qmarkvalid{$textref})) |
271
|
|
|
|
|
|
|
{ |
272
|
4
|
|
|
|
|
8
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; # back-compat |
273
|
4
|
|
|
|
|
10
|
next; |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
518
|
|
|
|
|
1127
|
else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs } |
277
|
|
|
|
|
|
|
} |
278
|
62
|
100
|
|
|
|
145
|
if ($#nesting>=0) |
279
|
|
|
|
|
|
|
{ |
280
|
2
|
|
|
|
|
10
|
_failmsg "Unmatched opening bracket(s): " |
281
|
|
|
|
|
|
|
. join("..",@nesting)."..", |
282
|
|
|
|
|
|
|
pos $$textref; |
283
|
2
|
|
|
|
|
5
|
pos $$textref = $startpos; |
284
|
2
|
|
|
|
|
4
|
return; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
60
|
|
|
|
|
72
|
$endpos = pos $$textref; |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
return ( |
290
|
60
|
|
|
|
|
182
|
$startpos, $ldelpos-$startpos, # PREFIX |
291
|
|
|
|
|
|
|
$ldelpos, 1, # OPENING BRACKET |
292
|
|
|
|
|
|
|
$ldelpos+1, $endpos-$ldelpos-2, # CONTENTS |
293
|
|
|
|
|
|
|
$endpos-1, 1, # CLOSING BRACKET |
294
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, # REMAINDER |
295
|
|
|
|
|
|
|
); |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
sub _revbracket($) |
299
|
|
|
|
|
|
|
{ |
300
|
70
|
|
|
70
|
|
122
|
my $brack = reverse $_[0]; |
301
|
70
|
|
|
|
|
98
|
$brack =~ tr/[({])}>/; |
302
|
70
|
|
|
|
|
238
|
return $brack; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
my $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*}; |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
my $et_default_ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; |
308
|
|
|
|
|
|
|
sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options) |
309
|
|
|
|
|
|
|
{ |
310
|
48
|
50
|
|
48
|
1
|
44843
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
311
|
48
|
50
|
|
|
|
172
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
312
|
48
|
|
|
|
|
68
|
my $ldel = $_[1]; |
313
|
48
|
|
|
|
|
54
|
my $rdel = $_[2]; |
314
|
48
|
100
|
|
|
|
147
|
my $pre = defined $_[3] ? qr/\G$_[3]/ : qr/\G\s*/; |
315
|
48
|
100
|
|
|
|
83
|
my %options = defined $_[4] ? %{$_[4]} : (); |
|
16
|
|
|
|
|
49
|
|
316
|
48
|
100
|
|
|
|
90
|
my $omode = defined $options{fail} ? $options{fail} : ''; |
317
|
6
|
|
|
|
|
12
|
my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}}) |
318
|
|
|
|
|
|
|
: defined($options{reject}) ? $options{reject} |
319
|
48
|
50
|
|
|
|
103
|
: '' |
|
|
100
|
|
|
|
|
|
320
|
|
|
|
|
|
|
; |
321
|
10
|
|
|
|
|
19
|
my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}}) |
322
|
|
|
|
|
|
|
: defined($options{ignore}) ? $options{ignore} |
323
|
48
|
50
|
|
|
|
91
|
: '' |
|
|
100
|
|
|
|
|
|
324
|
|
|
|
|
|
|
; |
325
|
|
|
|
|
|
|
|
326
|
48
|
100
|
|
|
|
80
|
$ldel = $et_default_ldel if !defined $ldel; |
327
|
48
|
|
|
|
|
60
|
$@ = undef; |
328
|
|
|
|
|
|
|
|
329
|
48
|
|
|
|
|
81
|
my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore); |
330
|
|
|
|
|
|
|
|
331
|
48
|
100
|
|
|
|
100
|
return _fail(wantarray, $textref) unless @match; |
332
|
34
|
|
|
|
|
111
|
return _succeed wantarray, $textref, |
333
|
|
|
|
|
|
|
$match[2], $match[3]+$match[5]+$match[7], # MATCH |
334
|
|
|
|
|
|
|
@match[8..9,0..1,2..7]; # REM, PRE, BITS |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
sub _match_tagged # ($$$$$$$) |
338
|
|
|
|
|
|
|
{ |
339
|
120
|
|
|
120
|
|
236
|
my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_; |
340
|
120
|
|
|
|
|
132
|
my $rdelspec; |
341
|
|
|
|
|
|
|
|
342
|
120
|
|
100
|
|
|
504
|
my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) = ( pos($$textref) = pos($$textref)||0 ); |
343
|
|
|
|
|
|
|
|
344
|
120
|
50
|
|
|
|
577
|
unless ($$textref =~ m/$pre/gc) |
345
|
|
|
|
|
|
|
{ |
346
|
0
|
|
|
|
|
0
|
_failmsg "Did not find prefix: /$pre/", pos $$textref; |
347
|
0
|
|
|
|
|
0
|
goto failed; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
120
|
|
|
|
|
177
|
$opentagpos = pos($$textref); |
351
|
|
|
|
|
|
|
|
352
|
120
|
100
|
|
|
|
817
|
unless ($$textref =~ m/\G$ldel/gc) |
353
|
|
|
|
|
|
|
{ |
354
|
8
|
|
|
|
|
29
|
_failmsg "Did not find opening tag: /$ldel/", pos $$textref; |
355
|
8
|
|
|
|
|
31
|
goto failed; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
112
|
|
|
|
|
158
|
$textpos = pos($$textref); |
359
|
|
|
|
|
|
|
|
360
|
112
|
100
|
|
|
|
195
|
if (!defined $rdel) |
361
|
|
|
|
|
|
|
{ |
362
|
70
|
|
|
|
|
228
|
$rdelspec = substr($$textref, $-[0], $+[0] - $-[0]); |
363
|
70
|
50
|
|
|
|
311
|
unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". _revbracket($1) /oes) |
|
70
|
|
|
|
|
192
|
|
364
|
|
|
|
|
|
|
{ |
365
|
0
|
|
|
|
|
0
|
_failmsg "Unable to construct closing tag to match: $rdel", |
366
|
|
|
|
|
|
|
pos $$textref; |
367
|
0
|
|
|
|
|
0
|
goto failed; |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
else |
371
|
|
|
|
|
|
|
{ |
372
|
|
|
|
|
|
|
## no critic (BuiltinFunctions::ProhibitStringyEval) |
373
|
42
|
|
66
|
|
|
1820
|
$rdelspec = eval "qq{$rdel}" || do { |
374
|
|
|
|
|
|
|
my $del; |
375
|
|
|
|
|
|
|
for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',) |
376
|
|
|
|
|
|
|
{ next if $rdel =~ /\Q$_/; $del = $_; last } |
377
|
|
|
|
|
|
|
unless ($del) { |
378
|
9
|
|
|
9
|
|
85
|
use Carp; |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
34500
|
|
379
|
|
|
|
|
|
|
croak "Can't interpolate right delimiter $rdel" |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
eval "qq$del$rdel$del"; |
382
|
|
|
|
|
|
|
}; |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
|
385
|
112
|
|
|
|
|
291
|
while (pos($$textref) < length($$textref)) |
386
|
|
|
|
|
|
|
{ |
387
|
1240
|
50
|
|
|
|
1859
|
next if $$textref =~ m/\G\\./gc; |
388
|
|
|
|
|
|
|
|
389
|
1240
|
50
|
100
|
|
|
6010
|
if ($$textref =~ m/\G(\n[ \t]*\n)/gc ) |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
390
|
|
|
|
|
|
|
{ |
391
|
0
|
0
|
|
|
|
0
|
$parapos = pos($$textref) - length($1) |
392
|
|
|
|
|
|
|
unless defined $parapos; |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
elsif ($$textref =~ m/\G($rdelspec)/gc ) |
395
|
|
|
|
|
|
|
{ |
396
|
82
|
|
|
|
|
172
|
$closetagpos = pos($$textref)-length($1); |
397
|
82
|
|
|
|
|
575
|
goto matched; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc) |
400
|
|
|
|
|
|
|
{ |
401
|
12
|
|
|
|
|
28
|
next; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
elsif ($bad && $$textref =~ m/\G($bad)/gcs) |
404
|
|
|
|
|
|
|
{ |
405
|
12
|
|
|
|
|
31
|
pos($$textref) -= length($1); # CUT OFF WHATEVER CAUSED THE SHORTNESS |
406
|
12
|
100
|
100
|
|
|
96
|
goto short if ($omode eq 'PARA' || $omode eq 'MAX'); |
407
|
4
|
|
|
|
|
15
|
_failmsg "Found invalid nested tag: $1", pos $$textref; |
408
|
4
|
|
|
|
|
35
|
goto failed; |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
elsif ($$textref =~ m/\G($ldel)/gc) |
411
|
|
|
|
|
|
|
{ |
412
|
32
|
|
|
|
|
57
|
my $tag = $1; |
413
|
32
|
|
|
|
|
62
|
pos($$textref) -= length($tag); # REWIND TO NESTED TAG |
414
|
32
|
100
|
|
|
|
79
|
unless (_match_tagged(@_)) # MATCH NESTED TAG |
415
|
|
|
|
|
|
|
{ |
416
|
4
|
50
|
33
|
|
|
18
|
goto short if $omode eq 'PARA' || $omode eq 'MAX'; |
417
|
4
|
|
|
|
|
32
|
_failmsg "Found unbalanced nested tag: $tag", |
418
|
|
|
|
|
|
|
pos $$textref; |
419
|
4
|
|
|
|
|
45
|
goto failed; |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
} |
422
|
1102
|
|
|
|
|
2410
|
else { $$textref =~ m/./gcs } |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
short: |
426
|
22
|
|
|
|
|
34
|
$closetagpos = pos($$textref); |
427
|
22
|
100
|
|
|
|
40
|
goto matched if $omode eq 'MAX'; |
428
|
18
|
100
|
|
|
|
46
|
goto failed unless $omode eq 'PARA'; |
429
|
|
|
|
|
|
|
|
430
|
4
|
50
|
|
|
|
10
|
if (defined $parapos) { pos($$textref) = $parapos } |
|
0
|
|
|
|
|
0
|
|
431
|
4
|
|
|
|
|
6
|
else { $parapos = pos($$textref) } |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
return ( |
434
|
4
|
|
|
|
|
13
|
$startpos, $opentagpos-$startpos, # PREFIX |
435
|
|
|
|
|
|
|
$opentagpos, $textpos-$opentagpos, # OPENING TAG |
436
|
|
|
|
|
|
|
$textpos, $parapos-$textpos, # TEXT |
437
|
|
|
|
|
|
|
$parapos, 0, # NO CLOSING TAG |
438
|
|
|
|
|
|
|
$parapos, length($$textref)-$parapos, # REMAINDER |
439
|
|
|
|
|
|
|
); |
440
|
|
|
|
|
|
|
|
441
|
86
|
|
|
|
|
101
|
matched: |
442
|
|
|
|
|
|
|
$endpos = pos($$textref); |
443
|
|
|
|
|
|
|
return ( |
444
|
86
|
|
|
|
|
306
|
$startpos, $opentagpos-$startpos, # PREFIX |
445
|
|
|
|
|
|
|
$opentagpos, $textpos-$opentagpos, # OPENING TAG |
446
|
|
|
|
|
|
|
$textpos, $closetagpos-$textpos, # TEXT |
447
|
|
|
|
|
|
|
$closetagpos, $endpos-$closetagpos, # CLOSING TAG |
448
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, # REMAINDER |
449
|
|
|
|
|
|
|
); |
450
|
|
|
|
|
|
|
|
451
|
30
|
100
|
|
|
|
126
|
failed: |
452
|
|
|
|
|
|
|
_failmsg "Did not find closing tag", pos $$textref unless $@; |
453
|
30
|
|
|
|
|
59
|
pos($$textref) = $startpos; |
454
|
30
|
|
|
|
|
67
|
return; |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
sub extract_variable (;$$) |
458
|
|
|
|
|
|
|
{ |
459
|
597
|
50
|
|
597
|
1
|
168022
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
460
|
597
|
50
|
|
|
|
1117
|
return ("","","") unless defined $$textref; |
461
|
597
|
100
|
|
|
|
1131
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
462
|
597
|
100
|
|
|
|
1556
|
my $pre = defined $_[1] ? qr/\G$_[1]/ : qr/\G\s*/; |
463
|
|
|
|
|
|
|
|
464
|
597
|
|
|
|
|
933
|
my @match = _match_variable($textref,$pre); |
465
|
|
|
|
|
|
|
|
466
|
597
|
100
|
|
|
|
1099
|
return _fail wantarray, $textref unless @match; |
467
|
|
|
|
|
|
|
|
468
|
232
|
|
|
|
|
570
|
return _succeed wantarray, $textref, |
469
|
|
|
|
|
|
|
@match[2..3,4..5,0..1]; # MATCH, REMAINDER, PREFIX |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub _match_variable |
473
|
|
|
|
|
|
|
{ |
474
|
|
|
|
|
|
|
# $# |
475
|
|
|
|
|
|
|
# $^ |
476
|
|
|
|
|
|
|
# $$ |
477
|
1299
|
|
|
1299
|
|
1862
|
my ($textref, $pre) = @_; |
478
|
1299
|
|
100
|
|
|
3227
|
my $startpos = pos($$textref) = pos($$textref)||0; |
479
|
1299
|
100
|
|
|
|
4411
|
unless ($$textref =~ m/$pre/gc) |
480
|
|
|
|
|
|
|
{ |
481
|
339
|
|
|
|
|
954
|
_failmsg "Did not find prefix: /$pre/", pos $$textref; |
482
|
339
|
|
|
|
|
851
|
return; |
483
|
|
|
|
|
|
|
} |
484
|
960
|
|
|
|
|
1316
|
my $varpos = pos($$textref); |
485
|
960
|
100
|
|
|
|
2005
|
unless ($$textref =~ m{\G\$\s*(?!::)(\d+|[][&`'+*./|,";%=~:?!\@<>()-]|\^[a-z]?)}gci) |
486
|
|
|
|
|
|
|
{ |
487
|
868
|
100
|
|
|
|
2229
|
unless ($$textref =~ m/\G((\$#?|[*\@\%]|\\&)+)/gc) |
488
|
|
|
|
|
|
|
{ |
489
|
617
|
|
|
|
|
1253
|
_failmsg "Did not find leading dereferencer", pos $$textref; |
490
|
617
|
|
|
|
|
1208
|
pos $$textref = $startpos; |
491
|
617
|
|
|
|
|
1623
|
return; |
492
|
|
|
|
|
|
|
} |
493
|
251
|
|
|
|
|
449
|
my $deref = $1; |
494
|
|
|
|
|
|
|
|
495
|
251
|
50
|
100
|
|
|
980
|
unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
496
|
|
|
|
|
|
|
or _match_codeblock($textref, qr/\G()/, '\{', qr/\G\s*(\})/, '\{', '\}', 0) |
497
|
|
|
|
|
|
|
or $deref eq '$#' or $deref eq '$$' |
498
|
|
|
|
|
|
|
or pos($$textref) == length $$textref ) |
499
|
|
|
|
|
|
|
{ |
500
|
0
|
|
|
|
|
0
|
_failmsg "Bad identifier after dereferencer", pos $$textref; |
501
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
502
|
0
|
|
|
|
|
0
|
return; |
503
|
|
|
|
|
|
|
} |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
343
|
|
|
|
|
437
|
while (1) |
507
|
|
|
|
|
|
|
{ |
508
|
421
|
100
|
|
|
|
990
|
next if $$textref =~ m/\G\s*(?:->)?\s*[{]\w+[}]/gc; |
509
|
394
|
100
|
|
|
|
1928
|
next if _match_codeblock($textref, |
510
|
|
|
|
|
|
|
qr/\G\s*->\s*(?:[_a-zA-Z]\w+\s*)?/, |
511
|
|
|
|
|
|
|
qr/[({[]/, qr/\G\s*([)}\]])/, |
512
|
|
|
|
|
|
|
qr/[({[]/, qr/[)}\]]/, 0); |
513
|
371
|
100
|
|
|
|
1814
|
next if _match_codeblock($textref, |
514
|
|
|
|
|
|
|
qr/\G\s*/, qr/[{[]/, qr/\G\s*([}\]])/, |
515
|
|
|
|
|
|
|
qr/[{[]/, qr/[}\]]/, 0); |
516
|
355
|
50
|
|
|
|
1157
|
next if _match_variable($textref,qr/\G\s*->\s*/); |
517
|
355
|
100
|
|
|
|
841
|
next if $$textref =~ m/\G\s*->\s*\w+(?![{([])/gc; |
518
|
343
|
|
|
|
|
439
|
last; |
519
|
|
|
|
|
|
|
} |
520
|
343
|
|
|
|
|
671
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
521
|
|
|
|
|
|
|
|
522
|
343
|
|
|
|
|
431
|
my $endpos = pos($$textref); |
523
|
343
|
|
|
|
|
894
|
return ($startpos, $varpos-$startpos, |
524
|
|
|
|
|
|
|
$varpos, $endpos-$varpos, |
525
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos |
526
|
|
|
|
|
|
|
); |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
my %ec_delim_cache; |
530
|
|
|
|
|
|
|
sub _ec_delims { |
531
|
95
|
|
|
95
|
|
186
|
my ($ldel_inner, $ldel_outer) = @_; |
532
|
0
|
|
|
|
|
0
|
return @{ $ec_delim_cache{$ldel_outer}{$ldel_inner} } |
533
|
95
|
50
|
|
|
|
571
|
if $ec_delim_cache{$ldel_outer}{$ldel_inner}; |
534
|
95
|
|
|
|
|
143
|
my $rdel_inner = $ldel_inner; |
535
|
95
|
|
|
|
|
122
|
my $rdel_outer = $ldel_outer; |
536
|
95
|
|
|
|
|
133
|
my $posbug = pos; |
537
|
95
|
|
|
|
|
183
|
for ($ldel_inner, $ldel_outer) { tr/[]()<>{}\0-\377/[[((<<{{/ds } |
|
190
|
|
|
|
|
360
|
|
538
|
95
|
|
|
|
|
145
|
for ($rdel_inner, $rdel_outer) { tr/[]()<>{}\0-\377/]]))>>}}/ds } |
|
190
|
|
|
|
|
250
|
|
539
|
95
|
|
|
|
|
345
|
for ($ldel_inner, $ldel_outer, $rdel_inner, $rdel_outer) |
540
|
|
|
|
|
|
|
{ |
541
|
380
|
|
|
|
|
762
|
$_ = '('.join('|',map { quotemeta $_ } split('',$_)).')' |
|
392
|
|
|
|
|
909
|
|
542
|
|
|
|
|
|
|
} |
543
|
95
|
|
|
|
|
198
|
pos = $posbug; |
544
|
95
|
|
|
|
|
144
|
@{ $ec_delim_cache{$ldel_outer}{$ldel_inner} = [ |
|
95
|
|
|
|
|
854
|
|
545
|
|
|
|
|
|
|
$ldel_outer, qr/\G\s*($rdel_outer)/, $ldel_inner, $rdel_inner |
546
|
|
|
|
|
|
|
] }; |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
sub extract_codeblock (;$$$$$) |
549
|
|
|
|
|
|
|
{ |
550
|
95
|
50
|
|
95
|
1
|
59030
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
551
|
95
|
100
|
|
|
|
316
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
552
|
95
|
|
|
|
|
140
|
my $wantarray = wantarray; |
553
|
95
|
100
|
|
|
|
171
|
my $ldel_inner = defined $_[1] ? $_[1] : '{'; |
554
|
95
|
100
|
|
|
|
337
|
my $pre = !defined $_[2] ? qr/\G\s*/ : qr/\G$_[2]/; |
555
|
95
|
100
|
|
|
|
195
|
my $ldel_outer = defined $_[3] ? $_[3] : $ldel_inner; |
556
|
95
|
|
|
|
|
144
|
my $rd = $_[4]; |
557
|
95
|
|
|
|
|
255
|
my @delims = _ec_delims($ldel_inner, $ldel_outer); |
558
|
|
|
|
|
|
|
|
559
|
95
|
|
|
|
|
241
|
my @match = _match_codeblock($textref, $pre, @delims, $rd); |
560
|
95
|
100
|
|
|
|
198
|
return _fail($wantarray, $textref) unless @match; |
561
|
50
|
|
|
|
|
247
|
return _succeed($wantarray, $textref, |
562
|
|
|
|
|
|
|
@match[2..3,4..5,0..1] # MATCH, REMAINDER, PREFIX |
563
|
|
|
|
|
|
|
); |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
sub _match_codeblock |
567
|
|
|
|
|
|
|
{ |
568
|
1025
|
|
|
1025
|
|
1828
|
my ($textref, $pre, $ldel_outer, $rdel_outer, $ldel_inner, $rdel_inner, $rd) = @_; |
569
|
1025
|
|
100
|
|
|
2385
|
my $startpos = pos($$textref) = pos($$textref) || 0; |
570
|
1025
|
100
|
|
|
|
3414
|
unless ($$textref =~ m/$pre/gc) |
571
|
|
|
|
|
|
|
{ |
572
|
355
|
|
|
|
|
1475
|
_failmsg qq{Did not match prefix /$pre/ at"} . |
573
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
574
|
|
|
|
|
|
|
q{..."}, |
575
|
|
|
|
|
|
|
pos $$textref; |
576
|
355
|
|
|
|
|
922
|
return; |
577
|
|
|
|
|
|
|
} |
578
|
670
|
|
|
|
|
938
|
my $codepos = pos($$textref); |
579
|
670
|
100
|
|
|
|
4851
|
unless ($$textref =~ m/\G($ldel_outer)/gc) # OUTERMOST DELIMITER |
580
|
|
|
|
|
|
|
{ |
581
|
539
|
|
|
|
|
1956
|
_failmsg qq{Did not find expected opening bracket at "} . |
582
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
583
|
|
|
|
|
|
|
q{..."}, |
584
|
|
|
|
|
|
|
pos $$textref; |
585
|
539
|
|
|
|
|
1221
|
pos $$textref = $startpos; |
586
|
539
|
|
|
|
|
1383
|
return; |
587
|
|
|
|
|
|
|
} |
588
|
131
|
|
|
|
|
280
|
my $closing = $1; |
589
|
131
|
|
|
|
|
216
|
$closing =~ tr/([<{/)]>}/; |
590
|
131
|
|
|
|
|
169
|
my $matched; |
591
|
|
|
|
|
|
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 |
592
|
131
|
50
|
33
|
|
|
541
|
if !pos($$textref) or !defined $ref2slashvalid{$textref}; # default, or reset |
593
|
131
|
|
|
|
|
302
|
while (pos($$textref) < length($$textref)) |
594
|
|
|
|
|
|
|
{ |
595
|
480
|
50
|
33
|
|
|
801
|
if ($rd && $$textref =~ m#\G(\Q(?)\E|\Q(s?)\E|\Q(s)\E)#gc) |
596
|
|
|
|
|
|
|
{ |
597
|
0
|
|
|
|
|
0
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
598
|
0
|
|
|
|
|
0
|
next; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
|
601
|
480
|
100
|
|
|
|
966
|
if ($$textref =~ m/\G\s*#.*/gc) |
602
|
|
|
|
|
|
|
{ |
603
|
6
|
|
|
|
|
11
|
next; |
604
|
|
|
|
|
|
|
} |
605
|
|
|
|
|
|
|
|
606
|
474
|
100
|
|
|
|
1622
|
if ($$textref =~ m/$rdel_outer/gc) |
607
|
|
|
|
|
|
|
{ |
608
|
127
|
100
|
66
|
|
|
542
|
unless ($matched = ($closing && $1 eq $closing) ) |
609
|
|
|
|
|
|
|
{ |
610
|
2
|
50
|
|
|
|
6
|
next if $1 eq '>'; # MIGHT BE A "LESS THAN" |
611
|
0
|
|
|
|
|
0
|
_failmsg q{Mismatched closing bracket at "} . |
612
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
613
|
|
|
|
|
|
|
qq{...". Expected '$closing'}, |
614
|
|
|
|
|
|
|
pos $$textref; |
615
|
|
|
|
|
|
|
} |
616
|
125
|
|
|
|
|
210
|
last; |
617
|
|
|
|
|
|
|
} |
618
|
|
|
|
|
|
|
|
619
|
347
|
100
|
100
|
|
|
941
|
if (_match_variable($textref,qr/\G\s*/) || |
620
|
|
|
|
|
|
|
_match_quotelike($textref,qr/\G\s*/,$ref2slashvalid{$textref},$ref2qmarkvalid{$textref}) ) |
621
|
|
|
|
|
|
|
{ |
622
|
137
|
|
|
|
|
213
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
623
|
137
|
|
|
|
|
214
|
next; |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
210
|
100
|
|
|
|
3265
|
if ($$textref =~ m#\G\s*(?!$ldel_inner)(?:$RE_PREREGEX_PAT|$RE_EXPR_PAT)#gc) |
627
|
|
|
|
|
|
|
{ |
628
|
72
|
|
|
|
|
181
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; |
629
|
72
|
|
|
|
|
119
|
next; |
630
|
|
|
|
|
|
|
} |
631
|
|
|
|
|
|
|
|
632
|
138
|
100
|
|
|
|
923
|
if ( _match_codeblock($textref, qr/\G\s*/, $ldel_inner, qr/\G\s*($rdel_inner)/, $ldel_inner, $rdel_inner, $rd) ) |
633
|
|
|
|
|
|
|
{ |
634
|
15
|
|
|
|
|
33
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; |
635
|
15
|
|
|
|
|
25
|
next; |
636
|
|
|
|
|
|
|
} |
637
|
|
|
|
|
|
|
|
638
|
123
|
50
|
|
|
|
722
|
if ($$textref =~ m/\G\s*$ldel_outer/gc) |
639
|
|
|
|
|
|
|
{ |
640
|
0
|
|
|
|
|
0
|
_failmsg q{Improperly nested codeblock at "} . |
641
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
642
|
|
|
|
|
|
|
q{..."}, |
643
|
|
|
|
|
|
|
pos $$textref; |
644
|
0
|
|
|
|
|
0
|
last; |
645
|
|
|
|
|
|
|
} |
646
|
|
|
|
|
|
|
|
647
|
123
|
|
|
|
|
280
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
648
|
123
|
|
|
|
|
386
|
$$textref =~ m/\G\s*(\w+|[-=>]>|.|\Z)/gc; |
649
|
|
|
|
|
|
|
} |
650
|
355
|
|
|
|
|
943
|
continue { $@ = undef } |
651
|
|
|
|
|
|
|
|
652
|
131
|
100
|
|
|
|
235
|
unless ($matched) |
653
|
|
|
|
|
|
|
{ |
654
|
6
|
50
|
|
|
|
28
|
_failmsg 'No match found for opening bracket', pos $$textref |
655
|
|
|
|
|
|
|
unless $@; |
656
|
6
|
|
|
|
|
28
|
return; |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
|
659
|
125
|
|
|
|
|
159
|
my $endpos = pos($$textref); |
660
|
125
|
|
|
|
|
428
|
return ( $startpos, $codepos-$startpos, |
661
|
|
|
|
|
|
|
$codepos, $endpos-$codepos, |
662
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, |
663
|
|
|
|
|
|
|
); |
664
|
|
|
|
|
|
|
} |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
my %mods = ( |
668
|
|
|
|
|
|
|
'none' => '[cgimsox]*', |
669
|
|
|
|
|
|
|
'm' => '[cgimsox]*', |
670
|
|
|
|
|
|
|
's' => '[cegimsox]*', |
671
|
|
|
|
|
|
|
'tr' => '[cds]*', |
672
|
|
|
|
|
|
|
'y' => '[cds]*', |
673
|
|
|
|
|
|
|
'qq' => '', |
674
|
|
|
|
|
|
|
'qx' => '', |
675
|
|
|
|
|
|
|
'qw' => '', |
676
|
|
|
|
|
|
|
'qr' => '[imsx]*', |
677
|
|
|
|
|
|
|
'q' => '', |
678
|
|
|
|
|
|
|
); |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
sub extract_quotelike (;$$) |
681
|
|
|
|
|
|
|
{ |
682
|
515
|
100
|
|
515
|
1
|
110152
|
my $textref = $_[0] ? \$_[0] : \$_; |
683
|
515
|
100
|
|
|
|
974
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
684
|
515
|
|
|
|
|
614
|
my $wantarray = wantarray; |
685
|
515
|
100
|
|
|
|
1195
|
my $pre = defined $_[1] ? qr/\G$_[1]/ : qr/\G\s*/; |
686
|
|
|
|
|
|
|
|
687
|
515
|
|
|
|
|
1157
|
my @match = _match_quotelike($textref,$pre,$ref2slashvalid{$textref},$ref2qmarkvalid{$textref}); |
688
|
515
|
100
|
|
|
|
1009
|
return _fail($wantarray, $textref) unless @match; |
689
|
101
|
|
|
|
|
348
|
return _succeed($wantarray, $textref, |
690
|
|
|
|
|
|
|
$match[2], $match[18]-$match[2], # MATCH |
691
|
|
|
|
|
|
|
@match[18,19], # REMAINDER |
692
|
|
|
|
|
|
|
@match[0,1], # PREFIX |
693
|
|
|
|
|
|
|
@match[2..17], # THE BITS |
694
|
|
|
|
|
|
|
@match[20,21], # ANY FILLET? |
695
|
|
|
|
|
|
|
); |
696
|
|
|
|
|
|
|
}; |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
my %maybe_quote = map +($_=>1), qw(" ' `); |
699
|
|
|
|
|
|
|
sub _match_quotelike |
700
|
|
|
|
|
|
|
{ |
701
|
805
|
|
|
805
|
|
1225
|
my ($textref, $pre, $allow_slash_match, $allow_qmark_match) = @_; |
702
|
|
|
|
|
|
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 |
703
|
805
|
100
|
66
|
|
|
2217
|
if !pos($$textref) or !defined $ref2slashvalid{$textref}; # default, or reset |
704
|
|
|
|
|
|
|
|
705
|
805
|
|
100
|
|
|
2500
|
my ($textlen,$startpos, |
706
|
|
|
|
|
|
|
$preld1pos,$ld1pos,$str1pos,$rd1pos, |
707
|
|
|
|
|
|
|
$preld2pos,$ld2pos,$str2pos,$rd2pos, |
708
|
|
|
|
|
|
|
$modpos) = ( length($$textref), pos($$textref) = pos($$textref) || 0 ); |
709
|
|
|
|
|
|
|
|
710
|
805
|
50
|
|
|
|
2943
|
unless ($$textref =~ m/$pre/gc) |
711
|
|
|
|
|
|
|
{ |
712
|
0
|
|
|
|
|
0
|
_failmsg qq{Did not find prefix /$pre/ at "} . |
713
|
|
|
|
|
|
|
substr($$textref, pos($$textref), 20) . |
714
|
|
|
|
|
|
|
q{..."}, |
715
|
|
|
|
|
|
|
pos $$textref; |
716
|
0
|
|
|
|
|
0
|
return; |
717
|
|
|
|
|
|
|
} |
718
|
805
|
|
|
|
|
1122
|
my $oppos = pos($$textref); |
719
|
805
|
|
|
|
|
1208
|
my $initial = substr($$textref,$oppos,1); |
720
|
805
|
100
|
100
|
|
|
4008
|
if ($initial && $maybe_quote{$initial} |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
721
|
|
|
|
|
|
|
|| $allow_slash_match && $initial eq '/' |
722
|
|
|
|
|
|
|
|| $allow_qmark_match && $initial eq '?') |
723
|
|
|
|
|
|
|
{ |
724
|
58
|
100
|
|
|
|
691
|
unless ($$textref =~ m/\G \Q$initial\E [^\\$initial]* (\\.[^\\$initial]*)* \Q$initial\E /gcsx) |
725
|
|
|
|
|
|
|
{ |
726
|
2
|
|
|
|
|
11
|
_failmsg qq{Did not find closing delimiter to match '$initial' at "} . |
727
|
|
|
|
|
|
|
substr($$textref, $oppos, 20) . |
728
|
|
|
|
|
|
|
q{..."}, |
729
|
|
|
|
|
|
|
pos $$textref; |
730
|
2
|
|
|
|
|
5
|
pos $$textref = $startpos; |
731
|
2
|
|
|
|
|
8
|
return; |
732
|
|
|
|
|
|
|
} |
733
|
56
|
|
|
|
|
113
|
$modpos= pos($$textref); |
734
|
56
|
|
|
|
|
83
|
$rd1pos = $modpos-1; |
735
|
|
|
|
|
|
|
|
736
|
56
|
100
|
66
|
|
|
189
|
if ($initial eq '/' || $initial eq '?') |
737
|
|
|
|
|
|
|
{ |
738
|
15
|
|
|
|
|
97
|
$$textref =~ m/\G$mods{none}/gc |
739
|
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
|
|
741
|
56
|
|
|
|
|
96
|
my $endpos = pos($$textref); |
742
|
56
|
|
|
|
|
112
|
$ref2qmarkvalid{$textref} = $ref2slashvalid{$textref} = 0; |
743
|
|
|
|
|
|
|
return ( |
744
|
56
|
|
|
|
|
261
|
$startpos, $oppos-$startpos, # PREFIX |
745
|
|
|
|
|
|
|
$oppos, 0, # NO OPERATOR |
746
|
|
|
|
|
|
|
$oppos, 1, # LEFT DEL |
747
|
|
|
|
|
|
|
$oppos+1, $rd1pos-$oppos-1, # STR/PAT |
748
|
|
|
|
|
|
|
$rd1pos, 1, # RIGHT DEL |
749
|
|
|
|
|
|
|
$modpos, 0, # NO 2ND LDEL |
750
|
|
|
|
|
|
|
$modpos, 0, # NO 2ND STR |
751
|
|
|
|
|
|
|
$modpos, 0, # NO 2ND RDEL |
752
|
|
|
|
|
|
|
$modpos, $endpos-$modpos, # MODIFIERS |
753
|
|
|
|
|
|
|
$endpos, $textlen-$endpos, # REMAINDER |
754
|
|
|
|
|
|
|
); |
755
|
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
|
757
|
747
|
100
|
|
|
|
1867
|
unless ($$textref =~ m{\G(\b(?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<(?=[a-zA-Z]|\s*['"`;,]))}gc) |
758
|
|
|
|
|
|
|
{ |
759
|
665
|
|
|
|
|
2033
|
_failmsg q{No quotelike operator found after prefix at "} . |
760
|
|
|
|
|
|
|
substr($$textref, pos($$textref), 20) . |
761
|
|
|
|
|
|
|
q{..."}, |
762
|
|
|
|
|
|
|
pos $$textref; |
763
|
665
|
|
|
|
|
1439
|
pos $$textref = $startpos; |
764
|
665
|
|
|
|
|
1493
|
return; |
765
|
|
|
|
|
|
|
} |
766
|
|
|
|
|
|
|
|
767
|
82
|
|
|
|
|
150
|
my $op = $1; |
768
|
82
|
|
|
|
|
105
|
$preld1pos = pos($$textref); |
769
|
82
|
100
|
|
|
|
139
|
if ($op eq '<<') { |
770
|
28
|
|
|
|
|
31
|
$ld1pos = pos($$textref); |
771
|
28
|
|
|
|
|
31
|
my $label; |
772
|
28
|
100
|
|
|
|
109
|
if ($$textref =~ m{\G([A-Za-z_]\w*)}gc) { |
|
|
100
|
|
|
|
|
|
773
|
9
|
|
|
|
|
14
|
$label = $1; |
774
|
|
|
|
|
|
|
} |
775
|
|
|
|
|
|
|
elsif ($$textref =~ m{ \G ' ([^'\\]* (?:\\.[^'\\]*)*) ' |
776
|
|
|
|
|
|
|
| \G " ([^"\\]* (?:\\.[^"\\]*)*) " |
777
|
|
|
|
|
|
|
| \G ` ([^`\\]* (?:\\.[^`\\]*)*) ` |
778
|
|
|
|
|
|
|
}gcsx) { |
779
|
16
|
|
|
|
|
29
|
$label = $+; |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
else { |
782
|
3
|
|
|
|
|
4
|
$label = ""; |
783
|
|
|
|
|
|
|
} |
784
|
28
|
|
|
|
|
36
|
my $extrapos = pos($$textref); |
785
|
28
|
|
|
|
|
69
|
$$textref =~ m{.*\n}gc; |
786
|
28
|
|
|
|
|
60
|
$str1pos = pos($$textref)--; |
787
|
28
|
100
|
|
|
|
238
|
unless ($$textref =~ m{.*?\n(?=\Q$label\E\n)}gc) { |
788
|
4
|
|
|
|
|
21
|
_failmsg qq{Missing here doc terminator ('$label') after "} . |
789
|
|
|
|
|
|
|
substr($$textref, $startpos, 20) . |
790
|
|
|
|
|
|
|
q{..."}, |
791
|
|
|
|
|
|
|
pos $$textref; |
792
|
4
|
|
|
|
|
8
|
pos $$textref = $startpos; |
793
|
4
|
|
|
|
|
10
|
return; |
794
|
|
|
|
|
|
|
} |
795
|
24
|
|
|
|
|
34
|
$rd1pos = pos($$textref); |
796
|
24
|
|
|
|
|
103
|
$$textref =~ m{\Q$label\E\n}gc; |
797
|
24
|
|
|
|
|
41
|
$ld2pos = pos($$textref); |
798
|
24
|
|
|
|
|
38
|
$ref2qmarkvalid{$textref} = $ref2slashvalid{$textref} = 0; |
799
|
|
|
|
|
|
|
return ( |
800
|
24
|
|
|
|
|
106
|
$startpos, $oppos-$startpos, # PREFIX |
801
|
|
|
|
|
|
|
$oppos, length($op), # OPERATOR |
802
|
|
|
|
|
|
|
$ld1pos, $extrapos-$ld1pos, # LEFT DEL |
803
|
|
|
|
|
|
|
$str1pos, $rd1pos-$str1pos, # STR/PAT |
804
|
|
|
|
|
|
|
$rd1pos, $ld2pos-$rd1pos, # RIGHT DEL |
805
|
|
|
|
|
|
|
$ld2pos, 0, # NO 2ND LDEL |
806
|
|
|
|
|
|
|
$ld2pos, 0, # NO 2ND STR |
807
|
|
|
|
|
|
|
$ld2pos, 0, # NO 2ND RDEL |
808
|
|
|
|
|
|
|
$ld2pos, 0, # NO MODIFIERS |
809
|
|
|
|
|
|
|
$ld2pos, $textlen-$ld2pos, # REMAINDER |
810
|
|
|
|
|
|
|
$extrapos, $str1pos-$extrapos, # FILLETED BIT |
811
|
|
|
|
|
|
|
); |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
|
814
|
54
|
|
|
|
|
95
|
$$textref =~ m/\G\s*/gc; |
815
|
54
|
|
|
|
|
70
|
$ld1pos = pos($$textref); |
816
|
54
|
|
|
|
|
69
|
$str1pos = $ld1pos+1; |
817
|
|
|
|
|
|
|
|
818
|
54
|
50
|
|
|
|
164
|
if ($$textref !~ m/\G(\S)/gc) # SHOULD USE LOOKAHEAD |
|
|
100
|
|
|
|
|
|
819
|
|
|
|
|
|
|
{ |
820
|
0
|
|
|
|
|
0
|
_failmsg "No block delimiter found after quotelike $op", |
821
|
|
|
|
|
|
|
pos $$textref; |
822
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
823
|
0
|
|
|
|
|
0
|
return; |
824
|
|
|
|
|
|
|
} |
825
|
|
|
|
|
|
|
elsif (substr($$textref, $ld1pos, 2) eq '=>') |
826
|
|
|
|
|
|
|
{ |
827
|
1
|
|
|
|
|
4
|
_failmsg "quotelike $op was actually quoted by '=>'", |
828
|
|
|
|
|
|
|
pos $$textref; |
829
|
1
|
|
|
|
|
3
|
pos $$textref = $startpos; |
830
|
1
|
|
|
|
|
2
|
return; |
831
|
|
|
|
|
|
|
} |
832
|
53
|
|
|
|
|
88
|
pos($$textref) = $ld1pos; # HAVE TO DO THIS BECAUSE LOOKAHEAD BROKEN |
833
|
53
|
|
|
|
|
136
|
my ($ldel1, $rdel1) = ("\Q$1","\Q$1"); |
834
|
53
|
100
|
|
|
|
123
|
if ($ldel1 =~ /[[(<{]/) |
835
|
|
|
|
|
|
|
{ |
836
|
33
|
|
|
|
|
52
|
$rdel1 =~ tr/[({])}>/; |
837
|
|
|
|
|
|
|
defined(_match_bracketed($textref,qr/\G/,qr/\G($ldel1)/,"","",qr/\G($rdel1)/)) |
838
|
33
|
50
|
|
|
|
279
|
|| do { pos $$textref = $startpos; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
839
|
33
|
|
|
|
|
73
|
$ld2pos = pos($$textref); |
840
|
33
|
|
|
|
|
52
|
$rd1pos = $ld2pos-1; |
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
else |
843
|
|
|
|
|
|
|
{ |
844
|
|
|
|
|
|
|
$$textref =~ /\G$ldel1[^\\$ldel1]*(\\.[^\\$ldel1]*)*$ldel1/gcs |
845
|
20
|
50
|
|
|
|
206
|
|| do { pos $$textref = $startpos; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
846
|
20
|
|
|
|
|
43
|
$ld2pos = $rd1pos = pos($$textref)-1; |
847
|
|
|
|
|
|
|
} |
848
|
|
|
|
|
|
|
|
849
|
53
|
100
|
|
|
|
166
|
my $second_arg = $op =~ /s|tr|y/ ? 1 : 0; |
850
|
53
|
100
|
|
|
|
83
|
if ($second_arg) |
851
|
|
|
|
|
|
|
{ |
852
|
23
|
|
|
|
|
28
|
my ($ldel2, $rdel2); |
853
|
23
|
100
|
|
|
|
43
|
if ($ldel1 =~ /[[(<{]/) |
854
|
|
|
|
|
|
|
{ |
855
|
11
|
50
|
|
|
|
30
|
unless ($$textref =~ /\G\s*(\S)/gc) # SHOULD USE LOOKAHEAD |
856
|
|
|
|
|
|
|
{ |
857
|
0
|
|
|
|
|
0
|
_failmsg "Missing second block for quotelike $op", |
858
|
|
|
|
|
|
|
pos $$textref; |
859
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
860
|
0
|
|
|
|
|
0
|
return; |
861
|
|
|
|
|
|
|
} |
862
|
11
|
|
|
|
|
22
|
$ldel2 = $rdel2 = "\Q$1"; |
863
|
11
|
|
|
|
|
13
|
$rdel2 =~ tr/[({])}>/; |
864
|
|
|
|
|
|
|
} |
865
|
|
|
|
|
|
|
else |
866
|
|
|
|
|
|
|
{ |
867
|
12
|
|
|
|
|
19
|
$ldel2 = $rdel2 = $ldel1; |
868
|
|
|
|
|
|
|
} |
869
|
23
|
|
|
|
|
46
|
$str2pos = $ld2pos+1; |
870
|
|
|
|
|
|
|
|
871
|
23
|
100
|
|
|
|
57
|
if ($ldel2 =~ /[[(<{]/) |
872
|
|
|
|
|
|
|
{ |
873
|
9
|
|
|
|
|
18
|
pos($$textref)--; # OVERCOME BROKEN LOOKAHEAD |
874
|
|
|
|
|
|
|
defined(_match_bracketed($textref,qr/\G/,qr/\G($ldel2)/,"","",qr/\G($rdel2)/)) |
875
|
9
|
50
|
|
|
|
58
|
|| do { pos $$textref = $startpos; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
876
|
|
|
|
|
|
|
} |
877
|
|
|
|
|
|
|
else |
878
|
|
|
|
|
|
|
{ |
879
|
|
|
|
|
|
|
$$textref =~ /[^\\$ldel2]*(\\.[^\\$ldel2]*)*$ldel2/gcs |
880
|
14
|
50
|
|
|
|
107
|
|| do { pos $$textref = $startpos; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
881
|
|
|
|
|
|
|
} |
882
|
23
|
|
|
|
|
41
|
$rd2pos = pos($$textref)-1; |
883
|
|
|
|
|
|
|
} |
884
|
|
|
|
|
|
|
else |
885
|
|
|
|
|
|
|
{ |
886
|
30
|
|
|
|
|
37
|
$ld2pos = $str2pos = $rd2pos = $rd1pos; |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
|
889
|
53
|
|
|
|
|
59
|
$modpos = pos $$textref; |
890
|
|
|
|
|
|
|
|
891
|
53
|
|
|
|
|
293
|
$$textref =~ m/\G($mods{$op})/gc; |
892
|
53
|
|
|
|
|
103
|
my $endpos = pos $$textref; |
893
|
53
|
|
|
|
|
109
|
$ref2qmarkvalid{$textref} = $ref2slashvalid{$textref} = 0; |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
return ( |
896
|
53
|
|
|
|
|
196
|
$startpos, $oppos-$startpos, # PREFIX |
897
|
|
|
|
|
|
|
$oppos, length($op), # OPERATOR |
898
|
|
|
|
|
|
|
$ld1pos, 1, # LEFT DEL |
899
|
|
|
|
|
|
|
$str1pos, $rd1pos-$str1pos, # STR/PAT |
900
|
|
|
|
|
|
|
$rd1pos, 1, # RIGHT DEL |
901
|
|
|
|
|
|
|
$ld2pos, $second_arg, # 2ND LDEL (MAYBE) |
902
|
|
|
|
|
|
|
$str2pos, $rd2pos-$str2pos, # 2ND STR (MAYBE) |
903
|
|
|
|
|
|
|
$rd2pos, $second_arg, # 2ND RDEL (MAYBE) |
904
|
|
|
|
|
|
|
$modpos, $endpos-$modpos, # MODIFIERS |
905
|
|
|
|
|
|
|
$endpos, $textlen-$endpos, # REMAINDER |
906
|
|
|
|
|
|
|
); |
907
|
|
|
|
|
|
|
} |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
my $def_func = [ |
910
|
|
|
|
|
|
|
sub { extract_variable($_[0], '') }, |
911
|
|
|
|
|
|
|
sub { extract_quotelike($_[0],'') }, |
912
|
|
|
|
|
|
|
sub { extract_codeblock($_[0],'{}','') }, |
913
|
|
|
|
|
|
|
]; |
914
|
|
|
|
|
|
|
my %ref_not_regex = map +($_=>1), qw(CODE Text::Balanced::Extractor); |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
sub _update_patvalid { |
917
|
563
|
|
|
563
|
|
1110
|
my ($textref, $text) = @_; |
918
|
563
|
100
|
100
|
|
|
6178
|
if ($ref2slashvalid{$textref} && $text =~ m/(?:$RE_NUM|[\)\]])\s*$/) |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
919
|
|
|
|
|
|
|
{ |
920
|
25
|
|
|
|
|
81
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
921
|
|
|
|
|
|
|
} elsif (!$ref2slashvalid{$textref} && $text =~ m/$RE_PREREGEX_PAT\s*$/) |
922
|
|
|
|
|
|
|
{ |
923
|
1
|
|
|
|
|
4
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; |
924
|
|
|
|
|
|
|
} elsif (!$ref2slashvalid{$textref} && $text =~ m/$RE_EXPR_PAT\s*$/) |
925
|
|
|
|
|
|
|
{ |
926
|
45
|
|
|
|
|
86
|
$ref2slashvalid{$textref} = 1; |
927
|
45
|
|
|
|
|
137
|
$ref2qmarkvalid{$textref} = 0; |
928
|
|
|
|
|
|
|
} |
929
|
|
|
|
|
|
|
} |
930
|
|
|
|
|
|
|
sub extract_multiple (;$$$$) # ($text, $functions_ref, $max_fields, $ignoreunknown) |
931
|
|
|
|
|
|
|
{ |
932
|
40
|
100
|
|
40
|
1
|
4133
|
my $textref = defined($_[0]) ? \$_[0] : \$_; |
933
|
40
|
50
|
|
|
|
137
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
934
|
40
|
|
|
|
|
51
|
my $posbug = pos; |
935
|
40
|
|
|
|
|
51
|
my ($lastpos, $firstpos); |
936
|
40
|
|
|
|
|
56
|
my @fields = (); |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
#for ($$textref) |
939
|
|
|
|
|
|
|
{ |
940
|
40
|
100
|
|
|
|
47
|
my @func = defined $_[1] ? @{$_[1]} : @{$def_func}; |
|
40
|
|
|
|
|
56
|
|
|
28
|
|
|
|
|
50
|
|
|
12
|
|
|
|
|
24
|
|
941
|
40
|
100
|
66
|
|
|
104
|
my $max = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000; |
942
|
40
|
|
|
|
|
46
|
my $igunk = $_[3]; |
943
|
|
|
|
|
|
|
|
944
|
40
|
|
50
|
|
|
190
|
pos $$textref ||= 0; |
945
|
|
|
|
|
|
|
|
946
|
40
|
100
|
|
|
|
88
|
unless (wantarray) |
947
|
|
|
|
|
|
|
{ |
948
|
9
|
|
|
9
|
|
87
|
use Carp; |
|
9
|
|
|
|
|
18
|
|
|
9
|
|
|
|
|
8289
|
|
949
|
14
|
0
|
33
|
|
|
36
|
carp "extract_multiple reset maximal count to 1 in scalar context" |
|
|
|
33
|
|
|
|
|
950
|
|
|
|
|
|
|
if $^W && defined($_[2]) && $max > 1; |
951
|
14
|
|
|
|
|
19
|
$max = 1 |
952
|
|
|
|
|
|
|
} |
953
|
|
|
|
|
|
|
|
954
|
40
|
|
|
|
|
47
|
my @class; |
955
|
40
|
|
|
|
|
57
|
foreach my $func ( @func ) |
956
|
|
|
|
|
|
|
{ |
957
|
79
|
|
|
|
|
88
|
push @class, undef; |
958
|
79
|
100
|
|
|
|
136
|
($class[-1], $func) = %$func if ref($func) eq 'HASH'; |
959
|
79
|
100
|
|
|
|
275
|
$func = qr/\G$func/ if !$ref_not_regex{ref $func}; |
960
|
|
|
|
|
|
|
} |
961
|
|
|
|
|
|
|
|
962
|
40
|
|
|
|
|
48
|
my $unkpos; |
963
|
40
|
|
|
|
|
81
|
FIELD: while (pos($$textref) < length($$textref)) |
964
|
|
|
|
|
|
|
{ |
965
|
566
|
|
|
|
|
1131
|
foreach my $i ( 0..$#func ) |
966
|
|
|
|
|
|
|
{ |
967
|
1082
|
|
|
|
|
1252
|
my ($field, $pref); |
968
|
1082
|
|
|
|
|
1503
|
my ($class, $func) = ($class[$i], $func[$i]); |
969
|
1082
|
|
|
|
|
1147
|
$lastpos = pos $$textref; |
970
|
1082
|
100
|
|
|
|
2098
|
if (ref($func) eq 'CODE') |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
971
|
923
|
|
|
|
|
1254
|
{ ($field,undef,$pref) = $func->($$textref) } |
972
|
|
|
|
|
|
|
elsif (ref($func) eq 'Text::Balanced::Extractor') |
973
|
0
|
|
|
|
|
0
|
{ $field = $func->extract($$textref) } |
974
|
|
|
|
|
|
|
elsif( $$textref =~ m/$func[$i]/gc ) |
975
|
33
|
100
|
|
|
|
128
|
{ $field = defined($1) |
976
|
|
|
|
|
|
|
? $1 |
977
|
|
|
|
|
|
|
: substr($$textref, $-[0], $+[0] - $-[0]) |
978
|
|
|
|
|
|
|
} |
979
|
1082
|
|
100
|
|
|
3116
|
$pref ||= ""; |
980
|
1082
|
100
|
100
|
|
|
2079
|
if (defined($field) && length($field)) |
981
|
|
|
|
|
|
|
{ |
982
|
105
|
100
|
|
|
|
163
|
if (!$igunk) { |
983
|
98
|
100
|
100
|
|
|
184
|
$unkpos = $lastpos |
984
|
|
|
|
|
|
|
if length($pref) && !defined($unkpos); |
985
|
98
|
100
|
|
|
|
145
|
if (defined $unkpos) |
986
|
|
|
|
|
|
|
{ |
987
|
70
|
|
|
|
|
136
|
push @fields, substr($$textref, $unkpos, $lastpos-$unkpos).$pref; |
988
|
70
|
100
|
|
|
|
116
|
$firstpos = $unkpos unless defined $firstpos; |
989
|
70
|
|
|
|
|
75
|
undef $unkpos; |
990
|
70
|
100
|
|
|
|
123
|
last FIELD if @fields == $max; |
991
|
|
|
|
|
|
|
} |
992
|
|
|
|
|
|
|
} |
993
|
102
|
50
|
|
|
|
396
|
push @fields, $class ? bless(\$field, $class) : $field; |
994
|
102
|
|
|
|
|
198
|
_update_patvalid($textref, $fields[-1]); |
995
|
102
|
100
|
|
|
|
193
|
$firstpos = $lastpos unless defined $firstpos; |
996
|
102
|
|
|
|
|
132
|
$lastpos = pos $$textref; |
997
|
102
|
100
|
|
|
|
171
|
last FIELD if @fields == $max; |
998
|
87
|
|
|
|
|
222
|
next FIELD; |
999
|
|
|
|
|
|
|
} |
1000
|
|
|
|
|
|
|
} |
1001
|
461
|
50
|
|
|
|
1185
|
if ($$textref =~ /\G(.)/gcs) |
1002
|
|
|
|
|
|
|
{ |
1003
|
461
|
100
|
100
|
|
|
1030
|
$unkpos = pos($$textref)-1 |
1004
|
|
|
|
|
|
|
unless $igunk || defined $unkpos; |
1005
|
461
|
|
|
|
|
909
|
_update_patvalid($textref, substr $$textref, $unkpos, pos($$textref)-$unkpos); |
1006
|
|
|
|
|
|
|
} |
1007
|
|
|
|
|
|
|
} |
1008
|
|
|
|
|
|
|
|
1009
|
40
|
100
|
|
|
|
70
|
if (defined $unkpos) |
1010
|
|
|
|
|
|
|
{ |
1011
|
17
|
|
|
|
|
30
|
push @fields, substr($$textref, $unkpos); |
1012
|
17
|
100
|
|
|
|
33
|
$firstpos = $unkpos unless defined $firstpos; |
1013
|
17
|
|
|
|
|
18
|
$lastpos = length $$textref; |
1014
|
|
|
|
|
|
|
} |
1015
|
40
|
|
|
|
|
75
|
last; |
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
|
1018
|
40
|
|
|
|
|
94
|
pos $$textref = $lastpos; |
1019
|
40
|
100
|
|
|
|
297
|
return @fields if wantarray; |
1020
|
|
|
|
|
|
|
|
1021
|
14
|
|
100
|
|
|
42
|
$firstpos ||= 0; |
1022
|
14
|
|
|
|
|
17
|
eval { substr($$textref,$firstpos,$lastpos-$firstpos)=""; |
|
14
|
|
|
|
|
30
|
|
1023
|
14
|
|
|
|
|
25
|
pos $$textref = $firstpos }; |
1024
|
14
|
|
|
|
|
79
|
return $fields[0]; |
1025
|
|
|
|
|
|
|
} |
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
sub gen_extract_tagged # ($opentag, $closetag, $pre, \%options) |
1028
|
|
|
|
|
|
|
{ |
1029
|
13
|
|
|
13
|
1
|
10996
|
my $ldel = $_[0]; |
1030
|
13
|
|
|
|
|
17
|
my $rdel = $_[1]; |
1031
|
13
|
100
|
|
|
|
59
|
my $pre = defined $_[2] ? qr/\G$_[2]/ : qr/\G\s*/; |
1032
|
13
|
100
|
|
|
|
31
|
my %options = defined $_[3] ? %{$_[3]} : (); |
|
6
|
|
|
|
|
21
|
|
1033
|
13
|
100
|
|
|
|
29
|
my $omode = defined $options{fail} ? $options{fail} : ''; |
1034
|
3
|
|
|
|
|
8
|
my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}}) |
1035
|
|
|
|
|
|
|
: defined($options{reject}) ? $options{reject} |
1036
|
13
|
50
|
|
|
|
36
|
: '' |
|
|
100
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
; |
1038
|
3
|
|
|
|
|
6
|
my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}}) |
1039
|
|
|
|
|
|
|
: defined($options{ignore}) ? $options{ignore} |
1040
|
13
|
50
|
|
|
|
35
|
: '' |
|
|
100
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
; |
1042
|
|
|
|
|
|
|
|
1043
|
13
|
100
|
|
|
|
30
|
$ldel = $et_default_ldel if !defined $ldel; |
1044
|
|
|
|
|
|
|
|
1045
|
13
|
|
|
|
|
18
|
my $posbug = pos; |
1046
|
13
|
100
|
|
|
|
20
|
for ($ldel, $bad, $ignore) { $_ = qr/$_/ if $_ } |
|
39
|
|
|
|
|
283
|
|
1047
|
13
|
|
|
|
|
26
|
pos = $posbug; |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
my $closure = sub |
1050
|
|
|
|
|
|
|
{ |
1051
|
40
|
50
|
|
40
|
|
27876
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
1052
|
40
|
|
|
|
|
95
|
my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore); |
1053
|
|
|
|
|
|
|
|
1054
|
40
|
100
|
|
|
|
101
|
return _fail(wantarray, $textref) unless @match; |
1055
|
28
|
|
|
|
|
99
|
return _succeed wantarray, $textref, |
1056
|
|
|
|
|
|
|
$match[2], $match[3]+$match[5]+$match[7], # MATCH |
1057
|
|
|
|
|
|
|
@match[8..9,0..1,2..7]; # REM, PRE, BITS |
1058
|
13
|
|
|
|
|
57
|
}; |
1059
|
|
|
|
|
|
|
|
1060
|
13
|
|
|
|
|
95
|
bless $closure, 'Text::Balanced::Extractor'; |
1061
|
|
|
|
|
|
|
} |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
package Text::Balanced::Extractor; |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
sub extract($$) # ($self, $text) |
1066
|
|
|
|
|
|
|
{ |
1067
|
0
|
|
|
0
|
|
|
&{$_[0]}($_[1]); |
|
0
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
} |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
package Text::Balanced::ErrorMsg; |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
use overload |
1073
|
33
|
|
|
33
|
|
128
|
'""' => sub { "$_[0]->{error}, detected at offset $_[0]->{pos}" }, |
1074
|
9
|
|
|
9
|
|
8236
|
fallback => 1; |
|
9
|
|
|
|
|
6721
|
|
|
9
|
|
|
|
|
76
|
|
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
1; |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
__END__ |