line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Copyright (C) 1997-2001 Damian Conway. All rights reserved. |
2
|
|
|
|
|
|
|
# Copyright (C) 2009 Adam Kennedy. |
3
|
|
|
|
|
|
|
# Copyright (C) 2015, 2022 Steve Hay and other contributors. All rights |
4
|
|
|
|
|
|
|
# reserved. |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# This module is free software; you can redistribute it and/or modify it under |
7
|
|
|
|
|
|
|
# the same terms as Perl itself, i.e. under the terms of either the GNU General |
8
|
|
|
|
|
|
|
# Public License or the Artistic License, as specified in the F file. |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package Text::Balanced; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS. |
13
|
|
|
|
|
|
|
# FOR FULL DOCUMENTATION SEE Balanced.pod |
14
|
|
|
|
|
|
|
|
15
|
9
|
|
|
9
|
|
396052
|
use 5.008001; |
|
9
|
|
|
|
|
89
|
|
16
|
9
|
|
|
9
|
|
39
|
use strict; |
|
9
|
|
|
|
|
21
|
|
|
9
|
|
|
|
|
169
|
|
17
|
9
|
|
|
9
|
|
41
|
use Exporter (); |
|
9
|
|
|
|
|
12
|
|
|
9
|
|
|
|
|
163
|
|
18
|
|
|
|
|
|
|
|
19
|
9
|
|
|
9
|
|
52
|
use vars qw { $VERSION @ISA %EXPORT_TAGS }; |
|
9
|
|
|
|
|
56
|
|
|
9
|
|
|
|
|
916
|
|
20
|
|
|
|
|
|
|
BEGIN { |
21
|
9
|
|
|
9
|
|
37
|
$VERSION = '2.06'; |
22
|
9
|
|
|
|
|
118
|
@ISA = 'Exporter'; |
23
|
9
|
|
|
|
|
22018
|
%EXPORT_TAGS = ( |
24
|
|
|
|
|
|
|
ALL => [ qw{ |
25
|
|
|
|
|
|
|
&extract_delimited |
26
|
|
|
|
|
|
|
&extract_bracketed |
27
|
|
|
|
|
|
|
&extract_quotelike |
28
|
|
|
|
|
|
|
&extract_codeblock |
29
|
|
|
|
|
|
|
&extract_variable |
30
|
|
|
|
|
|
|
&extract_tagged |
31
|
|
|
|
|
|
|
&extract_multiple |
32
|
|
|
|
|
|
|
&gen_delimited_pat |
33
|
|
|
|
|
|
|
&gen_extract_tagged |
34
|
|
|
|
|
|
|
&delimited_pat |
35
|
|
|
|
|
|
|
} ], |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Exporter::export_ok_tags('ALL'); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
our $RE_PREREGEX_PAT = qr#( |
42
|
|
|
|
|
|
|
[!=]~ |
43
|
|
|
|
|
|
|
| split|grep|map |
44
|
|
|
|
|
|
|
| not|and|or|xor |
45
|
|
|
|
|
|
|
)#x; |
46
|
|
|
|
|
|
|
our $RE_EXPR_PAT = qr#( |
47
|
|
|
|
|
|
|
(?:\*\*|&&|\|\||<<|>>|//|[-+*x%^&|.])=? |
48
|
|
|
|
|
|
|
| /(?:[^/]) |
49
|
|
|
|
|
|
|
| =(?!>) |
50
|
|
|
|
|
|
|
| return |
51
|
|
|
|
|
|
|
| [\(\[] |
52
|
|
|
|
|
|
|
)#x; |
53
|
|
|
|
|
|
|
our $RE_NUM = qr/\s*[+\-.0-9][+\-.0-9e]*/i; # numerical constant |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
our %ref2slashvalid; # is quotelike /.../ pattern valid here for given textref? |
56
|
|
|
|
|
|
|
our %ref2qmarkvalid; # is quotelike ?...? pattern valid here for given textref? |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# HANDLE RETURN VALUES IN VARIOUS CONTEXTS |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub _failmsg { |
61
|
2697
|
|
|
2697
|
|
4433
|
my ($message, $pos) = @_; |
62
|
2697
|
|
|
|
|
6337
|
$@ = bless { |
63
|
|
|
|
|
|
|
error => $message, |
64
|
|
|
|
|
|
|
pos => $pos, |
65
|
|
|
|
|
|
|
}, 'Text::Balanced::ErrorMsg'; |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub _fail { |
69
|
941
|
|
|
941
|
|
1365
|
my ($wantarray, $textref, $message, $pos) = @_; |
70
|
941
|
100
|
|
|
|
1337
|
_failmsg $message, $pos if $message; |
71
|
941
|
100
|
|
|
|
2660
|
return (undef, $$textref, undef) if $wantarray; |
72
|
20
|
|
|
|
|
130
|
return; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
sub _succeed { |
76
|
511
|
|
|
511
|
|
696
|
$@ = undef; |
77
|
511
|
|
|
|
|
1124
|
my ($wantarray,$textref) = splice @_, 0, 2; |
78
|
511
|
100
|
|
|
|
1091
|
my ($extrapos, $extralen) = @_ > 18 |
79
|
|
|
|
|
|
|
? splice(@_, -2, 2) |
80
|
|
|
|
|
|
|
: (0, 0); |
81
|
511
|
|
|
|
|
793
|
my ($startlen, $oppos) = @_[5,6]; |
82
|
511
|
|
|
|
|
621
|
my $remainderpos = $_[2]; |
83
|
511
|
100
|
|
|
|
768
|
if ( $wantarray ) { |
84
|
294
|
|
|
|
|
307
|
my @res; |
85
|
294
|
|
|
|
|
684
|
while (my ($from, $len) = splice @_, 0, 2) { |
86
|
1455
|
|
|
|
|
3357
|
push @res, substr($$textref, $from, $len); |
87
|
|
|
|
|
|
|
} |
88
|
294
|
100
|
|
|
|
561
|
if ( $extralen ) { # CORRECT FILLET |
89
|
13
|
|
|
|
|
43
|
my $extra = substr($res[0], $extrapos-$oppos, $extralen, "\n"); |
90
|
13
|
|
|
|
|
25
|
$res[1] = "$extra$res[1]"; |
91
|
13
|
|
|
|
|
23
|
eval { substr($$textref,$remainderpos,0) = $extra; |
|
13
|
|
|
|
|
43
|
|
92
|
12
|
|
|
|
|
31
|
substr($$textref,$extrapos,$extralen,"\n")} ; |
93
|
|
|
|
|
|
|
#REARRANGE HERE DOC AND FILLET IF POSSIBLE |
94
|
13
|
|
|
|
|
41
|
pos($$textref) = $remainderpos-$extralen+1; # RESET \G |
95
|
|
|
|
|
|
|
} else { |
96
|
281
|
|
|
|
|
501
|
pos($$textref) = $remainderpos; # RESET \G |
97
|
|
|
|
|
|
|
} |
98
|
294
|
|
|
|
|
2521
|
return @res; |
99
|
|
|
|
|
|
|
} else { |
100
|
217
|
|
|
|
|
442
|
my $match = substr($$textref,$_[0],$_[1]); |
101
|
217
|
100
|
|
|
|
449
|
substr($match,$extrapos-$_[0]-$startlen,$extralen,"") if $extralen; |
102
|
217
|
100
|
|
|
|
466
|
my $extra = $extralen |
103
|
|
|
|
|
|
|
? substr($$textref, $extrapos, $extralen)."\n" : ""; |
104
|
217
|
|
|
|
|
326
|
eval {substr($$textref,$_[4],$_[1]+$_[5])=$extra} ; #CHOP OUT PREFIX & MATCH, IF POSSIBLE |
|
217
|
|
|
|
|
598
|
|
105
|
217
|
|
|
|
|
421
|
pos($$textref) = $_[4]; # RESET \G |
106
|
217
|
|
|
|
|
1788
|
return $match; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING |
111
|
|
|
|
|
|
|
## no critic (Subroutines::ProhibitSubroutinePrototypes) |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub gen_delimited_pat($;$) # ($delimiters;$escapes) |
114
|
|
|
|
|
|
|
{ |
115
|
59
|
|
|
59
|
1
|
116
|
my ($dels, $escs) = @_; |
116
|
59
|
50
|
|
|
|
206
|
return "" unless $dels =~ /\S/; |
117
|
59
|
100
|
|
|
|
110
|
$escs = '\\' unless $escs; |
118
|
59
|
|
|
|
|
140
|
$escs .= substr($escs,-1) x (length($dels)-length($escs)); |
119
|
59
|
|
|
|
|
77
|
my @pat = (); |
120
|
59
|
|
|
|
|
88
|
my $i; |
121
|
59
|
|
|
|
|
130
|
for ($i=0; $i
|
122
|
|
|
|
|
|
|
{ |
123
|
161
|
|
|
|
|
272
|
my $del = quotemeta substr($dels,$i,1); |
124
|
161
|
|
|
|
|
199
|
my $esc = quotemeta substr($escs,$i,1); |
125
|
161
|
100
|
|
|
|
257
|
if ($del eq $esc) |
126
|
|
|
|
|
|
|
{ |
127
|
24
|
|
|
|
|
69
|
push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del"; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
else |
130
|
|
|
|
|
|
|
{ |
131
|
137
|
|
|
|
|
389
|
push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del"; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
} |
134
|
59
|
|
|
|
|
134
|
my $pat = join '|', @pat; |
135
|
59
|
|
|
|
|
178
|
return "(?:$pat)"; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
*delimited_pat = \&gen_delimited_pat; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# THE EXTRACTION FUNCTIONS |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub extract_delimited (;$$$$) |
143
|
|
|
|
|
|
|
{ |
144
|
50
|
50
|
|
50
|
1
|
41984
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
145
|
50
|
100
|
|
|
|
167
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
146
|
50
|
|
|
|
|
102
|
my $wantarray = wantarray; |
147
|
50
|
100
|
|
|
|
79
|
my $del = defined $_[1] ? $_[1] : qq{\'\"\`}; |
148
|
50
|
100
|
|
|
|
70
|
my $pre = defined $_[2] ? $_[2] : '\s*'; |
149
|
50
|
100
|
|
|
|
68
|
my $esc = defined $_[3] ? $_[3] : qq{\\}; |
150
|
50
|
|
|
|
|
85
|
my $pat = gen_delimited_pat($del, $esc); |
151
|
50
|
|
100
|
|
|
151
|
my $startpos = pos $$textref || 0; |
152
|
50
|
100
|
|
|
|
541
|
return _fail($wantarray, $textref, "Not a delimited pattern", 0) |
153
|
|
|
|
|
|
|
unless $$textref =~ m/\G($pre)($pat)/gc; |
154
|
43
|
|
|
|
|
90
|
my $prelen = length($1); |
155
|
43
|
|
|
|
|
49
|
my $matchpos = $startpos+$prelen; |
156
|
43
|
|
|
|
|
50
|
my $endpos = pos $$textref; |
157
|
43
|
|
|
|
|
89
|
return _succeed $wantarray, $textref, |
158
|
|
|
|
|
|
|
$matchpos, $endpos-$matchpos, # MATCH |
159
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, # REMAINDER |
160
|
|
|
|
|
|
|
$startpos, $prelen; # PREFIX |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
my %eb_delim_cache; |
164
|
|
|
|
|
|
|
sub _eb_delims { |
165
|
51
|
|
|
51
|
|
69
|
my ($ldel_orig) = @_; |
166
|
51
|
100
|
|
|
|
86
|
return @{ $eb_delim_cache{$ldel_orig} } if $eb_delim_cache{$ldel_orig}; |
|
45
|
|
|
|
|
91
|
|
167
|
6
|
|
|
|
|
10
|
my $qdel = ""; |
168
|
6
|
|
|
|
|
13
|
my $quotelike; |
169
|
6
|
|
|
|
|
9
|
my $ldel = $ldel_orig; |
170
|
6
|
50
|
|
|
|
14
|
$ldel =~ s/'//g and $qdel .= q{'}; |
171
|
6
|
100
|
|
|
|
17
|
$ldel =~ s/"//g and $qdel .= q{"}; |
172
|
6
|
100
|
|
|
|
15
|
$ldel =~ s/`//g and $qdel .= q{`}; |
173
|
6
|
100
|
|
|
|
13
|
$ldel =~ s/q//g and $quotelike = 1; |
174
|
6
|
|
|
|
|
9
|
$ldel =~ tr/[](){}<>\0-\377/[[(({{<
|
175
|
6
|
|
|
|
|
8
|
my $rdel = $ldel; |
176
|
6
|
50
|
|
|
|
11
|
return @{ $eb_delim_cache{$ldel_orig} = [] } unless $rdel =~ tr/[({])}>/; |
|
0
|
|
|
|
|
0
|
|
177
|
6
|
|
|
|
|
9
|
my $posbug = pos; |
178
|
6
|
|
|
|
|
16
|
$ldel = join('|', map { quotemeta $_ } split('', $ldel)); |
|
12
|
|
|
|
|
27
|
|
179
|
6
|
|
|
|
|
13
|
$rdel = join('|', map { quotemeta $_ } split('', $rdel)); |
|
12
|
|
|
|
|
21
|
|
180
|
6
|
|
|
|
|
15
|
pos = $posbug; |
181
|
6
|
|
66
|
|
|
12
|
@{ $eb_delim_cache{$ldel_orig} = [ |
|
6
|
|
|
|
|
169
|
|
182
|
|
|
|
|
|
|
qr/\G($ldel)/, $qdel && qr/\G([$qdel])/, $quotelike, qr/\G($rdel)/ |
183
|
|
|
|
|
|
|
] }; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
sub extract_bracketed (;$$$) |
186
|
|
|
|
|
|
|
{ |
187
|
51
|
50
|
|
51
|
1
|
9310
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
188
|
51
|
100
|
|
|
|
147
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
189
|
51
|
100
|
|
|
|
81
|
my $ldel = defined $_[1] ? $_[1] : '{([<'; |
190
|
51
|
50
|
|
|
|
132
|
my $pre = defined $_[2] ? qr/\G$_[2]/ : qr/\G\s*/; |
191
|
51
|
|
|
|
|
67
|
my $wantarray = wantarray; |
192
|
51
|
|
|
|
|
76
|
my @ret = _eb_delims($ldel); |
193
|
51
|
50
|
|
|
|
97
|
unless (@ret) |
194
|
|
|
|
|
|
|
{ |
195
|
0
|
|
|
|
|
0
|
return _fail $wantarray, $textref, |
196
|
|
|
|
|
|
|
"Did not find a suitable bracket in delimiter: \"$_[1]\"", |
197
|
|
|
|
|
|
|
0; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
51
|
|
100
|
|
|
109
|
my $startpos = pos $$textref || 0; |
201
|
51
|
|
|
|
|
74
|
my @match = _match_bracketed($textref, $pre, @ret); |
202
|
|
|
|
|
|
|
|
203
|
51
|
100
|
|
|
|
89
|
return _fail ($wantarray, $textref) unless @match; |
204
|
|
|
|
|
|
|
|
205
|
18
|
|
|
|
|
48
|
return _succeed ( $wantarray, $textref, |
206
|
|
|
|
|
|
|
$match[2], $match[5]+2, # MATCH |
207
|
|
|
|
|
|
|
@match[8,9], # REMAINDER |
208
|
|
|
|
|
|
|
@match[0,1], # PREFIX |
209
|
|
|
|
|
|
|
); |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub _match_bracketed # $textref, $pre, $ldel, $qdel, $quotelike, $rdel |
213
|
|
|
|
|
|
|
{ |
214
|
93
|
|
|
93
|
|
177
|
my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_; |
215
|
93
|
|
100
|
|
|
283
|
my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0); |
216
|
93
|
50
|
|
|
|
370
|
unless ($$textref =~ m/$pre/gc) |
217
|
|
|
|
|
|
|
{ |
218
|
0
|
|
|
|
|
0
|
_failmsg "Did not find prefix: /$pre/", $startpos; |
219
|
0
|
|
|
|
|
0
|
return; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
93
|
|
|
|
|
130
|
$ldelpos = pos $$textref; |
223
|
|
|
|
|
|
|
|
224
|
93
|
100
|
|
|
|
295
|
unless ($$textref =~ m/$ldel/gc) |
225
|
|
|
|
|
|
|
{ |
226
|
31
|
|
|
|
|
92
|
_failmsg "Did not find opening bracket after prefix: \"$pre\"", |
227
|
|
|
|
|
|
|
pos $$textref; |
228
|
31
|
|
|
|
|
65
|
pos $$textref = $startpos; |
229
|
31
|
|
|
|
|
46
|
return; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
62
|
|
|
|
|
145
|
my @nesting = ( $1 ); |
233
|
62
|
|
|
|
|
87
|
my $textlen = length $$textref; |
234
|
62
|
|
|
|
|
127
|
while (pos $$textref < $textlen) |
235
|
|
|
|
|
|
|
{ |
236
|
636
|
100
|
|
|
|
930
|
next if $$textref =~ m/\G\\./gcs; |
237
|
|
|
|
|
|
|
|
238
|
628
|
100
|
100
|
|
|
2574
|
if ($$textref =~ m/$ldel/gc) |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
239
|
|
|
|
|
|
|
{ |
240
|
18
|
|
|
|
|
41
|
push @nesting, $1; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
elsif ($$textref =~ m/$rdel/gc) |
243
|
|
|
|
|
|
|
{ |
244
|
78
|
|
|
|
|
167
|
my ($found, $brackettype) = ($1, $1); |
245
|
78
|
50
|
|
|
|
138
|
if ($#nesting < 0) |
246
|
|
|
|
|
|
|
{ |
247
|
0
|
|
|
|
|
0
|
_failmsg "Unmatched closing bracket: \"$found\"", |
248
|
|
|
|
|
|
|
pos $$textref; |
249
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
250
|
0
|
|
|
|
|
0
|
return; |
251
|
|
|
|
|
|
|
} |
252
|
78
|
|
|
|
|
117
|
my $expected = pop(@nesting); |
253
|
78
|
|
|
|
|
108
|
$expected =~ tr/({[)}]>/; |
254
|
78
|
50
|
|
|
|
138
|
if ($expected ne $brackettype) |
255
|
|
|
|
|
|
|
{ |
256
|
0
|
|
|
|
|
0
|
_failmsg qq{Mismatched closing bracket: expected "$expected" but found "$found"}, |
257
|
|
|
|
|
|
|
pos $$textref; |
258
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
259
|
0
|
|
|
|
|
0
|
return; |
260
|
|
|
|
|
|
|
} |
261
|
78
|
100
|
|
|
|
171
|
last if $#nesting < 0; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
elsif ($qdel && $$textref =~ m/$qdel/gc) |
264
|
|
|
|
|
|
|
{ |
265
|
10
|
50
|
|
|
|
210
|
$$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gsc and next; |
266
|
0
|
|
|
|
|
0
|
_failmsg "Unmatched embedded quote ($1)", |
267
|
|
|
|
|
|
|
pos $$textref; |
268
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
269
|
0
|
|
|
|
|
0
|
return; |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
elsif ($quotelike && _match_quotelike($textref,qr/\G()/,$ref2slashvalid{$textref},$ref2qmarkvalid{$textref})) |
272
|
|
|
|
|
|
|
{ |
273
|
4
|
|
|
|
|
7
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; # back-compat |
274
|
4
|
|
|
|
|
11
|
next; |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
518
|
|
|
|
|
1118
|
else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs } |
278
|
|
|
|
|
|
|
} |
279
|
62
|
100
|
|
|
|
109
|
if ($#nesting>=0) |
280
|
|
|
|
|
|
|
{ |
281
|
2
|
|
|
|
|
9
|
_failmsg "Unmatched opening bracket(s): " |
282
|
|
|
|
|
|
|
. join("..",@nesting)."..", |
283
|
|
|
|
|
|
|
pos $$textref; |
284
|
2
|
|
|
|
|
4
|
pos $$textref = $startpos; |
285
|
2
|
|
|
|
|
5
|
return; |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
60
|
|
|
|
|
77
|
$endpos = pos $$textref; |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
return ( |
291
|
60
|
|
|
|
|
203
|
$startpos, $ldelpos-$startpos, # PREFIX |
292
|
|
|
|
|
|
|
$ldelpos, 1, # OPENING BRACKET |
293
|
|
|
|
|
|
|
$ldelpos+1, $endpos-$ldelpos-2, # CONTENTS |
294
|
|
|
|
|
|
|
$endpos-1, 1, # CLOSING BRACKET |
295
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, # REMAINDER |
296
|
|
|
|
|
|
|
); |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
sub _revbracket($) |
300
|
|
|
|
|
|
|
{ |
301
|
70
|
|
|
70
|
|
119
|
my $brack = reverse $_[0]; |
302
|
70
|
|
|
|
|
100
|
$brack =~ tr/[({])}>/; |
303
|
70
|
|
|
|
|
237
|
return $brack; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
my $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*}; |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
my $et_default_ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; |
309
|
|
|
|
|
|
|
sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options) |
310
|
|
|
|
|
|
|
{ |
311
|
48
|
50
|
|
48
|
1
|
44510
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
312
|
48
|
50
|
|
|
|
164
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
313
|
48
|
|
|
|
|
74
|
my $ldel = $_[1]; |
314
|
48
|
|
|
|
|
56
|
my $rdel = $_[2]; |
315
|
48
|
100
|
|
|
|
138
|
my $pre = defined $_[3] ? qr/\G$_[3]/ : qr/\G\s*/; |
316
|
48
|
100
|
|
|
|
95
|
my %options = defined $_[4] ? %{$_[4]} : (); |
|
16
|
|
|
|
|
46
|
|
317
|
48
|
100
|
|
|
|
104
|
my $omode = defined $options{fail} ? $options{fail} : ''; |
318
|
6
|
|
|
|
|
14
|
my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}}) |
319
|
|
|
|
|
|
|
: defined($options{reject}) ? $options{reject} |
320
|
48
|
50
|
|
|
|
107
|
: '' |
|
|
100
|
|
|
|
|
|
321
|
|
|
|
|
|
|
; |
322
|
10
|
|
|
|
|
19
|
my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}}) |
323
|
|
|
|
|
|
|
: defined($options{ignore}) ? $options{ignore} |
324
|
48
|
50
|
|
|
|
93
|
: '' |
|
|
100
|
|
|
|
|
|
325
|
|
|
|
|
|
|
; |
326
|
|
|
|
|
|
|
|
327
|
48
|
100
|
|
|
|
88
|
$ldel = $et_default_ldel if !defined $ldel; |
328
|
48
|
|
|
|
|
55
|
$@ = undef; |
329
|
|
|
|
|
|
|
|
330
|
48
|
|
|
|
|
107
|
my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore); |
331
|
|
|
|
|
|
|
|
332
|
48
|
100
|
|
|
|
101
|
return _fail(wantarray, $textref) unless @match; |
333
|
34
|
|
|
|
|
110
|
return _succeed wantarray, $textref, |
334
|
|
|
|
|
|
|
$match[2], $match[3]+$match[5]+$match[7], # MATCH |
335
|
|
|
|
|
|
|
@match[8..9,0..1,2..7]; # REM, PRE, BITS |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
sub _match_tagged # ($$$$$$$) |
339
|
|
|
|
|
|
|
{ |
340
|
120
|
|
|
120
|
|
287
|
my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_; |
341
|
120
|
|
|
|
|
173
|
my $rdelspec; |
342
|
|
|
|
|
|
|
|
343
|
120
|
|
100
|
|
|
507
|
my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) = ( pos($$textref) = pos($$textref)||0 ); |
344
|
|
|
|
|
|
|
|
345
|
120
|
50
|
|
|
|
583
|
unless ($$textref =~ m/$pre/gc) |
346
|
|
|
|
|
|
|
{ |
347
|
0
|
|
|
|
|
0
|
_failmsg "Did not find prefix: /$pre/", pos $$textref; |
348
|
0
|
|
|
|
|
0
|
goto failed; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
120
|
|
|
|
|
181
|
$opentagpos = pos($$textref); |
352
|
|
|
|
|
|
|
|
353
|
120
|
100
|
|
|
|
837
|
unless ($$textref =~ m/\G$ldel/gc) |
354
|
|
|
|
|
|
|
{ |
355
|
8
|
|
|
|
|
31
|
_failmsg "Did not find opening tag: /$ldel/", pos $$textref; |
356
|
8
|
|
|
|
|
26
|
goto failed; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
112
|
|
|
|
|
181
|
$textpos = pos($$textref); |
360
|
|
|
|
|
|
|
|
361
|
112
|
100
|
|
|
|
175
|
if (!defined $rdel) |
362
|
|
|
|
|
|
|
{ |
363
|
70
|
|
|
|
|
232
|
$rdelspec = substr($$textref, $-[0], $+[0] - $-[0]); |
364
|
70
|
50
|
|
|
|
349
|
unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". _revbracket($1) /oes) |
|
70
|
|
|
|
|
192
|
|
365
|
|
|
|
|
|
|
{ |
366
|
0
|
|
|
|
|
0
|
_failmsg "Unable to construct closing tag to match: $rdel", |
367
|
|
|
|
|
|
|
pos $$textref; |
368
|
0
|
|
|
|
|
0
|
goto failed; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
else |
372
|
|
|
|
|
|
|
{ |
373
|
|
|
|
|
|
|
## no critic (BuiltinFunctions::ProhibitStringyEval) |
374
|
42
|
|
66
|
|
|
1969
|
$rdelspec = eval "qq{$rdel}" || do { |
375
|
|
|
|
|
|
|
my $del; |
376
|
|
|
|
|
|
|
for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',) |
377
|
|
|
|
|
|
|
{ next if $rdel =~ /\Q$_/; $del = $_; last } |
378
|
|
|
|
|
|
|
unless ($del) { |
379
|
9
|
|
|
9
|
|
89
|
use Carp; |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
33864
|
|
380
|
|
|
|
|
|
|
croak "Can't interpolate right delimiter $rdel" |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
eval "qq$del$rdel$del"; |
383
|
|
|
|
|
|
|
}; |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
112
|
|
|
|
|
308
|
while (pos($$textref) < length($$textref)) |
387
|
|
|
|
|
|
|
{ |
388
|
1240
|
50
|
|
|
|
1860
|
next if $$textref =~ m/\G\\./gc; |
389
|
|
|
|
|
|
|
|
390
|
1240
|
50
|
100
|
|
|
6258
|
if ($$textref =~ m/\G(\n[ \t]*\n)/gc ) |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
391
|
|
|
|
|
|
|
{ |
392
|
0
|
0
|
|
|
|
0
|
$parapos = pos($$textref) - length($1) |
393
|
|
|
|
|
|
|
unless defined $parapos; |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
elsif ($$textref =~ m/\G($rdelspec)/gc ) |
396
|
|
|
|
|
|
|
{ |
397
|
82
|
|
|
|
|
143
|
$closetagpos = pos($$textref)-length($1); |
398
|
82
|
|
|
|
|
596
|
goto matched; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc) |
401
|
|
|
|
|
|
|
{ |
402
|
12
|
|
|
|
|
28
|
next; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
elsif ($bad && $$textref =~ m/\G($bad)/gcs) |
405
|
|
|
|
|
|
|
{ |
406
|
12
|
|
|
|
|
33
|
pos($$textref) -= length($1); # CUT OFF WHATEVER CAUSED THE SHORTNESS |
407
|
12
|
100
|
100
|
|
|
94
|
goto short if ($omode eq 'PARA' || $omode eq 'MAX'); |
408
|
4
|
|
|
|
|
15
|
_failmsg "Found invalid nested tag: $1", pos $$textref; |
409
|
4
|
|
|
|
|
35
|
goto failed; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
elsif ($$textref =~ m/\G($ldel)/gc) |
412
|
|
|
|
|
|
|
{ |
413
|
32
|
|
|
|
|
64
|
my $tag = $1; |
414
|
32
|
|
|
|
|
63
|
pos($$textref) -= length($tag); # REWIND TO NESTED TAG |
415
|
32
|
100
|
|
|
|
80
|
unless (_match_tagged(@_)) # MATCH NESTED TAG |
416
|
|
|
|
|
|
|
{ |
417
|
4
|
50
|
33
|
|
|
20
|
goto short if $omode eq 'PARA' || $omode eq 'MAX'; |
418
|
4
|
|
|
|
|
29
|
_failmsg "Found unbalanced nested tag: $tag", |
419
|
|
|
|
|
|
|
pos $$textref; |
420
|
4
|
|
|
|
|
42
|
goto failed; |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
} |
423
|
1102
|
|
|
|
|
2406
|
else { $$textref =~ m/./gcs } |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
short: |
427
|
22
|
|
|
|
|
36
|
$closetagpos = pos($$textref); |
428
|
22
|
100
|
|
|
|
38
|
goto matched if $omode eq 'MAX'; |
429
|
18
|
100
|
|
|
|
44
|
goto failed unless $omode eq 'PARA'; |
430
|
|
|
|
|
|
|
|
431
|
4
|
50
|
|
|
|
8
|
if (defined $parapos) { pos($$textref) = $parapos } |
|
0
|
|
|
|
|
0
|
|
432
|
4
|
|
|
|
|
7
|
else { $parapos = pos($$textref) } |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
return ( |
435
|
4
|
|
|
|
|
15
|
$startpos, $opentagpos-$startpos, # PREFIX |
436
|
|
|
|
|
|
|
$opentagpos, $textpos-$opentagpos, # OPENING TAG |
437
|
|
|
|
|
|
|
$textpos, $parapos-$textpos, # TEXT |
438
|
|
|
|
|
|
|
$parapos, 0, # NO CLOSING TAG |
439
|
|
|
|
|
|
|
$parapos, length($$textref)-$parapos, # REMAINDER |
440
|
|
|
|
|
|
|
); |
441
|
|
|
|
|
|
|
|
442
|
86
|
|
|
|
|
106
|
matched: |
443
|
|
|
|
|
|
|
$endpos = pos($$textref); |
444
|
|
|
|
|
|
|
return ( |
445
|
86
|
|
|
|
|
309
|
$startpos, $opentagpos-$startpos, # PREFIX |
446
|
|
|
|
|
|
|
$opentagpos, $textpos-$opentagpos, # OPENING TAG |
447
|
|
|
|
|
|
|
$textpos, $closetagpos-$textpos, # TEXT |
448
|
|
|
|
|
|
|
$closetagpos, $endpos-$closetagpos, # CLOSING TAG |
449
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, # REMAINDER |
450
|
|
|
|
|
|
|
); |
451
|
|
|
|
|
|
|
|
452
|
30
|
100
|
|
|
|
122
|
failed: |
453
|
|
|
|
|
|
|
_failmsg "Did not find closing tag", pos $$textref unless $@; |
454
|
30
|
|
|
|
|
63
|
pos($$textref) = $startpos; |
455
|
30
|
|
|
|
|
66
|
return; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
sub extract_variable (;$$) |
459
|
|
|
|
|
|
|
{ |
460
|
625
|
50
|
|
625
|
1
|
160933
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
461
|
625
|
50
|
|
|
|
1127
|
return ("","","") unless defined $$textref; |
462
|
625
|
100
|
|
|
|
1187
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
463
|
625
|
100
|
|
|
|
1508
|
my $pre = defined $_[1] ? qr/\G$_[1]/ : qr/\G\s*/; |
464
|
|
|
|
|
|
|
|
465
|
625
|
|
|
|
|
971
|
my @match = _match_variable($textref,$pre); |
466
|
|
|
|
|
|
|
|
467
|
625
|
100
|
|
|
|
1177
|
return _fail wantarray, $textref unless @match; |
468
|
|
|
|
|
|
|
|
469
|
234
|
|
|
|
|
547
|
return _succeed wantarray, $textref, |
470
|
|
|
|
|
|
|
@match[2..3,4..5,0..1]; # MATCH, REMAINDER, PREFIX |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
sub _match_variable |
474
|
|
|
|
|
|
|
{ |
475
|
|
|
|
|
|
|
# $# |
476
|
|
|
|
|
|
|
# $^ |
477
|
|
|
|
|
|
|
# $$ |
478
|
1345
|
|
|
1345
|
|
1823
|
my ($textref, $pre) = @_; |
479
|
1345
|
|
100
|
|
|
3399
|
my $startpos = pos($$textref) = pos($$textref)||0; |
480
|
1345
|
100
|
|
|
|
4400
|
unless ($$textref =~ m/$pre/gc) |
481
|
|
|
|
|
|
|
{ |
482
|
341
|
|
|
|
|
923
|
_failmsg "Did not find prefix: /$pre/", pos $$textref; |
483
|
341
|
|
|
|
|
855
|
return; |
484
|
|
|
|
|
|
|
} |
485
|
1004
|
|
|
|
|
1363
|
my $varpos = pos($$textref); |
486
|
1004
|
100
|
|
|
|
1993
|
unless ($$textref =~ m{\G\$\s*(?!::)(\d+|[][&`'+*./|,";%=~:?!\@<>()-]|\^[a-z]?)}gci) |
487
|
|
|
|
|
|
|
{ |
488
|
912
|
100
|
|
|
|
2301
|
unless ($$textref =~ m/\G((\$#?|[*\@\%]|\\&)+)/gc) |
489
|
|
|
|
|
|
|
{ |
490
|
659
|
|
|
|
|
1302
|
_failmsg "Did not find leading dereferencer", pos $$textref; |
491
|
659
|
|
|
|
|
1269
|
pos $$textref = $startpos; |
492
|
659
|
|
|
|
|
1680
|
return; |
493
|
|
|
|
|
|
|
} |
494
|
253
|
|
|
|
|
431
|
my $deref = $1; |
495
|
|
|
|
|
|
|
|
496
|
253
|
50
|
100
|
|
|
890
|
unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
497
|
|
|
|
|
|
|
or _match_codeblock($textref, qr/\G()/, '\{', qr/\G\s*(\})/, '\{', '\}', 0, 1) |
498
|
|
|
|
|
|
|
or $deref eq '$#' or $deref eq '$$' |
499
|
|
|
|
|
|
|
or pos($$textref) == length $$textref ) |
500
|
|
|
|
|
|
|
{ |
501
|
0
|
|
|
|
|
0
|
_failmsg "Bad identifier after dereferencer", pos $$textref; |
502
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
503
|
0
|
|
|
|
|
0
|
return; |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
|
507
|
345
|
|
|
|
|
467
|
while (1) |
508
|
|
|
|
|
|
|
{ |
509
|
423
|
100
|
|
|
|
914
|
next if $$textref =~ m/\G\s*(?:->)?\s*[{]\w+[}]/gc; |
510
|
396
|
100
|
|
|
|
1772
|
next if _match_codeblock($textref, |
511
|
|
|
|
|
|
|
qr/\G\s*->\s*(?:[_a-zA-Z]\w+\s*)?/, |
512
|
|
|
|
|
|
|
qr/[({[]/, qr/\G\s*([)}\]])/, |
513
|
|
|
|
|
|
|
qr/[({[]/, qr/[)}\]]/, 0, 1); |
514
|
373
|
100
|
|
|
|
1710
|
next if _match_codeblock($textref, |
515
|
|
|
|
|
|
|
qr/\G\s*/, qr/[{[]/, qr/\G\s*([}\]])/, |
516
|
|
|
|
|
|
|
qr/[{[]/, qr/[}\]]/, 0, 1); |
517
|
357
|
50
|
|
|
|
1150
|
next if _match_variable($textref,qr/\G\s*->\s*/); |
518
|
357
|
100
|
|
|
|
844
|
next if $$textref =~ m/\G\s*->\s*\w+(?![{([])/gc; |
519
|
345
|
|
|
|
|
470
|
last; |
520
|
|
|
|
|
|
|
} |
521
|
345
|
|
|
|
|
607
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
522
|
|
|
|
|
|
|
|
523
|
345
|
|
|
|
|
409
|
my $endpos = pos($$textref); |
524
|
345
|
|
|
|
|
883
|
return ($startpos, $varpos-$startpos, |
525
|
|
|
|
|
|
|
$varpos, $endpos-$varpos, |
526
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos |
527
|
|
|
|
|
|
|
); |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
my %ec_delim_cache; |
531
|
|
|
|
|
|
|
sub _ec_delims { |
532
|
96
|
|
|
96
|
|
182
|
my ($ldel_inner, $ldel_outer) = @_; |
533
|
0
|
|
|
|
|
0
|
return @{ $ec_delim_cache{$ldel_outer}{$ldel_inner} } |
534
|
96
|
50
|
|
|
|
229
|
if $ec_delim_cache{$ldel_outer}{$ldel_inner}; |
535
|
96
|
|
|
|
|
133
|
my $rdel_inner = $ldel_inner; |
536
|
96
|
|
|
|
|
109
|
my $rdel_outer = $ldel_outer; |
537
|
96
|
|
|
|
|
156
|
my $posbug = pos; |
538
|
96
|
|
|
|
|
160
|
for ($ldel_inner, $ldel_outer) { tr/[]()<>{}\0-\377/[[((<<{{/ds } |
|
192
|
|
|
|
|
319
|
|
539
|
96
|
|
|
|
|
139
|
for ($rdel_inner, $rdel_outer) { tr/[]()<>{}\0-\377/]]))>>}}/ds } |
|
192
|
|
|
|
|
245
|
|
540
|
96
|
|
|
|
|
149
|
for ($ldel_inner, $ldel_outer, $rdel_inner, $rdel_outer) |
541
|
|
|
|
|
|
|
{ |
542
|
384
|
|
|
|
|
705
|
$_ = '('.join('|',map { quotemeta $_ } split('',$_)).')' |
|
400
|
|
|
|
|
924
|
|
543
|
|
|
|
|
|
|
} |
544
|
96
|
|
|
|
|
174
|
pos = $posbug; |
545
|
96
|
|
|
|
|
138
|
@{ $ec_delim_cache{$ldel_outer}{$ldel_inner} = [ |
|
96
|
|
|
|
|
846
|
|
546
|
|
|
|
|
|
|
$ldel_outer, qr/\G\s*($rdel_outer)/, $ldel_inner, $rdel_inner |
547
|
|
|
|
|
|
|
] }; |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
sub extract_codeblock (;$$$$$) |
550
|
|
|
|
|
|
|
{ |
551
|
96
|
50
|
|
96
|
1
|
52238
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
552
|
96
|
100
|
|
|
|
306
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
553
|
96
|
|
|
|
|
144
|
my $wantarray = wantarray; |
554
|
96
|
100
|
|
|
|
179
|
my $ldel_inner = defined $_[1] ? $_[1] : '{'; |
555
|
96
|
100
|
|
|
|
304
|
my $pre = !defined $_[2] ? qr/\G\s*/ : qr/\G$_[2]/; |
556
|
96
|
100
|
|
|
|
189
|
my $ldel_outer = defined $_[3] ? $_[3] : $ldel_inner; |
557
|
96
|
|
|
|
|
122
|
my $rd = $_[4]; |
558
|
96
|
|
|
|
|
201
|
my @delims = _ec_delims($ldel_inner, $ldel_outer); |
559
|
|
|
|
|
|
|
|
560
|
96
|
|
|
|
|
210
|
my @match = _match_codeblock($textref, $pre, @delims, $rd, 1); |
561
|
96
|
100
|
|
|
|
202
|
return _fail($wantarray, $textref) unless @match; |
562
|
51
|
|
|
|
|
197
|
return _succeed($wantarray, $textref, |
563
|
|
|
|
|
|
|
@match[2..3,4..5,0..1] # MATCH, REMAINDER, PREFIX |
564
|
|
|
|
|
|
|
); |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
sub _match_codeblock |
568
|
|
|
|
|
|
|
{ |
569
|
1044
|
|
|
1044
|
|
2889
|
my ($textref, $pre, $ldel_outer, $rdel_outer, $ldel_inner, $rdel_inner, $rd, $no_backcompat) = @_; |
570
|
1044
|
100
|
|
|
|
1547
|
$rdel_outer = qr/\G\s*($rdel_outer)/ if !$no_backcompat; # Switch calls this func directly |
571
|
1044
|
|
100
|
|
|
2236
|
my $startpos = pos($$textref) = pos($$textref) || 0; |
572
|
1044
|
100
|
|
|
|
3408
|
unless ($$textref =~ m/$pre/gc) |
573
|
|
|
|
|
|
|
{ |
574
|
357
|
|
|
|
|
1398
|
_failmsg qq{Did not match prefix /$pre/ at"} . |
575
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
576
|
|
|
|
|
|
|
q{..."}, |
577
|
|
|
|
|
|
|
pos $$textref; |
578
|
357
|
|
|
|
|
902
|
return; |
579
|
|
|
|
|
|
|
} |
580
|
687
|
|
|
|
|
926
|
my $codepos = pos($$textref); |
581
|
687
|
100
|
|
|
|
4865
|
unless ($$textref =~ m/\G($ldel_outer)/gc) # OUTERMOST DELIMITER |
582
|
|
|
|
|
|
|
{ |
583
|
552
|
|
|
|
|
1758
|
_failmsg qq{Did not find expected opening bracket at "} . |
584
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
585
|
|
|
|
|
|
|
q{..."}, |
586
|
|
|
|
|
|
|
pos $$textref; |
587
|
552
|
|
|
|
|
1216
|
pos $$textref = $startpos; |
588
|
552
|
|
|
|
|
1354
|
return; |
589
|
|
|
|
|
|
|
} |
590
|
135
|
|
|
|
|
268
|
my $closing = $1; |
591
|
135
|
|
|
|
|
197
|
$closing =~ tr/([<{/)]>}/; |
592
|
135
|
|
|
|
|
163
|
my $matched; |
593
|
|
|
|
|
|
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 |
594
|
135
|
100
|
66
|
|
|
509
|
if !pos($$textref) or !defined $ref2slashvalid{$textref}; # default, or reset |
595
|
135
|
|
|
|
|
282
|
while (pos($$textref) < length($$textref)) |
596
|
|
|
|
|
|
|
{ |
597
|
500
|
50
|
66
|
|
|
862
|
if ($rd && $$textref =~ m#\G(\Q(?)\E|\Q(s?)\E|\Q(s)\E)#gc) |
598
|
|
|
|
|
|
|
{ |
599
|
0
|
|
|
|
|
0
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
600
|
0
|
|
|
|
|
0
|
next; |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
500
|
100
|
|
|
|
1011
|
if ($$textref =~ m/\G\s*#.*/gc) |
604
|
|
|
|
|
|
|
{ |
605
|
6
|
|
|
|
|
11
|
next; |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
|
608
|
494
|
100
|
|
|
|
1653
|
if ($$textref =~ m/$rdel_outer/gc) |
609
|
|
|
|
|
|
|
{ |
610
|
131
|
100
|
66
|
|
|
539
|
unless ($matched = ($closing && $1 eq $closing) ) |
611
|
|
|
|
|
|
|
{ |
612
|
2
|
50
|
|
|
|
7
|
next if $1 eq '>'; # MIGHT BE A "LESS THAN" |
613
|
0
|
|
|
|
|
0
|
_failmsg q{Mismatched closing bracket at "} . |
614
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
615
|
|
|
|
|
|
|
qq{...". Expected '$closing'}, |
616
|
|
|
|
|
|
|
pos $$textref; |
617
|
|
|
|
|
|
|
} |
618
|
129
|
|
|
|
|
196
|
last; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
|
621
|
363
|
100
|
100
|
|
|
890
|
if (_match_variable($textref,qr/\G\s*/) || |
622
|
|
|
|
|
|
|
_match_quotelike($textref,qr/\G\s*/,$ref2slashvalid{$textref},$ref2qmarkvalid{$textref}) ) |
623
|
|
|
|
|
|
|
{ |
624
|
138
|
|
|
|
|
219
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
625
|
138
|
|
|
|
|
202
|
next; |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
225
|
100
|
|
|
|
3571
|
if ($$textref =~ m#\G\s*(?!$ldel_inner)(?:$RE_PREREGEX_PAT|$RE_EXPR_PAT)#gc) |
629
|
|
|
|
|
|
|
{ |
630
|
74
|
|
|
|
|
167
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; |
631
|
74
|
|
|
|
|
144
|
next; |
632
|
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
|
634
|
151
|
100
|
|
|
|
1031
|
if ( _match_codeblock($textref, qr/\G\s*/, $ldel_inner, qr/\G\s*($rdel_inner)/, $ldel_inner, $rdel_inner, $rd, 1) ) |
635
|
|
|
|
|
|
|
{ |
636
|
17
|
|
|
|
|
36
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; |
637
|
17
|
|
|
|
|
30
|
next; |
638
|
|
|
|
|
|
|
} |
639
|
|
|
|
|
|
|
|
640
|
134
|
50
|
|
|
|
752
|
if ($$textref =~ m/\G\s*$ldel_outer/gc) |
641
|
|
|
|
|
|
|
{ |
642
|
0
|
|
|
|
|
0
|
_failmsg q{Improperly nested codeblock at "} . |
643
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
644
|
|
|
|
|
|
|
q{..."}, |
645
|
|
|
|
|
|
|
pos $$textref; |
646
|
0
|
|
|
|
|
0
|
last; |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
|
649
|
134
|
|
|
|
|
300
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
650
|
134
|
|
|
|
|
389
|
$$textref =~ m/\G\s*(\w+|[-=>]>|.|\Z)/gc; |
651
|
|
|
|
|
|
|
} |
652
|
371
|
|
|
|
|
939
|
continue { $@ = undef } |
653
|
|
|
|
|
|
|
|
654
|
135
|
100
|
|
|
|
207
|
unless ($matched) |
655
|
|
|
|
|
|
|
{ |
656
|
6
|
50
|
|
|
|
23
|
_failmsg 'No match found for opening bracket', pos $$textref |
657
|
|
|
|
|
|
|
unless $@; |
658
|
6
|
|
|
|
|
49
|
return; |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
129
|
|
|
|
|
227
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = undef; |
662
|
129
|
|
|
|
|
157
|
my $endpos = pos($$textref); |
663
|
129
|
|
|
|
|
443
|
return ( $startpos, $codepos-$startpos, |
664
|
|
|
|
|
|
|
$codepos, $endpos-$codepos, |
665
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, |
666
|
|
|
|
|
|
|
); |
667
|
|
|
|
|
|
|
} |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
my %mods = ( |
671
|
|
|
|
|
|
|
'none' => '[cgimsox]*', |
672
|
|
|
|
|
|
|
'm' => '[cgimsox]*', |
673
|
|
|
|
|
|
|
's' => '[cegimsox]*', |
674
|
|
|
|
|
|
|
'tr' => '[cds]*', |
675
|
|
|
|
|
|
|
'y' => '[cds]*', |
676
|
|
|
|
|
|
|
'qq' => '', |
677
|
|
|
|
|
|
|
'qx' => '', |
678
|
|
|
|
|
|
|
'qw' => '', |
679
|
|
|
|
|
|
|
'qr' => '[imsx]*', |
680
|
|
|
|
|
|
|
'q' => '', |
681
|
|
|
|
|
|
|
); |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
sub extract_quotelike (;$$) |
684
|
|
|
|
|
|
|
{ |
685
|
542
|
100
|
|
542
|
1
|
115177
|
my $textref = $_[0] ? \$_[0] : \$_; |
686
|
542
|
100
|
|
|
|
1059
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
687
|
542
|
|
|
|
|
667
|
my $wantarray = wantarray; |
688
|
542
|
100
|
|
|
|
1265
|
my $pre = defined $_[1] ? qr/\G$_[1]/ : qr/\G\s*/; |
689
|
|
|
|
|
|
|
|
690
|
542
|
|
|
|
|
1147
|
my @match = _match_quotelike($textref,$pre,$ref2slashvalid{$textref},$ref2qmarkvalid{$textref}); |
691
|
542
|
100
|
|
|
|
1080
|
return _fail($wantarray, $textref) unless @match; |
692
|
103
|
|
|
|
|
409
|
return _succeed($wantarray, $textref, |
693
|
|
|
|
|
|
|
$match[2], $match[18]-$match[2], # MATCH |
694
|
|
|
|
|
|
|
@match[18,19], # REMAINDER |
695
|
|
|
|
|
|
|
@match[0,1], # PREFIX |
696
|
|
|
|
|
|
|
@match[2..17], # THE BITS |
697
|
|
|
|
|
|
|
@match[20,21], # ANY FILLET? |
698
|
|
|
|
|
|
|
); |
699
|
|
|
|
|
|
|
}; |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
my %maybe_quote = map +($_=>1), qw(" ' `); |
702
|
|
|
|
|
|
|
sub _match_quotelike |
703
|
|
|
|
|
|
|
{ |
704
|
848
|
|
|
848
|
|
1283
|
my ($textref, $pre, $allow_slash_match, $allow_qmark_match) = @_; |
705
|
|
|
|
|
|
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 |
706
|
848
|
100
|
100
|
|
|
2412
|
if !pos($$textref) or !defined $ref2slashvalid{$textref}; # default, or reset |
707
|
|
|
|
|
|
|
|
708
|
848
|
|
100
|
|
|
2685
|
my ($textlen,$startpos, |
709
|
|
|
|
|
|
|
$preld1pos,$ld1pos,$str1pos,$rd1pos, |
710
|
|
|
|
|
|
|
$preld2pos,$ld2pos,$str2pos,$rd2pos, |
711
|
|
|
|
|
|
|
$modpos) = ( length($$textref), pos($$textref) = pos($$textref) || 0 ); |
712
|
|
|
|
|
|
|
|
713
|
848
|
50
|
|
|
|
3003
|
unless ($$textref =~ m/$pre/gc) |
714
|
|
|
|
|
|
|
{ |
715
|
0
|
|
|
|
|
0
|
_failmsg qq{Did not find prefix /$pre/ at "} . |
716
|
|
|
|
|
|
|
substr($$textref, pos($$textref), 20) . |
717
|
|
|
|
|
|
|
q{..."}, |
718
|
|
|
|
|
|
|
pos $$textref; |
719
|
0
|
|
|
|
|
0
|
return; |
720
|
|
|
|
|
|
|
} |
721
|
848
|
|
|
|
|
1158
|
my $oppos = pos($$textref); |
722
|
848
|
|
|
|
|
1222
|
my $initial = substr($$textref,$oppos,1); |
723
|
848
|
100
|
100
|
|
|
4079
|
if ($initial && $maybe_quote{$initial} |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
724
|
|
|
|
|
|
|
|| $allow_slash_match && $initial eq '/' |
725
|
|
|
|
|
|
|
|| $allow_qmark_match && $initial eq '?') |
726
|
|
|
|
|
|
|
{ |
727
|
60
|
100
|
|
|
|
889
|
unless ($$textref =~ m/\G \Q$initial\E [^\\$initial]* (\\.[^\\$initial]*)* \Q$initial\E /gcsx) |
728
|
|
|
|
|
|
|
{ |
729
|
2
|
|
|
|
|
11
|
_failmsg qq{Did not find closing delimiter to match '$initial' at "} . |
730
|
|
|
|
|
|
|
substr($$textref, $oppos, 20) . |
731
|
|
|
|
|
|
|
q{..."}, |
732
|
|
|
|
|
|
|
pos $$textref; |
733
|
2
|
|
|
|
|
5
|
pos $$textref = $startpos; |
734
|
2
|
|
|
|
|
7
|
return; |
735
|
|
|
|
|
|
|
} |
736
|
58
|
|
|
|
|
121
|
$modpos= pos($$textref); |
737
|
58
|
|
|
|
|
83
|
$rd1pos = $modpos-1; |
738
|
|
|
|
|
|
|
|
739
|
58
|
100
|
66
|
|
|
185
|
if ($initial eq '/' || $initial eq '?') |
740
|
|
|
|
|
|
|
{ |
741
|
17
|
|
|
|
|
110
|
$$textref =~ m/\G$mods{none}/gc |
742
|
|
|
|
|
|
|
} |
743
|
|
|
|
|
|
|
|
744
|
58
|
|
|
|
|
102
|
my $endpos = pos($$textref); |
745
|
58
|
|
|
|
|
117
|
$ref2qmarkvalid{$textref} = $ref2slashvalid{$textref} = 0; |
746
|
|
|
|
|
|
|
return ( |
747
|
58
|
|
|
|
|
263
|
$startpos, $oppos-$startpos, # PREFIX |
748
|
|
|
|
|
|
|
$oppos, 0, # NO OPERATOR |
749
|
|
|
|
|
|
|
$oppos, 1, # LEFT DEL |
750
|
|
|
|
|
|
|
$oppos+1, $rd1pos-$oppos-1, # STR/PAT |
751
|
|
|
|
|
|
|
$rd1pos, 1, # RIGHT DEL |
752
|
|
|
|
|
|
|
$modpos, 0, # NO 2ND LDEL |
753
|
|
|
|
|
|
|
$modpos, 0, # NO 2ND STR |
754
|
|
|
|
|
|
|
$modpos, 0, # NO 2ND RDEL |
755
|
|
|
|
|
|
|
$modpos, $endpos-$modpos, # MODIFIERS |
756
|
|
|
|
|
|
|
$endpos, $textlen-$endpos, # REMAINDER |
757
|
|
|
|
|
|
|
); |
758
|
|
|
|
|
|
|
} |
759
|
|
|
|
|
|
|
|
760
|
788
|
100
|
|
|
|
1886
|
unless ($$textref =~ m{\G(\b(?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<(?=[a-zA-Z]|\s*['"`;,]))}gc) |
761
|
|
|
|
|
|
|
{ |
762
|
705
|
|
|
|
|
2170
|
_failmsg q{No quotelike operator found after prefix at "} . |
763
|
|
|
|
|
|
|
substr($$textref, pos($$textref), 20) . |
764
|
|
|
|
|
|
|
q{..."}, |
765
|
|
|
|
|
|
|
pos $$textref; |
766
|
705
|
|
|
|
|
1522
|
pos $$textref = $startpos; |
767
|
705
|
|
|
|
|
1469
|
return; |
768
|
|
|
|
|
|
|
} |
769
|
|
|
|
|
|
|
|
770
|
83
|
|
|
|
|
162
|
my $op = $1; |
771
|
83
|
|
|
|
|
109
|
$preld1pos = pos($$textref); |
772
|
83
|
100
|
|
|
|
154
|
if ($op eq '<<') { |
773
|
28
|
|
|
|
|
37
|
$ld1pos = pos($$textref); |
774
|
28
|
|
|
|
|
32
|
my $label; |
775
|
28
|
100
|
|
|
|
129
|
if ($$textref =~ m{\G([A-Za-z_]\w*)}gc) { |
|
|
100
|
|
|
|
|
|
776
|
9
|
|
|
|
|
17
|
$label = $1; |
777
|
|
|
|
|
|
|
} |
778
|
|
|
|
|
|
|
elsif ($$textref =~ m{ \G ' ([^'\\]* (?:\\.[^'\\]*)*) ' |
779
|
|
|
|
|
|
|
| \G " ([^"\\]* (?:\\.[^"\\]*)*) " |
780
|
|
|
|
|
|
|
| \G ` ([^`\\]* (?:\\.[^`\\]*)*) ` |
781
|
|
|
|
|
|
|
}gcsx) { |
782
|
16
|
|
|
|
|
38
|
$label = $+; |
783
|
|
|
|
|
|
|
} |
784
|
|
|
|
|
|
|
else { |
785
|
3
|
|
|
|
|
8
|
$label = ""; |
786
|
|
|
|
|
|
|
} |
787
|
28
|
|
|
|
|
41
|
my $extrapos = pos($$textref); |
788
|
28
|
|
|
|
|
78
|
$$textref =~ m{.*\n}gc; |
789
|
28
|
|
|
|
|
67
|
$str1pos = pos($$textref)--; |
790
|
28
|
100
|
|
|
|
370
|
unless ($$textref =~ m{.*?\n(?=\Q$label\E\n)}gc) { |
791
|
4
|
|
|
|
|
27
|
_failmsg qq{Missing here doc terminator ('$label') after "} . |
792
|
|
|
|
|
|
|
substr($$textref, $startpos, 20) . |
793
|
|
|
|
|
|
|
q{..."}, |
794
|
|
|
|
|
|
|
pos $$textref; |
795
|
4
|
|
|
|
|
9
|
pos $$textref = $startpos; |
796
|
4
|
|
|
|
|
13
|
return; |
797
|
|
|
|
|
|
|
} |
798
|
24
|
|
|
|
|
40
|
$rd1pos = pos($$textref); |
799
|
24
|
|
|
|
|
135
|
$$textref =~ m{\Q$label\E\n}gc; |
800
|
24
|
|
|
|
|
41
|
$ld2pos = pos($$textref); |
801
|
24
|
|
|
|
|
58
|
$ref2qmarkvalid{$textref} = $ref2slashvalid{$textref} = 0; |
802
|
|
|
|
|
|
|
return ( |
803
|
24
|
|
|
|
|
162
|
$startpos, $oppos-$startpos, # PREFIX |
804
|
|
|
|
|
|
|
$oppos, length($op), # OPERATOR |
805
|
|
|
|
|
|
|
$ld1pos, $extrapos-$ld1pos, # LEFT DEL |
806
|
|
|
|
|
|
|
$str1pos, $rd1pos-$str1pos, # STR/PAT |
807
|
|
|
|
|
|
|
$rd1pos, $ld2pos-$rd1pos, # RIGHT DEL |
808
|
|
|
|
|
|
|
$ld2pos, 0, # NO 2ND LDEL |
809
|
|
|
|
|
|
|
$ld2pos, 0, # NO 2ND STR |
810
|
|
|
|
|
|
|
$ld2pos, 0, # NO 2ND RDEL |
811
|
|
|
|
|
|
|
$ld2pos, 0, # NO MODIFIERS |
812
|
|
|
|
|
|
|
$ld2pos, $textlen-$ld2pos, # REMAINDER |
813
|
|
|
|
|
|
|
$extrapos, $str1pos-$extrapos, # FILLETED BIT |
814
|
|
|
|
|
|
|
); |
815
|
|
|
|
|
|
|
} |
816
|
|
|
|
|
|
|
|
817
|
55
|
|
|
|
|
151
|
$$textref =~ m/\G\s*/gc; |
818
|
55
|
|
|
|
|
77
|
$ld1pos = pos($$textref); |
819
|
55
|
|
|
|
|
82
|
$str1pos = $ld1pos+1; |
820
|
|
|
|
|
|
|
|
821
|
55
|
50
|
|
|
|
224
|
if ($$textref !~ m/\G(\S)/gc) # SHOULD USE LOOKAHEAD |
|
|
100
|
|
|
|
|
|
822
|
|
|
|
|
|
|
{ |
823
|
0
|
|
|
|
|
0
|
_failmsg "No block delimiter found after quotelike $op", |
824
|
|
|
|
|
|
|
pos $$textref; |
825
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
826
|
0
|
|
|
|
|
0
|
return; |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
elsif (substr($$textref, $ld1pos, 2) eq '=>') |
829
|
|
|
|
|
|
|
{ |
830
|
1
|
|
|
|
|
4
|
_failmsg "quotelike $op was actually quoted by '=>'", |
831
|
|
|
|
|
|
|
pos $$textref; |
832
|
1
|
|
|
|
|
3
|
pos $$textref = $startpos; |
833
|
1
|
|
|
|
|
2
|
return; |
834
|
|
|
|
|
|
|
} |
835
|
54
|
|
|
|
|
105
|
pos($$textref) = $ld1pos; # HAVE TO DO THIS BECAUSE LOOKAHEAD BROKEN |
836
|
54
|
|
|
|
|
164
|
my ($ldel1, $rdel1) = ("\Q$1","\Q$1"); |
837
|
54
|
100
|
|
|
|
159
|
if ($ldel1 =~ /[[(<{]/) |
838
|
|
|
|
|
|
|
{ |
839
|
33
|
|
|
|
|
63
|
$rdel1 =~ tr/[({])}>/; |
840
|
|
|
|
|
|
|
defined(_match_bracketed($textref,qr/\G/,qr/\G($ldel1)/,"","",qr/\G($rdel1)/)) |
841
|
33
|
50
|
|
|
|
392
|
|| do { pos $$textref = $startpos; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
842
|
33
|
|
|
|
|
84
|
$ld2pos = pos($$textref); |
843
|
33
|
|
|
|
|
54
|
$rd1pos = $ld2pos-1; |
844
|
|
|
|
|
|
|
} |
845
|
|
|
|
|
|
|
else |
846
|
|
|
|
|
|
|
{ |
847
|
|
|
|
|
|
|
$$textref =~ /\G$ldel1[^\\$ldel1]*(\\.[^\\$ldel1]*)*$ldel1/gcs |
848
|
21
|
50
|
|
|
|
377
|
|| do { pos $$textref = $startpos; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
849
|
21
|
|
|
|
|
57
|
$ld2pos = $rd1pos = pos($$textref)-1; |
850
|
|
|
|
|
|
|
} |
851
|
|
|
|
|
|
|
|
852
|
54
|
100
|
|
|
|
212
|
my $second_arg = $op =~ /s|tr|y/ ? 1 : 0; |
853
|
54
|
100
|
|
|
|
92
|
if ($second_arg) |
854
|
|
|
|
|
|
|
{ |
855
|
23
|
|
|
|
|
39
|
my ($ldel2, $rdel2); |
856
|
23
|
100
|
|
|
|
51
|
if ($ldel1 =~ /[[(<{]/) |
857
|
|
|
|
|
|
|
{ |
858
|
11
|
50
|
|
|
|
44
|
unless ($$textref =~ /\G\s*(\S)/gc) # SHOULD USE LOOKAHEAD |
859
|
|
|
|
|
|
|
{ |
860
|
0
|
|
|
|
|
0
|
_failmsg "Missing second block for quotelike $op", |
861
|
|
|
|
|
|
|
pos $$textref; |
862
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
863
|
0
|
|
|
|
|
0
|
return; |
864
|
|
|
|
|
|
|
} |
865
|
11
|
|
|
|
|
28
|
$ldel2 = $rdel2 = "\Q$1"; |
866
|
11
|
|
|
|
|
24
|
$rdel2 =~ tr/[({])}>/; |
867
|
|
|
|
|
|
|
} |
868
|
|
|
|
|
|
|
else |
869
|
|
|
|
|
|
|
{ |
870
|
12
|
|
|
|
|
24
|
$ldel2 = $rdel2 = $ldel1; |
871
|
|
|
|
|
|
|
} |
872
|
23
|
|
|
|
|
53
|
$str2pos = $ld2pos+1; |
873
|
|
|
|
|
|
|
|
874
|
23
|
100
|
|
|
|
88
|
if ($ldel2 =~ /[[(<{]/) |
875
|
|
|
|
|
|
|
{ |
876
|
9
|
|
|
|
|
27
|
pos($$textref)--; # OVERCOME BROKEN LOOKAHEAD |
877
|
|
|
|
|
|
|
defined(_match_bracketed($textref,qr/\G/,qr/\G($ldel2)/,"","",qr/\G($rdel2)/)) |
878
|
9
|
50
|
|
|
|
88
|
|| do { pos $$textref = $startpos; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
879
|
|
|
|
|
|
|
} |
880
|
|
|
|
|
|
|
else |
881
|
|
|
|
|
|
|
{ |
882
|
|
|
|
|
|
|
$$textref =~ /[^\\$ldel2]*(\\.[^\\$ldel2]*)*$ldel2/gcs |
883
|
14
|
50
|
|
|
|
127
|
|| do { pos $$textref = $startpos; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
884
|
|
|
|
|
|
|
} |
885
|
23
|
|
|
|
|
48
|
$rd2pos = pos($$textref)-1; |
886
|
|
|
|
|
|
|
} |
887
|
|
|
|
|
|
|
else |
888
|
|
|
|
|
|
|
{ |
889
|
31
|
|
|
|
|
50
|
$ld2pos = $str2pos = $rd2pos = $rd1pos; |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
|
892
|
54
|
|
|
|
|
77
|
$modpos = pos $$textref; |
893
|
|
|
|
|
|
|
|
894
|
54
|
|
|
|
|
417
|
$$textref =~ m/\G($mods{$op})/gc; |
895
|
54
|
|
|
|
|
106
|
my $endpos = pos $$textref; |
896
|
54
|
|
|
|
|
122
|
$ref2qmarkvalid{$textref} = $ref2slashvalid{$textref} = undef; |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
return ( |
899
|
54
|
|
|
|
|
230
|
$startpos, $oppos-$startpos, # PREFIX |
900
|
|
|
|
|
|
|
$oppos, length($op), # OPERATOR |
901
|
|
|
|
|
|
|
$ld1pos, 1, # LEFT DEL |
902
|
|
|
|
|
|
|
$str1pos, $rd1pos-$str1pos, # STR/PAT |
903
|
|
|
|
|
|
|
$rd1pos, 1, # RIGHT DEL |
904
|
|
|
|
|
|
|
$ld2pos, $second_arg, # 2ND LDEL (MAYBE) |
905
|
|
|
|
|
|
|
$str2pos, $rd2pos-$str2pos, # 2ND STR (MAYBE) |
906
|
|
|
|
|
|
|
$rd2pos, $second_arg, # 2ND RDEL (MAYBE) |
907
|
|
|
|
|
|
|
$modpos, $endpos-$modpos, # MODIFIERS |
908
|
|
|
|
|
|
|
$endpos, $textlen-$endpos, # REMAINDER |
909
|
|
|
|
|
|
|
); |
910
|
|
|
|
|
|
|
} |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
my $def_func = [ |
913
|
|
|
|
|
|
|
sub { extract_variable($_[0], '') }, |
914
|
|
|
|
|
|
|
sub { extract_quotelike($_[0],'') }, |
915
|
|
|
|
|
|
|
sub { extract_codeblock($_[0],'{}','') }, |
916
|
|
|
|
|
|
|
]; |
917
|
|
|
|
|
|
|
my %ref_not_regex = map +($_=>1), qw(CODE Text::Balanced::Extractor); |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
sub _update_patvalid { |
920
|
591
|
|
|
591
|
|
1140
|
my ($textref, $text) = @_; |
921
|
591
|
100
|
100
|
|
|
6412
|
if ($ref2slashvalid{$textref} && $text =~ m/(?:$RE_NUM|[\)\]])\s*$/) |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
922
|
|
|
|
|
|
|
{ |
923
|
26
|
|
|
|
|
82
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
924
|
|
|
|
|
|
|
} elsif (!$ref2slashvalid{$textref} && $text =~ m/$RE_PREREGEX_PAT\s*$/) |
925
|
|
|
|
|
|
|
{ |
926
|
2
|
|
|
|
|
7
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; |
927
|
|
|
|
|
|
|
} elsif (!$ref2slashvalid{$textref} && $text =~ m/$RE_EXPR_PAT\s*$/) |
928
|
|
|
|
|
|
|
{ |
929
|
46
|
|
|
|
|
95
|
$ref2slashvalid{$textref} = 1; |
930
|
46
|
|
|
|
|
133
|
$ref2qmarkvalid{$textref} = 0; |
931
|
|
|
|
|
|
|
} |
932
|
|
|
|
|
|
|
} |
933
|
|
|
|
|
|
|
sub extract_multiple (;$$$$) # ($text, $functions_ref, $max_fields, $ignoreunknown) |
934
|
|
|
|
|
|
|
{ |
935
|
41
|
100
|
|
41
|
1
|
4027
|
my $textref = defined($_[0]) ? \$_[0] : \$_; |
936
|
41
|
50
|
|
|
|
128
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
937
|
41
|
|
|
|
|
53
|
my $posbug = pos; |
938
|
41
|
|
|
|
|
56
|
my ($lastpos, $firstpos); |
939
|
41
|
|
|
|
|
49
|
my @fields = (); |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
#for ($$textref) |
942
|
|
|
|
|
|
|
{ |
943
|
41
|
100
|
|
|
|
48
|
my @func = defined $_[1] ? @{$_[1]} : @{$def_func}; |
|
41
|
|
|
|
|
66
|
|
|
29
|
|
|
|
|
47
|
|
|
12
|
|
|
|
|
22
|
|
944
|
41
|
100
|
66
|
|
|
113
|
my $max = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000; |
945
|
41
|
|
|
|
|
52
|
my $igunk = $_[3]; |
946
|
|
|
|
|
|
|
|
947
|
41
|
|
50
|
|
|
187
|
pos $$textref ||= 0; |
948
|
|
|
|
|
|
|
|
949
|
41
|
100
|
|
|
|
87
|
unless (wantarray) |
950
|
|
|
|
|
|
|
{ |
951
|
9
|
|
|
9
|
|
90
|
use Carp; |
|
9
|
|
|
|
|
20
|
|
|
9
|
|
|
|
|
8022
|
|
952
|
14
|
0
|
33
|
|
|
34
|
carp "extract_multiple reset maximal count to 1 in scalar context" |
|
|
|
33
|
|
|
|
|
953
|
|
|
|
|
|
|
if $^W && defined($_[2]) && $max > 1; |
954
|
14
|
|
|
|
|
17
|
$max = 1 |
955
|
|
|
|
|
|
|
} |
956
|
|
|
|
|
|
|
|
957
|
41
|
|
|
|
|
44
|
my @class; |
958
|
41
|
|
|
|
|
59
|
foreach my $func ( @func ) |
959
|
|
|
|
|
|
|
{ |
960
|
81
|
|
|
|
|
97
|
push @class, undef; |
961
|
81
|
100
|
|
|
|
145
|
($class[-1], $func) = %$func if ref($func) eq 'HASH'; |
962
|
81
|
100
|
|
|
|
251
|
$func = qr/\G$func/ if !$ref_not_regex{ref $func}; |
963
|
|
|
|
|
|
|
} |
964
|
|
|
|
|
|
|
|
965
|
41
|
|
|
|
|
50
|
my $unkpos; |
966
|
41
|
|
|
|
|
102
|
FIELD: while (pos($$textref) < length($$textref)) |
967
|
|
|
|
|
|
|
{ |
968
|
594
|
|
|
|
|
1176
|
foreach my $i ( 0..$#func ) |
969
|
|
|
|
|
|
|
{ |
970
|
1136
|
|
|
|
|
1216
|
my ($field, $pref); |
971
|
1136
|
|
|
|
|
1577
|
my ($class, $func) = ($class[$i], $func[$i]); |
972
|
1136
|
|
|
|
|
1235
|
$lastpos = pos $$textref; |
973
|
1136
|
100
|
|
|
|
2147
|
if (ref($func) eq 'CODE') |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
974
|
977
|
|
|
|
|
1253
|
{ ($field,undef,$pref) = $func->($$textref) } |
975
|
|
|
|
|
|
|
elsif (ref($func) eq 'Text::Balanced::Extractor') |
976
|
0
|
|
|
|
|
0
|
{ $field = $func->extract($$textref) } |
977
|
|
|
|
|
|
|
elsif( $$textref =~ m/$func[$i]/gc ) |
978
|
33
|
100
|
|
|
|
122
|
{ $field = defined($1) |
979
|
|
|
|
|
|
|
? $1 |
980
|
|
|
|
|
|
|
: substr($$textref, $-[0], $+[0] - $-[0]) |
981
|
|
|
|
|
|
|
} |
982
|
1136
|
|
100
|
|
|
3384
|
$pref ||= ""; |
983
|
1136
|
100
|
100
|
|
|
2193
|
if (defined($field) && length($field)) |
984
|
|
|
|
|
|
|
{ |
985
|
108
|
100
|
|
|
|
163
|
if (!$igunk) { |
986
|
101
|
100
|
100
|
|
|
225
|
$unkpos = $lastpos |
987
|
|
|
|
|
|
|
if length($pref) && !defined($unkpos); |
988
|
101
|
100
|
|
|
|
148
|
if (defined $unkpos) |
989
|
|
|
|
|
|
|
{ |
990
|
72
|
|
|
|
|
142
|
push @fields, substr($$textref, $unkpos, $lastpos-$unkpos).$pref; |
991
|
72
|
100
|
|
|
|
115
|
$firstpos = $unkpos unless defined $firstpos; |
992
|
72
|
|
|
|
|
75
|
undef $unkpos; |
993
|
72
|
100
|
|
|
|
123
|
last FIELD if @fields == $max; |
994
|
|
|
|
|
|
|
} |
995
|
|
|
|
|
|
|
} |
996
|
105
|
50
|
|
|
|
175
|
push @fields, $class ? bless(\$field, $class) : $field; |
997
|
105
|
|
|
|
|
196
|
_update_patvalid($textref, $fields[-1]); |
998
|
105
|
100
|
|
|
|
204
|
$firstpos = $lastpos unless defined $firstpos; |
999
|
105
|
|
|
|
|
131
|
$lastpos = pos $$textref; |
1000
|
105
|
100
|
|
|
|
198
|
last FIELD if @fields == $max; |
1001
|
90
|
|
|
|
|
223
|
next FIELD; |
1002
|
|
|
|
|
|
|
} |
1003
|
|
|
|
|
|
|
} |
1004
|
486
|
50
|
|
|
|
1229
|
if ($$textref =~ /\G(.)/gcs) |
1005
|
|
|
|
|
|
|
{ |
1006
|
486
|
100
|
100
|
|
|
1200
|
$unkpos = pos($$textref)-1 |
1007
|
|
|
|
|
|
|
unless $igunk || defined $unkpos; |
1008
|
486
|
|
|
|
|
955
|
_update_patvalid($textref, substr $$textref, $unkpos, pos($$textref)-$unkpos); |
1009
|
|
|
|
|
|
|
} |
1010
|
|
|
|
|
|
|
} |
1011
|
|
|
|
|
|
|
|
1012
|
41
|
100
|
|
|
|
73
|
if (defined $unkpos) |
1013
|
|
|
|
|
|
|
{ |
1014
|
18
|
|
|
|
|
34
|
push @fields, substr($$textref, $unkpos); |
1015
|
18
|
100
|
|
|
|
26
|
$firstpos = $unkpos unless defined $firstpos; |
1016
|
18
|
|
|
|
|
21
|
$lastpos = length $$textref; |
1017
|
|
|
|
|
|
|
} |
1018
|
41
|
|
|
|
|
74
|
last; |
1019
|
|
|
|
|
|
|
} |
1020
|
|
|
|
|
|
|
|
1021
|
41
|
|
|
|
|
64
|
pos $$textref = $lastpos; |
1022
|
41
|
100
|
|
|
|
291
|
return @fields if wantarray; |
1023
|
|
|
|
|
|
|
|
1024
|
14
|
|
100
|
|
|
40
|
$firstpos ||= 0; |
1025
|
14
|
|
|
|
|
20
|
eval { substr($$textref,$firstpos,$lastpos-$firstpos)=""; |
|
14
|
|
|
|
|
26
|
|
1026
|
14
|
|
|
|
|
23
|
pos $$textref = $firstpos }; |
1027
|
14
|
|
|
|
|
78
|
return $fields[0]; |
1028
|
|
|
|
|
|
|
} |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
sub gen_extract_tagged # ($opentag, $closetag, $pre, \%options) |
1031
|
|
|
|
|
|
|
{ |
1032
|
13
|
|
|
13
|
1
|
11021
|
my $ldel = $_[0]; |
1033
|
13
|
|
|
|
|
19
|
my $rdel = $_[1]; |
1034
|
13
|
100
|
|
|
|
64
|
my $pre = defined $_[2] ? qr/\G$_[2]/ : qr/\G\s*/; |
1035
|
13
|
100
|
|
|
|
32
|
my %options = defined $_[3] ? %{$_[3]} : (); |
|
6
|
|
|
|
|
18
|
|
1036
|
13
|
100
|
|
|
|
30
|
my $omode = defined $options{fail} ? $options{fail} : ''; |
1037
|
3
|
|
|
|
|
8
|
my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}}) |
1038
|
|
|
|
|
|
|
: defined($options{reject}) ? $options{reject} |
1039
|
13
|
50
|
|
|
|
37
|
: '' |
|
|
100
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
; |
1041
|
3
|
|
|
|
|
8
|
my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}}) |
1042
|
|
|
|
|
|
|
: defined($options{ignore}) ? $options{ignore} |
1043
|
13
|
50
|
|
|
|
30
|
: '' |
|
|
100
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
; |
1045
|
|
|
|
|
|
|
|
1046
|
13
|
100
|
|
|
|
28
|
$ldel = $et_default_ldel if !defined $ldel; |
1047
|
|
|
|
|
|
|
|
1048
|
13
|
|
|
|
|
21
|
my $posbug = pos; |
1049
|
13
|
100
|
|
|
|
21
|
for ($ldel, $bad, $ignore) { $_ = qr/$_/ if $_ } |
|
39
|
|
|
|
|
289
|
|
1050
|
13
|
|
|
|
|
29
|
pos = $posbug; |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
my $closure = sub |
1053
|
|
|
|
|
|
|
{ |
1054
|
40
|
50
|
|
40
|
|
28141
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
1055
|
40
|
|
|
|
|
86
|
my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore); |
1056
|
|
|
|
|
|
|
|
1057
|
40
|
100
|
|
|
|
86
|
return _fail(wantarray, $textref) unless @match; |
1058
|
28
|
|
|
|
|
96
|
return _succeed wantarray, $textref, |
1059
|
|
|
|
|
|
|
$match[2], $match[3]+$match[5]+$match[7], # MATCH |
1060
|
|
|
|
|
|
|
@match[8..9,0..1,2..7]; # REM, PRE, BITS |
1061
|
13
|
|
|
|
|
56
|
}; |
1062
|
|
|
|
|
|
|
|
1063
|
13
|
|
|
|
|
95
|
bless $closure, 'Text::Balanced::Extractor'; |
1064
|
|
|
|
|
|
|
} |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
package Text::Balanced::Extractor; |
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
sub extract($$) # ($self, $text) |
1069
|
|
|
|
|
|
|
{ |
1070
|
0
|
|
|
0
|
|
|
&{$_[0]}($_[1]); |
|
0
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
} |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
package Text::Balanced::ErrorMsg; |
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
use overload |
1076
|
33
|
|
|
33
|
|
130
|
'""' => sub { "$_[0]->{error}, detected at offset $_[0]->{pos}" }, |
1077
|
9
|
|
|
9
|
|
8056
|
fallback => 1; |
|
9
|
|
|
|
|
6629
|
|
|
9
|
|
|
|
|
71
|
|
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
1; |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
__END__ |