| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# Copyright (C) 1997-2001 Damian Conway. All rights reserved. |
|
2
|
|
|
|
|
|
|
# Copyright (C) 2009 Adam Kennedy. |
|
3
|
|
|
|
|
|
|
# Copyright (C) 2015, 2022 Steve Hay and other contributors. All rights |
|
4
|
|
|
|
|
|
|
# reserved. |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# This module is free software; you can redistribute it and/or modify it under |
|
7
|
|
|
|
|
|
|
# the same terms as Perl itself, i.e. under the terms of either the GNU General |
|
8
|
|
|
|
|
|
|
# Public License or the Artistic License, as specified in the F file. |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package Text::Balanced; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS. |
|
13
|
|
|
|
|
|
|
# FOR FULL DOCUMENTATION SEE Balanced.pod |
|
14
|
|
|
|
|
|
|
|
|
15
|
9
|
|
|
9
|
|
376866
|
use 5.008001; |
|
|
9
|
|
|
|
|
88
|
|
|
16
|
9
|
|
|
9
|
|
41
|
use strict; |
|
|
9
|
|
|
|
|
19
|
|
|
|
9
|
|
|
|
|
194
|
|
|
17
|
9
|
|
|
9
|
|
35
|
use Exporter (); |
|
|
9
|
|
|
|
|
12
|
|
|
|
9
|
|
|
|
|
158
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
9
|
|
|
9
|
|
41
|
use vars qw { $VERSION @ISA %EXPORT_TAGS }; |
|
|
9
|
|
|
|
|
30
|
|
|
|
9
|
|
|
|
|
806
|
|
|
20
|
|
|
|
|
|
|
BEGIN { |
|
21
|
9
|
|
|
9
|
|
26
|
$VERSION = '2.05_01'; |
|
22
|
9
|
|
|
|
|
115
|
@ISA = 'Exporter'; |
|
23
|
9
|
|
|
|
|
20819
|
%EXPORT_TAGS = ( |
|
24
|
|
|
|
|
|
|
ALL => [ qw{ |
|
25
|
|
|
|
|
|
|
&extract_delimited |
|
26
|
|
|
|
|
|
|
&extract_bracketed |
|
27
|
|
|
|
|
|
|
&extract_quotelike |
|
28
|
|
|
|
|
|
|
&extract_codeblock |
|
29
|
|
|
|
|
|
|
&extract_variable |
|
30
|
|
|
|
|
|
|
&extract_tagged |
|
31
|
|
|
|
|
|
|
&extract_multiple |
|
32
|
|
|
|
|
|
|
&gen_delimited_pat |
|
33
|
|
|
|
|
|
|
&gen_extract_tagged |
|
34
|
|
|
|
|
|
|
&delimited_pat |
|
35
|
|
|
|
|
|
|
} ], |
|
36
|
|
|
|
|
|
|
); |
|
37
|
|
|
|
|
|
|
} |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Exporter::export_ok_tags('ALL'); |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
our $RE_PREREGEX_PAT = qr#( |
|
42
|
|
|
|
|
|
|
[!=]~ |
|
43
|
|
|
|
|
|
|
| split|grep|map |
|
44
|
|
|
|
|
|
|
| not|and|or|xor |
|
45
|
|
|
|
|
|
|
)#x; |
|
46
|
|
|
|
|
|
|
our $RE_EXPR_PAT = qr#( |
|
47
|
|
|
|
|
|
|
(?:\*\*|&&|\|\||<<|>>|//|[-+*x%^&|.])=? |
|
48
|
|
|
|
|
|
|
| /(?:[^/]) |
|
49
|
|
|
|
|
|
|
| =(?!>) |
|
50
|
|
|
|
|
|
|
| return |
|
51
|
|
|
|
|
|
|
| [\(\[] |
|
52
|
|
|
|
|
|
|
)#x; |
|
53
|
|
|
|
|
|
|
our $RE_NUM = qr/\s*[+\-.0-9][+\-.0-9e]*/i; # numerical constant |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
our %ref2slashvalid; # is quotelike /.../ pattern valid here for given textref? |
|
56
|
|
|
|
|
|
|
our %ref2qmarkvalid; # is quotelike ?...? pattern valid here for given textref? |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# HANDLE RETURN VALUES IN VARIOUS CONTEXTS |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub _failmsg { |
|
61
|
2697
|
|
|
2697
|
|
4433
|
my ($message, $pos) = @_; |
|
62
|
2697
|
|
|
|
|
6341
|
$@ = bless { |
|
63
|
|
|
|
|
|
|
error => $message, |
|
64
|
|
|
|
|
|
|
pos => $pos, |
|
65
|
|
|
|
|
|
|
}, 'Text::Balanced::ErrorMsg'; |
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub _fail { |
|
69
|
941
|
|
|
941
|
|
1328
|
my ($wantarray, $textref, $message, $pos) = @_; |
|
70
|
941
|
100
|
|
|
|
1255
|
_failmsg $message, $pos if $message; |
|
71
|
941
|
100
|
|
|
|
2413
|
return (undef, $$textref, undef) if $wantarray; |
|
72
|
20
|
|
|
|
|
128
|
return; |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
sub _succeed { |
|
76
|
511
|
|
|
511
|
|
676
|
$@ = undef; |
|
77
|
511
|
|
|
|
|
1105
|
my ($wantarray,$textref) = splice @_, 0, 2; |
|
78
|
511
|
100
|
|
|
|
1076
|
my ($extrapos, $extralen) = @_ > 18 |
|
79
|
|
|
|
|
|
|
? splice(@_, -2, 2) |
|
80
|
|
|
|
|
|
|
: (0, 0); |
|
81
|
511
|
|
|
|
|
853
|
my ($startlen, $oppos) = @_[5,6]; |
|
82
|
511
|
|
|
|
|
662
|
my $remainderpos = $_[2]; |
|
83
|
511
|
100
|
|
|
|
713
|
if ( $wantarray ) { |
|
84
|
294
|
|
|
|
|
316
|
my @res; |
|
85
|
294
|
|
|
|
|
661
|
while (my ($from, $len) = splice @_, 0, 2) { |
|
86
|
1455
|
|
|
|
|
3680
|
push @res, substr($$textref, $from, $len); |
|
87
|
|
|
|
|
|
|
} |
|
88
|
294
|
100
|
|
|
|
497
|
if ( $extralen ) { # CORRECT FILLET |
|
89
|
13
|
|
|
|
|
27
|
my $extra = substr($res[0], $extrapos-$oppos, $extralen, "\n"); |
|
90
|
13
|
|
|
|
|
23
|
$res[1] = "$extra$res[1]"; |
|
91
|
13
|
|
|
|
|
17
|
eval { substr($$textref,$remainderpos,0) = $extra; |
|
|
13
|
|
|
|
|
35
|
|
|
92
|
12
|
|
|
|
|
23
|
substr($$textref,$extrapos,$extralen,"\n")} ; |
|
93
|
|
|
|
|
|
|
#REARRANGE HERE DOC AND FILLET IF POSSIBLE |
|
94
|
13
|
|
|
|
|
27
|
pos($$textref) = $remainderpos-$extralen+1; # RESET \G |
|
95
|
|
|
|
|
|
|
} else { |
|
96
|
281
|
|
|
|
|
496
|
pos($$textref) = $remainderpos; # RESET \G |
|
97
|
|
|
|
|
|
|
} |
|
98
|
294
|
|
|
|
|
2432
|
return @res; |
|
99
|
|
|
|
|
|
|
} else { |
|
100
|
217
|
|
|
|
|
401
|
my $match = substr($$textref,$_[0],$_[1]); |
|
101
|
217
|
100
|
|
|
|
379
|
substr($match,$extrapos-$_[0]-$startlen,$extralen,"") if $extralen; |
|
102
|
217
|
100
|
|
|
|
372
|
my $extra = $extralen |
|
103
|
|
|
|
|
|
|
? substr($$textref, $extrapos, $extralen)."\n" : ""; |
|
104
|
217
|
|
|
|
|
263
|
eval {substr($$textref,$_[4],$_[1]+$_[5])=$extra} ; #CHOP OUT PREFIX & MATCH, IF POSSIBLE |
|
|
217
|
|
|
|
|
502
|
|
|
105
|
217
|
|
|
|
|
371
|
pos($$textref) = $_[4]; # RESET \G |
|
106
|
217
|
|
|
|
|
2104
|
return $match; |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING |
|
111
|
|
|
|
|
|
|
## no critic (Subroutines::ProhibitSubroutinePrototypes) |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub gen_delimited_pat($;$) # ($delimiters;$escapes) |
|
114
|
|
|
|
|
|
|
{ |
|
115
|
59
|
|
|
59
|
1
|
95
|
my ($dels, $escs) = @_; |
|
116
|
59
|
50
|
|
|
|
178
|
return "" unless $dels =~ /\S/; |
|
117
|
59
|
100
|
|
|
|
100
|
$escs = '\\' unless $escs; |
|
118
|
59
|
|
|
|
|
141
|
$escs .= substr($escs,-1) x (length($dels)-length($escs)); |
|
119
|
59
|
|
|
|
|
71
|
my @pat = (); |
|
120
|
59
|
|
|
|
|
64
|
my $i; |
|
121
|
59
|
|
|
|
|
113
|
for ($i=0; $i
|
|
122
|
|
|
|
|
|
|
{ |
|
123
|
161
|
|
|
|
|
215
|
my $del = quotemeta substr($dels,$i,1); |
|
124
|
161
|
|
|
|
|
175
|
my $esc = quotemeta substr($escs,$i,1); |
|
125
|
161
|
100
|
|
|
|
208
|
if ($del eq $esc) |
|
126
|
|
|
|
|
|
|
{ |
|
127
|
24
|
|
|
|
|
58
|
push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del"; |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
else |
|
130
|
|
|
|
|
|
|
{ |
|
131
|
137
|
|
|
|
|
373
|
push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del"; |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
} |
|
134
|
59
|
|
|
|
|
123
|
my $pat = join '|', @pat; |
|
135
|
59
|
|
|
|
|
209
|
return "(?:$pat)"; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
*delimited_pat = \&gen_delimited_pat; |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# THE EXTRACTION FUNCTIONS |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub extract_delimited (;$$$$) |
|
143
|
|
|
|
|
|
|
{ |
|
144
|
50
|
50
|
|
50
|
1
|
34947
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
|
145
|
50
|
100
|
|
|
|
169
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
|
146
|
50
|
|
|
|
|
67
|
my $wantarray = wantarray; |
|
147
|
50
|
100
|
|
|
|
76
|
my $del = defined $_[1] ? $_[1] : qq{\'\"\`}; |
|
148
|
50
|
100
|
|
|
|
57
|
my $pre = defined $_[2] ? $_[2] : '\s*'; |
|
149
|
50
|
100
|
|
|
|
59
|
my $esc = defined $_[3] ? $_[3] : qq{\\}; |
|
150
|
50
|
|
|
|
|
82
|
my $pat = gen_delimited_pat($del, $esc); |
|
151
|
50
|
|
100
|
|
|
133
|
my $startpos = pos $$textref || 0; |
|
152
|
50
|
100
|
|
|
|
582
|
return _fail($wantarray, $textref, "Not a delimited pattern", 0) |
|
153
|
|
|
|
|
|
|
unless $$textref =~ m/\G($pre)($pat)/gc; |
|
154
|
43
|
|
|
|
|
74
|
my $prelen = length($1); |
|
155
|
43
|
|
|
|
|
72
|
my $matchpos = $startpos+$prelen; |
|
156
|
43
|
|
|
|
|
53
|
my $endpos = pos $$textref; |
|
157
|
43
|
|
|
|
|
77
|
return _succeed $wantarray, $textref, |
|
158
|
|
|
|
|
|
|
$matchpos, $endpos-$matchpos, # MATCH |
|
159
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, # REMAINDER |
|
160
|
|
|
|
|
|
|
$startpos, $prelen; # PREFIX |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
my %eb_delim_cache; |
|
164
|
|
|
|
|
|
|
sub _eb_delims { |
|
165
|
51
|
|
|
51
|
|
61
|
my ($ldel_orig) = @_; |
|
166
|
51
|
100
|
|
|
|
76
|
return @{ $eb_delim_cache{$ldel_orig} } if $eb_delim_cache{$ldel_orig}; |
|
|
45
|
|
|
|
|
93
|
|
|
167
|
6
|
|
|
|
|
8
|
my $qdel = ""; |
|
168
|
6
|
|
|
|
|
7
|
my $quotelike; |
|
169
|
6
|
|
|
|
|
7
|
my $ldel = $ldel_orig; |
|
170
|
6
|
50
|
|
|
|
16
|
$ldel =~ s/'//g and $qdel .= q{'}; |
|
171
|
6
|
100
|
|
|
|
18
|
$ldel =~ s/"//g and $qdel .= q{"}; |
|
172
|
6
|
100
|
|
|
|
16
|
$ldel =~ s/`//g and $qdel .= q{`}; |
|
173
|
6
|
100
|
|
|
|
13
|
$ldel =~ s/q//g and $quotelike = 1; |
|
174
|
6
|
|
|
|
|
11
|
$ldel =~ tr/[](){}<>\0-\377/[[(({{<
|
|
175
|
6
|
|
|
|
|
8
|
my $rdel = $ldel; |
|
176
|
6
|
50
|
|
|
|
10
|
return @{ $eb_delim_cache{$ldel_orig} = [] } unless $rdel =~ tr/[({])}>/; |
|
|
0
|
|
|
|
|
0
|
|
|
177
|
6
|
|
|
|
|
8
|
my $posbug = pos; |
|
178
|
6
|
|
|
|
|
19
|
$ldel = join('|', map { quotemeta $_ } split('', $ldel)); |
|
|
12
|
|
|
|
|
26
|
|
|
179
|
6
|
|
|
|
|
15
|
$rdel = join('|', map { quotemeta $_ } split('', $rdel)); |
|
|
12
|
|
|
|
|
17
|
|
|
180
|
6
|
|
|
|
|
14
|
pos = $posbug; |
|
181
|
6
|
|
66
|
|
|
8
|
@{ $eb_delim_cache{$ldel_orig} = [ |
|
|
6
|
|
|
|
|
170
|
|
|
182
|
|
|
|
|
|
|
qr/\G($ldel)/, $qdel && qr/\G([$qdel])/, $quotelike, qr/\G($rdel)/ |
|
183
|
|
|
|
|
|
|
] }; |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
sub extract_bracketed (;$$$) |
|
186
|
|
|
|
|
|
|
{ |
|
187
|
51
|
50
|
|
51
|
1
|
8064
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
|
188
|
51
|
100
|
|
|
|
113
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
|
189
|
51
|
100
|
|
|
|
71
|
my $ldel = defined $_[1] ? $_[1] : '{([<'; |
|
190
|
51
|
50
|
|
|
|
111
|
my $pre = defined $_[2] ? qr/\G$_[2]/ : qr/\G\s*/; |
|
191
|
51
|
|
|
|
|
69
|
my $wantarray = wantarray; |
|
192
|
51
|
|
|
|
|
69
|
my @ret = _eb_delims($ldel); |
|
193
|
51
|
50
|
|
|
|
76
|
unless (@ret) |
|
194
|
|
|
|
|
|
|
{ |
|
195
|
0
|
|
|
|
|
0
|
return _fail $wantarray, $textref, |
|
196
|
|
|
|
|
|
|
"Did not find a suitable bracket in delimiter: \"$_[1]\"", |
|
197
|
|
|
|
|
|
|
0; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
|
|
200
|
51
|
|
100
|
|
|
98
|
my $startpos = pos $$textref || 0; |
|
201
|
51
|
|
|
|
|
73
|
my @match = _match_bracketed($textref, $pre, @ret); |
|
202
|
|
|
|
|
|
|
|
|
203
|
51
|
100
|
|
|
|
78
|
return _fail ($wantarray, $textref) unless @match; |
|
204
|
|
|
|
|
|
|
|
|
205
|
18
|
|
|
|
|
49
|
return _succeed ( $wantarray, $textref, |
|
206
|
|
|
|
|
|
|
$match[2], $match[5]+2, # MATCH |
|
207
|
|
|
|
|
|
|
@match[8,9], # REMAINDER |
|
208
|
|
|
|
|
|
|
@match[0,1], # PREFIX |
|
209
|
|
|
|
|
|
|
); |
|
210
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub _match_bracketed # $textref, $pre, $ldel, $qdel, $quotelike, $rdel |
|
213
|
|
|
|
|
|
|
{ |
|
214
|
93
|
|
|
93
|
|
154
|
my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_; |
|
215
|
93
|
|
100
|
|
|
266
|
my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0); |
|
216
|
93
|
50
|
|
|
|
329
|
unless ($$textref =~ m/$pre/gc) |
|
217
|
|
|
|
|
|
|
{ |
|
218
|
0
|
|
|
|
|
0
|
_failmsg "Did not find prefix: /$pre/", $startpos; |
|
219
|
0
|
|
|
|
|
0
|
return; |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
|
|
222
|
93
|
|
|
|
|
122
|
$ldelpos = pos $$textref; |
|
223
|
|
|
|
|
|
|
|
|
224
|
93
|
100
|
|
|
|
284
|
unless ($$textref =~ m/$ldel/gc) |
|
225
|
|
|
|
|
|
|
{ |
|
226
|
31
|
|
|
|
|
82
|
_failmsg "Did not find opening bracket after prefix: \"$pre\"", |
|
227
|
|
|
|
|
|
|
pos $$textref; |
|
228
|
31
|
|
|
|
|
61
|
pos $$textref = $startpos; |
|
229
|
31
|
|
|
|
|
49
|
return; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
|
|
232
|
62
|
|
|
|
|
133
|
my @nesting = ( $1 ); |
|
233
|
62
|
|
|
|
|
83
|
my $textlen = length $$textref; |
|
234
|
62
|
|
|
|
|
118
|
while (pos $$textref < $textlen) |
|
235
|
|
|
|
|
|
|
{ |
|
236
|
636
|
100
|
|
|
|
911
|
next if $$textref =~ m/\G\\./gcs; |
|
237
|
|
|
|
|
|
|
|
|
238
|
628
|
100
|
100
|
|
|
2307
|
if ($$textref =~ m/$ldel/gc) |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
{ |
|
240
|
18
|
|
|
|
|
41
|
push @nesting, $1; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
elsif ($$textref =~ m/$rdel/gc) |
|
243
|
|
|
|
|
|
|
{ |
|
244
|
78
|
|
|
|
|
155
|
my ($found, $brackettype) = ($1, $1); |
|
245
|
78
|
50
|
|
|
|
145
|
if ($#nesting < 0) |
|
246
|
|
|
|
|
|
|
{ |
|
247
|
0
|
|
|
|
|
0
|
_failmsg "Unmatched closing bracket: \"$found\"", |
|
248
|
|
|
|
|
|
|
pos $$textref; |
|
249
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
|
250
|
0
|
|
|
|
|
0
|
return; |
|
251
|
|
|
|
|
|
|
} |
|
252
|
78
|
|
|
|
|
103
|
my $expected = pop(@nesting); |
|
253
|
78
|
|
|
|
|
111
|
$expected =~ tr/({[)}]>/; |
|
254
|
78
|
50
|
|
|
|
129
|
if ($expected ne $brackettype) |
|
255
|
|
|
|
|
|
|
{ |
|
256
|
0
|
|
|
|
|
0
|
_failmsg qq{Mismatched closing bracket: expected "$expected" but found "$found"}, |
|
257
|
|
|
|
|
|
|
pos $$textref; |
|
258
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
|
259
|
0
|
|
|
|
|
0
|
return; |
|
260
|
|
|
|
|
|
|
} |
|
261
|
78
|
100
|
|
|
|
169
|
last if $#nesting < 0; |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
elsif ($qdel && $$textref =~ m/$qdel/gc) |
|
264
|
|
|
|
|
|
|
{ |
|
265
|
10
|
50
|
|
|
|
181
|
$$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gsc and next; |
|
266
|
0
|
|
|
|
|
0
|
_failmsg "Unmatched embedded quote ($1)", |
|
267
|
|
|
|
|
|
|
pos $$textref; |
|
268
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
|
269
|
0
|
|
|
|
|
0
|
return; |
|
270
|
|
|
|
|
|
|
} |
|
271
|
|
|
|
|
|
|
elsif ($quotelike && _match_quotelike($textref,qr/\G()/,$ref2slashvalid{$textref},$ref2qmarkvalid{$textref})) |
|
272
|
|
|
|
|
|
|
{ |
|
273
|
4
|
|
|
|
|
8
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; # back-compat |
|
274
|
4
|
|
|
|
|
9
|
next; |
|
275
|
|
|
|
|
|
|
} |
|
276
|
|
|
|
|
|
|
|
|
277
|
518
|
|
|
|
|
1351
|
else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs } |
|
278
|
|
|
|
|
|
|
} |
|
279
|
62
|
100
|
|
|
|
94
|
if ($#nesting>=0) |
|
280
|
|
|
|
|
|
|
{ |
|
281
|
2
|
|
|
|
|
8
|
_failmsg "Unmatched opening bracket(s): " |
|
282
|
|
|
|
|
|
|
. join("..",@nesting)."..", |
|
283
|
|
|
|
|
|
|
pos $$textref; |
|
284
|
2
|
|
|
|
|
3
|
pos $$textref = $startpos; |
|
285
|
2
|
|
|
|
|
5
|
return; |
|
286
|
|
|
|
|
|
|
} |
|
287
|
|
|
|
|
|
|
|
|
288
|
60
|
|
|
|
|
74
|
$endpos = pos $$textref; |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
return ( |
|
291
|
60
|
|
|
|
|
197
|
$startpos, $ldelpos-$startpos, # PREFIX |
|
292
|
|
|
|
|
|
|
$ldelpos, 1, # OPENING BRACKET |
|
293
|
|
|
|
|
|
|
$ldelpos+1, $endpos-$ldelpos-2, # CONTENTS |
|
294
|
|
|
|
|
|
|
$endpos-1, 1, # CLOSING BRACKET |
|
295
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, # REMAINDER |
|
296
|
|
|
|
|
|
|
); |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
sub _revbracket($) |
|
300
|
|
|
|
|
|
|
{ |
|
301
|
70
|
|
|
70
|
|
118
|
my $brack = reverse $_[0]; |
|
302
|
70
|
|
|
|
|
105
|
$brack =~ tr/[({])}>/; |
|
303
|
70
|
|
|
|
|
231
|
return $brack; |
|
304
|
|
|
|
|
|
|
} |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
my $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*}; |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
my $et_default_ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; |
|
309
|
|
|
|
|
|
|
sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options) |
|
310
|
|
|
|
|
|
|
{ |
|
311
|
48
|
50
|
|
48
|
1
|
47425
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
|
312
|
48
|
50
|
|
|
|
170
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
|
313
|
48
|
|
|
|
|
74
|
my $ldel = $_[1]; |
|
314
|
48
|
|
|
|
|
62
|
my $rdel = $_[2]; |
|
315
|
48
|
100
|
|
|
|
142
|
my $pre = defined $_[3] ? qr/\G$_[3]/ : qr/\G\s*/; |
|
316
|
48
|
100
|
|
|
|
87
|
my %options = defined $_[4] ? %{$_[4]} : (); |
|
|
16
|
|
|
|
|
49
|
|
|
317
|
48
|
100
|
|
|
|
89
|
my $omode = defined $options{fail} ? $options{fail} : ''; |
|
318
|
6
|
|
|
|
|
14
|
my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}}) |
|
319
|
|
|
|
|
|
|
: defined($options{reject}) ? $options{reject} |
|
320
|
48
|
50
|
|
|
|
107
|
: '' |
|
|
|
100
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
; |
|
322
|
10
|
|
|
|
|
20
|
my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}}) |
|
323
|
|
|
|
|
|
|
: defined($options{ignore}) ? $options{ignore} |
|
324
|
48
|
50
|
|
|
|
90
|
: '' |
|
|
|
100
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
; |
|
326
|
|
|
|
|
|
|
|
|
327
|
48
|
100
|
|
|
|
88
|
$ldel = $et_default_ldel if !defined $ldel; |
|
328
|
48
|
|
|
|
|
50
|
$@ = undef; |
|
329
|
|
|
|
|
|
|
|
|
330
|
48
|
|
|
|
|
88
|
my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore); |
|
331
|
|
|
|
|
|
|
|
|
332
|
48
|
100
|
|
|
|
106
|
return _fail(wantarray, $textref) unless @match; |
|
333
|
34
|
|
|
|
|
113
|
return _succeed wantarray, $textref, |
|
334
|
|
|
|
|
|
|
$match[2], $match[3]+$match[5]+$match[7], # MATCH |
|
335
|
|
|
|
|
|
|
@match[8..9,0..1,2..7]; # REM, PRE, BITS |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
sub _match_tagged # ($$$$$$$) |
|
339
|
|
|
|
|
|
|
{ |
|
340
|
120
|
|
|
120
|
|
266
|
my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_; |
|
341
|
120
|
|
|
|
|
131
|
my $rdelspec; |
|
342
|
|
|
|
|
|
|
|
|
343
|
120
|
|
100
|
|
|
555
|
my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) = ( pos($$textref) = pos($$textref)||0 ); |
|
344
|
|
|
|
|
|
|
|
|
345
|
120
|
50
|
|
|
|
568
|
unless ($$textref =~ m/$pre/gc) |
|
346
|
|
|
|
|
|
|
{ |
|
347
|
0
|
|
|
|
|
0
|
_failmsg "Did not find prefix: /$pre/", pos $$textref; |
|
348
|
0
|
|
|
|
|
0
|
goto failed; |
|
349
|
|
|
|
|
|
|
} |
|
350
|
|
|
|
|
|
|
|
|
351
|
120
|
|
|
|
|
167
|
$opentagpos = pos($$textref); |
|
352
|
|
|
|
|
|
|
|
|
353
|
120
|
100
|
|
|
|
877
|
unless ($$textref =~ m/\G$ldel/gc) |
|
354
|
|
|
|
|
|
|
{ |
|
355
|
8
|
|
|
|
|
32
|
_failmsg "Did not find opening tag: /$ldel/", pos $$textref; |
|
356
|
8
|
|
|
|
|
28
|
goto failed; |
|
357
|
|
|
|
|
|
|
} |
|
358
|
|
|
|
|
|
|
|
|
359
|
112
|
|
|
|
|
182
|
$textpos = pos($$textref); |
|
360
|
|
|
|
|
|
|
|
|
361
|
112
|
100
|
|
|
|
171
|
if (!defined $rdel) |
|
362
|
|
|
|
|
|
|
{ |
|
363
|
70
|
|
|
|
|
235
|
$rdelspec = substr($$textref, $-[0], $+[0] - $-[0]); |
|
364
|
70
|
50
|
|
|
|
307
|
unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". _revbracket($1) /oes) |
|
|
70
|
|
|
|
|
194
|
|
|
365
|
|
|
|
|
|
|
{ |
|
366
|
0
|
|
|
|
|
0
|
_failmsg "Unable to construct closing tag to match: $rdel", |
|
367
|
|
|
|
|
|
|
pos $$textref; |
|
368
|
0
|
|
|
|
|
0
|
goto failed; |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
} |
|
371
|
|
|
|
|
|
|
else |
|
372
|
|
|
|
|
|
|
{ |
|
373
|
|
|
|
|
|
|
## no critic (BuiltinFunctions::ProhibitStringyEval) |
|
374
|
42
|
|
66
|
|
|
1939
|
$rdelspec = eval "qq{$rdel}" || do { |
|
375
|
|
|
|
|
|
|
my $del; |
|
376
|
|
|
|
|
|
|
for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',) |
|
377
|
|
|
|
|
|
|
{ next if $rdel =~ /\Q$_/; $del = $_; last } |
|
378
|
|
|
|
|
|
|
unless ($del) { |
|
379
|
9
|
|
|
9
|
|
81
|
use Carp; |
|
|
9
|
|
|
|
|
14
|
|
|
|
9
|
|
|
|
|
31794
|
|
|
380
|
|
|
|
|
|
|
croak "Can't interpolate right delimiter $rdel" |
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
eval "qq$del$rdel$del"; |
|
383
|
|
|
|
|
|
|
}; |
|
384
|
|
|
|
|
|
|
} |
|
385
|
|
|
|
|
|
|
|
|
386
|
112
|
|
|
|
|
326
|
while (pos($$textref) < length($$textref)) |
|
387
|
|
|
|
|
|
|
{ |
|
388
|
1240
|
50
|
|
|
|
1873
|
next if $$textref =~ m/\G\\./gc; |
|
389
|
|
|
|
|
|
|
|
|
390
|
1240
|
50
|
100
|
|
|
6316
|
if ($$textref =~ m/\G(\n[ \t]*\n)/gc ) |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
{ |
|
392
|
0
|
0
|
|
|
|
0
|
$parapos = pos($$textref) - length($1) |
|
393
|
|
|
|
|
|
|
unless defined $parapos; |
|
394
|
|
|
|
|
|
|
} |
|
395
|
|
|
|
|
|
|
elsif ($$textref =~ m/\G($rdelspec)/gc ) |
|
396
|
|
|
|
|
|
|
{ |
|
397
|
82
|
|
|
|
|
141
|
$closetagpos = pos($$textref)-length($1); |
|
398
|
82
|
|
|
|
|
613
|
goto matched; |
|
399
|
|
|
|
|
|
|
} |
|
400
|
|
|
|
|
|
|
elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc) |
|
401
|
|
|
|
|
|
|
{ |
|
402
|
12
|
|
|
|
|
27
|
next; |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
elsif ($bad && $$textref =~ m/\G($bad)/gcs) |
|
405
|
|
|
|
|
|
|
{ |
|
406
|
12
|
|
|
|
|
37
|
pos($$textref) -= length($1); # CUT OFF WHATEVER CAUSED THE SHORTNESS |
|
407
|
12
|
100
|
100
|
|
|
95
|
goto short if ($omode eq 'PARA' || $omode eq 'MAX'); |
|
408
|
4
|
|
|
|
|
17
|
_failmsg "Found invalid nested tag: $1", pos $$textref; |
|
409
|
4
|
|
|
|
|
37
|
goto failed; |
|
410
|
|
|
|
|
|
|
} |
|
411
|
|
|
|
|
|
|
elsif ($$textref =~ m/\G($ldel)/gc) |
|
412
|
|
|
|
|
|
|
{ |
|
413
|
32
|
|
|
|
|
57
|
my $tag = $1; |
|
414
|
32
|
|
|
|
|
67
|
pos($$textref) -= length($tag); # REWIND TO NESTED TAG |
|
415
|
32
|
100
|
|
|
|
78
|
unless (_match_tagged(@_)) # MATCH NESTED TAG |
|
416
|
|
|
|
|
|
|
{ |
|
417
|
4
|
50
|
33
|
|
|
18
|
goto short if $omode eq 'PARA' || $omode eq 'MAX'; |
|
418
|
4
|
|
|
|
|
36
|
_failmsg "Found unbalanced nested tag: $tag", |
|
419
|
|
|
|
|
|
|
pos $$textref; |
|
420
|
4
|
|
|
|
|
43
|
goto failed; |
|
421
|
|
|
|
|
|
|
} |
|
422
|
|
|
|
|
|
|
} |
|
423
|
1102
|
|
|
|
|
2336
|
else { $$textref =~ m/./gcs } |
|
424
|
|
|
|
|
|
|
} |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
short: |
|
427
|
22
|
|
|
|
|
29
|
$closetagpos = pos($$textref); |
|
428
|
22
|
100
|
|
|
|
48
|
goto matched if $omode eq 'MAX'; |
|
429
|
18
|
100
|
|
|
|
46
|
goto failed unless $omode eq 'PARA'; |
|
430
|
|
|
|
|
|
|
|
|
431
|
4
|
50
|
|
|
|
8
|
if (defined $parapos) { pos($$textref) = $parapos } |
|
|
0
|
|
|
|
|
0
|
|
|
432
|
4
|
|
|
|
|
6
|
else { $parapos = pos($$textref) } |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
return ( |
|
435
|
4
|
|
|
|
|
17
|
$startpos, $opentagpos-$startpos, # PREFIX |
|
436
|
|
|
|
|
|
|
$opentagpos, $textpos-$opentagpos, # OPENING TAG |
|
437
|
|
|
|
|
|
|
$textpos, $parapos-$textpos, # TEXT |
|
438
|
|
|
|
|
|
|
$parapos, 0, # NO CLOSING TAG |
|
439
|
|
|
|
|
|
|
$parapos, length($$textref)-$parapos, # REMAINDER |
|
440
|
|
|
|
|
|
|
); |
|
441
|
|
|
|
|
|
|
|
|
442
|
86
|
|
|
|
|
321
|
matched: |
|
443
|
|
|
|
|
|
|
$endpos = pos($$textref); |
|
444
|
|
|
|
|
|
|
return ( |
|
445
|
86
|
|
|
|
|
302
|
$startpos, $opentagpos-$startpos, # PREFIX |
|
446
|
|
|
|
|
|
|
$opentagpos, $textpos-$opentagpos, # OPENING TAG |
|
447
|
|
|
|
|
|
|
$textpos, $closetagpos-$textpos, # TEXT |
|
448
|
|
|
|
|
|
|
$closetagpos, $endpos-$closetagpos, # CLOSING TAG |
|
449
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, # REMAINDER |
|
450
|
|
|
|
|
|
|
); |
|
451
|
|
|
|
|
|
|
|
|
452
|
30
|
100
|
|
|
|
135
|
failed: |
|
453
|
|
|
|
|
|
|
_failmsg "Did not find closing tag", pos $$textref unless $@; |
|
454
|
30
|
|
|
|
|
56
|
pos($$textref) = $startpos; |
|
455
|
30
|
|
|
|
|
66
|
return; |
|
456
|
|
|
|
|
|
|
} |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
sub extract_variable (;$$) |
|
459
|
|
|
|
|
|
|
{ |
|
460
|
625
|
50
|
|
625
|
1
|
172799
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
|
461
|
625
|
50
|
|
|
|
1155
|
return ("","","") unless defined $$textref; |
|
462
|
625
|
100
|
|
|
|
1175
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
|
463
|
625
|
100
|
|
|
|
1504
|
my $pre = defined $_[1] ? qr/\G$_[1]/ : qr/\G\s*/; |
|
464
|
|
|
|
|
|
|
|
|
465
|
625
|
|
|
|
|
986
|
my @match = _match_variable($textref,$pre); |
|
466
|
|
|
|
|
|
|
|
|
467
|
625
|
100
|
|
|
|
1043
|
return _fail wantarray, $textref unless @match; |
|
468
|
|
|
|
|
|
|
|
|
469
|
234
|
|
|
|
|
653
|
return _succeed wantarray, $textref, |
|
470
|
|
|
|
|
|
|
@match[2..3,4..5,0..1]; # MATCH, REMAINDER, PREFIX |
|
471
|
|
|
|
|
|
|
} |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
sub _match_variable |
|
474
|
|
|
|
|
|
|
{ |
|
475
|
|
|
|
|
|
|
# $# |
|
476
|
|
|
|
|
|
|
# $^ |
|
477
|
|
|
|
|
|
|
# $$ |
|
478
|
1345
|
|
|
1345
|
|
1793
|
my ($textref, $pre) = @_; |
|
479
|
1345
|
|
100
|
|
|
3236
|
my $startpos = pos($$textref) = pos($$textref)||0; |
|
480
|
1345
|
100
|
|
|
|
5866
|
unless ($$textref =~ m/$pre/gc) |
|
481
|
|
|
|
|
|
|
{ |
|
482
|
341
|
|
|
|
|
1102
|
_failmsg "Did not find prefix: /$pre/", pos $$textref; |
|
483
|
341
|
|
|
|
|
814
|
return; |
|
484
|
|
|
|
|
|
|
} |
|
485
|
1004
|
|
|
|
|
1244
|
my $varpos = pos($$textref); |
|
486
|
1004
|
100
|
|
|
|
2049
|
unless ($$textref =~ m{\G\$\s*(?!::)(\d+|[][&`'+*./|,";%=~:?!\@<>()-]|\^[a-z]?)}gci) |
|
487
|
|
|
|
|
|
|
{ |
|
488
|
912
|
100
|
|
|
|
2152
|
unless ($$textref =~ m/\G((\$#?|[*\@\%]|\\&)+)/gc) |
|
489
|
|
|
|
|
|
|
{ |
|
490
|
659
|
|
|
|
|
1226
|
_failmsg "Did not find leading dereferencer", pos $$textref; |
|
491
|
659
|
|
|
|
|
1244
|
pos $$textref = $startpos; |
|
492
|
659
|
|
|
|
|
1595
|
return; |
|
493
|
|
|
|
|
|
|
} |
|
494
|
253
|
|
|
|
|
466
|
my $deref = $1; |
|
495
|
|
|
|
|
|
|
|
|
496
|
253
|
50
|
100
|
|
|
1039
|
unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
497
|
|
|
|
|
|
|
or _match_codeblock($textref, qr/\G()/, '\{', qr/\G\s*(\})/, '\{', '\}', 0, 1) |
|
498
|
|
|
|
|
|
|
or $deref eq '$#' or $deref eq '$$' |
|
499
|
|
|
|
|
|
|
or pos($$textref) == length $$textref ) |
|
500
|
|
|
|
|
|
|
{ |
|
501
|
0
|
|
|
|
|
0
|
_failmsg "Bad identifier after dereferencer", pos $$textref; |
|
502
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
|
503
|
0
|
|
|
|
|
0
|
return; |
|
504
|
|
|
|
|
|
|
} |
|
505
|
|
|
|
|
|
|
} |
|
506
|
|
|
|
|
|
|
|
|
507
|
345
|
|
|
|
|
429
|
while (1) |
|
508
|
|
|
|
|
|
|
{ |
|
509
|
423
|
100
|
|
|
|
960
|
next if $$textref =~ m/\G\s*(?:->)?\s*[{]\w+[}]/gc; |
|
510
|
396
|
100
|
|
|
|
1857
|
next if _match_codeblock($textref, |
|
511
|
|
|
|
|
|
|
qr/\G\s*->\s*(?:[_a-zA-Z]\w+\s*)?/, |
|
512
|
|
|
|
|
|
|
qr/[({[]/, qr/\G\s*([)}\]])/, |
|
513
|
|
|
|
|
|
|
qr/[({[]/, qr/[)}\]]/, 0, 1); |
|
514
|
373
|
100
|
|
|
|
1793
|
next if _match_codeblock($textref, |
|
515
|
|
|
|
|
|
|
qr/\G\s*/, qr/[{[]/, qr/\G\s*([}\]])/, |
|
516
|
|
|
|
|
|
|
qr/[{[]/, qr/[}\]]/, 0, 1); |
|
517
|
357
|
50
|
|
|
|
1267
|
next if _match_variable($textref,qr/\G\s*->\s*/); |
|
518
|
357
|
100
|
|
|
|
824
|
next if $$textref =~ m/\G\s*->\s*\w+(?![{([])/gc; |
|
519
|
345
|
|
|
|
|
446
|
last; |
|
520
|
|
|
|
|
|
|
} |
|
521
|
345
|
|
|
|
|
639
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
|
522
|
|
|
|
|
|
|
|
|
523
|
345
|
|
|
|
|
409
|
my $endpos = pos($$textref); |
|
524
|
345
|
|
|
|
|
860
|
return ($startpos, $varpos-$startpos, |
|
525
|
|
|
|
|
|
|
$varpos, $endpos-$varpos, |
|
526
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos |
|
527
|
|
|
|
|
|
|
); |
|
528
|
|
|
|
|
|
|
} |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
my %ec_delim_cache; |
|
531
|
|
|
|
|
|
|
sub _ec_delims { |
|
532
|
96
|
|
|
96
|
|
135
|
my ($ldel_inner, $ldel_outer) = @_; |
|
533
|
0
|
|
|
|
|
0
|
return @{ $ec_delim_cache{$ldel_outer}{$ldel_inner} } |
|
534
|
96
|
50
|
|
|
|
180
|
if $ec_delim_cache{$ldel_outer}{$ldel_inner}; |
|
535
|
96
|
|
|
|
|
101
|
my $rdel_inner = $ldel_inner; |
|
536
|
96
|
|
|
|
|
94
|
my $rdel_outer = $ldel_outer; |
|
537
|
96
|
|
|
|
|
97
|
my $posbug = pos; |
|
538
|
96
|
|
|
|
|
132
|
for ($ldel_inner, $ldel_outer) { tr/[]()<>{}\0-\377/[[((<<{{/ds } |
|
|
192
|
|
|
|
|
285
|
|
|
539
|
96
|
|
|
|
|
117
|
for ($rdel_inner, $rdel_outer) { tr/[]()<>{}\0-\377/]]))>>}}/ds } |
|
|
192
|
|
|
|
|
210
|
|
|
540
|
96
|
|
|
|
|
131
|
for ($ldel_inner, $ldel_outer, $rdel_inner, $rdel_outer) |
|
541
|
|
|
|
|
|
|
{ |
|
542
|
384
|
|
|
|
|
535
|
$_ = '('.join('|',map { quotemeta $_ } split('',$_)).')' |
|
|
400
|
|
|
|
|
769
|
|
|
543
|
|
|
|
|
|
|
} |
|
544
|
96
|
|
|
|
|
163
|
pos = $posbug; |
|
545
|
96
|
|
|
|
|
114
|
@{ $ec_delim_cache{$ldel_outer}{$ldel_inner} = [ |
|
|
96
|
|
|
|
|
680
|
|
|
546
|
|
|
|
|
|
|
$ldel_outer, qr/\G\s*($rdel_outer)/, $ldel_inner, $rdel_inner |
|
547
|
|
|
|
|
|
|
] }; |
|
548
|
|
|
|
|
|
|
} |
|
549
|
|
|
|
|
|
|
sub extract_codeblock (;$$$$$) |
|
550
|
|
|
|
|
|
|
{ |
|
551
|
96
|
50
|
|
96
|
1
|
40910
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
|
552
|
96
|
100
|
|
|
|
237
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
|
553
|
96
|
|
|
|
|
113
|
my $wantarray = wantarray; |
|
554
|
96
|
100
|
|
|
|
147
|
my $ldel_inner = defined $_[1] ? $_[1] : '{'; |
|
555
|
96
|
100
|
|
|
|
236
|
my $pre = !defined $_[2] ? qr/\G\s*/ : qr/\G$_[2]/; |
|
556
|
96
|
100
|
|
|
|
172
|
my $ldel_outer = defined $_[3] ? $_[3] : $ldel_inner; |
|
557
|
96
|
|
|
|
|
105
|
my $rd = $_[4]; |
|
558
|
96
|
|
|
|
|
151
|
my @delims = _ec_delims($ldel_inner, $ldel_outer); |
|
559
|
|
|
|
|
|
|
|
|
560
|
96
|
|
|
|
|
185
|
my @match = _match_codeblock($textref, $pre, @delims, $rd, 1); |
|
561
|
96
|
100
|
|
|
|
170
|
return _fail($wantarray, $textref) unless @match; |
|
562
|
51
|
|
|
|
|
117
|
return _succeed($wantarray, $textref, |
|
563
|
|
|
|
|
|
|
@match[2..3,4..5,0..1] # MATCH, REMAINDER, PREFIX |
|
564
|
|
|
|
|
|
|
); |
|
565
|
|
|
|
|
|
|
} |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
sub _match_codeblock |
|
568
|
|
|
|
|
|
|
{ |
|
569
|
1044
|
|
|
1044
|
|
2577
|
my ($textref, $pre, $ldel_outer, $rdel_outer, $ldel_inner, $rdel_inner, $rd, $no_backcompat) = @_; |
|
570
|
1044
|
100
|
|
|
|
1531
|
$rdel_outer = qr/\G\s*($rdel_outer)/ if !$no_backcompat; # Switch calls this func directly |
|
571
|
1044
|
|
100
|
|
|
2448
|
my $startpos = pos($$textref) = pos($$textref) || 0; |
|
572
|
1044
|
100
|
|
|
|
3572
|
unless ($$textref =~ m/$pre/gc) |
|
573
|
|
|
|
|
|
|
{ |
|
574
|
357
|
|
|
|
|
1453
|
_failmsg qq{Did not match prefix /$pre/ at"} . |
|
575
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
|
576
|
|
|
|
|
|
|
q{..."}, |
|
577
|
|
|
|
|
|
|
pos $$textref; |
|
578
|
357
|
|
|
|
|
957
|
return; |
|
579
|
|
|
|
|
|
|
} |
|
580
|
687
|
|
|
|
|
915
|
my $codepos = pos($$textref); |
|
581
|
687
|
100
|
|
|
|
4555
|
unless ($$textref =~ m/\G($ldel_outer)/gc) # OUTERMOST DELIMITER |
|
582
|
|
|
|
|
|
|
{ |
|
583
|
552
|
|
|
|
|
1740
|
_failmsg qq{Did not find expected opening bracket at "} . |
|
584
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
|
585
|
|
|
|
|
|
|
q{..."}, |
|
586
|
|
|
|
|
|
|
pos $$textref; |
|
587
|
552
|
|
|
|
|
1208
|
pos $$textref = $startpos; |
|
588
|
552
|
|
|
|
|
1316
|
return; |
|
589
|
|
|
|
|
|
|
} |
|
590
|
135
|
|
|
|
|
255
|
my $closing = $1; |
|
591
|
135
|
|
|
|
|
188
|
$closing =~ tr/([<{/)]>}/; |
|
592
|
135
|
|
|
|
|
161
|
my $matched; |
|
593
|
|
|
|
|
|
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 |
|
594
|
135
|
100
|
66
|
|
|
548
|
if !pos($$textref) or !defined $ref2slashvalid{$textref}; # default, or reset |
|
595
|
135
|
|
|
|
|
266
|
while (pos($$textref) < length($$textref)) |
|
596
|
|
|
|
|
|
|
{ |
|
597
|
500
|
50
|
66
|
|
|
827
|
if ($rd && $$textref =~ m#\G(\Q(?)\E|\Q(s?)\E|\Q(s)\E)#gc) |
|
598
|
|
|
|
|
|
|
{ |
|
599
|
0
|
|
|
|
|
0
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
|
600
|
0
|
|
|
|
|
0
|
next; |
|
601
|
|
|
|
|
|
|
} |
|
602
|
|
|
|
|
|
|
|
|
603
|
500
|
100
|
|
|
|
891
|
if ($$textref =~ m/\G\s*#.*/gc) |
|
604
|
|
|
|
|
|
|
{ |
|
605
|
6
|
|
|
|
|
8
|
next; |
|
606
|
|
|
|
|
|
|
} |
|
607
|
|
|
|
|
|
|
|
|
608
|
494
|
100
|
|
|
|
1499
|
if ($$textref =~ m/$rdel_outer/gc) |
|
609
|
|
|
|
|
|
|
{ |
|
610
|
131
|
100
|
66
|
|
|
493
|
unless ($matched = ($closing && $1 eq $closing) ) |
|
611
|
|
|
|
|
|
|
{ |
|
612
|
2
|
50
|
|
|
|
6
|
next if $1 eq '>'; # MIGHT BE A "LESS THAN" |
|
613
|
0
|
|
|
|
|
0
|
_failmsg q{Mismatched closing bracket at "} . |
|
614
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
|
615
|
|
|
|
|
|
|
qq{...". Expected '$closing'}, |
|
616
|
|
|
|
|
|
|
pos $$textref; |
|
617
|
|
|
|
|
|
|
} |
|
618
|
129
|
|
|
|
|
161
|
last; |
|
619
|
|
|
|
|
|
|
} |
|
620
|
|
|
|
|
|
|
|
|
621
|
363
|
100
|
100
|
|
|
836
|
if (_match_variable($textref,qr/\G\s*/) || |
|
622
|
|
|
|
|
|
|
_match_quotelike($textref,qr/\G\s*/,$ref2slashvalid{$textref},$ref2qmarkvalid{$textref}) ) |
|
623
|
|
|
|
|
|
|
{ |
|
624
|
138
|
|
|
|
|
197
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
|
625
|
138
|
|
|
|
|
205
|
next; |
|
626
|
|
|
|
|
|
|
} |
|
627
|
|
|
|
|
|
|
|
|
628
|
225
|
100
|
|
|
|
3136
|
if ($$textref =~ m#\G\s*(?!$ldel_inner)(?:$RE_PREREGEX_PAT|$RE_EXPR_PAT)#gc) |
|
629
|
|
|
|
|
|
|
{ |
|
630
|
74
|
|
|
|
|
144
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; |
|
631
|
74
|
|
|
|
|
105
|
next; |
|
632
|
|
|
|
|
|
|
} |
|
633
|
|
|
|
|
|
|
|
|
634
|
151
|
100
|
|
|
|
906
|
if ( _match_codeblock($textref, qr/\G\s*/, $ldel_inner, qr/\G\s*($rdel_inner)/, $ldel_inner, $rdel_inner, $rd, 1) ) |
|
635
|
|
|
|
|
|
|
{ |
|
636
|
17
|
|
|
|
|
27
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; |
|
637
|
17
|
|
|
|
|
27
|
next; |
|
638
|
|
|
|
|
|
|
} |
|
639
|
|
|
|
|
|
|
|
|
640
|
134
|
50
|
|
|
|
668
|
if ($$textref =~ m/\G\s*$ldel_outer/gc) |
|
641
|
|
|
|
|
|
|
{ |
|
642
|
0
|
|
|
|
|
0
|
_failmsg q{Improperly nested codeblock at "} . |
|
643
|
|
|
|
|
|
|
substr($$textref,pos($$textref),20) . |
|
644
|
|
|
|
|
|
|
q{..."}, |
|
645
|
|
|
|
|
|
|
pos $$textref; |
|
646
|
0
|
|
|
|
|
0
|
last; |
|
647
|
|
|
|
|
|
|
} |
|
648
|
|
|
|
|
|
|
|
|
649
|
134
|
|
|
|
|
259
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
|
650
|
134
|
|
|
|
|
359
|
$$textref =~ m/\G\s*(\w+|[-=>]>|.|\Z)/gc; |
|
651
|
|
|
|
|
|
|
} |
|
652
|
371
|
|
|
|
|
880
|
continue { $@ = undef } |
|
653
|
|
|
|
|
|
|
|
|
654
|
135
|
100
|
|
|
|
192
|
unless ($matched) |
|
655
|
|
|
|
|
|
|
{ |
|
656
|
6
|
50
|
|
|
|
20
|
_failmsg 'No match found for opening bracket', pos $$textref |
|
657
|
|
|
|
|
|
|
unless $@; |
|
658
|
6
|
|
|
|
|
41
|
return; |
|
659
|
|
|
|
|
|
|
} |
|
660
|
|
|
|
|
|
|
|
|
661
|
129
|
|
|
|
|
206
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = undef; |
|
662
|
129
|
|
|
|
|
157
|
my $endpos = pos($$textref); |
|
663
|
129
|
|
|
|
|
393
|
return ( $startpos, $codepos-$startpos, |
|
664
|
|
|
|
|
|
|
$codepos, $endpos-$codepos, |
|
665
|
|
|
|
|
|
|
$endpos, length($$textref)-$endpos, |
|
666
|
|
|
|
|
|
|
); |
|
667
|
|
|
|
|
|
|
} |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
my %mods = ( |
|
671
|
|
|
|
|
|
|
'none' => '[cgimsox]*', |
|
672
|
|
|
|
|
|
|
'm' => '[cgimsox]*', |
|
673
|
|
|
|
|
|
|
's' => '[cegimsox]*', |
|
674
|
|
|
|
|
|
|
'tr' => '[cds]*', |
|
675
|
|
|
|
|
|
|
'y' => '[cds]*', |
|
676
|
|
|
|
|
|
|
'qq' => '', |
|
677
|
|
|
|
|
|
|
'qx' => '', |
|
678
|
|
|
|
|
|
|
'qw' => '', |
|
679
|
|
|
|
|
|
|
'qr' => '[imsx]*', |
|
680
|
|
|
|
|
|
|
'q' => '', |
|
681
|
|
|
|
|
|
|
); |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
sub extract_quotelike (;$$) |
|
684
|
|
|
|
|
|
|
{ |
|
685
|
542
|
100
|
|
542
|
1
|
111866
|
my $textref = $_[0] ? \$_[0] : \$_; |
|
686
|
542
|
100
|
|
|
|
965
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
|
687
|
542
|
|
|
|
|
610
|
my $wantarray = wantarray; |
|
688
|
542
|
100
|
|
|
|
1210
|
my $pre = defined $_[1] ? qr/\G$_[1]/ : qr/\G\s*/; |
|
689
|
|
|
|
|
|
|
|
|
690
|
542
|
|
|
|
|
1135
|
my @match = _match_quotelike($textref,$pre,$ref2slashvalid{$textref},$ref2qmarkvalid{$textref}); |
|
691
|
542
|
100
|
|
|
|
1018
|
return _fail($wantarray, $textref) unless @match; |
|
692
|
103
|
|
|
|
|
395
|
return _succeed($wantarray, $textref, |
|
693
|
|
|
|
|
|
|
$match[2], $match[18]-$match[2], # MATCH |
|
694
|
|
|
|
|
|
|
@match[18,19], # REMAINDER |
|
695
|
|
|
|
|
|
|
@match[0,1], # PREFIX |
|
696
|
|
|
|
|
|
|
@match[2..17], # THE BITS |
|
697
|
|
|
|
|
|
|
@match[20,21], # ANY FILLET? |
|
698
|
|
|
|
|
|
|
); |
|
699
|
|
|
|
|
|
|
}; |
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
my %maybe_quote = map +($_=>1), qw(" ' `); |
|
702
|
|
|
|
|
|
|
sub _match_quotelike |
|
703
|
|
|
|
|
|
|
{ |
|
704
|
848
|
|
|
848
|
|
1161
|
my ($textref, $pre, $allow_slash_match, $allow_qmark_match) = @_; |
|
705
|
|
|
|
|
|
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 |
|
706
|
848
|
100
|
100
|
|
|
2230
|
if !pos($$textref) or !defined $ref2slashvalid{$textref}; # default, or reset |
|
707
|
|
|
|
|
|
|
|
|
708
|
848
|
|
100
|
|
|
2423
|
my ($textlen,$startpos, |
|
709
|
|
|
|
|
|
|
$preld1pos,$ld1pos,$str1pos,$rd1pos, |
|
710
|
|
|
|
|
|
|
$preld2pos,$ld2pos,$str2pos,$rd2pos, |
|
711
|
|
|
|
|
|
|
$modpos) = ( length($$textref), pos($$textref) = pos($$textref) || 0 ); |
|
712
|
|
|
|
|
|
|
|
|
713
|
848
|
50
|
|
|
|
2802
|
unless ($$textref =~ m/$pre/gc) |
|
714
|
|
|
|
|
|
|
{ |
|
715
|
0
|
|
|
|
|
0
|
_failmsg qq{Did not find prefix /$pre/ at "} . |
|
716
|
|
|
|
|
|
|
substr($$textref, pos($$textref), 20) . |
|
717
|
|
|
|
|
|
|
q{..."}, |
|
718
|
|
|
|
|
|
|
pos $$textref; |
|
719
|
0
|
|
|
|
|
0
|
return; |
|
720
|
|
|
|
|
|
|
} |
|
721
|
848
|
|
|
|
|
1081
|
my $oppos = pos($$textref); |
|
722
|
848
|
|
|
|
|
1201
|
my $initial = substr($$textref,$oppos,1); |
|
723
|
848
|
100
|
100
|
|
|
3771
|
if ($initial && $maybe_quote{$initial} |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
724
|
|
|
|
|
|
|
|| $allow_slash_match && $initial eq '/' |
|
725
|
|
|
|
|
|
|
|| $allow_qmark_match && $initial eq '?') |
|
726
|
|
|
|
|
|
|
{ |
|
727
|
60
|
100
|
|
|
|
976
|
unless ($$textref =~ m/\G \Q$initial\E [^\\$initial]* (\\.[^\\$initial]*)* \Q$initial\E /gcsx) |
|
728
|
|
|
|
|
|
|
{ |
|
729
|
2
|
|
|
|
|
10
|
_failmsg qq{Did not find closing delimiter to match '$initial' at "} . |
|
730
|
|
|
|
|
|
|
substr($$textref, $oppos, 20) . |
|
731
|
|
|
|
|
|
|
q{..."}, |
|
732
|
|
|
|
|
|
|
pos $$textref; |
|
733
|
2
|
|
|
|
|
5
|
pos $$textref = $startpos; |
|
734
|
2
|
|
|
|
|
8
|
return; |
|
735
|
|
|
|
|
|
|
} |
|
736
|
58
|
|
|
|
|
110
|
$modpos= pos($$textref); |
|
737
|
58
|
|
|
|
|
75
|
$rd1pos = $modpos-1; |
|
738
|
|
|
|
|
|
|
|
|
739
|
58
|
100
|
66
|
|
|
196
|
if ($initial eq '/' || $initial eq '?') |
|
740
|
|
|
|
|
|
|
{ |
|
741
|
17
|
|
|
|
|
97
|
$$textref =~ m/\G$mods{none}/gc |
|
742
|
|
|
|
|
|
|
} |
|
743
|
|
|
|
|
|
|
|
|
744
|
58
|
|
|
|
|
99
|
my $endpos = pos($$textref); |
|
745
|
58
|
|
|
|
|
129
|
$ref2qmarkvalid{$textref} = $ref2slashvalid{$textref} = 0; |
|
746
|
|
|
|
|
|
|
return ( |
|
747
|
58
|
|
|
|
|
254
|
$startpos, $oppos-$startpos, # PREFIX |
|
748
|
|
|
|
|
|
|
$oppos, 0, # NO OPERATOR |
|
749
|
|
|
|
|
|
|
$oppos, 1, # LEFT DEL |
|
750
|
|
|
|
|
|
|
$oppos+1, $rd1pos-$oppos-1, # STR/PAT |
|
751
|
|
|
|
|
|
|
$rd1pos, 1, # RIGHT DEL |
|
752
|
|
|
|
|
|
|
$modpos, 0, # NO 2ND LDEL |
|
753
|
|
|
|
|
|
|
$modpos, 0, # NO 2ND STR |
|
754
|
|
|
|
|
|
|
$modpos, 0, # NO 2ND RDEL |
|
755
|
|
|
|
|
|
|
$modpos, $endpos-$modpos, # MODIFIERS |
|
756
|
|
|
|
|
|
|
$endpos, $textlen-$endpos, # REMAINDER |
|
757
|
|
|
|
|
|
|
); |
|
758
|
|
|
|
|
|
|
} |
|
759
|
|
|
|
|
|
|
|
|
760
|
788
|
100
|
|
|
|
1979
|
unless ($$textref =~ m{\G(\b(?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<(?=[a-zA-Z]|\s*['"`;,]))}gc) |
|
761
|
|
|
|
|
|
|
{ |
|
762
|
705
|
|
|
|
|
2118
|
_failmsg q{No quotelike operator found after prefix at "} . |
|
763
|
|
|
|
|
|
|
substr($$textref, pos($$textref), 20) . |
|
764
|
|
|
|
|
|
|
q{..."}, |
|
765
|
|
|
|
|
|
|
pos $$textref; |
|
766
|
705
|
|
|
|
|
1418
|
pos $$textref = $startpos; |
|
767
|
705
|
|
|
|
|
1661
|
return; |
|
768
|
|
|
|
|
|
|
} |
|
769
|
|
|
|
|
|
|
|
|
770
|
83
|
|
|
|
|
159
|
my $op = $1; |
|
771
|
83
|
|
|
|
|
112
|
$preld1pos = pos($$textref); |
|
772
|
83
|
100
|
|
|
|
152
|
if ($op eq '<<') { |
|
773
|
28
|
|
|
|
|
29
|
$ld1pos = pos($$textref); |
|
774
|
28
|
|
|
|
|
34
|
my $label; |
|
775
|
28
|
100
|
|
|
|
113
|
if ($$textref =~ m{\G([A-Za-z_]\w*)}gc) { |
|
|
|
100
|
|
|
|
|
|
|
776
|
9
|
|
|
|
|
14
|
$label = $1; |
|
777
|
|
|
|
|
|
|
} |
|
778
|
|
|
|
|
|
|
elsif ($$textref =~ m{ \G ' ([^'\\]* (?:\\.[^'\\]*)*) ' |
|
779
|
|
|
|
|
|
|
| \G " ([^"\\]* (?:\\.[^"\\]*)*) " |
|
780
|
|
|
|
|
|
|
| \G ` ([^`\\]* (?:\\.[^`\\]*)*) ` |
|
781
|
|
|
|
|
|
|
}gcsx) { |
|
782
|
16
|
|
|
|
|
31
|
$label = $+; |
|
783
|
|
|
|
|
|
|
} |
|
784
|
|
|
|
|
|
|
else { |
|
785
|
3
|
|
|
|
|
6
|
$label = ""; |
|
786
|
|
|
|
|
|
|
} |
|
787
|
28
|
|
|
|
|
38
|
my $extrapos = pos($$textref); |
|
788
|
28
|
|
|
|
|
63
|
$$textref =~ m{.*\n}gc; |
|
789
|
28
|
|
|
|
|
65
|
$str1pos = pos($$textref)--; |
|
790
|
28
|
100
|
|
|
|
244
|
unless ($$textref =~ m{.*?\n(?=\Q$label\E\n)}gc) { |
|
791
|
4
|
|
|
|
|
20
|
_failmsg qq{Missing here doc terminator ('$label') after "} . |
|
792
|
|
|
|
|
|
|
substr($$textref, $startpos, 20) . |
|
793
|
|
|
|
|
|
|
q{..."}, |
|
794
|
|
|
|
|
|
|
pos $$textref; |
|
795
|
4
|
|
|
|
|
9
|
pos $$textref = $startpos; |
|
796
|
4
|
|
|
|
|
11
|
return; |
|
797
|
|
|
|
|
|
|
} |
|
798
|
24
|
|
|
|
|
39
|
$rd1pos = pos($$textref); |
|
799
|
24
|
|
|
|
|
100
|
$$textref =~ m{\Q$label\E\n}gc; |
|
800
|
24
|
|
|
|
|
38
|
$ld2pos = pos($$textref); |
|
801
|
24
|
|
|
|
|
46
|
$ref2qmarkvalid{$textref} = $ref2slashvalid{$textref} = 0; |
|
802
|
|
|
|
|
|
|
return ( |
|
803
|
24
|
|
|
|
|
107
|
$startpos, $oppos-$startpos, # PREFIX |
|
804
|
|
|
|
|
|
|
$oppos, length($op), # OPERATOR |
|
805
|
|
|
|
|
|
|
$ld1pos, $extrapos-$ld1pos, # LEFT DEL |
|
806
|
|
|
|
|
|
|
$str1pos, $rd1pos-$str1pos, # STR/PAT |
|
807
|
|
|
|
|
|
|
$rd1pos, $ld2pos-$rd1pos, # RIGHT DEL |
|
808
|
|
|
|
|
|
|
$ld2pos, 0, # NO 2ND LDEL |
|
809
|
|
|
|
|
|
|
$ld2pos, 0, # NO 2ND STR |
|
810
|
|
|
|
|
|
|
$ld2pos, 0, # NO 2ND RDEL |
|
811
|
|
|
|
|
|
|
$ld2pos, 0, # NO MODIFIERS |
|
812
|
|
|
|
|
|
|
$ld2pos, $textlen-$ld2pos, # REMAINDER |
|
813
|
|
|
|
|
|
|
$extrapos, $str1pos-$extrapos, # FILLETED BIT |
|
814
|
|
|
|
|
|
|
); |
|
815
|
|
|
|
|
|
|
} |
|
816
|
|
|
|
|
|
|
|
|
817
|
55
|
|
|
|
|
104
|
$$textref =~ m/\G\s*/gc; |
|
818
|
55
|
|
|
|
|
70
|
$ld1pos = pos($$textref); |
|
819
|
55
|
|
|
|
|
96
|
$str1pos = $ld1pos+1; |
|
820
|
|
|
|
|
|
|
|
|
821
|
55
|
50
|
|
|
|
173
|
if ($$textref !~ m/\G(\S)/gc) # SHOULD USE LOOKAHEAD |
|
|
|
100
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
{ |
|
823
|
0
|
|
|
|
|
0
|
_failmsg "No block delimiter found after quotelike $op", |
|
824
|
|
|
|
|
|
|
pos $$textref; |
|
825
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
|
826
|
0
|
|
|
|
|
0
|
return; |
|
827
|
|
|
|
|
|
|
} |
|
828
|
|
|
|
|
|
|
elsif (substr($$textref, $ld1pos, 2) eq '=>') |
|
829
|
|
|
|
|
|
|
{ |
|
830
|
1
|
|
|
|
|
6
|
_failmsg "quotelike $op was actually quoted by '=>'", |
|
831
|
|
|
|
|
|
|
pos $$textref; |
|
832
|
1
|
|
|
|
|
3
|
pos $$textref = $startpos; |
|
833
|
1
|
|
|
|
|
2
|
return; |
|
834
|
|
|
|
|
|
|
} |
|
835
|
54
|
|
|
|
|
104
|
pos($$textref) = $ld1pos; # HAVE TO DO THIS BECAUSE LOOKAHEAD BROKEN |
|
836
|
54
|
|
|
|
|
151
|
my ($ldel1, $rdel1) = ("\Q$1","\Q$1"); |
|
837
|
54
|
100
|
|
|
|
132
|
if ($ldel1 =~ /[[(<{]/) |
|
838
|
|
|
|
|
|
|
{ |
|
839
|
33
|
|
|
|
|
56
|
$rdel1 =~ tr/[({])}>/; |
|
840
|
|
|
|
|
|
|
defined(_match_bracketed($textref,qr/\G/,qr/\G($ldel1)/,"","",qr/\G($rdel1)/)) |
|
841
|
33
|
50
|
|
|
|
261
|
|| do { pos $$textref = $startpos; return }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
842
|
33
|
|
|
|
|
72
|
$ld2pos = pos($$textref); |
|
843
|
33
|
|
|
|
|
49
|
$rd1pos = $ld2pos-1; |
|
844
|
|
|
|
|
|
|
} |
|
845
|
|
|
|
|
|
|
else |
|
846
|
|
|
|
|
|
|
{ |
|
847
|
|
|
|
|
|
|
$$textref =~ /\G$ldel1[^\\$ldel1]*(\\.[^\\$ldel1]*)*$ldel1/gcs |
|
848
|
21
|
50
|
|
|
|
233
|
|| do { pos $$textref = $startpos; return }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
849
|
21
|
|
|
|
|
44
|
$ld2pos = $rd1pos = pos($$textref)-1; |
|
850
|
|
|
|
|
|
|
} |
|
851
|
|
|
|
|
|
|
|
|
852
|
54
|
100
|
|
|
|
174
|
my $second_arg = $op =~ /s|tr|y/ ? 1 : 0; |
|
853
|
54
|
100
|
|
|
|
89
|
if ($second_arg) |
|
854
|
|
|
|
|
|
|
{ |
|
855
|
23
|
|
|
|
|
33
|
my ($ldel2, $rdel2); |
|
856
|
23
|
100
|
|
|
|
66
|
if ($ldel1 =~ /[[(<{]/) |
|
857
|
|
|
|
|
|
|
{ |
|
858
|
11
|
50
|
|
|
|
29
|
unless ($$textref =~ /\G\s*(\S)/gc) # SHOULD USE LOOKAHEAD |
|
859
|
|
|
|
|
|
|
{ |
|
860
|
0
|
|
|
|
|
0
|
_failmsg "Missing second block for quotelike $op", |
|
861
|
|
|
|
|
|
|
pos $$textref; |
|
862
|
0
|
|
|
|
|
0
|
pos $$textref = $startpos; |
|
863
|
0
|
|
|
|
|
0
|
return; |
|
864
|
|
|
|
|
|
|
} |
|
865
|
11
|
|
|
|
|
23
|
$ldel2 = $rdel2 = "\Q$1"; |
|
866
|
11
|
|
|
|
|
14
|
$rdel2 =~ tr/[({])}>/; |
|
867
|
|
|
|
|
|
|
} |
|
868
|
|
|
|
|
|
|
else |
|
869
|
|
|
|
|
|
|
{ |
|
870
|
12
|
|
|
|
|
19
|
$ldel2 = $rdel2 = $ldel1; |
|
871
|
|
|
|
|
|
|
} |
|
872
|
23
|
|
|
|
|
47
|
$str2pos = $ld2pos+1; |
|
873
|
|
|
|
|
|
|
|
|
874
|
23
|
100
|
|
|
|
52
|
if ($ldel2 =~ /[[(<{]/) |
|
875
|
|
|
|
|
|
|
{ |
|
876
|
9
|
|
|
|
|
19
|
pos($$textref)--; # OVERCOME BROKEN LOOKAHEAD |
|
877
|
|
|
|
|
|
|
defined(_match_bracketed($textref,qr/\G/,qr/\G($ldel2)/,"","",qr/\G($rdel2)/)) |
|
878
|
9
|
50
|
|
|
|
59
|
|| do { pos $$textref = $startpos; return }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
879
|
|
|
|
|
|
|
} |
|
880
|
|
|
|
|
|
|
else |
|
881
|
|
|
|
|
|
|
{ |
|
882
|
|
|
|
|
|
|
$$textref =~ /[^\\$ldel2]*(\\.[^\\$ldel2]*)*$ldel2/gcs |
|
883
|
14
|
50
|
|
|
|
107
|
|| do { pos $$textref = $startpos; return }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
884
|
|
|
|
|
|
|
} |
|
885
|
23
|
|
|
|
|
39
|
$rd2pos = pos($$textref)-1; |
|
886
|
|
|
|
|
|
|
} |
|
887
|
|
|
|
|
|
|
else |
|
888
|
|
|
|
|
|
|
{ |
|
889
|
31
|
|
|
|
|
43
|
$ld2pos = $str2pos = $rd2pos = $rd1pos; |
|
890
|
|
|
|
|
|
|
} |
|
891
|
|
|
|
|
|
|
|
|
892
|
54
|
|
|
|
|
68
|
$modpos = pos $$textref; |
|
893
|
|
|
|
|
|
|
|
|
894
|
54
|
|
|
|
|
313
|
$$textref =~ m/\G($mods{$op})/gc; |
|
895
|
54
|
|
|
|
|
97
|
my $endpos = pos $$textref; |
|
896
|
54
|
|
|
|
|
121
|
$ref2qmarkvalid{$textref} = $ref2slashvalid{$textref} = undef; |
|
897
|
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
return ( |
|
899
|
54
|
|
|
|
|
203
|
$startpos, $oppos-$startpos, # PREFIX |
|
900
|
|
|
|
|
|
|
$oppos, length($op), # OPERATOR |
|
901
|
|
|
|
|
|
|
$ld1pos, 1, # LEFT DEL |
|
902
|
|
|
|
|
|
|
$str1pos, $rd1pos-$str1pos, # STR/PAT |
|
903
|
|
|
|
|
|
|
$rd1pos, 1, # RIGHT DEL |
|
904
|
|
|
|
|
|
|
$ld2pos, $second_arg, # 2ND LDEL (MAYBE) |
|
905
|
|
|
|
|
|
|
$str2pos, $rd2pos-$str2pos, # 2ND STR (MAYBE) |
|
906
|
|
|
|
|
|
|
$rd2pos, $second_arg, # 2ND RDEL (MAYBE) |
|
907
|
|
|
|
|
|
|
$modpos, $endpos-$modpos, # MODIFIERS |
|
908
|
|
|
|
|
|
|
$endpos, $textlen-$endpos, # REMAINDER |
|
909
|
|
|
|
|
|
|
); |
|
910
|
|
|
|
|
|
|
} |
|
911
|
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
my $def_func = [ |
|
913
|
|
|
|
|
|
|
sub { extract_variable($_[0], '') }, |
|
914
|
|
|
|
|
|
|
sub { extract_quotelike($_[0],'') }, |
|
915
|
|
|
|
|
|
|
sub { extract_codeblock($_[0],'{}','') }, |
|
916
|
|
|
|
|
|
|
]; |
|
917
|
|
|
|
|
|
|
my %ref_not_regex = map +($_=>1), qw(CODE Text::Balanced::Extractor); |
|
918
|
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
sub _update_patvalid { |
|
920
|
591
|
|
|
591
|
|
1110
|
my ($textref, $text) = @_; |
|
921
|
591
|
100
|
100
|
|
|
6030
|
if ($ref2slashvalid{$textref} && $text =~ m/(?:$RE_NUM|[\)\]])\s*$/) |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
922
|
|
|
|
|
|
|
{ |
|
923
|
26
|
|
|
|
|
90
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 0; |
|
924
|
|
|
|
|
|
|
} elsif (!$ref2slashvalid{$textref} && $text =~ m/$RE_PREREGEX_PAT\s*$/) |
|
925
|
|
|
|
|
|
|
{ |
|
926
|
2
|
|
|
|
|
9
|
$ref2slashvalid{$textref} = $ref2qmarkvalid{$textref} = 1; |
|
927
|
|
|
|
|
|
|
} elsif (!$ref2slashvalid{$textref} && $text =~ m/$RE_EXPR_PAT\s*$/) |
|
928
|
|
|
|
|
|
|
{ |
|
929
|
46
|
|
|
|
|
80
|
$ref2slashvalid{$textref} = 1; |
|
930
|
46
|
|
|
|
|
141
|
$ref2qmarkvalid{$textref} = 0; |
|
931
|
|
|
|
|
|
|
} |
|
932
|
|
|
|
|
|
|
} |
|
933
|
|
|
|
|
|
|
sub extract_multiple (;$$$$) # ($text, $functions_ref, $max_fields, $ignoreunknown) |
|
934
|
|
|
|
|
|
|
{ |
|
935
|
41
|
100
|
|
41
|
1
|
3965
|
my $textref = defined($_[0]) ? \$_[0] : \$_; |
|
936
|
41
|
50
|
|
|
|
194
|
$ref2slashvalid{$textref} = 1, $ref2qmarkvalid{$textref} = 0 if !pos($$textref); # reset |
|
937
|
41
|
|
|
|
|
79
|
my $posbug = pos; |
|
938
|
41
|
|
|
|
|
61
|
my ($lastpos, $firstpos); |
|
939
|
41
|
|
|
|
|
69
|
my @fields = (); |
|
940
|
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
#for ($$textref) |
|
942
|
|
|
|
|
|
|
{ |
|
943
|
41
|
100
|
|
|
|
62
|
my @func = defined $_[1] ? @{$_[1]} : @{$def_func}; |
|
|
41
|
|
|
|
|
162
|
|
|
|
29
|
|
|
|
|
68
|
|
|
|
12
|
|
|
|
|
29
|
|
|
944
|
41
|
100
|
66
|
|
|
158
|
my $max = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000; |
|
945
|
41
|
|
|
|
|
60
|
my $igunk = $_[3]; |
|
946
|
|
|
|
|
|
|
|
|
947
|
41
|
|
50
|
|
|
244
|
pos $$textref ||= 0; |
|
948
|
|
|
|
|
|
|
|
|
949
|
41
|
100
|
|
|
|
104
|
unless (wantarray) |
|
950
|
|
|
|
|
|
|
{ |
|
951
|
9
|
|
|
9
|
|
79
|
use Carp; |
|
|
9
|
|
|
|
|
19
|
|
|
|
9
|
|
|
|
|
7086
|
|
|
952
|
14
|
0
|
33
|
|
|
66
|
carp "extract_multiple reset maximal count to 1 in scalar context" |
|
|
|
|
33
|
|
|
|
|
|
953
|
|
|
|
|
|
|
if $^W && defined($_[2]) && $max > 1; |
|
954
|
14
|
|
|
|
|
24
|
$max = 1 |
|
955
|
|
|
|
|
|
|
} |
|
956
|
|
|
|
|
|
|
|
|
957
|
41
|
|
|
|
|
60
|
my @class; |
|
958
|
41
|
|
|
|
|
91
|
foreach my $func ( @func ) |
|
959
|
|
|
|
|
|
|
{ |
|
960
|
81
|
|
|
|
|
91
|
push @class, undef; |
|
961
|
81
|
100
|
|
|
|
158
|
($class[-1], $func) = %$func if ref($func) eq 'HASH'; |
|
962
|
81
|
100
|
|
|
|
368
|
$func = qr/\G$func/ if !$ref_not_regex{ref $func}; |
|
963
|
|
|
|
|
|
|
} |
|
964
|
|
|
|
|
|
|
|
|
965
|
41
|
|
|
|
|
54
|
my $unkpos; |
|
966
|
41
|
|
|
|
|
100
|
FIELD: while (pos($$textref) < length($$textref)) |
|
967
|
|
|
|
|
|
|
{ |
|
968
|
594
|
|
|
|
|
1056
|
foreach my $i ( 0..$#func ) |
|
969
|
|
|
|
|
|
|
{ |
|
970
|
1136
|
|
|
|
|
1146
|
my ($field, $pref); |
|
971
|
1136
|
|
|
|
|
1760
|
my ($class, $func) = ($class[$i], $func[$i]); |
|
972
|
1136
|
|
|
|
|
1070
|
$lastpos = pos $$textref; |
|
973
|
1136
|
100
|
|
|
|
2002
|
if (ref($func) eq 'CODE') |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
974
|
977
|
|
|
|
|
1257
|
{ ($field,undef,$pref) = $func->($$textref) } |
|
975
|
|
|
|
|
|
|
elsif (ref($func) eq 'Text::Balanced::Extractor') |
|
976
|
0
|
|
|
|
|
0
|
{ $field = $func->extract($$textref) } |
|
977
|
|
|
|
|
|
|
elsif( $$textref =~ m/$func[$i]/gc ) |
|
978
|
33
|
100
|
|
|
|
138
|
{ $field = defined($1) |
|
979
|
|
|
|
|
|
|
? $1 |
|
980
|
|
|
|
|
|
|
: substr($$textref, $-[0], $+[0] - $-[0]) |
|
981
|
|
|
|
|
|
|
} |
|
982
|
1136
|
|
100
|
|
|
3045
|
$pref ||= ""; |
|
983
|
1136
|
100
|
100
|
|
|
2058
|
if (defined($field) && length($field)) |
|
984
|
|
|
|
|
|
|
{ |
|
985
|
108
|
100
|
|
|
|
151
|
if (!$igunk) { |
|
986
|
101
|
100
|
100
|
|
|
224
|
$unkpos = $lastpos |
|
987
|
|
|
|
|
|
|
if length($pref) && !defined($unkpos); |
|
988
|
101
|
100
|
|
|
|
142
|
if (defined $unkpos) |
|
989
|
|
|
|
|
|
|
{ |
|
990
|
72
|
|
|
|
|
176
|
push @fields, substr($$textref, $unkpos, $lastpos-$unkpos).$pref; |
|
991
|
72
|
100
|
|
|
|
134
|
$firstpos = $unkpos unless defined $firstpos; |
|
992
|
72
|
|
|
|
|
92
|
undef $unkpos; |
|
993
|
72
|
100
|
|
|
|
122
|
last FIELD if @fields == $max; |
|
994
|
|
|
|
|
|
|
} |
|
995
|
|
|
|
|
|
|
} |
|
996
|
105
|
50
|
|
|
|
192
|
push @fields, $class ? bless(\$field, $class) : $field; |
|
997
|
105
|
|
|
|
|
270
|
_update_patvalid($textref, $fields[-1]); |
|
998
|
105
|
100
|
|
|
|
203
|
$firstpos = $lastpos unless defined $firstpos; |
|
999
|
105
|
|
|
|
|
129
|
$lastpos = pos $$textref; |
|
1000
|
105
|
100
|
|
|
|
178
|
last FIELD if @fields == $max; |
|
1001
|
90
|
|
|
|
|
222
|
next FIELD; |
|
1002
|
|
|
|
|
|
|
} |
|
1003
|
|
|
|
|
|
|
} |
|
1004
|
486
|
50
|
|
|
|
1089
|
if ($$textref =~ /\G(.)/gcs) |
|
1005
|
|
|
|
|
|
|
{ |
|
1006
|
486
|
100
|
100
|
|
|
1028
|
$unkpos = pos($$textref)-1 |
|
1007
|
|
|
|
|
|
|
unless $igunk || defined $unkpos; |
|
1008
|
486
|
|
|
|
|
873
|
_update_patvalid($textref, substr $$textref, $unkpos, pos($$textref)-$unkpos); |
|
1009
|
|
|
|
|
|
|
} |
|
1010
|
|
|
|
|
|
|
} |
|
1011
|
|
|
|
|
|
|
|
|
1012
|
41
|
100
|
|
|
|
113
|
if (defined $unkpos) |
|
1013
|
|
|
|
|
|
|
{ |
|
1014
|
18
|
|
|
|
|
56
|
push @fields, substr($$textref, $unkpos); |
|
1015
|
18
|
100
|
|
|
|
50
|
$firstpos = $unkpos unless defined $firstpos; |
|
1016
|
18
|
|
|
|
|
40
|
$lastpos = length $$textref; |
|
1017
|
|
|
|
|
|
|
} |
|
1018
|
41
|
|
|
|
|
84
|
last; |
|
1019
|
|
|
|
|
|
|
} |
|
1020
|
|
|
|
|
|
|
|
|
1021
|
41
|
|
|
|
|
80
|
pos $$textref = $lastpos; |
|
1022
|
41
|
100
|
|
|
|
418
|
return @fields if wantarray; |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
14
|
|
100
|
|
|
55
|
$firstpos ||= 0; |
|
1025
|
14
|
|
|
|
|
18
|
eval { substr($$textref,$firstpos,$lastpos-$firstpos)=""; |
|
|
14
|
|
|
|
|
40
|
|
|
1026
|
14
|
|
|
|
|
31
|
pos $$textref = $firstpos }; |
|
1027
|
14
|
|
|
|
|
84
|
return $fields[0]; |
|
1028
|
|
|
|
|
|
|
} |
|
1029
|
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
sub gen_extract_tagged # ($opentag, $closetag, $pre, \%options) |
|
1031
|
|
|
|
|
|
|
{ |
|
1032
|
13
|
|
|
13
|
1
|
12491
|
my $ldel = $_[0]; |
|
1033
|
13
|
|
|
|
|
24
|
my $rdel = $_[1]; |
|
1034
|
13
|
100
|
|
|
|
70
|
my $pre = defined $_[2] ? qr/\G$_[2]/ : qr/\G\s*/; |
|
1035
|
13
|
100
|
|
|
|
53
|
my %options = defined $_[3] ? %{$_[3]} : (); |
|
|
6
|
|
|
|
|
24
|
|
|
1036
|
13
|
100
|
|
|
|
41
|
my $omode = defined $options{fail} ? $options{fail} : ''; |
|
1037
|
3
|
|
|
|
|
11
|
my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}}) |
|
1038
|
|
|
|
|
|
|
: defined($options{reject}) ? $options{reject} |
|
1039
|
13
|
50
|
|
|
|
52
|
: '' |
|
|
|
100
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
; |
|
1041
|
3
|
|
|
|
|
50
|
my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}}) |
|
1042
|
|
|
|
|
|
|
: defined($options{ignore}) ? $options{ignore} |
|
1043
|
13
|
50
|
|
|
|
50
|
: '' |
|
|
|
100
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
; |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
13
|
100
|
|
|
|
39
|
$ldel = $et_default_ldel if !defined $ldel; |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
13
|
|
|
|
|
22
|
my $posbug = pos; |
|
1049
|
13
|
100
|
|
|
|
27
|
for ($ldel, $bad, $ignore) { $_ = qr/$_/ if $_ } |
|
|
39
|
|
|
|
|
467
|
|
|
1050
|
13
|
|
|
|
|
116
|
pos = $posbug; |
|
1051
|
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
my $closure = sub |
|
1053
|
|
|
|
|
|
|
{ |
|
1054
|
40
|
50
|
|
40
|
|
26579
|
my $textref = defined $_[0] ? \$_[0] : \$_; |
|
1055
|
40
|
|
|
|
|
97
|
my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore); |
|
1056
|
|
|
|
|
|
|
|
|
1057
|
40
|
100
|
|
|
|
101
|
return _fail(wantarray, $textref) unless @match; |
|
1058
|
28
|
|
|
|
|
155
|
return _succeed wantarray, $textref, |
|
1059
|
|
|
|
|
|
|
$match[2], $match[3]+$match[5]+$match[7], # MATCH |
|
1060
|
|
|
|
|
|
|
@match[8..9,0..1,2..7]; # REM, PRE, BITS |
|
1061
|
13
|
|
|
|
|
74
|
}; |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
13
|
|
|
|
|
99
|
bless $closure, 'Text::Balanced::Extractor'; |
|
1064
|
|
|
|
|
|
|
} |
|
1065
|
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
package Text::Balanced::Extractor; |
|
1067
|
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
sub extract($$) # ($self, $text) |
|
1069
|
|
|
|
|
|
|
{ |
|
1070
|
0
|
|
|
0
|
|
|
&{$_[0]}($_[1]); |
|
|
0
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
} |
|
1072
|
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
package Text::Balanced::ErrorMsg; |
|
1074
|
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
use overload |
|
1076
|
33
|
|
|
33
|
|
125
|
'""' => sub { "$_[0]->{error}, detected at offset $_[0]->{pos}" }, |
|
1077
|
9
|
|
|
9
|
|
7844
|
fallback => 1; |
|
|
9
|
|
|
|
|
6310
|
|
|
|
9
|
|
|
|
|
64
|
|
|
1078
|
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
1; |
|
1080
|
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
__END__ |