line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture; |
2
|
|
|
|
|
|
|
|
3
|
40
|
|
|
40
|
|
30701
|
use 5.010001; |
|
40
|
|
|
|
|
209
|
|
4
|
40
|
|
|
40
|
|
292
|
use strict; |
|
40
|
|
|
|
|
130
|
|
|
40
|
|
|
|
|
831
|
|
5
|
40
|
|
|
40
|
|
253
|
use warnings; |
|
40
|
|
|
|
|
141
|
|
|
40
|
|
|
|
|
1269
|
|
6
|
|
|
|
|
|
|
|
7
|
40
|
|
|
40
|
|
330
|
use List::SomeUtils qw(none); |
|
40
|
|
|
|
|
145
|
|
|
40
|
|
|
|
|
2086
|
|
8
|
40
|
|
|
40
|
|
265
|
use Readonly; |
|
40
|
|
|
|
|
155
|
|
|
40
|
|
|
|
|
1994
|
|
9
|
40
|
|
|
40
|
|
368
|
use Scalar::Util qw(refaddr); |
|
40
|
|
|
|
|
143
|
|
|
40
|
|
|
|
|
2382
|
|
10
|
|
|
|
|
|
|
|
11
|
40
|
|
|
|
|
2272
|
use Perl::Critic::Utils qw{ |
12
|
|
|
|
|
|
|
:booleans :characters :severities hashify precedence_of |
13
|
|
|
|
|
|
|
split_nodes_on_comma |
14
|
40
|
|
|
40
|
|
381
|
}; |
|
40
|
|
|
|
|
163
|
|
15
|
40
|
|
|
40
|
|
14582
|
use parent 'Perl::Critic::Policy'; |
|
40
|
|
|
|
|
135
|
|
|
40
|
|
|
|
|
305
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our $VERSION = '1.150'; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
#----------------------------------------------------------------------------- |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
Readonly::Scalar my $SPLIT => q{split}; |
22
|
|
|
|
|
|
|
Readonly::Scalar my $WHILE => q{while}; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE => |
25
|
|
|
|
|
|
|
hashify( qw< ${^CAPTURE} > ); |
26
|
|
|
|
|
|
|
# TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized |
27
|
|
|
|
|
|
|
# as a use of capture variable n. |
28
|
|
|
|
|
|
|
Readonly::Hash my %CAPTURE_REFERENCE => ( |
29
|
|
|
|
|
|
|
hashify( qw< $+ $- ${^CAPTURE_ALL} > ), |
30
|
|
|
|
|
|
|
%ZERO_BASED_CAPTURE_REFERENCE ); |
31
|
|
|
|
|
|
|
Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => ( |
32
|
|
|
|
|
|
|
hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ), |
33
|
|
|
|
|
|
|
%CAPTURE_REFERENCE ); |
34
|
|
|
|
|
|
|
Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > ); |
35
|
|
|
|
|
|
|
Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => ( |
36
|
|
|
|
|
|
|
hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ), |
37
|
|
|
|
|
|
|
%CAPTURE_ARRAY ); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value}; |
40
|
|
|
|
|
|
|
Readonly::Scalar my $EXPL => [252]; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
#----------------------------------------------------------------------------- |
43
|
|
|
|
|
|
|
|
44
|
89
|
|
|
89
|
0
|
1633
|
sub supported_parameters { return qw() } |
45
|
74
|
|
|
74
|
1
|
312
|
sub default_severity { return $SEVERITY_MEDIUM } |
46
|
86
|
|
|
86
|
1
|
357
|
sub default_themes { return qw( core pbp maintenance ) } |
47
|
|
|
|
|
|
|
sub applies_to { |
48
|
30
|
|
|
30
|
1
|
89
|
return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
#----------------------------------------------------------------------------- |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub violates { |
56
|
0
|
|
|
0
|
1
|
|
my ( $self, $elem, $doc ) = @_; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# optimization: don't bother parsing the regexp if there are no parens |
59
|
0
|
0
|
|
|
|
|
return if 0 > index $elem->content(), '('; |
60
|
|
|
|
|
|
|
|
61
|
0
|
0
|
|
|
|
|
my $re = $doc->ppix_regexp_from_element( $elem ) or return; |
62
|
0
|
0
|
|
|
|
|
$re->failures() and return; |
63
|
|
|
|
|
|
|
|
64
|
0
|
0
|
|
|
|
|
my $ncaptures = $re->max_capture_number() or return; |
65
|
|
|
|
|
|
|
|
66
|
0
|
|
|
|
|
|
my @captures = ( undef ) x $ncaptures; # List of expected captures |
67
|
|
|
|
|
|
|
|
68
|
0
|
|
|
|
|
|
my %named_captures; # List of expected named captures. |
69
|
|
|
|
|
|
|
# Unlike the numbered capture logic, %named_captures |
70
|
|
|
|
|
|
|
# entries are made undefined when a use of the name is |
71
|
|
|
|
|
|
|
# found. Otherwise two hashes would be needed, one to |
72
|
|
|
|
|
|
|
# become defined when a use is found, and one to hold |
73
|
|
|
|
|
|
|
# the mapping of name to number. |
74
|
0
|
0
|
|
|
|
|
foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture' |
|
0
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
) || [] } ) { |
76
|
|
|
|
|
|
|
# There can be more than one capture with the same name, so we need to |
77
|
|
|
|
|
|
|
# record all of them. There will be duplications if the 'branch reset' |
78
|
|
|
|
|
|
|
# "(?| ... )" pattern is used, but this is benign given how numbered |
79
|
|
|
|
|
|
|
# captures are recorded. |
80
|
0
|
|
0
|
|
|
|
push @{ $named_captures{ $struct->name() } ||= [] }, $struct->number(); |
|
0
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Look for references to the capture in the regex itself |
84
|
0
|
0
|
|
|
|
|
return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc ); |
85
|
|
|
|
|
|
|
|
86
|
0
|
0
|
0
|
|
|
|
if ( $re->modifier_asserted( 'g' ) |
87
|
|
|
|
|
|
|
and not _check_if_in_while_condition_or_block( $elem ) ) { |
88
|
0
|
|
|
|
|
|
$ncaptures = $NUM_CAPTURES_FOR_GLOBAL; |
89
|
0
|
|
|
|
|
|
$#captures = $ncaptures - 1; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
0
|
0
|
0
|
|
|
|
return if _enough_assignments($elem, \@captures) && !%named_captures; |
93
|
0
|
0
|
0
|
|
|
|
return if _is_in_slurpy_array_context($elem) && !%named_captures; |
94
|
0
|
0
|
|
|
|
|
return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc); |
95
|
|
|
|
|
|
|
|
96
|
0
|
|
|
|
|
|
return $self->violation( $DESC, $EXPL, $elem ); |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# Find uses of both numbered and named capture variables in the regexp itself. |
100
|
|
|
|
|
|
|
# Return true if all are used. |
101
|
|
|
|
|
|
|
sub _enough_uses_in_regexp { |
102
|
0
|
|
|
0
|
|
|
my ( $re, $captures, $named_captures, $doc ) = @_; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Look for references to the capture in the regex itself. Note that this |
105
|
|
|
|
|
|
|
# will also find backreferences in the replacement string of s///. |
106
|
0
|
0
|
|
|
|
|
foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' ) |
|
0
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|| [] } ) { |
108
|
0
|
0
|
|
|
|
|
if ( $token->is_named() ) { |
109
|
0
|
|
|
|
|
|
_record_named_capture( $token->name(), $captures, $named_captures ); |
110
|
|
|
|
|
|
|
} else { |
111
|
0
|
|
|
|
|
|
_record_numbered_capture( $token->absolute(), $captures ); |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
0
|
0
|
|
|
|
|
foreach my $token ( @{ $re->find( |
|
0
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Code' ) || [] } ) { |
117
|
0
|
0
|
|
|
|
|
my $ppi = $token->ppi() or next; |
118
|
0
|
|
|
|
|
|
_check_node_children( $ppi, { |
119
|
|
|
|
|
|
|
regexp => $re, |
120
|
|
|
|
|
|
|
numbered_captures => $captures, |
121
|
|
|
|
|
|
|
named_captures => $named_captures, |
122
|
|
|
|
|
|
|
document => $doc, |
123
|
|
|
|
|
|
|
}, _make_regexp_checker() ); |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
0
|
|
|
0
|
|
|
return ( none {not defined} @{$captures} ) |
127
|
|
|
|
|
|
|
&& ( !%{$named_captures} || |
128
|
0
|
|
0
|
0
|
|
|
none {defined} values %{$named_captures} ); |
|
0
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub _enough_assignments { |
132
|
0
|
|
|
0
|
|
|
my ($elem, $captures) = @_; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# look backward for the assignment operator |
135
|
0
|
|
|
|
|
|
my $psib = $elem->sprevious_sibling; |
136
|
|
|
|
|
|
|
SIBLING: |
137
|
0
|
|
|
|
|
|
while (1) { |
138
|
0
|
0
|
|
|
|
|
return if !$psib; |
139
|
0
|
0
|
|
|
|
|
if ($psib->isa('PPI::Token::Operator')) { |
140
|
0
|
0
|
|
|
|
|
last SIBLING if q{=} eq $psib->content; |
141
|
0
|
0
|
|
|
|
|
return if q{!~} eq $psib->content; |
142
|
|
|
|
|
|
|
} |
143
|
0
|
|
|
|
|
|
$psib = $psib->sprevious_sibling; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
$psib = $psib->sprevious_sibling; |
147
|
0
|
0
|
|
|
|
|
return if !$psib; # syntax error: '=' at the beginning of a statement??? |
148
|
|
|
|
|
|
|
|
149
|
0
|
0
|
|
|
|
|
if ($psib->isa('PPI::Token::Symbol')) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# @foo = m/(foo)/ |
151
|
|
|
|
|
|
|
# @$foo = m/(foo)/ |
152
|
|
|
|
|
|
|
# %foo = m/(foo)/ |
153
|
|
|
|
|
|
|
# %$foo = m/(foo)/ |
154
|
0
|
0
|
|
|
|
|
return $TRUE if _symbol_is_slurpy($psib); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
} elsif ($psib->isa('PPI::Structure::Block')) { |
157
|
|
|
|
|
|
|
# @{$foo} = m/(foo)/ |
158
|
|
|
|
|
|
|
# %{$foo} = m/(foo)/ |
159
|
0
|
0
|
|
|
|
|
return $TRUE if _is_preceded_by_array_or_hash_cast($psib); |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
} elsif ($psib->isa('PPI::Structure::List')) { |
162
|
|
|
|
|
|
|
# () = m/(foo)/ |
163
|
|
|
|
|
|
|
# ($foo) = m/(foo)/ |
164
|
|
|
|
|
|
|
# ($foo,$bar) = m/(foo)(bar)/ |
165
|
|
|
|
|
|
|
# (@foo) = m/(foo)(bar)/ |
166
|
|
|
|
|
|
|
# ($foo,@foo) = m/(foo)(bar)/ |
167
|
|
|
|
|
|
|
# ($foo,@$foo) = m/(foo)(bar)/ |
168
|
|
|
|
|
|
|
# ($foo,@{$foo}) = m/(foo)(bar)/ |
169
|
|
|
|
|
|
|
|
170
|
0
|
|
|
|
|
|
my @args = $psib->schildren; |
171
|
0
|
0
|
|
|
|
|
return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Forward looking: PPI might change in v1.200 so schild(0) is a |
174
|
|
|
|
|
|
|
# PPI::Statement::Expression. |
175
|
0
|
0
|
0
|
|
|
|
if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) { |
176
|
0
|
|
|
|
|
|
@args = $args[0]->schildren; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
|
my @parts = split_nodes_on_comma(@args); |
180
|
|
|
|
|
|
|
PART: |
181
|
0
|
|
|
|
|
|
for my $i (0 .. $#parts) { |
182
|
0
|
0
|
|
|
|
|
if (1 == @{$parts[$i]}) { |
|
0
|
|
|
|
|
|
|
183
|
0
|
|
|
|
|
|
my $var = $parts[$i]->[0]; |
184
|
0
|
0
|
0
|
|
|
|
if ($var->isa('PPI::Token::Symbol') || $var->isa('PPI::Token::Cast')) { |
185
|
0
|
0
|
|
|
|
|
return $TRUE if _has_array_sigil($var); |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
} |
188
|
0
|
|
|
|
|
|
_record_numbered_capture( $i + 1, $captures ); |
189
|
|
|
|
|
|
|
# ith variable capture |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
0
|
|
|
0
|
|
|
return none {not defined} @{$captures}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
sub _symbol_is_slurpy { |
197
|
0
|
|
|
0
|
|
|
my ($symbol) = @_; |
198
|
|
|
|
|
|
|
|
199
|
0
|
0
|
|
|
|
|
return $TRUE if _has_array_sigil($symbol); |
200
|
0
|
0
|
|
|
|
|
return $TRUE if _has_hash_sigil($symbol); |
201
|
0
|
0
|
|
|
|
|
return $TRUE if _is_preceded_by_array_or_hash_cast($symbol); |
202
|
0
|
|
|
|
|
|
return; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub _has_array_sigil { |
206
|
0
|
|
|
0
|
|
|
my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast |
207
|
|
|
|
|
|
|
|
208
|
0
|
|
|
|
|
|
return q{@} eq substr $elem->content, 0, 1; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub _has_hash_sigil { |
212
|
0
|
|
|
0
|
|
|
my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast |
213
|
|
|
|
|
|
|
|
214
|
0
|
|
|
|
|
|
return q{%} eq substr $elem->content, 0, 1; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub _is_preceded_by_array_or_hash_cast { |
218
|
0
|
|
|
0
|
|
|
my ($elem) = @_; |
219
|
0
|
|
|
|
|
|
my $psib = $elem->sprevious_sibling; |
220
|
0
|
|
|
|
|
|
my $cast; |
221
|
0
|
|
0
|
|
|
|
while ($psib && $psib->isa('PPI::Token::Cast')) { |
222
|
0
|
|
|
|
|
|
$cast = $psib; |
223
|
0
|
|
|
|
|
|
$psib = $psib->sprevious_sibling; |
224
|
|
|
|
|
|
|
} |
225
|
0
|
0
|
|
|
|
|
return if !$cast; |
226
|
0
|
|
|
|
|
|
my $sigil = substr $cast->content, 0, 1; |
227
|
0
|
|
0
|
|
|
|
return q{@} eq $sigil || q{%} eq $sigil; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
sub _is_in_slurpy_array_context { |
231
|
0
|
|
|
0
|
|
|
my ($elem) = @_; |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# return true is the result of the regexp is passed to a subroutine. |
234
|
|
|
|
|
|
|
# doesn't check for array context due to assignment. |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# look backward for explicit regex operator |
237
|
0
|
|
|
|
|
|
my $psib = $elem->sprevious_sibling; |
238
|
0
|
0
|
0
|
|
|
|
if ($psib && $psib->content eq q{=~}) { |
239
|
|
|
|
|
|
|
# Track back through value |
240
|
0
|
|
|
|
|
|
$psib = _skip_lhs($psib); |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
0
|
0
|
|
|
|
|
if (!$psib) { |
244
|
0
|
|
|
|
|
|
my $parent = $elem->parent; |
245
|
0
|
0
|
|
|
|
|
return if !$parent; |
246
|
0
|
0
|
|
|
|
|
if ($parent->isa('PPI::Statement')) { |
247
|
0
|
|
|
|
|
|
$parent = $parent->parent; |
248
|
0
|
0
|
|
|
|
|
return if !$parent; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# Return true if we have a list that isn't part of a foreach loop. |
252
|
|
|
|
|
|
|
# TECHNICAL DEBT: This code is basically shared with |
253
|
|
|
|
|
|
|
# RequireCheckingReturnValueOfEval. I don't want to put this code |
254
|
|
|
|
|
|
|
# into Perl::Critic::Utils::*, but I don't have time to sort out |
255
|
|
|
|
|
|
|
# PPIx::Utilities::Structure::List yet. |
256
|
0
|
0
|
|
|
|
|
if ( $parent->isa('PPI::Structure::List') ) { |
257
|
0
|
0
|
|
|
|
|
my $parent_statement = $parent->statement() or return $TRUE; |
258
|
0
|
0
|
|
|
|
|
return $TRUE if not |
259
|
|
|
|
|
|
|
$parent_statement->isa('PPI::Statement::Compound'); |
260
|
0
|
0
|
|
|
|
|
return $TRUE if $parent_statement->type() ne 'foreach'; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
0
|
0
|
|
|
|
|
return $TRUE if $parent->isa('PPI::Structure::Constructor'); |
264
|
0
|
0
|
|
|
|
|
if ($parent->isa('PPI::Structure::Block')) { |
265
|
0
|
0
|
|
|
|
|
return $TRUE |
266
|
|
|
|
|
|
|
if |
267
|
|
|
|
|
|
|
refaddr($elem->statement) |
268
|
|
|
|
|
|
|
eq refaddr([$parent->schildren]->[-1]); |
269
|
|
|
|
|
|
|
} |
270
|
0
|
|
|
|
|
|
return; |
271
|
|
|
|
|
|
|
} |
272
|
0
|
0
|
|
|
|
|
if ($psib->isa('PPI::Token::Operator')) { |
273
|
|
|
|
|
|
|
# Most operators kill slurpiness (except assignment, which is handled elsewhere). |
274
|
0
|
|
|
|
|
|
return q{,} eq $psib->content; |
275
|
|
|
|
|
|
|
} |
276
|
0
|
|
|
|
|
|
return $TRUE; |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
sub _skip_lhs { |
280
|
0
|
|
|
0
|
|
|
my ($elem) = @_; |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
# TODO: better implementation to handle casts, expressions, subcalls, etc. |
283
|
0
|
|
|
|
|
|
$elem = $elem->sprevious_sibling(); |
284
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
return $elem; |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
sub _enough_magic { |
289
|
0
|
|
|
0
|
|
|
my ($elem, $re, $captures, $named_captures, $doc) = @_; |
290
|
|
|
|
|
|
|
|
291
|
0
|
|
|
|
|
|
_check_for_magic($elem, $re, $captures, $named_captures, $doc); |
292
|
|
|
|
|
|
|
|
293
|
0
|
|
|
0
|
|
|
return ( none {not defined} @{$captures} ) |
294
|
|
|
|
|
|
|
&& ( !%{$named_captures} || |
295
|
0
|
|
0
|
0
|
|
|
none {defined} values %{$named_captures} ); |
|
0
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# void return |
299
|
|
|
|
|
|
|
sub _check_for_magic { |
300
|
0
|
|
|
0
|
|
|
my ($elem, $re, $captures, $named_captures, $doc) = @_; |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
# Search for $1..$9 in : |
303
|
|
|
|
|
|
|
# * the rest of this statement |
304
|
|
|
|
|
|
|
# * subsequent sibling statements |
305
|
|
|
|
|
|
|
# * if this is in a conditional boolean, the if/else bodies of the conditional |
306
|
|
|
|
|
|
|
# * if this is in a while/for condition, the loop body |
307
|
|
|
|
|
|
|
# But NO intervening regexps! |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
# Package up the usual arguments for _check_rest_of_statement(). |
310
|
0
|
|
|
|
|
|
my $arg = { |
311
|
|
|
|
|
|
|
regexp => $re, |
312
|
|
|
|
|
|
|
numbered_captures => $captures, |
313
|
|
|
|
|
|
|
named_captures => $named_captures, |
314
|
|
|
|
|
|
|
document => $doc, |
315
|
|
|
|
|
|
|
}; |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# Capture whether or not the regular expression is negated -- that |
318
|
|
|
|
|
|
|
# is, whether it is preceded by the '!~' binding operator. |
319
|
0
|
0
|
|
|
|
|
if ( my $prior_token = $elem->sprevious_sibling() ) { |
320
|
0
|
|
0
|
|
|
|
$arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) && |
321
|
|
|
|
|
|
|
q<!~> eq $prior_token->content(); |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
|
324
|
0
|
0
|
|
|
|
|
return if ! _check_rest_of_statement( $elem, $arg ); |
325
|
|
|
|
|
|
|
|
326
|
0
|
|
|
|
|
|
my $parent = $elem->parent(); |
327
|
0
|
|
0
|
|
|
|
while ($parent && ! $parent->isa('PPI::Statement::Sub')) { |
328
|
0
|
0
|
|
|
|
|
return if ! _check_rest_of_statement( $parent, $arg ); |
329
|
0
|
|
|
|
|
|
$parent = $parent->parent(); |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
0
|
|
|
|
|
|
return; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# Check if we are in the condition or block of a 'while' |
336
|
|
|
|
|
|
|
sub _check_if_in_while_condition_or_block { |
337
|
0
|
|
|
0
|
|
|
my ( $elem ) = @_; |
338
|
0
|
0
|
|
|
|
|
$elem or return; |
339
|
|
|
|
|
|
|
|
340
|
0
|
0
|
|
|
|
|
my $parent = $elem->parent() or return; |
341
|
0
|
0
|
|
|
|
|
$parent->isa( 'PPI::Statement' ) or return; |
342
|
|
|
|
|
|
|
|
343
|
0
|
0
|
|
|
|
|
my $item = $parent = $parent->parent() or return; |
344
|
0
|
0
|
|
|
|
|
if ( $item->isa( 'PPI::Structure::Block' ) ) { |
345
|
0
|
0
|
|
|
|
|
$item = $item->sprevious_sibling() or return; |
346
|
|
|
|
|
|
|
} |
347
|
0
|
0
|
|
|
|
|
$item->isa( 'PPI::Structure::Condition' ) or return; |
348
|
|
|
|
|
|
|
|
349
|
0
|
0
|
|
|
|
|
$item = $item->sprevious_sibling() or return; |
350
|
0
|
0
|
|
|
|
|
$item->isa( 'PPI::Token::Word' ) or return; |
351
|
|
|
|
|
|
|
|
352
|
0
|
|
|
|
|
|
return $WHILE eq $item->content(); |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
{ |
356
|
|
|
|
|
|
|
# Shortcut operators '||', '//', and 'or' can cause everything after |
357
|
|
|
|
|
|
|
# them to be skipped. 'and' trumps '||' and '//', and causes things |
358
|
|
|
|
|
|
|
# to be evaluated again. The value is true to skip, false to cancel |
359
|
|
|
|
|
|
|
# skipping. |
360
|
|
|
|
|
|
|
Readonly::Hash my %SHORTCUT_OPERATOR => ( |
361
|
|
|
|
|
|
|
q<||> => $FALSE, |
362
|
|
|
|
|
|
|
q<//> => $FALSE, |
363
|
|
|
|
|
|
|
and => $TRUE, |
364
|
|
|
|
|
|
|
or => $FALSE, |
365
|
|
|
|
|
|
|
); |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# RT #38942 |
368
|
|
|
|
|
|
|
# The issue in the ticket is that in something like |
369
|
|
|
|
|
|
|
# if ( /(a)/ || /(b)/ ) { |
370
|
|
|
|
|
|
|
# say $1 |
371
|
|
|
|
|
|
|
# } |
372
|
|
|
|
|
|
|
# the capture variable can come from either /(a)/ or /(b)/. If we |
373
|
|
|
|
|
|
|
# don't take into account the short-cutting nature of the '||' we |
374
|
|
|
|
|
|
|
# erroneously conclude that the capture in /(a)/ is not used. So we |
375
|
|
|
|
|
|
|
# need to skip every regular expression after an alternation. |
376
|
|
|
|
|
|
|
# |
377
|
|
|
|
|
|
|
# The trick is that we want to still mark magic variables, because |
378
|
|
|
|
|
|
|
# of code like |
379
|
|
|
|
|
|
|
# my $foo = $1 || $2; |
380
|
|
|
|
|
|
|
# so we can't just ignore everything after an alternation. |
381
|
|
|
|
|
|
|
# |
382
|
|
|
|
|
|
|
# To do all this correctly, we have to track precedence, and start |
383
|
|
|
|
|
|
|
# paying attention again if an 'and' is found after a '||'. |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
# Subroutine _make_regexp_checker() manufactures a snippet of code |
386
|
|
|
|
|
|
|
# which is used to track regular expressions. It takes one optional |
387
|
|
|
|
|
|
|
# argument, which is the snippet used to track the parent object's |
388
|
|
|
|
|
|
|
# regular expressions. |
389
|
|
|
|
|
|
|
# |
390
|
|
|
|
|
|
|
# The snippet is passed each token encountered, and returns true if |
391
|
|
|
|
|
|
|
# the scan for capture variables is to be stopped. This will happen |
392
|
|
|
|
|
|
|
# if the token is a regular expression which is _not_ to the right |
393
|
|
|
|
|
|
|
# of an alternation operator ('||', '//', or 'or'), or it _is_ to |
394
|
|
|
|
|
|
|
# the right of an 'and', without an intervening alternation |
395
|
|
|
|
|
|
|
# operator. |
396
|
|
|
|
|
|
|
# |
397
|
|
|
|
|
|
|
# If _make_regexp_checker() was passed a snippet which |
398
|
|
|
|
|
|
|
# returns false on encountering a regular expression, the returned |
399
|
|
|
|
|
|
|
# snippet always returns false, for the benefit of code like |
400
|
|
|
|
|
|
|
# /(a)/ || ( /(b)/ || /(c)/ ). |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
sub _make_regexp_checker { |
403
|
0
|
|
|
0
|
|
|
my ( $parent ) = @_; |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
$parent |
406
|
|
|
|
|
|
|
and not $parent->() |
407
|
0
|
0
|
0
|
0
|
|
|
and return sub { return $FALSE }; |
|
0
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
|
409
|
0
|
|
|
|
|
|
my $check = $TRUE; |
410
|
0
|
|
|
|
|
|
my $precedence = 0; |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
return sub { |
413
|
0
|
|
|
0
|
|
|
my ( $elem ) = @_; |
414
|
|
|
|
|
|
|
|
415
|
0
|
0
|
|
|
|
|
$elem or return $check; |
416
|
|
|
|
|
|
|
|
417
|
0
|
0
|
|
|
|
|
if ( $elem->isa( 'PPI::Token::Regexp' ) ) { |
418
|
0
|
0
|
|
|
|
|
return _regexp_is_in_split( $elem ) ? $FALSE : $check; |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
|
421
|
0
|
0
|
0
|
|
|
|
if ( $elem->isa( 'PPI::Token::Structure' ) |
422
|
|
|
|
|
|
|
&& q<;> eq $elem->content() ) { |
423
|
0
|
|
|
|
|
|
$check = $TRUE; |
424
|
0
|
|
|
|
|
|
$precedence = 0; |
425
|
0
|
|
|
|
|
|
return $FALSE; |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
|
428
|
0
|
0
|
|
|
|
|
$elem->isa( 'PPI::Token::Operator' ) |
429
|
|
|
|
|
|
|
or return $FALSE; |
430
|
|
|
|
|
|
|
|
431
|
0
|
|
|
|
|
|
my $content = $elem->content(); |
432
|
0
|
0
|
|
|
|
|
defined( my $oper_check = $SHORTCUT_OPERATOR{$content} ) |
433
|
|
|
|
|
|
|
or return $FALSE; |
434
|
|
|
|
|
|
|
|
435
|
0
|
|
|
|
|
|
my $oper_precedence = precedence_of( $content ); |
436
|
0
|
0
|
|
|
|
|
$oper_precedence >= $precedence |
437
|
|
|
|
|
|
|
or return $FALSE; |
438
|
|
|
|
|
|
|
|
439
|
0
|
|
|
|
|
|
$precedence = $oper_precedence; |
440
|
0
|
|
|
|
|
|
$check = $oper_check; |
441
|
|
|
|
|
|
|
|
442
|
0
|
|
|
|
|
|
return $FALSE; |
443
|
0
|
|
|
|
|
|
}; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
# Argument is regexp. |
448
|
|
|
|
|
|
|
# True if it is the regexp in a split() |
449
|
|
|
|
|
|
|
sub _regexp_is_in_split { |
450
|
0
|
|
|
0
|
|
|
my ( $elem ) = @_; |
451
|
|
|
|
|
|
|
|
452
|
0
|
|
|
|
|
|
my $prev; |
453
|
0
|
0
|
|
|
|
|
if ( ! ( $prev = $elem->sprevious_sibling() ) ) { |
454
|
|
|
|
|
|
|
# Maybe we have split( /.../, ... ) |
455
|
0
|
0
|
|
|
|
|
my $stmt = $elem->statement() |
456
|
|
|
|
|
|
|
or return $FALSE; |
457
|
0
|
0
|
|
|
|
|
$stmt->parent() |
458
|
|
|
|
|
|
|
or return $FALSE; |
459
|
0
|
0
|
|
|
|
|
$prev = $elem->sprevious_sibling() |
460
|
|
|
|
|
|
|
or return $FALSE; |
461
|
|
|
|
|
|
|
} |
462
|
0
|
|
0
|
|
|
|
return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content(); |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
# false if we hit another regexp |
467
|
|
|
|
|
|
|
# The arguments are: |
468
|
|
|
|
|
|
|
# $elem - The PPI::Element whose siblings are to be checked; |
469
|
|
|
|
|
|
|
# $arg - A hash reference containing the following keys: |
470
|
|
|
|
|
|
|
# regexp => the relevant PPIx::Regexp object; |
471
|
|
|
|
|
|
|
# numbered_captures => a reference to the array used to track the |
472
|
|
|
|
|
|
|
# use of numbered captures; |
473
|
|
|
|
|
|
|
# named_captures => a reference to the hash used to track the |
474
|
|
|
|
|
|
|
# use of named captures; |
475
|
|
|
|
|
|
|
# negated => true if the regexp was bound to its target with the |
476
|
|
|
|
|
|
|
# '!~' operator; |
477
|
|
|
|
|
|
|
# document => a reference to the Perl::Critic::Document; |
478
|
|
|
|
|
|
|
# Converted to passing the arguments everyone gets in a hash because of |
479
|
|
|
|
|
|
|
# the need to add the 'negated' argument, which would put us at six |
480
|
|
|
|
|
|
|
# arguments. |
481
|
|
|
|
|
|
|
sub _check_rest_of_statement { |
482
|
0
|
|
|
0
|
|
|
my ( $elem, $arg ) = @_; |
483
|
|
|
|
|
|
|
|
484
|
0
|
|
|
|
|
|
my $checker = _make_regexp_checker(); |
485
|
0
|
|
|
|
|
|
my $nsib = $elem->snext_sibling; |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
# If we are an if (or elsif) and the result of the regexp is |
488
|
|
|
|
|
|
|
# negated, we skip the first block found. RT #69867 |
489
|
0
|
0
|
0
|
|
|
|
if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) { |
490
|
0
|
|
0
|
|
|
|
while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) { |
491
|
0
|
|
|
|
|
|
$nsib = $nsib->snext_sibling(); |
492
|
|
|
|
|
|
|
} |
493
|
0
|
0
|
|
|
|
|
$nsib and $nsib = $nsib->snext_sibling(); |
494
|
|
|
|
|
|
|
} |
495
|
|
|
|
|
|
|
|
496
|
0
|
|
|
|
|
|
while ($nsib) { |
497
|
0
|
0
|
|
|
|
|
return if $checker->($nsib); |
498
|
0
|
0
|
|
|
|
|
if ($nsib->isa('PPI::Node')) { |
499
|
0
|
0
|
|
|
|
|
return if ! _check_node_children($nsib, $arg, $checker ); |
500
|
|
|
|
|
|
|
} else { |
501
|
|
|
|
|
|
|
_mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures}, |
502
|
0
|
|
|
|
|
|
$arg->{named_captures}, $arg->{document} ); |
503
|
|
|
|
|
|
|
} |
504
|
0
|
|
|
|
|
|
$nsib = $nsib->snext_sibling; |
505
|
|
|
|
|
|
|
} |
506
|
0
|
|
|
|
|
|
return $TRUE; |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
{ |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } ); |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
# Return true if the argument is the condition of an if or elsif |
514
|
|
|
|
|
|
|
# statement, otherwise return false. |
515
|
|
|
|
|
|
|
sub _is_condition_of_if_statement { |
516
|
0
|
|
|
0
|
|
|
my ( $elem ) = @_; |
517
|
0
|
0
|
0
|
|
|
|
$elem |
518
|
|
|
|
|
|
|
and $elem->isa( 'PPI::Structure::Condition' ) |
519
|
|
|
|
|
|
|
or return $FALSE; |
520
|
0
|
0
|
|
|
|
|
my $psib = $elem->sprevious_sibling() |
521
|
|
|
|
|
|
|
or return $FALSE; |
522
|
0
|
0
|
|
|
|
|
$psib->isa( 'PPI::Token::Word' ) |
523
|
|
|
|
|
|
|
or return $FALSE; |
524
|
0
|
|
|
|
|
|
return $IS_IF_STATEMENT{ $psib->content() }; |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
} |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
# false if we hit another regexp |
530
|
|
|
|
|
|
|
# The arguments are: |
531
|
|
|
|
|
|
|
# $elem - The PPI::Node whose children are to be checked; |
532
|
|
|
|
|
|
|
# $arg - A hash reference containing the following keys: |
533
|
|
|
|
|
|
|
# regexp => the relevant PPIx::Regexp object; |
534
|
|
|
|
|
|
|
# numbered_captures => a reference to the array used to track the |
535
|
|
|
|
|
|
|
# use of numbered captures; |
536
|
|
|
|
|
|
|
# named_captures => a reference to the hash used to track the |
537
|
|
|
|
|
|
|
# use of named captures; |
538
|
|
|
|
|
|
|
# document => a reference to the Perl::Critic::Document; |
539
|
|
|
|
|
|
|
# $parent_checker - The parent's regexp checking code snippet, |
540
|
|
|
|
|
|
|
# manufactured by _make_regexp_checker(). This argument is not in |
541
|
|
|
|
|
|
|
# the $arg hash because that hash is shared among levels of the |
542
|
|
|
|
|
|
|
# parse tree, whereas the regexp checker is not. |
543
|
|
|
|
|
|
|
# TODO the things in the $arg hash are widely shared among the various |
544
|
|
|
|
|
|
|
# pieces/parts of this policy; maybe more subroutines should use this |
545
|
|
|
|
|
|
|
# hash rather than passing all this stuff around as individual |
546
|
|
|
|
|
|
|
# arguments. This particular subroutine got the hash-reference treatment |
547
|
|
|
|
|
|
|
# because Subroutines::ProhibitManyArgs started complaining when the |
548
|
|
|
|
|
|
|
# checker argument was added. |
549
|
|
|
|
|
|
|
sub _check_node_children { |
550
|
0
|
|
|
0
|
|
|
my ($elem, $arg, $parent_checker) = @_; |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# caveat: this will descend into subroutine definitions... |
553
|
|
|
|
|
|
|
|
554
|
0
|
|
|
|
|
|
my $checker = _make_regexp_checker($parent_checker); |
555
|
0
|
|
|
|
|
|
for my $child ($elem->schildren) { |
556
|
0
|
0
|
|
|
|
|
return if $checker->($child); |
557
|
0
|
0
|
|
|
|
|
if ($child->isa('PPI::Node')) { |
558
|
0
|
0
|
|
|
|
|
return if ! _check_node_children($child, $arg, $checker); |
559
|
|
|
|
|
|
|
} else { |
560
|
|
|
|
|
|
|
_mark_magic($child, $arg->{regexp}, |
561
|
|
|
|
|
|
|
$arg->{numbered_captures}, $arg->{named_captures}, |
562
|
0
|
|
|
|
|
|
$arg->{document}); |
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
} |
565
|
0
|
|
|
|
|
|
return $TRUE; |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
sub _mark_magic { |
569
|
0
|
|
|
0
|
|
|
my ($elem, $re, $captures, $named_captures, $doc) = @_; |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
# If we're a double-quotish element, we need to grub through its |
572
|
|
|
|
|
|
|
# content. RT #38942 |
573
|
0
|
0
|
|
|
|
|
if ( _is_double_quotish_element( $elem ) ) { |
574
|
0
|
|
|
|
|
|
_mark_magic_in_content( |
575
|
|
|
|
|
|
|
$elem->content(), $re, $captures, $named_captures, $doc ); |
576
|
0
|
|
|
|
|
|
return; |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
# Ditto a here document, though the logic is different. RT #38942 |
580
|
0
|
0
|
|
|
|
|
if ( $elem->isa( 'PPI::Token::HereDoc' ) ) { |
581
|
0
|
0
|
|
|
|
|
$elem->content() =~ m/ \A << ~? \s* ' /sxm |
582
|
|
|
|
|
|
|
or _mark_magic_in_content( |
583
|
|
|
|
|
|
|
join( $EMPTY, $elem->heredoc() ), $re, $captures, |
584
|
|
|
|
|
|
|
$named_captures, $doc ); |
585
|
0
|
|
|
|
|
|
return; |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
# Only interested in magic, or known English equivalent. |
589
|
0
|
|
|
|
|
|
my $content = $elem->content(); |
590
|
0
|
0
|
|
|
|
|
my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ? |
591
|
|
|
|
|
|
|
( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) : |
592
|
|
|
|
|
|
|
( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY ); |
593
|
|
|
|
|
|
|
$elem->isa( 'PPI::Token::Magic' ) |
594
|
0
|
0
|
0
|
|
|
|
or $capture_ref->{$content} |
595
|
|
|
|
|
|
|
or return; |
596
|
|
|
|
|
|
|
|
597
|
0
|
0
|
|
|
|
|
if ( $content =~ m/ \A \$ ( \d+ ) /xms ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# Record if we see $1, $2, $3, ... |
600
|
0
|
|
|
|
|
|
my $num = $1; |
601
|
0
|
0
|
|
|
|
|
if (0 < $num) { # don't mark $0 |
602
|
|
|
|
|
|
|
# Only mark the captures we really need -- don't mark superfluous magic vars |
603
|
0
|
0
|
|
|
|
|
if ($num <= @{$captures}) { |
|
0
|
|
|
|
|
|
|
604
|
0
|
|
|
|
|
|
_record_numbered_capture( $num, $captures ); |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
} elsif ( $capture_array->{$content} ) { # GitHub #778 |
608
|
0
|
|
|
|
|
|
foreach my $num ( 1 .. @{$captures} ) { |
|
0
|
|
|
|
|
|
|
609
|
0
|
|
|
|
|
|
_record_numbered_capture( $num, $captures ); |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
} elsif ( $capture_ref->{$content} ) { |
612
|
0
|
|
|
|
|
|
_mark_magic_subscripted_code( $elem, $re, $captures, $named_captures ); |
613
|
|
|
|
|
|
|
} |
614
|
0
|
|
|
|
|
|
return; |
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
# Record a named capture referenced by a hash or array found in code. |
618
|
|
|
|
|
|
|
# The arguments are: |
619
|
|
|
|
|
|
|
# $elem - The element that represents a subscripted capture variable; |
620
|
|
|
|
|
|
|
# $re - The PPIx::Regexp object; |
621
|
|
|
|
|
|
|
# $captures - A reference to the numbered capture array; |
622
|
|
|
|
|
|
|
# $named_captures - A reference to the named capture hash. |
623
|
|
|
|
|
|
|
sub _mark_magic_subscripted_code { |
624
|
0
|
|
|
0
|
|
|
my ( $elem, $re, $captures, $named_captures ) = @_; |
625
|
0
|
0
|
|
|
|
|
my $subscr = $elem->snext_sibling() or return; |
626
|
0
|
0
|
|
|
|
|
$subscr->isa( 'PPI::Structure::Subscript' ) or return; |
627
|
0
|
|
|
|
|
|
my $subval = $subscr->content(); |
628
|
0
|
|
|
|
|
|
_record_subscripted_capture( |
629
|
|
|
|
|
|
|
$elem->content(), $subval, $re, $captures, $named_captures ); |
630
|
0
|
|
|
|
|
|
return; |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
# Find capture variables in the content of a double-quotish thing, and |
634
|
|
|
|
|
|
|
# record their use. RT #38942. The arguments are: |
635
|
|
|
|
|
|
|
# $content - The content() ( or heredoc() in the case of a here |
636
|
|
|
|
|
|
|
# document) to be analyzed; |
637
|
|
|
|
|
|
|
# $re - The PPIx::Regexp object; |
638
|
|
|
|
|
|
|
# $captures - A reference to the numbered capture array; |
639
|
|
|
|
|
|
|
# $named_captures - A reference to the named capture hash. |
640
|
|
|
|
|
|
|
sub _mark_magic_in_content { |
641
|
0
|
|
|
0
|
|
|
my ( $content, $re, $captures, $named_captures, $doc ) = @_; |
642
|
|
|
|
|
|
|
|
643
|
0
|
0
|
|
|
|
|
my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ? |
644
|
|
|
|
|
|
|
( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) : |
645
|
|
|
|
|
|
|
( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY ); |
646
|
|
|
|
|
|
|
|
647
|
0
|
|
|
|
|
|
while ( $content =~ m< ( [\$\@] (?: |
648
|
|
|
|
|
|
|
[{] \^? (?: \w+ | . ) [}] | \w+ | . ) ) >sxmg ) { |
649
|
0
|
|
|
|
|
|
my $name = $1; |
650
|
0
|
0
|
|
|
|
|
$name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm |
651
|
|
|
|
|
|
|
and $name =~ s/ [}] \z //sxm; |
652
|
|
|
|
|
|
|
|
653
|
0
|
0
|
0
|
|
|
|
if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
654
|
|
|
|
|
|
|
|
655
|
0
|
|
|
|
|
|
my $num = $1; |
656
|
|
|
|
|
|
|
0 < $num |
657
|
0
|
0
|
0
|
|
|
|
and $num <= @{ $captures } |
|
0
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
and _record_numbered_capture( $num, $captures ); |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
} elsif ( $capture_array->{$name} ) { # GitHub #778 |
661
|
0
|
|
|
|
|
|
foreach my $num ( 1 .. @{$captures} ) { |
|
0
|
|
|
|
|
|
|
662
|
0
|
|
|
|
|
|
_record_numbered_capture( $num, $captures ); |
663
|
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
} elsif ( $capture_ref->{$name} && |
666
|
|
|
|
|
|
|
$content =~ m/ \G ( [{] [^}]+ [}] | [[] [^]] []] ) /smxgc ) |
667
|
|
|
|
|
|
|
{ |
668
|
0
|
|
|
|
|
|
_record_subscripted_capture( |
669
|
|
|
|
|
|
|
$name, $1, $re, $captures, $named_captures ); |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
} |
673
|
0
|
|
|
|
|
|
return; |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
# Return true if the given element is double-quotish. Always returns |
677
|
|
|
|
|
|
|
# false for a PPI::Token::HereDoc, since they're a different beast. |
678
|
|
|
|
|
|
|
# RT #38942. |
679
|
|
|
|
|
|
|
sub _is_double_quotish_element { |
680
|
0
|
|
|
0
|
|
|
my ( $elem ) = @_; |
681
|
|
|
|
|
|
|
|
682
|
0
|
0
|
|
|
|
|
$elem or return; |
683
|
|
|
|
|
|
|
|
684
|
0
|
|
|
|
|
|
my $content = $elem->content(); |
685
|
|
|
|
|
|
|
|
686
|
0
|
0
|
|
|
|
|
if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) { |
687
|
0
|
|
|
|
|
|
return $content !~ m/ \A qx \s* ' /sxm; |
688
|
|
|
|
|
|
|
} |
689
|
|
|
|
|
|
|
|
690
|
0
|
|
|
|
|
|
foreach my $class ( qw{ |
691
|
|
|
|
|
|
|
PPI::Token::Quote::Double |
692
|
|
|
|
|
|
|
PPI::Token::Quote::Interpolate |
693
|
|
|
|
|
|
|
PPI::Token::QuoteLike::Backtick |
694
|
|
|
|
|
|
|
PPI::Token::QuoteLike::Readline |
695
|
|
|
|
|
|
|
} ) { |
696
|
0
|
0
|
|
|
|
|
$elem->isa( $class ) and return $TRUE; |
697
|
|
|
|
|
|
|
} |
698
|
|
|
|
|
|
|
|
699
|
0
|
|
|
|
|
|
return $FALSE; |
700
|
|
|
|
|
|
|
} |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
# Record a subscripted capture, either hash dereference or array |
703
|
|
|
|
|
|
|
# dereference. We assume that an array represents a numbered capture and |
704
|
|
|
|
|
|
|
# a hash represents a named capture, since we have to handle (e.g.) both |
705
|
|
|
|
|
|
|
# @+ and %+. |
706
|
|
|
|
|
|
|
sub _record_subscripted_capture { |
707
|
0
|
|
|
0
|
|
|
my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_; |
708
|
0
|
0
|
|
|
|
|
if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) { |
|
|
0
|
|
|
|
|
|
709
|
0
|
|
|
|
|
|
( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx; |
710
|
0
|
|
|
|
|
|
_record_named_capture( $name, $captures, $named_captures ); |
711
|
|
|
|
|
|
|
} elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) { |
712
|
|
|
|
|
|
|
# GitHub #778 |
713
|
|
|
|
|
|
|
# Mostly capture numbers encountered here are 1-based (e.g. @+, @-). |
714
|
|
|
|
|
|
|
# But @{^CAPTURE} is zero-based, so we need to tweak the subscript |
715
|
|
|
|
|
|
|
# before we record the capture number. |
716
|
0
|
|
|
|
|
|
my $num = $1 + 0; |
717
|
|
|
|
|
|
|
$num >= 0 |
718
|
0
|
0
|
0
|
|
|
|
and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name} |
719
|
|
|
|
|
|
|
and $num++; |
720
|
0
|
|
|
|
|
|
_record_numbered_capture( $num, $captures, $re ); |
721
|
|
|
|
|
|
|
} |
722
|
0
|
|
|
|
|
|
return; |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
# Because a named capture is also one or more numbered captures, the recording |
726
|
|
|
|
|
|
|
# of the use of a named capture seemed complex enough to wrap in a subroutine. |
727
|
|
|
|
|
|
|
sub _record_named_capture { |
728
|
0
|
|
|
0
|
|
|
my ( $name, $captures, $named_captures ) = @_; |
729
|
0
|
0
|
|
|
|
|
defined ( my $numbers = $named_captures->{$name} ) or return; |
730
|
0
|
|
|
|
|
|
foreach my $capnum ( @{ $numbers } ) { |
|
0
|
|
|
|
|
|
|
731
|
0
|
|
|
|
|
|
_record_numbered_capture( $capnum, $captures ); |
732
|
|
|
|
|
|
|
} |
733
|
0
|
|
|
|
|
|
$named_captures->{$name} = undef; |
734
|
0
|
|
|
|
|
|
return; |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
sub _record_numbered_capture { |
738
|
0
|
|
|
0
|
|
|
my ( $number, $captures, $re ) = @_; |
739
|
0
|
0
|
0
|
|
|
|
$re and $number < 0 |
740
|
|
|
|
|
|
|
and $number = $re->max_capture_number() + $number + 1; |
741
|
0
|
0
|
|
|
|
|
return if $number <= 0; |
742
|
0
|
|
|
|
|
|
$captures->[ $number - 1 ] = 1; |
743
|
0
|
|
|
|
|
|
return; |
744
|
|
|
|
|
|
|
} |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
1; |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
__END__ |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
#----------------------------------------------------------------------------- |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
=pod |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
=for stopwords refactored |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
=head1 NAME |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value. |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
=head1 AFFILIATION |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
This Policy is part of the core L<Perl::Critic|Perl::Critic> |
764
|
|
|
|
|
|
|
distribution. |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
=head1 DESCRIPTION |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
Perl regular expressions have multiple types of grouping syntax. The |
770
|
|
|
|
|
|
|
basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable |
771
|
|
|
|
|
|
|
C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/>) are useful because |
772
|
|
|
|
|
|
|
they have better runtime performance and do not copy strings to the |
773
|
|
|
|
|
|
|
magic global capture variables. |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
It's also easier on the maintenance programmer if you consistently use |
776
|
|
|
|
|
|
|
capturing vs. non-capturing groups, because that programmer can tell |
777
|
|
|
|
|
|
|
more easily which regexps can be refactored without breaking |
778
|
|
|
|
|
|
|
surrounding code which may use the captured values. |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
=head1 CONFIGURATION |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
This Policy is not configurable except for the standard options. |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
=head1 CAVEATS |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
=head2 C<qr//> interpolation |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
This policy can be confused by interpolation of C<qr//> elements, but |
791
|
|
|
|
|
|
|
those are always false negatives. For example: |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
my $foo_re = qr/(foo)/; |
794
|
|
|
|
|
|
|
my ($foo) = m/$foo_re (bar)/x; |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
A human can tell that this should be a violation because there are two |
797
|
|
|
|
|
|
|
captures but only the first capture is used, not the second. The |
798
|
|
|
|
|
|
|
policy only notices that there is one capture in the regexp and |
799
|
|
|
|
|
|
|
remains happy. |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
=head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END> |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
This policy will only recognize capture groups referred to by these |
804
|
|
|
|
|
|
|
variables if the use is subscripted by a literal integer. |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
=head2 C<$^N> and C<$LAST_SUBMATCH_RESULT> |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
This policy will not recognize capture groups referred to only by these |
809
|
|
|
|
|
|
|
variables, because there is in general no way by static analysis to |
810
|
|
|
|
|
|
|
determine which capture group is referred to. For example, |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
m/ (?: (A[[:alpha:]]+) | (N\d+) ) (?{$foo=$^N}) /smx |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
makes use of the first capture group if it matches, or the second |
815
|
|
|
|
|
|
|
capture group if the first does not match but the second does. |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
=head2 split() |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
Normally, this policy thinks that if a capture is used at all it must be |
820
|
|
|
|
|
|
|
used before the next regular expression in the same scope. The regular |
821
|
|
|
|
|
|
|
expression in a C<split()> needs to be exempted because it does not |
822
|
|
|
|
|
|
|
affect the caller's capture variables. |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
At present, this policy recognizes and exempts the regular expressions |
825
|
|
|
|
|
|
|
in |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
split /.../, ... |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
and |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
split( /.../, ... ) |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
but more exotic syntax may produce false positives. |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
=head1 CREDITS |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
Initial development of this policy was supported by a grant from the |
839
|
|
|
|
|
|
|
Perl Foundation. |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
=head1 AUTHOR |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
Chris Dolan <cdolan@cpan.org> |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
=head1 COPYRIGHT |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
Copyright (c) 2007-2023 Chris Dolan. Many rights reserved. |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify |
852
|
|
|
|
|
|
|
it under the same terms as Perl itself. The full text of this license |
853
|
|
|
|
|
|
|
can be found in the LICENSE file included with this module |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
=cut |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
# Local Variables: |
858
|
|
|
|
|
|
|
# mode: cperl |
859
|
|
|
|
|
|
|
# cperl-indent-level: 4 |
860
|
|
|
|
|
|
|
# fill-column: 78 |
861
|
|
|
|
|
|
|
# indent-tabs-mode: nil |
862
|
|
|
|
|
|
|
# c-indentation-style: bsd |
863
|
|
|
|
|
|
|
# End: |
864
|
|
|
|
|
|
|
# ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround : |