File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitUnusedCapture.pm
Criterion Covered Total %
statement 27 280 9.6
branch 0 216 0.0
condition 0 71 0.0
subroutine 12 43 27.9
pod 4 5 80.0
total 43 615 6.9


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
2              
3 40     40   30701 use 5.010001;
  40         209  
4 40     40   292 use strict;
  40         130  
  40         831  
5 40     40   253 use warnings;
  40         141  
  40         1269  
6              
7 40     40   330 use List::SomeUtils qw(none);
  40         145  
  40         2086  
8 40     40   265 use Readonly;
  40         155  
  40         1994  
9 40     40   368 use Scalar::Util qw(refaddr);
  40         143  
  40         2382  
10              
11 40         2272 use Perl::Critic::Utils qw{
12             :booleans :characters :severities hashify precedence_of
13             split_nodes_on_comma
14 40     40   381 };
  40         163  
15 40     40   14582 use parent 'Perl::Critic::Policy';
  40         135  
  40         305  
16              
17             our $VERSION = '1.150';
18              
19             #-----------------------------------------------------------------------------
20              
21             Readonly::Scalar my $SPLIT => q{split};
22             Readonly::Scalar my $WHILE => q{while};
23              
24             Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE =>
25             hashify( qw< ${^CAPTURE} > );
26             # TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized
27             # as a use of capture variable n.
28             Readonly::Hash my %CAPTURE_REFERENCE => (
29             hashify( qw< $+ $- ${^CAPTURE_ALL} > ),
30             %ZERO_BASED_CAPTURE_REFERENCE );
31             Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => (
32             hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ),
33             %CAPTURE_REFERENCE );
34             Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > );
35             Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => (
36             hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ),
37             %CAPTURE_ARRAY );
38              
39             Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
40             Readonly::Scalar my $EXPL => [252];
41              
42             #-----------------------------------------------------------------------------
43              
44 89     89 0 1633 sub supported_parameters { return qw() }
45 74     74 1 312 sub default_severity { return $SEVERITY_MEDIUM }
46 86     86 1 357 sub default_themes { return qw( core pbp maintenance ) }
47             sub applies_to {
48 30     30 1 89 return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >;
49             }
50              
51             #-----------------------------------------------------------------------------
52              
53             Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
54              
55             sub violates {
56 0     0 1   my ( $self, $elem, $doc ) = @_;
57              
58             # optimization: don't bother parsing the regexp if there are no parens
59 0 0         return if 0 > index $elem->content(), '(';
60              
61 0 0         my $re = $doc->ppix_regexp_from_element( $elem ) or return;
62 0 0         $re->failures() and return;
63              
64 0 0         my $ncaptures = $re->max_capture_number() or return;
65              
66 0           my @captures = ( undef ) x $ncaptures; # List of expected captures
67              
68 0           my %named_captures; # List of expected named captures.
69             # Unlike the numbered capture logic, %named_captures
70             # entries are made undefined when a use of the name is
71             # found. Otherwise two hashes would be needed, one to
72             # become defined when a use is found, and one to hold
73             # the mapping of name to number.
74 0 0         foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture'
  0            
75             ) || [] } ) {
76             # There can be more than one capture with the same name, so we need to
77             # record all of them. There will be duplications if the 'branch reset'
78             # "(?| ... )" pattern is used, but this is benign given how numbered
79             # captures are recorded.
80 0   0       push @{ $named_captures{ $struct->name() } ||= [] }, $struct->number();
  0            
81             }
82              
83             # Look for references to the capture in the regex itself
84 0 0         return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc );
85              
86 0 0 0       if ( $re->modifier_asserted( 'g' )
87             and not _check_if_in_while_condition_or_block( $elem ) ) {
88 0           $ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
89 0           $#captures = $ncaptures - 1;
90             }
91              
92 0 0 0       return if _enough_assignments($elem, \@captures) && !%named_captures;
93 0 0 0       return if _is_in_slurpy_array_context($elem) && !%named_captures;
94 0 0         return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc);
95              
96 0           return $self->violation( $DESC, $EXPL, $elem );
97             }
98              
99             # Find uses of both numbered and named capture variables in the regexp itself.
100             # Return true if all are used.
101             sub _enough_uses_in_regexp {
102 0     0     my ( $re, $captures, $named_captures, $doc ) = @_;
103              
104             # Look for references to the capture in the regex itself. Note that this
105             # will also find backreferences in the replacement string of s///.
106 0 0         foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' )
  0            
107             || [] } ) {
108 0 0         if ( $token->is_named() ) {
109 0           _record_named_capture( $token->name(), $captures, $named_captures );
110             } else {
111 0           _record_numbered_capture( $token->absolute(), $captures );
112             }
113             }
114              
115 0 0         foreach my $token ( @{ $re->find(
  0            
116             'PPIx::Regexp::Token::Code' ) || [] } ) {
117 0 0         my $ppi = $token->ppi() or next;
118 0           _check_node_children( $ppi, {
119             regexp => $re,
120             numbered_captures => $captures,
121             named_captures => $named_captures,
122             document => $doc,
123             }, _make_regexp_checker() );
124             }
125              
126 0     0     return ( none {not defined} @{$captures} )
127             && ( !%{$named_captures} ||
128 0   0 0     none {defined} values %{$named_captures} );
  0            
129             }
130              
131             sub _enough_assignments {
132 0     0     my ($elem, $captures) = @_;
133              
134             # look backward for the assignment operator
135 0           my $psib = $elem->sprevious_sibling;
136             SIBLING:
137 0           while (1) {
138 0 0         return if !$psib;
139 0 0         if ($psib->isa('PPI::Token::Operator')) {
140 0 0         last SIBLING if q{=} eq $psib->content;
141 0 0         return if q{!~} eq $psib->content;
142             }
143 0           $psib = $psib->sprevious_sibling;
144             }
145              
146 0           $psib = $psib->sprevious_sibling;
147 0 0         return if !$psib; # syntax error: '=' at the beginning of a statement???
148              
149 0 0         if ($psib->isa('PPI::Token::Symbol')) {
    0          
    0          
150             # @foo = m/(foo)/
151             # @$foo = m/(foo)/
152             # %foo = m/(foo)/
153             # %$foo = m/(foo)/
154 0 0         return $TRUE if _symbol_is_slurpy($psib);
155              
156             } elsif ($psib->isa('PPI::Structure::Block')) {
157             # @{$foo} = m/(foo)/
158             # %{$foo} = m/(foo)/
159 0 0         return $TRUE if _is_preceded_by_array_or_hash_cast($psib);
160              
161             } elsif ($psib->isa('PPI::Structure::List')) {
162             # () = m/(foo)/
163             # ($foo) = m/(foo)/
164             # ($foo,$bar) = m/(foo)(bar)/
165             # (@foo) = m/(foo)(bar)/
166             # ($foo,@foo) = m/(foo)(bar)/
167             # ($foo,@$foo) = m/(foo)(bar)/
168             # ($foo,@{$foo}) = m/(foo)(bar)/
169              
170 0           my @args = $psib->schildren;
171 0 0         return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy
172              
173             # Forward looking: PPI might change in v1.200 so schild(0) is a
174             # PPI::Statement::Expression.
175 0 0 0       if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
176 0           @args = $args[0]->schildren;
177             }
178              
179 0           my @parts = split_nodes_on_comma(@args);
180             PART:
181 0           for my $i (0 .. $#parts) {
182 0 0         if (1 == @{$parts[$i]}) {
  0            
183 0           my $var = $parts[$i]->[0];
184 0 0 0       if ($var->isa('PPI::Token::Symbol') || $var->isa('PPI::Token::Cast')) {
185 0 0         return $TRUE if _has_array_sigil($var);
186             }
187             }
188 0           _record_numbered_capture( $i + 1, $captures );
189             # ith variable capture
190             }
191             }
192              
193 0     0     return none {not defined} @{$captures};
  0            
  0            
194             }
195              
196             sub _symbol_is_slurpy {
197 0     0     my ($symbol) = @_;
198              
199 0 0         return $TRUE if _has_array_sigil($symbol);
200 0 0         return $TRUE if _has_hash_sigil($symbol);
201 0 0         return $TRUE if _is_preceded_by_array_or_hash_cast($symbol);
202 0           return;
203             }
204              
205             sub _has_array_sigil {
206 0     0     my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
207              
208 0           return q{@} eq substr $elem->content, 0, 1;
209             }
210              
211             sub _has_hash_sigil {
212 0     0     my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
213              
214 0           return q{%} eq substr $elem->content, 0, 1;
215             }
216              
217             sub _is_preceded_by_array_or_hash_cast {
218 0     0     my ($elem) = @_;
219 0           my $psib = $elem->sprevious_sibling;
220 0           my $cast;
221 0   0       while ($psib && $psib->isa('PPI::Token::Cast')) {
222 0           $cast = $psib;
223 0           $psib = $psib->sprevious_sibling;
224             }
225 0 0         return if !$cast;
226 0           my $sigil = substr $cast->content, 0, 1;
227 0   0       return q{@} eq $sigil || q{%} eq $sigil;
228             }
229              
230             sub _is_in_slurpy_array_context {
231 0     0     my ($elem) = @_;
232              
233             # return true is the result of the regexp is passed to a subroutine.
234             # doesn't check for array context due to assignment.
235              
236             # look backward for explicit regex operator
237 0           my $psib = $elem->sprevious_sibling;
238 0 0 0       if ($psib && $psib->content eq q{=~}) {
239             # Track back through value
240 0           $psib = _skip_lhs($psib);
241             }
242              
243 0 0         if (!$psib) {
244 0           my $parent = $elem->parent;
245 0 0         return if !$parent;
246 0 0         if ($parent->isa('PPI::Statement')) {
247 0           $parent = $parent->parent;
248 0 0         return if !$parent;
249             }
250              
251             # Return true if we have a list that isn't part of a foreach loop.
252             # TECHNICAL DEBT: This code is basically shared with
253             # RequireCheckingReturnValueOfEval. I don't want to put this code
254             # into Perl::Critic::Utils::*, but I don't have time to sort out
255             # PPIx::Utilities::Structure::List yet.
256 0 0         if ( $parent->isa('PPI::Structure::List') ) {
257 0 0         my $parent_statement = $parent->statement() or return $TRUE;
258 0 0         return $TRUE if not
259             $parent_statement->isa('PPI::Statement::Compound');
260 0 0         return $TRUE if $parent_statement->type() ne 'foreach';
261             }
262              
263 0 0         return $TRUE if $parent->isa('PPI::Structure::Constructor');
264 0 0         if ($parent->isa('PPI::Structure::Block')) {
265 0 0         return $TRUE
266             if
267             refaddr($elem->statement)
268             eq refaddr([$parent->schildren]->[-1]);
269             }
270 0           return;
271             }
272 0 0         if ($psib->isa('PPI::Token::Operator')) {
273             # Most operators kill slurpiness (except assignment, which is handled elsewhere).
274 0           return q{,} eq $psib->content;
275             }
276 0           return $TRUE;
277             }
278              
279             sub _skip_lhs {
280 0     0     my ($elem) = @_;
281              
282             # TODO: better implementation to handle casts, expressions, subcalls, etc.
283 0           $elem = $elem->sprevious_sibling();
284              
285 0           return $elem;
286             }
287              
288             sub _enough_magic {
289 0     0     my ($elem, $re, $captures, $named_captures, $doc) = @_;
290              
291 0           _check_for_magic($elem, $re, $captures, $named_captures, $doc);
292              
293 0     0     return ( none {not defined} @{$captures} )
294             && ( !%{$named_captures} ||
295 0   0 0     none {defined} values %{$named_captures} );
  0            
296             }
297              
298             # void return
299             sub _check_for_magic {
300 0     0     my ($elem, $re, $captures, $named_captures, $doc) = @_;
301              
302             # Search for $1..$9 in :
303             # * the rest of this statement
304             # * subsequent sibling statements
305             # * if this is in a conditional boolean, the if/else bodies of the conditional
306             # * if this is in a while/for condition, the loop body
307             # But NO intervening regexps!
308              
309             # Package up the usual arguments for _check_rest_of_statement().
310 0           my $arg = {
311             regexp => $re,
312             numbered_captures => $captures,
313             named_captures => $named_captures,
314             document => $doc,
315             };
316              
317             # Capture whether or not the regular expression is negated -- that
318             # is, whether it is preceded by the '!~' binding operator.
319 0 0         if ( my $prior_token = $elem->sprevious_sibling() ) {
320 0   0       $arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) &&
321             q<!~> eq $prior_token->content();
322             }
323              
324 0 0         return if ! _check_rest_of_statement( $elem, $arg );
325              
326 0           my $parent = $elem->parent();
327 0   0       while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
328 0 0         return if ! _check_rest_of_statement( $parent, $arg );
329 0           $parent = $parent->parent();
330             }
331              
332 0           return;
333             }
334              
335             # Check if we are in the condition or block of a 'while'
336             sub _check_if_in_while_condition_or_block {
337 0     0     my ( $elem ) = @_;
338 0 0         $elem or return;
339              
340 0 0         my $parent = $elem->parent() or return;
341 0 0         $parent->isa( 'PPI::Statement' ) or return;
342              
343 0 0         my $item = $parent = $parent->parent() or return;
344 0 0         if ( $item->isa( 'PPI::Structure::Block' ) ) {
345 0 0         $item = $item->sprevious_sibling() or return;
346             }
347 0 0         $item->isa( 'PPI::Structure::Condition' ) or return;
348              
349 0 0         $item = $item->sprevious_sibling() or return;
350 0 0         $item->isa( 'PPI::Token::Word' ) or return;
351              
352 0           return $WHILE eq $item->content();
353             }
354              
355             {
356             # Shortcut operators '||', '//', and 'or' can cause everything after
357             # them to be skipped. 'and' trumps '||' and '//', and causes things
358             # to be evaluated again. The value is true to skip, false to cancel
359             # skipping.
360             Readonly::Hash my %SHORTCUT_OPERATOR => (
361             q<||> => $FALSE,
362             q<//> => $FALSE,
363             and => $TRUE,
364             or => $FALSE,
365             );
366              
367             # RT #38942
368             # The issue in the ticket is that in something like
369             # if ( /(a)/ || /(b)/ ) {
370             # say $1
371             # }
372             # the capture variable can come from either /(a)/ or /(b)/. If we
373             # don't take into account the short-cutting nature of the '||' we
374             # erroneously conclude that the capture in /(a)/ is not used. So we
375             # need to skip every regular expression after an alternation.
376             #
377             # The trick is that we want to still mark magic variables, because
378             # of code like
379             # my $foo = $1 || $2;
380             # so we can't just ignore everything after an alternation.
381             #
382             # To do all this correctly, we have to track precedence, and start
383             # paying attention again if an 'and' is found after a '||'.
384              
385             # Subroutine _make_regexp_checker() manufactures a snippet of code
386             # which is used to track regular expressions. It takes one optional
387             # argument, which is the snippet used to track the parent object's
388             # regular expressions.
389             #
390             # The snippet is passed each token encountered, and returns true if
391             # the scan for capture variables is to be stopped. This will happen
392             # if the token is a regular expression which is _not_ to the right
393             # of an alternation operator ('||', '//', or 'or'), or it _is_ to
394             # the right of an 'and', without an intervening alternation
395             # operator.
396             #
397             # If _make_regexp_checker() was passed a snippet which
398             # returns false on encountering a regular expression, the returned
399             # snippet always returns false, for the benefit of code like
400             # /(a)/ || ( /(b)/ || /(c)/ ).
401              
402             sub _make_regexp_checker {
403 0     0     my ( $parent ) = @_;
404              
405             $parent
406             and not $parent->()
407 0 0 0 0     and return sub { return $FALSE };
  0            
408              
409 0           my $check = $TRUE;
410 0           my $precedence = 0;
411              
412             return sub {
413 0     0     my ( $elem ) = @_;
414              
415 0 0         $elem or return $check;
416              
417 0 0         if ( $elem->isa( 'PPI::Token::Regexp' ) ) {
418 0 0         return _regexp_is_in_split( $elem ) ? $FALSE : $check;
419             }
420              
421 0 0 0       if ( $elem->isa( 'PPI::Token::Structure' )
422             && q<;> eq $elem->content() ) {
423 0           $check = $TRUE;
424 0           $precedence = 0;
425 0           return $FALSE;
426             }
427              
428 0 0         $elem->isa( 'PPI::Token::Operator' )
429             or return $FALSE;
430              
431 0           my $content = $elem->content();
432 0 0         defined( my $oper_check = $SHORTCUT_OPERATOR{$content} )
433             or return $FALSE;
434              
435 0           my $oper_precedence = precedence_of( $content );
436 0 0         $oper_precedence >= $precedence
437             or return $FALSE;
438              
439 0           $precedence = $oper_precedence;
440 0           $check = $oper_check;
441              
442 0           return $FALSE;
443 0           };
444             }
445             }
446              
447             # Argument is regexp.
448             # True if it is the regexp in a split()
449             sub _regexp_is_in_split {
450 0     0     my ( $elem ) = @_;
451              
452 0           my $prev;
453 0 0         if ( ! ( $prev = $elem->sprevious_sibling() ) ) {
454             # Maybe we have split( /.../, ... )
455 0 0         my $stmt = $elem->statement()
456             or return $FALSE;
457 0 0         $stmt->parent()
458             or return $FALSE;
459 0 0         $prev = $elem->sprevious_sibling()
460             or return $FALSE;
461             }
462 0   0       return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content();
463             }
464              
465              
466             # false if we hit another regexp
467             # The arguments are:
468             # $elem - The PPI::Element whose siblings are to be checked;
469             # $arg - A hash reference containing the following keys:
470             # regexp => the relevant PPIx::Regexp object;
471             # numbered_captures => a reference to the array used to track the
472             # use of numbered captures;
473             # named_captures => a reference to the hash used to track the
474             # use of named captures;
475             # negated => true if the regexp was bound to its target with the
476             # '!~' operator;
477             # document => a reference to the Perl::Critic::Document;
478             # Converted to passing the arguments everyone gets in a hash because of
479             # the need to add the 'negated' argument, which would put us at six
480             # arguments.
481             sub _check_rest_of_statement {
482 0     0     my ( $elem, $arg ) = @_;
483              
484 0           my $checker = _make_regexp_checker();
485 0           my $nsib = $elem->snext_sibling;
486              
487             # If we are an if (or elsif) and the result of the regexp is
488             # negated, we skip the first block found. RT #69867
489 0 0 0       if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) {
490 0   0       while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) {
491 0           $nsib = $nsib->snext_sibling();
492             }
493 0 0         $nsib and $nsib = $nsib->snext_sibling();
494             }
495              
496 0           while ($nsib) {
497 0 0         return if $checker->($nsib);
498 0 0         if ($nsib->isa('PPI::Node')) {
499 0 0         return if ! _check_node_children($nsib, $arg, $checker );
500             } else {
501             _mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures},
502 0           $arg->{named_captures}, $arg->{document} );
503             }
504 0           $nsib = $nsib->snext_sibling;
505             }
506 0           return $TRUE;
507             }
508              
509             {
510              
511             Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } );
512              
513             # Return true if the argument is the condition of an if or elsif
514             # statement, otherwise return false.
515             sub _is_condition_of_if_statement {
516 0     0     my ( $elem ) = @_;
517 0 0 0       $elem
518             and $elem->isa( 'PPI::Structure::Condition' )
519             or return $FALSE;
520 0 0         my $psib = $elem->sprevious_sibling()
521             or return $FALSE;
522 0 0         $psib->isa( 'PPI::Token::Word' )
523             or return $FALSE;
524 0           return $IS_IF_STATEMENT{ $psib->content() };
525              
526             }
527             }
528              
529             # false if we hit another regexp
530             # The arguments are:
531             # $elem - The PPI::Node whose children are to be checked;
532             # $arg - A hash reference containing the following keys:
533             # regexp => the relevant PPIx::Regexp object;
534             # numbered_captures => a reference to the array used to track the
535             # use of numbered captures;
536             # named_captures => a reference to the hash used to track the
537             # use of named captures;
538             # document => a reference to the Perl::Critic::Document;
539             # $parent_checker - The parent's regexp checking code snippet,
540             # manufactured by _make_regexp_checker(). This argument is not in
541             # the $arg hash because that hash is shared among levels of the
542             # parse tree, whereas the regexp checker is not.
543             # TODO the things in the $arg hash are widely shared among the various
544             # pieces/parts of this policy; maybe more subroutines should use this
545             # hash rather than passing all this stuff around as individual
546             # arguments. This particular subroutine got the hash-reference treatment
547             # because Subroutines::ProhibitManyArgs started complaining when the
548             # checker argument was added.
549             sub _check_node_children {
550 0     0     my ($elem, $arg, $parent_checker) = @_;
551              
552             # caveat: this will descend into subroutine definitions...
553              
554 0           my $checker = _make_regexp_checker($parent_checker);
555 0           for my $child ($elem->schildren) {
556 0 0         return if $checker->($child);
557 0 0         if ($child->isa('PPI::Node')) {
558 0 0         return if ! _check_node_children($child, $arg, $checker);
559             } else {
560             _mark_magic($child, $arg->{regexp},
561             $arg->{numbered_captures}, $arg->{named_captures},
562 0           $arg->{document});
563             }
564             }
565 0           return $TRUE;
566             }
567              
568             sub _mark_magic {
569 0     0     my ($elem, $re, $captures, $named_captures, $doc) = @_;
570              
571             # If we're a double-quotish element, we need to grub through its
572             # content. RT #38942
573 0 0         if ( _is_double_quotish_element( $elem ) ) {
574 0           _mark_magic_in_content(
575             $elem->content(), $re, $captures, $named_captures, $doc );
576 0           return;
577             }
578              
579             # Ditto a here document, though the logic is different. RT #38942
580 0 0         if ( $elem->isa( 'PPI::Token::HereDoc' ) ) {
581 0 0         $elem->content() =~ m/ \A << ~? \s* ' /sxm
582             or _mark_magic_in_content(
583             join( $EMPTY, $elem->heredoc() ), $re, $captures,
584             $named_captures, $doc );
585 0           return;
586             }
587              
588             # Only interested in magic, or known English equivalent.
589 0           my $content = $elem->content();
590 0 0         my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
591             ( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
592             ( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
593             $elem->isa( 'PPI::Token::Magic' )
594 0 0 0       or $capture_ref->{$content}
595             or return;
596              
597 0 0         if ( $content =~ m/ \A \$ ( \d+ ) /xms ) {
    0          
    0          
598              
599             # Record if we see $1, $2, $3, ...
600 0           my $num = $1;
601 0 0         if (0 < $num) { # don't mark $0
602             # Only mark the captures we really need -- don't mark superfluous magic vars
603 0 0         if ($num <= @{$captures}) {
  0            
604 0           _record_numbered_capture( $num, $captures );
605             }
606             }
607             } elsif ( $capture_array->{$content} ) { # GitHub #778
608 0           foreach my $num ( 1 .. @{$captures} ) {
  0            
609 0           _record_numbered_capture( $num, $captures );
610             }
611             } elsif ( $capture_ref->{$content} ) {
612 0           _mark_magic_subscripted_code( $elem, $re, $captures, $named_captures );
613             }
614 0           return;
615             }
616              
617             # Record a named capture referenced by a hash or array found in code.
618             # The arguments are:
619             # $elem - The element that represents a subscripted capture variable;
620             # $re - The PPIx::Regexp object;
621             # $captures - A reference to the numbered capture array;
622             # $named_captures - A reference to the named capture hash.
623             sub _mark_magic_subscripted_code {
624 0     0     my ( $elem, $re, $captures, $named_captures ) = @_;
625 0 0         my $subscr = $elem->snext_sibling() or return;
626 0 0         $subscr->isa( 'PPI::Structure::Subscript' ) or return;
627 0           my $subval = $subscr->content();
628 0           _record_subscripted_capture(
629             $elem->content(), $subval, $re, $captures, $named_captures );
630 0           return;
631             }
632              
633             # Find capture variables in the content of a double-quotish thing, and
634             # record their use. RT #38942. The arguments are:
635             # $content - The content() ( or heredoc() in the case of a here
636             # document) to be analyzed;
637             # $re - The PPIx::Regexp object;
638             # $captures - A reference to the numbered capture array;
639             # $named_captures - A reference to the named capture hash.
640             sub _mark_magic_in_content {
641 0     0     my ( $content, $re, $captures, $named_captures, $doc ) = @_;
642              
643 0 0         my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
644             ( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
645             ( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
646              
647 0           while ( $content =~ m< ( [\$\@] (?:
648             [{] \^? (?: \w+ | . ) [}] | \w+ | . ) ) >sxmg ) {
649 0           my $name = $1;
650 0 0         $name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm
651             and $name =~ s/ [}] \z //sxm;
652              
653 0 0 0       if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) {
    0          
    0          
654              
655 0           my $num = $1;
656             0 < $num
657 0 0 0       and $num <= @{ $captures }
  0            
658             and _record_numbered_capture( $num, $captures );
659              
660             } elsif ( $capture_array->{$name} ) { # GitHub #778
661 0           foreach my $num ( 1 .. @{$captures} ) {
  0            
662 0           _record_numbered_capture( $num, $captures );
663             }
664              
665             } elsif ( $capture_ref->{$name} &&
666             $content =~ m/ \G ( [{] [^}]+ [}] | [[] [^]] []] ) /smxgc )
667             {
668 0           _record_subscripted_capture(
669             $name, $1, $re, $captures, $named_captures );
670              
671             }
672             }
673 0           return;
674             }
675              
676             # Return true if the given element is double-quotish. Always returns
677             # false for a PPI::Token::HereDoc, since they're a different beast.
678             # RT #38942.
679             sub _is_double_quotish_element {
680 0     0     my ( $elem ) = @_;
681              
682 0 0         $elem or return;
683              
684 0           my $content = $elem->content();
685              
686 0 0         if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) {
687 0           return $content !~ m/ \A qx \s* ' /sxm;
688             }
689              
690 0           foreach my $class ( qw{
691             PPI::Token::Quote::Double
692             PPI::Token::Quote::Interpolate
693             PPI::Token::QuoteLike::Backtick
694             PPI::Token::QuoteLike::Readline
695             } ) {
696 0 0         $elem->isa( $class ) and return $TRUE;
697             }
698              
699 0           return $FALSE;
700             }
701              
702             # Record a subscripted capture, either hash dereference or array
703             # dereference. We assume that an array represents a numbered capture and
704             # a hash represents a named capture, since we have to handle (e.g.) both
705             # @+ and %+.
706             sub _record_subscripted_capture {
707 0     0     my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_;
708 0 0         if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) {
    0          
709 0           ( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx;
710 0           _record_named_capture( $name, $captures, $named_captures );
711             } elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) {
712             # GitHub #778
713             # Mostly capture numbers encountered here are 1-based (e.g. @+, @-).
714             # But @{^CAPTURE} is zero-based, so we need to tweak the subscript
715             # before we record the capture number.
716 0           my $num = $1 + 0;
717             $num >= 0
718 0 0 0       and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name}
719             and $num++;
720 0           _record_numbered_capture( $num, $captures, $re );
721             }
722 0           return;
723             }
724              
725             # Because a named capture is also one or more numbered captures, the recording
726             # of the use of a named capture seemed complex enough to wrap in a subroutine.
727             sub _record_named_capture {
728 0     0     my ( $name, $captures, $named_captures ) = @_;
729 0 0         defined ( my $numbers = $named_captures->{$name} ) or return;
730 0           foreach my $capnum ( @{ $numbers } ) {
  0            
731 0           _record_numbered_capture( $capnum, $captures );
732             }
733 0           $named_captures->{$name} = undef;
734 0           return;
735             }
736              
737             sub _record_numbered_capture {
738 0     0     my ( $number, $captures, $re ) = @_;
739 0 0 0       $re and $number < 0
740             and $number = $re->max_capture_number() + $number + 1;
741 0 0         return if $number <= 0;
742 0           $captures->[ $number - 1 ] = 1;
743 0           return;
744             }
745              
746             1;
747              
748             __END__
749              
750             #-----------------------------------------------------------------------------
751              
752             =pod
753              
754             =for stopwords refactored
755              
756             =head1 NAME
757              
758             Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
759              
760              
761             =head1 AFFILIATION
762              
763             This Policy is part of the core L<Perl::Critic|Perl::Critic>
764             distribution.
765              
766              
767             =head1 DESCRIPTION
768              
769             Perl regular expressions have multiple types of grouping syntax. The
770             basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable
771             C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/>) are useful because
772             they have better runtime performance and do not copy strings to the
773             magic global capture variables.
774              
775             It's also easier on the maintenance programmer if you consistently use
776             capturing vs. non-capturing groups, because that programmer can tell
777             more easily which regexps can be refactored without breaking
778             surrounding code which may use the captured values.
779              
780              
781             =head1 CONFIGURATION
782              
783             This Policy is not configurable except for the standard options.
784              
785              
786             =head1 CAVEATS
787              
788             =head2 C<qr//> interpolation
789              
790             This policy can be confused by interpolation of C<qr//> elements, but
791             those are always false negatives. For example:
792              
793             my $foo_re = qr/(foo)/;
794             my ($foo) = m/$foo_re (bar)/x;
795              
796             A human can tell that this should be a violation because there are two
797             captures but only the first capture is used, not the second. The
798             policy only notices that there is one capture in the regexp and
799             remains happy.
800              
801             =head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END>
802              
803             This policy will only recognize capture groups referred to by these
804             variables if the use is subscripted by a literal integer.
805              
806             =head2 C<$^N> and C<$LAST_SUBMATCH_RESULT>
807              
808             This policy will not recognize capture groups referred to only by these
809             variables, because there is in general no way by static analysis to
810             determine which capture group is referred to. For example,
811              
812             m/ (?: (A[[:alpha:]]+) | (N\d+) ) (?{$foo=$^N}) /smx
813              
814             makes use of the first capture group if it matches, or the second
815             capture group if the first does not match but the second does.
816              
817             =head2 split()
818              
819             Normally, this policy thinks that if a capture is used at all it must be
820             used before the next regular expression in the same scope. The regular
821             expression in a C<split()> needs to be exempted because it does not
822             affect the caller's capture variables.
823              
824             At present, this policy recognizes and exempts the regular expressions
825             in
826              
827             split /.../, ...
828              
829             and
830              
831             split( /.../, ... )
832              
833             but more exotic syntax may produce false positives.
834              
835              
836             =head1 CREDITS
837              
838             Initial development of this policy was supported by a grant from the
839             Perl Foundation.
840              
841              
842             =head1 AUTHOR
843              
844             Chris Dolan <cdolan@cpan.org>
845              
846              
847             =head1 COPYRIGHT
848              
849             Copyright (c) 2007-2023 Chris Dolan. Many rights reserved.
850              
851             This program is free software; you can redistribute it and/or modify
852             it under the same terms as Perl itself. The full text of this license
853             can be found in the LICENSE file included with this module
854              
855             =cut
856              
857             # Local Variables:
858             # mode: cperl
859             # cperl-indent-level: 4
860             # fill-column: 78
861             # indent-tabs-mode: nil
862             # c-indentation-style: bsd
863             # End:
864             # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :