File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitUnusedCapture.pm
Criterion Covered Total %
statement 29 284 10.2
branch 1 220 0.4
condition 0 71 0.0
subroutine 13 43 30.2
pod 4 5 80.0
total 47 623 7.5


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
2              
3 40     40   26920 use 5.010001;
  40         154  
4 40     40   163 use strict;
  40         70  
  40         754  
5 40     40   185 use warnings;
  40         62  
  40         1878  
6              
7 40     40   192 use List::SomeUtils qw(none);
  40         86  
  40         2261  
8 40     40   183 use Readonly;
  40         66  
  40         1709  
9 40     40   170 use Scalar::Util qw(refaddr);
  40         71  
  40         1730  
10              
11 40         1907 use Perl::Critic::Utils qw{
12             :booleans :characters :severities hashify precedence_of
13             split_nodes_on_comma
14 40     40   173 };
  40         76  
15 40     40   10615 use parent 'Perl::Critic::Policy';
  40         128  
  40         253  
16              
17             our $VERSION = '1.156';
18              
19             #-----------------------------------------------------------------------------
20              
21             Readonly::Scalar my $SPLIT => q{split};
22             Readonly::Scalar my $WHILE => q{while};
23              
24             Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE =>
25             hashify( qw< ${^CAPTURE} > );
26             # TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized
27             # as a use of capture variable n.
28             Readonly::Hash my %CAPTURE_REFERENCE => (
29             hashify( qw< $+ $- ${^CAPTURE_ALL} > ),
30             %ZERO_BASED_CAPTURE_REFERENCE );
31             Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => (
32             hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ),
33             %CAPTURE_REFERENCE );
34             Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > );
35             Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => (
36             hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ),
37             %CAPTURE_ARRAY );
38             Readonly::Hash my %CAPTURE_HASH => hashify( qw< %- %+ %{^CAPTURE} >);
39             Readonly::Hash my %CAPTURE_HASH_ENGLISH => (
40             hashify( qw< %LAST_PAREN_MATCH > ),
41             %CAPTURE_HASH );
42              
43              
44             Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
45             Readonly::Scalar my $EXPL => [252];
46              
47             #-----------------------------------------------------------------------------
48              
49 90     90 0 773 sub supported_parameters { return qw() }
50 75     75 1 254 sub default_severity { return $SEVERITY_MEDIUM }
51 86     86 1 235 sub default_themes { return qw( core pbp maintenance ) }
52             sub applies_to {
53 31     31 1 87 return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >;
54             }
55              
56             #-----------------------------------------------------------------------------
57              
58             Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
59              
60             sub violates {
61 1     1 1 3 my ( $self, $elem, $doc ) = @_;
62              
63             # optimization: don't bother parsing the regexp if there are no parens
64 1 50       9 return if 0 > index $elem->content(), '(';
65              
66 0 0         my $re = $doc->ppix_regexp_from_element( $elem ) or return;
67 0 0         $re->failures() and return;
68              
69 0 0         my $ncaptures = $re->max_capture_number() or return;
70              
71 0           my @captures = ( undef ) x $ncaptures; # List of expected captures
72              
73 0           my %named_captures; # List of expected named captures.
74             # Unlike the numbered capture logic, %named_captures
75             # entries are made undefined when a use of the name is
76             # found. Otherwise two hashes would be needed, one to
77             # become defined when a use is found, and one to hold
78             # the mapping of name to number.
79 0 0         foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture'
  0            
80             ) || [] } ) {
81             # There can be more than one capture with the same name, so we need to
82             # record all of them. There will be duplications if the 'branch reset'
83             # "(?| ... )" pattern is used, but this is benign given how numbered
84             # captures are recorded.
85 0   0       push @{ $named_captures{ $struct->name() } ||= [] }, $struct->number();
  0            
86             }
87              
88             # Look for references to the capture in the regex itself
89 0 0         return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc );
90              
91 0 0 0       if ( $re->modifier_asserted( 'g' )
92             and not _check_if_in_while_condition_or_block( $elem ) ) {
93 0           $ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
94 0           $#captures = $ncaptures - 1;
95             }
96              
97 0 0         if ( !%named_captures ) {
98 0 0         return if _enough_assignments($elem, \@captures);
99 0 0         return if _is_in_slurpy_array_context($elem);
100             }
101 0 0         return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc);
102              
103 0           return $self->violation( $DESC, $EXPL, $elem );
104             }
105              
106             # Find uses of both numbered and named capture variables in the regexp itself.
107             # Return true if all are used.
108             sub _enough_uses_in_regexp {
109 0     0     my ( $re, $captures, $named_captures, $doc ) = @_;
110              
111             # Look for references to the capture in the regex itself. Note that this
112             # will also find backreferences in the replacement string of s///.
113 0 0         foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' )
  0            
114             || [] } ) {
115 0 0         if ( $token->is_named() ) {
116 0           _record_named_capture( $token->name(), $captures, $named_captures );
117             } else {
118 0           _record_numbered_capture( $token->absolute(), $captures );
119             }
120             }
121              
122 0 0         foreach my $token ( @{ $re->find(
  0            
123             'PPIx::Regexp::Token::Code' ) || [] } ) {
124 0 0         my $ppi = $token->ppi() or next;
125 0           _check_node_children( $ppi, {
126             regexp => $re,
127             numbered_captures => $captures,
128             named_captures => $named_captures,
129             document => $doc,
130             }, _make_regexp_checker() );
131             }
132              
133 0     0     return ( none {not defined} @{$captures} )
134             && ( !%{$named_captures} ||
135 0   0 0     none {defined} values %{$named_captures} );
  0            
136             }
137              
138             sub _enough_assignments {
139 0     0     my ($elem, $captures) = @_;
140              
141             # look backward for the assignment operator
142 0           my $psib = $elem->sprevious_sibling;
143             SIBLING:
144 0           while (1) {
145 0 0         return if !$psib;
146 0 0         if ($psib->isa('PPI::Token::Operator')) {
147 0 0         last SIBLING if q{=} eq $psib->content;
148 0 0         return if q{!~} eq $psib->content;
149             }
150 0           $psib = $psib->sprevious_sibling;
151             }
152              
153 0           $psib = $psib->sprevious_sibling;
154 0 0         return if !$psib; # syntax error: '=' at the beginning of a statement???
155              
156 0 0         if ($psib->isa('PPI::Token::Symbol')) {
    0          
    0          
157             # @foo = m/(foo)/
158             # @$foo = m/(foo)/
159             # %foo = m/(foo)/
160             # %$foo = m/(foo)/
161 0 0         return $TRUE if _symbol_is_slurpy($psib);
162              
163             } elsif ($psib->isa('PPI::Structure::Block')) {
164             # @{$foo} = m/(foo)/
165             # %{$foo} = m/(foo)/
166 0 0         return $TRUE if _is_preceded_by_array_or_hash_cast($psib);
167              
168             } elsif ($psib->isa('PPI::Structure::List')) {
169             # () = m/(foo)/
170             # ($foo) = m/(foo)/
171             # ($foo,$bar) = m/(foo)(bar)/
172             # (@foo) = m/(foo)(bar)/
173             # ($foo,@foo) = m/(foo)(bar)/
174             # ($foo,@$foo) = m/(foo)(bar)/
175             # ($foo,@{$foo}) = m/(foo)(bar)/
176              
177 0           my @args = $psib->schildren;
178 0 0         return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy
179              
180             # Forward looking: PPI might change in v1.200 so schild(0) is a
181             # PPI::Statement::Expression.
182 0 0 0       if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
183 0           @args = $args[0]->schildren;
184             }
185              
186 0           my @parts = split_nodes_on_comma(@args);
187             PART:
188 0           for my $i (0 .. $#parts) {
189 0 0         if (1 == @{$parts[$i]}) {
  0            
190 0           my $var = $parts[$i]->[0];
191 0 0 0       if ($var->isa('PPI::Token::Symbol') || $var->isa('PPI::Token::Cast')) {
192 0 0         return $TRUE if _has_array_sigil($var);
193             }
194             }
195 0           _record_numbered_capture( $i + 1, $captures );
196             # ith variable capture
197             }
198             }
199              
200 0     0     return none {not defined} @{$captures};
  0            
  0            
201             }
202              
203             sub _symbol_is_slurpy {
204 0     0     my ($symbol) = @_;
205              
206 0 0         return $TRUE if _has_array_sigil($symbol);
207 0 0         return $TRUE if _has_hash_sigil($symbol);
208 0 0         return $TRUE if _is_preceded_by_array_or_hash_cast($symbol);
209 0           return;
210             }
211              
212             sub _has_array_sigil {
213 0     0     my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
214              
215 0           return q{@} eq substr $elem->content, 0, 1;
216             }
217              
218             sub _has_hash_sigil {
219 0     0     my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
220              
221 0           return q{%} eq substr $elem->content, 0, 1;
222             }
223              
224             sub _is_preceded_by_array_or_hash_cast {
225 0     0     my ($elem) = @_;
226 0           my $psib = $elem->sprevious_sibling;
227 0           my $cast;
228 0   0       while ($psib && $psib->isa('PPI::Token::Cast')) {
229 0           $cast = $psib;
230 0           $psib = $psib->sprevious_sibling;
231             }
232 0 0         return if !$cast;
233 0           my $sigil = substr $cast->content, 0, 1;
234 0   0       return q{@} eq $sigil || q{%} eq $sigil;
235             }
236              
237             sub _is_in_slurpy_array_context {
238 0     0     my ($elem) = @_;
239              
240             # return true is the result of the regexp is passed to a subroutine.
241             # doesn't check for array context due to assignment.
242              
243             # look backward for explicit regex operator
244 0           my $psib = $elem->sprevious_sibling;
245 0 0 0       if ($psib && $psib->content eq q{=~}) {
246             # Track back through value
247 0           $psib = _skip_lhs($psib);
248             }
249              
250 0 0         if (!$psib) {
251 0           my $parent = $elem->parent;
252 0 0         return if !$parent;
253 0 0         if ($parent->isa('PPI::Statement')) {
254 0           $parent = $parent->parent;
255 0 0         return if !$parent;
256             }
257              
258             # Return true if we have a list that isn't part of a foreach loop.
259             # TECHNICAL DEBT: This code is basically shared with
260             # RequireCheckingReturnValueOfEval. I don't want to put this code
261             # into Perl::Critic::Utils::*, but I don't have time to sort out
262             # PPIx::Utilities::Structure::List yet.
263 0 0         if ( $parent->isa('PPI::Structure::List') ) {
264 0 0         my $parent_statement = $parent->statement() or return $TRUE;
265 0 0         return $TRUE if not
266             $parent_statement->isa('PPI::Statement::Compound');
267 0 0         return $TRUE if $parent_statement->type() ne 'foreach';
268             }
269              
270 0 0         return $TRUE if $parent->isa('PPI::Structure::Constructor');
271 0 0         if ($parent->isa('PPI::Structure::Block')) {
272 0 0         return $TRUE
273             if
274             refaddr($elem->statement)
275             eq refaddr([$parent->schildren]->[-1]);
276             }
277 0           return;
278             }
279 0 0         if ($psib->isa('PPI::Token::Operator')) {
280             # Most operators kill slurpiness (except assignment, which is handled elsewhere).
281 0           return q{,} eq $psib->content;
282             }
283 0           return $TRUE;
284             }
285              
286             sub _skip_lhs {
287 0     0     my ($elem) = @_;
288              
289             # TODO: better implementation to handle casts, expressions, subcalls, etc.
290 0           $elem = $elem->sprevious_sibling();
291              
292 0           return $elem;
293             }
294              
295             sub _enough_magic {
296 0     0     my ($elem, $re, $captures, $named_captures, $doc) = @_;
297              
298 0           _check_for_magic($elem, $re, $captures, $named_captures, $doc);
299              
300 0     0     return ( none {not defined} @{$captures} )
301             && ( !%{$named_captures} ||
302 0   0 0     none {defined} values %{$named_captures} );
  0            
303             }
304              
305             # void return
306             sub _check_for_magic {
307 0     0     my ($elem, $re, $captures, $named_captures, $doc) = @_;
308              
309             # Search for $1..$9 in :
310             # * the rest of this statement
311             # * subsequent sibling statements
312             # * if this is in a conditional boolean, the if/else bodies of the conditional
313             # * if this is in a while/for condition, the loop body
314             # But NO intervening regexps!
315              
316             # Package up the usual arguments for _check_rest_of_statement().
317 0           my $arg = {
318             regexp => $re,
319             numbered_captures => $captures,
320             named_captures => $named_captures,
321             document => $doc,
322             };
323              
324             # Capture whether or not the regular expression is negated -- that
325             # is, whether it is preceded by the '!~' binding operator.
326 0 0         if ( my $prior_token = $elem->sprevious_sibling() ) {
327 0   0       $arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) &&
328             q<!~> eq $prior_token->content();
329             }
330              
331 0 0         return if ! _check_rest_of_statement( $elem, $arg );
332              
333 0           my $parent = $elem->parent();
334 0   0       while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
335 0 0         return if ! _check_rest_of_statement( $parent, $arg );
336 0           $parent = $parent->parent();
337             }
338              
339 0           return;
340             }
341              
342             # Check if we are in the condition or block of a 'while'
343             sub _check_if_in_while_condition_or_block {
344 0     0     my ( $elem ) = @_;
345 0 0         $elem or return;
346              
347 0 0         my $parent = $elem->parent() or return;
348 0 0         $parent->isa( 'PPI::Statement' ) or return;
349              
350 0 0         my $item = $parent = $parent->parent() or return;
351 0 0         if ( $item->isa( 'PPI::Structure::Block' ) ) {
352 0 0         $item = $item->sprevious_sibling() or return;
353             }
354 0 0         $item->isa( 'PPI::Structure::Condition' ) or return;
355              
356 0 0         $item = $item->sprevious_sibling() or return;
357 0 0         $item->isa( 'PPI::Token::Word' ) or return;
358              
359 0           return $WHILE eq $item->content();
360             }
361              
362             {
363             # Shortcut operators '||', '//', and 'or' can cause everything after
364             # them to be skipped. 'and' trumps '||' and '//', and causes things
365             # to be evaluated again. The value is true to skip, false to cancel
366             # skipping.
367             Readonly::Hash my %SHORTCUT_OPERATOR => (
368             q<||> => $FALSE,
369             q<//> => $FALSE,
370             and => $TRUE,
371             or => $FALSE,
372             );
373              
374             # RT #38942
375             # The issue in the ticket is that in something like
376             # if ( /(a)/ || /(b)/ ) {
377             # say $1
378             # }
379             # the capture variable can come from either /(a)/ or /(b)/. If we
380             # don't take into account the short-cutting nature of the '||' we
381             # erroneously conclude that the capture in /(a)/ is not used. So we
382             # need to skip every regular expression after an alternation.
383             #
384             # The trick is that we want to still mark magic variables, because
385             # of code like
386             # my $foo = $1 || $2;
387             # so we can't just ignore everything after an alternation.
388             #
389             # To do all this correctly, we have to track precedence, and start
390             # paying attention again if an 'and' is found after a '||'.
391              
392             # Subroutine _make_regexp_checker() manufactures a snippet of code
393             # which is used to track regular expressions. It takes one optional
394             # argument, which is the snippet used to track the parent object's
395             # regular expressions.
396             #
397             # The snippet is passed each token encountered, and returns true if
398             # the scan for capture variables is to be stopped. This will happen
399             # if the token is a regular expression which is _not_ to the right
400             # of an alternation operator ('||', '//', or 'or'), or it _is_ to
401             # the right of an 'and', without an intervening alternation
402             # operator.
403             #
404             # If _make_regexp_checker() was passed a snippet which
405             # returns false on encountering a regular expression, the returned
406             # snippet always returns false, for the benefit of code like
407             # /(a)/ || ( /(b)/ || /(c)/ ).
408              
409             sub _make_regexp_checker {
410 0     0     my ( $parent ) = @_;
411              
412             $parent
413             and not $parent->()
414 0 0 0 0     and return sub { return $FALSE };
  0            
415              
416 0           my $check = $TRUE;
417 0           my $precedence = 0;
418              
419             return sub {
420 0     0     my ( $elem ) = @_;
421              
422 0 0         $elem or return $check;
423              
424 0 0         if ( $elem->isa( 'PPI::Token::Regexp' ) ) {
425 0 0         return _regexp_is_in_split( $elem ) ? $FALSE : $check;
426             }
427              
428 0 0 0       if ( $elem->isa( 'PPI::Token::Structure' )
429             && q<;> eq $elem->content() ) {
430 0           $check = $TRUE;
431 0           $precedence = 0;
432 0           return $FALSE;
433             }
434              
435 0 0         $elem->isa( 'PPI::Token::Operator' )
436             or return $FALSE;
437              
438 0           my $content = $elem->content();
439 0 0         defined( my $oper_check = $SHORTCUT_OPERATOR{$content} )
440             or return $FALSE;
441              
442 0           my $oper_precedence = precedence_of( $content );
443 0 0         $oper_precedence >= $precedence
444             or return $FALSE;
445              
446 0           $precedence = $oper_precedence;
447 0           $check = $oper_check;
448              
449 0           return $FALSE;
450 0           };
451             }
452             }
453              
454             # Argument is regexp.
455             # True if it is the regexp in a split()
456             sub _regexp_is_in_split {
457 0     0     my ( $elem ) = @_;
458              
459 0           my $prev;
460 0 0         if ( ! ( $prev = $elem->sprevious_sibling() ) ) {
461             # Maybe we have split( /.../, ... )
462 0 0         my $stmt = $elem->statement()
463             or return $FALSE;
464 0 0         $stmt->parent()
465             or return $FALSE;
466 0 0         $prev = $elem->sprevious_sibling()
467             or return $FALSE;
468             }
469 0   0       return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content();
470             }
471              
472              
473             # false if we hit another regexp
474             # The arguments are:
475             # $elem - The PPI::Element whose siblings are to be checked;
476             # $arg - A hash reference containing the following keys:
477             # regexp => the relevant PPIx::Regexp object;
478             # numbered_captures => a reference to the array used to track the
479             # use of numbered captures;
480             # named_captures => a reference to the hash used to track the
481             # use of named captures;
482             # negated => true if the regexp was bound to its target with the
483             # '!~' operator;
484             # document => a reference to the Perl::Critic::Document;
485             # Converted to passing the arguments everyone gets in a hash because of
486             # the need to add the 'negated' argument, which would put us at six
487             # arguments.
488             sub _check_rest_of_statement {
489 0     0     my ( $elem, $arg ) = @_;
490              
491 0           my $checker = _make_regexp_checker();
492 0           my $nsib = $elem->snext_sibling;
493              
494             # If we are an if (or elsif) and the result of the regexp is
495             # negated, we skip the first block found. RT #69867
496 0 0 0       if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) {
497 0   0       while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) {
498 0           $nsib = $nsib->snext_sibling();
499             }
500 0 0         $nsib and $nsib = $nsib->snext_sibling();
501             }
502              
503 0           while ($nsib) {
504 0 0         return if $checker->($nsib);
505 0 0         if ($nsib->isa('PPI::Node')) {
506 0 0         return if ! _check_node_children($nsib, $arg, $checker );
507             } else {
508             _mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures},
509 0           $arg->{named_captures}, $arg->{document} );
510             }
511 0           $nsib = $nsib->snext_sibling;
512             }
513 0           return $TRUE;
514             }
515              
516             {
517              
518             Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } );
519              
520             # Return true if the argument is the condition of an if or elsif
521             # statement, otherwise return false.
522             sub _is_condition_of_if_statement {
523 0     0     my ( $elem ) = @_;
524 0 0 0       $elem
525             and $elem->isa( 'PPI::Structure::Condition' )
526             or return $FALSE;
527 0 0         my $psib = $elem->sprevious_sibling()
528             or return $FALSE;
529 0 0         $psib->isa( 'PPI::Token::Word' )
530             or return $FALSE;
531 0           return $IS_IF_STATEMENT{ $psib->content() };
532              
533             }
534             }
535              
536             # false if we hit another regexp
537             # The arguments are:
538             # $elem - The PPI::Node whose children are to be checked;
539             # $arg - A hash reference containing the following keys:
540             # regexp => the relevant PPIx::Regexp object;
541             # numbered_captures => a reference to the array used to track the
542             # use of numbered captures;
543             # named_captures => a reference to the hash used to track the
544             # use of named captures;
545             # document => a reference to the Perl::Critic::Document;
546             # $parent_checker - The parent's regexp checking code snippet,
547             # manufactured by _make_regexp_checker(). This argument is not in
548             # the $arg hash because that hash is shared among levels of the
549             # parse tree, whereas the regexp checker is not.
550             # TODO the things in the $arg hash are widely shared among the various
551             # pieces/parts of this policy; maybe more subroutines should use this
552             # hash rather than passing all this stuff around as individual
553             # arguments. This particular subroutine got the hash-reference treatment
554             # because Subroutines::ProhibitManyArgs started complaining when the
555             # checker argument was added.
556             sub _check_node_children {
557 0     0     my ($elem, $arg, $parent_checker) = @_;
558              
559             # caveat: this will descend into subroutine definitions...
560              
561 0           my $checker = _make_regexp_checker($parent_checker);
562 0           for my $child ($elem->schildren) {
563 0 0         return if $checker->($child);
564 0 0         if ($child->isa('PPI::Node')) {
565 0 0         return if ! _check_node_children($child, $arg, $checker);
566             } else {
567             _mark_magic($child, $arg->{regexp},
568             $arg->{numbered_captures}, $arg->{named_captures},
569 0           $arg->{document});
570             }
571             }
572 0           return $TRUE;
573             }
574              
575             sub _mark_magic {
576 0     0     my ($elem, $re, $captures, $named_captures, $doc) = @_;
577              
578             # If we're a double-quotish element, we need to grub through its
579             # content. RT #38942
580 0 0         if ( _is_double_quotish_element( $elem ) ) {
581 0           _mark_magic_in_content(
582             $elem->content(), $re, $captures, $named_captures, $doc );
583 0           return;
584             }
585              
586             # Ditto a here document, though the logic is different. RT #38942
587 0 0         if ( $elem->isa( 'PPI::Token::HereDoc' ) ) {
588 0 0         $elem->content() =~ m/ \A << ~? \s* ' /sxm
589             or _mark_magic_in_content(
590             join( $EMPTY, $elem->heredoc() ), $re, $captures,
591             $named_captures, $doc );
592 0           return;
593             }
594              
595             # Only interested in magic, or known English equivalent.
596 0           my $content = $elem->content();
597 0 0         my ( $capture_ref, $capture_array, $capture_hash ) = $doc->uses_module( 'English' ) ?
598             ( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH, \%CAPTURE_HASH_ENGLISH ) :
599             ( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY, \%CAPTURE_HASH );
600             $elem->isa( 'PPI::Token::Magic' )
601             or $capture_ref->{$content}
602             or $capture_array->{$content}
603 0 0 0       or $capture_hash->{$content}
      0        
      0        
604             or return;
605              
606 0 0         if ( $content =~ m/ \A \$ ( \d+ ) /xms ) {
    0          
    0          
    0          
607              
608             # Record if we see $1, $2, $3, ...
609 0           my $num = $1;
610 0 0         if (0 < $num) { # don't mark $0
611             # Only mark the captures we really need -- don't mark superfluous magic vars
612 0 0         if ($num <= @{$captures}) {
  0            
613 0           _record_numbered_capture( $num, $captures );
614             }
615             }
616             } elsif ( $capture_array->{$content} ) { # GitHub #778
617 0           foreach my $num ( 1 .. @{$captures} ) {
  0            
618 0           _record_numbered_capture( $num, $captures );
619             }
620             } elsif ( $capture_hash->{$content} ) {
621 0           foreach my $name ( keys %{$named_captures} ) {
  0            
622 0           _record_named_capture( $name, $captures, $named_captures );
623             }
624             } elsif ( $capture_ref->{$content} ) {
625 0           _mark_magic_subscripted_code( $elem, $re, $captures, $named_captures );
626             }
627 0           return;
628             }
629              
630             # Record a named capture referenced by a hash or array found in code.
631             # The arguments are:
632             # $elem - The element that represents a subscripted capture variable;
633             # $re - The PPIx::Regexp object;
634             # $captures - A reference to the numbered capture array;
635             # $named_captures - A reference to the named capture hash.
636             sub _mark_magic_subscripted_code {
637 0     0     my ( $elem, $re, $captures, $named_captures ) = @_;
638 0 0         my $subscr = $elem->snext_sibling() or return;
639 0 0         $subscr->isa( 'PPI::Structure::Subscript' ) or return;
640 0           my $subval = $subscr->content();
641 0           _record_subscripted_capture(
642             $elem->content(), $subval, $re, $captures, $named_captures );
643 0           return;
644             }
645              
646             # Find capture variables in the content of a double-quotish thing, and
647             # record their use. RT #38942. The arguments are:
648             # $content - The content() ( or heredoc() in the case of a here
649             # document) to be analyzed;
650             # $re - The PPIx::Regexp object;
651             # $captures - A reference to the numbered capture array;
652             # $named_captures - A reference to the named capture hash.
653             sub _mark_magic_in_content {
654 0     0     my ( $content, $re, $captures, $named_captures, $doc ) = @_;
655              
656 0 0         my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
657             ( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
658             ( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
659              
660 0           while ( $content =~ m< ( [\$\@] (?:
661             [{] \^? (?: \w+ | . ) [}] | \w+ | . ) ) >sxmg ) {
662 0           my $name = $1;
663 0 0         $name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm
664             and $name =~ s/ [}] \z //sxm;
665              
666 0 0 0       if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) {
    0          
    0          
667              
668 0           my $num = $1;
669             0 < $num
670 0 0 0       and $num <= @{ $captures }
  0            
671             and _record_numbered_capture( $num, $captures );
672              
673             } elsif ( $capture_array->{$name} ) { # GitHub #778
674 0           foreach my $num ( 1 .. @{$captures} ) {
  0            
675 0           _record_numbered_capture( $num, $captures );
676             }
677              
678             } elsif ( $capture_ref->{$name} &&
679             $content =~ m/ \G ( [{] [^}]+ [}] | [[] [^]] []] ) /smxgc )
680             {
681 0           _record_subscripted_capture(
682             $name, $1, $re, $captures, $named_captures );
683              
684             }
685             }
686 0           return;
687             }
688              
689             # Return true if the given element is double-quotish. Always returns
690             # false for a PPI::Token::HereDoc, since they're a different beast.
691             # RT #38942.
692             sub _is_double_quotish_element {
693 0     0     my ( $elem ) = @_;
694              
695 0 0         $elem or return;
696              
697 0           my $content = $elem->content();
698              
699 0 0         if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) {
700 0           return $content !~ m/ \A qx \s* ' /sxm;
701             }
702              
703 0           foreach my $class ( qw{
704             PPI::Token::Quote::Double
705             PPI::Token::Quote::Interpolate
706             PPI::Token::QuoteLike::Backtick
707             PPI::Token::QuoteLike::Readline
708             } ) {
709 0 0         $elem->isa( $class ) and return $TRUE;
710             }
711              
712 0           return $FALSE;
713             }
714              
715             # Record a subscripted capture, either hash dereference or array
716             # dereference. We assume that an array represents a numbered capture and
717             # a hash represents a named capture, since we have to handle (e.g.) both
718             # @+ and %+.
719             sub _record_subscripted_capture {
720 0     0     my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_;
721 0 0         if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) {
    0          
722 0           ( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx;
723 0           _record_named_capture( $name, $captures, $named_captures );
724             } elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) {
725             # GitHub #778
726             # Mostly capture numbers encountered here are 1-based (e.g. @+, @-).
727             # But @{^CAPTURE} is zero-based, so we need to tweak the subscript
728             # before we record the capture number.
729 0           my $num = $1 + 0;
730             $num >= 0
731 0 0 0       and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name}
732             and $num++;
733 0           _record_numbered_capture( $num, $captures, $re );
734             }
735 0           return;
736             }
737              
738             # Because a named capture is also one or more numbered captures, the recording
739             # of the use of a named capture seemed complex enough to wrap in a subroutine.
740             sub _record_named_capture {
741 0     0     my ( $name, $captures, $named_captures ) = @_;
742 0 0         defined ( my $numbers = $named_captures->{$name} ) or return;
743 0           foreach my $capnum ( @{ $numbers } ) {
  0            
744 0           _record_numbered_capture( $capnum, $captures );
745             }
746 0           $named_captures->{$name} = undef;
747 0           return;
748             }
749              
750             sub _record_numbered_capture {
751 0     0     my ( $number, $captures, $re ) = @_;
752 0 0 0       $re and $number < 0
753             and $number = $re->max_capture_number() + $number + 1;
754 0 0         return if $number <= 0;
755 0           $captures->[ $number - 1 ] = 1;
756 0           return;
757             }
758              
759             1;
760              
761             __END__
762              
763             #-----------------------------------------------------------------------------
764              
765             =pod
766              
767             =for stopwords refactored
768              
769             =head1 NAME
770              
771             Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
772              
773              
774             =head1 AFFILIATION
775              
776             This Policy is part of the core L<Perl::Critic|Perl::Critic>
777             distribution.
778              
779              
780             =head1 DESCRIPTION
781              
782             Perl regular expressions have multiple types of grouping syntax. The
783             basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable
784             C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/>) are useful because
785             they have better runtime performance and do not copy strings to the
786             magic global capture variables.
787              
788             It's also easier on the maintenance programmer if you consistently use
789             capturing vs. non-capturing groups, because that programmer can tell
790             more easily which regexps can be refactored without breaking
791             surrounding code which may use the captured values.
792              
793              
794             =head1 CONFIGURATION
795              
796             This Policy is not configurable except for the standard options.
797              
798              
799             =head1 CAVEATS
800              
801             =head2 C<qr//> interpolation
802              
803             This policy can be confused by interpolation of C<qr//> elements, but
804             those are always false negatives. For example:
805              
806             my $foo_re = qr/(foo)/;
807             my ($foo) = m/$foo_re (bar)/x;
808              
809             A human can tell that this should be a violation because there are two
810             captures but only the first capture is used, not the second. The
811             policy only notices that there is one capture in the regexp and
812             remains happy.
813              
814             =head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END>
815              
816             This policy will only recognize capture groups referred to by these
817             variables if the use is subscripted by a literal integer.
818              
819             =head2 C<$^N> and C<$LAST_SUBMATCH_RESULT>
820              
821             This policy will not recognize capture groups referred to only by these
822             variables, because there is in general no way by static analysis to
823             determine which capture group is referred to. For example,
824              
825             m/ (?: (A[[:alpha:]]+) | (N\d+) ) (?{$foo=$^N}) /smx
826              
827             makes use of the first capture group if it matches, or the second
828             capture group if the first does not match but the second does.
829              
830             =head2 split()
831              
832             Normally, this policy thinks that if a capture is used at all it must be
833             used before the next regular expression in the same scope. The regular
834             expression in a C<split()> needs to be exempted because it does not
835             affect the caller's capture variables.
836              
837             At present, this policy recognizes and exempts the regular expressions
838             in
839              
840             split /.../, ...
841              
842             and
843              
844             split( /.../, ... )
845              
846             but more exotic syntax may produce false positives.
847              
848              
849             =head1 CREDITS
850              
851             Initial development of this policy was supported by a grant from the
852             Perl Foundation.
853              
854              
855             =head1 AUTHOR
856              
857             Chris Dolan <cdolan@cpan.org>
858              
859              
860             =head1 COPYRIGHT
861              
862             Copyright (c) 2007-2023 Chris Dolan. Many rights reserved.
863              
864             This program is free software; you can redistribute it and/or modify
865             it under the same terms as Perl itself. The full text of this license
866             can be found in the LICENSE file included with this module
867              
868             =cut
869              
870             # Local Variables:
871             # mode: cperl
872             # cperl-indent-level: 4
873             # fill-column: 78
874             # indent-tabs-mode: nil
875             # c-indentation-style: bsd
876             # End:
877             # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :