File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitUnusedCapture.pm

Criterion	Covered	Total	%
statement	29	284	10.2
branch	1	220	0.4
condition	0	71	0.0
subroutine	13	43	30.2
pod	4	5	80.0
total	47	623	7.5

line	stmt	bran	cond	sub	pod	time	code
1							package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
2
3	40			40		26920	use 5.010001;
	40					154
4	40			40		163	use strict;
	40					70
	40					754
5	40			40		185	use warnings;
	40					62
	40					1878
6
7	40			40		192	use List::SomeUtils qw(none);
	40					86
	40					2261
8	40			40		183	use Readonly;
	40					66
	40					1709
9	40			40		170	use Scalar::Util qw(refaddr);
	40					71
	40					1730
10
11	40					1907	use Perl::Critic::Utils qw{
12							:booleans :characters :severities hashify precedence_of
13							split_nodes_on_comma
14	40			40		173	};
	40					76
15	40			40		10615	use parent 'Perl::Critic::Policy';
	40					128
	40					253
16
17							our $VERSION = '1.156';
18
19							#-----------------------------------------------------------------------------
20
21							Readonly::Scalar my $SPLIT => q{split};
22							Readonly::Scalar my $WHILE => q{while};
23
24							Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE =>
25							hashify( qw< ${^CAPTURE} > );
26							# TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized
27							# as a use of capture variable n.
28							Readonly::Hash my %CAPTURE_REFERENCE => (
29							hashify( qw< $+ $- ${^CAPTURE_ALL} > ),
30							%ZERO_BASED_CAPTURE_REFERENCE );
31							Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => (
32							hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ),
33							%CAPTURE_REFERENCE );
34							Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > );
35							Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => (
36							hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ),
37							%CAPTURE_ARRAY );
38							Readonly::Hash my %CAPTURE_HASH => hashify( qw< %- %+ %{^CAPTURE} >);
39							Readonly::Hash my %CAPTURE_HASH_ENGLISH => (
40							hashify( qw< %LAST_PAREN_MATCH > ),
41							%CAPTURE_HASH );
42
43
44							Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
45							Readonly::Scalar my $EXPL => [252];
46
47							#-----------------------------------------------------------------------------
48
49	90			90	0	773	sub supported_parameters { return qw() }
50	75			75	1	254	sub default_severity { return $SEVERITY_MEDIUM }
51	86			86	1	235	sub default_themes { return qw( core pbp maintenance ) }
52							sub applies_to {
53	31			31	1	87	return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >;
54							}
55
56							#-----------------------------------------------------------------------------
57
58							Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
59
60							sub violates {
61	1			1	1	3	my ( $self, $elem, $doc ) = @_;
62
63							# optimization: don't bother parsing the regexp if there are no parens
64	1	50				9	return if 0 > index $elem->content(), '(';
65
66	0	0					my $re = $doc->ppix_regexp_from_element( $elem ) or return;
67	0	0					$re->failures() and return;
68
69	0	0					my $ncaptures = $re->max_capture_number() or return;
70
71	0						my @captures = ( undef ) x $ncaptures; # List of expected captures
72
73	0						my %named_captures; # List of expected named captures.
74							# Unlike the numbered capture logic, %named_captures
75							# entries are made undefined when a use of the name is
76							# found. Otherwise two hashes would be needed, one to
77							# become defined when a use is found, and one to hold
78							# the mapping of name to number.
79	0	0					foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture'
	0
80							) \|\| [] } ) {
81							# There can be more than one capture with the same name, so we need to
82							# record all of them. There will be duplications if the 'branch reset'
83							# "(?\| ... )" pattern is used, but this is benign given how numbered
84							# captures are recorded.
85	0		0				push @{ $named_captures{ $struct->name() } \|\|= [] }, $struct->number();
	0
86							}
87
88							# Look for references to the capture in the regex itself
89	0	0					return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc );
90
91	0	0	0				if ( $re->modifier_asserted( 'g' )
92							and not _check_if_in_while_condition_or_block( $elem ) ) {
93	0						$ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
94	0						$#captures = $ncaptures - 1;
95							}
96
97	0	0					if ( !%named_captures ) {
98	0	0					return if _enough_assignments($elem, \@captures);
99	0	0					return if _is_in_slurpy_array_context($elem);
100							}
101	0	0					return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc);
102
103	0						return $self->violation( $DESC, $EXPL, $elem );
104							}
105
106							# Find uses of both numbered and named capture variables in the regexp itself.
107							# Return true if all are used.
108							sub _enough_uses_in_regexp {
109	0			0			my ( $re, $captures, $named_captures, $doc ) = @_;
110
111							# Look for references to the capture in the regex itself. Note that this
112							# will also find backreferences in the replacement string of s///.
113	0	0					foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' )
	0
114							\|\| [] } ) {
115	0	0					if ( $token->is_named() ) {
116	0						_record_named_capture( $token->name(), $captures, $named_captures );
117							} else {
118	0						_record_numbered_capture( $token->absolute(), $captures );
119							}
120							}
121
122	0	0					foreach my $token ( @{ $re->find(
	0
123							'PPIx::Regexp::Token::Code' ) \|\| [] } ) {
124	0	0					my $ppi = $token->ppi() or next;
125	0						_check_node_children( $ppi, {
126							regexp => $re,
127							numbered_captures => $captures,
128							named_captures => $named_captures,
129							document => $doc,
130							}, _make_regexp_checker() );
131							}
132
133	0			0			return ( none {not defined} @{$captures} )
134							&& ( !%{$named_captures} \|\|
135	0		0	0			none {defined} values %{$named_captures} );
	0
136							}
137
138							sub _enough_assignments {
139	0			0			my ($elem, $captures) = @_;
140
141							# look backward for the assignment operator
142	0						my $psib = $elem->sprevious_sibling;
143							SIBLING:
144	0						while (1) {
145	0	0					return if !$psib;
146	0	0					if ($psib->isa('PPI::Token::Operator')) {
147	0	0					last SIBLING if q{=} eq $psib->content;
148	0	0					return if q{!~} eq $psib->content;
149							}
150	0						$psib = $psib->sprevious_sibling;
151							}
152
153	0						$psib = $psib->sprevious_sibling;
154	0	0					return if !$psib; # syntax error: '=' at the beginning of a statement???
155
156	0	0					if ($psib->isa('PPI::Token::Symbol')) {
		0
		0
157							# @foo = m/(foo)/
158							# @$foo = m/(foo)/
159							# %foo = m/(foo)/
160							# %$foo = m/(foo)/
161	0	0					return $TRUE if _symbol_is_slurpy($psib);
162
163							} elsif ($psib->isa('PPI::Structure::Block')) {
164							# @{$foo} = m/(foo)/
165							# %{$foo} = m/(foo)/
166	0	0					return $TRUE if _is_preceded_by_array_or_hash_cast($psib);
167
168							} elsif ($psib->isa('PPI::Structure::List')) {
169							# () = m/(foo)/
170							# ($foo) = m/(foo)/
171							# ($foo,$bar) = m/(foo)(bar)/
172							# (@foo) = m/(foo)(bar)/
173							# ($foo,@foo) = m/(foo)(bar)/
174							# ($foo,@$foo) = m/(foo)(bar)/
175							# ($foo,@{$foo}) = m/(foo)(bar)/
176
177	0						my @args = $psib->schildren;
178	0	0					return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy
179
180							# Forward looking: PPI might change in v1.200 so schild(0) is a
181							# PPI::Statement::Expression.
182	0	0	0				if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
183	0						@args = $args[0]->schildren;
184							}
185
186	0						my @parts = split_nodes_on_comma(@args);
187							PART:
188	0						for my $i (0 .. $#parts) {
189	0	0					if (1 == @{$parts[$i]}) {
	0
190	0						my $var = $parts[$i]->[0];
191	0	0	0				if ($var->isa('PPI::Token::Symbol') \|\| $var->isa('PPI::Token::Cast')) {
192	0	0					return $TRUE if _has_array_sigil($var);
193							}
194							}
195	0						_record_numbered_capture( $i + 1, $captures );
196							# ith variable capture
197							}
198							}
199
200	0			0			return none {not defined} @{$captures};
	0
	0
201							}
202
203							sub _symbol_is_slurpy {
204	0			0			my ($symbol) = @_;
205
206	0	0					return $TRUE if _has_array_sigil($symbol);
207	0	0					return $TRUE if _has_hash_sigil($symbol);
208	0	0					return $TRUE if _is_preceded_by_array_or_hash_cast($symbol);
209	0						return;
210							}
211
212							sub _has_array_sigil {
213	0			0			my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
214
215	0						return q{@} eq substr $elem->content, 0, 1;
216							}
217
218							sub _has_hash_sigil {
219	0			0			my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
220
221	0						return q{%} eq substr $elem->content, 0, 1;
222							}
223
224							sub _is_preceded_by_array_or_hash_cast {
225	0			0			my ($elem) = @_;
226	0						my $psib = $elem->sprevious_sibling;
227	0						my $cast;
228	0		0				while ($psib && $psib->isa('PPI::Token::Cast')) {
229	0						$cast = $psib;
230	0						$psib = $psib->sprevious_sibling;
231							}
232	0	0					return if !$cast;
233	0						my $sigil = substr $cast->content, 0, 1;
234	0		0				return q{@} eq $sigil \|\| q{%} eq $sigil;
235							}
236
237							sub _is_in_slurpy_array_context {
238	0			0			my ($elem) = @_;
239
240							# return true is the result of the regexp is passed to a subroutine.
241							# doesn't check for array context due to assignment.
242
243							# look backward for explicit regex operator
244	0						my $psib = $elem->sprevious_sibling;
245	0	0	0				if ($psib && $psib->content eq q{=~}) {
246							# Track back through value
247	0						$psib = _skip_lhs($psib);
248							}
249
250	0	0					if (!$psib) {
251	0						my $parent = $elem->parent;
252	0	0					return if !$parent;
253	0	0					if ($parent->isa('PPI::Statement')) {
254	0						$parent = $parent->parent;
255	0	0					return if !$parent;
256							}
257
258							# Return true if we have a list that isn't part of a foreach loop.
259							# TECHNICAL DEBT: This code is basically shared with
260							# RequireCheckingReturnValueOfEval. I don't want to put this code
261							# into Perl::Critic::Utils::*, but I don't have time to sort out
262							# PPIx::Utilities::Structure::List yet.
263	0	0					if ( $parent->isa('PPI::Structure::List') ) {
264	0	0					my $parent_statement = $parent->statement() or return $TRUE;
265	0	0					return $TRUE if not
266							$parent_statement->isa('PPI::Statement::Compound');
267	0	0					return $TRUE if $parent_statement->type() ne 'foreach';
268							}
269
270	0	0					return $TRUE if $parent->isa('PPI::Structure::Constructor');
271	0	0					if ($parent->isa('PPI::Structure::Block')) {
272	0	0					return $TRUE
273							if
274							refaddr($elem->statement)
275							eq refaddr([$parent->schildren]->[-1]);
276							}
277	0						return;
278							}
279	0	0					if ($psib->isa('PPI::Token::Operator')) {
280							# Most operators kill slurpiness (except assignment, which is handled elsewhere).
281	0						return q{,} eq $psib->content;
282							}
283	0						return $TRUE;
284							}
285
286							sub _skip_lhs {
287	0			0			my ($elem) = @_;
288
289							# TODO: better implementation to handle casts, expressions, subcalls, etc.
290	0						$elem = $elem->sprevious_sibling();
291
292	0						return $elem;
293							}
294
295							sub _enough_magic {
296	0			0			my ($elem, $re, $captures, $named_captures, $doc) = @_;
297
298	0						_check_for_magic($elem, $re, $captures, $named_captures, $doc);
299
300	0			0			return ( none {not defined} @{$captures} )
301							&& ( !%{$named_captures} \|\|
302	0		0	0			none {defined} values %{$named_captures} );
	0
303							}
304
305							# void return
306							sub _check_for_magic {
307	0			0			my ($elem, $re, $captures, $named_captures, $doc) = @_;
308
309							# Search for $1..$9 in :
310							# * the rest of this statement
311							# * subsequent sibling statements
312							# * if this is in a conditional boolean, the if/else bodies of the conditional
313							# * if this is in a while/for condition, the loop body
314							# But NO intervening regexps!
315
316							# Package up the usual arguments for _check_rest_of_statement().
317	0						my $arg = {
318							regexp => $re,
319							numbered_captures => $captures,
320							named_captures => $named_captures,
321							document => $doc,
322							};
323
324							# Capture whether or not the regular expression is negated -- that
325							# is, whether it is preceded by the '!~' binding operator.
326	0	0					if ( my $prior_token = $elem->sprevious_sibling() ) {
327	0		0				$arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) &&
328							q<!~> eq $prior_token->content();
329							}
330
331	0	0					return if ! _check_rest_of_statement( $elem, $arg );
332
333	0						my $parent = $elem->parent();
334	0		0				while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
335	0	0					return if ! _check_rest_of_statement( $parent, $arg );
336	0						$parent = $parent->parent();
337							}
338
339	0						return;
340							}
341
342							# Check if we are in the condition or block of a 'while'
343							sub _check_if_in_while_condition_or_block {
344	0			0			my ( $elem ) = @_;
345	0	0					$elem or return;
346
347	0	0					my $parent = $elem->parent() or return;
348	0	0					$parent->isa( 'PPI::Statement' ) or return;
349
350	0	0					my $item = $parent = $parent->parent() or return;
351	0	0					if ( $item->isa( 'PPI::Structure::Block' ) ) {
352	0	0					$item = $item->sprevious_sibling() or return;
353							}
354	0	0					$item->isa( 'PPI::Structure::Condition' ) or return;
355
356	0	0					$item = $item->sprevious_sibling() or return;
357	0	0					$item->isa( 'PPI::Token::Word' ) or return;
358
359	0						return $WHILE eq $item->content();
360							}
361
362							{
363							# Shortcut operators '\|\|', '//', and 'or' can cause everything after
364							# them to be skipped. 'and' trumps '\|\|' and '//', and causes things
365							# to be evaluated again. The value is true to skip, false to cancel
366							# skipping.
367							Readonly::Hash my %SHORTCUT_OPERATOR => (
368							q<\|\|> => $FALSE,
369							q<//> => $FALSE,
370							and => $TRUE,
371							or => $FALSE,
372							);
373
374							# RT #38942
375							# The issue in the ticket is that in something like
376							# if ( /(a)/ \|\| /(b)/ ) {
377							# say $1
378							# }
379							# the capture variable can come from either /(a)/ or /(b)/. If we
380							# don't take into account the short-cutting nature of the '\|\|' we
381							# erroneously conclude that the capture in /(a)/ is not used. So we
382							# need to skip every regular expression after an alternation.
383							#
384							# The trick is that we want to still mark magic variables, because
385							# of code like
386							# my $foo = $1 \|\| $2;
387							# so we can't just ignore everything after an alternation.
388							#
389							# To do all this correctly, we have to track precedence, and start
390							# paying attention again if an 'and' is found after a '\|\|'.
391
392							# Subroutine _make_regexp_checker() manufactures a snippet of code
393							# which is used to track regular expressions. It takes one optional
394							# argument, which is the snippet used to track the parent object's
395							# regular expressions.
396							#
397							# The snippet is passed each token encountered, and returns true if
398							# the scan for capture variables is to be stopped. This will happen
399							# if the token is a regular expression which is _not_ to the right
400							# of an alternation operator ('\|\|', '//', or 'or'), or it _is_ to
401							# the right of an 'and', without an intervening alternation
402							# operator.
403							#
404							# If _make_regexp_checker() was passed a snippet which
405							# returns false on encountering a regular expression, the returned
406							# snippet always returns false, for the benefit of code like
407							# /(a)/ \|\| ( /(b)/ \|\| /(c)/ ).
408
409							sub _make_regexp_checker {
410	0			0			my ( $parent ) = @_;
411
412							$parent
413							and not $parent->()
414	0	0	0	0			and return sub { return $FALSE };
	0
415
416	0						my $check = $TRUE;
417	0						my $precedence = 0;
418
419							return sub {
420	0			0			my ( $elem ) = @_;
421
422	0	0					$elem or return $check;
423
424	0	0					if ( $elem->isa( 'PPI::Token::Regexp' ) ) {
425	0	0					return _regexp_is_in_split( $elem ) ? $FALSE : $check;
426							}
427
428	0	0	0				if ( $elem->isa( 'PPI::Token::Structure' )
429							&& q<;> eq $elem->content() ) {
430	0						$check = $TRUE;
431	0						$precedence = 0;
432	0						return $FALSE;
433							}
434
435	0	0					$elem->isa( 'PPI::Token::Operator' )
436							or return $FALSE;
437
438	0						my $content = $elem->content();
439	0	0					defined( my $oper_check = $SHORTCUT_OPERATOR{$content} )
440							or return $FALSE;
441
442	0						my $oper_precedence = precedence_of( $content );
443	0	0					$oper_precedence >= $precedence
444							or return $FALSE;
445
446	0						$precedence = $oper_precedence;
447	0						$check = $oper_check;
448
449	0						return $FALSE;
450	0						};
451							}
452							}
453
454							# Argument is regexp.
455							# True if it is the regexp in a split()
456							sub _regexp_is_in_split {
457	0			0			my ( $elem ) = @_;
458
459	0						my $prev;
460	0	0					if ( ! ( $prev = $elem->sprevious_sibling() ) ) {
461							# Maybe we have split( /.../, ... )
462	0	0					my $stmt = $elem->statement()
463							or return $FALSE;
464	0	0					$stmt->parent()
465							or return $FALSE;
466	0	0					$prev = $elem->sprevious_sibling()
467							or return $FALSE;
468							}
469	0		0				return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content();
470							}
471
472
473							# false if we hit another regexp
474							# The arguments are:
475							# $elem - The PPI::Element whose siblings are to be checked;
476							# $arg - A hash reference containing the following keys:
477							# regexp => the relevant PPIx::Regexp object;
478							# numbered_captures => a reference to the array used to track the
479							# use of numbered captures;
480							# named_captures => a reference to the hash used to track the
481							# use of named captures;
482							# negated => true if the regexp was bound to its target with the
483							# '!~' operator;
484							# document => a reference to the Perl::Critic::Document;
485							# Converted to passing the arguments everyone gets in a hash because of
486							# the need to add the 'negated' argument, which would put us at six
487							# arguments.
488							sub _check_rest_of_statement {
489	0			0			my ( $elem, $arg ) = @_;
490
491	0						my $checker = _make_regexp_checker();
492	0						my $nsib = $elem->snext_sibling;
493
494							# If we are an if (or elsif) and the result of the regexp is
495							# negated, we skip the first block found. RT #69867
496	0	0	0				if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) {
497	0		0				while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) {
498	0						$nsib = $nsib->snext_sibling();
499							}
500	0	0					$nsib and $nsib = $nsib->snext_sibling();
501							}
502
503	0						while ($nsib) {
504	0	0					return if $checker->($nsib);
505	0	0					if ($nsib->isa('PPI::Node')) {
506	0	0					return if ! _check_node_children($nsib, $arg, $checker );
507							} else {
508							_mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures},
509	0						$arg->{named_captures}, $arg->{document} );
510							}
511	0						$nsib = $nsib->snext_sibling;
512							}
513	0						return $TRUE;
514							}
515
516							{
517
518							Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } );
519
520							# Return true if the argument is the condition of an if or elsif
521							# statement, otherwise return false.
522							sub _is_condition_of_if_statement {
523	0			0			my ( $elem ) = @_;
524	0	0	0				$elem
525							and $elem->isa( 'PPI::Structure::Condition' )
526							or return $FALSE;
527	0	0					my $psib = $elem->sprevious_sibling()
528							or return $FALSE;
529	0	0					$psib->isa( 'PPI::Token::Word' )
530							or return $FALSE;
531	0						return $IS_IF_STATEMENT{ $psib->content() };
532
533							}
534							}
535
536							# false if we hit another regexp
537							# The arguments are:
538							# $elem - The PPI::Node whose children are to be checked;
539							# $arg - A hash reference containing the following keys:
540							# regexp => the relevant PPIx::Regexp object;
541							# numbered_captures => a reference to the array used to track the
542							# use of numbered captures;
543							# named_captures => a reference to the hash used to track the
544							# use of named captures;
545							# document => a reference to the Perl::Critic::Document;
546							# $parent_checker - The parent's regexp checking code snippet,
547							# manufactured by _make_regexp_checker(). This argument is not in
548							# the $arg hash because that hash is shared among levels of the
549							# parse tree, whereas the regexp checker is not.
550							# TODO the things in the $arg hash are widely shared among the various
551							# pieces/parts of this policy; maybe more subroutines should use this
552							# hash rather than passing all this stuff around as individual
553							# arguments. This particular subroutine got the hash-reference treatment
554							# because Subroutines::ProhibitManyArgs started complaining when the
555							# checker argument was added.
556							sub _check_node_children {
557	0			0			my ($elem, $arg, $parent_checker) = @_;
558
559							# caveat: this will descend into subroutine definitions...
560
561	0						my $checker = _make_regexp_checker($parent_checker);
562	0						for my $child ($elem->schildren) {
563	0	0					return if $checker->($child);
564	0	0					if ($child->isa('PPI::Node')) {
565	0	0					return if ! _check_node_children($child, $arg, $checker);
566							} else {
567							_mark_magic($child, $arg->{regexp},
568							$arg->{numbered_captures}, $arg->{named_captures},
569	0						$arg->{document});
570							}
571							}
572	0						return $TRUE;
573							}
574
575							sub _mark_magic {
576	0			0			my ($elem, $re, $captures, $named_captures, $doc) = @_;
577
578							# If we're a double-quotish element, we need to grub through its
579							# content. RT #38942
580	0	0					if ( _is_double_quotish_element( $elem ) ) {
581	0						_mark_magic_in_content(
582							$elem->content(), $re, $captures, $named_captures, $doc );
583	0						return;
584							}
585
586							# Ditto a here document, though the logic is different. RT #38942
587	0	0					if ( $elem->isa( 'PPI::Token::HereDoc' ) ) {
588	0	0					$elem->content() =~ m/ \A << ~? \s* ' /sxm
589							or _mark_magic_in_content(
590							join( $EMPTY, $elem->heredoc() ), $re, $captures,
591							$named_captures, $doc );
592	0						return;
593							}
594
595							# Only interested in magic, or known English equivalent.
596	0						my $content = $elem->content();
597	0	0					my ( $capture_ref, $capture_array, $capture_hash ) = $doc->uses_module( 'English' ) ?
598							( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH, \%CAPTURE_HASH_ENGLISH ) :
599							( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY, \%CAPTURE_HASH );
600							$elem->isa( 'PPI::Token::Magic' )
601							or $capture_ref->{$content}
602							or $capture_array->{$content}
603	0	0	0				or $capture_hash->{$content}
			0
			0
604							or return;
605
606	0	0					if ( $content =~ m/ \A \$ ( \d+ ) /xms ) {
		0
		0
		0
607
608							# Record if we see $1, $2, $3, ...
609	0						my $num = $1;
610	0	0					if (0 < $num) { # don't mark $0
611							# Only mark the captures we really need -- don't mark superfluous magic vars
612	0	0					if ($num <= @{$captures}) {
	0
613	0						_record_numbered_capture( $num, $captures );
614							}
615							}
616							} elsif ( $capture_array->{$content} ) { # GitHub #778
617	0						foreach my $num ( 1 .. @{$captures} ) {
	0
618	0						_record_numbered_capture( $num, $captures );
619							}
620							} elsif ( $capture_hash->{$content} ) {
621	0						foreach my $name ( keys %{$named_captures} ) {
	0
622	0						_record_named_capture( $name, $captures, $named_captures );
623							}
624							} elsif ( $capture_ref->{$content} ) {
625	0						_mark_magic_subscripted_code( $elem, $re, $captures, $named_captures );
626							}
627	0						return;
628							}
629
630							# Record a named capture referenced by a hash or array found in code.
631							# The arguments are:
632							# $elem - The element that represents a subscripted capture variable;
633							# $re - The PPIx::Regexp object;
634							# $captures - A reference to the numbered capture array;
635							# $named_captures - A reference to the named capture hash.
636							sub _mark_magic_subscripted_code {
637	0			0			my ( $elem, $re, $captures, $named_captures ) = @_;
638	0	0					my $subscr = $elem->snext_sibling() or return;
639	0	0					$subscr->isa( 'PPI::Structure::Subscript' ) or return;
640	0						my $subval = $subscr->content();
641	0						_record_subscripted_capture(
642							$elem->content(), $subval, $re, $captures, $named_captures );
643	0						return;
644							}
645
646							# Find capture variables in the content of a double-quotish thing, and
647							# record their use. RT #38942. The arguments are:
648							# $content - The content() ( or heredoc() in the case of a here
649							# document) to be analyzed;
650							# $re - The PPIx::Regexp object;
651							# $captures - A reference to the numbered capture array;
652							# $named_captures - A reference to the named capture hash.
653							sub _mark_magic_in_content {
654	0			0			my ( $content, $re, $captures, $named_captures, $doc ) = @_;
655
656	0	0					my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
657							( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
658							( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
659
660	0						while ( $content =~ m< ( [\$\@] (?:
661							[{] \^? (?: \w+ \| . ) [}] \| \w+ \| . ) ) >sxmg ) {
662	0						my $name = $1;
663	0	0					$name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm
664							and $name =~ s/ [}] \z //sxm;
665
666	0	0	0				if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) {
		0
		0
667
668	0						my $num = $1;
669							0 < $num
670	0	0	0				and $num <= @{ $captures }
	0
671							and _record_numbered_capture( $num, $captures );
672
673							} elsif ( $capture_array->{$name} ) { # GitHub #778
674	0						foreach my $num ( 1 .. @{$captures} ) {
	0
675	0						_record_numbered_capture( $num, $captures );
676							}
677
678							} elsif ( $capture_ref->{$name} &&
679							$content =~ m/ \G ( [{] [^}]+ [}] \| [[] [^]] []] ) /smxgc )
680							{
681	0						_record_subscripted_capture(
682							$name, $1, $re, $captures, $named_captures );
683
684							}
685							}
686	0						return;
687							}
688
689							# Return true if the given element is double-quotish. Always returns
690							# false for a PPI::Token::HereDoc, since they're a different beast.
691							# RT #38942.
692							sub _is_double_quotish_element {
693	0			0			my ( $elem ) = @_;
694
695	0	0					$elem or return;
696
697	0						my $content = $elem->content();
698
699	0	0					if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) {
700	0						return $content !~ m/ \A qx \s* ' /sxm;
701							}
702
703	0						foreach my $class ( qw{
704							PPI::Token::Quote::Double
705							PPI::Token::Quote::Interpolate
706							PPI::Token::QuoteLike::Backtick
707							PPI::Token::QuoteLike::Readline
708							} ) {
709	0	0					$elem->isa( $class ) and return $TRUE;
710							}
711
712	0						return $FALSE;
713							}
714
715							# Record a subscripted capture, either hash dereference or array
716							# dereference. We assume that an array represents a numbered capture and
717							# a hash represents a named capture, since we have to handle (e.g.) both
718							# @+ and %+.
719							sub _record_subscripted_capture {
720	0			0			my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_;
721	0	0					if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) {
		0
722	0						( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx;
723	0						_record_named_capture( $name, $captures, $named_captures );
724							} elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) {
725							# GitHub #778
726							# Mostly capture numbers encountered here are 1-based (e.g. @+, @-).
727							# But @{^CAPTURE} is zero-based, so we need to tweak the subscript
728							# before we record the capture number.
729	0						my $num = $1 + 0;
730							$num >= 0
731	0	0	0				and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name}
732							and $num++;
733	0						_record_numbered_capture( $num, $captures, $re );
734							}
735	0						return;
736							}
737
738							# Because a named capture is also one or more numbered captures, the recording
739							# of the use of a named capture seemed complex enough to wrap in a subroutine.
740							sub _record_named_capture {
741	0			0			my ( $name, $captures, $named_captures ) = @_;
742	0	0					defined ( my $numbers = $named_captures->{$name} ) or return;
743	0						foreach my $capnum ( @{ $numbers } ) {
	0
744	0						_record_numbered_capture( $capnum, $captures );
745							}
746	0						$named_captures->{$name} = undef;
747	0						return;
748							}
749
750							sub _record_numbered_capture {
751	0			0			my ( $number, $captures, $re ) = @_;
752	0	0	0				$re and $number < 0
753							and $number = $re->max_capture_number() + $number + 1;
754	0	0					return if $number <= 0;
755	0						$captures->[ $number - 1 ] = 1;
756	0						return;
757							}
758
759							1;
760
761							__END__
762
763							#-----------------------------------------------------------------------------
764
765							=pod
766
767							=for stopwords refactored
768
769							=head1 NAME
770
771							Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
772
773
774							=head1 AFFILIATION
775
776							This Policy is part of the core L<Perl::Critic\|Perl::Critic>
777							distribution.
778
779
780							=head1 DESCRIPTION
781
782							Perl regular expressions have multiple types of grouping syntax. The
783							basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable
784							C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/>) are useful because
785							they have better runtime performance and do not copy strings to the
786							magic global capture variables.
787
788							It's also easier on the maintenance programmer if you consistently use
789							capturing vs. non-capturing groups, because that programmer can tell
790							more easily which regexps can be refactored without breaking
791							surrounding code which may use the captured values.
792
793
794							=head1 CONFIGURATION
795
796							This Policy is not configurable except for the standard options.
797
798
799							=head1 CAVEATS
800
801							=head2 C<qr//> interpolation
802
803							This policy can be confused by interpolation of C<qr//> elements, but
804							those are always false negatives. For example:
805
806							my $foo_re = qr/(foo)/;
807							my ($foo) = m/$foo_re (bar)/x;
808
809							A human can tell that this should be a violation because there are two
810							captures but only the first capture is used, not the second. The
811							policy only notices that there is one capture in the regexp and
812							remains happy.
813
814							=head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END>
815
816							This policy will only recognize capture groups referred to by these
817							variables if the use is subscripted by a literal integer.
818
819							=head2 C<$^N> and C<$LAST_SUBMATCH_RESULT>
820
821							This policy will not recognize capture groups referred to only by these
822							variables, because there is in general no way by static analysis to
823							determine which capture group is referred to. For example,
824
825							m/ (?: (A[[:alpha:]]+) \| (N\d+) ) (?{$foo=$^N}) /smx
826
827							makes use of the first capture group if it matches, or the second
828							capture group if the first does not match but the second does.
829
830							=head2 split()
831
832							Normally, this policy thinks that if a capture is used at all it must be
833							used before the next regular expression in the same scope. The regular
834							expression in a C<split()> needs to be exempted because it does not
835							affect the caller's capture variables.
836
837							At present, this policy recognizes and exempts the regular expressions
838							in
839
840							split /.../, ...
841
842							and
843
844							split( /.../, ... )
845
846							but more exotic syntax may produce false positives.
847
848
849							=head1 CREDITS
850
851							Initial development of this policy was supported by a grant from the
852							Perl Foundation.
853
854
855							=head1 AUTHOR
856
857							Chris Dolan <cdolan@cpan.org>
858
859
860							=head1 COPYRIGHT
861
862							Copyright (c) 2007-2023 Chris Dolan. Many rights reserved.
863
864							This program is free software; you can redistribute it and/or modify
865							it under the same terms as Perl itself. The full text of this license
866							can be found in the LICENSE file included with this module
867
868							=cut
869
870							# Local Variables:
871							# mode: cperl
872							# cperl-indent-level: 4
873							# fill-column: 78
874							# indent-tabs-mode: nil
875							# c-indentation-style: bsd
876							# End:
877							# ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :