File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitUnusedCapture.pm

Criterion	Covered	Total	%
statement	27	280	9.6
branch	0	216	0.0
condition	0	71	0.0
subroutine	12	43	27.9
pod	4	5	80.0
total	43	615	6.9

line	stmt	bran	cond	sub	pod	time	code
1							package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
2
3	40			40		30701	use 5.010001;
	40					209
4	40			40		292	use strict;
	40					130
	40					831
5	40			40		253	use warnings;
	40					141
	40					1269
6
7	40			40		330	use List::SomeUtils qw(none);
	40					145
	40					2086
8	40			40		265	use Readonly;
	40					155
	40					1994
9	40			40		368	use Scalar::Util qw(refaddr);
	40					143
	40					2382
10
11	40					2272	use Perl::Critic::Utils qw{
12							:booleans :characters :severities hashify precedence_of
13							split_nodes_on_comma
14	40			40		381	};
	40					163
15	40			40		14582	use parent 'Perl::Critic::Policy';
	40					135
	40					305
16
17							our $VERSION = '1.150';
18
19							#-----------------------------------------------------------------------------
20
21							Readonly::Scalar my $SPLIT => q{split};
22							Readonly::Scalar my $WHILE => q{while};
23
24							Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE =>
25							hashify( qw< ${^CAPTURE} > );
26							# TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized
27							# as a use of capture variable n.
28							Readonly::Hash my %CAPTURE_REFERENCE => (
29							hashify( qw< $+ $- ${^CAPTURE_ALL} > ),
30							%ZERO_BASED_CAPTURE_REFERENCE );
31							Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => (
32							hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ),
33							%CAPTURE_REFERENCE );
34							Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > );
35							Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => (
36							hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ),
37							%CAPTURE_ARRAY );
38
39							Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
40							Readonly::Scalar my $EXPL => [252];
41
42							#-----------------------------------------------------------------------------
43
44	89			89	0	1633	sub supported_parameters { return qw() }
45	74			74	1	312	sub default_severity { return $SEVERITY_MEDIUM }
46	86			86	1	357	sub default_themes { return qw( core pbp maintenance ) }
47							sub applies_to {
48	30			30	1	89	return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >;
49							}
50
51							#-----------------------------------------------------------------------------
52
53							Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
54
55							sub violates {
56	0			0	1		my ( $self, $elem, $doc ) = @_;
57
58							# optimization: don't bother parsing the regexp if there are no parens
59	0	0					return if 0 > index $elem->content(), '(';
60
61	0	0					my $re = $doc->ppix_regexp_from_element( $elem ) or return;
62	0	0					$re->failures() and return;
63
64	0	0					my $ncaptures = $re->max_capture_number() or return;
65
66	0						my @captures = ( undef ) x $ncaptures; # List of expected captures
67
68	0						my %named_captures; # List of expected named captures.
69							# Unlike the numbered capture logic, %named_captures
70							# entries are made undefined when a use of the name is
71							# found. Otherwise two hashes would be needed, one to
72							# become defined when a use is found, and one to hold
73							# the mapping of name to number.
74	0	0					foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture'
	0
75							) \|\| [] } ) {
76							# There can be more than one capture with the same name, so we need to
77							# record all of them. There will be duplications if the 'branch reset'
78							# "(?\| ... )" pattern is used, but this is benign given how numbered
79							# captures are recorded.
80	0		0				push @{ $named_captures{ $struct->name() } \|\|= [] }, $struct->number();
	0
81							}
82
83							# Look for references to the capture in the regex itself
84	0	0					return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc );
85
86	0	0	0				if ( $re->modifier_asserted( 'g' )
87							and not _check_if_in_while_condition_or_block( $elem ) ) {
88	0						$ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
89	0						$#captures = $ncaptures - 1;
90							}
91
92	0	0	0				return if _enough_assignments($elem, \@captures) && !%named_captures;
93	0	0	0				return if _is_in_slurpy_array_context($elem) && !%named_captures;
94	0	0					return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc);
95
96	0						return $self->violation( $DESC, $EXPL, $elem );
97							}
98
99							# Find uses of both numbered and named capture variables in the regexp itself.
100							# Return true if all are used.
101							sub _enough_uses_in_regexp {
102	0			0			my ( $re, $captures, $named_captures, $doc ) = @_;
103
104							# Look for references to the capture in the regex itself. Note that this
105							# will also find backreferences in the replacement string of s///.
106	0	0					foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' )
	0
107							\|\| [] } ) {
108	0	0					if ( $token->is_named() ) {
109	0						_record_named_capture( $token->name(), $captures, $named_captures );
110							} else {
111	0						_record_numbered_capture( $token->absolute(), $captures );
112							}
113							}
114
115	0	0					foreach my $token ( @{ $re->find(
	0
116							'PPIx::Regexp::Token::Code' ) \|\| [] } ) {
117	0	0					my $ppi = $token->ppi() or next;
118	0						_check_node_children( $ppi, {
119							regexp => $re,
120							numbered_captures => $captures,
121							named_captures => $named_captures,
122							document => $doc,
123							}, _make_regexp_checker() );
124							}
125
126	0			0			return ( none {not defined} @{$captures} )
127							&& ( !%{$named_captures} \|\|
128	0		0	0			none {defined} values %{$named_captures} );
	0
129							}
130
131							sub _enough_assignments {
132	0			0			my ($elem, $captures) = @_;
133
134							# look backward for the assignment operator
135	0						my $psib = $elem->sprevious_sibling;
136							SIBLING:
137	0						while (1) {
138	0	0					return if !$psib;
139	0	0					if ($psib->isa('PPI::Token::Operator')) {
140	0	0					last SIBLING if q{=} eq $psib->content;
141	0	0					return if q{!~} eq $psib->content;
142							}
143	0						$psib = $psib->sprevious_sibling;
144							}
145
146	0						$psib = $psib->sprevious_sibling;
147	0	0					return if !$psib; # syntax error: '=' at the beginning of a statement???
148
149	0	0					if ($psib->isa('PPI::Token::Symbol')) {
		0
		0
150							# @foo = m/(foo)/
151							# @$foo = m/(foo)/
152							# %foo = m/(foo)/
153							# %$foo = m/(foo)/
154	0	0					return $TRUE if _symbol_is_slurpy($psib);
155
156							} elsif ($psib->isa('PPI::Structure::Block')) {
157							# @{$foo} = m/(foo)/
158							# %{$foo} = m/(foo)/
159	0	0					return $TRUE if _is_preceded_by_array_or_hash_cast($psib);
160
161							} elsif ($psib->isa('PPI::Structure::List')) {
162							# () = m/(foo)/
163							# ($foo) = m/(foo)/
164							# ($foo,$bar) = m/(foo)(bar)/
165							# (@foo) = m/(foo)(bar)/
166							# ($foo,@foo) = m/(foo)(bar)/
167							# ($foo,@$foo) = m/(foo)(bar)/
168							# ($foo,@{$foo}) = m/(foo)(bar)/
169
170	0						my @args = $psib->schildren;
171	0	0					return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy
172
173							# Forward looking: PPI might change in v1.200 so schild(0) is a
174							# PPI::Statement::Expression.
175	0	0	0				if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
176	0						@args = $args[0]->schildren;
177							}
178
179	0						my @parts = split_nodes_on_comma(@args);
180							PART:
181	0						for my $i (0 .. $#parts) {
182	0	0					if (1 == @{$parts[$i]}) {
	0
183	0						my $var = $parts[$i]->[0];
184	0	0	0				if ($var->isa('PPI::Token::Symbol') \|\| $var->isa('PPI::Token::Cast')) {
185	0	0					return $TRUE if _has_array_sigil($var);
186							}
187							}
188	0						_record_numbered_capture( $i + 1, $captures );
189							# ith variable capture
190							}
191							}
192
193	0			0			return none {not defined} @{$captures};
	0
	0
194							}
195
196							sub _symbol_is_slurpy {
197	0			0			my ($symbol) = @_;
198
199	0	0					return $TRUE if _has_array_sigil($symbol);
200	0	0					return $TRUE if _has_hash_sigil($symbol);
201	0	0					return $TRUE if _is_preceded_by_array_or_hash_cast($symbol);
202	0						return;
203							}
204
205							sub _has_array_sigil {
206	0			0			my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
207
208	0						return q{@} eq substr $elem->content, 0, 1;
209							}
210
211							sub _has_hash_sigil {
212	0			0			my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
213
214	0						return q{%} eq substr $elem->content, 0, 1;
215							}
216
217							sub _is_preceded_by_array_or_hash_cast {
218	0			0			my ($elem) = @_;
219	0						my $psib = $elem->sprevious_sibling;
220	0						my $cast;
221	0		0				while ($psib && $psib->isa('PPI::Token::Cast')) {
222	0						$cast = $psib;
223	0						$psib = $psib->sprevious_sibling;
224							}
225	0	0					return if !$cast;
226	0						my $sigil = substr $cast->content, 0, 1;
227	0		0				return q{@} eq $sigil \|\| q{%} eq $sigil;
228							}
229
230							sub _is_in_slurpy_array_context {
231	0			0			my ($elem) = @_;
232
233							# return true is the result of the regexp is passed to a subroutine.
234							# doesn't check for array context due to assignment.
235
236							# look backward for explicit regex operator
237	0						my $psib = $elem->sprevious_sibling;
238	0	0	0				if ($psib && $psib->content eq q{=~}) {
239							# Track back through value
240	0						$psib = _skip_lhs($psib);
241							}
242
243	0	0					if (!$psib) {
244	0						my $parent = $elem->parent;
245	0	0					return if !$parent;
246	0	0					if ($parent->isa('PPI::Statement')) {
247	0						$parent = $parent->parent;
248	0	0					return if !$parent;
249							}
250
251							# Return true if we have a list that isn't part of a foreach loop.
252							# TECHNICAL DEBT: This code is basically shared with
253							# RequireCheckingReturnValueOfEval. I don't want to put this code
254							# into Perl::Critic::Utils::*, but I don't have time to sort out
255							# PPIx::Utilities::Structure::List yet.
256	0	0					if ( $parent->isa('PPI::Structure::List') ) {
257	0	0					my $parent_statement = $parent->statement() or return $TRUE;
258	0	0					return $TRUE if not
259							$parent_statement->isa('PPI::Statement::Compound');
260	0	0					return $TRUE if $parent_statement->type() ne 'foreach';
261							}
262
263	0	0					return $TRUE if $parent->isa('PPI::Structure::Constructor');
264	0	0					if ($parent->isa('PPI::Structure::Block')) {
265	0	0					return $TRUE
266							if
267							refaddr($elem->statement)
268							eq refaddr([$parent->schildren]->[-1]);
269							}
270	0						return;
271							}
272	0	0					if ($psib->isa('PPI::Token::Operator')) {
273							# Most operators kill slurpiness (except assignment, which is handled elsewhere).
274	0						return q{,} eq $psib->content;
275							}
276	0						return $TRUE;
277							}
278
279							sub _skip_lhs {
280	0			0			my ($elem) = @_;
281
282							# TODO: better implementation to handle casts, expressions, subcalls, etc.
283	0						$elem = $elem->sprevious_sibling();
284
285	0						return $elem;
286							}
287
288							sub _enough_magic {
289	0			0			my ($elem, $re, $captures, $named_captures, $doc) = @_;
290
291	0						_check_for_magic($elem, $re, $captures, $named_captures, $doc);
292
293	0			0			return ( none {not defined} @{$captures} )
294							&& ( !%{$named_captures} \|\|
295	0		0	0			none {defined} values %{$named_captures} );
	0
296							}
297
298							# void return
299							sub _check_for_magic {
300	0			0			my ($elem, $re, $captures, $named_captures, $doc) = @_;
301
302							# Search for $1..$9 in :
303							# * the rest of this statement
304							# * subsequent sibling statements
305							# * if this is in a conditional boolean, the if/else bodies of the conditional
306							# * if this is in a while/for condition, the loop body
307							# But NO intervening regexps!
308
309							# Package up the usual arguments for _check_rest_of_statement().
310	0						my $arg = {
311							regexp => $re,
312							numbered_captures => $captures,
313							named_captures => $named_captures,
314							document => $doc,
315							};
316
317							# Capture whether or not the regular expression is negated -- that
318							# is, whether it is preceded by the '!~' binding operator.
319	0	0					if ( my $prior_token = $elem->sprevious_sibling() ) {
320	0		0				$arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) &&
321							q<!~> eq $prior_token->content();
322							}
323
324	0	0					return if ! _check_rest_of_statement( $elem, $arg );
325
326	0						my $parent = $elem->parent();
327	0		0				while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
328	0	0					return if ! _check_rest_of_statement( $parent, $arg );
329	0						$parent = $parent->parent();
330							}
331
332	0						return;
333							}
334
335							# Check if we are in the condition or block of a 'while'
336							sub _check_if_in_while_condition_or_block {
337	0			0			my ( $elem ) = @_;
338	0	0					$elem or return;
339
340	0	0					my $parent = $elem->parent() or return;
341	0	0					$parent->isa( 'PPI::Statement' ) or return;
342
343	0	0					my $item = $parent = $parent->parent() or return;
344	0	0					if ( $item->isa( 'PPI::Structure::Block' ) ) {
345	0	0					$item = $item->sprevious_sibling() or return;
346							}
347	0	0					$item->isa( 'PPI::Structure::Condition' ) or return;
348
349	0	0					$item = $item->sprevious_sibling() or return;
350	0	0					$item->isa( 'PPI::Token::Word' ) or return;
351
352	0						return $WHILE eq $item->content();
353							}
354
355							{
356							# Shortcut operators '\|\|', '//', and 'or' can cause everything after
357							# them to be skipped. 'and' trumps '\|\|' and '//', and causes things
358							# to be evaluated again. The value is true to skip, false to cancel
359							# skipping.
360							Readonly::Hash my %SHORTCUT_OPERATOR => (
361							q<\|\|> => $FALSE,
362							q<//> => $FALSE,
363							and => $TRUE,
364							or => $FALSE,
365							);
366
367							# RT #38942
368							# The issue in the ticket is that in something like
369							# if ( /(a)/ \|\| /(b)/ ) {
370							# say $1
371							# }
372							# the capture variable can come from either /(a)/ or /(b)/. If we
373							# don't take into account the short-cutting nature of the '\|\|' we
374							# erroneously conclude that the capture in /(a)/ is not used. So we
375							# need to skip every regular expression after an alternation.
376							#
377							# The trick is that we want to still mark magic variables, because
378							# of code like
379							# my $foo = $1 \|\| $2;
380							# so we can't just ignore everything after an alternation.
381							#
382							# To do all this correctly, we have to track precedence, and start
383							# paying attention again if an 'and' is found after a '\|\|'.
384
385							# Subroutine _make_regexp_checker() manufactures a snippet of code
386							# which is used to track regular expressions. It takes one optional
387							# argument, which is the snippet used to track the parent object's
388							# regular expressions.
389							#
390							# The snippet is passed each token encountered, and returns true if
391							# the scan for capture variables is to be stopped. This will happen
392							# if the token is a regular expression which is _not_ to the right
393							# of an alternation operator ('\|\|', '//', or 'or'), or it _is_ to
394							# the right of an 'and', without an intervening alternation
395							# operator.
396							#
397							# If _make_regexp_checker() was passed a snippet which
398							# returns false on encountering a regular expression, the returned
399							# snippet always returns false, for the benefit of code like
400							# /(a)/ \|\| ( /(b)/ \|\| /(c)/ ).
401
402							sub _make_regexp_checker {
403	0			0			my ( $parent ) = @_;
404
405							$parent
406							and not $parent->()
407	0	0	0	0			and return sub { return $FALSE };
	0
408
409	0						my $check = $TRUE;
410	0						my $precedence = 0;
411
412							return sub {
413	0			0			my ( $elem ) = @_;
414
415	0	0					$elem or return $check;
416
417	0	0					if ( $elem->isa( 'PPI::Token::Regexp' ) ) {
418	0	0					return _regexp_is_in_split( $elem ) ? $FALSE : $check;
419							}
420
421	0	0	0				if ( $elem->isa( 'PPI::Token::Structure' )
422							&& q<;> eq $elem->content() ) {
423	0						$check = $TRUE;
424	0						$precedence = 0;
425	0						return $FALSE;
426							}
427
428	0	0					$elem->isa( 'PPI::Token::Operator' )
429							or return $FALSE;
430
431	0						my $content = $elem->content();
432	0	0					defined( my $oper_check = $SHORTCUT_OPERATOR{$content} )
433							or return $FALSE;
434
435	0						my $oper_precedence = precedence_of( $content );
436	0	0					$oper_precedence >= $precedence
437							or return $FALSE;
438
439	0						$precedence = $oper_precedence;
440	0						$check = $oper_check;
441
442	0						return $FALSE;
443	0						};
444							}
445							}
446
447							# Argument is regexp.
448							# True if it is the regexp in a split()
449							sub _regexp_is_in_split {
450	0			0			my ( $elem ) = @_;
451
452	0						my $prev;
453	0	0					if ( ! ( $prev = $elem->sprevious_sibling() ) ) {
454							# Maybe we have split( /.../, ... )
455	0	0					my $stmt = $elem->statement()
456							or return $FALSE;
457	0	0					$stmt->parent()
458							or return $FALSE;
459	0	0					$prev = $elem->sprevious_sibling()
460							or return $FALSE;
461							}
462	0		0				return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content();
463							}
464
465
466							# false if we hit another regexp
467							# The arguments are:
468							# $elem - The PPI::Element whose siblings are to be checked;
469							# $arg - A hash reference containing the following keys:
470							# regexp => the relevant PPIx::Regexp object;
471							# numbered_captures => a reference to the array used to track the
472							# use of numbered captures;
473							# named_captures => a reference to the hash used to track the
474							# use of named captures;
475							# negated => true if the regexp was bound to its target with the
476							# '!~' operator;
477							# document => a reference to the Perl::Critic::Document;
478							# Converted to passing the arguments everyone gets in a hash because of
479							# the need to add the 'negated' argument, which would put us at six
480							# arguments.
481							sub _check_rest_of_statement {
482	0			0			my ( $elem, $arg ) = @_;
483
484	0						my $checker = _make_regexp_checker();
485	0						my $nsib = $elem->snext_sibling;
486
487							# If we are an if (or elsif) and the result of the regexp is
488							# negated, we skip the first block found. RT #69867
489	0	0	0				if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) {
490	0		0				while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) {
491	0						$nsib = $nsib->snext_sibling();
492							}
493	0	0					$nsib and $nsib = $nsib->snext_sibling();
494							}
495
496	0						while ($nsib) {
497	0	0					return if $checker->($nsib);
498	0	0					if ($nsib->isa('PPI::Node')) {
499	0	0					return if ! _check_node_children($nsib, $arg, $checker );
500							} else {
501							_mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures},
502	0						$arg->{named_captures}, $arg->{document} );
503							}
504	0						$nsib = $nsib->snext_sibling;
505							}
506	0						return $TRUE;
507							}
508
509							{
510
511							Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } );
512
513							# Return true if the argument is the condition of an if or elsif
514							# statement, otherwise return false.
515							sub _is_condition_of_if_statement {
516	0			0			my ( $elem ) = @_;
517	0	0	0				$elem
518							and $elem->isa( 'PPI::Structure::Condition' )
519							or return $FALSE;
520	0	0					my $psib = $elem->sprevious_sibling()
521							or return $FALSE;
522	0	0					$psib->isa( 'PPI::Token::Word' )
523							or return $FALSE;
524	0						return $IS_IF_STATEMENT{ $psib->content() };
525
526							}
527							}
528
529							# false if we hit another regexp
530							# The arguments are:
531							# $elem - The PPI::Node whose children are to be checked;
532							# $arg - A hash reference containing the following keys:
533							# regexp => the relevant PPIx::Regexp object;
534							# numbered_captures => a reference to the array used to track the
535							# use of numbered captures;
536							# named_captures => a reference to the hash used to track the
537							# use of named captures;
538							# document => a reference to the Perl::Critic::Document;
539							# $parent_checker - The parent's regexp checking code snippet,
540							# manufactured by _make_regexp_checker(). This argument is not in
541							# the $arg hash because that hash is shared among levels of the
542							# parse tree, whereas the regexp checker is not.
543							# TODO the things in the $arg hash are widely shared among the various
544							# pieces/parts of this policy; maybe more subroutines should use this
545							# hash rather than passing all this stuff around as individual
546							# arguments. This particular subroutine got the hash-reference treatment
547							# because Subroutines::ProhibitManyArgs started complaining when the
548							# checker argument was added.
549							sub _check_node_children {
550	0			0			my ($elem, $arg, $parent_checker) = @_;
551
552							# caveat: this will descend into subroutine definitions...
553
554	0						my $checker = _make_regexp_checker($parent_checker);
555	0						for my $child ($elem->schildren) {
556	0	0					return if $checker->($child);
557	0	0					if ($child->isa('PPI::Node')) {
558	0	0					return if ! _check_node_children($child, $arg, $checker);
559							} else {
560							_mark_magic($child, $arg->{regexp},
561							$arg->{numbered_captures}, $arg->{named_captures},
562	0						$arg->{document});
563							}
564							}
565	0						return $TRUE;
566							}
567
568							sub _mark_magic {
569	0			0			my ($elem, $re, $captures, $named_captures, $doc) = @_;
570
571							# If we're a double-quotish element, we need to grub through its
572							# content. RT #38942
573	0	0					if ( _is_double_quotish_element( $elem ) ) {
574	0						_mark_magic_in_content(
575							$elem->content(), $re, $captures, $named_captures, $doc );
576	0						return;
577							}
578
579							# Ditto a here document, though the logic is different. RT #38942
580	0	0					if ( $elem->isa( 'PPI::Token::HereDoc' ) ) {
581	0	0					$elem->content() =~ m/ \A << ~? \s* ' /sxm
582							or _mark_magic_in_content(
583							join( $EMPTY, $elem->heredoc() ), $re, $captures,
584							$named_captures, $doc );
585	0						return;
586							}
587
588							# Only interested in magic, or known English equivalent.
589	0						my $content = $elem->content();
590	0	0					my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
591							( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
592							( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
593							$elem->isa( 'PPI::Token::Magic' )
594	0	0	0				or $capture_ref->{$content}
595							or return;
596
597	0	0					if ( $content =~ m/ \A \$ ( \d+ ) /xms ) {
		0
		0
598
599							# Record if we see $1, $2, $3, ...
600	0						my $num = $1;
601	0	0					if (0 < $num) { # don't mark $0
602							# Only mark the captures we really need -- don't mark superfluous magic vars
603	0	0					if ($num <= @{$captures}) {
	0
604	0						_record_numbered_capture( $num, $captures );
605							}
606							}
607							} elsif ( $capture_array->{$content} ) { # GitHub #778
608	0						foreach my $num ( 1 .. @{$captures} ) {
	0
609	0						_record_numbered_capture( $num, $captures );
610							}
611							} elsif ( $capture_ref->{$content} ) {
612	0						_mark_magic_subscripted_code( $elem, $re, $captures, $named_captures );
613							}
614	0						return;
615							}
616
617							# Record a named capture referenced by a hash or array found in code.
618							# The arguments are:
619							# $elem - The element that represents a subscripted capture variable;
620							# $re - The PPIx::Regexp object;
621							# $captures - A reference to the numbered capture array;
622							# $named_captures - A reference to the named capture hash.
623							sub _mark_magic_subscripted_code {
624	0			0			my ( $elem, $re, $captures, $named_captures ) = @_;
625	0	0					my $subscr = $elem->snext_sibling() or return;
626	0	0					$subscr->isa( 'PPI::Structure::Subscript' ) or return;
627	0						my $subval = $subscr->content();
628	0						_record_subscripted_capture(
629							$elem->content(), $subval, $re, $captures, $named_captures );
630	0						return;
631							}
632
633							# Find capture variables in the content of a double-quotish thing, and
634							# record their use. RT #38942. The arguments are:
635							# $content - The content() ( or heredoc() in the case of a here
636							# document) to be analyzed;
637							# $re - The PPIx::Regexp object;
638							# $captures - A reference to the numbered capture array;
639							# $named_captures - A reference to the named capture hash.
640							sub _mark_magic_in_content {
641	0			0			my ( $content, $re, $captures, $named_captures, $doc ) = @_;
642
643	0	0					my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
644							( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
645							( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
646
647	0						while ( $content =~ m< ( [\$\@] (?:
648							[{] \^? (?: \w+ \| . ) [}] \| \w+ \| . ) ) >sxmg ) {
649	0						my $name = $1;
650	0	0					$name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm
651							and $name =~ s/ [}] \z //sxm;
652
653	0	0	0				if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) {
		0
		0
654
655	0						my $num = $1;
656							0 < $num
657	0	0	0				and $num <= @{ $captures }
	0
658							and _record_numbered_capture( $num, $captures );
659
660							} elsif ( $capture_array->{$name} ) { # GitHub #778
661	0						foreach my $num ( 1 .. @{$captures} ) {
	0
662	0						_record_numbered_capture( $num, $captures );
663							}
664
665							} elsif ( $capture_ref->{$name} &&
666							$content =~ m/ \G ( [{] [^}]+ [}] \| [[] [^]] []] ) /smxgc )
667							{
668	0						_record_subscripted_capture(
669							$name, $1, $re, $captures, $named_captures );
670
671							}
672							}
673	0						return;
674							}
675
676							# Return true if the given element is double-quotish. Always returns
677							# false for a PPI::Token::HereDoc, since they're a different beast.
678							# RT #38942.
679							sub _is_double_quotish_element {
680	0			0			my ( $elem ) = @_;
681
682	0	0					$elem or return;
683
684	0						my $content = $elem->content();
685
686	0	0					if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) {
687	0						return $content !~ m/ \A qx \s* ' /sxm;
688							}
689
690	0						foreach my $class ( qw{
691							PPI::Token::Quote::Double
692							PPI::Token::Quote::Interpolate
693							PPI::Token::QuoteLike::Backtick
694							PPI::Token::QuoteLike::Readline
695							} ) {
696	0	0					$elem->isa( $class ) and return $TRUE;
697							}
698
699	0						return $FALSE;
700							}
701
702							# Record a subscripted capture, either hash dereference or array
703							# dereference. We assume that an array represents a numbered capture and
704							# a hash represents a named capture, since we have to handle (e.g.) both
705							# @+ and %+.
706							sub _record_subscripted_capture {
707	0			0			my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_;
708	0	0					if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) {
		0
709	0						( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx;
710	0						_record_named_capture( $name, $captures, $named_captures );
711							} elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) {
712							# GitHub #778
713							# Mostly capture numbers encountered here are 1-based (e.g. @+, @-).
714							# But @{^CAPTURE} is zero-based, so we need to tweak the subscript
715							# before we record the capture number.
716	0						my $num = $1 + 0;
717							$num >= 0
718	0	0	0				and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name}
719							and $num++;
720	0						_record_numbered_capture( $num, $captures, $re );
721							}
722	0						return;
723							}
724
725							# Because a named capture is also one or more numbered captures, the recording
726							# of the use of a named capture seemed complex enough to wrap in a subroutine.
727							sub _record_named_capture {
728	0			0			my ( $name, $captures, $named_captures ) = @_;
729	0	0					defined ( my $numbers = $named_captures->{$name} ) or return;
730	0						foreach my $capnum ( @{ $numbers } ) {
	0
731	0						_record_numbered_capture( $capnum, $captures );
732							}
733	0						$named_captures->{$name} = undef;
734	0						return;
735							}
736
737							sub _record_numbered_capture {
738	0			0			my ( $number, $captures, $re ) = @_;
739	0	0	0				$re and $number < 0
740							and $number = $re->max_capture_number() + $number + 1;
741	0	0					return if $number <= 0;
742	0						$captures->[ $number - 1 ] = 1;
743	0						return;
744							}
745
746							1;
747
748							__END__
749
750							#-----------------------------------------------------------------------------
751
752							=pod
753
754							=for stopwords refactored
755
756							=head1 NAME
757
758							Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
759
760
761							=head1 AFFILIATION
762
763							This Policy is part of the core L<Perl::Critic\|Perl::Critic>
764							distribution.
765
766
767							=head1 DESCRIPTION
768
769							Perl regular expressions have multiple types of grouping syntax. The
770							basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable
771							C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/>) are useful because
772							they have better runtime performance and do not copy strings to the
773							magic global capture variables.
774
775							It's also easier on the maintenance programmer if you consistently use
776							capturing vs. non-capturing groups, because that programmer can tell
777							more easily which regexps can be refactored without breaking
778							surrounding code which may use the captured values.
779
780
781							=head1 CONFIGURATION
782
783							This Policy is not configurable except for the standard options.
784
785
786							=head1 CAVEATS
787
788							=head2 C<qr//> interpolation
789
790							This policy can be confused by interpolation of C<qr//> elements, but
791							those are always false negatives. For example:
792
793							my $foo_re = qr/(foo)/;
794							my ($foo) = m/$foo_re (bar)/x;
795
796							A human can tell that this should be a violation because there are two
797							captures but only the first capture is used, not the second. The
798							policy only notices that there is one capture in the regexp and
799							remains happy.
800
801							=head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END>
802
803							This policy will only recognize capture groups referred to by these
804							variables if the use is subscripted by a literal integer.
805
806							=head2 C<$^N> and C<$LAST_SUBMATCH_RESULT>
807
808							This policy will not recognize capture groups referred to only by these
809							variables, because there is in general no way by static analysis to
810							determine which capture group is referred to. For example,
811
812							m/ (?: (A[[:alpha:]]+) \| (N\d+) ) (?{$foo=$^N}) /smx
813
814							makes use of the first capture group if it matches, or the second
815							capture group if the first does not match but the second does.
816
817							=head2 split()
818
819							Normally, this policy thinks that if a capture is used at all it must be
820							used before the next regular expression in the same scope. The regular
821							expression in a C<split()> needs to be exempted because it does not
822							affect the caller's capture variables.
823
824							At present, this policy recognizes and exempts the regular expressions
825							in
826
827							split /.../, ...
828
829							and
830
831							split( /.../, ... )
832
833							but more exotic syntax may produce false positives.
834
835
836							=head1 CREDITS
837
838							Initial development of this policy was supported by a grant from the
839							Perl Foundation.
840
841
842							=head1 AUTHOR
843
844							Chris Dolan <cdolan@cpan.org>
845
846
847							=head1 COPYRIGHT
848
849							Copyright (c) 2007-2023 Chris Dolan. Many rights reserved.
850
851							This program is free software; you can redistribute it and/or modify
852							it under the same terms as Perl itself. The full text of this license
853							can be found in the LICENSE file included with this module
854
855							=cut
856
857							# Local Variables:
858							# mode: cperl
859							# cperl-indent-level: 4
860							# fill-column: 78
861							# indent-tabs-mode: nil
862							# c-indentation-style: bsd
863							# End:
864							# ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :