File Coverage

blib/lib/PPIx/Regexp/Token/Backreference.pm
Criterion Covered Total %
statement 74 80 92.5
branch 37 54 68.5
condition 2 2 100.0
subroutine 13 14 92.8
pod 3 3 100.0
total 129 153 84.3


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Backreference - Represent a back reference
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new( 'qr{(foo|bar)baz\1}smx' )
9             ->print();
10              
11             =head1 INHERITANCE
12              
13             C<PPIx::Regexp::Token::Backreference> is a
14             L<PPIx::Regexp::Token::Reference|PPIx::Regexp::Token::Reference>.
15              
16             C<PPIx::Regexp::Token::Backreference> has no descendants.
17              
18             =head1 DESCRIPTION
19              
20             This class represents back references of all sorts, both the traditional
21             numbered variety and the Perl 5.010 named kind.
22              
23             =head1 METHODS
24              
25             This class provides no public methods beyond those provided by its
26             superclass.
27              
28             =cut
29              
30             package PPIx::Regexp::Token::Backreference;
31              
32 9     9   49 use strict;
  9         16  
  9         278  
33 9     9   34 use warnings;
  9         12  
  9         351  
34              
35 9     9   33 use base qw{ PPIx::Regexp::Token::Reference };
  9         39  
  9         4184  
36              
37 9     9   50 use Carp qw{ confess };
  9         12  
  9         405  
38 9         753 use PPIx::Regexp::Constant qw{
39             MINIMUM_PERL
40             RE_CAPTURE_NAME
41             TOKEN_LITERAL
42             TOKEN_UNKNOWN
43             @CARP_NOT
44 9     9   35 };
  9         12  
45 9     9   39 use PPIx::Regexp::Util qw{ __to_ordinal_en width };
  9         14  
  9         9164  
46              
47             our $VERSION = '0.091';
48              
49             # Return true if the token can be quantified, and false otherwise
50             # sub can_be_quantified { return };
51              
52             sub explain {
53 3     3 1 9 my ( $self ) = @_;
54 3 100       13 $self->is_named()
55             and return sprintf q<Back reference to capture group '%s'>,
56             $self->name();
57 2 100       12 $self->is_relative()
58             and return sprintf
59             q<Back reference to %s previous capture group (%d in this regexp)>,
60             __to_ordinal_en( - $self->number() ),
61             $self->absolute();
62 1         5 return sprintf q<Back reference to capture group %d>,
63             $self->absolute();
64             }
65              
66             {
67              
68             my %perl_version_introduced = (
69             g => '5.009005', # \g1 \g-1 \g{1} \g{-1}
70             k => '5.009005', # \k<name> \k'name'
71             '?' => '5.009005', # (?P=name) (PCRE/Python)
72             );
73              
74             sub perl_version_introduced {
75 21     21 1 2171 my ( $self ) = @_;
76 21   100     65 return $perl_version_introduced{substr( $self->content(), 1, 1 )} ||
77             MINIMUM_PERL;
78             }
79              
80             }
81              
82             sub raw_width {
83 30     30 1 67 my ( $self ) = @_;
84 30 50       84 my $re = $self->top()
85             or return ( undef, undef ); # Shouldn't happen.
86 30         35 my @capture;
87 30 100       83 if ( $self->is_named() ) {
88 6         14 my $name = $self->name();
89 6 50       8 foreach my $elem ( @{ $re->find(
  6         15  
90             'PPIx::Regexp::Structure::NamedCapture' ) || [] } ) {
91 6 50       17 $elem->name() eq $name
92             or next;
93 6 50       19 $re->__token_post_order( $elem, $self ) < 0
94             or last;
95 6         12 push @capture, $elem;
96             }
97             } else {
98 24         50 my $number = $self->absolute();
99 24 50       33 foreach my $elem ( @{ $re->find(
  24         58  
100             'PPIx::Regexp::Structure::Capture' ) || [] } ) {
101 36 50       95 $elem->number() == $number
102             or next;
103 36 50       82 $re->__token_post_order( $elem, $self ) < 0
104             or last;
105 36         75 push @capture, $elem;
106             }
107             }
108 30 100       88 @capture == 1
109             and return $capture[0]->raw_width();
110 12         32 my ( $base_min, $base_max ) = $capture[0]->raw_width();
111 12         31 foreach my $elem ( @capture[ 1 .. $#capture ] ) {
112 12         25 my ( $ele_min, $ele_max ) = $elem->raw_width();
113 12 50       30 defined $ele_min
114             or $base_min = undef;
115 12 100       35 defined $base_min
    50          
116             and $base_min = $base_min == $ele_min ? $base_min : undef;
117 12 50       26 defined $ele_max
118             or $base_max = undef;
119 12 100       30 defined $base_max
    50          
120             and $base_max = $base_max == $ele_max ? $base_max : undef;
121             }
122 12         32 return ( $base_min, $base_max );
123             }
124              
125             my @external = ( # Recognition used externally
126             [ qr{ \A \( \? P = ( @{[ RE_CAPTURE_NAME ]} ) \) }smxo,
127             { is_named => 1 },
128             ],
129             );
130              
131             my @recognize_regexp = ( # recognition used internally
132             [
133             qr{ \A \\ (?: # numbered (including relative)
134             ( [0-9]+ ) |
135             g (?: ( -? [0-9]+ ) | \{ ( -? [0-9]+ ) \} )
136             )
137             }smx, { is_named => 0 }, ],
138             [
139             qr{ \A \\ (?: # named
140             g [{] ( @{[ RE_CAPTURE_NAME ]} ) [}] |
141             k (?: \< ( @{[ RE_CAPTURE_NAME ]} ) \> | # named with angles
142             ' ( @{[ RE_CAPTURE_NAME ]} ) ' ) # or quotes
143             )
144             }smxo, { is_named => 1 }, ],
145             );
146              
147             my %recognize = (
148             regexp => \@recognize_regexp,
149             repl => [
150             [ qr{ \A \\ ( [0-9]+ ) }smx, { is_named => 0 } ],
151             ],
152             );
153              
154             # This must be implemented by tokens which do not recognize themselves.
155             # The return is a list of list references. Each list reference must
156             # contain a regular expression that recognizes the token, and optionally
157             # a reference to a hash to pass to make_token as the class-specific
158             # arguments. The regular expression MUST be anchored to the beginning of
159             # the string.
160             sub __PPIX_TOKEN__recognize {
161 18 100   18   175 return __PACKAGE__->isa( scalar caller ) ?
162             ( @external, @recognize_regexp ) :
163             ( @external );
164             }
165              
166             sub __PPIX_TOKENIZER__regexp {
167 110     110   226 my ( undef, $tokenizer, $character ) = @_;
168              
169             # PCRE/Python back references are handled in
170             # PPIx::Regexp::Token::Structure, because they are parenthesized.
171              
172             # All the other styles are escaped.
173 110 100       331 $character eq '\\'
174             or return;
175              
176 46         65 foreach ( @{ $recognize{$tokenizer->get_mode()} } ) {
  46         109  
177 58         102 my ( $re, $arg ) = @{ $_ };
  58         98  
178 58 100       145 my $accept = $tokenizer->find_regexp( $re ) or next;
179 39         87 my %arg = ( %{ $arg }, tokenizer => $tokenizer );
  39         168  
180 39         123 return $tokenizer->make_token( $accept, __PACKAGE__, \%arg );
181             }
182              
183 7         13 return;
184             }
185              
186             sub __PPIX_TOKENIZER__repl {
187 13     13   20 my ( undef, $tokenizer ) = @_; # Invocant, $character unused
188              
189 13 50       25 $tokenizer->interpolates()
190             or return;
191              
192 13         25 goto &__PPIX_TOKENIZER__regexp;
193             }
194              
195             # Called by the lexer to disambiguate between captures, literals, and
196             # whatever. We have to return the number of tokens reblessed to
197             # TOKEN_UNKNOWN (i.e. either 0 or 1) because we get called after the
198             # parse is finalized.
199             sub __PPIX_LEXER__rebless {
200 25     25   84 my ( $self, %arg ) = @_;
201              
202             # Handle named back references
203 25 100       77 if ( $self->is_named() ) {
204 8 50       33 $arg{capture_name}{$self->name()}
205             and return 0;
206 0         0 return $self->__error();
207             }
208              
209             # Get the absolute capture group number.
210 17         50 my $absolute = $self->absolute();
211              
212             # If it is zero or negative, we have a relateive reference to a
213             # non-existent capture group.
214 17 50       61 $absolute <= 0
215             and return $self->__error();
216              
217             # If the absolute number is less than or equal to the maximum
218             # capture group number, we are good.
219             $absolute <= $arg{max_capture}
220 17 100       73 and return 0;
221              
222             # It's not a valid capture. If it's an octal literal, rebless it so.
223             # Note that we can't rebless single-digit numbers, since they can't
224             # be octal literals.
225 1         5 my $content = $self->content();
226 1 50       6 if ( $content =~ m/ \A \\ [0-7]{2,} \z /smx ) {
227 1         11 TOKEN_LITERAL->__PPIX_ELEM__rebless( $self );
228 1         4 return 0;
229             }
230              
231             # Anything else is an error.
232 0           return $self->__error();
233             }
234              
235             sub __error {
236 0     0     my ( $self, $msg ) = @_;
237 0 0         defined $msg
238             or $msg = 'No corresponding capture group';
239 0           TOKEN_UNKNOWN->__PPIX_ELEM__rebless( $self, error => $msg );
240 0           return 1;
241             }
242              
243             1;
244              
245             __END__
246              
247             =head1 SUPPORT
248              
249             Support is by the author. Please file bug reports at
250             L<https://rt.cpan.org/Public/Dist/Display.html?Name=PPIx-Regexp>,
251             L<https://github.com/trwyant/perl-PPIx-Regexp/issues>, or in
252             electronic mail to the author.
253              
254             =head1 AUTHOR
255              
256             Thomas R. Wyant, III F<wyant at cpan dot org>
257              
258             =head1 COPYRIGHT AND LICENSE
259              
260             Copyright (C) 2009-2023, 2025 by Thomas R. Wyant, III
261              
262             This program is free software; you can redistribute it and/or modify it
263             under the same terms as Perl 5.10.0. For more details, see the full text
264             of the licenses in the directory LICENSES.
265              
266             This program is distributed in the hope that it will be useful, but
267             without any warranty; without even the implied warranty of
268             merchantability or fitness for a particular purpose.
269              
270             =cut
271              
272             # ex: set textwidth=72 :