File Coverage

blib/lib/PPIx/Regexp/Token/Interpolation.pm
Criterion Covered Total %
statement 105 111 94.5
branch 62 90 68.8
condition 13 24 54.1
subroutine 17 18 94.4
pod 2 2 100.0
total 199 245 81.2


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Interpolation - Represent an interpolation in the PPIx::Regexp package.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new('qr{$foo}smx')->print();
9              
10             =head1 INHERITANCE
11              
12             C is a
13             L.
14              
15             C has no descendants.
16              
17             =head1 DESCRIPTION
18              
19             This class represents a variable interpolation into a regular
20             expression. In the L the C<$foo> would be represented by an
21             object of this class.
22              
23             =head2 Incompatible Change: Parse of '@{[ ... ]}'
24              
25             In versions 0.083 and earlier, C<'@{[ ... ]}'> parsed as a member of
26             this class. In 0.084 and later, it parses as a member of
27             C. This more accurately reflects the actual
28             contents of the token, and enables the recognition of the full range of
29             postfix dereference operators for versioning purposes, not just those
30             valid in interpolations.
31              
32             =head1 METHODS
33              
34             This class provides the following public methods beyond those provided
35             by its superclass.
36              
37             =cut
38              
39             package PPIx::Regexp::Token::Interpolation;
40              
41 9     9   43 use strict;
  9         13  
  9         247  
42 9     9   34 use warnings;
  9         14  
  9         356  
43              
44 9     9   59 use base qw{ PPIx::Regexp::Token::Code };
  9         14  
  9         754  
45              
46 9     9   54 use Carp qw{ confess };
  9         15  
  9         496  
47 9         904 use PPIx::Regexp::Constant qw{
48             COOKIE_CLASS
49             COOKIE_REGEX_SET
50             MINIMUM_PERL
51             TOKEN_LITERAL
52             @CARP_NOT
53 9     9   41 };
  9         18  
54              
55             our $VERSION = '0.091_01';
56              
57 9     9   48 use constant VERSION_WHEN_IN_REGEX_SET => '5.017009';
  9         18  
  9         9608  
58              
59             sub __new {
60 105     105   8654 my ( $class, $content, %arg ) = @_;
61              
62             defined $arg{perl_version_introduced}
63 105 50       350 or $arg{perl_version_introduced} = MINIMUM_PERL;
64              
65 105         513 my $self = $class->SUPER::__new( $content, %arg );
66              
67 105         357 return $self;
68             }
69              
70             =head2 is_matcher
71              
72             This method returns C because a static analysis can not in
73             general tell whether a piece of code matches anything.
74              
75             =cut
76              
77 0     0 1 0 sub is_matcher { return undef; } ## no critic (ProhibitExplicitReturnUndef)
78              
79             # Return true if the token can be quantified, and false otherwise
80             # This can be quantified because it might interpolate a quantifiable
81             # token. Of course, it might not, but we need to be permissive here.
82             # sub can_be_quantified { return };
83              
84             # We overrode this in PPIx::Regexp::Token::Code, since (?{...}) did not
85             # appear until Perl 5.5. But interpolation has been there since the
86             # beginning, so we have to override again. This turns out to be OK,
87             # though, because while Regex Sets were introduced in 5.17.8,
88             # interpolation inside them was not introduced until 5.17.9.
89             sub perl_version_introduced {
90 17     17 1 1448 my ( $self ) = @_;
91 17         41 return $self->{perl_version_introduced};
92             }
93              
94             # Normalize the content of an interpolation object before making it into
95             # a PPI document. The issue here is that things like ${x} are at least
96             # warnings outside strings, but are normal inside them.
97             sub __ppi_normalize_content {
98 115     115   180 my ( $self ) = @_;
99 115         137 my $content;
100 115 50       249 defined( $content = $self->content() )
101             or return $content;
102             # NOTE: perldata gives a regexp for this, but it requires Perl 5.10.
103             # I believe the following caputures the intent, except possibly for
104             # various weird combinations of '::' and "'".
105 115         541 $content =~
106             s/ \A
107             ( \$ \# \$* | [\@\$] \$* ) # Sigil and possible casts
108             [{] \s* (?: :: )* '? # per perldata
109             ( ^? (?: \w+ (?: (?: :: | ' ) \w+ )* (?: :: )? | [[:punct:]] ) )
110             \s* [}] \z
111             /$1$2/smx;
112 115         374 return $content;
113             }
114              
115             # Match the beginning of an interpolation.
116              
117             my $interp_re =
118             qr{ \A (?= [\@\$]? \$ [-\w&`'+^./\\";%=~:?!\@\$<>\[\]\{\},#] |
119             \@ [\w\{] )
120             }smx;
121              
122             # Match bracketed interpolation
123              
124             my $brkt_interp_re =
125             qr{ \A (?: [\@\$] \$* [#]? \$* [\{] (?: [][\-&`'+,^./\\";%=:?\@\$<>,#] |
126             \^? \w+ (?: :: \w+ )* ) [\}] |
127             \@ [\{] \w+ (?: :: \w+ )* [\}] )
128             }smx;
129              
130             # We pull out the logic of finding and dealing with the interpolation
131             # into a separate subroutine because if we fail to find an interpolation
132             # we want to do something with the sigils.
133              
134             my %allow_subscript_based_on_cast_symbol = (
135             q<$#> => 0,
136             q<$> => 1,
137             q<@> => 1,
138             );
139              
140             sub _interpolation {
141 110     110   243 my ( $class, $tokenizer, undef, $in_regexp ) = @_; # $character unused
142              
143             # If the regexp does not interpolate, bail now.
144 110 100       254 $tokenizer->interpolates() or return;
145              
146             # If we're a bracketed interpolation, just accept it
147 108 100       278 if ( my $len = $tokenizer->find_regexp( $brkt_interp_re ) ) {
148 8         34 return $len;
149             }
150              
151             # Make sure we start off plausibly
152 100 100       201 defined $tokenizer->find_regexp( $interp_re )
153             or return;
154              
155             # See if PPI can figure out what we have
156 83 50       292 my $doc = $tokenizer->ppi_document()
157             or return;
158              
159             # Get the first statement to work on.
160 83 50       102913 my $stmt = $doc->find_first( 'PPI::Statement' )
161             or return;
162              
163 83         13981 my @accum; # The elements of the interpolation
164             my $allow_subscript; # Assume no subscripts allowed
165 83         156 my $want_class = __PACKAGE__; # Assume we want an interpolation.
166              
167             # Find the beginning of the interpolation
168 83 50       216 my $next = $stmt->schild( 0 ) or return;
169              
170             # The interpolation should start with
171 83 100       1268 if ( $next->isa( 'PPI::Token::Symbol' ) ) {
    100          
    50          
172              
173             # A symbol
174 73         128 push @accum, $next;
175 73         127 $allow_subscript = 1; # Subscripts are allowed
176              
177             } elsif ( $next->isa( 'PPI::Token::Cast' ) ) {
178              
179             # Or a cast followed by a block
180 8         17 push @accum, $next;
181 8 50       32 $next = $next->next_sibling() or return;
182 8 100       304 if ( $next->isa( 'PPI::Token::Symbol' ) ) {
    50          
183             defined (
184             $allow_subscript =
185             $allow_subscript_based_on_cast_symbol{
186 6 50       22 $accum[-1]->content()
187             }
188             ) or return;
189 6         34 push @accum, $next;
190             } elsif ( $next->isa( 'PPI::Structure::Block' ) ) {
191             # We want @{[ ... ]} to parse as a PPIx::Regexp::Token::Code.
192             # PPI parses this as a cast followed by a block. The block
193             # contains a single statement, which contains a single
194             # constructor. So:
195 2         12 my @kids = $next->schildren();
196 2 50 33     30 if ( @kids == 1 && $kids[0]->isa( 'PPI::Statement' ) ) {
197 2         8 @kids = $kids[0]->schildren();
198 2 50 33     23 if ( @kids == 1 &&
      33        
199             $kids[0]->isa( 'PPI::Structure::Constructor' ) &&
200             $kids[0]->start() eq '[' ) {
201 2         36 $want_class = 'PPIx::Regexp::Token::Code';
202             }
203             }
204 2         5 push @accum, $next;
205             } else {
206 0         0 return;
207             }
208              
209             } elsif ( $next->isa( 'PPI::Token::ArrayIndex' ) ) {
210              
211             # Or an array index
212 2         5 push @accum, $next;
213              
214             } else {
215              
216             # None others need apply.
217 0         0 return;
218              
219             }
220              
221             # The interpolation _may_ be subscripted. If so ...
222             {
223              
224             # Only accept a subscript if wanted and available
225 83 100 100     128 $allow_subscript and $next = $next->snext_sibling() or last;
  106         394  
226              
227             # Accept an optional dereference operator.
228 63         1760 my @subscr;
229 63 100       251 if ( $next->isa( 'PPI::Token::Operator' ) ) {
230 18 100       75 $next->content() eq '->' or last;
231 4         18 push @subscr, $next;
232 4 50       14 $next = $next->next_sibling() or last;
233              
234             # postderef was introduced in 5.19.5, per perl5195delta.
235 4 50       78 if ( my $deref = $tokenizer->__recognize_postderef(
236             __PACKAGE__, $next ) ) {
237 4         63 push @accum, @subscr, $deref;
238 4         7 last;
239             }
240             }
241              
242             # Accept only a subscript
243 45 100       111 $next->isa( 'PPI::Structure::Subscript' ) or last;
244              
245             # The subscript must have a closing delimiter.
246 25 50       74 $next->finish() or last;
247              
248             # If we are in a regular expression rather than a replacement
249             # string, screen the subscript for content, since [] could be a
250             # character class, and {} could be a quantifier. The perlop docs
251             # say that Perl applies undocumented heuristics subject to
252             # change without notice to figure this out. So we do our poor
253             # best to be heuristical and undocumented.
254 25 100 100     148 not $in_regexp or $class->_subscript( $next ) or last;
255              
256             # If we got this far, accept the subscript and try for another
257             # one.
258 23         65 push @accum, @subscr, $next;
259 23         30 redo;
260             }
261              
262             # Compute the length of all the PPI elements accumulated, and return
263             # it.
264 83         968 my $length = 0;
265 83         161 foreach ( @accum ) {
266 122 100       466 $length += ref $_ ? length $_->content() : $_;
267             }
268 83         970 return ( $length, $want_class );
269             }
270              
271             {
272 9     9   64 no warnings qw{ qw }; ## no critic (ProhibitNoWarnings)
  9         13  
  9         4625  
273              
274             my %accept = map { $_ => 1 } qw{ $ $# @ };
275              
276             sub __postderef_accept_cast {
277 108     108   230 return \%accept;
278             }
279             }
280              
281             {
282              
283             my %allowed = (
284             '[' => '_square',
285             '{' => '_curly',
286             );
287              
288             sub _subscript {
289 21     21   37 my ( $class, $struct ) = @_;
290              
291             # We expect to have a left delimiter, which is either a '[' or a
292             # '{'.
293 21 50       41 my $left = $struct->start() or return;
294 21         91 my $lc = $left->content();
295 21 50       89 my $handler = $allowed{$lc} or return;
296              
297             # We expect a single child, which is a PPI::Statement
298 21 50       47 ( my @kids = $struct->schildren() ) == 1 or return;
299 21 50       252 $kids[0]->isa( 'PPI::Statement' ) or return;
300              
301             # We expect the statement to have at least one child.
302 21 50       45 ( @kids = $kids[0]->schildren() ) or return;
303              
304 21         171 return $class->$handler( @kids );
305              
306             }
307              
308             }
309              
310             # Return true if we think a curly-bracketed subscript is really a
311             # subscript, rather than a quantifier.
312             # Called as $class->$handler( ... ) above
313             sub _curly { ## no critic (ProhibitUnusedPrivateSubroutines)
314 17     17   42 my ( undef, @kids ) = @_; # Invocant unused
315              
316             # If the first child is a word, and either it is an only child or
317             # the next child is the fat comma operator, we accept it as a
318             # subscript.
319 17 100       68 if ( $kids[0]->isa( 'PPI::Token::Word' ) ) {
320 14 100       52 @kids == 1 and return 1;
321 4 50 33     15 $kids[1]->isa( 'PPI::Token::Operator' )
322             and $kids[1]->content() eq '=>'
323             and return 1;
324             }
325              
326             # If the first child is a symbol,
327 3 100 66     26 if ( @kids && $kids[0]->isa( 'PPI::Token::Symbol' ) ) {
328             # Accept it if it is the only child
329 1 50       3 @kids == 1
330             and return 1;
331             # Accept it if there are exactly two children and the second is
332             # a subscript.
333 1 50 33     8 @kids == 2
334             and $kids[1]->isa( 'PPI::Structure::Subscript' )
335             and return 1;
336             }
337              
338             # We reject anything else.
339 2         9 return;
340             }
341              
342             # Return true if we think a square-bracketed subscript is really a
343             # subscript, rather than a character class.
344             # Called as $class->$handler( ... ) above
345             sub _square { ## no critic (ProhibitUnusedPrivateSubroutines)
346 4     4   10 my ( undef, @kids ) = @_; # Invocant unused
347              
348             # We expect to have either a number or a symbol as the first
349             # element.
350 4 50       21 $kids[0]->isa( 'PPI::Token::Number' ) and return 1;
351 0 0       0 $kids[0]->isa( 'PPI::Token::Symbol' ) and return 1;
352              
353             # Anything else is rejected.
354 0         0 return;
355             }
356              
357             # Alternate classes for the sigils, depending on whether we are in a
358             # character class (index 1) or not (index 0).
359             my %sigil_alternate = (
360             '$' => [ 'PPIx::Regexp::Token::Assertion', TOKEN_LITERAL ],
361             '@' => [ TOKEN_LITERAL, TOKEN_LITERAL ],
362             );
363              
364             sub __PPIX_TOKENIZER__regexp {
365 1434     1434   2312 my ( $class, $tokenizer, $character ) = @_;
366              
367 1434 100       3648 exists $sigil_alternate{$character} or return;
368              
369 105 100       333 if ( my ( $accept, $want_class ) =
370             $class->_interpolation( $tokenizer, $character, 1 )
371             ) {
372 86         6104 return $tokenizer->make_token( $accept, $want_class );
373             }
374              
375 19 50       55 my $alternate = $sigil_alternate{$character} or return;
376 19 50       68 return $tokenizer->make_token(
377             1, $alternate->[$tokenizer->cookie( COOKIE_CLASS ) ? 1 : 0 ] );
378              
379             }
380              
381             sub __PPIX_TOKENIZER__repl {
382 18     18   41 my ( $class, $tokenizer, $character ) = @_;
383              
384 18 100       64 exists $sigil_alternate{$character} or return;
385              
386 5 50       16 if ( my ( $accept, $want_class ) =
387             $class->_interpolation( $tokenizer, $character, 0 ) ) {
388 5         303 return $tokenizer->make_token( $accept, $want_class );
389             }
390              
391 0           return $tokenizer->make_token( 1, TOKEN_LITERAL );
392              
393             }
394              
395             1;
396              
397             __END__