File Coverage

blib/lib/PPIx/Regexp/Token/Control.pm
Criterion Covered Total %
statement 46 47 97.8
branch 18 20 90.0
condition 3 5 60.0
subroutine 12 12 100.0
pod 1 1 100.0
total 80 85 94.1


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Control - Case and quote control.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new( 'qr{\Ufoo\E}smx' )
9             ->print();
10              
11             =head1 INHERITANCE
12              
13             C<PPIx::Regexp::Token::Control> is a
14             L<PPIx::Regexp::Token|PPIx::Regexp::Token>.
15              
16             C<PPIx::Regexp::Token::Control> has no descendants.
17              
18             =head1 DESCRIPTION
19              
20             This class represents the case and quote controls. These apply when the
21             regular expression is compiled, changing the actual expression
22             generated. For example
23              
24             print qr{\Ufoo\E}, "\n"
25              
26             prints
27              
28             (?-xism:FOO)
29              
30             =head1 METHODS
31              
32             This class provides no public methods beyond those provided by its
33             superclass.
34              
35             =cut
36              
37             package PPIx::Regexp::Token::Control;
38              
39 9     9   45 use strict;
  9         13  
  9         261  
40 9     9   33 use warnings;
  9         12  
  9         366  
41              
42 9     9   36 use base qw{ PPIx::Regexp::Token };
  9         11  
  9         636  
43              
44 9         2152 use PPIx::Regexp::Constant qw{
45             COOKIE_QUOTE
46             MINIMUM_PERL
47             TOKEN_LITERAL
48             TOKEN_UNKNOWN
49             @CARP_NOT
50 9     9   36 };
  9         37  
51              
52             our $VERSION = '0.091';
53              
54             # Return true if the token can be quantified, and false otherwise
55             # sub can_be_quantified { return };
56              
57             {
58              
59             my %explanation = (
60             '\\E' => 'End of interpolation control',
61             '\\F' => 'Fold case until \\E',
62             '\\L' => 'Lowercase until \\E',
63             '\\Q' => 'Quote metacharacters until \\E',
64             '\\U' => 'Uppercase until \\E',
65             '\\l' => 'Lowercase next character',
66             '\\u' => 'Uppercase next character',
67             );
68              
69             sub __explanation {
70 7     7   15 return \%explanation;
71             }
72              
73             }
74              
75             {
76             my %version_introduced = (
77             '\\F' => '5.015008',
78             );
79              
80             sub perl_version_introduced {
81 7     7 1 1840 my ( $self ) = @_;
82 7         25 my $content = $self->content();
83             defined $version_introduced{$content}
84 7 100       20 and return $version_introduced{$content};
85 6         10 return MINIMUM_PERL;
86             }
87             }
88              
89             my %is_control = map { $_ => 1 } qw{ l u L U Q E F };
90              
91             my %cookie_slot = (
92             Q => 'quote',
93             E => 'end',
94             U => 'case',
95             L => 'case',
96             F => 'case',
97             );
98              
99 9     9   46 use constant CONTROL_MASK_QUOTE => 1 << 1;
  9         15  
  9         3599  
100              
101             my %cookie_mask = (
102             case => 1 << 0,
103             end => 0, # must be 0.
104             quote => CONTROL_MASK_QUOTE,
105             );
106              
107             sub __PPIX_TOKENIZER__regexp {
108 1342     1342   2148 my ( undef, $tokenizer, $character ) = @_;
109              
110             # If we are inside a quote sequence, we want to make literals out of
111             # all the characters we reject; otherwise we just want to return
112             # nothing.
113 1342   66     2271 my $in_quote = $tokenizer->cookie( COOKIE_QUOTE ) || do {
114 1299     1299   2268 my @stack = ( { mask => 0, reject => sub { return; } } );
115 4278     4278   8350 $tokenizer->cookie( COOKIE_QUOTE, sub { return \@stack } );
116             };
117 1342         2314 my $cookie_stack = $in_quote->( $tokenizer );
118 1342         2112 my $reject = $cookie_stack->[-1]{reject};
119              
120             # We are not interested in anything that is not escaped.
121 1342 100       3058 $character eq '\\' or return $reject->( 1 );
122              
123             # We need to see what the next character is to figure out what to
124             # do. If there is no next character, we do not know what to call the
125             # back slash.
126 227 50       592 my $control = $tokenizer->peek( 1 )
127             or return $reject->( 1, TOKEN_UNKNOWN, {
128             error => 'Trailing back slash'
129             },
130             );
131              
132             # We reject any escapes that do not represent controls.
133 227 100       657 $is_control{$control} or return $reject->( 2 );
134              
135             # Anything left gets made into a token now, to avoid its processing
136             # by the cookie we may make.
137 36         114 my $token = $tokenizer->make_token( 2 );
138              
139             # \U, \L, and \F supersede each other, but they stack with \Q. So we
140             # need to track that behavior, so that we know what to do when we
141             # hit a \E.
142             # TODO if we wanted we could actually track which (if any) of \U, \L
143             # and \F is in effect, and make that an attribute of any literals
144             # made.
145 36 100       98 if ( my $slot = $cookie_slot{$control} ) {
146 30 100       76 if ( my $mask = $cookie_mask{$slot} ) {
147             # We need another stack entry only if the current slot
148             # ('case' or 'quote') is not occupied
149 20 100       59 unless ( $mask & $cookie_stack->[-1]{mask} ) {
150             # Clone the previous entry.
151 16         26 push @{ $cookie_stack }, { %{ $cookie_stack->[-1] } };
  16         27  
  16         48  
152             # Set the mask to show this slot is occupied
153 16         36 $cookie_stack->[-1]{mask} |= $mask;
154             # Code to call when this tokenizer rejects a token
155             $cookie_stack->[-1]{reject} =
156             ( $mask & CONTROL_MASK_QUOTE ) ?
157             sub {
158 7     7   16 my ( $size, $class ) = @_;
159 7   50     32 return $tokenizer->make_token(
160             $size, $class || TOKEN_LITERAL );
161 16 100       88 } : $cookie_stack->[0]{reject};
162             }
163             # TODO if I want to try to track what controls are in effect
164             # where.
165             # Record the specific content of the current slot
166             # $cookie_stack->[-1]{$slot} = $control;
167             } else {
168             # \E - pop data, but don't leave empty.
169 10         29 @{ $cookie_stack } > 1
170 10 100       15 and pop @{ $cookie_stack };
  9         16  
171             }
172             }
173              
174             # Return our token.
175 36         100 return $token;
176             }
177              
178             sub __PPIX_TOKENIZER__repl {
179 13     13   21 my ( undef, $tokenizer ) = @_; # Invocant, $character unused
180              
181 13 50       33 $tokenizer->interpolates() and goto &__PPIX_TOKENIZER__regexp;
182              
183 0           return;
184             }
185              
186             1;
187              
188             __END__
189              
190             =head1 SUPPORT
191              
192             Support is by the author. Please file bug reports at
193             L<https://rt.cpan.org/Public/Dist/Display.html?Name=PPIx-Regexp>,
194             L<https://github.com/trwyant/perl-PPIx-Regexp/issues>, or in
195             electronic mail to the author.
196              
197             =head1 AUTHOR
198              
199             Thomas R. Wyant, III F<wyant at cpan dot org>
200              
201             =head1 COPYRIGHT AND LICENSE
202              
203             Copyright (C) 2009-2023, 2025 by Thomas R. Wyant, III
204              
205             This program is free software; you can redistribute it and/or modify it
206             under the same terms as Perl 5.10.0. For more details, see the full text
207             of the licenses in the directory LICENSES.
208              
209             This program is distributed in the hope that it will be useful, but
210             without any warranty; without even the implied warranty of
211             merchantability or fitness for a particular purpose.
212              
213             =cut
214              
215             # ex: set textwidth=72 :