File Coverage

blib/lib/PPIx/Regexp/Token/Control.pm
Criterion Covered Total %
statement 46 47 97.8
branch 18 20 90.0
condition 3 5 60.0
subroutine 12 12 100.0
pod 1 1 100.0
total 80 85 94.1


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Control - Case and quote control.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new( 'qr{\Ufoo\E}smx' )
9             ->print();
10              
11             =head1 INHERITANCE
12              
13             C is a
14             L.
15              
16             C has no descendants.
17              
18             =head1 DESCRIPTION
19              
20             This class represents the case and quote controls. These apply when the
21             regular expression is compiled, changing the actual expression
22             generated. For example
23              
24             print qr{\Ufoo\E}, "\n"
25              
26             prints
27              
28             (?-xism:FOO)
29              
30             =head1 METHODS
31              
32             This class provides no public methods beyond those provided by its
33             superclass.
34              
35             =cut
36              
37             package PPIx::Regexp::Token::Control;
38              
39 9     9   65 use strict;
  9         17  
  9         258  
40 9     9   45 use warnings;
  9         21  
  9         239  
41              
42 9     9   43 use base qw{ PPIx::Regexp::Token };
  9         20  
  9         777  
43              
44 9         2737 use PPIx::Regexp::Constant qw{
45             COOKIE_QUOTE
46             MINIMUM_PERL
47             TOKEN_LITERAL
48             TOKEN_UNKNOWN
49             @CARP_NOT
50 9     9   58 };
  9         27  
51              
52             our $VERSION = '0.087_01';
53              
54             # Return true if the token can be quantified, and false otherwise
55             # sub can_be_quantified { return };
56              
57             {
58              
59             my %explanation = (
60             '\\E' => 'End of interpolation control',
61             '\\F' => 'Fold case until \\E',
62             '\\L' => 'Lowercase until \\E',
63             '\\Q' => 'Quote metacharacters until \\E',
64             '\\U' => 'Uppercase until \\E',
65             '\\l' => 'Lowercase next character',
66             '\\u' => 'Uppercase next character',
67             );
68              
69             sub __explanation {
70 7     7   17 return \%explanation;
71             }
72              
73             }
74              
75             {
76             my %version_introduced = (
77             '\\F' => '5.015008',
78             );
79              
80             sub perl_version_introduced {
81 7     7 1 2308 my ( $self ) = @_;
82 7         26 my $content = $self->content();
83             defined $version_introduced{$content}
84 7 100       24 and return $version_introduced{$content};
85 6         13 return MINIMUM_PERL;
86             }
87             }
88              
89             my %is_control = map { $_ => 1 } qw{ l u L U Q E F };
90              
91             my %cookie_slot = (
92             Q => 'quote',
93             E => 'end',
94             U => 'case',
95             L => 'case',
96             F => 'case',
97             );
98              
99 9     9   77 use constant CONTROL_MASK_QUOTE => 1 << 1;
  9         28  
  9         4970  
100              
101             my %cookie_mask = (
102             case => 1 << 0,
103             end => 0, # must be 0.
104             quote => CONTROL_MASK_QUOTE,
105             );
106              
107             sub __PPIX_TOKENIZER__regexp {
108 1342     1342   2986 my ( undef, $tokenizer, $character ) = @_;
109              
110             # If we are inside a quote sequence, we want to make literals out of
111             # all the characters we reject; otherwise we just want to return
112             # nothing.
113 1342   66     3451 my $in_quote = $tokenizer->cookie( COOKIE_QUOTE ) || do {
114 1299     1299   3291 my @stack = ( { mask => 0, reject => sub { return; } } );
115 4278     4278   11819 $tokenizer->cookie( COOKIE_QUOTE, sub { return \@stack } );
116             };
117 1342         3117 my $cookie_stack = $in_quote->( $tokenizer );
118 1342         2623 my $reject = $cookie_stack->[-1]{reject};
119              
120             # We are not interested in anything that is not escaped.
121 1342 100       3828 $character eq '\\' or return $reject->( 1 );
122              
123             # We need to see what the next character is to figure out what to
124             # do. If there is no next character, we do not know what to call the
125             # back slash.
126 227 50       824 my $control = $tokenizer->peek( 1 )
127             or return $reject->( 1, TOKEN_UNKNOWN, {
128             error => 'Trailing back slash'
129             },
130             );
131              
132             # We reject any escapes that do not represent controls.
133 227 100       859 $is_control{$control} or return $reject->( 2 );
134              
135             # Anything left gets made into a token now, to avoid its processing
136             # by the cookie we may make.
137 36         104 my $token = $tokenizer->make_token( 2 );
138              
139             # \U, \L, and \F supersede each other, but they stack with \Q. So we
140             # need to track that behavior, so that we know what to do when we
141             # hit a \E.
142             # TODO if we wanted we could actually track which (if any) of \U, \L
143             # and \F is in effect, and make that an attribute of any literals
144             # made.
145 36 100       140 if ( my $slot = $cookie_slot{$control} ) {
146 30 100       103 if ( my $mask = $cookie_mask{$slot} ) {
147             # We need another stack entry only if the current slot
148             # ('case' or 'quote') is not occupied
149 20 100       73 unless ( $mask & $cookie_stack->[-1]{mask} ) {
150             # Clone the previous entry.
151 16         30 push @{ $cookie_stack }, { %{ $cookie_stack->[-1] } };
  16         34  
  16         61  
152             # Set the mask to show this slot is occupied
153 16         56 $cookie_stack->[-1]{mask} |= $mask;
154             # Code to call when this tokenizer rejects a token
155             $cookie_stack->[-1]{reject} =
156             ( $mask & CONTROL_MASK_QUOTE ) ?
157             sub {
158 7     7   50 my ( $size, $class ) = @_;
159 7   50     62 return $tokenizer->make_token(
160             $size, $class || TOKEN_LITERAL );
161 16 100       82 } : $cookie_stack->[0]{reject};
162             }
163             # TODO if I want to try to track what controls are in effect
164             # where.
165             # Record the specific content of the current slot
166             # $cookie_stack->[-1]{$slot} = $control;
167             } else {
168             # \E - pop data, but don't leave empty.
169 10         44 @{ $cookie_stack } > 1
170 10 100       38 and pop @{ $cookie_stack };
  9         20  
171             }
172             }
173              
174             # Return our token.
175 36         159 return $token;
176             }
177              
178             sub __PPIX_TOKENIZER__repl {
179 13     13   47 my ( undef, $tokenizer ) = @_; # Invocant, $character unused
180              
181 13 50       49 $tokenizer->interpolates() and goto &__PPIX_TOKENIZER__regexp;
182              
183 0           return;
184             }
185              
186             1;
187              
188             __END__