File Coverage

blib/lib/PPIx/Regexp/Token/Assertion.pm
Criterion Covered Total %
statement 45 47 95.7
branch 20 24 83.3
condition 10 14 71.4
subroutine 12 13 92.3
pod 3 3 100.0
total 90 101 89.1


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Assertion - Represent a simple assertion.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new( 'qr{\bfoo\b}smx' )
9             ->print();
10              
11             =head1 INHERITANCE
12              
13             C<PPIx::Regexp::Token::Assertion> is a
14             L<PPIx::Regexp::Token|PPIx::Regexp::Token>.
15              
16             C<PPIx::Regexp::Token::Assertion> has no descendants.
17              
18             =head1 DESCRIPTION
19              
20             This class represents one of the simple assertions; that is, those that
21             are not defined via parentheses. This includes the zero-width assertions
22             C<^>, C<$>, C<\b>, C<\B>, C<\A>, C<\Z>, C<\z> and C<\G>, as well as:
23              
24             =over
25              
26             =item * The C<\z> assertion added in Perl 5.005,
27              
28             =item * The C<\K> assertion added in Perl 5.009005,
29              
30             =item * The C<\b{gcb}> assertion (and friends) added in Perl 5.021009.
31             Similar braced constructions (like C<\b{foo}>) are unknown tokens.
32              
33             =back
34              
35             =head1 METHODS
36              
37             This class provides no public methods beyond those provided by its
38             superclass.
39              
40             =cut
41              
42             package PPIx::Regexp::Token::Assertion;
43              
44 9     9   52 use strict;
  9         12  
  9         268  
45 9     9   30 use warnings;
  9         15  
  9         451  
46              
47 9     9   38 use base qw{ PPIx::Regexp::Token };
  9         15  
  9         777  
48              
49 9         945 use PPIx::Regexp::Constant qw{
50             COOKIE_CLASS
51             COOKIE_LOOKAROUND_ASSERTION
52             LITERAL_LEFT_CURLY_ALLOWED
53             MINIMUM_PERL
54             TOKEN_LITERAL
55             TOKEN_UNKNOWN
56             @CARP_NOT
57 9     9   79 };
  9         14  
58              
59 9     9   41 use constant KEEP_EXPLANATION => 'In s///, keep everything before the \\K';
  9         16  
  9         6919  
60              
61             our $VERSION = '0.091';
62              
63             # Return true if the token can be quantified, and false otherwise
64             # sub can_be_quantified { return };
65              
66             my @braced_assertions = (
67             [ qr< \\ [bB] [{] (?: g | gcb | wb | sb ) [}] >smx, '5.021009' ],
68             [ qr< \\ [bB] [{] (?: lb ) [}] >smx, '5.023007' ],
69             [ qr< \\ [bB] [{] .*? [}] >smx, undef, TOKEN_UNKNOWN,
70             { error => 'Unknown bound type' },
71             ],
72             );
73              
74             =head2 is_matcher
75              
76             This method returns a true value because an assertion actually matches
77             something.
78              
79             =cut
80              
81 0     0 1 0 sub is_matcher { return 1; }
82              
83             sub perl_version_introduced {
84 24     24 1 5058 my ( $self ) = @_;
85             return ( $self->{perl_version_introduced} ||=
86 24   33     96 $self->_perl_version_introduced() );
87             }
88              
89             {
90              
91             my %perl_version_introduced = (
92             '\\K' => '5.009005',
93             '\\z' => '5.005',
94             );
95              
96             sub _perl_version_introduced {
97 24     24   36 my ( $self ) = @_;
98 24         67 my $content = $self->content();
99 24         40 foreach my $item ( @braced_assertions ) {
100 54 100       1245 $content =~ m/ \A $item->[0] \z /smx
101             and return $item->[1];
102             }
103 14   100     112 return $perl_version_introduced{ $content } || MINIMUM_PERL;
104             }
105              
106             }
107              
108             sub perl_version_removed {
109 24     24 1 11490 my ( $self ) = @_;
110             return ( $self->{perl_version_removed} ||=
111 24   66     93 $self->_perl_version_removed() );
112             }
113              
114             sub _perl_version_removed {
115 21     21   29 my ( $self ) = @_;
116 21 100       57 if ( '\\K' eq $self->content() ) {
117 3         6 my $parent = $self;
118 3         17 while ( $parent = $parent->parent() ) {
119 5 50       25 $parent->isa( 'PPIx::Regexp::Structure::Assertion' )
120             and return '5.031003';
121             }
122             }
123 21         56 return $self->SUPER::perl_version_removed();
124             }
125              
126             {
127             my %explanation = (
128             '$' => 'Assert position is at end of string or newline',
129             '\\A' => 'Assert position is at beginning of string',
130             '\\B' => 'Assert position is not at word/nonword boundary',
131             '\\B{gcb}' => 'Assert position is not at grapheme cluster boundary',
132             '\\B{g}' => 'Assert position is not at grapheme cluster boundary',
133             '\\B{lb}' => 'Assert position is not at line boundary',
134             '\\B{sb}' => 'Assert position is not at sentence boundary',
135             '\\B{wb}' => 'Assert position is not at word boundary',
136             '\\G' => 'Assert position is at pos()',
137             '\\K' => KEEP_EXPLANATION,
138             '\\Z' => 'Assert position is at end of string, or newline before end',
139             '\\b' => 'Assert position is at word/nonword boundary',
140             '\\b{gcb}' => 'Assert position is at grapheme cluster boundary',
141             '\\b{g}' => 'Assert position is at grapheme cluster boundary',
142             '\\b{lb}' => 'Assert position is at line boundary',
143             '\\b{sb}' => 'Assert position is at sentence boundary',
144             '\\b{wb}' => 'Assert position is at word boundary',
145             '\\z' => 'Assert position is at end of string',
146             '^' => 'Assert position is at beginning of string or after newline',
147             );
148              
149             sub __explanation {
150 20     20   39 return \%explanation;
151             }
152             }
153              
154             # An un-escaped literal left curly bracket can always follow this
155             # element.
156             sub __following_literal_left_curly_disallowed_in {
157 1     1   4 return LITERAL_LEFT_CURLY_ALLOWED;
158             }
159              
160             # By logic we should handle '$' here. But
161             # PPIx::Regexp::Token::Interpolation needs to process it to see if it is
162             # a sigil. If it is not, that module is expected to make it into an
163             # assertion. This is to try to keep the order in which the tokenizers
164             # are called non-critical, and try to keep all processing for a
165             # character in one place. Except for the back slash, which gets in
166             # everywhere.
167             #
168             ## my %assertion = map { $_ => 1 } qw{ ^ $ };
169             my %assertion = map { $_ => 1 } qw{ ^ };
170             my %escaped = map { $_ => 1 } qw{ b B A Z z G K };
171              
172             sub __PPIX_TOKENIZER__regexp {
173 153     153   279 my ( undef, $tokenizer, $character ) = @_;
174              
175             # Inside a character class, these are all literals.
176 153 100       333 my $make = $tokenizer->cookie( COOKIE_CLASS ) ?
177             TOKEN_LITERAL :
178             __PACKAGE__;
179              
180             # '^' and '$'. Or at least '^'. See note above for '$'.
181 153 100       433 $assertion{$character}
182             and return $tokenizer->make_token( 1, $make );
183              
184 142 100       327 $character eq '\\' or return;
185              
186 82 50       159 defined ( my $next = $tokenizer->peek( 1 ) ) or return;
187              
188             # Handle assertions of the form \b{gcb} and friends, introduced in
189             # Perl 5.21.9. These are not recognized inside square bracketed
190             # character classes, where \b is not an assertion but a backspace
191             # character.
192 82 50       174 if ( __PACKAGE__ eq $make ) { # Only outside [...]
193 82         154 foreach my $item ( @braced_assertions ) {
194 228 100       6239 my $end = $tokenizer->find_regexp( qr/ \A $item->[0] /smx )
195             or next;
196 10 50       38 $item->[2]
197             or return $end;
198 0         0 return $tokenizer->make_token( $end, $item->[2], $item->[3] );
199             }
200             }
201              
202             # We special-case '\K' because it was retracted inside look-around
203             # assertions in 5.31.3.
204 72 100 66     231 if ( 'K' eq $next && __PACKAGE__ eq $make &&
      100        
205             $tokenizer->__cookie_exists( COOKIE_LOOKAROUND_ASSERTION ) ) {
206 3         18 return $tokenizer->make_token( 2, $make, {
207             perl_version_removed => '5.031003',
208             explanation => KEEP_EXPLANATION .
209             '; retracted inside look-around assertion',
210             },
211             );
212             }
213              
214 69 100       248 $escaped{$next}
215             and return $tokenizer->make_token( 2, $make );
216              
217 37         75 return;
218             }
219              
220             1;
221              
222             __END__
223              
224             =head1 SUPPORT
225              
226             Support is by the author. Please file bug reports at
227             L<https://rt.cpan.org/Public/Dist/Display.html?Name=PPIx-Regexp>,
228             L<https://github.com/trwyant/perl-PPIx-Regexp/issues>, or in
229             electronic mail to the author.
230              
231             =head1 AUTHOR
232              
233             Thomas R. Wyant, III F<wyant at cpan dot org>
234              
235             =head1 COPYRIGHT AND LICENSE
236              
237             Copyright (C) 2009-2023, 2025 by Thomas R. Wyant, III
238              
239             This program is free software; you can redistribute it and/or modify it
240             under the same terms as Perl 5.10.0. For more details, see the full text
241             of the licenses in the directory LICENSES.
242              
243             This program is distributed in the hope that it will be useful, but
244             without any warranty; without even the implied warranty of
245             merchantability or fitness for a particular purpose.
246              
247             =cut
248              
249             # ex: set textwidth=72 :