File Coverage

blib/lib/PPIx/Regexp/Token/Operator.pm
Criterion Covered Total %
statement 42 48 87.5
branch 10 14 71.4
condition 4 5 80.0
subroutine 11 13 84.6
pod 2 2 100.0
total 69 82 84.1


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Operator - Represent an operator.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new( 'qr{foo|bar}smx' )
9             ->print();
10              
11             =head1 INHERITANCE
12              
13             C<PPIx::Regexp::Token::Operator> is a
14             L<PPIx::Regexp::Token|PPIx::Regexp::Token>.
15              
16             C<PPIx::Regexp::Token::Operator> has no descendants.
17              
18             =head1 DESCRIPTION
19              
20             This class represents an operator. In a character class, it represents
21             the negation (C<^>) and range (C<->) operators. Outside a character
22             class, it represents the alternation (C<|>) operator.
23              
24             =head1 METHODS
25              
26             This class provides no public methods beyond those provided by its
27             superclass.
28              
29             =cut
30              
31             package PPIx::Regexp::Token::Operator;
32              
33 9     9   46 use strict;
  9         14  
  9         258  
34 9     9   27 use warnings;
  9         14  
  9         387  
35              
36 9     9   36 use base qw{ PPIx::Regexp::Token };
  9         15  
  9         808  
37              
38 9         869 use PPIx::Regexp::Constant qw{
39             COOKIE_CLASS COOKIE_REGEX_SET
40             LITERAL_LEFT_CURLY_ALLOWED
41             TOKEN_LITERAL
42             @CARP_NOT
43 9     9   42 };
  9         10  
44 9     9   42 use PPIx::Regexp::Util qw{ __instance };
  9         12  
  9         551  
45              
46             our $VERSION = '0.091';
47              
48 9     9   38 use constant TOKENIZER_ARGUMENT_REQUIRED => 1;
  9         11  
  9         5197  
49              
50             sub __new {
51 64     64   205 my ( $class, $content, %arg ) = @_;
52              
53 64 50       247 my $self = $class->SUPER::__new( $content, %arg )
54             or return;
55              
56             $self->{operation} = $self->_compute_operation_name(
57 64   100     212 $arg{tokenizer} ) || 'unknown';
58              
59 64         164 return $self;
60             }
61              
62             # Return true if the token can be quantified, and false otherwise
63             # sub can_be_quantified { return };
64              
65             sub explain {
66 5     5 1 11 my ( $self ) = @_;
67 5         12 my $expl = ucfirst "$self->{operation} operator";
68 5         11 $expl =~ s/ _ / /smxg;
69 5         11 return $expl;
70             }
71              
72             =head2 operation
73              
74             This method returns the name of the operation performed by the operator.
75             This depends not only on the operator itself but its context:
76              
77             =over
78              
79             =item In a bracketed character class
80              
81             '-' => 'range',
82             '^' => 'inversion',
83              
84             =item In an extended bracketed character class
85              
86             '&' => 'intersection',
87             '+' => 'union',
88             '|' => 'union',
89             '-' => 'subtraction',
90             '^' => 'symmetric_difference',
91             '!' => 'complement',
92              
93             =item Outside any sort of bracketed character class
94              
95             '|' => 'alternation',
96              
97             =back
98              
99             =cut
100              
101             sub operation {
102 0     0 1 0 my ( $self ) = @_;
103 0         0 return $self->{operation};
104             }
105              
106             # These will be intercepted by PPIx::Regexp::Token::Literal if they are
107             # really literals, so here we may process them unconditionally.
108              
109             # Note that if we receive a '-' we unconditionally make it an operator,
110             # relying on the lexer to turn it back into a literal if necessary.
111              
112             my %operator = map { $_ => 1 } qw{ | - };
113              
114             sub _treat_as_literal {
115 36     36   57 my ( $token ) = @_;
116 36   66     86 return __instance( $token, 'PPIx::Regexp::Token::Literal' ) ||
117             __instance( $token, 'PPIx::Regexp::Token::Interpolation' );
118             }
119              
120             {
121              
122             my %operation = (
123             COOKIE_CLASS() => {
124             '-' => 'range',
125             '^' => 'inversion',
126             },
127             COOKIE_REGEX_SET() => {
128             '&' => 'intersection',
129             '+' => 'union',
130             '|' => 'union',
131             '-' => 'subtraction',
132             '^' => 'symmetric_difference',
133             '!' => 'complement',
134             },
135             '' => {
136             '|' => 'alternation',
137             },
138             );
139              
140             sub _compute_operation_name {
141 64     64   112 my ( $self, $tokenizer ) = @_;
142              
143 64         134 my $content = $self->content();
144              
145 64 100       152 if ( $tokenizer->cookie( COOKIE_CLASS ) ) {
    100          
146 24         113 return $operation{ COOKIE_CLASS() }{$content};
147             } elsif ( $tokenizer->cookie( COOKIE_REGEX_SET ) ) {
148 13         58 return $operation{ COOKIE_REGEX_SET() }{$content};
149             } else {
150 27         137 return $operation{''}{$content};
151             }
152             }
153              
154             }
155              
156             {
157             my $removed_in = {
158             '|' => LITERAL_LEFT_CURLY_ALLOWED, # Allowed after alternation
159             };
160              
161             sub __following_literal_left_curly_disallowed_in {
162 0     0   0 my ( $self ) = @_;
163 0         0 my $content = $self->content();
164             exists $removed_in->{$content}
165 0 0       0 and return $removed_in->{$content};
166 0         0 return $self->SUPER::__following_literal_left_curly_disallowed_in();
167             }
168             }
169              
170             sub __PPIX_TOKENIZER__regexp {
171 62     62   128 my ( undef, $tokenizer, $character ) = @_;
172              
173             # We only receive the '-' if we are inside a character class. But it
174             # is only an operator if it is preceded and followed by literals. We
175             # can use prior() because there are no insignificant tokens inside a
176             # character class.
177 62 100       146 if ( $character eq '-' ) {
178              
179 19 100       51 _treat_as_literal( $tokenizer->prior_significant_token() )
180             or return $tokenizer->make_token( 1, TOKEN_LITERAL );
181            
182 17         49 my @tokens = ( $tokenizer->make_token( 1 ) );
183 17         70 push @tokens, $tokenizer->get_token();
184            
185 17 50       45 _treat_as_literal( $tokens[1] )
186             or TOKEN_LITERAL->__PPIX_ELEM__rebless( $tokens[0] );
187            
188 17         48 return ( @tokens );
189             }
190              
191 43         114 return $operator{$character};
192             }
193              
194             1;
195              
196             __END__
197              
198             =head1 SUPPORT
199              
200             Support is by the author. Please file bug reports at
201             L<https://rt.cpan.org/Public/Dist/Display.html?Name=PPIx-Regexp>,
202             L<https://github.com/trwyant/perl-PPIx-Regexp/issues>, or in
203             electronic mail to the author.
204              
205             =head1 AUTHOR
206              
207             Thomas R. Wyant, III F<wyant at cpan dot org>
208              
209             =head1 COPYRIGHT AND LICENSE
210              
211             Copyright (C) 2009-2023, 2025 by Thomas R. Wyant, III
212              
213             This program is free software; you can redistribute it and/or modify it
214             under the same terms as Perl 5.10.0. For more details, see the full text
215             of the licenses in the directory LICENSES.
216              
217             This program is distributed in the hope that it will be useful, but
218             without any warranty; without even the implied warranty of
219             merchantability or fitness for a particular purpose.
220              
221             =cut
222              
223             # ex: set textwidth=72 :