| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | =head1 NAME | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | PPIx::Regexp::Token::Operator - Represent an operator. | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | use PPIx::Regexp::Dumper; | 
| 8 |  |  |  |  |  |  | PPIx::Regexp::Dumper->new( 'qr{foo|bar}smx' ) | 
| 9 |  |  |  |  |  |  | ->print(); | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | =head1 INHERITANCE | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | C is a | 
| 14 |  |  |  |  |  |  | L. | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | C has no descendants. | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | This class represents an operator. In a character class, it represents | 
| 21 |  |  |  |  |  |  | the negation (C<^>) and range (C<->) operators. Outside a character | 
| 22 |  |  |  |  |  |  | class, it represents the alternation (C<|>) operator. | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | =head1 METHODS | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | This class provides no public methods beyond those provided by its | 
| 27 |  |  |  |  |  |  | superclass. | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | =cut | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | package PPIx::Regexp::Token::Operator; | 
| 32 |  |  |  |  |  |  |  | 
| 33 | 9 |  |  | 9 |  | 70 | use strict; | 
|  | 9 |  |  |  |  | 22 |  | 
|  | 9 |  |  |  |  | 296 |  | 
| 34 | 9 |  |  | 9 |  | 47 | use warnings; | 
|  | 9 |  |  |  |  | 35 |  | 
|  | 9 |  |  |  |  | 254 |  | 
| 35 |  |  |  |  |  |  |  | 
| 36 | 9 |  |  | 9 |  | 64 | use base qw{ PPIx::Regexp::Token }; | 
|  | 9 |  |  |  |  | 21 |  | 
|  | 9 |  |  |  |  | 803 |  | 
| 37 |  |  |  |  |  |  |  | 
| 38 | 9 |  |  |  |  | 931 | use PPIx::Regexp::Constant qw{ | 
| 39 |  |  |  |  |  |  | COOKIE_CLASS COOKIE_REGEX_SET | 
| 40 |  |  |  |  |  |  | LITERAL_LEFT_CURLY_ALLOWED | 
| 41 |  |  |  |  |  |  | TOKEN_LITERAL | 
| 42 |  |  |  |  |  |  | @CARP_NOT | 
| 43 | 9 |  |  | 9 |  | 71 | }; | 
|  | 9 |  |  |  |  | 18 |  | 
| 44 | 9 |  |  | 9 |  | 59 | use PPIx::Regexp::Util qw{ __instance }; | 
|  | 9 |  |  |  |  | 22 |  | 
|  | 9 |  |  |  |  | 609 |  | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | our $VERSION = '0.087'; | 
| 47 |  |  |  |  |  |  |  | 
| 48 | 9 |  |  | 9 |  | 66 | use constant TOKENIZER_ARGUMENT_REQUIRED => 1; | 
|  | 9 |  |  |  |  | 21 |  | 
|  | 9 |  |  |  |  | 6311 |  | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | sub __new { | 
| 51 | 64 |  |  | 64 |  | 318 | my ( $class, $content, %arg ) = @_; | 
| 52 |  |  |  |  |  |  |  | 
| 53 | 64 | 50 |  |  |  | 340 | my $self = $class->SUPER::__new( $content, %arg ) | 
| 54 |  |  |  |  |  |  | or return; | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | $self->{operation} = $self->_compute_operation_name( | 
| 57 | 64 |  | 100 |  |  | 388 | $arg{tokenizer} ) || 'unknown'; | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 64 |  |  |  |  | 254 | return $self; | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | # Return true if the token can be quantified, and false otherwise | 
| 63 |  |  |  |  |  |  | # sub can_be_quantified { return }; | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | sub explain { | 
| 66 | 5 |  |  | 5 | 1 | 14 | my ( $self ) = @_; | 
| 67 | 5 |  |  |  |  | 18 | my $expl = ucfirst "$self->{operation} operator"; | 
| 68 | 5 |  |  |  |  | 17 | $expl =~ s/ _ / /smxg; | 
| 69 | 5 |  |  |  |  | 11 | return $expl; | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | =head2 operation | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | This method returns the name of the operation performed by the operator. | 
| 75 |  |  |  |  |  |  | This depends not only on the operator itself but its context: | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | =over | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | =item In a bracketed character class | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  | '-' => 'range', | 
| 82 |  |  |  |  |  |  | '^' => 'inversion', | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | =item In an extended bracketed character class | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | '&' => 'intersection', | 
| 87 |  |  |  |  |  |  | '+' => 'union', | 
| 88 |  |  |  |  |  |  | '|' => 'union', | 
| 89 |  |  |  |  |  |  | '-' => 'subtraction', | 
| 90 |  |  |  |  |  |  | '^' => 'symmetric_difference', | 
| 91 |  |  |  |  |  |  | '!' => 'complement', | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | =item Outside any sort of bracketed character class | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | '|' => 'alternation', | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | =back | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | =cut | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | sub operation { | 
| 102 | 0 |  |  | 0 | 1 | 0 | my ( $self ) = @_; | 
| 103 | 0 |  |  |  |  | 0 | return $self->{operation}; | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | # These will be intercepted by PPIx::Regexp::Token::Literal if they are | 
| 107 |  |  |  |  |  |  | # really literals, so here we may process them unconditionally. | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | # Note that if we receive a '-' we unconditionally make it an operator, | 
| 110 |  |  |  |  |  |  | # relying on the lexer to turn it back into a literal if necessary. | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | my %operator = map { $_ => 1 } qw{ | - }; | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | sub _treat_as_literal { | 
| 115 | 36 |  |  | 36 |  | 90 | my ( $token ) = @_; | 
| 116 | 36 |  | 66 |  |  | 109 | return __instance( $token, 'PPIx::Regexp::Token::Literal' ) || | 
| 117 |  |  |  |  |  |  | __instance( $token, 'PPIx::Regexp::Token::Interpolation' ); | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | { | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | my %operation = ( | 
| 123 |  |  |  |  |  |  | COOKIE_CLASS()	=> { | 
| 124 |  |  |  |  |  |  | '-'	=> 'range', | 
| 125 |  |  |  |  |  |  | '^'	=> 'inversion', | 
| 126 |  |  |  |  |  |  | }, | 
| 127 |  |  |  |  |  |  | COOKIE_REGEX_SET()		=> { | 
| 128 |  |  |  |  |  |  | '&'	=> 'intersection', | 
| 129 |  |  |  |  |  |  | '+'	=> 'union', | 
| 130 |  |  |  |  |  |  | '|'	=> 'union', | 
| 131 |  |  |  |  |  |  | '-'	=> 'subtraction', | 
| 132 |  |  |  |  |  |  | '^'	=> 'symmetric_difference', | 
| 133 |  |  |  |  |  |  | '!'	=> 'complement', | 
| 134 |  |  |  |  |  |  | }, | 
| 135 |  |  |  |  |  |  | ''	=> { | 
| 136 |  |  |  |  |  |  | '|'	=> 'alternation', | 
| 137 |  |  |  |  |  |  | }, | 
| 138 |  |  |  |  |  |  | ); | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | sub _compute_operation_name { | 
| 141 | 64 |  |  | 64 |  | 198 | my ( $self, $tokenizer ) = @_; | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 64 |  |  |  |  | 245 | my $content = $self->content(); | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 64 | 100 |  |  |  | 224 | if ( $tokenizer->cookie( COOKIE_CLASS ) ) { | 
|  |  | 100 |  |  |  |  |  | 
| 146 | 24 |  |  |  |  | 204 | return $operation{ COOKIE_CLASS() }{$content}; | 
| 147 |  |  |  |  |  |  | } elsif ( $tokenizer->cookie( COOKIE_REGEX_SET ) ) { | 
| 148 | 13 |  |  |  |  | 86 | return $operation{ COOKIE_REGEX_SET() }{$content}; | 
| 149 |  |  |  |  |  |  | } else { | 
| 150 | 27 |  |  |  |  | 210 | return $operation{''}{$content}; | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  | } | 
| 153 |  |  |  |  |  |  |  | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | { | 
| 157 |  |  |  |  |  |  | my $removed_in = { | 
| 158 |  |  |  |  |  |  | '|'	=> LITERAL_LEFT_CURLY_ALLOWED,	# Allowed after alternation | 
| 159 |  |  |  |  |  |  | }; | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | sub __following_literal_left_curly_disallowed_in { | 
| 162 | 0 |  |  | 0 |  | 0 | my ( $self ) = @_; | 
| 163 | 0 |  |  |  |  | 0 | my $content = $self->content(); | 
| 164 |  |  |  |  |  |  | exists $removed_in->{$content} | 
| 165 | 0 | 0 |  |  |  | 0 | and return $removed_in->{$content}; | 
| 166 | 0 |  |  |  |  | 0 | return $self->SUPER::__following_literal_left_curly_disallowed_in(); | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | sub __PPIX_TOKENIZER__regexp { | 
| 171 | 62 |  |  | 62 |  | 219 | my ( undef, $tokenizer, $character ) = @_; | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  | # We only receive the '-' if we are inside a character class. But it | 
| 174 |  |  |  |  |  |  | # is only an operator if it is preceded and followed by literals. We | 
| 175 |  |  |  |  |  |  | # can use prior() because there are no insignificant tokens inside a | 
| 176 |  |  |  |  |  |  | # character class. | 
| 177 | 62 | 100 |  |  |  | 268 | if ( $character eq '-' ) { | 
| 178 |  |  |  |  |  |  |  | 
| 179 | 19 | 100 |  |  |  | 78 | _treat_as_literal( $tokenizer->prior_significant_token() ) | 
| 180 |  |  |  |  |  |  | or return $tokenizer->make_token( 1, TOKEN_LITERAL ); | 
| 181 |  |  |  |  |  |  |  | 
| 182 | 17 |  |  |  |  | 80 | my @tokens = ( $tokenizer->make_token( 1 ) ); | 
| 183 | 17 |  |  |  |  | 154 | push @tokens, $tokenizer->get_token(); | 
| 184 |  |  |  |  |  |  |  | 
| 185 | 17 | 50 |  |  |  | 110 | _treat_as_literal( $tokens[1] ) | 
| 186 |  |  |  |  |  |  | or TOKEN_LITERAL->__PPIX_ELEM__rebless( $tokens[0] ); | 
| 187 |  |  |  |  |  |  |  | 
| 188 | 17 |  |  |  |  | 75 | return ( @tokens ); | 
| 189 |  |  |  |  |  |  | } | 
| 190 |  |  |  |  |  |  |  | 
| 191 | 43 |  |  |  |  | 150 | return $operator{$character}; | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | 1; | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | __END__ |