line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
PPIx::Regexp::Token::Modifier - Represent modifiers. |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use PPIx::Regexp::Dumper; |
8
|
|
|
|
|
|
|
PPIx::Regexp::Dumper->new( 'qr{foo}smx' ) |
9
|
|
|
|
|
|
|
->print(); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
The trailing C will be represented by this class. |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
This class also represents the whole of things like C<(?ismx)>. But the |
14
|
|
|
|
|
|
|
modifiers in something like C<(?i:foo)> are represented by a |
15
|
|
|
|
|
|
|
L. |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 INHERITANCE |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
C is a |
20
|
|
|
|
|
|
|
L. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
C is the parent of |
23
|
|
|
|
|
|
|
L. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 DESCRIPTION |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
This class represents modifier characters at the end of the regular |
28
|
|
|
|
|
|
|
expression. For example, in C this class would represent |
29
|
|
|
|
|
|
|
the terminal C. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head2 The C, C, C, C, and C modifiers |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
The C, C, C, C, and C modifiers, introduced starting in |
34
|
|
|
|
|
|
|
Perl 5.13.6, are used to force either Unicode pattern semantics (C), |
35
|
|
|
|
|
|
|
locale semantics (C) default semantics (C the traditional Perl |
36
|
|
|
|
|
|
|
semantics, which can also mean 'dual' since it means Unicode if the |
37
|
|
|
|
|
|
|
string's UTF-8 bit is on, and locale if the UTF-8 bit is off), or |
38
|
|
|
|
|
|
|
restricted default semantics (C). These are mutually exclusive, and |
39
|
|
|
|
|
|
|
only one can be asserted at a time. Asserting any of these overrides |
40
|
|
|
|
|
|
|
the inherited value of any of the others. The C method |
41
|
|
|
|
|
|
|
reports as asserted the last one it sees, or none of them if it has seen |
42
|
|
|
|
|
|
|
none. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
For example, given C C<$elem> |
45
|
|
|
|
|
|
|
representing the invalid regular expression fragment C<(?dul)>, |
46
|
|
|
|
|
|
|
C<< $elem->asserted( 'l' ) >> would return true, but |
47
|
|
|
|
|
|
|
C<< $elem->asserted( 'u' ) >> would return false. Note that |
48
|
|
|
|
|
|
|
C<< $elem->negated( 'u' ) >> would also return false, since C is not |
49
|
|
|
|
|
|
|
explicitly negated. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
If C<$elem> represented regular expression fragment C<(?i)>, |
52
|
|
|
|
|
|
|
C<< $elem->asserted( 'd' ) >> would return false, since even though C |
53
|
|
|
|
|
|
|
represents the default behavior it is not explicitly asserted. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 The caret (C<^>) modifier |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
Calling C<^> a modifier is a bit of a misnomer. The C<(?^...)> |
58
|
|
|
|
|
|
|
construction was introduced in Perl 5.13.6, to prevent the inheritance |
59
|
|
|
|
|
|
|
of modifiers. The documentation calls the caret a shorthand equivalent |
60
|
|
|
|
|
|
|
for C, and that it the way this class handles it. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
For example, given C C<$elem> |
63
|
|
|
|
|
|
|
representing regular expression fragment C<(?^i)>, |
64
|
|
|
|
|
|
|
C<< $elem->asserts( 'd' ) >> would return true, since in the absence of |
65
|
|
|
|
|
|
|
an explicit C or C this class considers the C<^> to explicitly |
66
|
|
|
|
|
|
|
assert C. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
The caret handling is complicated by the fact that the C<'n'> modifier |
69
|
|
|
|
|
|
|
was introduced in 5.21.8, at which point the caret became equivalent to |
70
|
|
|
|
|
|
|
C. I did not feel I could unconditionally add the C<-n> to the |
71
|
|
|
|
|
|
|
expansion of the caret, because that would produce confusing output from |
72
|
|
|
|
|
|
|
methods like L. Nor could I |
73
|
|
|
|
|
|
|
make it conditional on the minimum perl version, because that |
74
|
|
|
|
|
|
|
information is not available early enough in the parse. What I did was |
75
|
|
|
|
|
|
|
to expand the caret into C if and only if C<'n'> was in effect |
76
|
|
|
|
|
|
|
at some point in the scope in which the modifier was parsed. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Continuing the above example, C<< $elem->asserts( 'n' ) >> and |
79
|
|
|
|
|
|
|
C<< $elem->modifier_asserted( 'n' ) >> would both return false, but |
80
|
|
|
|
|
|
|
C<< $elem->negates( 'n' ) >> would return true if and only if the C |
81
|
|
|
|
|
|
|
modifier has been asserted somewhere before and in-scope from this |
82
|
|
|
|
|
|
|
token. The |
83
|
|
|
|
|
|
|
L |
84
|
|
|
|
|
|
|
method is inherited from L. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 METHODS |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
This class provides the following public methods. Methods not documented |
89
|
|
|
|
|
|
|
here are private, and unsupported in the sense that the author reserves |
90
|
|
|
|
|
|
|
the right to change or remove them without notice. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=cut |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
package PPIx::Regexp::Token::Modifier; |
95
|
|
|
|
|
|
|
|
96
|
9
|
|
|
9
|
|
67
|
use strict; |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
248
|
|
97
|
9
|
|
|
9
|
|
50
|
use warnings; |
|
9
|
|
|
|
|
71
|
|
|
9
|
|
|
|
|
238
|
|
98
|
|
|
|
|
|
|
|
99
|
9
|
|
|
9
|
|
49
|
use base qw{ PPIx::Regexp::Token }; |
|
9
|
|
|
|
|
18
|
|
|
9
|
|
|
|
|
729
|
|
100
|
|
|
|
|
|
|
|
101
|
9
|
|
|
9
|
|
64
|
use Carp; |
|
9
|
|
|
|
|
32
|
|
|
9
|
|
|
|
|
636
|
|
102
|
9
|
|
|
|
|
1834
|
use PPIx::Regexp::Constant qw{ |
103
|
|
|
|
|
|
|
MINIMUM_PERL |
104
|
|
|
|
|
|
|
MODIFIER_GROUP_MATCH_SEMANTICS |
105
|
|
|
|
|
|
|
@CARP_NOT |
106
|
9
|
|
|
9
|
|
62
|
}; |
|
9
|
|
|
|
|
31
|
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
our $VERSION = '0.087_01'; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Define modifiers that are to be aggregated internally for ease of |
111
|
|
|
|
|
|
|
# computation. |
112
|
|
|
|
|
|
|
my %aggregate = ( |
113
|
|
|
|
|
|
|
a => MODIFIER_GROUP_MATCH_SEMANTICS, |
114
|
|
|
|
|
|
|
aa => MODIFIER_GROUP_MATCH_SEMANTICS, |
115
|
|
|
|
|
|
|
d => MODIFIER_GROUP_MATCH_SEMANTICS, |
116
|
|
|
|
|
|
|
l => MODIFIER_GROUP_MATCH_SEMANTICS, |
117
|
|
|
|
|
|
|
u => MODIFIER_GROUP_MATCH_SEMANTICS, |
118
|
|
|
|
|
|
|
); |
119
|
|
|
|
|
|
|
my %de_aggregate; |
120
|
|
|
|
|
|
|
foreach my $value ( values %aggregate ) { |
121
|
|
|
|
|
|
|
$de_aggregate{$value}++; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# Note that we do NOT want the /o modifier on regexen that make use of |
125
|
|
|
|
|
|
|
# this, because it is already compiled. |
126
|
|
|
|
|
|
|
my $capture_group_leader = qr{ [?/(] }smx; # ); |
127
|
|
|
|
|
|
|
|
128
|
9
|
|
|
9
|
|
112
|
use constant TOKENIZER_ARGUMENT_REQUIRED => 1; |
|
9
|
|
|
|
|
32
|
|
|
9
|
|
|
|
|
17289
|
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub __new { |
131
|
586
|
|
|
586
|
|
2922
|
my ( $class, $content, %arg ) = @_; |
132
|
|
|
|
|
|
|
|
133
|
586
|
50
|
|
|
|
2842
|
my $self = $class->SUPER::__new( $content, %arg ) |
134
|
|
|
|
|
|
|
or return; |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
$content =~ m{ \A $capture_group_leader* \^ }smx # no /o! |
137
|
|
|
|
|
|
|
and defined $arg{tokenizer}->modifier_seen( 'n' ) |
138
|
586
|
100
|
100
|
|
|
5075
|
and $self->{__caret_undoes_n} = 1; |
139
|
|
|
|
|
|
|
|
140
|
586
|
|
|
|
|
2502
|
$arg{tokenizer}->modifier_modify( $self->modifiers() ); |
141
|
|
|
|
|
|
|
|
142
|
586
|
|
|
|
|
2185
|
return $self; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head2 asserts |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
$token->asserts( 'i' ) and print "token asserts i"; |
148
|
|
|
|
|
|
|
foreach ( $token->asserts() ) { print "token asserts $_\n" } |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
This method returns true if the token explicitly asserts the given |
151
|
|
|
|
|
|
|
modifier. The example would return true for the modifier in |
152
|
|
|
|
|
|
|
C<(?i:foo)>, but false for C<(?-i:foo)>. |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Starting with version 0.036_01, if the argument is a |
155
|
|
|
|
|
|
|
single-character modifier followed by an asterisk (intended as a wild |
156
|
|
|
|
|
|
|
card character), the return is the number of times that modifier |
157
|
|
|
|
|
|
|
appears. In this case an exception will be thrown if you specify a |
158
|
|
|
|
|
|
|
multi-character modifier (e.g. C<'ee*'>). |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
If called without an argument, or with an undef argument, all modifiers |
161
|
|
|
|
|
|
|
explicitly asserted by this token are returned. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub asserts { |
166
|
410
|
|
|
410
|
1
|
798
|
my ( $self, $modifier ) = @_; |
167
|
410
|
|
33
|
|
|
855
|
$self->{modifiers} ||= $self->_decode(); |
168
|
410
|
50
|
|
|
|
779
|
if ( defined $modifier ) { |
169
|
410
|
|
|
|
|
719
|
return __asserts( $self->{modifiers}, $modifier ); |
170
|
|
|
|
|
|
|
} else { |
171
|
0
|
0
|
|
|
|
0
|
return ( sort grep { defined $_ && $self->{modifiers}{$_} } |
172
|
0
|
0
|
|
|
|
0
|
map { $de_aggregate{$_} ? $self->{modifiers}{$_} : $_ } |
173
|
0
|
|
|
|
|
0
|
keys %{ $self->{modifiers} } ); |
|
0
|
|
|
|
|
0
|
|
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# This is a kluge for both determining whether the object asserts |
178
|
|
|
|
|
|
|
# modifiers (hence the 'ductype') and determining whether the given |
179
|
|
|
|
|
|
|
# modifier is actually asserted. The signature is the invocant and the |
180
|
|
|
|
|
|
|
# modifier name, which must not be undef. The return is a boolean. |
181
|
|
|
|
|
|
|
*__ducktype_modifier_asserted = \&asserts; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub __asserts { |
184
|
5461
|
|
|
5461
|
|
9699
|
my ( $present, $modifier ) = @_; |
185
|
5461
|
|
|
|
|
19747
|
my $wild = $modifier =~ s/ [*] \z //smx; |
186
|
5461
|
50
|
66
|
|
|
18469
|
not $wild |
187
|
|
|
|
|
|
|
or 1 == length $modifier |
188
|
|
|
|
|
|
|
or croak "Can not use wild card on multi-character modifier '$modifier*'"; |
189
|
5461
|
100
|
|
|
|
11814
|
if ( my $bin = $aggregate{$modifier} ) { |
190
|
15
|
|
|
|
|
38
|
my $aggr = $present->{$bin}; |
191
|
15
|
50
|
100
|
|
|
95
|
$wild |
192
|
|
|
|
|
|
|
or return ( defined $aggr && $modifier eq $aggr ); |
193
|
0
|
0
|
|
|
|
0
|
defined $aggr |
194
|
|
|
|
|
|
|
or return 0; |
195
|
0
|
0
|
|
|
|
0
|
$aggr =~ m/ \A ( (?: \Q$modifier\E )* ) \z /smx |
196
|
|
|
|
|
|
|
or return 0; |
197
|
0
|
|
|
|
|
0
|
return length $1; |
198
|
|
|
|
|
|
|
} |
199
|
5446
|
100
|
|
|
|
10413
|
if ( $wild ) { |
200
|
2583
|
|
100
|
|
|
16416
|
return $present->{$modifier} || 0; |
201
|
|
|
|
|
|
|
} |
202
|
2863
|
|
|
|
|
4867
|
my $len = length $modifier; |
203
|
2863
|
|
|
|
|
5561
|
$modifier = substr $modifier, 0, 1; |
204
|
2863
|
|
100
|
|
|
12101
|
return $present->{$modifier} && $len == $present->{$modifier}; |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
|
207
|
8
|
|
|
8
|
1
|
32
|
sub can_be_quantified { return }; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
{ |
210
|
|
|
|
|
|
|
my %explanation = ( |
211
|
|
|
|
|
|
|
'm' => 'm: ^ and $ match within string', |
212
|
|
|
|
|
|
|
'-m' => '-m: ^ and $ match only at ends of string', |
213
|
|
|
|
|
|
|
's' => 's: . can match newline', |
214
|
|
|
|
|
|
|
'-s' => '-s: . can not match newline', |
215
|
|
|
|
|
|
|
'i' => 'i: do case-insensitive matching', |
216
|
|
|
|
|
|
|
'-i' => '-i: do case-sensitive matching', |
217
|
|
|
|
|
|
|
'x' => 'x: ignore whitespace and comments', |
218
|
|
|
|
|
|
|
'xx' => 'xx: ignore whitespace even in bracketed character classes', |
219
|
|
|
|
|
|
|
'-x' => '-x: regard whitespace as literal', |
220
|
|
|
|
|
|
|
'p' => 'p: provide ${^PREMATCH} etc (pre 5.20)', |
221
|
|
|
|
|
|
|
'-p' => '-p: no ${^PREMATCH} etc (pre 5.20)', |
222
|
|
|
|
|
|
|
'a' => 'a: restrict non-Unicode classes to ASCII', |
223
|
|
|
|
|
|
|
'aa' => 'aa: restrict non-Unicode classes & ASCII-Unicode matches', |
224
|
|
|
|
|
|
|
'd' => 'd: match using default semantics', |
225
|
|
|
|
|
|
|
'l' => 'l: match using locale semantics', |
226
|
|
|
|
|
|
|
'u' => 'u: match using Unicode semantics', |
227
|
|
|
|
|
|
|
'n' => 'n: parentheses do not capture', |
228
|
|
|
|
|
|
|
'-n' => '-n: parentheses capture', |
229
|
|
|
|
|
|
|
'c' => 'c: preserve current position on match failure', |
230
|
|
|
|
|
|
|
'g' => 'g: match repeatedly', |
231
|
|
|
|
|
|
|
'e' => 'e: substitution string is an expression', |
232
|
|
|
|
|
|
|
'ee' => 'ee: substitution is expression to eval()', |
233
|
|
|
|
|
|
|
'o' => 'o: only interpolate once', |
234
|
|
|
|
|
|
|
'r' => 'r: aubstitution returns modified string', |
235
|
|
|
|
|
|
|
); |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
sub explain { |
238
|
4
|
|
|
4
|
1
|
16
|
my ( $self ) = @_; |
239
|
4
|
|
|
|
|
8
|
my @rslt; |
240
|
4
|
|
|
|
|
10
|
my %mods = $self->modifiers(); |
241
|
4
|
50
|
|
|
|
18
|
if ( defined( my $val = delete $mods{match_semantics} ) ) { |
242
|
4
|
|
|
|
|
17
|
push @rslt, $explanation{$val}; |
243
|
|
|
|
|
|
|
} |
244
|
4
|
|
|
|
|
18
|
foreach my $key ( sort keys %mods ) { |
245
|
14
|
100
|
|
|
|
33
|
if ( my $val = $mods{$key} ) { |
246
|
4
|
|
|
|
|
12
|
push @rslt, $explanation{ $key x $val }; |
247
|
|
|
|
|
|
|
} else { |
248
|
10
|
|
|
|
|
26
|
push @rslt, $explanation{ "-$key" }; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
} |
251
|
4
|
50
|
|
|
|
27
|
return wantarray ? @rslt : join '; ', @rslt; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=head2 match_semantics |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
my $sem = $token->match_semantics(); |
258
|
|
|
|
|
|
|
defined $sem or $sem = 'undefined'; |
259
|
|
|
|
|
|
|
print "This token has $sem match semantics\n"; |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
This method returns the match semantics asserted by the token, as one of |
262
|
|
|
|
|
|
|
the strings C<'a'>, C<'aa'>, C<'d'>, C<'l'>, or C<'u'>. If no explicit |
263
|
|
|
|
|
|
|
match semantics are asserted, this method returns C. |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=cut |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub match_semantics { |
268
|
96
|
|
|
96
|
1
|
248
|
my ( $self ) = @_; |
269
|
96
|
|
33
|
|
|
284
|
$self->{modifiers} ||= $self->_decode(); |
270
|
96
|
|
|
|
|
238
|
return $self->{modifiers}{ MODIFIER_GROUP_MATCH_SEMANTICS() }; |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=head2 modifiers |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
my %mods = $token->modifiers(); |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
Returns all modifiers asserted or negated by this token, and the values |
278
|
|
|
|
|
|
|
set (true for asserted, false for negated). If called in scalar context, |
279
|
|
|
|
|
|
|
returns a reference to a hash containing the values. |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=cut |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub modifiers { |
284
|
591
|
|
|
591
|
1
|
1411
|
my ( $self ) = @_; |
285
|
591
|
|
66
|
|
|
3576
|
$self->{modifiers} ||= $self->_decode(); |
286
|
591
|
|
|
|
|
970
|
my %mods = %{ $self->{modifiers} }; |
|
591
|
|
|
|
|
2039
|
|
287
|
591
|
|
|
|
|
2040
|
foreach my $bin ( keys %de_aggregate ) { |
288
|
591
|
100
|
|
|
|
2252
|
defined ( my $val = delete $mods{$bin} ) |
289
|
|
|
|
|
|
|
or next; |
290
|
30
|
|
|
|
|
99
|
$mods{$bin} = $val; |
291
|
|
|
|
|
|
|
} |
292
|
591
|
50
|
|
|
|
3791
|
return wantarray ? %mods : \%mods; |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=head2 negates |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
$token->negates( 'i' ) and print "token negates i\n"; |
298
|
|
|
|
|
|
|
foreach ( $token->negates() ) { print "token negates $_\n" } |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
This method returns true if the token explicitly negates the given |
301
|
|
|
|
|
|
|
modifier. The example would return true for the modifier in |
302
|
|
|
|
|
|
|
C<(?-i:foo)>, but false for C<(?i:foo)>. |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
If called without an argument, or with an undef argument, all modifiers |
305
|
|
|
|
|
|
|
explicitly negated by this token are returned. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=cut |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
sub negates { |
310
|
5
|
|
|
5
|
1
|
13
|
my ( $self, $modifier ) = @_; |
311
|
5
|
|
33
|
|
|
14
|
$self->{modifiers} ||= $self->_decode(); |
312
|
|
|
|
|
|
|
# Note that since the values of hash entries that represent |
313
|
|
|
|
|
|
|
# aggregated modifiers will never be false (at least, not unless '0' |
314
|
|
|
|
|
|
|
# becomes a modifier) we need no special logic to handle them. |
315
|
|
|
|
|
|
|
defined $modifier |
316
|
0
|
|
|
|
|
0
|
or return ( sort grep { ! $self->{modifiers}{$_} } |
317
|
5
|
50
|
|
|
|
16
|
keys %{ $self->{modifiers} } ); |
|
0
|
|
|
|
|
0
|
|
318
|
|
|
|
|
|
|
return exists $self->{modifiers}{$modifier} |
319
|
5
|
|
66
|
|
|
55
|
&& ! $self->{modifiers}{$modifier}; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub perl_version_introduced { |
323
|
132
|
|
|
132
|
1
|
9900
|
my ( $self ) = @_; |
324
|
|
|
|
|
|
|
return ( $self->{perl_version_introduced} ||= |
325
|
132
|
|
66
|
|
|
706
|
$self->_perl_version_introduced() ); |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
sub _perl_version_introduced { |
329
|
93
|
|
|
93
|
|
222
|
my ( $self ) = @_; |
330
|
93
|
|
|
|
|
274
|
my $content = $self->content(); |
331
|
93
|
|
|
|
|
343
|
my $is_statement_modifier = ( $content !~ m/ \A [(]? [?] /smx ); |
332
|
93
|
|
|
|
|
577
|
my $match_semantics = $self->match_semantics(); |
333
|
|
|
|
|
|
|
|
334
|
93
|
100
|
|
|
|
459
|
$self->asserts( 'xx' ) |
335
|
|
|
|
|
|
|
and return '5.025009'; |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
# Disabling capture with /n was introduced in 5.21.8 |
338
|
92
|
100
|
|
|
|
327
|
$self->asserts( 'n' ) |
339
|
|
|
|
|
|
|
and return '5.021008'; |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# Match semantics modifiers became available as regular expression |
342
|
|
|
|
|
|
|
# modifiers in 5.13.10. |
343
|
91
|
100
|
100
|
|
|
389
|
defined $match_semantics |
344
|
|
|
|
|
|
|
and $is_statement_modifier |
345
|
|
|
|
|
|
|
and return '5.013010'; |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# /aa was introduced in 5.13.10. |
348
|
85
|
100
|
100
|
|
|
334
|
defined $match_semantics |
349
|
|
|
|
|
|
|
and 'aa' eq $match_semantics |
350
|
|
|
|
|
|
|
and return '5.013010'; |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# /a was introduced in 5.13.9, but only in (?...), not as modifier |
353
|
|
|
|
|
|
|
# of the entire regular expression. |
354
|
84
|
100
|
66
|
|
|
277
|
defined $match_semantics |
|
|
|
100
|
|
|
|
|
355
|
|
|
|
|
|
|
and not $is_statement_modifier |
356
|
|
|
|
|
|
|
and 'a' eq $match_semantics |
357
|
|
|
|
|
|
|
and return '5.013009'; |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# /d, /l, and /u were introduced in 5.13.6, but only in (?...), not |
360
|
|
|
|
|
|
|
# as modifiers of the entire regular expression. |
361
|
83
|
100
|
66
|
|
|
316
|
defined $match_semantics |
362
|
|
|
|
|
|
|
and not $is_statement_modifier |
363
|
|
|
|
|
|
|
and return '5.013006'; |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
# The '^' reassert-defaults modifier in embedded modifiers was |
366
|
|
|
|
|
|
|
# introduced in 5.13.6. |
367
|
73
|
50
|
66
|
|
|
222
|
not $is_statement_modifier |
368
|
|
|
|
|
|
|
and $content =~ m/ \^ /smx |
369
|
|
|
|
|
|
|
and return '5.013006'; |
370
|
|
|
|
|
|
|
|
371
|
73
|
100
|
|
|
|
180
|
$self->asserts( 'r' ) and return '5.013002'; |
372
|
70
|
100
|
|
|
|
257
|
$self->asserts( 'p' ) and return '5.009005'; |
373
|
68
|
100
|
|
|
|
297
|
$self->content() =~ m/ \A [(]? [?] .* - /smx |
374
|
|
|
|
|
|
|
and return '5.005'; |
375
|
66
|
100
|
|
|
|
186
|
$self->asserts( 'c' ) and return '5.004'; |
376
|
65
|
|
|
|
|
328
|
return MINIMUM_PERL; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
# Return true if the token can be quantified, and false otherwise |
380
|
|
|
|
|
|
|
# sub can_be_quantified { return }; |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# $present => __aggregate_modifiers( 'modifiers', ... ); |
383
|
|
|
|
|
|
|
# |
384
|
|
|
|
|
|
|
# This subroutine is private to the PPIx::Regexp package. It may change |
385
|
|
|
|
|
|
|
# or be retracted without notice. Its purpose is to support defaulted |
386
|
|
|
|
|
|
|
# modifiers. |
387
|
|
|
|
|
|
|
# |
388
|
|
|
|
|
|
|
# Aggregate the given modifiers left-to-right, returning a hash of those |
389
|
|
|
|
|
|
|
# present and their values. |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub __aggregate_modifiers { |
392
|
1110
|
|
|
1110
|
|
2425
|
my ( @mods ) = @_; |
393
|
1110
|
|
|
|
|
1871
|
my %present; |
394
|
1110
|
|
|
|
|
2336
|
foreach my $content ( @mods ) { |
395
|
1118
|
|
|
|
|
4300
|
$content =~ s{ \A $capture_group_leader+ }{}smxg; # no /o! |
396
|
1118
|
100
|
|
|
|
3291
|
if ( $content =~ m/ \A \^ /smx ) { |
397
|
8
|
|
|
|
|
65
|
@present{ MODIFIER_GROUP_MATCH_SEMANTICS(), qw{ i s m x } } |
398
|
|
|
|
|
|
|
= qw{ d 0 0 0 0 }; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
# Have to do the global match rather than a split, because the |
402
|
|
|
|
|
|
|
# expression modifiers come through here too, and we need to |
403
|
|
|
|
|
|
|
# distinguish between s/.../.../e and s/.../.../ee. But the |
404
|
|
|
|
|
|
|
# modifiers can be randomized (that is, /eie is the same as |
405
|
|
|
|
|
|
|
# /eei), so we reorder the content first. |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# The following line is WRONG because it ignores the |
408
|
|
|
|
|
|
|
# significance of '-'. This bug was introduced in version 0.035, |
409
|
|
|
|
|
|
|
# specifically by the change that handled multi-character |
410
|
|
|
|
|
|
|
# modifiers. |
411
|
|
|
|
|
|
|
# $content = join '', sort split qr{}smx, $content; |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# The following is better because it re-orders the modifiers |
414
|
|
|
|
|
|
|
# separately. It does not recognize multiple dashes as |
415
|
|
|
|
|
|
|
# representing an error (though it could!), and modifiers that |
416
|
|
|
|
|
|
|
# are both asserted and negated (e.g. '(?i-i:foo)') are simply |
417
|
|
|
|
|
|
|
# considered to be negated (as Perl does as of 5.20.0). |
418
|
|
|
|
|
|
|
$content = join '-', |
419
|
1118
|
|
|
|
|
6481
|
map { join '', sort split qr{}smx } |
|
247
|
|
|
|
|
2423
|
|
420
|
|
|
|
|
|
|
split qr{-}smx, $content; |
421
|
1118
|
|
|
|
|
2635
|
my $value = 1; |
422
|
1118
|
|
|
|
|
4507
|
while ( $content =~ m/ ( ( [[:alpha:]-] ) \2* ) /smxg ) { |
423
|
412
|
100
|
|
|
|
1697
|
if ( '-' eq $1 ) { |
|
|
100
|
|
|
|
|
|
424
|
11
|
|
|
|
|
63
|
$value = 0; |
425
|
|
|
|
|
|
|
} elsif ( my $bin = $aggregate{$1} ) { |
426
|
|
|
|
|
|
|
# Yes, technically the match semantics stuff can't be |
427
|
|
|
|
|
|
|
# negated in a regex. But it can in a 'use re', which |
428
|
|
|
|
|
|
|
# also comes through here, so we have to handle it. |
429
|
26
|
100
|
|
|
|
148
|
$present{$bin} = $value ? $1 : undef; |
430
|
|
|
|
|
|
|
} else { |
431
|
|
|
|
|
|
|
# TODO have to think about this, since I need asserts( |
432
|
|
|
|
|
|
|
# 'e' ) to be 2 if we in fact have 'ee'. Is this |
433
|
|
|
|
|
|
|
# correct? |
434
|
|
|
|
|
|
|
# $present{$1} = $value; |
435
|
375
|
|
|
|
|
2036
|
$present{$2} = $value * length $1; |
436
|
|
|
|
|
|
|
} |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
} |
439
|
1110
|
|
|
|
|
3616
|
return \%present; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
# This must be implemented by tokens which do not recognize themselves. |
443
|
|
|
|
|
|
|
# The return is a list of list references. Each list reference must |
444
|
|
|
|
|
|
|
# contain a regular expression that recognizes the token, and optionally |
445
|
|
|
|
|
|
|
# a reference to a hash to pass to make_token as the class-specific |
446
|
|
|
|
|
|
|
# arguments. The regular expression MUST be anchored to the beginning of |
447
|
|
|
|
|
|
|
# the string. |
448
|
|
|
|
|
|
|
sub __PPIX_TOKEN__recognize { |
449
|
|
|
|
|
|
|
return ( |
450
|
9
|
|
|
9
|
|
51
|
[ qr{ \A [(] [?] [[:lower:]]* -? [[:lower:]]* [)] }smx ], |
451
|
|
|
|
|
|
|
[ qr{ \A [(] [?] \^ [[:lower:]]* [)] }smx ], |
452
|
|
|
|
|
|
|
); |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
{ |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
# Called by the tokenizer to modify the current modifiers with a new |
458
|
|
|
|
|
|
|
# set. Both are passed as hash references, and a reference to the |
459
|
|
|
|
|
|
|
# new hash is returned. |
460
|
|
|
|
|
|
|
sub __PPIX_TOKENIZER__modifier_modify { |
461
|
592
|
|
|
592
|
|
1432
|
my ( @args ) = @_; |
462
|
|
|
|
|
|
|
|
463
|
592
|
|
|
|
|
1040
|
my %merged; |
464
|
592
|
|
|
|
|
1276
|
foreach my $hash ( @args ) { |
465
|
1184
|
|
|
|
|
1982
|
while ( my ( $key, $val ) = each %{ $hash } ) { |
|
1469
|
|
|
|
|
4953
|
|
466
|
285
|
100
|
|
|
|
630
|
if ( $val ) { |
467
|
242
|
|
|
|
|
641
|
$merged{$key} = $val; |
468
|
|
|
|
|
|
|
} else { |
469
|
43
|
|
|
|
|
140
|
delete $merged{$key}; |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
592
|
|
|
|
|
2365
|
return \%merged; |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
# Decode modifiers from the content of the token. |
479
|
|
|
|
|
|
|
sub _decode { |
480
|
586
|
|
|
586
|
|
1326
|
my ( $self ) = @_; |
481
|
586
|
|
|
|
|
2086
|
my $mod = __aggregate_modifiers( $self->content() ); |
482
|
|
|
|
|
|
|
$self->{__caret_undoes_n} |
483
|
586
|
100
|
|
|
|
1699
|
and $mod->{n} = 0; |
484
|
586
|
|
|
|
|
1835
|
return $mod; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
1; |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
__END__ |