line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Regexp::Genex; |
2
|
1
|
|
|
1
|
|
32126
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
44
|
|
3
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
27
|
|
4
|
1
|
|
|
1
|
|
6
|
use Carp (); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
240
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.07'; |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
our $MAX_QUANTIFIER = 20; |
9
|
|
|
|
|
|
|
our $rx; |
10
|
|
|
|
|
|
|
our $in = ''; |
11
|
|
|
|
|
|
|
our @stack = { |
12
|
|
|
|
|
|
|
dot_nl => 0, # /s modifier |
13
|
|
|
|
|
|
|
multiline => 0,# /m modifier |
14
|
|
|
|
|
|
|
anycase => 0, # /i modifier |
15
|
|
|
|
|
|
|
}; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
package Regexp::Genex::Element; |
19
|
1
|
|
|
1
|
|
21
|
use List::Util qw(shuffle); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2517
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $top = -1; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# global status |
24
|
|
|
|
|
|
|
sub anycase { |
25
|
124
|
100
|
|
124
|
|
584
|
return $stack[$top]{anycase} unless defined $_[1]; |
26
|
38
|
|
|
|
|
73
|
$stack[$top]{anycase} = $_[1]; |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub dot_nl { |
30
|
77
|
100
|
|
77
|
|
251
|
return $stack[$top]{dot_nl} unless defined $_[1]; |
31
|
38
|
|
|
|
|
71
|
$stack[$top]{dot_nl} = $_[1]; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub multiline { |
35
|
77
|
100
|
|
77
|
|
398
|
return $stack[$top]{multiline} unless defined $_[1]; |
36
|
38
|
|
|
|
|
85
|
$stack[$top]{multiline} = $_[1]; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub adjust_mods { |
40
|
38
|
|
|
38
|
|
75
|
my ($self, $on, $off) = @_; |
41
|
38
|
100
|
|
|
|
126
|
$self->anycase(1) if $on =~ /i/; |
42
|
38
|
100
|
|
|
|
113
|
$self->anycase(0) if $off =~ /i/; |
43
|
38
|
50
|
|
|
|
73
|
$self->dot_nl(1) if $on =~ /s/; |
44
|
38
|
50
|
|
|
|
134
|
$self->dot_nl(0) if $off =~ /s/; |
45
|
38
|
50
|
|
|
|
80
|
$self->multiline(1) if $on =~ /m/; |
46
|
38
|
50
|
|
|
|
134
|
$self->multiline(0) if $off =~ /m/; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
sub push_state { |
50
|
39
|
|
|
39
|
|
243
|
my ($self) = shift; |
51
|
39
|
|
|
|
|
107
|
push @stack, { |
52
|
|
|
|
|
|
|
# current state overwriten by new state |
53
|
|
|
|
|
|
|
anycase => $self->anycase, |
54
|
|
|
|
|
|
|
dot_nl => $self->dot_nl, |
55
|
|
|
|
|
|
|
multiline => $self->multiline, |
56
|
|
|
|
|
|
|
quant => $stack[$top]{quant}, |
57
|
|
|
|
|
|
|
@_, # new state |
58
|
|
|
|
|
|
|
}; |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
sub pop_state { |
61
|
39
|
|
|
39
|
|
58
|
my ($self) = @_; |
62
|
39
|
50
|
|
|
|
114
|
pop @stack or Carp::confess "Pop without a push"; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub add { |
66
|
246
|
|
|
246
|
|
604
|
my ($self, $code, $comment) = @_; |
67
|
|
|
|
|
|
|
|
68
|
246
|
|
|
|
|
418
|
$code = $in.$code; |
69
|
246
|
100
|
|
|
|
568
|
if((my $len = length($code)) < 40) { |
70
|
|
|
|
|
|
|
# comment after code at col 40 |
71
|
242
|
|
|
|
|
279
|
$rx .= $code; |
72
|
242
|
50
|
|
|
|
388
|
if(defined $comment) { |
73
|
242
|
|
|
|
|
773
|
$rx .= (' 'x(40-$len))."## $comment\n"; |
74
|
|
|
|
|
|
|
} else { |
75
|
0
|
|
|
|
|
0
|
$rx .= "\n"; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
} else { |
78
|
|
|
|
|
|
|
# comment on line before code |
79
|
4
|
50
|
|
|
|
18
|
$rx .= "\n".(' 'x40)."## $comment (below)\n" if defined $comment; |
80
|
4
|
|
|
|
|
12
|
$rx .= "$code\n\n"; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub safe_quant { |
85
|
92
|
|
|
92
|
|
143
|
my ($self, $quant) = @_; |
86
|
|
|
|
|
|
|
# dodge perl's optimizations |
87
|
92
|
|
|
|
|
111
|
my $nq = $quant; |
88
|
|
|
|
|
|
|
#$nq =~ s/\*/{0,$MAX_QUANTIFIER}/; |
89
|
|
|
|
|
|
|
#$nq =~ s/\+/{1,$MAX_QUANTIFIER}/; |
90
|
92
|
|
|
|
|
201
|
return $nq; |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub case_mod { |
94
|
|
|
|
|
|
|
# i modifier in effect, use \u \L etc to muck with string at rx creation |
95
|
47
|
100
|
|
47
|
|
121
|
return (!$_[0]->anycase) ? '' : ( "", qw(\U \L \u \l) )[rand 5]; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# $; = undef ??? |
99
|
|
|
|
|
|
|
# keys = all characters, values = quoted string equivalent |
100
|
|
|
|
|
|
|
# (String::Escape \80 != perl \x80) |
101
|
|
|
|
|
|
|
my %all_chars = map { chr($_), '"'.escape(chr($_)).'"' } 0..255; |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# regex to pick random |
104
|
|
|
|
|
|
|
# x "string" =~ /(?=(?>^.*(?{$n=int rand$+[0]})))(??{".{$n}"})(.)/s |
105
|
|
|
|
|
|
|
sub class_chars { |
106
|
5
|
|
|
5
|
|
17
|
my ($self, $qr_class) = @_; |
107
|
|
|
|
|
|
|
|
108
|
7
|
|
|
|
|
31
|
my @chars = map { $all_chars{$_} } |
|
1280
|
|
|
|
|
3604
|
|
109
|
5
|
|
|
|
|
304
|
grep { $_ =~ $qr_class } |
110
|
|
|
|
|
|
|
keys %all_chars; |
111
|
|
|
|
|
|
|
|
112
|
5
|
50
|
|
|
|
82
|
if(@chars > 10) { # too big, sample |
113
|
0
|
|
|
|
|
0
|
@chars = shuffle(@chars); |
114
|
|
|
|
|
|
|
# XXX can't produce anything possible for regex .{$n+1} exhausts range |
115
|
0
|
|
|
|
|
0
|
$#chars = 4; |
116
|
|
|
|
|
|
|
# could put %all_chars generation in regex and do \d filter |
117
|
|
|
|
|
|
|
} |
118
|
5
|
|
|
|
|
26
|
return scalar(@chars), @chars; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
sub escape { |
122
|
335
|
|
|
335
|
|
522
|
local($_) = shift; |
123
|
335
|
|
|
|
|
552
|
s/([\\{}"@\$])/\\$1/g; # protect " string interpolation & {} regex parse |
124
|
335
|
|
|
|
|
636
|
s/([^[:graph:] ])/sprintf "\\%03o", ord($1)/eg; |
|
161
|
|
|
|
|
1399
|
|
125
|
|
|
|
|
|
|
#s/(.*)/"$1"/s; |
126
|
335
|
|
|
|
|
2777
|
return $_; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
#use String::Escape qw(qprintable); |
129
|
|
|
|
|
|
|
#print qprintable($_)," = ",escape($_),"\n" |
130
|
|
|
|
|
|
|
# for grep { $_ ne eval escape($_) } map chr, 0..255; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
package Regexp::Genex::flags; |
133
|
|
|
|
|
|
|
sub new { |
134
|
0
|
|
|
0
|
|
0
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
#my ($on, $off) = @_[1,2]; |
137
|
|
|
|
|
|
|
# ignore x, always on for us |
138
|
|
|
|
|
|
|
# off overrides: perl -le 'print "A" =~ /(?i-i)a/' |
139
|
0
|
|
|
|
|
0
|
$self->adjust_mods(@_[1,2]); |
140
|
|
|
|
|
|
|
|
141
|
0
|
|
|
|
|
0
|
$self->add('',$self->string); |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
package Regexp::Genex::group; |
145
|
|
|
|
|
|
|
sub new { |
146
|
38
|
|
|
38
|
|
3657
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
147
|
|
|
|
|
|
|
|
148
|
38
|
|
|
|
|
779
|
$self->push_state(quant => $self->quant); |
149
|
|
|
|
|
|
|
# modify new state |
150
|
38
|
|
|
|
|
123
|
$self->adjust_mods(@_[1,2]); |
151
|
|
|
|
|
|
|
|
152
|
38
|
|
|
|
|
122
|
$self->add("(?:", $self->string); |
153
|
38
|
|
|
|
|
53
|
$in .= ' '; # ->add_indent |
154
|
38
|
|
|
|
|
96
|
return $self; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
package Regexp::Genex::capture; |
158
|
|
|
|
|
|
|
my $number = 0; |
159
|
|
|
|
|
|
|
sub new { |
160
|
1
|
|
|
1
|
|
99
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
161
|
1
|
|
|
|
|
11
|
$number++; |
162
|
1
|
|
|
|
|
8
|
$self->push_state(quant => $self->quant); |
163
|
|
|
|
|
|
|
|
164
|
1
|
|
|
|
|
11
|
$self->add("(","( -> \$$number"); |
165
|
1
|
|
|
|
|
2
|
$in .= ' '; |
166
|
1
|
|
|
|
|
4
|
return $self; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
package Regexp::Genex::close; |
170
|
|
|
|
|
|
|
# group, capture, perl code bit |
171
|
|
|
|
|
|
|
# Pcond Pcut Pahead Pbehind Pgroup Pcapture Pcode Plater |
172
|
|
|
|
|
|
|
sub new { |
173
|
39
|
|
|
39
|
|
4446
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
174
|
39
|
|
|
|
|
310
|
chop($in); |
175
|
1
|
|
|
1
|
|
9
|
no warnings 'uninitialized'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
2158
|
|
176
|
39
|
|
|
|
|
74
|
my $q = "$_[1]$_[2]"; |
177
|
39
|
|
|
|
|
95
|
my $nq = $self->safe_quant($q); |
178
|
39
|
|
|
|
|
144
|
$self->add(")$nq",")$q"); |
179
|
39
|
|
|
|
|
100
|
$self->pop_state; |
180
|
39
|
|
|
|
|
151
|
return $self; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
package Regexp::Genex::alt; |
184
|
|
|
|
|
|
|
sub new { |
185
|
1
|
|
|
1
|
|
84
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
186
|
1
|
|
|
|
|
14
|
$self->add('|','|'); |
187
|
1
|
|
|
|
|
2
|
return $self; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
package Regexp::Genex::backref; |
191
|
|
|
|
|
|
|
# perl -W -MRegexp::Genex -e 'Regexp::Genex::rx(qr/(.)=\1{0,2}/)' |
192
|
|
|
|
|
|
|
sub new { |
193
|
1
|
|
|
1
|
|
103
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
194
|
1
|
|
|
|
|
12
|
my $var = $_[1]; |
195
|
1
|
|
|
|
|
7
|
my $q = $self->quant; |
196
|
1
|
|
|
|
|
19
|
my $nq = $self->safe_quant($q); |
197
|
1
|
|
|
|
|
8
|
my $text = $self->text; |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# the offsets are to the target string but we take that section of $^R |
200
|
1
|
|
|
|
|
15
|
$self->add( |
201
|
|
|
|
|
|
|
'(?: .{1} (?{ $^R.substr($^R,$-[1],$+[1]-$-[1]) }) )'.$nq, $text.$q |
202
|
|
|
|
|
|
|
); |
203
|
1
|
|
|
|
|
3
|
return $self; |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
package Regexp::Genex::text; |
207
|
|
|
|
|
|
|
sub new { |
208
|
41
|
|
|
41
|
|
4505
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
209
|
41
|
|
|
|
|
430
|
my $text = $self->text; |
210
|
41
|
|
|
|
|
269
|
my $q = $self->quant; |
211
|
41
|
|
|
|
|
215
|
my $nq = $self->safe_quant($q); |
212
|
41
|
|
|
|
|
94
|
my $case_mod = $self->case_mod; |
213
|
|
|
|
|
|
|
|
214
|
41
|
|
|
|
|
75
|
my $len = length($text); |
215
|
41
|
|
|
|
|
81
|
$text = Regexp::Genex::Element::escape($text); |
216
|
41
|
|
|
|
|
177
|
$self->add("(?: .{$len} (?{ \$^R.\"$case_mod$text\" }) )$nq", $text.$q); |
217
|
41
|
|
|
|
|
146
|
return $self; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
package Regexp::Genex::oct; |
221
|
|
|
|
|
|
|
sub new { |
222
|
1
|
|
|
1
|
|
102
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
223
|
1
|
|
|
|
|
16
|
my $text = $self->text; |
224
|
1
|
|
|
|
|
21
|
my $q = $self->quant; |
225
|
1
|
|
|
|
|
15
|
my $nq = $self->safe_quant($q); |
226
|
1
|
|
|
|
|
10
|
my $case_mod = $self->case_mod; |
227
|
|
|
|
|
|
|
|
228
|
1
|
|
|
|
|
13
|
$self->add("(?: . (?{ \$^R.\"$case_mod$text\" }) )$nq", $text.$q); |
229
|
1
|
|
|
|
|
4
|
return $self; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
package Regexp::Genex::hex; |
233
|
|
|
|
|
|
|
sub new { |
234
|
1
|
|
|
1
|
|
128
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
235
|
1
|
|
|
|
|
17
|
my $text = $self->text; |
236
|
1
|
|
|
|
|
20
|
my $q = $self->quant; |
237
|
1
|
|
|
|
|
12
|
my $nq = $self->safe_quant($q); |
238
|
1
|
|
|
|
|
7
|
my $case_mod = $self->case_mod; |
239
|
|
|
|
|
|
|
|
240
|
1
|
|
|
|
|
13
|
$self->add("(?: . (?{ \$^R.\"$case_mod$text\" }) )$nq", $text.$q); |
241
|
1
|
|
|
|
|
4
|
return $self; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
package Regexp::Genex::utf8hex; |
245
|
|
|
|
|
|
|
sub new { |
246
|
0
|
|
|
0
|
|
0
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
247
|
0
|
|
|
|
|
0
|
my $text = $self->text; |
248
|
0
|
|
|
|
|
0
|
my $q = $self->quant; |
249
|
0
|
|
|
|
|
0
|
my $nq = $self->safe_quant($q); |
250
|
0
|
|
|
|
|
0
|
my $case_mod = $self->case_mod; |
251
|
|
|
|
|
|
|
|
252
|
0
|
|
|
|
|
0
|
$self->add("(?: . (?{ \$^R.\"$case_mod$text\" }) )$nq", $text.$q); |
253
|
0
|
|
|
|
|
0
|
return $self; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
package Regexp::Genex::ctrl; |
257
|
|
|
|
|
|
|
sub new { |
258
|
0
|
|
|
0
|
|
0
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
259
|
0
|
|
|
|
|
0
|
my $text = $self->text; |
260
|
0
|
|
|
|
|
0
|
my $q = $self->quant; |
261
|
0
|
|
|
|
|
0
|
my $nq = $self->safe_quant($q); |
262
|
0
|
|
|
|
|
0
|
my $case_mod = $self->case_mod; |
263
|
|
|
|
|
|
|
|
264
|
0
|
|
|
|
|
0
|
$self->add("(?: . (?{ \$^R.\"$case_mod$text\" }) )$nq", $text.$q); |
265
|
0
|
|
|
|
|
0
|
return $self; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
package Regexp::Genex::named; |
269
|
|
|
|
|
|
|
sub new { |
270
|
0
|
|
|
0
|
|
0
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
271
|
0
|
|
|
|
|
0
|
my $text = $self->text; |
272
|
0
|
|
|
|
|
0
|
my $q = $self->quant; |
273
|
0
|
|
|
|
|
0
|
my $nq = $self->safe_quant($q); |
274
|
0
|
|
|
|
|
0
|
my $case_mod = $self->case_mod; |
275
|
|
|
|
|
|
|
|
276
|
0
|
|
|
|
|
0
|
$self->add("(?: . (?{ \$^R.\"$case_mod$text\" }) )$nq", $text.$q); |
277
|
0
|
|
|
|
|
0
|
return $self; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
package Regexp::Genex::Cchar; |
281
|
|
|
|
|
|
|
sub new { |
282
|
0
|
|
|
0
|
|
0
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
283
|
0
|
|
|
|
|
0
|
my $text = $self->text; |
284
|
0
|
|
|
|
|
0
|
my $q = $self->quant; |
285
|
0
|
|
|
|
|
0
|
my $nq = $self->safe_quant($q); |
286
|
0
|
|
|
|
|
0
|
my $case_mod = $self->case_mod; |
287
|
|
|
|
|
|
|
|
288
|
0
|
|
|
|
|
0
|
$self->add("(?: . (?{ \$^R.\"$case_mod$text\" }) )$nq", $text.$q); |
289
|
0
|
|
|
|
|
0
|
return $self; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
package Regexp::Genex::slash; |
293
|
|
|
|
|
|
|
sub new { |
294
|
4
|
|
|
4
|
|
345
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
295
|
4
|
|
|
|
|
48
|
my $text = $self->text; |
296
|
4
|
|
|
|
|
36
|
my $q = $self->quant; |
297
|
4
|
|
|
|
|
28
|
my $nq = $self->safe_quant($q); |
298
|
4
|
|
|
|
|
14
|
my $case_mod = $self->case_mod; |
299
|
|
|
|
|
|
|
|
300
|
4
|
|
|
|
|
23
|
$self->add("(?: . (?{ \$^R.\"$case_mod$text\" }) )$nq", $text.$q); |
301
|
4
|
|
|
|
|
12
|
return $self; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
package Regexp::Genex::any; |
305
|
|
|
|
|
|
|
sub new { |
306
|
0
|
|
|
0
|
|
0
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
307
|
0
|
|
|
|
|
0
|
my $q = $self->quant; |
308
|
0
|
|
|
|
|
0
|
my $nq = $self->safe_quant($q); |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
#my ($nl, $n) = ('', 3); |
311
|
|
|
|
|
|
|
#($nl, $n) = (',"\n"', 4) if($self->dot_nl); |
312
|
0
|
0
|
|
|
|
0
|
my ($n, @chars) = ($self->dot_nl) |
313
|
|
|
|
|
|
|
? $self->class_chars(qr/./s) |
314
|
|
|
|
|
|
|
: $self->class_chars(qr/./); |
315
|
|
|
|
|
|
|
|
316
|
0
|
|
|
|
|
0
|
local($") = ","; |
317
|
0
|
|
|
|
|
0
|
$self->add("(?: . (?{ \$^R.(@chars)[rand $n] }) )$nq", ".$q"); |
318
|
|
|
|
|
|
|
#$self->add("(?: . (?{ \$^R.('.','x','X'$nl)[rand $n] }) )$nq",".$q"); |
319
|
0
|
|
|
|
|
0
|
return $self; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
package Regexp::Genex::macro; |
323
|
|
|
|
|
|
|
sub new { |
324
|
0
|
|
|
0
|
|
0
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
325
|
0
|
|
|
|
|
0
|
my $text = $self->text; |
326
|
0
|
|
|
|
|
0
|
my $q = $self->quant; |
327
|
0
|
|
|
|
|
0
|
my $nq = $self->safe_quant($q); |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# \d \D \w \W \s \S |
330
|
0
|
|
|
|
|
0
|
my ($n, @chars) = $self->class_chars(qr/$text/); |
331
|
|
|
|
|
|
|
|
332
|
0
|
|
|
|
|
0
|
local($") = ","; |
333
|
0
|
|
|
|
|
0
|
$self->add("(?: . (?{ \$^R.(@chars)[rand $n] }) )$nq", "$text$q"); |
334
|
0
|
|
|
|
|
0
|
return $self; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
package Regexp::Genex::class; |
338
|
|
|
|
|
|
|
sub new { |
339
|
5
|
|
|
5
|
|
903
|
my $self = $_[0]->SUPER::new(@_[1..$#_]); |
340
|
5
|
|
|
|
|
68
|
my $text = $self->text; |
341
|
5
|
|
|
|
|
71
|
my $q = $self->quant; |
342
|
5
|
|
|
|
|
35
|
my $nq = $self->safe_quant($q); |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
# [^dfads] |
345
|
5
|
|
|
|
|
50
|
my ($n, @chars) = $self->class_chars(qr/$text/); |
346
|
|
|
|
|
|
|
|
347
|
5
|
|
|
|
|
17
|
local($") = ","; |
348
|
5
|
|
|
|
|
42
|
$self->add("(?: . (?{ \$^R.(@chars)[rand $n] }) )$nq", "$text$q"); |
349
|
5
|
|
|
|
|
23
|
return $self; |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# TODO |
353
|
|
|
|
|
|
|
package Regexp::Genex::anchor; |
354
|
|
|
|
|
|
|
# $ is a lookahead \n|\z |
355
|
|
|
|
|
|
|
# \A \z \Z ^ $ \G |
356
|
|
|
|
|
|
|
# ^ $ are /s sensitive (multiline) |
357
|
|
|
|
|
|
|
sub new { |
358
|
0
|
|
|
0
|
|
0
|
Carp::croak("Genex: Anchors not implemented ^ \$ \\A \\Z \\z \\G\n"); |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
package Regexp::Genex::lookahead; |
361
|
|
|
|
|
|
|
# could run look ahead code at the end and check the output... |
362
|
|
|
|
|
|
|
# might be no match possible with random string selections |
363
|
|
|
|
|
|
|
sub new { |
364
|
0
|
|
|
0
|
|
0
|
Carp::croak("Genex: Look-ahead not implemented (?=...) (?!...)\n"); |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
package Regexp::Genex::lookbehind; |
367
|
|
|
|
|
|
|
# can look behind! match against the string we have made or fail |
368
|
|
|
|
|
|
|
sub new { |
369
|
0
|
|
|
0
|
|
0
|
Carp::croak("Genex: Look-behind not implemented (?<=...) (?
|
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
package Regexp::Genex::cond; |
373
|
|
|
|
|
|
|
# probably ok, except for the close complications |
374
|
|
|
|
|
|
|
# (?(1) ... ) should test our $1 |
375
|
|
|
|
|
|
|
sub new { |
376
|
0
|
|
|
0
|
|
0
|
Carp::croak("Genex: Cut not implemented (?(...)...|...)\n"); |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
package Regexp::Genex::cut; |
380
|
|
|
|
|
|
|
# probably ok, except for the close complications |
381
|
|
|
|
|
|
|
sub new { |
382
|
0
|
|
|
0
|
|
0
|
Carp::croak("Genex: Cut not implemented (?>...)\n"); |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
package Regexp::Genex::code; |
386
|
|
|
|
|
|
|
# trashes $^R (stash it somewhere else locally) |
387
|
|
|
|
|
|
|
# could use condition to avoid $^R trashing (?( (?{...}) ) ) |
388
|
|
|
|
|
|
|
sub new { |
389
|
0
|
|
|
0
|
|
0
|
Carp::croak("Genex: Code assertion not implemented (?{...})\n"); |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
package Regexp::Genex::later; # (??{}) |
393
|
|
|
|
|
|
|
# probably ok, except for the close complications |
394
|
|
|
|
|
|
|
# may need original modifier state (esp. /x) |
395
|
|
|
|
|
|
|
sub new { |
396
|
0
|
|
|
0
|
|
0
|
Carp::croak("Genex: Delayed regex not implemented (??{...})\n"); |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
#sub new { |
400
|
|
|
|
|
|
|
# Carp::carp("Delayed regex not handled (??{...})"); |
401
|
|
|
|
|
|
|
# my $self = $_[0]->SUPER::new(@_[1..$#_]); |
402
|
|
|
|
|
|
|
# my $text = $self->text; |
403
|
|
|
|
|
|
|
# my $q = $self->quant; |
404
|
|
|
|
|
|
|
# my $nq = $self->safe_quant($q); |
405
|
|
|
|
|
|
|
# |
406
|
|
|
|
|
|
|
# # HACK needs no_close handling in close->new |
407
|
|
|
|
|
|
|
# push @stack, { |
408
|
|
|
|
|
|
|
# anycase => $self->anycase, dot_nl => $self->dot_nl, |
409
|
|
|
|
|
|
|
# q => $stack[$top]{q}, nq => $stack[$top]{nq}, |
410
|
|
|
|
|
|
|
# no_close => 1, |
411
|
|
|
|
|
|
|
# }; |
412
|
|
|
|
|
|
|
# |
413
|
|
|
|
|
|
|
# $in .= ' '; |
414
|
|
|
|
|
|
|
# |
415
|
|
|
|
|
|
|
# $self->add($text.$nq, $text.$q); |
416
|
|
|
|
|
|
|
# return $self; |
417
|
|
|
|
|
|
|
#} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
package Regexp::Genex; |
420
|
1
|
|
|
1
|
|
3078
|
use YAPE::Regex 'Regexp::Genex'; |
|
1
|
|
|
|
|
121457
|
|
|
1
|
|
|
|
|
11
|
|
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
require Exporter; |
423
|
|
|
|
|
|
|
our @ISA = qw(Exporter YAPE::Regex); |
424
|
|
|
|
|
|
|
our @EXPORT_OK = qw(strings strings_rx generator generator_rx); |
425
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => \@EXPORT_OK ); |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
our $DEFAULT_LEN = 10; |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
sub strings { |
430
|
35
|
|
|
35
|
1
|
2309
|
my ($rx_arg, $len) = @_; |
431
|
35
|
|
|
|
|
78
|
my $rx_str = strings_rx($rx_arg); |
432
|
|
|
|
|
|
|
|
433
|
35
|
|
33
|
|
|
157
|
$len ||= $DEFAULT_LEN; |
434
|
|
|
|
|
|
|
|
435
|
1
|
|
|
1
|
|
3600
|
use re 'eval'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
5866
|
|
436
|
|
|
|
|
|
|
#eval 'use re "debug"'; |
437
|
35
|
|
|
|
|
5651
|
("a"x$len) =~ qr/$rx/x; |
438
|
|
|
|
|
|
|
|
439
|
35
|
|
|
|
|
272
|
return @_; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
sub _main_rx { |
443
|
38
|
|
|
38
|
|
46
|
my $in_rx = shift; |
444
|
|
|
|
|
|
|
|
445
|
38
|
|
|
|
|
58
|
$rx = ""; |
446
|
38
|
|
|
|
|
80
|
my $orig_rx = Regexp::Genex::Element::escape($in_rx); |
447
|
38
|
|
|
|
|
161
|
Regexp::Genex::Element->add('', "Orignal: $orig_rx"); |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# The ^ means the target length can limit output |
450
|
38
|
|
|
|
|
128
|
Regexp::Genex::Element->add( |
451
|
|
|
|
|
|
|
'^(?> (?{ @_ = (); "" }) )', 'Initialize $^R & @_'); |
452
|
38
|
|
|
|
|
56
|
$in = ' '; |
453
|
|
|
|
|
|
|
|
454
|
38
|
|
|
|
|
158
|
my $yape = Regexp::Genex->new($in_rx); |
455
|
38
|
50
|
|
|
|
1701
|
$yape->parse; die $yape->error if $yape->error; |
|
38
|
|
|
|
|
1228
|
|
456
|
|
|
|
|
|
|
|
457
|
38
|
|
|
|
|
463
|
$in = ''; |
458
|
|
|
|
|
|
|
# left in $rx |
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
sub strings_rx { |
462
|
36
|
|
|
36
|
1
|
964
|
my $in_rx = shift; |
463
|
|
|
|
|
|
|
|
464
|
36
|
|
|
|
|
71
|
_main_rx($in_rx); |
465
|
|
|
|
|
|
|
|
466
|
36
|
|
|
|
|
104
|
Regexp::Genex::Element->add( |
467
|
|
|
|
|
|
|
'(?{ push @_, $^R }) (?!)', 'Save & backtrack'); |
468
|
|
|
|
|
|
|
|
469
|
36
|
|
|
|
|
96
|
return $rx; |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub generator_rx { |
473
|
2
|
|
|
2
|
1
|
4
|
my $in_rx = shift; |
474
|
|
|
|
|
|
|
|
475
|
2
|
|
|
|
|
7
|
_main_rx($in_rx); |
476
|
|
|
|
|
|
|
|
477
|
2
|
|
|
|
|
8
|
Regexp::Genex::Element->add( |
478
|
|
|
|
|
|
|
'(?(?{ @_ = $^R if $c++ == $n; }) (?=) | (?!) )', |
479
|
|
|
|
|
|
|
'Replay up to $n then stop'); |
480
|
|
|
|
|
|
|
|
481
|
2
|
|
|
|
|
8
|
return $rx; |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
# perl -MRegexp::Genex=:all -le '$i = generator(qr/ab*?/); print $i->() for 1..4; print $i->(1)' |
485
|
|
|
|
|
|
|
sub generator { |
486
|
1
|
|
|
1
|
1
|
1756
|
my ($rx_arg, $len) = @_; |
487
|
1
|
|
33
|
|
|
10
|
$len ||= $DEFAULT_LEN; |
488
|
1
|
|
|
|
|
5
|
my $rx_str = generator_rx($rx_arg); |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# These vars are captured both by the closure and the regex |
491
|
1
|
|
|
|
|
3
|
my $n = 0; |
492
|
1
|
|
|
|
|
2
|
my $c; |
493
|
|
|
|
|
|
|
|
494
|
1
|
|
|
1
|
|
15
|
use re 'eval'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
170
|
|
495
|
|
|
|
|
|
|
#eval "use re 'debug'"; |
496
|
1
|
|
|
|
|
211
|
my $qr = qr/$rx_str/x; |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
return sub { |
499
|
5
|
100
|
|
5
|
|
22
|
$n = shift if defined $_[0]; # reset's with argument |
500
|
|
|
|
|
|
|
|
501
|
5
|
|
|
|
|
7
|
$c = 0; # reset found counter |
502
|
5
|
|
|
|
|
171
|
('a'x$len) =~ $qr; |
503
|
5
|
|
|
|
|
7
|
$n++; # track next to show |
504
|
5
|
|
|
|
|
26
|
return $_[0]; |
505
|
1
|
|
|
|
|
8
|
}; |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
1; |
509
|
|
|
|
|
|
|
__END__ |