line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Mock::Data::Regex; |
2
|
9
|
|
|
9
|
|
247905
|
use strict; |
|
9
|
|
|
|
|
25
|
|
|
9
|
|
|
|
|
318
|
|
3
|
9
|
|
|
9
|
|
49
|
use warnings; |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
264
|
|
4
|
9
|
|
|
9
|
|
596
|
use Mock::Data::Charset; |
|
9
|
|
|
|
|
19
|
|
|
9
|
|
|
|
|
280
|
|
5
|
9
|
|
|
9
|
|
48
|
use Mock::Data::Util qw( _parse_context _escape_str ); |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
41835
|
|
6
|
|
|
|
|
|
|
require Carp; |
7
|
|
|
|
|
|
|
require Scalar::Util; |
8
|
|
|
|
|
|
|
require List::Util; |
9
|
|
|
|
|
|
|
require Hash::Util; |
10
|
|
|
|
|
|
|
require Mock::Data::Generator; |
11
|
|
|
|
|
|
|
our @ISA= ( 'Mock::Data::Generator' ); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# ABSTRACT: Generator that uses a Regex as a template to generate strings |
14
|
|
|
|
|
|
|
our $VERSION = '0.01'; # VERSION |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub new { |
18
|
22
|
|
|
22
|
1
|
54487
|
my $class= shift; |
19
|
|
|
|
|
|
|
my %self= @_ == 1 && (!ref $_[0] || ref $_[0] eq 'Regexp')? ( regex => $_[0] ) |
20
|
22
|
50
|
66
|
|
|
348
|
: @_ == 1? %{$_[0]} |
|
0
|
100
|
|
|
|
0
|
|
21
|
|
|
|
|
|
|
: @_; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# If called on an object, carry over some settings |
24
|
22
|
50
|
|
|
|
83
|
if (ref $class) { |
25
|
0
|
|
|
|
|
0
|
%self= ( %$class, %self ); |
26
|
|
|
|
|
|
|
# Make sure we didn't copy a regex without a matching regex_parse_tree, or vice versa |
27
|
0
|
0
|
0
|
|
|
0
|
if ($self{regex} == $class->{regex} xor $self{regex_parse_tree} == $class->{regex_parse_tree}) { |
28
|
0
|
0
|
|
|
|
0
|
delete $self{regex_parse_tree} if $self{regex_parse_tree} == $class->{regex_parse_tree}; |
29
|
0
|
0
|
|
|
|
0
|
delete $self{regex} if $self{regex} == $class->{regex}; |
30
|
|
|
|
|
|
|
} |
31
|
0
|
|
|
|
|
0
|
$class= ref $class; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
22
|
50
|
|
|
|
91
|
defined $self{regex} or Carp::croak "Attribute 'regex' is required"; |
35
|
22
|
50
|
|
|
|
90
|
$self{regex}= qr/$self{regex}/ unless ref $self{regex} eq 'Regexp'; |
36
|
|
|
|
|
|
|
# Must be parsed eventually, so might as well do it now and see the errors right away |
37
|
22
|
|
33
|
|
|
160
|
$self{regex_parse_tree} ||= $class->parse($self{regex}); |
38
|
22
|
100
|
100
|
|
|
113
|
$self{max_codepoint} //= 0x7F if $self{regex_parse_tree}->flags->{a}; |
39
|
|
|
|
|
|
|
|
40
|
22
|
50
|
0
|
|
|
113
|
$self{prefix} //= Mock::Data::Util::coerce_generator($self{prefix}) if defined $self{prefix}; |
41
|
22
|
50
|
0
|
|
|
78
|
$self{suffix} //= Mock::Data::Util::coerce_generator($self{suffix}) if defined $self{suffix}; |
42
|
|
|
|
|
|
|
|
43
|
22
|
|
|
|
|
69
|
return bless \%self, $class; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
47
|
0
|
|
|
0
|
1
|
0
|
sub regex { $_[0]{regex} } |
48
|
|
|
|
|
|
|
|
49
|
205
|
|
|
205
|
1
|
512
|
sub regex_parse_tree { $_[0]{regex_parse_tree} } |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub min_codepoint { |
53
|
|
|
|
|
|
|
$_[0]{min_codepoint} |
54
|
203
|
|
|
203
|
1
|
506
|
} |
55
|
|
|
|
|
|
|
|
56
|
203
|
|
|
203
|
1
|
624
|
sub max_codepoint { $_[0]{max_codepoint} } |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
|
59
|
205
|
50
|
|
205
|
1
|
989
|
sub max_repetition { $_[0]{max_repetition} || '+8' } |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub prefix { |
63
|
0
|
0
|
|
0
|
1
|
0
|
if (@_ > 1) { |
64
|
0
|
|
|
|
|
0
|
$_[0]{prefix}= Mock::Data::Util::coerce_generator($_[1]); |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
$_[0]{prefix} |
67
|
0
|
|
|
|
|
0
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub suffix { |
70
|
0
|
0
|
|
0
|
1
|
0
|
if (@_ > 1) { |
71
|
0
|
|
|
|
|
0
|
$_[0]{suffix}= Mock::Data::Util::coerce_generator($_[1]); |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
$_[0]{suffix} |
74
|
0
|
|
|
|
|
0
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub generate { |
78
|
205
|
|
|
205
|
1
|
12576
|
my ($self, $mockdata)= (shift,shift); |
79
|
205
|
100
|
|
|
|
577
|
my %opts= ref $_[0] eq 'HASH'? %{$_[0]} : (); |
|
23
|
|
|
|
|
98
|
|
80
|
205
|
|
100
|
|
|
873
|
$opts{max_codepoint} //= $self->max_codepoint; |
81
|
205
|
|
100
|
|
|
851
|
$opts{min_codepoint} //= $self->min_codepoint; |
82
|
205
|
|
33
|
|
|
720
|
$opts{max_repetition} //= $self->max_repetition; |
83
|
205
|
|
|
|
|
600
|
my $out= $self->_str_builder($mockdata, \%opts); |
84
|
|
|
|
|
|
|
$self->regex_parse_tree->generate($out) |
85
|
|
|
|
|
|
|
# is the string allowed to end here? Requirement of '' is generated by $ and \Z |
86
|
205
|
50
|
66
|
|
|
472
|
&& (!$out->next_req || (grep $_ eq '', @{ $out->next_req })) |
|
|
|
66
|
|
|
|
|
87
|
|
|
|
|
|
|
or Carp::croak "Regex assertions could not be met (such as '^' or '\$'). Final attempt was: \""._escape_str($out->str)."\""; |
88
|
205
|
|
66
|
|
|
796
|
my $prefix= $opts{prefix} // $self->{prefix}; |
89
|
205
|
|
66
|
|
|
569
|
my $suffix= $opts{suffix} // $self->{suffix}; |
90
|
205
|
100
|
66
|
|
|
778
|
return $out->str unless defined $prefix || defined $suffix; |
91
|
|
|
|
|
|
|
|
92
|
19
|
|
|
|
|
54
|
my $str= $out->str; |
93
|
|
|
|
|
|
|
# A prefix can only be added if there was not a beginning-of-string assertion, or if |
94
|
|
|
|
|
|
|
# it was a ^/m assertion (flagged as "LF") |
95
|
19
|
50
|
66
|
|
|
108
|
if ($prefix && (!$out->start || $out->start eq 'LF')) { |
|
|
|
66
|
|
|
|
|
96
|
19
|
|
|
|
|
102
|
my $p= Mock::Data::Util::coerce_generator($prefix)->generate($mockdata); |
97
|
19
|
100
|
|
|
|
81
|
$p .= "\n" if $out->start; |
98
|
19
|
|
|
|
|
75
|
$str= $p . $str; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
# A suffix can only be added if there was not an end-of-string assertion, or if |
101
|
|
|
|
|
|
|
# the next assertion allows "\n" and there is no assertion after that. |
102
|
19
|
100
|
100
|
|
|
99
|
if ($suffix && (!$out->next_req || (grep $_ eq "\n", @{ $out->next_req }) && !$out->require->[1])) { |
|
|
|
66
|
|
|
|
|
103
|
18
|
100
|
|
|
|
100
|
$str .= "\n" if $out->next_req; |
104
|
18
|
|
|
|
|
50
|
$str .= Mock::Data::Util::coerce_generator($suffix)->generate($mockdata); |
105
|
|
|
|
|
|
|
} |
106
|
19
|
|
|
|
|
129
|
return $str; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub parse { |
111
|
40
|
|
|
40
|
1
|
23899
|
my ($self, $regex)= @_; |
112
|
40
|
|
|
|
|
248
|
return $self->_parse_regex({}) for "$regex"; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub get_charset { |
116
|
0
|
|
|
0
|
1
|
0
|
my $self= shift; |
117
|
0
|
|
|
|
|
0
|
my $p= $self->regex_parse_tree->pattern; |
118
|
0
|
0
|
0
|
|
|
0
|
return Scalar::Util::blessed($p) && $p->isa('Mock::Data::Charset')? $p : undef; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
our %_regex_syntax_unsupported= ( |
122
|
|
|
|
|
|
|
'' => { map { $_ => 1 } qw( $ ) }, |
123
|
|
|
|
|
|
|
'\\' => { map { $_ => 1 } qw( B b A Z z G g K k ) }, |
124
|
|
|
|
|
|
|
); |
125
|
|
|
|
|
|
|
our %_parse_regex_backslash= ( |
126
|
|
|
|
|
|
|
map +( $_ => $Mock::Data::Charset::_parse_charset_backslash{$_} ), |
127
|
|
|
|
|
|
|
qw( a b c e f n N o r t x 0 1 2 3 4 5 6 7 8 9 ) |
128
|
|
|
|
|
|
|
); |
129
|
|
|
|
|
|
|
sub _parse_regex { |
130
|
91
|
|
|
91
|
|
165
|
my $self= shift; |
131
|
91
|
|
50
|
|
|
226
|
my $flags= shift || {}; |
132
|
91
|
|
|
|
|
139
|
my $expr= []; |
133
|
91
|
|
|
|
|
142
|
my @or; |
134
|
91
|
|
|
|
|
154
|
while (1) { |
135
|
|
|
|
|
|
|
# begin parenthetical sub-expression? |
136
|
220
|
100
|
|
|
|
774
|
if (/\G \( (\?)? /gcx) { |
137
|
51
|
|
|
|
|
99
|
my $sub_flags= $flags; |
138
|
51
|
100
|
|
|
|
191
|
if (defined $1) { |
139
|
|
|
|
|
|
|
# leading question mark means regex flags. This only supports the ^...: one: |
140
|
40
|
50
|
|
|
|
274
|
/\G \^ ( \w* )? : /gcx |
141
|
|
|
|
|
|
|
or Carp::croak("Unsupported regex feature '(?".substr($_,pos,1)."'"); |
142
|
40
|
50
|
|
|
|
120
|
if (defined $1) { |
143
|
40
|
|
|
|
|
85
|
$sub_flags= {}; |
144
|
40
|
|
|
|
|
237
|
++$sub_flags->{$_} for split '', $1; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
} |
147
|
51
|
|
|
|
|
113
|
my $pos= pos; |
148
|
51
|
|
|
|
|
207
|
push @$expr, $self->_parse_regex($sub_flags); |
149
|
51
|
50
|
|
|
|
224
|
/\G \) /gcx |
150
|
|
|
|
|
|
|
or die "Missing end-parenthesee, started at '"._parse_context($pos)."'"; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
# end sub-expression or next alternation? |
153
|
220
|
100
|
33
|
|
|
1286
|
if (/\G ( [|)] ) /gcx) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# end of sub-expression, return. |
155
|
56
|
100
|
|
|
|
169
|
if ($1 eq ')') { |
156
|
|
|
|
|
|
|
# back it up so the caller knows why we exited |
157
|
51
|
|
|
|
|
148
|
--pos; |
158
|
51
|
|
|
|
|
149
|
last; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
# else begin next piece of @or |
161
|
5
|
|
|
|
|
20
|
push @or, $self->_node($expr, $flags); |
162
|
5
|
|
|
|
|
12
|
$expr= []; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
# character class? |
165
|
|
|
|
|
|
|
elsif (/\G ( \[ | \\w | \\W | \\s | \\S | \\d | \\D | \\N | \\Z | \. | \^ | \$ ) /gcx) { |
166
|
25
|
100
|
|
|
|
182
|
if ($1 eq '[') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# parse function continues to operate on $_ at pos() |
168
|
5
|
|
|
|
|
27
|
my $parse= Mock::Data::Charset::_parse_charset($flags); |
169
|
5
|
|
|
|
|
25
|
push @$expr, $self->_charset_node($parse, $flags); |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
elsif (ord $1 == ord '\\') { |
172
|
5
|
100
|
|
|
|
13
|
if ($1 eq "\\Z") { |
173
|
1
|
|
|
|
|
5
|
push @$expr, $self->_assertion_node(end => 1, flags => $flags); |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
else { |
176
|
4
|
|
|
|
|
18
|
push @$expr, $self->_charset_node(notation => $1, $flags); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
elsif ($1 eq '.') { |
180
|
2
|
50
|
|
|
|
15
|
push @$expr, $self->_charset_node(classes => [ $flags->{s}? 'Any' : '\\N' ], $flags); |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
elsif ($1 eq '$') { |
183
|
7
|
100
|
|
|
|
39
|
push @$expr, $self->_assertion_node(end => ($flags->{m}? 'LF' : 'FinalLF'), flags => $flags); |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
elsif ($1 eq '^') { |
186
|
6
|
100
|
|
|
|
39
|
push @$expr, $self->_assertion_node(start => ($flags->{m}? 'LF' : 1 ), flags => $flags); |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
# repetition? |
190
|
|
|
|
|
|
|
elsif (/\G ( \? | \* \?? | \+ \?? | \{ ([0-9]+)? (,)? ([0-9]+)? \} ) /gcx) { |
191
|
24
|
|
|
|
|
50
|
my @rep; |
192
|
24
|
100
|
|
|
|
149
|
if ($1 eq '?') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
193
|
1
|
|
|
|
|
4
|
@rep= (0,1); |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
elsif (ord $1 == ord '*') { |
196
|
5
|
|
|
|
|
16
|
@rep= (0); |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
elsif (ord $1 == ord '+') { |
199
|
13
|
|
|
|
|
43
|
@rep= (1); |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
else { |
202
|
5
|
100
|
100
|
|
|
51
|
@rep= $3? ($2||0,$4) : ($2||0,$2); |
|
|
|
50
|
|
|
|
|
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
# What came before this? |
205
|
24
|
50
|
|
|
|
116
|
if (!@$expr) { |
|
|
100
|
|
|
|
|
|
206
|
0
|
|
|
|
|
0
|
die "Found quantifier '$1' before anything to quantify at "._parse_context; |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
elsif (!ref $expr->[-1]) { |
209
|
|
|
|
|
|
|
# If the string is composed of more than one character, split the final one |
210
|
|
|
|
|
|
|
# into its own node so that it can have a repetition applied to it. |
211
|
6
|
100
|
|
|
|
21
|
if (length $expr->[-1] > 1) { |
212
|
2
|
|
|
|
|
10
|
push @$expr, $self->_node([ substr($expr->[-1], -1) ], $flags); |
213
|
2
|
|
|
|
|
6
|
substr($expr->[-2], -1)= ''; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
# else its one character, wrap it in a node |
216
|
|
|
|
|
|
|
else { |
217
|
4
|
|
|
|
|
16
|
$expr->[-1]= $self->_node([ $expr->[-1] ], $flags); |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} |
220
|
24
|
|
|
|
|
118
|
$expr->[-1]->repetition(\@rep) |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
elsif ($flags->{x} && /\G ( \s | [#].* ) /gcx) { |
223
|
|
|
|
|
|
|
# ignore whitespace and comments under /x mode |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
elsif (/\G (\\)? (.) /gcxs) { |
226
|
|
|
|
|
|
|
# Tell users about unsupported features |
227
|
75
|
50
|
100
|
|
|
413
|
die "Unsupported notation: '$1$2'" if $_regex_syntax_unsupported{$1||''}{$2}; |
228
|
75
|
|
|
|
|
184
|
my $ch; |
229
|
75
|
100
|
100
|
|
|
234
|
if ($1 && defined (my $equiv= $_parse_regex_backslash{$2})) { |
230
|
5
|
100
|
|
|
|
30
|
$ch= chr(ref $equiv? $equiv->() : $equiv); |
231
|
|
|
|
|
|
|
} else { |
232
|
70
|
|
|
|
|
139
|
$ch= $2; |
233
|
|
|
|
|
|
|
} |
234
|
75
|
100
|
66
|
|
|
343
|
if ($flags->{i} && (uc $ch ne lc $ch)) { |
|
|
100
|
100
|
|
|
|
|
235
|
3
|
|
|
|
|
17
|
push @$expr, $self->_charset_node(chars => [uc $ch, lc $ch], $flags); |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
elsif (@$expr && !ref $expr->[-1]) { |
238
|
22
|
|
|
|
|
46
|
$expr->[-1] .= $ch; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
else { |
241
|
50
|
|
|
|
|
117
|
push @$expr, $ch; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
else { |
245
|
40
|
|
|
|
|
65
|
last; # end of string |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
} |
248
|
91
|
50
|
100
|
|
|
851
|
return @or? do { push @or, $self->_node($expr, $flags) if @$expr; $self->_or_node(\@or, $flags) } |
|
3
|
100
|
|
|
|
14
|
|
|
3
|
100
|
|
|
|
15
|
|
249
|
|
|
|
|
|
|
: (@$expr > 1 || !ref $expr->[0])? $self->_node($expr, $flags) |
250
|
|
|
|
|
|
|
: $expr->[0]; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
#---------------------------------- |
254
|
|
|
|
|
|
|
# Factory Functions for Parse Nodes |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub _node { |
257
|
49
|
|
|
49
|
|
187
|
my ($self, $pattern, $flags)= @_; |
258
|
49
|
|
|
|
|
280
|
Mock::Data::Regex::ParseNode->new({ pattern => $pattern, flags => $flags }); |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
sub _or_node { |
261
|
3
|
|
|
3
|
|
8
|
my ($self, $or_list, $flags)= @_; |
262
|
3
|
|
|
|
|
25
|
Mock::Data::Regex::ParseNode::Or->new({ pattern => $or_list, flags => $flags }); |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
sub _charset_node { |
265
|
14
|
|
|
14
|
|
31
|
my $self= shift; |
266
|
14
|
|
|
|
|
29
|
my $flags= pop; |
267
|
14
|
100
|
|
|
|
156
|
Mock::Data::Regex::ParseNode::Charset->new({ |
268
|
|
|
|
|
|
|
pattern => @_ > 1? { @_ } : shift, |
269
|
|
|
|
|
|
|
flags => $flags |
270
|
|
|
|
|
|
|
}); |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
sub _assertion_node { |
273
|
14
|
|
|
14
|
|
27
|
my $self= shift; |
274
|
14
|
|
|
|
|
87
|
Mock::Data::Regex::ParseNode::Assertion->new({ @_ }); |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
sub _str_builder { |
277
|
205
|
|
|
205
|
|
401
|
my ($self, $mockdata, $opts)= @_; |
278
|
205
|
|
|
|
|
931
|
Mock::Data::Regex::StrBuilder->new({ |
279
|
|
|
|
|
|
|
mockdata => $mockdata, |
280
|
|
|
|
|
|
|
generator => $self, |
281
|
|
|
|
|
|
|
opts => $opts, |
282
|
|
|
|
|
|
|
}); |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub _fake_inc { |
286
|
45
|
|
|
45
|
|
255
|
(my $pkg= caller) =~ s,::,/,g; |
287
|
45
|
|
|
|
|
172
|
$INC{$pkg.'.pm'}= $INC{'Mock/Data/Generator/Regex.pm'}; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# ------------------------------ Regex Parse Node ------------------------------------- |
291
|
|
|
|
|
|
|
# The regular parse nodes hold a "pattern" which is an arrayref of literal strings |
292
|
|
|
|
|
|
|
# or nested parse nodes. It supports a "repetition" flag to handle min/max repetitions |
293
|
|
|
|
|
|
|
# of the node as a whole. |
294
|
|
|
|
|
|
|
# Other subclasses are used to handle OR-lists, charsets, and zero-width assertions. |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
package # Do not index |
297
|
|
|
|
|
|
|
Mock::Data::Regex::ParseNode; |
298
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
299
|
|
|
|
|
|
|
|
300
|
66
|
|
|
66
|
0
|
266
|
sub new { bless $_[1], $_[0] } |
301
|
|
|
|
|
|
|
|
302
|
22
|
|
|
22
|
0
|
90
|
sub flags { $_[0]{flags} } |
303
|
|
|
|
|
|
|
sub repetition { |
304
|
448
|
100
|
|
448
|
0
|
3213
|
if (@_ > 1) { |
305
|
|
|
|
|
|
|
# If a quantifier is being applied to a thing that already had a quantifier |
306
|
|
|
|
|
|
|
# (such as /(X*){2}/ ) |
307
|
|
|
|
|
|
|
# multiply them |
308
|
24
|
|
|
|
|
49
|
my $val= $_[1]; |
309
|
24
|
50
|
|
|
|
72
|
if (my $rep= $_[0]{repetition}) { |
310
|
|
|
|
|
|
|
$rep->[$_]= (defined $rep->[$_] && defined $val->[$_]? $rep->[$_] * $val->[$_] : undef) |
311
|
0
|
0
|
0
|
|
|
0
|
for 0, 1; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
else { |
314
|
24
|
|
|
|
|
66
|
$_[0]{repetition}= $_[1]; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
return $_[0]{repetition} |
318
|
448
|
|
|
|
|
923
|
} |
319
|
|
|
|
|
|
|
sub min_repetition { |
320
|
0
|
0
|
|
0
|
0
|
0
|
$_[0]{repetition}? $_[0]{repetition}[0] : 1 |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
sub max_repetition { |
323
|
0
|
0
|
|
0
|
0
|
0
|
$_[0]{repetition}? $_[0]{repetition}[1] : 1 |
324
|
|
|
|
|
|
|
} |
325
|
223
|
|
|
223
|
0
|
11855
|
sub pattern { $_[0]{pattern} } |
326
|
|
|
|
|
|
|
sub generate { |
327
|
341
|
|
|
341
|
0
|
587
|
my ($self, $out)= @_; |
328
|
341
|
100
|
|
|
|
613
|
if (my $rep= $self->repetition) { |
329
|
81
|
|
|
|
|
186
|
my ($min, $n)= ($rep->[0], $out->_random_rep_count($rep)); |
330
|
81
|
|
|
|
|
191
|
for (1 .. $n) { |
331
|
335
|
100
|
|
|
|
749
|
my $origin= $_ > $min? $out->mark : undef; |
332
|
|
|
|
|
|
|
# Plain nodes expect the pattern to be an arrayref where each item is a parse node or a literal |
333
|
335
|
|
|
|
|
442
|
my $success= 1; |
334
|
335
|
|
|
|
|
399
|
for (@{ $self->{pattern} }) { |
|
335
|
|
|
|
|
624
|
|
335
|
371
|
100
|
100
|
|
|
944
|
$success &&= ref? $_->generate($out) : $out->append($_); |
336
|
|
|
|
|
|
|
} |
337
|
335
|
100
|
|
|
|
730
|
next if $success; |
338
|
|
|
|
|
|
|
# This repetition failed, but did we meet the requirement already? |
339
|
16
|
50
|
|
|
|
27
|
if ($origin) { |
340
|
16
|
|
|
|
|
45
|
$out->reset($origin); |
341
|
16
|
|
|
|
|
56
|
return 1; |
342
|
|
|
|
|
|
|
} |
343
|
0
|
|
|
|
|
0
|
return 0; |
344
|
|
|
|
|
|
|
} |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
else { |
347
|
|
|
|
|
|
|
# Plain nodes expect the pattern to be an arrayref where each item is a parse node or a literal |
348
|
260
|
|
|
|
|
389
|
for (@{ $self->{pattern} }) { |
|
260
|
|
|
|
|
533
|
|
349
|
432
|
100
|
|
|
|
950
|
return 0 unless ref? $_->generate($out) : $out->append($_); |
|
|
100
|
|
|
|
|
|
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
} |
352
|
306
|
|
|
|
|
927
|
return 1; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
# --------------------------------- Regex "OR" Parse Node ---------------------------- |
356
|
|
|
|
|
|
|
# This parse holds a list of options in ->pattern. It chooses one of the options at |
357
|
|
|
|
|
|
|
# random, but then can backtrack if inner parse nodes were not able to match. |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
package # Do not index |
360
|
|
|
|
|
|
|
Mock::Data::Regex::ParseNode::Or; |
361
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
362
|
|
|
|
|
|
|
our @ISA= ('Mock::Data::Regex::ParseNode'); |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
sub generate { |
365
|
30
|
|
|
30
|
0
|
56
|
my ($self, $out)= @_; |
366
|
30
|
|
|
|
|
56
|
my ($min, $n)= (1,1); |
367
|
30
|
50
|
|
|
|
72
|
if (my $rep= $self->{repetition}) { |
368
|
30
|
|
|
|
|
57
|
$min= $rep->[0]; |
369
|
30
|
|
|
|
|
65
|
$n= $out->_random_rep_count($rep); |
370
|
|
|
|
|
|
|
} |
371
|
30
|
|
|
|
|
74
|
rep: for (1 .. $n) { |
372
|
|
|
|
|
|
|
# OR nodes expect the pattern to be an arrayref where each item is an option |
373
|
|
|
|
|
|
|
# for what could be appended. Need to reset the output after each attempt. |
374
|
122
|
|
|
|
|
223
|
my $origin= $out->mark; |
375
|
|
|
|
|
|
|
# Pick one at random. It will almost always work on the first try, unless the user |
376
|
|
|
|
|
|
|
# has anchor constraints in the pattern. |
377
|
122
|
|
|
|
|
218
|
my $or= $self->pattern; |
378
|
122
|
|
|
|
|
219
|
my $pick= $or->[ rand scalar @$or ]; |
379
|
122
|
50
|
|
|
|
290
|
next rep if ref $pick? $pick->generate($out) : $out->append($pick); |
|
|
100
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# if it fails, try all the others in random order |
381
|
19
|
|
|
|
|
38
|
for (List::Util::shuffle(grep { $_ != $pick } @$or)) { |
|
38
|
|
|
|
|
98
|
|
382
|
|
|
|
|
|
|
# reset output |
383
|
19
|
|
|
|
|
49
|
$out->reset($origin); |
384
|
|
|
|
|
|
|
# append something new |
385
|
19
|
50
|
|
|
|
46
|
next rep if ref? $_->generate($out) : $out->append($_); |
|
|
50
|
|
|
|
|
|
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
# None of the options succeeded. Did we get enough reps already? |
388
|
0
|
0
|
|
|
|
0
|
if ($_ > $min) { |
389
|
0
|
|
|
|
|
0
|
$out->reset($origin); |
390
|
0
|
|
|
|
|
0
|
return 1; |
391
|
|
|
|
|
|
|
} |
392
|
0
|
|
|
|
|
0
|
return 0; |
393
|
|
|
|
|
|
|
} |
394
|
30
|
|
|
|
|
84
|
return 1; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# -------------------------------- Regex Charset Parse Node --------------------------- |
398
|
|
|
|
|
|
|
# This node's ->pattern is an instance of Charset. It returns one character |
399
|
|
|
|
|
|
|
# from the set, but also has an optimized handling of the ->repetition flag that generates |
400
|
|
|
|
|
|
|
# multiple characters at once. |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
package # Do not index |
403
|
|
|
|
|
|
|
Mock::Data::Regex::ParseNode::Charset; |
404
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
405
|
|
|
|
|
|
|
our @ISA= ('Mock::Data::Regex::ParseNode'); |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
sub new { |
408
|
14
|
|
|
14
|
0
|
46
|
my ($class, $self)= @_; |
409
|
14
|
50
|
|
|
|
63
|
if (ref $self->{pattern} eq 'HASH') { |
410
|
14
|
100
|
|
|
|
46
|
$self->{pattern}{max_codepoint}= 0x7F if $self->{flags}{a}; |
411
|
14
|
|
|
|
|
69
|
$self->{pattern}= Mock::Data::Util::charset($self->{pattern}); |
412
|
|
|
|
|
|
|
} |
413
|
14
|
|
|
|
|
67
|
bless $self, $class; |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub generate { |
417
|
76
|
|
|
76
|
0
|
153
|
my ($self, $out)= @_; |
418
|
|
|
|
|
|
|
# Check whether output has a restriction in effect: |
419
|
76
|
50
|
|
|
|
151
|
if (my $req= $out->next_req) { |
420
|
|
|
|
|
|
|
# pick the first requirement which can be matched by this charset |
421
|
0
|
|
|
|
|
0
|
for (@$req) { |
422
|
0
|
0
|
|
|
|
0
|
if (!ref) { |
423
|
|
|
|
|
|
|
# At \Z, can still match if rep count is 0 |
424
|
0
|
0
|
0
|
|
|
0
|
return 1 if length == 0 && $self->min_repetition == 0; |
425
|
0
|
0
|
0
|
|
|
0
|
return $out->append($_) if |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
426
|
|
|
|
|
|
|
length == 1 && $self->pattern->has_member($_) |
427
|
|
|
|
|
|
|
or length > 1 && !(grep !$self->pattern->has_member($_), split //); |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
} |
430
|
0
|
|
|
|
|
0
|
return 0; |
431
|
|
|
|
|
|
|
} |
432
|
76
|
|
|
|
|
164
|
my $n= $out->_random_rep_count($self->repetition); |
433
|
76
|
|
|
|
|
195
|
return $out->append($self->pattern->generate($out->mockdata, $out->opts, $n)); |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# ----------------------------- Regex Assertion Parse Node ------------------------------- |
437
|
|
|
|
|
|
|
# This node doesn't have a ->pattern, and instead holds constraints about what characters |
438
|
|
|
|
|
|
|
# must occur around the current position. Right now it only handles '^' and '$' and '\Z' |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
package # Do not index |
441
|
|
|
|
|
|
|
Mock::Data::Regex::ParseNode::Assertion; |
442
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
443
|
|
|
|
|
|
|
our @ISA= ('Mock::Data::Regex::ParseNode'); |
444
|
|
|
|
|
|
|
|
445
|
0
|
|
|
0
|
0
|
0
|
sub start { $_[0]{start} } |
446
|
0
|
|
|
0
|
0
|
0
|
sub end { $_[0]{end} } |
447
|
|
|
|
|
|
|
sub generate { |
448
|
90
|
|
|
90
|
0
|
149
|
my ($self, $out)= @_; |
449
|
90
|
100
|
|
|
|
211
|
if ($self->{start}) { |
450
|
|
|
|
|
|
|
# Previous character must either be start of string or a newline |
451
|
|
|
|
|
|
|
length $out->str == 0 |
452
|
51
|
100
|
66
|
|
|
89
|
or ($self->{start} eq 'LF' && substr($out->str,-1) eq "\n") |
|
|
|
100
|
|
|
|
|
453
|
|
|
|
|
|
|
or return 0; |
454
|
|
|
|
|
|
|
# Set flag on entire output if this is the first assertion |
455
|
32
|
100
|
66
|
|
|
69
|
$out->start($self->{start}) if length $out->str == 0 && !$out->start; |
456
|
|
|
|
|
|
|
} |
457
|
71
|
100
|
|
|
|
143
|
if ($self->{end}) { |
458
|
|
|
|
|
|
|
# Next character must be a newline, or end of the output |
459
|
|
|
|
|
|
|
# end=1 results from \Z and does not allow the newline |
460
|
39
|
50
|
|
|
|
171
|
$out->require(['',"\n"]) unless $self->{end} eq 1; |
461
|
|
|
|
|
|
|
# If end=LF, the end of string is no longer mandatory once "\n" has been matched. |
462
|
39
|
100
|
|
|
|
126
|
$out->require(['']) unless $self->{end} eq 'LF'; |
463
|
|
|
|
|
|
|
} |
464
|
71
|
|
|
|
|
174
|
return 1; |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
# ------------------------ String Builder ----------------------------------- |
468
|
|
|
|
|
|
|
# This class constructs an output string in ->{str}, and also performs checks |
469
|
|
|
|
|
|
|
# needed by the assertions like ^ and $. It also has the ability to mark a |
470
|
|
|
|
|
|
|
# position and then revert to that position, without copying the entire string |
471
|
|
|
|
|
|
|
# each time. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
package # Do not index |
474
|
|
|
|
|
|
|
Mock::Data::Regex::StrBuilder; |
475
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
sub new { |
478
|
205
|
|
|
205
|
0
|
375
|
my ($class, $self)= @_; |
479
|
205
|
|
50
|
|
|
848
|
$self->{str} //= ''; |
480
|
205
|
|
|
|
|
490
|
bless $self, $class; |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
|
483
|
76
|
|
|
76
|
0
|
165
|
sub mockdata { $_[0]{mockdata} } # Mock::Data instance |
484
|
0
|
|
|
0
|
0
|
0
|
sub generator { $_[0]{generator} } |
485
|
197
|
|
|
197
|
0
|
627
|
sub opts { $_[0]{opts} } |
486
|
468
|
100
|
|
468
|
0
|
916
|
sub start { $_[0]{start}= $_[1] if @_ > 1; $_[0]{start} } |
|
468
|
|
|
|
|
1126
|
|
487
|
320
|
|
|
320
|
0
|
1320
|
sub str { $_[0]{str} } # string being built |
488
|
|
|
|
|
|
|
sub _random_rep_count { |
489
|
187
|
|
|
187
|
|
323
|
my ($self, $rep)= @_; |
490
|
187
|
100
|
|
|
|
414
|
return 1 unless defined $rep; |
491
|
147
|
100
|
|
|
|
384
|
return $rep->[0] + int rand($rep->[1] - $rep->[0] + 1) |
492
|
|
|
|
|
|
|
if defined $rep->[1]; |
493
|
121
|
|
50
|
|
|
271
|
my $range= $self->opts->{max_repetition} // '+8'; |
494
|
121
|
50
|
|
|
|
677
|
return $rep->[0] + int rand($range+1) |
495
|
|
|
|
|
|
|
if ord $range == ord '+'; |
496
|
0
|
|
|
|
|
0
|
$range -= $rep->[0]; |
497
|
0
|
0
|
|
|
|
0
|
return $range > 0? $rep->[0] + int rand($range+1) : $rep->[0]; |
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
sub require { |
501
|
61
|
100
|
|
61
|
0
|
129
|
push @{ $_[0]{require} }, $_[1] if @_ > 1; |
|
58
|
|
|
|
|
137
|
|
502
|
61
|
|
|
|
|
105
|
return $_[0]{require}; |
503
|
|
|
|
|
|
|
} |
504
|
|
|
|
|
|
|
sub next_req { |
505
|
1032
|
|
66
|
1032
|
0
|
3031
|
return $_[0]{require} && $_[0]{require}[0]; |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
sub append { |
508
|
672
|
|
|
672
|
0
|
1165
|
my ($self, $content)= @_; |
509
|
672
|
100
|
|
|
|
1060
|
if (my $req= $self->next_req) { |
510
|
|
|
|
|
|
|
# the provided output must be coerced to one of these options, if possible |
511
|
|
|
|
|
|
|
# TODO: need new ideas for this code. Or just give up on the plan of supporting |
512
|
|
|
|
|
|
|
# lookaround assertions and focus on a simple implemention of "\n" checks for ^/$ |
513
|
16
|
|
|
|
|
33
|
for (@$req) { |
514
|
32
|
50
|
|
|
|
57
|
if (!ref) { # next text must match a literal string. '' means end-of-string |
515
|
32
|
50
|
66
|
|
|
152
|
if (length && $content eq $_) { |
516
|
0
|
|
|
|
|
0
|
$self->{str} .= $content; |
517
|
0
|
|
|
|
|
0
|
shift @{ $self->require }; # requirement complete |
|
0
|
|
|
|
|
0
|
|
518
|
0
|
|
|
|
|
0
|
return 1; |
519
|
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
else { |
522
|
|
|
|
|
|
|
# TODO: support for "lookaround" assertions, will require regex match |
523
|
|
|
|
|
|
|
... |
524
|
0
|
|
|
|
|
0
|
} |
525
|
|
|
|
|
|
|
} |
526
|
16
|
|
|
|
|
46
|
return 0; # no match found for the restriction in effect |
527
|
|
|
|
|
|
|
} |
528
|
656
|
|
|
|
|
1105
|
$self->{str} .= $content; |
529
|
656
|
|
|
|
|
1745
|
return 1; |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
sub mark { |
532
|
391
|
|
|
391
|
0
|
511
|
my $self= shift; |
533
|
391
|
|
|
|
|
626
|
my $len= $self->{lastmark}= length $self->{str}; |
534
|
391
|
|
|
|
|
584
|
my $req= $self->{require}; |
535
|
391
|
100
|
|
|
|
858
|
return [ \$self->{str}, $len, $req? [ @$req ] : undef, $self->start ]; |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
sub reset { |
538
|
35
|
|
|
35
|
0
|
57
|
my ($self, $origin)= @_; |
539
|
|
|
|
|
|
|
# If the string is a different instance than before, go back to that instance |
540
|
0
|
|
|
|
|
0
|
Hash::Util::hv_store(%$self, 'str', ${$origin->[0]}) |
541
|
35
|
50
|
|
|
|
93
|
unless \$self->{str} == $origin->[0]; |
542
|
|
|
|
|
|
|
# Reset the string to the original length |
543
|
35
|
|
|
|
|
133
|
substr($self->{str}, $origin->[1])= ''; |
544
|
35
|
|
|
|
|
72
|
$self->{require}= $origin->[2]; |
545
|
35
|
|
|
|
|
63
|
$self->{start}= $origin->[3]; |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
1; |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
__END__ |