line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Mock::Data::Regex; |
2
|
12
|
|
|
12
|
|
211788
|
use strict; |
|
12
|
|
|
|
|
115
|
|
|
12
|
|
|
|
|
464
|
|
3
|
12
|
|
|
12
|
|
66
|
use warnings; |
|
12
|
|
|
|
|
53
|
|
|
12
|
|
|
|
|
394
|
|
4
|
12
|
|
|
12
|
|
530
|
use Mock::Data::Charset; |
|
12
|
|
|
|
|
25
|
|
|
12
|
|
|
|
|
417
|
|
5
|
12
|
|
|
12
|
|
83
|
use Mock::Data::Util qw( _parse_context _escape_str ); |
|
12
|
|
|
|
|
24
|
|
|
12
|
|
|
|
|
85
|
|
6
|
|
|
|
|
|
|
require Carp; |
7
|
|
|
|
|
|
|
require Scalar::Util; |
8
|
|
|
|
|
|
|
require List::Util; |
9
|
|
|
|
|
|
|
require Hash::Util; |
10
|
|
|
|
|
|
|
require Mock::Data::Generator; |
11
|
|
|
|
|
|
|
our @ISA= ( 'Mock::Data::Generator' ); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# ABSTRACT: Generator that uses a Regex as a template to generate strings |
14
|
|
|
|
|
|
|
our $VERSION = '0.03'; # VERSION |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub new { |
18
|
22
|
|
|
22
|
1
|
46386
|
my $class= shift; |
19
|
|
|
|
|
|
|
my %self= @_ == 1 && (!ref $_[0] || ref $_[0] eq 'Regexp')? ( regex => $_[0] ) |
20
|
22
|
50
|
66
|
|
|
175
|
: @_ == 1? %{$_[0]} |
|
0
|
100
|
|
|
|
0
|
|
21
|
|
|
|
|
|
|
: @_; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# If called on an object, carry over some settings |
24
|
22
|
50
|
|
|
|
52
|
if (ref $class) { |
25
|
0
|
|
|
|
|
0
|
%self= ( %$class, %self ); |
26
|
|
|
|
|
|
|
# Make sure we didn't copy a regex without a matching regex_parse_tree, or vice versa |
27
|
0
|
0
|
0
|
|
|
0
|
if ($self{regex} == $class->{regex} xor $self{regex_parse_tree} == $class->{regex_parse_tree}) { |
28
|
0
|
0
|
|
|
|
0
|
delete $self{regex_parse_tree} if $self{regex_parse_tree} == $class->{regex_parse_tree}; |
29
|
0
|
0
|
|
|
|
0
|
delete $self{regex} if $self{regex} == $class->{regex}; |
30
|
|
|
|
|
|
|
} |
31
|
0
|
|
|
|
|
0
|
$class= ref $class; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
22
|
50
|
|
|
|
49
|
defined $self{regex} or Carp::croak "Attribute 'regex' is required"; |
35
|
22
|
50
|
|
|
|
51
|
$self{regex}= qr/$self{regex}/ unless ref $self{regex} eq 'Regexp'; |
36
|
|
|
|
|
|
|
# Must be parsed eventually, so might as well do it now and see the errors right away |
37
|
22
|
|
33
|
|
|
89
|
$self{regex_parse_tree} ||= $class->parse($self{regex}); |
38
|
22
|
100
|
100
|
|
|
59
|
$self{max_codepoint} //= 0x7F if $self{regex_parse_tree}->flags->{a}; |
39
|
|
|
|
|
|
|
|
40
|
22
|
50
|
0
|
|
|
52
|
$self{prefix} //= Mock::Data::Util::coerce_generator($self{prefix}) if defined $self{prefix}; |
41
|
22
|
50
|
0
|
|
|
43
|
$self{suffix} //= Mock::Data::Util::coerce_generator($self{suffix}) if defined $self{suffix}; |
42
|
|
|
|
|
|
|
|
43
|
22
|
|
|
|
|
66
|
return bless \%self, $class; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
47
|
0
|
|
|
0
|
1
|
0
|
sub regex { $_[0]{regex} } |
48
|
|
|
|
|
|
|
|
49
|
205
|
|
|
205
|
1
|
455
|
sub regex_parse_tree { $_[0]{regex_parse_tree} } |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub min_codepoint { |
53
|
|
|
|
|
|
|
$_[0]{min_codepoint} |
54
|
203
|
|
|
203
|
1
|
482
|
} |
55
|
|
|
|
|
|
|
|
56
|
203
|
|
|
203
|
1
|
586
|
sub max_codepoint { $_[0]{max_codepoint} } |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
|
59
|
205
|
50
|
|
205
|
1
|
824
|
sub max_repetition { $_[0]{max_repetition} || '+8' } |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub prefix { |
63
|
0
|
0
|
|
0
|
1
|
0
|
if (@_ > 1) { |
64
|
0
|
|
|
|
|
0
|
$_[0]{prefix}= Mock::Data::Util::coerce_generator($_[1]); |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
$_[0]{prefix} |
67
|
0
|
|
|
|
|
0
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub suffix { |
70
|
0
|
0
|
|
0
|
1
|
0
|
if (@_ > 1) { |
71
|
0
|
|
|
|
|
0
|
$_[0]{suffix}= Mock::Data::Util::coerce_generator($_[1]); |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
$_[0]{suffix} |
74
|
0
|
|
|
|
|
0
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub generate { |
78
|
205
|
|
|
205
|
1
|
10324
|
my ($self, $mockdata)= (shift,shift); |
79
|
205
|
100
|
|
|
|
488
|
my %opts= ref $_[0] eq 'HASH'? %{$_[0]} : (); |
|
23
|
|
|
|
|
99
|
|
80
|
205
|
|
100
|
|
|
840
|
$opts{max_codepoint} //= $self->max_codepoint; |
81
|
205
|
|
100
|
|
|
668
|
$opts{min_codepoint} //= $self->min_codepoint; |
82
|
205
|
|
33
|
|
|
609
|
$opts{max_repetition} //= $self->max_repetition; |
83
|
205
|
|
|
|
|
453
|
my $out= $self->_str_builder($mockdata, \%opts); |
84
|
|
|
|
|
|
|
$self->regex_parse_tree->generate($out) |
85
|
|
|
|
|
|
|
# is the string allowed to end here? Requirement of '' is generated by $ and \Z |
86
|
205
|
50
|
66
|
|
|
415
|
&& (!$out->next_req || (grep $_ eq '', @{ $out->next_req })) |
|
|
|
66
|
|
|
|
|
87
|
|
|
|
|
|
|
or Carp::croak "Regex assertions could not be met (such as '^' or '\$'). Final attempt was: \""._escape_str($out->str)."\""; |
88
|
205
|
|
66
|
|
|
664
|
my $prefix= $opts{prefix} // $self->{prefix}; |
89
|
205
|
|
66
|
|
|
541
|
my $suffix= $opts{suffix} // $self->{suffix}; |
90
|
205
|
100
|
66
|
|
|
696
|
return $out->str unless defined $prefix || defined $suffix; |
91
|
|
|
|
|
|
|
|
92
|
19
|
|
|
|
|
36
|
my $str= $out->str; |
93
|
|
|
|
|
|
|
# A prefix can only be added if there was not a beginning-of-string assertion, or if |
94
|
|
|
|
|
|
|
# it was a ^/m assertion (flagged as "LF") |
95
|
19
|
50
|
66
|
|
|
55
|
if ($prefix && (!$out->start || $out->start eq 'LF')) { |
|
|
|
66
|
|
|
|
|
96
|
19
|
|
|
|
|
55
|
my $p= Mock::Data::Util::coerce_generator($prefix)->generate($mockdata); |
97
|
19
|
100
|
|
|
|
64
|
$p .= "\n" if $out->start; |
98
|
19
|
|
|
|
|
48
|
$str= $p . $str; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
# A suffix can only be added if there was not an end-of-string assertion, or if |
101
|
|
|
|
|
|
|
# the next assertion allows "\n" and there is no assertion after that. |
102
|
19
|
100
|
100
|
|
|
59
|
if ($suffix && (!$out->next_req || (grep $_ eq "\n", @{ $out->next_req }) && !$out->require->[1])) { |
|
|
|
66
|
|
|
|
|
103
|
18
|
100
|
|
|
|
35
|
$str .= "\n" if $out->next_req; |
104
|
18
|
|
|
|
|
41
|
$str .= Mock::Data::Util::coerce_generator($suffix)->generate($mockdata); |
105
|
|
|
|
|
|
|
} |
106
|
19
|
|
|
|
|
122
|
return $str; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub parse { |
111
|
40
|
|
|
40
|
1
|
20433
|
my ($self, $regex)= @_; |
112
|
40
|
|
|
|
|
157
|
return $self->_parse_regex({}) for "$regex"; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub get_charset { |
116
|
0
|
|
|
0
|
1
|
0
|
my $self= shift; |
117
|
0
|
|
|
|
|
0
|
my $p= $self->regex_parse_tree->pattern; |
118
|
0
|
0
|
0
|
|
|
0
|
return Scalar::Util::blessed($p) && $p->isa('Mock::Data::Charset')? $p : undef; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
our %_regex_syntax_unsupported= ( |
122
|
|
|
|
|
|
|
'' => { map { $_ => 1 } qw( $ ) }, |
123
|
|
|
|
|
|
|
'\\' => { map { $_ => 1 } qw( B b A Z z G g K k ) }, |
124
|
|
|
|
|
|
|
); |
125
|
|
|
|
|
|
|
our %_parse_regex_backslash= ( |
126
|
|
|
|
|
|
|
map +( $_ => $Mock::Data::Charset::_parse_charset_backslash{$_} ), |
127
|
|
|
|
|
|
|
qw( a b c e f n N o r t x 0 1 2 3 4 5 6 7 8 9 ) |
128
|
|
|
|
|
|
|
); |
129
|
|
|
|
|
|
|
sub _parse_regex { |
130
|
91
|
|
|
91
|
|
138
|
my $self= shift; |
131
|
91
|
|
50
|
|
|
178
|
my $flags= shift || {}; |
132
|
91
|
|
|
|
|
132
|
my $expr= []; |
133
|
91
|
|
|
|
|
123
|
my @or; |
134
|
91
|
|
|
|
|
108
|
while (1) { |
135
|
|
|
|
|
|
|
# begin parenthetical sub-expression? |
136
|
220
|
100
|
|
|
|
609
|
if (/\G \( (\?)? /gcx) { |
137
|
51
|
|
|
|
|
80
|
my $sub_flags= $flags; |
138
|
51
|
100
|
|
|
|
120
|
if (defined $1) { |
139
|
|
|
|
|
|
|
# leading question mark means regex flags. This only supports the ^...: one: |
140
|
40
|
50
|
0
|
|
|
158
|
if (/\G \^ ( \w* ) : /gcx) { |
|
|
0
|
|
|
|
|
|
141
|
40
|
|
|
|
|
66
|
$sub_flags= {}; |
142
|
40
|
|
|
|
|
142
|
++$sub_flags->{$_} for split '', $1; |
143
|
|
|
|
|
|
|
} elsif ($] < 5.020 and /\G (\w*)-\w* : /gcx) { |
144
|
0
|
|
|
|
|
0
|
$sub_flags= {}; |
145
|
0
|
|
|
|
|
0
|
++$sub_flags->{$_} for split '', $1; |
146
|
|
|
|
|
|
|
} else { |
147
|
0
|
|
|
|
|
0
|
Carp::croak("Unsupported regex feature '(?".substr($_,pos,1)."'"); |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
} |
150
|
51
|
|
|
|
|
87
|
my $pos= pos; |
151
|
51
|
|
|
|
|
124
|
push @$expr, $self->_parse_regex($sub_flags); |
152
|
51
|
50
|
|
|
|
182
|
/\G \) /gcx |
153
|
|
|
|
|
|
|
or die "Missing end-parenthesee, started at '"._parse_context($pos)."'"; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
# end sub-expression or next alternation? |
156
|
220
|
100
|
33
|
|
|
893
|
if (/\G ( [|)] ) /gcx) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# end of sub-expression, return. |
158
|
56
|
100
|
|
|
|
123
|
if ($1 eq ')') { |
159
|
|
|
|
|
|
|
# back it up so the caller knows why we exited |
160
|
51
|
|
|
|
|
137
|
--pos; |
161
|
51
|
|
|
|
|
115
|
last; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
# else begin next piece of @or |
164
|
5
|
|
|
|
|
11
|
push @or, $self->_node($expr, $flags); |
165
|
5
|
|
|
|
|
9
|
$expr= []; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
# character class? |
168
|
|
|
|
|
|
|
elsif (/\G ( \[ | \\w | \\W | \\s | \\S | \\d | \\D | \\N | \\Z | \. | \^ | \$ ) /gcx) { |
169
|
25
|
100
|
|
|
|
108
|
if ($1 eq '[') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# parse function continues to operate on $_ at pos() |
171
|
5
|
|
|
|
|
17
|
my $parse= Mock::Data::Charset::_parse_charset($flags); |
172
|
5
|
|
|
|
|
22
|
push @$expr, $self->_charset_node($parse, $flags); |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
elsif (ord $1 == ord '\\') { |
175
|
5
|
100
|
|
|
|
11
|
if ($1 eq "\\Z") { |
176
|
1
|
|
|
|
|
4
|
push @$expr, $self->_assertion_node(end => 1, flags => $flags); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
else { |
179
|
4
|
|
|
|
|
11
|
push @$expr, $self->_charset_node(notation => $1, $flags); |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
elsif ($1 eq '.') { |
183
|
2
|
50
|
|
|
|
10
|
push @$expr, $self->_charset_node(classes => [ $flags->{s}? 'Any' : '\\N' ], $flags); |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
elsif ($1 eq '$') { |
186
|
7
|
100
|
|
|
|
24
|
push @$expr, $self->_assertion_node(end => ($flags->{m}? 'LF' : 'FinalLF'), flags => $flags); |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
elsif ($1 eq '^') { |
189
|
6
|
100
|
|
|
|
19
|
push @$expr, $self->_assertion_node(start => ($flags->{m}? 'LF' : 1 ), flags => $flags); |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
# repetition? |
193
|
|
|
|
|
|
|
elsif (/\G ( \? | \* \?? | \+ \?? | \{ ([0-9]+)? (,)? ([0-9]+)? \} ) /gcx) { |
194
|
24
|
|
|
|
|
40
|
my @rep; |
195
|
24
|
100
|
|
|
|
88
|
if ($1 eq '?') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
196
|
1
|
|
|
|
|
3
|
@rep= (0,1); |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
elsif (ord $1 == ord '*') { |
199
|
5
|
|
|
|
|
13
|
@rep= (0); |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
elsif (ord $1 == ord '+') { |
202
|
13
|
|
|
|
|
29
|
@rep= (1); |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
else { |
205
|
5
|
100
|
100
|
|
|
34
|
@rep= $3? ($2||0,$4) : ($2||0,$2); |
|
|
|
50
|
|
|
|
|
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
# What came before this? |
208
|
24
|
50
|
|
|
|
75
|
if (!@$expr) { |
|
|
100
|
|
|
|
|
|
209
|
0
|
|
|
|
|
0
|
die "Found quantifier '$1' before anything to quantify at "._parse_context; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
elsif (!ref $expr->[-1]) { |
212
|
|
|
|
|
|
|
# If the string is composed of more than one character, split the final one |
213
|
|
|
|
|
|
|
# into its own node so that it can have a repetition applied to it. |
214
|
6
|
100
|
|
|
|
14
|
if (length $expr->[-1] > 1) { |
215
|
2
|
|
|
|
|
10
|
push @$expr, $self->_node([ substr($expr->[-1], -1) ], $flags); |
216
|
2
|
|
|
|
|
6
|
substr($expr->[-2], -1)= ''; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
# else its one character, wrap it in a node |
219
|
|
|
|
|
|
|
else { |
220
|
4
|
|
|
|
|
13
|
$expr->[-1]= $self->_node([ $expr->[-1] ], $flags); |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
} |
223
|
24
|
|
|
|
|
93
|
$expr->[-1]->repetition(\@rep) |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
elsif ($flags->{x} && /\G ( \s | [#].* ) /gcx) { |
226
|
|
|
|
|
|
|
# ignore whitespace and comments under /x mode |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
elsif (/\G (\\)? (.) /gcxs) { |
229
|
|
|
|
|
|
|
# Tell users about unsupported features |
230
|
75
|
50
|
100
|
|
|
337
|
die "Unsupported notation: '$1$2'" if $_regex_syntax_unsupported{$1||''}{$2}; |
231
|
75
|
|
|
|
|
104
|
my $ch; |
232
|
75
|
100
|
100
|
|
|
181
|
if ($1 && defined (my $equiv= $_parse_regex_backslash{$2})) { |
233
|
5
|
100
|
|
|
|
20
|
$ch= chr(ref $equiv? $equiv->() : $equiv); |
234
|
|
|
|
|
|
|
} else { |
235
|
70
|
|
|
|
|
120
|
$ch= $2; |
236
|
|
|
|
|
|
|
} |
237
|
75
|
100
|
66
|
|
|
266
|
if ($flags->{i} && (uc $ch ne lc $ch)) { |
|
|
100
|
100
|
|
|
|
|
238
|
3
|
|
|
|
|
13
|
push @$expr, $self->_charset_node(chars => [uc $ch, lc $ch], $flags); |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
elsif (@$expr && !ref $expr->[-1]) { |
241
|
22
|
|
|
|
|
64
|
$expr->[-1] .= $ch; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
else { |
244
|
50
|
|
|
|
|
102
|
push @$expr, $ch; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
else { |
248
|
40
|
|
|
|
|
52
|
last; # end of string |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
} |
251
|
91
|
50
|
100
|
|
|
495
|
return @or? do { push @or, $self->_node($expr, $flags) if @$expr; $self->_or_node(\@or, $flags) } |
|
3
|
100
|
|
|
|
10
|
|
|
3
|
100
|
|
|
|
13
|
|
252
|
|
|
|
|
|
|
: (@$expr > 1 || !ref $expr->[0])? $self->_node($expr, $flags) |
253
|
|
|
|
|
|
|
: $expr->[0]; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
#---------------------------------- |
257
|
|
|
|
|
|
|
# Factory Functions for Parse Nodes |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
sub _node { |
260
|
49
|
|
|
49
|
|
102
|
my ($self, $pattern, $flags)= @_; |
261
|
49
|
|
|
|
|
157
|
Mock::Data::Regex::ParseNode->new({ pattern => $pattern, flags => $flags }); |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
sub _or_node { |
264
|
3
|
|
|
3
|
|
7
|
my ($self, $or_list, $flags)= @_; |
265
|
3
|
|
|
|
|
18
|
Mock::Data::Regex::ParseNode::Or->new({ pattern => $or_list, flags => $flags }); |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
sub _charset_node { |
268
|
14
|
|
|
14
|
|
23
|
my $self= shift; |
269
|
14
|
|
|
|
|
21
|
my $flags= pop; |
270
|
14
|
100
|
|
|
|
88
|
Mock::Data::Regex::ParseNode::Charset->new({ |
271
|
|
|
|
|
|
|
pattern => @_ > 1? { @_ } : shift, |
272
|
|
|
|
|
|
|
flags => $flags |
273
|
|
|
|
|
|
|
}); |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
sub _assertion_node { |
276
|
14
|
|
|
14
|
|
22
|
my $self= shift; |
277
|
14
|
|
|
|
|
59
|
Mock::Data::Regex::ParseNode::Assertion->new({ @_ }); |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
sub _str_builder { |
280
|
205
|
|
|
205
|
|
365
|
my ($self, $mockdata, $opts)= @_; |
281
|
205
|
|
|
|
|
763
|
Mock::Data::Regex::StrBuilder->new({ |
282
|
|
|
|
|
|
|
mockdata => $mockdata, |
283
|
|
|
|
|
|
|
generator => $self, |
284
|
|
|
|
|
|
|
opts => $opts, |
285
|
|
|
|
|
|
|
}); |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
sub _fake_inc { |
289
|
60
|
|
|
60
|
|
300
|
(my $pkg= caller) =~ s,::,/,g; |
290
|
60
|
|
|
|
|
210
|
$INC{$pkg.'.pm'}= $INC{'Mock/Data/Generator/Regex.pm'}; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# ------------------------------ Regex Parse Node ------------------------------------- |
294
|
|
|
|
|
|
|
# The regular parse nodes hold a "pattern" which is an arrayref of literal strings |
295
|
|
|
|
|
|
|
# or nested parse nodes. It supports a "repetition" flag to handle min/max repetitions |
296
|
|
|
|
|
|
|
# of the node as a whole. |
297
|
|
|
|
|
|
|
# Other subclasses are used to handle OR-lists, charsets, and zero-width assertions. |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
package # Do not index |
300
|
|
|
|
|
|
|
Mock::Data::Regex::ParseNode; |
301
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
302
|
|
|
|
|
|
|
|
303
|
66
|
|
|
66
|
0
|
234
|
sub new { bless $_[1], $_[0] } |
304
|
|
|
|
|
|
|
|
305
|
22
|
|
|
22
|
0
|
63
|
sub flags { $_[0]{flags} } |
306
|
|
|
|
|
|
|
sub repetition { |
307
|
436
|
100
|
|
436
|
0
|
2957
|
if (@_ > 1) { |
308
|
|
|
|
|
|
|
# If a quantifier is being applied to a thing that already had a quantifier |
309
|
|
|
|
|
|
|
# (such as /(X*){2}/ ) |
310
|
|
|
|
|
|
|
# multiply them |
311
|
24
|
|
|
|
|
35
|
my $val= $_[1]; |
312
|
24
|
50
|
|
|
|
51
|
if (my $rep= $_[0]{repetition}) { |
313
|
|
|
|
|
|
|
$rep->[$_]= (defined $rep->[$_] && defined $val->[$_]? $rep->[$_] * $val->[$_] : undef) |
314
|
0
|
0
|
0
|
|
|
0
|
for 0, 1; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
else { |
317
|
24
|
|
|
|
|
45
|
$_[0]{repetition}= $_[1]; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
return $_[0]{repetition} |
321
|
436
|
|
|
|
|
840
|
} |
322
|
|
|
|
|
|
|
sub min_repetition { |
323
|
0
|
0
|
|
0
|
0
|
0
|
$_[0]{repetition}? $_[0]{repetition}[0] : 1 |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
sub max_repetition { |
326
|
0
|
0
|
|
0
|
0
|
0
|
$_[0]{repetition}? $_[0]{repetition}[1] : 1 |
327
|
|
|
|
|
|
|
} |
328
|
220
|
|
|
220
|
0
|
10233
|
sub pattern { $_[0]{pattern} } |
329
|
|
|
|
|
|
|
sub generate { |
330
|
329
|
|
|
329
|
0
|
475
|
my ($self, $out)= @_; |
331
|
329
|
100
|
|
|
|
557
|
if (my $rep= $self->repetition) { |
332
|
83
|
|
|
|
|
165
|
my ($min, $n)= ($rep->[0], $out->_random_rep_count($rep)); |
333
|
83
|
|
|
|
|
199
|
for (1 .. $n) { |
334
|
349
|
100
|
|
|
|
666
|
my $origin= $_ > $min? $out->mark : undef; |
335
|
|
|
|
|
|
|
# Plain nodes expect the pattern to be an arrayref where each item is a parse node or a literal |
336
|
349
|
|
|
|
|
450
|
my $success= 1; |
337
|
349
|
|
|
|
|
416
|
for (@{ $self->{pattern} }) { |
|
349
|
|
|
|
|
545
|
|
338
|
389
|
100
|
100
|
|
|
937
|
$success &&= ref $_? $_->generate($out) : $out->append($_); |
339
|
|
|
|
|
|
|
} |
340
|
349
|
100
|
|
|
|
768
|
next if $success; |
341
|
|
|
|
|
|
|
# This repetition failed, but did we meet the requirement already? |
342
|
20
|
50
|
|
|
|
34
|
if ($origin) { |
343
|
20
|
|
|
|
|
46
|
$out->reset($origin); |
344
|
20
|
|
|
|
|
63
|
return 1; |
345
|
|
|
|
|
|
|
} |
346
|
0
|
|
|
|
|
0
|
return 0; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
else { |
350
|
|
|
|
|
|
|
# Plain nodes expect the pattern to be an arrayref where each item is a parse node or a literal |
351
|
246
|
|
|
|
|
326
|
for (@{ $self->{pattern} }) { |
|
246
|
|
|
|
|
443
|
|
352
|
424
|
100
|
|
|
|
923
|
return 0 unless ref $_? $_->generate($out) : $out->append($_); |
|
|
100
|
|
|
|
|
|
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
} |
355
|
301
|
|
|
|
|
818
|
return 1; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# --------------------------------- Regex "OR" Parse Node ---------------------------- |
359
|
|
|
|
|
|
|
# This parse holds a list of options in ->pattern. It chooses one of the options at |
360
|
|
|
|
|
|
|
# random, but then can backtrack if inner parse nodes were not able to match. |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
package # Do not index |
363
|
|
|
|
|
|
|
Mock::Data::Regex::ParseNode::Or; |
364
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
365
|
|
|
|
|
|
|
our @ISA= ('Mock::Data::Regex::ParseNode'); |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub generate { |
368
|
30
|
|
|
30
|
0
|
50
|
my ($self, $out)= @_; |
369
|
30
|
|
|
|
|
47
|
my ($min, $n)= (1,1); |
370
|
30
|
50
|
|
|
|
73
|
if (my $rep= $self->{repetition}) { |
371
|
30
|
|
|
|
|
43
|
$min= $rep->[0]; |
372
|
30
|
|
|
|
|
51
|
$n= $out->_random_rep_count($rep); |
373
|
|
|
|
|
|
|
} |
374
|
30
|
|
|
|
|
67
|
rep: for (1 .. $n) { |
375
|
|
|
|
|
|
|
# OR nodes expect the pattern to be an arrayref where each item is an option |
376
|
|
|
|
|
|
|
# for what could be appended. Need to reset the output after each attempt. |
377
|
119
|
|
|
|
|
211
|
my $origin= $out->mark; |
378
|
|
|
|
|
|
|
# Pick one at random. It will almost always work on the first try, unless the user |
379
|
|
|
|
|
|
|
# has anchor constraints in the pattern. |
380
|
119
|
|
|
|
|
198
|
my $or= $self->pattern; |
381
|
119
|
|
|
|
|
221
|
my $pick= $or->[ rand scalar @$or ]; |
382
|
119
|
50
|
|
|
|
239
|
next rep if ref $pick? $pick->generate($out) : $out->append($pick); |
|
|
100
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# if it fails, try all the others in random order |
384
|
8
|
|
|
|
|
16
|
for (List::Util::shuffle(grep { $_ != $pick } @$or)) { |
|
16
|
|
|
|
|
42
|
|
385
|
|
|
|
|
|
|
# reset output |
386
|
8
|
|
|
|
|
21
|
$out->reset($origin); |
387
|
|
|
|
|
|
|
# append something new |
388
|
8
|
50
|
|
|
|
21
|
next rep if ref $_? $_->generate($out) : $out->append($_); |
|
|
50
|
|
|
|
|
|
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
# None of the options succeeded. Did we get enough reps already? |
391
|
0
|
0
|
|
|
|
0
|
if ($_ > $min) { |
392
|
0
|
|
|
|
|
0
|
$out->reset($origin); |
393
|
0
|
|
|
|
|
0
|
return 1; |
394
|
|
|
|
|
|
|
} |
395
|
0
|
|
|
|
|
0
|
return 0; |
396
|
|
|
|
|
|
|
} |
397
|
30
|
|
|
|
|
78
|
return 1; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# -------------------------------- Regex Charset Parse Node --------------------------- |
401
|
|
|
|
|
|
|
# This node's ->pattern is an instance of Charset. It returns one character |
402
|
|
|
|
|
|
|
# from the set, but also has an optimized handling of the ->repetition flag that generates |
403
|
|
|
|
|
|
|
# multiple characters at once. |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
package # Do not index |
406
|
|
|
|
|
|
|
Mock::Data::Regex::ParseNode::Charset; |
407
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
408
|
|
|
|
|
|
|
our @ISA= ('Mock::Data::Regex::ParseNode'); |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
sub new { |
411
|
14
|
|
|
14
|
0
|
32
|
my ($class, $self)= @_; |
412
|
14
|
50
|
|
|
|
42
|
if (ref $self->{pattern} eq 'HASH') { |
413
|
14
|
100
|
|
|
|
35
|
$self->{pattern}{max_codepoint}= 0x7F if $self->{flags}{a}; |
414
|
14
|
|
|
|
|
43
|
$self->{pattern}= Mock::Data::Util::charset($self->{pattern}); |
415
|
|
|
|
|
|
|
} |
416
|
14
|
|
|
|
|
56
|
bless $self, $class; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
sub generate { |
420
|
76
|
|
|
76
|
0
|
136
|
my ($self, $out)= @_; |
421
|
|
|
|
|
|
|
# Check whether output has a restriction in effect: |
422
|
76
|
50
|
|
|
|
135
|
if (my $req= $out->next_req) { |
423
|
|
|
|
|
|
|
# pick the first requirement which can be matched by this charset |
424
|
0
|
|
|
|
|
0
|
for (@$req) { |
425
|
0
|
0
|
|
|
|
0
|
if (!ref) { |
426
|
|
|
|
|
|
|
# At \Z, can still match if rep count is 0 |
427
|
0
|
0
|
0
|
|
|
0
|
return 1 if length == 0 && $self->min_repetition == 0; |
428
|
0
|
0
|
0
|
|
|
0
|
return $out->append($_) if |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
429
|
|
|
|
|
|
|
length == 1 && $self->pattern->has_member($_) |
430
|
|
|
|
|
|
|
or length > 1 && !(grep !$self->pattern->has_member($_), split //); |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
} |
433
|
0
|
|
|
|
|
0
|
return 0; |
434
|
|
|
|
|
|
|
} |
435
|
76
|
|
|
|
|
148
|
my $n= $out->_random_rep_count($self->repetition); |
436
|
76
|
|
|
|
|
164
|
return $out->append($self->pattern->generate($out->mockdata, $out->opts, $n)); |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# ----------------------------- Regex Assertion Parse Node ------------------------------- |
440
|
|
|
|
|
|
|
# This node doesn't have a ->pattern, and instead holds constraints about what characters |
441
|
|
|
|
|
|
|
# must occur around the current position. Right now it only handles '^' and '$' and '\Z' |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
package # Do not index |
444
|
|
|
|
|
|
|
Mock::Data::Regex::ParseNode::Assertion; |
445
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
446
|
|
|
|
|
|
|
our @ISA= ('Mock::Data::Regex::ParseNode'); |
447
|
|
|
|
|
|
|
|
448
|
0
|
|
|
0
|
0
|
0
|
sub start { $_[0]{start} } |
449
|
0
|
|
|
0
|
0
|
0
|
sub end { $_[0]{end} } |
450
|
|
|
|
|
|
|
sub generate { |
451
|
83
|
|
|
83
|
0
|
141
|
my ($self, $out)= @_; |
452
|
83
|
100
|
|
|
|
174
|
if ($self->{start}) { |
453
|
|
|
|
|
|
|
# Previous character must either be start of string or a newline |
454
|
|
|
|
|
|
|
length $out->str == 0 |
455
|
44
|
100
|
66
|
|
|
76
|
or ($self->{start} eq 'LF' && substr($out->str,-1) eq "\n") |
|
|
|
100
|
|
|
|
|
456
|
|
|
|
|
|
|
or return 0; |
457
|
|
|
|
|
|
|
# Set flag on entire output if this is the first assertion |
458
|
36
|
100
|
66
|
|
|
74
|
$out->start($self->{start}) if length $out->str == 0 && !$out->start; |
459
|
|
|
|
|
|
|
} |
460
|
75
|
100
|
|
|
|
144
|
if ($self->{end}) { |
461
|
|
|
|
|
|
|
# Next character must be a newline, or end of the output |
462
|
|
|
|
|
|
|
# end=1 results from \Z and does not allow the newline |
463
|
39
|
50
|
|
|
|
162
|
$out->require(['',"\n"]) unless $self->{end} eq 1; |
464
|
|
|
|
|
|
|
# If end=LF, the end of string is no longer mandatory once "\n" has been matched. |
465
|
39
|
100
|
|
|
|
94
|
$out->require(['']) unless $self->{end} eq 'LF'; |
466
|
|
|
|
|
|
|
} |
467
|
75
|
|
|
|
|
171
|
return 1; |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
# ------------------------ String Builder ----------------------------------- |
471
|
|
|
|
|
|
|
# This class constructs an output string in ->{str}, and also performs checks |
472
|
|
|
|
|
|
|
# needed by the assertions like ^ and $. It also has the ability to mark a |
473
|
|
|
|
|
|
|
# position and then revert to that position, without copying the entire string |
474
|
|
|
|
|
|
|
# each time. |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
package # Do not index |
477
|
|
|
|
|
|
|
Mock::Data::Regex::StrBuilder; |
478
|
|
|
|
|
|
|
Mock::Data::Regex::_fake_inc(); |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub new { |
481
|
205
|
|
|
205
|
0
|
398
|
my ($class, $self)= @_; |
482
|
205
|
|
50
|
|
|
736
|
$self->{str} //= ''; |
483
|
205
|
|
|
|
|
470
|
bless $self, $class; |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
76
|
|
|
76
|
0
|
143
|
sub mockdata { $_[0]{mockdata} } # Mock::Data instance |
487
|
0
|
|
|
0
|
0
|
0
|
sub generator { $_[0]{generator} } |
488
|
199
|
|
|
199
|
0
|
523
|
sub opts { $_[0]{opts} } |
489
|
477
|
100
|
|
477
|
0
|
880
|
sub start { $_[0]{start}= $_[1] if @_ > 1; $_[0]{start} } |
|
477
|
|
|
|
|
984
|
|
490
|
310
|
|
|
310
|
0
|
1267
|
sub str { $_[0]{str} } # string being built |
491
|
|
|
|
|
|
|
sub _random_rep_count { |
492
|
189
|
|
|
189
|
|
282
|
my ($self, $rep)= @_; |
493
|
189
|
100
|
|
|
|
392
|
return 1 unless defined $rep; |
494
|
149
|
100
|
|
|
|
346
|
return $rep->[0] + int rand($rep->[1] - $rep->[0] + 1) |
495
|
|
|
|
|
|
|
if defined $rep->[1]; |
496
|
123
|
|
50
|
|
|
220
|
my $range= $self->opts->{max_repetition} // '+8'; |
497
|
123
|
50
|
|
|
|
593
|
return $rep->[0] + int rand($range+1) |
498
|
|
|
|
|
|
|
if ord $range == ord '+'; |
499
|
0
|
|
|
|
|
0
|
$range -= $rep->[0]; |
500
|
0
|
0
|
|
|
|
0
|
return $range > 0? $rep->[0] + int rand($range+1) : $rep->[0]; |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
sub require { |
504
|
61
|
100
|
|
61
|
0
|
122
|
push @{ $_[0]{require} }, $_[1] if @_ > 1; |
|
58
|
|
|
|
|
127
|
|
505
|
61
|
|
|
|
|
107
|
return $_[0]{require}; |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
sub next_req { |
508
|
1043
|
|
66
|
1043
|
0
|
2741
|
return $_[0]{require} && $_[0]{require}[0]; |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
sub append { |
511
|
683
|
|
|
683
|
0
|
1115
|
my ($self, $content)= @_; |
512
|
683
|
100
|
|
|
|
1067
|
if (my $req= $self->next_req) { |
513
|
|
|
|
|
|
|
# the provided output must be coerced to one of these options, if possible |
514
|
|
|
|
|
|
|
# TODO: need new ideas for this code. Or just give up on the plan of supporting |
515
|
|
|
|
|
|
|
# lookaround assertions and focus on a simple implemention of "\n" checks for ^/$ |
516
|
20
|
|
|
|
|
36
|
for (@$req) { |
517
|
40
|
50
|
|
|
|
62
|
if (!ref) { # next text must match a literal string. '' means end-of-string |
518
|
40
|
50
|
66
|
|
|
111
|
if (length && $content eq $_) { |
519
|
0
|
|
|
|
|
0
|
$self->{str} .= $content; |
520
|
0
|
|
|
|
|
0
|
shift @{ $self->require }; # requirement complete |
|
0
|
|
|
|
|
0
|
|
521
|
0
|
|
|
|
|
0
|
return 1; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
else { |
525
|
|
|
|
|
|
|
# TODO: support for "lookaround" assertions, will require regex match |
526
|
0
|
|
|
|
|
0
|
die "Unimplemented: zero-width lookaround assertions"; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
} |
529
|
20
|
|
|
|
|
54
|
return 0; # no match found for the restriction in effect |
530
|
|
|
|
|
|
|
} |
531
|
663
|
|
|
|
|
1186
|
$self->{str} .= $content; |
532
|
663
|
|
|
|
|
1555
|
return 1; |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
sub mark { |
535
|
400
|
|
|
400
|
0
|
526
|
my $self= shift; |
536
|
400
|
|
|
|
|
675
|
my $len= $self->{lastmark}= length $self->{str}; |
537
|
400
|
|
|
|
|
545
|
my $req= $self->{require}; |
538
|
400
|
100
|
|
|
|
861
|
return [ \$self->{str}, $len, $req? [ @$req ] : undef, $self->start ]; |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
sub reset { |
541
|
28
|
|
|
28
|
0
|
44
|
my ($self, $origin)= @_; |
542
|
|
|
|
|
|
|
# If the string is a different instance than before, go back to that instance |
543
|
0
|
|
|
|
|
0
|
Hash::Util::hv_store(%$self, 'str', ${$origin->[0]}) |
544
|
28
|
50
|
|
|
|
72
|
unless \$self->{str} == $origin->[0]; |
545
|
|
|
|
|
|
|
# Reset the string to the original length |
546
|
28
|
|
|
|
|
66
|
substr($self->{str}, $origin->[1])= ''; |
547
|
28
|
|
|
|
|
53
|
$self->{require}= $origin->[2]; |
548
|
28
|
|
|
|
|
48
|
$self->{start}= $origin->[3]; |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
1; |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
__END__ |