line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Validate::Perl; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
57665
|
use 5.006; |
|
1
|
|
|
|
|
4
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
20
|
|
5
|
1
|
|
|
1
|
|
4
|
use warnings FATAL => 'all'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
36
|
|
6
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1387
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @ISA = qw/Exporter/; |
11
|
|
|
|
|
|
|
our @EXPORT = qw/gen_yp_rules/; |
12
|
|
|
|
|
|
|
our @EXPORT_OK = qw/gen_yp_rules/; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Data::Validate::Perl - validates in-memory perl data using a specification |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Continue reading only when you want to generate L grammar |
21
|
|
|
|
|
|
|
from the specification file and patch it, or understand how it works |
22
|
|
|
|
|
|
|
internally, else please look into C command-line |
23
|
|
|
|
|
|
|
utility documentation instead. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use Data::Validate::Perl qw/gen_yp_rules/; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
my $yapp_grammar = gen_yp_rules($spec_file); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 EXPORTS |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=over |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=item gen_yp_rules |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=back |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 SUBROUTINES |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=over |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=item gen_yp_rules |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
This function contains the main logic to parse the data specification |
44
|
|
|
|
|
|
|
and translate it to L grammar. Returns the grammar string |
45
|
|
|
|
|
|
|
on success. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=back |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=cut |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub gen_yp_rules { |
53
|
0
|
|
|
0
|
1
|
|
my ( $spec_file, ) = @_; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# contains all the rules defined |
56
|
0
|
|
|
|
|
|
my %rule = (); |
57
|
|
|
|
|
|
|
# contains all the rules being references in rule body |
58
|
0
|
|
|
|
|
|
my %rule_required = (); |
59
|
0
|
|
|
|
|
|
my $start; |
60
|
|
|
|
|
|
|
{ |
61
|
0
|
|
|
|
|
|
my %lhs_type = ('%' => 'HASH', '@' => 'ARRAY', '$' => 'SCALAR'); |
|
0
|
|
|
|
|
|
|
62
|
0
|
|
|
|
|
|
my %rhs_type = (%lhs_type, '\'' => 'SYMBOL',); |
63
|
0
|
|
|
|
|
|
my $lhs_type_regex = join('|', map { '\\'. $_ } sort keys %lhs_type); |
|
0
|
|
|
|
|
|
|
64
|
0
|
|
|
|
|
|
my $rhs_type_regex = join('|', map { '\\'. $_ } sort keys %rhs_type); |
|
0
|
|
|
|
|
|
|
65
|
0
|
0
|
|
|
|
|
open my $F, '<', $spec_file or croak "cannot open file to read: $!"; |
66
|
0
|
|
|
|
|
|
my %rule_map = (); |
67
|
0
|
|
|
|
|
|
while (<$F>) { |
68
|
0
|
|
|
|
|
|
chomp; |
69
|
0
|
0
|
|
|
|
|
next if /^\s*#/io; |
70
|
0
|
|
|
|
|
|
my @l = split /\s*\:\s*/io, $_; |
71
|
0
|
0
|
|
|
|
|
croak "invalid rule line: $_" if @l != 2; |
72
|
0
|
|
|
|
|
|
my ( $k, $v ) = @l; |
73
|
0
|
0
|
|
|
|
|
croak "invalid rule name: $k" if $k !~ /^($lhs_type_regex)(\w+)$/io; |
74
|
|
|
|
|
|
|
# key = name:type |
75
|
0
|
|
|
|
|
|
my $type = $lhs_type{$1}; |
76
|
0
|
|
|
|
|
|
my $name = $2; |
77
|
0
|
|
|
|
|
|
my $key = join(':', $name, $type); |
78
|
0
|
|
|
|
|
|
my @v = (); |
79
|
0
|
|
|
|
|
|
foreach my $i (split /\s+/io, $v) { |
80
|
0
|
0
|
|
|
|
|
if ($i =~ /^($rhs_type_regex)(?:\(((?:\w+|\*))\))?(\w+)$/io) { |
81
|
0
|
|
|
|
|
|
my $t = $rhs_type{$1}; |
82
|
0
|
|
|
|
|
|
my $k = $2; |
83
|
0
|
|
|
|
|
|
my $n = $3; |
84
|
0
|
0
|
0
|
|
|
|
croak "left-hand side must be a hash: $name" if $k and $type ne 'HASH'; |
85
|
0
|
0
|
0
|
|
|
|
$k = $n if $type eq 'HASH' and !$k; |
86
|
|
|
|
|
|
|
# [ name, type ] |
87
|
0
|
0
|
|
|
|
|
push @v, $type eq 'HASH' ? [ $n, $t, $k ] : [ $n, $t ]; |
88
|
0
|
|
|
|
|
|
$rule_required{join(':', $n, $t)} = $t; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
else { |
91
|
0
|
|
|
|
|
|
croak "invalid rule item: $i"; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
} |
94
|
0
|
0
|
|
|
|
|
croak "duplicate rule declaration: $k" if exists $rule_map{$key}; |
95
|
0
|
|
|
|
|
|
$rule{$key} = [ @v ]; |
96
|
|
|
|
|
|
|
# first declared rule is start |
97
|
0
|
0
|
|
|
|
|
$start = $key if !defined $start; |
98
|
0
|
|
|
|
|
|
$rule_map{$key}++; |
99
|
|
|
|
|
|
|
} |
100
|
0
|
|
|
|
|
|
close $F; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
# create the rules which have been referenced but not declared |
103
|
|
|
|
|
|
|
# they are simple arrays or hashes which contains text key/value |
104
|
0
|
|
|
|
|
|
foreach my $k (keys %rule_required) { |
105
|
0
|
0
|
|
|
|
|
if (!exists $rule{$k}) { |
106
|
0
|
0
|
0
|
|
|
|
if ($rule_required{$k} eq 'ARRAY' or $rule_required{$k} eq 'HASH') { |
|
|
0
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# simple array or hash |
108
|
0
|
|
|
|
|
|
$rule{$k} = []; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
elsif ($rule_required{$k} eq 'SCALAR') { |
111
|
0
|
|
|
|
|
|
$rule{$k} = []; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
} |
115
|
0
|
0
|
|
|
|
|
croak "$start rule declaration not found" if !exists $rule{$start}; |
116
|
|
|
|
|
|
|
# if ($::opt{d}) { |
117
|
|
|
|
|
|
|
# require Data::Dumper; |
118
|
|
|
|
|
|
|
# no warnings 'once'; |
119
|
|
|
|
|
|
|
# local $Data::Dumper::Indent = 1; |
120
|
|
|
|
|
|
|
# print STDERR Data::Dumper::Dumper(\%rule), "\n"; |
121
|
|
|
|
|
|
|
# } |
122
|
|
|
|
|
|
|
|
123
|
0
|
|
|
|
|
|
my $yapp = "%%\n"; |
124
|
0
|
|
|
|
|
|
my $count = 0; |
125
|
0
|
|
|
|
|
|
my @stack = ( [ $start, $count++ ], ); |
126
|
|
|
|
|
|
|
my $cb_process_children = sub { |
127
|
0
|
|
|
0
|
|
|
my ( $children, ) = @_; |
128
|
|
|
|
|
|
|
|
129
|
0
|
|
|
|
|
|
for (my $i = 0; $i < @$children; $i++) { |
130
|
0
|
|
|
|
|
|
my $child= $children->[$i]; |
131
|
0
|
|
|
|
|
|
my $key = join(':', $child->[0], $child->[1]); |
132
|
0
|
|
|
|
|
|
my $name = $child->[0]; |
133
|
0
|
|
|
|
|
|
my $type = $child->[1]; |
134
|
0
|
|
|
|
|
|
my $cnt = $count++; |
135
|
0
|
0
|
|
|
|
|
if (exists $rule{$key}) { |
136
|
0
|
0
|
0
|
|
|
|
if ($type eq 'ARRAY' or $type eq 'HASH') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
137
|
0
|
|
|
|
|
|
push @stack, [ $key, $cnt ]; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
elsif ($type eq 'SCALAR') { |
140
|
0
|
|
|
|
|
|
push @stack, [ $key, $cnt ]; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
elsif ($type eq 'SYMBOL') { |
143
|
|
|
|
|
|
|
# NOOP: skip |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
else { |
146
|
0
|
|
|
|
|
|
croak "unknown rule type of $name: $type"; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
else { |
150
|
0
|
|
|
|
|
|
croak "internal state error, no such rule: $key"; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
} |
153
|
0
|
|
|
|
|
|
}; |
154
|
0
|
|
|
|
|
|
my $has_list_enum = 0; |
155
|
0
|
|
|
|
|
|
my $has_scalar_enum = 0; |
156
|
0
|
|
|
|
|
|
my $has_simple_hash = 0; |
157
|
0
|
|
|
|
|
|
my $rule_format = 'rule%04d'; |
158
|
0
|
|
|
|
|
|
while (@stack) { |
159
|
0
|
|
|
|
|
|
my $item = shift @stack; |
160
|
0
|
|
|
|
|
|
my $k = $item->[0]; |
161
|
0
|
|
|
|
|
|
my $c = $item->[1]; |
162
|
|
|
|
|
|
|
|
163
|
0
|
|
|
|
|
|
my ( $name, $type, ) = split /:/io, $k, 2; |
164
|
0
|
|
|
|
|
|
my $children = $rule{$k}; |
165
|
0
|
|
|
|
|
|
my $rule = sprintf($rule_format, $c); |
166
|
0
|
0
|
|
|
|
|
if ($type eq 'HASH') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# there shouldn't be any enum (such as 'value) in hash declaration |
168
|
0
|
0
|
|
|
|
|
croak "invalid hash declaration for $name: scalar item found" if grep { $_->[1] eq 'SYMBOL' } @$children; |
|
0
|
|
|
|
|
|
|
169
|
0
|
0
|
|
|
|
|
if (@$children == 0) { |
170
|
|
|
|
|
|
|
# simple hash |
171
|
0
|
|
|
|
|
|
$has_simple_hash++; |
172
|
0
|
|
|
|
|
|
$yapp .= "$rule: '{' my_begin_simple_hash ${rule}_elements my_end_simple_hash '}';\n"; |
173
|
0
|
|
|
|
|
|
$yapp .= "${rule}_elements: TEXT ${rule}_elements | TEXT;\n"; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
else { |
176
|
0
|
|
|
|
|
|
$yapp .= "$rule: '{' ${rule}_elements '}';\n"; |
177
|
0
|
|
|
|
|
|
$yapp .= "${rule}_elements: ${rule}_element ${rule}_elements | ${rule}_element;\n"; |
178
|
|
|
|
|
|
|
$yapp .= "${rule}_element: ". join( |
179
|
0
|
|
|
|
|
|
' | ', map { "'". $children->[$_][2]. "' ". sprintf($rule_format, $count+$_) } 0 .. $#{$children}). ";\n"; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
180
|
0
|
|
|
|
|
|
$cb_process_children->($children); |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
# NOREACH |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
elsif ($type eq 'ARRAY') { |
185
|
0
|
0
|
|
|
|
|
if (grep { $_->[1] eq 'SYMBOL' } @$children) { |
|
0
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# enum array, all the children must be enum in this case |
187
|
|
|
|
|
|
|
croak "invalid array declaration for $name: non scalar item found" if |
188
|
0
|
0
|
|
|
|
|
grep { $_->[1] ne 'SYMBOL' } @$children; |
|
0
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
|
$has_list_enum++; |
190
|
0
|
|
|
|
|
|
$yapp .= "$rule: '[' my_begin_list_enum ${rule}_items my_end_list_enum ']';\n"; |
191
|
0
|
|
|
|
|
|
$yapp .= "${rule}_items: ${rule}_item ${rule}_items | ${rule}_item;\n"; |
192
|
0
|
|
|
|
|
|
$yapp .= "${rule}_item: ". join(' | ', map { "'$_'" } map { $_->[0] } @$children). ";\n"; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
else { |
195
|
0
|
|
|
|
|
|
$yapp .= "$rule: '[' ${rule}_items ']';\n"; |
196
|
0
|
0
|
|
|
|
|
if (@$children == 0) { |
197
|
|
|
|
|
|
|
# simple array |
198
|
0
|
|
|
|
|
|
$yapp .= "${rule}_items: TEXT ${rule}_items | TEXT;\n"; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
else { |
201
|
0
|
|
|
|
|
|
$yapp .= "${rule}_items: ${rule}_item ${rule}_items | ${rule}_item;\n"; |
202
|
|
|
|
|
|
|
$yapp .= "${rule}_item: ". join( |
203
|
0
|
|
|
|
|
|
' | ', map { sprintf($rule_format, $count+$_) } 0 .. $#{$children}). ";\n"; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
204
|
0
|
|
|
|
|
|
$cb_process_children->($children); |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
elsif ($type eq 'SCALAR') { |
209
|
0
|
0
|
|
|
|
|
if (@$children == 0) { |
210
|
0
|
|
|
|
|
|
$yapp .= "${rule}: ;\n"; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
else { |
213
|
0
|
0
|
|
|
|
|
croak "only constant values permitted for scalar rule" if grep { $_->[1] ne 'SYMBOL' } @$children; |
|
0
|
|
|
|
|
|
|
214
|
0
|
|
|
|
|
|
$has_scalar_enum++; |
215
|
0
|
|
|
|
|
|
$yapp .= "${rule}: my_begin_scalar_enum ${rule}_value my_end_scalar_enum;\n"; |
216
|
0
|
|
|
|
|
|
$yapp .= "${rule}_value: ". join(' | ', map { "'". $_->[0]. "'" } @$children). ";\n"; |
|
0
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
elsif ($type eq 'SYMBOL') { |
220
|
|
|
|
|
|
|
# there shouldn't be any symbol item being pushed onto stack |
221
|
0
|
|
|
|
|
|
croak "internal state error: $type item on stack"; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
else { |
224
|
0
|
|
|
|
|
|
croak "unknown type of rule $name: $type"; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
} |
227
|
0
|
0
|
|
|
|
|
$yapp .= <<'EOL' if $has_list_enum; |
228
|
|
|
|
|
|
|
my_begin_list_enum: { $_[0]->YYData->{_flag}->{list_enum} = 1 }; |
229
|
|
|
|
|
|
|
my_end_list_enum: { $_[0]->YYData->{_flag}->{list_enum} = 0 }; |
230
|
|
|
|
|
|
|
EOL |
231
|
0
|
0
|
|
|
|
|
$yapp .= <<'EOL' if $has_simple_hash; |
232
|
|
|
|
|
|
|
my_begin_simple_hash: { $_[0]->YYData->{_flag}->{simple_hash} = 1 }; |
233
|
|
|
|
|
|
|
my_end_simple_hash : { $_[0]->YYData->{_flag}->{simple_hash} = 0 }; |
234
|
|
|
|
|
|
|
EOL |
235
|
0
|
0
|
|
|
|
|
$yapp .= <<'EOL' if $has_scalar_enum; |
236
|
|
|
|
|
|
|
my_begin_scalar_enum: { $_[0]->YYData->{_flag}->{scalar_enum} = 1 }; |
237
|
|
|
|
|
|
|
my_end_scalar_enum : { $_[0]->YYData->{_flag}->{scalar_enum} = 0 }; |
238
|
|
|
|
|
|
|
EOL |
239
|
0
|
|
|
|
|
|
$yapp .= "%%\n"; |
240
|
0
|
0
|
|
|
|
|
print STDERR $yapp if $::opt{v}; |
241
|
0
|
|
|
|
|
|
$yapp .= do { local $/; }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
242
|
0
|
|
|
|
|
|
return $yapp; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=head1 DESCRIPTION |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
In order to understand internal of this module, working knowledge of |
248
|
|
|
|
|
|
|
parsing, especially Yacc is required. Stop and grab a book on topic if |
249
|
|
|
|
|
|
|
you are unsure what this is. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
A common parsing mechanism applies state machine onto a string, such |
252
|
|
|
|
|
|
|
as regular expression. This part is easy to follow. In this module a |
253
|
|
|
|
|
|
|
Yacc state machine is used, the target is not plain text but a |
254
|
|
|
|
|
|
|
in-memory data structure - a tree made up by several perl |
255
|
|
|
|
|
|
|
scalar/array/hash items. |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
The process to validate a data structure like that is a tree |
258
|
|
|
|
|
|
|
traversal. The biggest challenge is how to put these 2 things |
259
|
|
|
|
|
|
|
together. |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
The best way to figure a solution is, imagine each step to perform a |
262
|
|
|
|
|
|
|
depth-first iteration on a tree. Each move can be abstracted as a |
263
|
|
|
|
|
|
|
'token'. This is the key idea behind. |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
To elaborate, think how to validate a simple perl hash like below: |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
my %hash = (key1 => value1, key2 => value2, key3 => value3); |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
To iterate the hash key/value pairs, use a cursor to describe the |
270
|
|
|
|
|
|
|
following states: |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
1. initial state: place the cursor onto hash itself; |
273
|
|
|
|
|
|
|
2. 1st state: move cursor to key1; |
274
|
|
|
|
|
|
|
3. 2nd state: move cursor to value1; |
275
|
|
|
|
|
|
|
4. 3rd state: move cursor to key2; |
276
|
|
|
|
|
|
|
5. 4th state: move cursor to value2; |
277
|
|
|
|
|
|
|
6. 5th state: move cursor to key3; |
278
|
|
|
|
|
|
|
7. 6th state: move cursor to value3; |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
A draft Yacc grammar written as: |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
root_of_hash: key1 value1 | key2 value2 | key3 value3 |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
The state machine needs token to decide which sub-rule to walk |
285
|
|
|
|
|
|
|
into. Looking onto the key1/2/3, the corresponding token can |
286
|
|
|
|
|
|
|
simply be the value of themselves. That is: |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
root_of_hash: 'key1' value1 | 'key2' value2 | 'key3' value3 |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
Note the quotes, they mark key1/2/3 as tokens. Next move to the hash |
291
|
|
|
|
|
|
|
value. When the cursor points to a value, I do not care about the |
292
|
|
|
|
|
|
|
actual value, instead I just want to hint the state machine that it is |
293
|
|
|
|
|
|
|
a value. It requires another token to accept the state. How about a |
294
|
|
|
|
|
|
|
plain text token - 'TEXT'. Finally the grammar to be: |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
root_of_hash: 'key1' 'TEXT' | 'key2' 'TEXT' | 'key3' 'TEXT' |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
How to apply the generated state machine to the hash validation then? |
299
|
|
|
|
|
|
|
Each time the parser cannot determine which is next state, it asks the |
300
|
|
|
|
|
|
|
lexer for a token. The simplest form of a lexer is just a function to |
301
|
|
|
|
|
|
|
return the corresponding tokens for each state. At this point, you |
302
|
|
|
|
|
|
|
might be able to guess how it works: |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
1. state machine initialized, it wants to move to next state, so it asks lexer; |
305
|
|
|
|
|
|
|
2. the lexer holds the hash itself, it calls keys function, returns the first key as token, put the key returned into its memory; |
306
|
|
|
|
|
|
|
3. by the time state machine got key1, it moves the cursor onto 'key1', then asks lexer again; |
307
|
|
|
|
|
|
|
4. the lexer checks its memory and figures it returned 'key1' just now, time to return its vlaue, as the state machine has no interest on the actual value, it returns 'TEXT'; |
308
|
|
|
|
|
|
|
5. state machine finished the iteration of key1/value1 pair, asks for another token; |
309
|
|
|
|
|
|
|
6. lexer returns 'key2' and keeps it in its own memory; |
310
|
|
|
|
|
|
|
7. state machine steps into the sub-rule 'key2' 'TEXT'; |
311
|
|
|
|
|
|
|
... |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
The state loop is fairly straightforward. Parsing isn't that |
314
|
|
|
|
|
|
|
difficult, huh :-) |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
To iterate a nested tree full of scalar/array/hash, other tokens are |
317
|
|
|
|
|
|
|
introduced: |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
1. '[' ']' indicates start/end state of array traversal; |
320
|
|
|
|
|
|
|
2. '{' '}' indicates start/end state of hash traversal; |
321
|
|
|
|
|
|
|
3. to meet special need, certain rule actions are defined to set some state flags, which influence the decision that the lexer returns the value as 'TEXT', or the actual value string itself; |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
The state maintenance in lexer is made up by a stack, the stack |
324
|
|
|
|
|
|
|
simulates a depth-first traversal: |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
1. when meets array, iterates array items one by one, if any item is another array or hash, push current array onto the stack together with an index marking where we are in this array. Iterates that item recursively; |
327
|
|
|
|
|
|
|
2. similar strategy is applied to hash; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
The left piece is a DSL to describe the tree structure. By the time |
330
|
|
|
|
|
|
|
you read here, I am fairly confident you are able to figure it out |
331
|
|
|
|
|
|
|
yourself by exercising various pieces of this module, below is a small |
332
|
|
|
|
|
|
|
leaf-note: |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
1. gen_yp_rules function handles translation from data structure spec to corresponding Yacc grammar; |
335
|
|
|
|
|
|
|
2. bottom section of this module contains the Lexer function and other routines L requires to work (browse the module source to read); |
336
|
|
|
|
|
|
|
3. the command-line utility C reads the spec file, calls gen_yp_rules to generate grammar, fits it into a file and calls C to create the parser module; |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
Wish you like this little article and enjoy playing with this module. |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=head1 SEE ALSO |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
* L |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=head1 AUTHOR |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Dongxu Ma, C<< >> |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=head1 BUGS |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
Please report any bugs or feature requests to C
|
351
|
|
|
|
|
|
|
at rt.cpan.org>, or through the web interface at |
352
|
|
|
|
|
|
|
L. |
353
|
|
|
|
|
|
|
I will be notified, and then you'll automatically be notified of |
354
|
|
|
|
|
|
|
progress on your bug as I make changes. |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=head1 SUPPORT |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
perldoc Data::Validate::Perl |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
You can also look for information at: |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=over 4 |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
L |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
L |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=item * CPAN Ratings |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
L |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=item * Search CPAN |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
L |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=back |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
Copyright 2014 Dongxu Ma. |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
389
|
|
|
|
|
|
|
under the terms of the the Artistic License (2.0). You may obtain a |
390
|
|
|
|
|
|
|
copy of the full license at: |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
L |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Any use, modification, and distribution of the Standard or Modified |
395
|
|
|
|
|
|
|
Versions is governed by this Artistic License. By using, modifying or |
396
|
|
|
|
|
|
|
distributing the Package, you accept this license. Do not use, modify, |
397
|
|
|
|
|
|
|
or distribute the Package, if you do not accept this license. |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
If your Modified Version has been derived from a Modified Version made |
400
|
|
|
|
|
|
|
by someone other than you, you are nevertheless required to ensure that |
401
|
|
|
|
|
|
|
your Modified Version complies with the requirements of this license. |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
This license does not grant you the right to use any trademark, service |
404
|
|
|
|
|
|
|
mark, tradename, or logo of the Copyright Holder. |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
This license includes the non-exclusive, worldwide, free-of-charge |
407
|
|
|
|
|
|
|
patent license to make, have made, use, offer to sell, sell, import and |
408
|
|
|
|
|
|
|
otherwise transfer the Package with respect to any patent claims |
409
|
|
|
|
|
|
|
licensable by the Copyright Holder that are necessarily infringed by the |
410
|
|
|
|
|
|
|
Package. If you institute patent litigation (including a cross-claim or |
411
|
|
|
|
|
|
|
counterclaim) against any party alleging that the Package constitutes |
412
|
|
|
|
|
|
|
direct or contributory patent infringement, then this Artistic License |
413
|
|
|
|
|
|
|
to you shall terminate on the date that such litigation is filed. |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER |
416
|
|
|
|
|
|
|
AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. |
417
|
|
|
|
|
|
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
418
|
|
|
|
|
|
|
PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY |
419
|
|
|
|
|
|
|
YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR |
420
|
|
|
|
|
|
|
CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR |
421
|
|
|
|
|
|
|
CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, |
422
|
|
|
|
|
|
|
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=cut |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
1; # End of Data::Validate::Perl |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
__DATA__ |