line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Parse::FSM;
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Parse::FSM - Deterministic top-down parser based on a Finite State Machine
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=cut
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
14
|
|
|
|
|
|
|
|
15
|
7
|
|
|
7
|
|
16076
|
use strict;
|
|
7
|
|
|
|
|
7
|
|
|
7
|
|
|
|
|
164
|
|
16
|
7
|
|
|
7
|
|
21
|
use warnings;
|
|
7
|
|
|
|
|
7
|
|
|
7
|
|
|
|
|
141
|
|
17
|
|
|
|
|
|
|
|
18
|
7
|
|
|
7
|
|
21
|
use Carp; our @CARP_NOT = ('Parse::FSM');
|
|
7
|
|
|
|
|
9
|
|
|
7
|
|
|
|
|
530
|
|
19
|
7
|
|
|
7
|
|
2014
|
use Data::Dump 'dump';
|
|
7
|
|
|
|
|
19527
|
|
|
7
|
|
|
|
|
324
|
|
20
|
7
|
|
|
7
|
|
3297
|
use Text::Template 'fill_in_string';
|
|
7
|
|
|
|
|
14141
|
|
|
7
|
|
|
|
|
334
|
|
21
|
7
|
|
|
7
|
|
2878
|
use File::Slurp;
|
|
7
|
|
|
|
|
52457
|
|
|
7
|
|
|
|
|
612
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our $VERSION = '1.13';
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
use Parse::FSM;
|
30
|
|
|
|
|
|
|
$fsm = Parse::FSM->new;
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$fsm->prolog($text);
|
33
|
|
|
|
|
|
|
$fsm->epilog($text);
|
34
|
|
|
|
|
|
|
$fsm->add_rule($name, @elems, $action);
|
35
|
|
|
|
|
|
|
$fsm->start_rule($name);
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
$fsm->parse_grammar($text);
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
$fsm->write_module($module);
|
40
|
|
|
|
|
|
|
$fsm->write_module($module, $file);
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
$parser = $fsm->parser; # isa Parse::FSM::Driver
|
43
|
|
|
|
|
|
|
$parser->input(\&lexer);
|
44
|
|
|
|
|
|
|
$result = $parser->parse;
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# script
|
47
|
|
|
|
|
|
|
perl -MParse::FSM - Grammar.yp Parser::Module
|
48
|
|
|
|
|
|
|
perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
This module compiles the Finite State Machine used by the
|
53
|
|
|
|
|
|
|
L parser module.
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
It can be used by a sequence of C calls, or by parsing a yacc-like
|
56
|
|
|
|
|
|
|
grammar in one go with C.
|
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
It can be used as a script to generate a module from a grammar file.
|
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The result of compiling the parser can be used immediately by retrieving the
|
61
|
|
|
|
|
|
|
C object, or a pre-compiled module can be written to disk by
|
62
|
|
|
|
|
|
|
C. This module can then be used by the client code of the parser.
|
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
As usual in top-down parsers, left recursion is not supported
|
65
|
|
|
|
|
|
|
and generates an infinite loop. This parser is deterministic and does not implement backtracking.
|
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 METHODS - SETUP
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head2 new
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Creates a new object.
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut
|
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
76
|
|
|
|
|
|
|
use Class::XSAccessor {
|
77
|
6
|
|
|
|
|
41
|
constructor => '_init',
|
78
|
|
|
|
|
|
|
accessors => [
|
79
|
|
|
|
|
|
|
'_tree', # parse tree
|
80
|
|
|
|
|
|
|
# Contains nested HASH tables with the decision tree
|
81
|
|
|
|
|
|
|
# used during parsing.
|
82
|
|
|
|
|
|
|
# Each node maps:
|
83
|
|
|
|
|
|
|
# token => next node / string with action code
|
84
|
|
|
|
|
|
|
# [subrule] => next node / string with action code
|
85
|
|
|
|
|
|
|
# [subrule]? => next node / string with action code
|
86
|
|
|
|
|
|
|
# [subrule]* => next node / string with action code
|
87
|
|
|
|
|
|
|
# __else__ => next node / string with action code
|
88
|
|
|
|
|
|
|
# The first level are the rule names.
|
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
'_state_table', # ARRAY that maps each state ID to the corresponding
|
91
|
|
|
|
|
|
|
# HASH table from tree.
|
92
|
|
|
|
|
|
|
# Copied to the generated parser module.
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
'_action', # map func text => [ sub name, sub text ]
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
'start_rule', # name start rule
|
97
|
|
|
|
|
|
|
'prolog', # code to include near the beginning of the file
|
98
|
|
|
|
|
|
|
'epilog', # code to include at the end of the file
|
99
|
|
|
|
|
|
|
'_names', # keep all generated names up to now, to be able to
|
100
|
|
|
|
|
|
|
# create unique ones
|
101
|
|
|
|
|
|
|
],
|
102
|
6
|
|
|
6
|
|
2347
|
};
|
|
6
|
|
|
|
|
10009
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
105
|
|
|
|
|
|
|
sub new {
|
106
|
70
|
|
|
70
|
1
|
20161
|
my($class) = @_;
|
107
|
70
|
|
|
|
|
333
|
return $class->_init(_tree => {}, _state_table => [], _action => {},
|
108
|
|
|
|
|
|
|
_names => {});
|
109
|
|
|
|
|
|
|
}
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
112
|
|
|
|
|
|
|
# create a new unique name (for actions, sub-rules)
|
113
|
|
|
|
|
|
|
sub _unique_name {
|
114
|
153
|
|
|
153
|
|
135
|
my($self, $name) = @_;
|
115
|
153
|
|
|
|
|
118
|
my $id = 1;
|
116
|
153
|
|
|
|
|
335
|
while (exists $self->_names->{$name.$id}) {
|
117
|
11
|
|
|
|
|
20
|
$id++;
|
118
|
|
|
|
|
|
|
}
|
119
|
153
|
|
|
|
|
262
|
$self->_names->{$name.$id}++;
|
120
|
153
|
|
|
|
|
219
|
return $name.$id;
|
121
|
|
|
|
|
|
|
}
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 METHODS - BUILD GRAMMAR
|
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head2 start_rule
|
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Name of the grammar start rule. It defaults to the first rule added by C.
|
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 prolog, epilog
|
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Perl code to include in the generated module near the start of the generated
|
134
|
|
|
|
|
|
|
module and near the end of it.
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head2 add_rule
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Adds one rule to the parser.
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
$fsm->add_rule($name, @elems, $action);
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
C<$name> is the name of the rule, i.e. the syntactic object recognized
|
143
|
|
|
|
|
|
|
by the rule.
|
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
C<@elems> is the list of elements in sequence needed to recognize this rule.
|
146
|
|
|
|
|
|
|
Each element can be one of:
|
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=over 4
|
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=item *
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
A string that will match with that token type from the lexer.
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
The empty string is used to match the end of input and should
|
155
|
|
|
|
|
|
|
be present in the grammar to force the parser
|
156
|
|
|
|
|
|
|
to accept all the input;
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=item *
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
An array reference of a list of all possible tokens to accept at this position.
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item *
|
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
A subrule name inside square brackets, optionally followed by a
|
165
|
|
|
|
|
|
|
repetition character that asks the parser to recursively descend
|
166
|
|
|
|
|
|
|
to match that subrule at the current input location.
|
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
The accepted forms are:
|
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
C<[term]> - recurse to the term rule;
|
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
C<[term]?> - term is optional;
|
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
C<[term]*> - accept zero or more terms;
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
C<[term]+> - accept one or more terms;
|
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
C<[term]E+,E> - accept one or more terms separated by commas,
|
179
|
|
|
|
|
|
|
any token type can be used instead of the comma;
|
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=back
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
C<$action> is the Perl text of the action executed when the rule is recognized,
|
184
|
|
|
|
|
|
|
i.e. all elements were found in sequence.
|
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
It has to be enclosed in brackets C<{}>, and can use the following lexical
|
187
|
|
|
|
|
|
|
variables that are declared by the generated code:
|
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=over 4
|
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=item *
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
C<$self> : object pointer;
|
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item *
|
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
C<@item> : values of all the tokens or rules identified in this rule. The subrule
|
198
|
|
|
|
|
|
|
call with repetitions return an array reference containing all the found items
|
199
|
|
|
|
|
|
|
in the subrule;
|
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=back
|
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut
|
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
206
|
|
|
|
|
|
|
# add_rule
|
207
|
|
|
|
|
|
|
# Args:
|
208
|
|
|
|
|
|
|
# rule name
|
209
|
|
|
|
|
|
|
# list of : '[rule]' '[rule]*' '[rule]?' '[rule]+' '[rule]<+SEP>' # subrules
|
210
|
|
|
|
|
|
|
# token # tokens
|
211
|
|
|
|
|
|
|
# action : '{ CODE }'
|
212
|
|
|
|
|
|
|
sub add_rule {
|
213
|
151
|
|
|
151
|
1
|
8695
|
my($self, $rule_name, @elems) = @_;
|
214
|
151
|
|
|
|
|
145
|
my $action = pop(@elems);
|
215
|
|
|
|
|
|
|
|
216
|
151
|
100
|
|
|
|
412
|
@elems or croak "missing arguments";
|
217
|
150
|
50
|
|
|
|
439
|
$rule_name =~ /^\w+$/ or croak "invalid rule name ".dump($rule_name);
|
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# check for array-ref @elem and recurse for all alternatives
|
220
|
150
|
|
|
|
|
274
|
for my $i (0 .. $#elems) {
|
221
|
259
|
100
|
|
|
|
419
|
if (ref($elems[$i])) { # isa 'ARRAY', others cause run-time error
|
222
|
2
|
|
|
|
|
2
|
for (@{$elems[$i]}) {
|
|
2
|
|
|
|
|
4
|
|
223
|
4
|
|
|
|
|
16
|
$self->add_rule($rule_name,
|
224
|
|
|
|
|
|
|
@elems[0 .. $i-1], $_, @elems[$i+1 .. $#elems],
|
225
|
|
|
|
|
|
|
$action);
|
226
|
|
|
|
|
|
|
}
|
227
|
2
|
|
|
|
|
5
|
return;
|
228
|
|
|
|
|
|
|
}
|
229
|
|
|
|
|
|
|
}
|
230
|
|
|
|
|
|
|
|
231
|
148
|
|
|
|
|
201
|
$self->_check_start_rule($rule_name);
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# load the tree
|
234
|
148
|
|
|
|
|
145
|
my $tree = $self->_tree;
|
235
|
148
|
|
|
|
|
190
|
$tree = $self->_add_tree_node($tree, $rule_name); # load rule name
|
236
|
|
|
|
|
|
|
|
237
|
148
|
|
|
|
|
202
|
my $comment = "$rule_name :";
|
238
|
|
|
|
|
|
|
|
239
|
148
|
|
|
|
|
218
|
while (@elems) {
|
240
|
252
|
|
|
|
|
197
|
my $elem = shift @elems;
|
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# handle subrule calls with quantifiers
|
243
|
|
|
|
|
|
|
# check if recursing for _add_list_rule
|
244
|
252
|
100
|
100
|
|
|
845
|
if ($rule_name !~ /^_lst_/ &&
|
245
|
|
|
|
|
|
|
$elem =~ /^ \[ .* \] /x) {
|
246
|
68
|
|
|
|
|
95
|
$elem = $self->_add_list_rule($elem);
|
247
|
|
|
|
|
|
|
}
|
248
|
|
|
|
|
|
|
|
249
|
252
|
|
|
|
|
318
|
$tree->{__comment__} = $comment; # way up to this state
|
250
|
|
|
|
|
|
|
|
251
|
252
|
100
|
|
|
|
564
|
$comment .= " ".($elem =~ /^\[/ ? $elem : dump($elem));
|
252
|
|
|
|
|
|
|
|
253
|
252
|
100
|
|
|
|
8303
|
if (@elems) { # not a leaf node
|
254
|
|
|
|
|
|
|
croak "leaf and node at ($comment)"
|
255
|
105
|
100
|
100
|
|
|
344
|
if (exists($tree->{$elem}) && ref($tree->{$elem}) ne 'HASH');
|
256
|
104
|
|
|
|
|
141
|
$tree = $self->_add_tree_node($tree, $elem); # load token
|
257
|
|
|
|
|
|
|
}
|
258
|
|
|
|
|
|
|
else { # leaf node
|
259
|
|
|
|
|
|
|
croak "leaf not unique at ($comment)"
|
260
|
147
|
100
|
|
|
|
406
|
if (exists($tree->{$elem}));
|
261
|
146
|
|
|
|
|
187
|
$self->_add_tree_node($tree, $elem); # create node
|
262
|
146
|
|
|
|
|
179
|
$tree->{$elem} = $self->_add_action($action, $rule_name, $comment);
|
263
|
|
|
|
|
|
|
}
|
264
|
|
|
|
|
|
|
}
|
265
|
|
|
|
|
|
|
|
266
|
142
|
|
|
|
|
267
|
return;
|
267
|
|
|
|
|
|
|
}
|
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
270
|
|
|
|
|
|
|
# add a list subrule, get passed a string '[subrule]*'
|
271
|
|
|
|
|
|
|
sub _add_list_rule {
|
272
|
68
|
|
|
68
|
|
84
|
my($self, $elem) = @_;
|
273
|
|
|
|
|
|
|
|
274
|
68
|
50
|
|
|
|
217
|
$elem =~ /^ \[ (\w+) \] ( [?*+] | <\+.*> )? $/x
|
275
|
|
|
|
|
|
|
or croak "invalid subrule call $elem";
|
276
|
68
|
|
|
|
|
119
|
my($subrule, $quant) = ($1, $2);
|
277
|
|
|
|
|
|
|
|
278
|
68
|
100
|
|
|
|
144
|
return "[$subrule]" unless $quant; # subrule without quatifier
|
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# create a list subrule, so that the result of the repetition is returned
|
281
|
|
|
|
|
|
|
# as an array reference
|
282
|
22
|
|
|
|
|
40
|
my $list_subrule = $self->_unique_name("_lst_".$subrule);
|
283
|
|
|
|
|
|
|
|
284
|
22
|
100
|
100
|
|
|
125
|
if ($quant eq '*' || $quant eq '?') {
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
285
|
12
|
|
|
|
|
25
|
$self->add_rule($list_subrule, "[$subrule]$quant",
|
286
|
|
|
|
|
|
|
'{ return \@item }');
|
287
|
|
|
|
|
|
|
}
|
288
|
|
|
|
|
|
|
elsif ($quant eq '+') { # A+ -> A A*
|
289
|
5
|
|
|
|
|
18
|
$self->add_rule($list_subrule, "[$subrule]", "[$subrule]*",
|
290
|
|
|
|
|
|
|
'{ return \@item }');
|
291
|
|
|
|
|
|
|
}
|
292
|
|
|
|
|
|
|
elsif ($quant =~ /^< \+ (.*) >$/x) { # A<+;> -> A Ac* ; Ac : ';' A
|
293
|
5
|
|
|
|
|
7
|
my $separator = $1;
|
294
|
5
|
|
|
|
|
11
|
my $list_subrule_cont = $self->_unique_name("_lst_".$subrule);
|
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
# Ac : ';' A
|
297
|
5
|
|
|
|
|
14
|
$self->add_rule($list_subrule_cont, $separator, "[$subrule]",
|
298
|
|
|
|
|
|
|
'{ return $item[1] }');
|
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# A Ac*
|
301
|
5
|
|
|
|
|
14
|
$self->add_rule($list_subrule, "[$subrule]", "[$list_subrule_cont]*",
|
302
|
|
|
|
|
|
|
'{ return \@item }');
|
303
|
|
|
|
|
|
|
}
|
304
|
|
|
|
|
|
|
else {
|
305
|
0
|
|
|
|
|
0
|
die; # not reached
|
306
|
|
|
|
|
|
|
}
|
307
|
|
|
|
|
|
|
|
308
|
22
|
|
|
|
|
37
|
return "[$list_subrule]";
|
309
|
|
|
|
|
|
|
}
|
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
312
|
|
|
|
|
|
|
# add a tree node and create a new state
|
313
|
|
|
|
|
|
|
sub _add_tree_node {
|
314
|
398
|
|
|
398
|
|
367
|
my($self, $tree, $elem) = @_;
|
315
|
|
|
|
|
|
|
|
316
|
398
|
|
100
|
|
|
1071
|
$tree->{$elem} ||= {};
|
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# new state?
|
319
|
398
|
100
|
|
|
|
538
|
if (! exists $tree->{__state__}) {
|
320
|
278
|
|
|
|
|
178
|
my $id = scalar(@{$self->_state_table});
|
|
278
|
|
|
|
|
359
|
|
321
|
278
|
|
|
|
|
243
|
$tree->{__state__} = $id;
|
322
|
278
|
|
|
|
|
350
|
$self->_state_table->[$id] = $tree;
|
323
|
|
|
|
|
|
|
}
|
324
|
|
|
|
|
|
|
|
325
|
398
|
|
|
|
|
515
|
return $tree->{$elem};
|
326
|
|
|
|
|
|
|
}
|
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
329
|
|
|
|
|
|
|
# define start rule, except if starting with '_' (internal)
|
330
|
|
|
|
|
|
|
sub _check_start_rule {
|
331
|
151
|
|
|
151
|
|
1441
|
my($self, $rule_name) = @_;
|
332
|
|
|
|
|
|
|
|
333
|
151
|
100
|
100
|
|
|
526
|
if (! defined $self->start_rule && $rule_name =~ /^[a-z]/i) {
|
334
|
58
|
|
|
|
|
106
|
$self->start_rule($rule_name); # start rule is first defined rule
|
335
|
|
|
|
|
|
|
}
|
336
|
|
|
|
|
|
|
|
337
|
151
|
|
|
|
|
139
|
return;
|
338
|
|
|
|
|
|
|
}
|
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
341
|
|
|
|
|
|
|
# _add_action()
|
342
|
|
|
|
|
|
|
# Create a new action or re-use an existing one. An action has to start by
|
343
|
|
|
|
|
|
|
# '{'; a new name is created and a reference to the name is
|
344
|
|
|
|
|
|
|
# returned : "\&_action_RULE"
|
345
|
|
|
|
|
|
|
sub _add_action {
|
346
|
146
|
|
|
146
|
|
155
|
my($self, $action, $rule_name, $comment) = @_;
|
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# remove braces
|
349
|
146
|
100
|
|
|
|
1256
|
$action =~ s/ \A \s* \{ \s* (.*?) \s* \} \s* \z /$1/xs
|
350
|
|
|
|
|
|
|
or croak "action must be enclosed in {}";
|
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# reuse an existing action, if any
|
353
|
142
|
|
|
|
|
379
|
(my $cannon_action = $action) =~ s/\s+//g;
|
354
|
142
|
100
|
|
|
|
272
|
if (!$self->_action->{$cannon_action}) {
|
355
|
126
|
|
|
|
|
226
|
my $action_name = $self->_unique_name("_act_".$rule_name);
|
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# reduce indentation
|
358
|
126
|
|
|
|
|
164
|
for ($action) {
|
359
|
126
|
|
|
|
|
162
|
my($lead_space) = /^(\t+)/m;
|
360
|
126
|
100
|
|
|
|
322
|
$lead_space and s/^$lead_space/\t/gm;
|
361
|
|
|
|
|
|
|
}
|
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
$action =
|
364
|
126
|
100
|
|
|
|
374
|
"# $comment\n".
|
365
|
|
|
|
|
|
|
"sub $action_name {".
|
366
|
|
|
|
|
|
|
($action ne '' ? "\n\tmy(\$self, \@item) = \@_;\n\t" : "").
|
367
|
|
|
|
|
|
|
$action.
|
368
|
|
|
|
|
|
|
"\n}\n\n";
|
369
|
|
|
|
|
|
|
|
370
|
126
|
|
|
|
|
291
|
$self->_action->{$cannon_action} = [ $action_name, $action ];
|
371
|
|
|
|
|
|
|
}
|
372
|
|
|
|
|
|
|
else {
|
373
|
|
|
|
|
|
|
# append this comment
|
374
|
16
|
|
|
|
|
90
|
$self->_action->{$cannon_action}[1] =~ s/^(sub)/# $comment\n$1/m;
|
375
|
|
|
|
|
|
|
}
|
376
|
|
|
|
|
|
|
|
377
|
142
|
|
|
|
|
537
|
return "\\&".$self->_action->{$cannon_action}[0];
|
378
|
|
|
|
|
|
|
}
|
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
381
|
|
|
|
|
|
|
# compute the FSM machine
|
382
|
|
|
|
|
|
|
#
|
383
|
|
|
|
|
|
|
# expand [rule] calls into start_set(rule) => [ rule_id, next_state ]
|
384
|
|
|
|
|
|
|
# Search for all sub-rule calls, and add each of the first tokens of the subrule
|
385
|
|
|
|
|
|
|
# to the call. Repeat until no more rules added, to cope with follow sets being
|
386
|
|
|
|
|
|
|
# computed after being looked up
|
387
|
|
|
|
|
|
|
# creates FSM loops for the constructs:
|
388
|
|
|
|
|
|
|
# A -> B?
|
389
|
|
|
|
|
|
|
# A -> B*
|
390
|
|
|
|
|
|
|
sub _compute_fsm {
|
391
|
48
|
|
|
48
|
|
47
|
my($self) = @_;
|
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# repeat until no more follow tokens added
|
394
|
|
|
|
|
|
|
# Example : A B[?*] C
|
395
|
48
|
|
|
|
|
38
|
my $changed;
|
396
|
48
|
|
|
|
|
36
|
do {
|
397
|
85
|
|
|
|
|
52
|
$changed = 0;
|
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
# check all states in turn
|
400
|
85
|
|
|
|
|
55
|
for my $state (@{$self->_state_table}) {
|
|
85
|
|
|
|
|
126
|
|
401
|
571
|
|
|
|
|
1132
|
my %state_copy = %$state;
|
402
|
571
|
|
|
|
|
905
|
while (my($token, $next_state) = each %state_copy) {
|
403
|
2207
|
100
|
|
|
|
5088
|
next unless my($subrule_name, $quant) =
|
404
|
|
|
|
|
|
|
$token =~ /^ \[ (.*) \] ( [?*] )? $/x;
|
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
my $next_state_text = ref($next_state) eq 'HASH' ?
|
407
|
|
|
|
|
|
|
$next_state->{__state__} :
|
408
|
248
|
100
|
|
|
|
340
|
$next_state;
|
409
|
|
|
|
|
|
|
|
410
|
248
|
50
|
|
|
|
420
|
my $subrule = $self->_tree->{$subrule_name}
|
411
|
|
|
|
|
|
|
or croak "rule $subrule_name not found";
|
412
|
248
|
50
|
|
|
|
291
|
ref($subrule) eq 'HASH' or die;
|
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# call subrule on each of the subrule follow set
|
415
|
|
|
|
|
|
|
# Example : add all 'follow(B) -> call B' to current rule
|
416
|
248
|
|
|
|
|
341
|
for my $subrule_key (keys %$subrule) {
|
417
|
1062
|
100
|
|
|
|
1740
|
next if $subrule_key =~ /^(__(comment|state)__|\[.*\][?*]?)$/;
|
418
|
|
|
|
|
|
|
my $text = "[ ".$subrule->{__state__}.", ".
|
419
|
|
|
|
|
|
|
(($quant||"") eq '*' ?
|
420
|
|
|
|
|
|
|
$state->{__state__} : # loop on a '*'
|
421
|
454
|
100
|
100
|
|
|
1355
|
$next_state_text # else, next state
|
422
|
|
|
|
|
|
|
)." ]";
|
423
|
454
|
100
|
|
|
|
417
|
if ($state->{$subrule_key}) {
|
424
|
307
|
50
|
|
|
|
459
|
die if $state->{$subrule_key} ne $text;
|
425
|
|
|
|
|
|
|
}
|
426
|
|
|
|
|
|
|
else {
|
427
|
147
|
|
|
|
|
125
|
$state->{$subrule_key} = $text;
|
428
|
147
|
|
|
|
|
131
|
$changed++;
|
429
|
|
|
|
|
|
|
}
|
430
|
|
|
|
|
|
|
}
|
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# call next rule on the next rule follow set
|
433
|
|
|
|
|
|
|
# Example : add all 'follow(C) -> end' to end current rule
|
434
|
248
|
100
|
|
|
|
593
|
if (defined($quant)) {
|
435
|
56
|
100
|
|
|
|
63
|
if ($state->{__else__}) {
|
436
|
34
|
50
|
|
|
|
97
|
die if $state->{__else__} ne $next_state_text;
|
437
|
|
|
|
|
|
|
}
|
438
|
|
|
|
|
|
|
else {
|
439
|
22
|
|
|
|
|
34
|
$state->{__else__} = $next_state_text;
|
440
|
22
|
|
|
|
|
52
|
$changed++;
|
441
|
|
|
|
|
|
|
}
|
442
|
|
|
|
|
|
|
}
|
443
|
|
|
|
|
|
|
}
|
444
|
|
|
|
|
|
|
}
|
445
|
|
|
|
|
|
|
} while ($changed);
|
446
|
|
|
|
|
|
|
|
447
|
48
|
|
|
|
|
47
|
return;
|
448
|
|
|
|
|
|
|
}
|
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=head2 parse_grammar
|
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Parses the given grammar text and adds to the parser. Example grammar follows:
|
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
{
|
457
|
|
|
|
|
|
|
# prolog
|
458
|
|
|
|
|
|
|
use MyLibrary;
|
459
|
|
|
|
|
|
|
}
|
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
main : (number | name)+ ;
|
462
|
|
|
|
|
|
|
number : 'NUMBER' { $item[0][1] } ; # comment
|
463
|
|
|
|
|
|
|
name : 'NAME' { $item[0][1] } ; # comment
|
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
expr : ;
|
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
{
|
470
|
|
|
|
|
|
|
# epilog
|
471
|
|
|
|
|
|
|
sub util_method {...}
|
472
|
|
|
|
|
|
|
}
|
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=over 4
|
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=item prolog
|
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
If the text contains a code block surrounded by braces before the first rule
|
479
|
|
|
|
|
|
|
definition, the text is copied without the external braces to the prolog
|
480
|
|
|
|
|
|
|
of generated module.
|
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=item epilog
|
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
If the text contains a code block surrounded by braces after the last rule
|
485
|
|
|
|
|
|
|
definition, the text is copied without the external braces to the epilog
|
486
|
|
|
|
|
|
|
of generated module.
|
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=item statements
|
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
Statements are either rule definitions of directives and end with a
|
491
|
|
|
|
|
|
|
semi-colon C<;>. Comments are as in Perl, from a hash C<#> sign to
|
492
|
|
|
|
|
|
|
the end of the line.
|
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=item rule
|
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
A rule defines one sentence to match in the grammar. The first rule defined
|
497
|
|
|
|
|
|
|
is the default start rule, i.e. the rule parsed by default on the input.
|
498
|
|
|
|
|
|
|
A rule name must start with a letter and contain only letters,
|
499
|
|
|
|
|
|
|
digits and the underscore character.
|
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
The rule definition follows after a colon and is composed of a sequence
|
502
|
|
|
|
|
|
|
of tokens (quoted strings) and sub-rules, to match in sequence. The rule matches
|
503
|
|
|
|
|
|
|
when all the tokens and sub-rules in the definition match in sequence.
|
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
The top level rule should end with CeofE> to make sure all input
|
506
|
|
|
|
|
|
|
is parsed.
|
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
The rule can define several alternative definitions separated by '|'.
|
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
The rule definition finishes with a semi-colon ';'.
|
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
A rule can call an anonymous sub-rule enclosed in parentheses.
|
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=item action
|
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
The last item in the rule definition is a text delimited by {} with the code
|
517
|
|
|
|
|
|
|
to execute when the rule is matched. The code can use $self to refer to the
|
518
|
|
|
|
|
|
|
Parser object, and @item to refer to the values of each of the tokens and
|
519
|
|
|
|
|
|
|
sub-rules matched. The return value from the code defines the value of the
|
520
|
|
|
|
|
|
|
rule, passed to the upper level rule, or returned as the parse result.
|
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
If no action is supplied, a default action returns an array reference with
|
523
|
|
|
|
|
|
|
the result of all tokens and sub-rules of the matched sentence.
|
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=item quantifiers
|
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
Every token or sub-rule can be followed by a repetition specification:
|
528
|
|
|
|
|
|
|
'?' (zero or one), '*' (zero or more), '+' (one or more),
|
529
|
|
|
|
|
|
|
or '<+,>' (comma-separated list, comma can be replaced by any token).
|
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
=item directives
|
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
Directives are written with angle brackets.
|
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=over 4
|
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=item
|
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
Can be used in a rule instead of the empty string to represent the end of input.
|
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=item
|
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
Shortcut for creating lists of operators separated by tokens,
|
544
|
|
|
|
|
|
|
returns the list of rule and token values.
|
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=item
|
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
Defines the start rule of the grammar. By default the first
|
549
|
|
|
|
|
|
|
defined rule is the start rule; use Cstart:E> to override that.
|
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=back
|
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=back
|
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
=cut
|
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
558
|
|
|
|
|
|
|
sub parse_grammar {
|
559
|
38
|
|
|
38
|
1
|
9994
|
my($self, $text) = @_;
|
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
# need to postpone load of Parse::FSM::Parser, as Parse::FSM is used by
|
562
|
|
|
|
|
|
|
# the script that creates Parse::FSM::Parser
|
563
|
38
|
50
|
|
10
|
|
2039
|
eval 'use Parse::FSM::Parser'; $@ and die; ## no critic
|
|
38
|
|
|
19
|
|
99
|
|
|
13
|
|
|
20
|
|
108
|
|
|
14
|
|
|
29
|
|
37
|
|
|
7
|
|
|
22
|
|
7
|
|
|
7
|
|
|
19
|
|
117
|
|
|
15
|
|
|
20
|
|
39
|
|
|
15
|
|
|
11
|
|
20
|
|
|
8
|
|
|
22
|
|
635
|
|
|
9
|
|
|
15
|
|
39
|
|
|
9
|
|
|
9
|
|
11
|
|
|
9
|
|
|
17
|
|
236
|
|
|
11
|
|
|
18
|
|
44
|
|
|
11
|
|
|
6
|
|
11
|
|
|
11
|
|
|
5
|
|
294
|
|
|
12
|
|
|
3
|
|
42
|
|
|
12
|
|
|
8
|
|
11
|
|
|
12
|
|
|
10
|
|
644
|
|
|
11
|
|
|
2
|
|
42
|
|
|
11
|
|
|
|
|
12
|
|
|
11
|
|
|
|
|
1007
|
|
|
9
|
|
|
|
|
32
|
|
|
9
|
|
|
|
|
9
|
|
|
9
|
|
|
|
|
442
|
|
|
10
|
|
|
|
|
38
|
|
|
10
|
|
|
|
|
8
|
|
|
10
|
|
|
|
|
456
|
|
|
10
|
|
|
|
|
34
|
|
|
10
|
|
|
|
|
11
|
|
|
10
|
|
|
|
|
509
|
|
|
10
|
|
|
|
|
39
|
|
|
10
|
|
|
|
|
12
|
|
|
10
|
|
|
|
|
989
|
|
|
9
|
|
|
|
|
37
|
|
|
9
|
|
|
|
|
10
|
|
|
9
|
|
|
|
|
475
|
|
|
8
|
|
|
|
|
28
|
|
|
8
|
|
|
|
|
8
|
|
|
8
|
|
|
|
|
478
|
|
|
7
|
|
|
|
|
25
|
|
|
7
|
|
|
|
|
9
|
|
|
7
|
|
|
|
|
496
|
|
|
6
|
|
|
|
|
20
|
|
|
6
|
|
|
|
|
7
|
|
|
6
|
|
|
|
|
946
|
|
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
207
|
|
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
182
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1
|
|
564
|
|
|
|
|
|
|
|
565
|
38
|
|
|
|
|
109
|
my $parser = Parse::FSM::Parser->new;
|
566
|
38
|
|
|
|
|
80
|
$parser->user->{fsm} = $self;
|
567
|
38
|
|
|
|
|
38
|
eval {
|
568
|
38
|
|
|
|
|
68
|
$parser->from($text); # setup lexer
|
569
|
38
|
|
|
|
|
71
|
$parser->parse;
|
570
|
|
|
|
|
|
|
};
|
571
|
38
|
100
|
|
|
|
454
|
$@ and do { $@ =~ s/\s+\z//; croak $@; };
|
|
11
|
|
|
|
|
46
|
|
|
11
|
|
|
|
|
778
|
|
572
|
|
|
|
|
|
|
|
573
|
27
|
|
|
|
|
243
|
return;
|
574
|
|
|
|
|
|
|
}
|
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
=head1 METHODS - USE PARSER
|
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
=head2 parser
|
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
Computes the Finite State Machine to execute the parser and returns a
|
583
|
|
|
|
|
|
|
L object that implements the parser.
|
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
Useful to build the parser and execute it in the same
|
586
|
|
|
|
|
|
|
program, but with the run-time penalty of the time to setup the state tables.
|
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=cut
|
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
591
|
|
|
|
|
|
|
sub parser {
|
592
|
42
|
|
|
42
|
1
|
120
|
my($self) = @_;
|
593
|
42
|
|
100
|
|
|
66
|
our $name ||= 'Parser00000'; $name++; # new module on each call
|
|
42
|
|
|
|
|
41
|
|
594
|
|
|
|
|
|
|
|
595
|
42
|
|
|
|
|
64
|
my $text = $self->_module_text($name, "-");
|
596
|
42
|
|
|
16
|
|
31762
|
eval $text; ## no critic
|
|
9
|
|
|
15
|
|
1031
|
|
|
9
|
|
|
15
|
|
20
|
|
|
5
|
|
|
11
|
|
112
|
|
|
5
|
|
|
25
|
|
19
|
|
|
5
|
|
|
16
|
|
6
|
|
|
5
|
|
|
4
|
|
100
|
|
|
12
|
|
|
1
|
|
1114
|
|
|
12
|
|
|
3
|
|
20
|
|
|
5
|
|
|
1
|
|
466
|
|
|
4
|
|
|
8
|
|
17
|
|
|
4
|
|
|
1
|
|
4
|
|
|
4
|
|
|
0
|
|
42
|
|
|
6
|
|
|
0
|
|
26
|
|
|
6
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
189
|
|
597
|
42
|
50
|
|
|
|
99
|
$@ and die $@;
|
598
|
|
|
|
|
|
|
|
599
|
42
|
|
|
|
|
947
|
my $parser = $name->new;
|
600
|
|
|
|
|
|
|
|
601
|
42
|
|
|
|
|
146
|
return $parser;
|
602
|
|
|
|
|
|
|
}
|
603
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
=head2 write_module
|
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
Receives as input the module name and the output file name
|
608
|
|
|
|
|
|
|
and writes the parser module.
|
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
The file name is optional; if not supplied is computed from the
|
611
|
|
|
|
|
|
|
module name by replacing C<::> by C> and appending C<.pm>,
|
612
|
|
|
|
|
|
|
e.g. C.
|
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
The generated code includes C functions for every rule
|
615
|
|
|
|
|
|
|
C found in the grammar, as a short-cut for calling C.
|
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
=cut
|
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
620
|
|
|
|
|
|
|
sub write_module {
|
621
|
13
|
|
|
13
|
1
|
736
|
my($self, $name, $file) = @_;
|
622
|
|
|
|
|
|
|
|
623
|
13
|
100
|
|
|
|
94
|
$name or croak "name not defined";
|
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
# build file name from module name
|
626
|
22
|
100
|
|
|
|
45
|
unless (defined $file) {
|
627
|
13
|
|
|
|
|
35
|
$file = $name;
|
628
|
12
|
|
|
|
|
23
|
$file =~ s/::/\//g;
|
629
|
12
|
|
|
|
|
23
|
$file .= ".pm";
|
630
|
|
|
|
|
|
|
}
|
631
|
|
|
|
|
|
|
|
632
|
20
|
|
|
|
|
27
|
my $text = $self->_module_text($name, $file);
|
633
|
19
|
|
|
|
|
3424
|
write_file($file, {atomic => 1}, $text);
|
634
|
|
|
|
|
|
|
|
635
|
13
|
|
|
|
|
1013
|
return;
|
636
|
|
|
|
|
|
|
}
|
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
639
|
|
|
|
|
|
|
# template code for grammmar parser
|
640
|
|
|
|
|
|
|
my $TEMPLATE = <<'END_TEMPLATE';
|
641
|
|
|
|
|
|
|
# $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
|
642
|
|
|
|
|
|
|
# Parser generated by Parse::FSM
|
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
package # hide from CPAN indexer
|
645
|
|
|
|
|
|
|
<% $name %>;
|
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
use strict;
|
648
|
|
|
|
|
|
|
use warnings;
|
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
use Parse::FSM::Driver; our @ISA = ('Parse::FSM::Driver');
|
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
<% $prolog %>
|
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
<% $table %>
|
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
sub new {
|
657
|
|
|
|
|
|
|
my($class, %args) = @_;
|
658
|
|
|
|
|
|
|
return $class->SUPER::new(
|
659
|
|
|
|
|
|
|
_state_table => \@state_table,
|
660
|
|
|
|
|
|
|
_start_state => $start_state,
|
661
|
|
|
|
|
|
|
%args,
|
662
|
|
|
|
|
|
|
);
|
663
|
|
|
|
|
|
|
}
|
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
<% $epilog %>
|
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
1;
|
668
|
|
|
|
|
|
|
END_TEMPLATE
|
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
671
|
|
|
|
|
|
|
# module text
|
672
|
|
|
|
|
|
|
sub _module_text {
|
673
|
57
|
|
|
61
|
|
67
|
my($self, $name, $file) = @_;
|
674
|
|
|
|
|
|
|
|
675
|
58
|
50
|
|
|
|
84
|
$name or croak "name not defined";
|
676
|
58
|
100
|
|
|
|
99
|
$file or croak "file not defined";
|
677
|
|
|
|
|
|
|
|
678
|
64
|
|
|
|
|
81
|
my $table = $self->_table_dump;
|
679
|
|
|
|
|
|
|
|
680
|
63
|
|
50
|
|
|
395
|
my @template_args = (
|
|
|
|
50
|
|
|
|
|
681
|
|
|
|
|
|
|
DELIMITERS => [ '<%', '%>' ],
|
682
|
|
|
|
|
|
|
HASH => {
|
683
|
|
|
|
|
|
|
prolog => $self->prolog || "",
|
684
|
|
|
|
|
|
|
epilog => $self->epilog || "",
|
685
|
|
|
|
|
|
|
name => $name,
|
686
|
|
|
|
|
|
|
table => $table,
|
687
|
|
|
|
|
|
|
},
|
688
|
|
|
|
|
|
|
);
|
689
|
62
|
|
|
|
|
146
|
return fill_in_string($TEMPLATE, @template_args);
|
690
|
|
|
|
|
|
|
}
|
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
693
|
|
|
|
|
|
|
# dump the state table
|
694
|
|
|
|
|
|
|
sub _table_dump {
|
695
|
53
|
|
|
55
|
|
64
|
my($self) = @_;
|
696
|
|
|
|
|
|
|
|
697
|
57
|
|
|
|
|
66
|
$self->_compute_fsm;
|
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
#print dump($self),"\n" if $ENV{DEBUG};
|
700
|
|
|
|
|
|
|
|
701
|
57
|
|
|
|
|
56
|
my $start_state = 0;
|
702
|
55
|
100
|
66
|
|
|
202
|
if (defined($self->start_rule) && exists($self->_tree->{$self->start_rule})) {
|
703
|
54
|
|
|
|
|
99
|
$start_state = $self->_tree->{$self->start_rule}{__state__};
|
704
|
|
|
|
|
|
|
}
|
705
|
|
|
|
|
|
|
else {
|
706
|
24
|
|
|
|
|
104
|
croak "start state not found";
|
707
|
|
|
|
|
|
|
}
|
708
|
|
|
|
|
|
|
|
709
|
65
|
|
|
|
|
112
|
my $ret = 'my $start_state = '.$start_state.";\n".
|
710
|
|
|
|
|
|
|
'my @state_table = ('."\n";
|
711
|
59
|
|
|
|
|
55
|
my $width;
|
712
|
59
|
|
|
|
|
65
|
for my $i (0 .. $#{$self->_state_table}) {
|
|
51
|
|
|
|
|
116
|
|
713
|
|
|
|
|
|
|
$ret .= "\t# [$i] " .
|
714
|
260
|
|
100
|
|
|
701
|
($self->_state_table->[$i]{__comment__} || "") .
|
715
|
|
|
|
|
|
|
"\n" .
|
716
|
|
|
|
|
|
|
"\t{ ";
|
717
|
262
|
|
|
|
|
179
|
$width = 2;
|
718
|
|
|
|
|
|
|
|
719
|
262
|
|
|
|
|
181
|
for my $key (sort keys %{$self->_state_table->[$i]}) {
|
|
263
|
|
|
|
|
681
|
|
720
|
985
|
100
|
|
|
|
1888
|
next if $key =~ /^(__(comment|state)__|\[.*\][?*]?)$/;
|
721
|
|
|
|
|
|
|
|
722
|
427
|
|
|
|
|
448
|
my $value = $self->_state_table->[$i]{$key};
|
723
|
427
|
100
|
|
|
|
572
|
$value = $value->{__state__} if ref($value) eq 'HASH';
|
724
|
|
|
|
|
|
|
|
725
|
430
|
100
|
|
|
|
824
|
my $key_text = ($key =~ /^\w+$/) ? $key : dump($key);
|
726
|
|
|
|
|
|
|
|
727
|
430
|
|
|
|
|
6313
|
my $item_text = "$key_text => $value, ";
|
728
|
430
|
100
|
|
|
|
551
|
if (($width += length($item_text)) > 72) {
|
729
|
24
|
|
|
|
|
29
|
$ret .= "\n\t ";
|
730
|
29
|
|
|
|
|
29
|
$width = 2 + length($item_text);
|
731
|
|
|
|
|
|
|
}
|
732
|
435
|
|
|
|
|
482
|
$ret .= $item_text;
|
733
|
|
|
|
|
|
|
}
|
734
|
|
|
|
|
|
|
|
735
|
259
|
|
|
|
|
296
|
$ret .= "},\n\n";
|
736
|
|
|
|
|
|
|
}
|
737
|
54
|
|
|
|
|
73
|
$ret .= ");\n\n";
|
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
# dump action
|
740
|
51
|
|
|
|
|
42
|
for (sort {$a->[0] cmp $b->[0]} values %{$self->_action}) {
|
|
129
|
|
|
|
|
156
|
|
|
54
|
|
|
|
|
161
|
|
741
|
127
|
|
|
|
|
163
|
$ret .= $_->[1];
|
742
|
|
|
|
|
|
|
}
|
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
# dump parse_XXX functions
|
745
|
55
|
|
|
|
|
53
|
my $length = 1;
|
746
|
55
|
|
|
|
|
62
|
while (my($name, $rule) = each %{$self->_tree}) {
|
|
220
|
|
|
|
|
412
|
|
747
|
172
|
100
|
|
|
|
327
|
next unless $name =~ /^[a-z]/i;
|
748
|
93
|
100
|
|
|
|
154
|
$length = length($name) if length($name) > $length;
|
749
|
|
|
|
|
|
|
}
|
750
|
54
|
|
|
|
|
70
|
while (my($name, $rule) = each %{$self->_tree}) {
|
|
218
|
|
|
|
|
370
|
|
751
|
170
|
100
|
|
|
|
277
|
next unless $name =~ /^[a-z]/i;
|
752
|
91
|
|
|
|
|
250
|
$ret .=
|
753
|
|
|
|
|
|
|
"sub parse_$name".
|
754
|
|
|
|
|
|
|
(" " x ($length - length($name))).
|
755
|
|
|
|
|
|
|
" { return shift->_parse($rule->{__state__}) }\n";
|
756
|
|
|
|
|
|
|
}
|
757
|
|
|
|
|
|
|
|
758
|
50
|
|
|
|
|
103
|
return $ret;
|
759
|
|
|
|
|
|
|
}
|
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=head1 PRE-COMPILING THE GRAMMAR
|
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
The setup of the parsing tables and creating the parsing module may take up
|
766
|
|
|
|
|
|
|
considerable time. Therefore it is useful to separate the parser generation
|
767
|
|
|
|
|
|
|
phase from the parsing phase.
|
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
=head2 precompile
|
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
A parser module can be created from a yacc-like grammar file by the
|
772
|
|
|
|
|
|
|
following command. The generated file (last parameter) is optional; if not
|
773
|
|
|
|
|
|
|
supplied is computed from the module name by replacing C<::> by C> and
|
774
|
|
|
|
|
|
|
appending C<.pm>, e.g. C:
|
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
perl -MParse::FSM - Grammar.yp Parser::Module
|
777
|
|
|
|
|
|
|
perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
|
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
This is equivalent to the following Perl program:
|
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
#!perl
|
782
|
|
|
|
|
|
|
use Parse::FSM;
|
783
|
|
|
|
|
|
|
Parse::FSM->precompile(@ARGV);
|
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
The class method C receives as arguments the grammar file, the
|
786
|
|
|
|
|
|
|
generated module name and an optional file name, and creates the parsing module.
|
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
=cut
|
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
791
|
|
|
|
|
|
|
sub precompile {
|
792
|
3
|
|
|
21
|
1
|
8
|
my($class, $grammar, $module, $file) = @_;
|
793
|
|
|
|
|
|
|
|
794
|
7
|
|
|
|
|
16
|
my $self = $class->new;
|
795
|
8
|
|
|
|
|
57
|
my $text = read_file($grammar);
|
796
|
7
|
|
|
|
|
19
|
$self->parse_grammar($text);
|
797
|
11
|
|
|
|
|
17
|
$self->write_module($module, $file);
|
798
|
|
|
|
|
|
|
|
799
|
17
|
|
|
|
|
34
|
return;
|
800
|
|
|
|
|
|
|
}
|
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
803
|
|
|
|
|
|
|
# startup code for pre-compiler
|
804
|
|
|
|
|
|
|
# borrowed from Parse::RecDescent
|
805
|
|
|
|
|
|
|
sub import {
|
806
|
8
|
|
|
12
|
|
15
|
local *_die = sub { warn @_, "\n"; exit 1; };
|
|
17
|
|
|
24
|
|
27
|
|
|
32
|
|
|
|
|
102
|
|
807
|
|
|
|
|
|
|
|
808
|
23
|
|
|
|
|
47
|
my($package, $file, $line) = caller;
|
809
|
18
|
50
|
33
|
|
|
69
|
if (substr($file,0,1) eq '-' && $line == 0) {
|
810
|
9
|
0
|
0
|
|
|
21
|
_die("Usage: perl -MParse::FSM - GRAMMAR MODULE::NAME [MODULE/NAME.pm]")
|
811
|
|
|
|
|
|
|
unless @ARGV == 2 || @ARGV == 3;
|
812
|
|
|
|
|
|
|
|
813
|
5
|
|
|
|
|
54
|
my($grammar, $module, $file) = @ARGV;
|
814
|
1
|
|
|
|
|
6
|
eval {
|
815
|
10
|
|
|
|
|
15
|
Parse::FSM->precompile($grammar, $module, $file);
|
816
|
|
|
|
|
|
|
};
|
817
|
10
|
0
|
|
|
|
23
|
$@ and _die($@);
|
818
|
|
|
|
|
|
|
|
819
|
10
|
|
|
|
|
16
|
exit 0;
|
820
|
|
|
|
|
|
|
}
|
821
|
|
|
|
|
|
|
|
822
|
16
|
|
|
|
|
86
|
return;
|
823
|
|
|
|
|
|
|
}
|
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=head1 AUTHOR
|
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
Paulo Custodio, C<< >>
|
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS
|
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
Calling pre-compiler on C
|
835
|
|
|
|
|
|
|
borrowed from L.
|
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
=head1 BUGS and FEEDBACK
|
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
Please report any bugs or feature requests through the web interface at
|
840
|
|
|
|
|
|
|
L.
|
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
=head1 LICENSE and COPYRIGHT
|
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
Copyright (C) 2010-2011 Paulo Custodio.
|
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it
|
847
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published
|
848
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License.
|
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information.
|
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=cut
|
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
1; # End of Parse::FSM
|