line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: Driver.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Parse::FSM::Driver;
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Parse::FSM::Driver - Run-time engine for Parse::FSM parser
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=cut
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
14
|
|
|
|
|
|
|
|
15
|
6
|
|
|
6
|
|
504
|
use warnings;
|
|
6
|
|
|
|
|
7
|
|
|
6
|
|
|
|
|
161
|
|
16
|
6
|
|
|
6
|
|
19
|
use strict;
|
|
6
|
|
|
|
|
6
|
|
|
6
|
|
|
|
|
88
|
|
17
|
|
|
|
|
|
|
|
18
|
6
|
|
|
6
|
|
17
|
use Carp; our @CARP_NOT = ('Parse::FSM::Driver');
|
|
6
|
|
|
|
|
6
|
|
|
6
|
|
|
|
|
385
|
|
19
|
6
|
|
|
6
|
|
412
|
use Data::Dump 'dump';
|
|
6
|
|
|
|
|
3504
|
|
|
6
|
|
|
|
|
424
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
our $VERSION = '1.13';
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
use MyParser; # isa Parse::FSM::Driver
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$parser = MyParser->new;
|
30
|
|
|
|
|
|
|
$parser->input( \&lexer );
|
31
|
|
|
|
|
|
|
$parser->user( $user_pointer );
|
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
$result = $parser->parse( $start_rule );
|
34
|
|
|
|
|
|
|
$result = $parser->parse_start_rule;
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
$token = $parser->peek_token;
|
37
|
|
|
|
|
|
|
$token = $parser->get_token;
|
38
|
|
|
|
|
|
|
$parser->unget_token(@tokens);
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
This module implements a deterministic top-down parser based on a
|
43
|
|
|
|
|
|
|
pre-computed Finite State Machine (FSM).
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
The FSM is generated by L, by
|
46
|
|
|
|
|
|
|
reading a BNF-type grammar file and generating
|
47
|
|
|
|
|
|
|
a run-time module that includes the state tables. The module also include
|
48
|
|
|
|
|
|
|
the run-time parsing routine that follows the state tables to obtain
|
49
|
|
|
|
|
|
|
a parse of the input.
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
This module is not intended to be used stand alone. It is used as a base class
|
52
|
|
|
|
|
|
|
by the modules generated by L.
|
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 METHODS - SETUP
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head2 new
|
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Creates a new object.
|
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head2 user
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Get/set of the parser user pointer. The user pointer is not used by the parser,
|
63
|
|
|
|
|
|
|
and is available for communication between the parser actions and the
|
64
|
|
|
|
|
|
|
calling module.
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
It can for example point to a data structure that describes the objects
|
67
|
|
|
|
|
|
|
already identified in the parse.
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=cut
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
72
|
|
|
|
|
|
|
# Parsing state machine
|
73
|
|
|
|
|
|
|
# Each state hash has:
|
74
|
|
|
|
|
|
|
# terminal => (state ID), for a match
|
75
|
|
|
|
|
|
|
# terminal => [ (subrule ID), (next state ID) ], for a sub-rule
|
76
|
|
|
|
|
|
|
# followed by a match
|
77
|
|
|
|
|
|
|
# terminal => [ (subrule ID), sub{} ], for a sub-rule followed by an accept
|
78
|
|
|
|
|
|
|
# terminal => sub{}, for an accept
|
79
|
|
|
|
|
|
|
# Each sub{} has $self and @args pre-declared
|
80
|
|
|
|
|
|
|
# @args is [] of all parsed elements
|
81
|
|
|
|
|
|
|
# $self is the Parse::FSM::Driver object
|
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
84
|
|
|
|
|
|
|
use Class::XSAccessor {
|
85
|
6
|
|
|
|
|
39
|
constructor => '_init',
|
86
|
|
|
|
|
|
|
accessors => [
|
87
|
|
|
|
|
|
|
'input', # input iterator
|
88
|
|
|
|
|
|
|
'_head', # unget queue of tokens retrived from input
|
89
|
|
|
|
|
|
|
'user', # user pointer
|
90
|
|
|
|
|
|
|
'_state_table', # list of states
|
91
|
|
|
|
|
|
|
'_start_state', # ID of start state
|
92
|
|
|
|
|
|
|
],
|
93
|
6
|
|
|
6
|
|
396
|
};
|
|
6
|
|
|
|
|
1646
|
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
96
|
|
|
|
|
|
|
sub new {
|
97
|
100
|
|
|
100
|
1
|
1362
|
my($class, @args) = @_;
|
98
|
|
|
|
|
|
|
return $class->_init(
|
99
|
|
|
|
3
|
|
|
input => sub {},
|
100
|
100
|
|
|
|
|
825
|
_head => [],
|
101
|
|
|
|
|
|
|
user => {},
|
102
|
|
|
|
|
|
|
_state_table => [],
|
103
|
|
|
|
|
|
|
_start_state => 0,
|
104
|
|
|
|
|
|
|
@args);
|
105
|
|
|
|
|
|
|
}
|
106
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head1 METHODS - INPUT STREAM
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 input
|
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Get/set the parser input lexer iterator. The iterator is a code reference of
|
113
|
|
|
|
|
|
|
a function that returns the next token to be parsed as an array ref,
|
114
|
|
|
|
|
|
|
with token type and token value C<[$type, $value]>.
|
115
|
|
|
|
|
|
|
It returns C on end of input. E.g. for a simple expression lexer:
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub make_lexer {
|
118
|
|
|
|
|
|
|
my($line) = @_;
|
119
|
|
|
|
|
|
|
return sub {
|
120
|
|
|
|
|
|
|
for ($line) {
|
121
|
|
|
|
|
|
|
/\G\s+/gc;
|
122
|
|
|
|
|
|
|
return [NUM => $1] if /\G(\d+)/gc;
|
123
|
|
|
|
|
|
|
return [NAME => $1] if /\G([a-z]\w*)/gci;
|
124
|
|
|
|
|
|
|
return [$1 => $1] if /\G(.)/gc;
|
125
|
|
|
|
|
|
|
return;
|
126
|
|
|
|
|
|
|
}
|
127
|
|
|
|
|
|
|
};
|
128
|
|
|
|
|
|
|
}
|
129
|
|
|
|
|
|
|
$parser->input(make_lexer("2+3*4"));
|
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 peek_token
|
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Returns the next token to be retrieved by the lexer, but keeps it in the input
|
134
|
|
|
|
|
|
|
queue. Can be used by a rule action to decide based on the input that follows.
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=cut
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
139
|
|
|
|
|
|
|
sub peek_token {
|
140
|
2407
|
|
|
2407
|
1
|
1595
|
my($self) = @_;
|
141
|
2407
|
100
|
|
|
|
1392
|
@{$self->_head} or push @{$self->_head}, $self->input->();
|
|
1340
|
|
|
|
|
2183
|
|
|
2407
|
|
|
|
|
3661
|
|
142
|
2403
|
|
|
|
|
6741
|
return $self->_head->[0]; # may be undef, if end of input
|
143
|
|
|
|
|
|
|
}
|
144
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 get_token
|
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Extracts the next token from the lexer stream. Can be used by a rule action to
|
149
|
|
|
|
|
|
|
discard the following tokens.
|
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=cut
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
154
|
|
|
|
|
|
|
sub get_token {
|
155
|
886
|
|
|
886
|
1
|
16815
|
my($self) = @_;
|
156
|
886
|
100
|
|
|
|
564
|
@{$self->_head} and return shift @{$self->_head};
|
|
698
|
|
|
|
|
853
|
|
|
886
|
|
|
|
|
1569
|
|
157
|
188
|
|
|
|
|
448
|
return $self->_head->[0]; # may be undef, if end of input
|
158
|
|
|
|
|
|
|
}
|
159
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head2 unget_token
|
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Pushes back the given list of tokens to the lexer input stream, to be retrieved
|
164
|
|
|
|
|
|
|
on the next calls to C.
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=cut
|
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
169
|
|
|
|
|
|
|
sub unget_token {
|
170
|
4
|
|
|
4
|
1
|
7
|
my($self, @tokens) = @_;
|
171
|
4
|
|
|
|
|
4
|
unshift @{$self->_head}, @tokens;
|
|
4
|
|
|
|
|
9
|
|
172
|
4
|
|
|
|
|
33
|
return;
|
173
|
|
|
|
|
|
|
}
|
174
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head1 METHODS - PARSING
|
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head2 parse
|
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
This function receives an optional start rule name, and uses the default rule
|
181
|
|
|
|
|
|
|
of the grammar if not supplied.
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
It parses the input stream, leaving the stream at the first unparsed
|
184
|
|
|
|
|
|
|
token, and returns the parse value - the result of the action function for the
|
185
|
|
|
|
|
|
|
start rule.
|
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
The function dies with an error message indicating the input that cannot
|
188
|
|
|
|
|
|
|
be parsed in case of a parse error.
|
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head2 parse_XXX
|
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
For each rule C in the grammar, L creates a
|
193
|
|
|
|
|
|
|
corresponding C to start the parse at that rule. This is a
|
194
|
|
|
|
|
|
|
short-cut to C.
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=cut
|
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
199
|
|
|
|
|
|
|
sub parse {
|
200
|
175
|
|
|
175
|
1
|
7382
|
my($self, $start_rule) = @_;
|
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# current state
|
203
|
175
|
|
|
|
|
426
|
my $state;
|
204
|
175
|
100
|
|
|
|
257
|
if (defined($start_rule)) {
|
205
|
8
|
100
|
|
|
|
145
|
$state = $self->_state_table->[0]{$start_rule}
|
206
|
|
|
|
|
|
|
or croak "Rule $start_rule not found";
|
207
|
|
|
|
|
|
|
}
|
208
|
|
|
|
|
|
|
else {
|
209
|
167
|
50
|
|
|
|
382
|
$state = $self->_start_state
|
210
|
|
|
|
|
|
|
or croak "Start state not found";
|
211
|
|
|
|
|
|
|
}
|
212
|
174
|
|
|
|
|
276
|
return $self->_parse($state);
|
213
|
|
|
|
|
|
|
}
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
216
|
|
|
|
|
|
|
sub _parse {
|
217
|
194
|
|
|
194
|
|
153
|
my($self, $state) = @_;
|
218
|
|
|
|
|
|
|
|
219
|
194
|
|
|
|
|
177
|
my @values = ();
|
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# return stack of states
|
222
|
194
|
|
|
|
|
139
|
my @stack = (); # store: [$state, @values]
|
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# fetch token only after drop and after calling parser rules
|
225
|
194
|
|
|
|
|
252
|
my $token = $self->peek_token;
|
226
|
194
|
|
|
|
|
158
|
while (1) {
|
227
|
2572
|
|
|
|
|
1464
|
my($entry, $found_else);
|
228
|
2572
|
100
|
|
|
|
5245
|
if ($entry = $self->_state_table->[$state]{($token ? $token->[0] : "")}) {
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# entry exists, found token
|
230
|
|
|
|
|
|
|
}
|
231
|
|
|
|
|
|
|
elsif ($entry = $self->_state_table->[$state]{__else__}) {
|
232
|
521
|
|
|
|
|
387
|
$found_else++;
|
233
|
|
|
|
|
|
|
}
|
234
|
|
|
|
|
|
|
else {
|
235
|
82
|
|
|
|
|
155
|
$self->_error_at($token, $state);
|
236
|
|
|
|
|
|
|
}
|
237
|
|
|
|
|
|
|
|
238
|
2490
|
100
|
|
|
|
2659
|
if (ref($entry) eq 'ARRAY') { # call sub-rule
|
239
|
1411
|
|
|
|
|
1024
|
my($next_state, $return_state) = @$entry;
|
240
|
1411
|
|
|
|
|
1394
|
push(@stack, [ $return_state, @values ]); # return data
|
241
|
1411
|
|
|
|
|
1351
|
($state, @values) = ($next_state); # call
|
242
|
|
|
|
|
|
|
}
|
243
|
|
|
|
|
|
|
else { # accept token
|
244
|
1079
|
|
|
|
|
673
|
$state = $entry;
|
245
|
|
|
|
|
|
|
|
246
|
1079
|
100
|
|
|
|
1231
|
if (!$found_else) {
|
247
|
725
|
100
|
|
|
|
913
|
push(@values, $token) if $token; # add token to values
|
248
|
725
|
|
|
|
|
848
|
$self->get_token; # drop value
|
249
|
725
|
|
|
|
|
742
|
$token = $self->peek_token; # and get next token
|
250
|
|
|
|
|
|
|
}
|
251
|
|
|
|
|
|
|
|
252
|
1075
|
|
|
|
|
1600
|
while (ref($state) eq 'CODE') { # return from sub-rules
|
253
|
1460
|
|
|
|
|
5915
|
my $value = $self->$state(@values);
|
254
|
1460
|
|
|
|
|
1533
|
$token = $self->peek_token; # input may have changed
|
255
|
|
|
|
|
|
|
|
256
|
1460
|
100
|
|
|
|
1774
|
if ( ! @stack ) { # END OF PARSE
|
257
|
108
|
|
|
|
|
426
|
return $value;
|
258
|
|
|
|
|
|
|
}
|
259
|
|
|
|
|
|
|
|
260
|
1352
|
|
|
|
|
909
|
my $top = pop(@stack);
|
261
|
1352
|
|
|
|
|
1661
|
($state, @values) = @$top;
|
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# keep only defined values
|
264
|
1352
|
100
|
|
|
|
3268
|
push(@values, $value) if defined($value);
|
265
|
|
|
|
|
|
|
}
|
266
|
|
|
|
|
|
|
}
|
267
|
|
|
|
|
|
|
}
|
268
|
0
|
|
|
|
|
0
|
die 'not reached';
|
269
|
|
|
|
|
|
|
}
|
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
272
|
|
|
|
|
|
|
# expected error at given stream position, die with error message
|
273
|
|
|
|
|
|
|
sub _error_at {
|
274
|
84
|
|
|
84
|
|
696
|
my($self, $token, $state) = @_;
|
275
|
|
|
|
|
|
|
|
276
|
95
|
|
|
|
|
455
|
my @expected = sort map {_format_token($_)}
|
277
|
84
|
|
|
|
|
72
|
keys %{$self->_state_table->[$state]};
|
|
84
|
|
|
|
|
196
|
|
278
|
84
|
100
|
|
|
|
1149
|
die("Expected ",
|
|
|
100
|
|
|
|
|
|
279
|
|
|
|
|
|
|
scalar(@expected) == 1 ? "@expected" : "one of (@expected)",
|
280
|
|
|
|
|
|
|
" at ",
|
281
|
|
|
|
|
|
|
defined($token) ? _format_token($token->[0]) : "EOF",
|
282
|
|
|
|
|
|
|
"\n");
|
283
|
|
|
|
|
|
|
}
|
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
286
|
|
|
|
|
|
|
# format a token
|
287
|
|
|
|
|
|
|
sub _format_token {
|
288
|
131
|
|
|
131
|
|
102
|
my($token) = @_;
|
289
|
131
|
50
|
|
|
|
197
|
return "" if !defined($token);
|
290
|
131
|
100
|
|
|
|
217
|
return "EOF" if $token eq "";
|
291
|
113
|
100
|
|
|
|
282
|
return dump($token) if $token =~ /\W/;
|
292
|
89
|
|
|
|
|
309
|
return $token;
|
293
|
|
|
|
|
|
|
}
|
294
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=head1 AUTHOR, BUGS, FEEDBACK, LICENSE, COPYRIGHT
|
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
See L
|
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=cut
|
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
1;
|