File Coverage

blib/lib/Parse/FSM/Driver.pm

Criterion	Covered	Total	%
statement	73	74	98.6
branch	34	36	94.4
condition			n/a
subroutine	14	14	100.0
pod	5	5	100.0
total	126	129	97.6

line	stmt	bran	sub	pod	time	code
1						# $Id: Driver.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2
3						package Parse::FSM::Driver;
4
5						#------------------------------------------------------------------------------
6
7						=head1 NAME
8
9						Parse::FSM::Driver - Run-time engine for Parse::FSM parser
10
11						=cut
12
13						#------------------------------------------------------------------------------
14
15	6		6		504	use warnings;
	6				7
	6				161
16	6		6		19	use strict;
	6				6
	6				88
17
18	6		6		17	use Carp; our @CARP_NOT = ('Parse::FSM::Driver');
	6				6
	6				385
19	6		6		412	use Data::Dump 'dump';
	6				3504
	6				424
20
21						our $VERSION = '1.13';
22
23						#------------------------------------------------------------------------------
24
25						=head1 SYNOPSIS
26
27						use MyParser; # isa Parse::FSM::Driver
28
29						$parser = MyParser->new;
30						$parser->input( \&lexer );
31						$parser->user( $user_pointer );
32
33						$result = $parser->parse( $start_rule );
34						$result = $parser->parse_start_rule;
35
36						$token = $parser->peek_token;
37						$token = $parser->get_token;
38						$parser->unget_token(@tokens);
39
40						=head1 DESCRIPTION
41
42						This module implements a deterministic top-down parser based on a
43						pre-computed Finite State Machine (FSM).
44
45						The FSM is generated by L, by
46						reading a BNF-type grammar file and generating
47						a run-time module that includes the state tables. The module also include
48						the run-time parsing routine that follows the state tables to obtain
49						a parse of the input.
50
51						This module is not intended to be used stand alone. It is used as a base class
52						by the modules generated by L.
53
54						=head1 METHODS - SETUP
55
56						=head2 new
57
58						Creates a new object.
59
60						=head2 user
61
62						Get/set of the parser user pointer. The user pointer is not used by the parser,
63						and is available for communication between the parser actions and the
64						calling module.
65
66						It can for example point to a data structure that describes the objects
67						already identified in the parse.
68
69						=cut
70
71						#------------------------------------------------------------------------------
72						# Parsing state machine
73						# Each state hash has:
74						# terminal => (state ID), for a match
75						# terminal => [ (subrule ID), (next state ID) ], for a sub-rule
76						# followed by a match
77						# terminal => [ (subrule ID), sub{} ], for a sub-rule followed by an accept
78						# terminal => sub{}, for an accept
79						# Each sub{} has $self and @args pre-declared
80						# @args is [] of all parsed elements
81						# $self is the Parse::FSM::Driver object
82
83						#------------------------------------------------------------------------------
84						use Class::XSAccessor {
85	6				39	constructor => '_init',
86						accessors => [
87						'input', # input iterator
88						'_head', # unget queue of tokens retrived from input
89						'user', # user pointer
90						'_state_table', # list of states
91						'_start_state', # ID of start state
92						],
93	6		6		396	};
	6				1646
94
95						#------------------------------------------------------------------------------
96						sub new {
97	100		100	1	1362	my($class, @args) = @_;
98						return $class->_init(
99			3			input => sub {},
100	100				825	_head => [],
101						user => {},
102						_state_table => [],
103						_start_state => 0,
104						@args);
105						}
106						#------------------------------------------------------------------------------
107
108						=head1 METHODS - INPUT STREAM
109
110						=head2 input
111
112						Get/set the parser input lexer iterator. The iterator is a code reference of
113						a function that returns the next token to be parsed as an array ref,
114						with token type and token value C<[$type, $value]>.
115						It returns C on end of input. E.g. for a simple expression lexer:
116
117						sub make_lexer {
118						my($line) = @_;
119						return sub {
120						for ($line) {
121						/\G\s+/gc;
122						return [NUM => $1] if /\G(\d+)/gc;
123						return [NAME => $1] if /\G([a-z]\w*)/gci;
124						return [$1 => $1] if /\G(.)/gc;
125						return;
126						}
127						};
128						}
129						$parser->input(make_lexer("2+3*4"));
130
131						=head2 peek_token
132
133						Returns the next token to be retrieved by the lexer, but keeps it in the input
134						queue. Can be used by a rule action to decide based on the input that follows.
135
136						=cut
137
138						#------------------------------------------------------------------------------
139						sub peek_token {
140	2407		2407	1	1595	my($self) = @_;
141	2407	100			1392	@{$self->_head} or push @{$self->_head}, $self->input->();
	1340				2183
	2407				3661
142	2403				6741	return $self->_head->[0]; # may be undef, if end of input
143						}
144						#------------------------------------------------------------------------------
145
146						=head2 get_token
147
148						Extracts the next token from the lexer stream. Can be used by a rule action to
149						discard the following tokens.
150
151						=cut
152
153						#------------------------------------------------------------------------------
154						sub get_token {
155	886		886	1	16815	my($self) = @_;
156	886	100			564	@{$self->_head} and return shift @{$self->_head};
	698				853
	886				1569
157	188				448	return $self->_head->[0]; # may be undef, if end of input
158						}
159						#------------------------------------------------------------------------------
160
161						=head2 unget_token
162
163						Pushes back the given list of tokens to the lexer input stream, to be retrieved
164						on the next calls to C.
165
166						=cut
167
168						#------------------------------------------------------------------------------
169						sub unget_token {
170	4		4	1	7	my($self, @tokens) = @_;
171	4				4	unshift @{$self->_head}, @tokens;
	4				9
172	4				33	return;
173						}
174						#------------------------------------------------------------------------------
175
176						=head1 METHODS - PARSING
177
178						=head2 parse
179
180						This function receives an optional start rule name, and uses the default rule
181						of the grammar if not supplied.
182
183						It parses the input stream, leaving the stream at the first unparsed
184						token, and returns the parse value - the result of the action function for the
185						start rule.
186
187						The function dies with an error message indicating the input that cannot
188						be parsed in case of a parse error.
189
190						=head2 parse_XXX
191
192						For each rule C in the grammar, L creates a
193						corresponding C to start the parse at that rule. This is a
194						short-cut to C.
195
196						=cut
197
198						#------------------------------------------------------------------------------
199						sub parse {
200	175		175	1	7382	my($self, $start_rule) = @_;
201
202						# current state
203	175				426	my $state;
204	175	100			257	if (defined($start_rule)) {
205	8	100			145	$state = $self->_state_table->[0]{$start_rule}
206						or croak "Rule $start_rule not found";
207						}
208						else {
209	167	50			382	$state = $self->_start_state
210						or croak "Start state not found";
211						}
212	174				276	return $self->_parse($state);
213						}
214
215						#------------------------------------------------------------------------------
216						sub _parse {
217	194		194		153	my($self, $state) = @_;
218
219	194				177	my @values = ();
220
221						# return stack of states
222	194				139	my @stack = (); # store: [$state, @values]
223
224						# fetch token only after drop and after calling parser rules
225	194				252	my $token = $self->peek_token;
226	194				158	while (1) {
227	2572				1464	my($entry, $found_else);
228	2572	100			5245	if ($entry = $self->_state_table->[$state]{($token ? $token->[0] : "")}) {
		100
		100
229						# entry exists, found token
230						}
231						elsif ($entry = $self->_state_table->[$state]{__else__}) {
232	521				387	$found_else++;
233						}
234						else {
235	82				155	$self->_error_at($token, $state);
236						}
237
238	2490	100			2659	if (ref($entry) eq 'ARRAY') { # call sub-rule
239	1411				1024	my($next_state, $return_state) = @$entry;
240	1411				1394	push(@stack, [ $return_state, @values ]); # return data
241	1411				1351	($state, @values) = ($next_state); # call
242						}
243						else { # accept token
244	1079				673	$state = $entry;
245
246	1079	100			1231	if (!$found_else) {
247	725	100			913	push(@values, $token) if $token; # add token to values
248	725				848	$self->get_token; # drop value
249	725				742	$token = $self->peek_token; # and get next token
250						}
251
252	1075				1600	while (ref($state) eq 'CODE') { # return from sub-rules
253	1460				5915	my $value = $self->$state(@values);
254	1460				1533	$token = $self->peek_token; # input may have changed
255
256	1460	100			1774	if ( ! @stack ) { # END OF PARSE
257	108				426	return $value;
258						}
259
260	1352				909	my $top = pop(@stack);
261	1352				1661	($state, @values) = @$top;
262
263						# keep only defined values
264	1352	100			3268	push(@values, $value) if defined($value);
265						}
266						}
267						}
268	0				0	die 'not reached';
269						}
270
271						#------------------------------------------------------------------------------
272						# expected error at given stream position, die with error message
273						sub _error_at {
274	84		84		696	my($self, $token, $state) = @_;
275
276	95				455	my @expected = sort map {_format_token($_)}
277	84				72	keys %{$self->_state_table->[$state]};
	84				196
278	84	100			1149	die("Expected ",
		100
279						scalar(@expected) == 1 ? "@expected" : "one of (@expected)",
280						" at ",
281						defined($token) ? _format_token($token->[0]) : "EOF",
282						"\n");
283						}
284
285						#------------------------------------------------------------------------------
286						# format a token
287						sub _format_token {
288	131		131		102	my($token) = @_;
289	131	50			197	return "" if !defined($token);
290	131	100			217	return "EOF" if $token eq "";
291	113	100			282	return dump($token) if $token =~ /\W/;
292	89				309	return $token;
293						}
294						#------------------------------------------------------------------------------
295
296						=head1 AUTHOR, BUGS, FEEDBACK, LICENSE, COPYRIGHT
297
298						See L
299
300						=cut
301
302						#------------------------------------------------------------------------------
303
304						1;