File Coverage

blib/lib/Parse/FSM/Driver.pm
Criterion Covered Total %
statement 73 74 98.6
branch 34 36 94.4
condition n/a
subroutine 14 14 100.0
pod 5 5 100.0
total 126 129 97.6


line stmt bran cond sub pod time code
1             # $Id: Driver.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2            
3             package Parse::FSM::Driver;
4            
5             #------------------------------------------------------------------------------
6            
7             =head1 NAME
8            
9             Parse::FSM::Driver - Run-time engine for Parse::FSM parser
10            
11             =cut
12            
13             #------------------------------------------------------------------------------
14            
15 6     6   504 use warnings;
  6         7  
  6         161  
16 6     6   19 use strict;
  6         6  
  6         88  
17            
18 6     6   17 use Carp; our @CARP_NOT = ('Parse::FSM::Driver');
  6         6  
  6         385  
19 6     6   412 use Data::Dump 'dump';
  6         3504  
  6         424  
20            
21             our $VERSION = '1.13';
22            
23             #------------------------------------------------------------------------------
24            
25             =head1 SYNOPSIS
26            
27             use MyParser; # isa Parse::FSM::Driver
28            
29             $parser = MyParser->new;
30             $parser->input( \&lexer );
31             $parser->user( $user_pointer );
32            
33             $result = $parser->parse( $start_rule );
34             $result = $parser->parse_start_rule;
35            
36             $token = $parser->peek_token;
37             $token = $parser->get_token;
38             $parser->unget_token(@tokens);
39            
40             =head1 DESCRIPTION
41            
42             This module implements a deterministic top-down parser based on a
43             pre-computed Finite State Machine (FSM).
44            
45             The FSM is generated by L, by
46             reading a BNF-type grammar file and generating
47             a run-time module that includes the state tables. The module also include
48             the run-time parsing routine that follows the state tables to obtain
49             a parse of the input.
50            
51             This module is not intended to be used stand alone. It is used as a base class
52             by the modules generated by L.
53            
54             =head1 METHODS - SETUP
55            
56             =head2 new
57            
58             Creates a new object.
59            
60             =head2 user
61            
62             Get/set of the parser user pointer. The user pointer is not used by the parser,
63             and is available for communication between the parser actions and the
64             calling module.
65            
66             It can for example point to a data structure that describes the objects
67             already identified in the parse.
68            
69             =cut
70            
71             #------------------------------------------------------------------------------
72             # Parsing state machine
73             # Each state hash has:
74             # terminal => (state ID), for a match
75             # terminal => [ (subrule ID), (next state ID) ], for a sub-rule
76             # followed by a match
77             # terminal => [ (subrule ID), sub{} ], for a sub-rule followed by an accept
78             # terminal => sub{}, for an accept
79             # Each sub{} has $self and @args pre-declared
80             # @args is [] of all parsed elements
81             # $self is the Parse::FSM::Driver object
82            
83             #------------------------------------------------------------------------------
84             use Class::XSAccessor {
85 6         39 constructor => '_init',
86             accessors => [
87             'input', # input iterator
88             '_head', # unget queue of tokens retrived from input
89             'user', # user pointer
90             '_state_table', # list of states
91             '_start_state', # ID of start state
92             ],
93 6     6   396 };
  6         1646  
94            
95             #------------------------------------------------------------------------------
96             sub new {
97 100     100 1 1362 my($class, @args) = @_;
98             return $class->_init(
99       3     input => sub {},
100 100         825 _head => [],
101             user => {},
102             _state_table => [],
103             _start_state => 0,
104             @args);
105             }
106             #------------------------------------------------------------------------------
107            
108             =head1 METHODS - INPUT STREAM
109            
110             =head2 input
111            
112             Get/set the parser input lexer iterator. The iterator is a code reference of
113             a function that returns the next token to be parsed as an array ref,
114             with token type and token value C<[$type, $value]>.
115             It returns C on end of input. E.g. for a simple expression lexer:
116            
117             sub make_lexer {
118             my($line) = @_;
119             return sub {
120             for ($line) {
121             /\G\s+/gc;
122             return [NUM => $1] if /\G(\d+)/gc;
123             return [NAME => $1] if /\G([a-z]\w*)/gci;
124             return [$1 => $1] if /\G(.)/gc;
125             return;
126             }
127             };
128             }
129             $parser->input(make_lexer("2+3*4"));
130            
131             =head2 peek_token
132            
133             Returns the next token to be retrieved by the lexer, but keeps it in the input
134             queue. Can be used by a rule action to decide based on the input that follows.
135            
136             =cut
137            
138             #------------------------------------------------------------------------------
139             sub peek_token {
140 2407     2407 1 1595 my($self) = @_;
141 2407 100       1392 @{$self->_head} or push @{$self->_head}, $self->input->();
  1340         2183  
  2407         3661  
142 2403         6741 return $self->_head->[0]; # may be undef, if end of input
143             }
144             #------------------------------------------------------------------------------
145            
146             =head2 get_token
147            
148             Extracts the next token from the lexer stream. Can be used by a rule action to
149             discard the following tokens.
150            
151             =cut
152            
153             #------------------------------------------------------------------------------
154             sub get_token {
155 886     886 1 16815 my($self) = @_;
156 886 100       564 @{$self->_head} and return shift @{$self->_head};
  698         853  
  886         1569  
157 188         448 return $self->_head->[0]; # may be undef, if end of input
158             }
159             #------------------------------------------------------------------------------
160            
161             =head2 unget_token
162            
163             Pushes back the given list of tokens to the lexer input stream, to be retrieved
164             on the next calls to C.
165            
166             =cut
167            
168             #------------------------------------------------------------------------------
169             sub unget_token {
170 4     4 1 7 my($self, @tokens) = @_;
171 4         4 unshift @{$self->_head}, @tokens;
  4         9  
172 4         33 return;
173             }
174             #------------------------------------------------------------------------------
175            
176             =head1 METHODS - PARSING
177            
178             =head2 parse
179            
180             This function receives an optional start rule name, and uses the default rule
181             of the grammar if not supplied.
182            
183             It parses the input stream, leaving the stream at the first unparsed
184             token, and returns the parse value - the result of the action function for the
185             start rule.
186            
187             The function dies with an error message indicating the input that cannot
188             be parsed in case of a parse error.
189            
190             =head2 parse_XXX
191            
192             For each rule C in the grammar, L creates a
193             corresponding C to start the parse at that rule. This is a
194             short-cut to C.
195            
196             =cut
197            
198             #------------------------------------------------------------------------------
199             sub parse {
200 175     175 1 7382 my($self, $start_rule) = @_;
201            
202             # current state
203 175         426 my $state;
204 175 100       257 if (defined($start_rule)) {
205 8 100       145 $state = $self->_state_table->[0]{$start_rule}
206             or croak "Rule $start_rule not found";
207             }
208             else {
209 167 50       382 $state = $self->_start_state
210             or croak "Start state not found";
211             }
212 174         276 return $self->_parse($state);
213             }
214            
215             #------------------------------------------------------------------------------
216             sub _parse {
217 194     194   153 my($self, $state) = @_;
218            
219 194         177 my @values = ();
220            
221             # return stack of states
222 194         139 my @stack = (); # store: [$state, @values]
223            
224             # fetch token only after drop and after calling parser rules
225 194         252 my $token = $self->peek_token;
226 194         158 while (1) {
227 2572         1464 my($entry, $found_else);
228 2572 100       5245 if ($entry = $self->_state_table->[$state]{($token ? $token->[0] : "")}) {
    100          
    100          
229             # entry exists, found token
230             }
231             elsif ($entry = $self->_state_table->[$state]{__else__}) {
232 521         387 $found_else++;
233             }
234             else {
235 82         155 $self->_error_at($token, $state);
236             }
237            
238 2490 100       2659 if (ref($entry) eq 'ARRAY') { # call sub-rule
239 1411         1024 my($next_state, $return_state) = @$entry;
240 1411         1394 push(@stack, [ $return_state, @values ]); # return data
241 1411         1351 ($state, @values) = ($next_state); # call
242             }
243             else { # accept token
244 1079         673 $state = $entry;
245            
246 1079 100       1231 if (!$found_else) {
247 725 100       913 push(@values, $token) if $token; # add token to values
248 725         848 $self->get_token; # drop value
249 725         742 $token = $self->peek_token; # and get next token
250             }
251            
252 1075         1600 while (ref($state) eq 'CODE') { # return from sub-rules
253 1460         5915 my $value = $self->$state(@values);
254 1460         1533 $token = $self->peek_token; # input may have changed
255            
256 1460 100       1774 if ( ! @stack ) { # END OF PARSE
257 108         426 return $value;
258             }
259            
260 1352         909 my $top = pop(@stack);
261 1352         1661 ($state, @values) = @$top;
262            
263             # keep only defined values
264 1352 100       3268 push(@values, $value) if defined($value);
265             }
266             }
267             }
268 0         0 die 'not reached';
269             }
270            
271             #------------------------------------------------------------------------------
272             # expected error at given stream position, die with error message
273             sub _error_at {
274 84     84   696 my($self, $token, $state) = @_;
275            
276 95         455 my @expected = sort map {_format_token($_)}
277 84         72 keys %{$self->_state_table->[$state]};
  84         196  
278 84 100       1149 die("Expected ",
    100          
279             scalar(@expected) == 1 ? "@expected" : "one of (@expected)",
280             " at ",
281             defined($token) ? _format_token($token->[0]) : "EOF",
282             "\n");
283             }
284            
285             #------------------------------------------------------------------------------
286             # format a token
287             sub _format_token {
288 131     131   102 my($token) = @_;
289 131 50       197 return "" if !defined($token);
290 131 100       217 return "EOF" if $token eq "";
291 113 100       282 return dump($token) if $token =~ /\W/;
292 89         309 return $token;
293             }
294             #------------------------------------------------------------------------------
295            
296             =head1 AUTHOR, BUGS, FEEDBACK, LICENSE, COPYRIGHT
297            
298             See L
299            
300             =cut
301            
302             #------------------------------------------------------------------------------
303            
304             1;