File Coverage

blib/lib/Parse/FSM.pm
Criterion Covered Total %
statement 276 277 99.6
branch 80 96 83.3
condition 25 33 75.7
subroutine 54 56 96.4
pod 6 6 100.0
total 441 468 94.2


line stmt bran cond sub pod time code
1             # $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2            
3             package Parse::FSM;
4            
5             #------------------------------------------------------------------------------
6            
7             =head1 NAME
8            
9             Parse::FSM - Deterministic top-down parser based on a Finite State Machine
10            
11             =cut
12            
13             #------------------------------------------------------------------------------
14            
15 7     7   25506 use strict;
  7         28  
  7         278  
16 7     7   30 use warnings;
  7         12  
  7         234  
17            
18 7     7   31 use Carp; our @CARP_NOT = ('Parse::FSM');
  7         14  
  7         710  
19 7     7   2494 use Data::Dump 'dump';
  7         29501  
  7         463  
20 7     7   4215 use Text::Template 'fill_in_string';
  7         17950  
  7         414  
21 7     7   3373 use File::Slurp;
  7         64817  
  7         723  
22            
23             our $VERSION = '1.11';
24            
25             #------------------------------------------------------------------------------
26            
27             =head1 SYNOPSIS
28            
29             use Parse::FSM;
30             $fsm = Parse::FSM->new;
31            
32             $fsm->prolog($text);
33             $fsm->epilog($text);
34             $fsm->add_rule($name, @elems, $action);
35             $fsm->start_rule($name);
36            
37             $fsm->parse_grammar($text);
38            
39             $fsm->write_module($module);
40             $fsm->write_module($module, $file);
41            
42             $parser = $fsm->parser; # isa Parse::FSM::Driver
43             $parser->input(\&lexer);
44             $result = $parser->parse;
45            
46             # script
47             perl -MParse::FSM - Grammar.yp Parser::Module
48             perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
49            
50             =head1 DESCRIPTION
51            
52             This module compiles the Finite State Machine used by the
53             L parser module.
54            
55             It can be used by by a sequence of C calls, or by parsing a yacc-like
56             grammar in one go with C.
57            
58             Can be used as a script to generate a module from a grammar file.
59            
60             The result of compiling the parser can be used immediately by retrieving the
61             C object, or a pre-compiled module can be written to disk by
62             C. This module can then be used by the client code of the parser.
63            
64             As usual in top-down parsers, left recursion is not supported
65             and generates an infinite loop. This parser is deterministic and does not implement backtracking.
66            
67             =head1 METHODS - SETUP
68            
69             =head2 new
70            
71             Creates a new object.
72            
73             =cut
74            
75             #------------------------------------------------------------------------------
76             use Class::XSAccessor {
77 6         58 constructor => '_init',
78             accessors => [
79             '_tree', # parse tree
80             # Contains nested HASH tables with the decision tree
81             # used during parsing.
82             # Each node maps:
83             # token => next node / string with action code
84             # [subrule] => next node / string with action code
85             # [subrule]? => next node / string with action code
86             # [subrule]* => next node / string with action code
87             # __else__ => next node / string with action code
88             # The first level are the rule names.
89            
90             '_state_table', # ARRAY that maps each state ID to the corresponding
91             # HASH table from tree.
92             # Copied to the generated parser module.
93            
94             '_action', # map func text => [ sub name, sub text ]
95            
96             'start_rule', # name start rule
97             'prolog', # code to include near the beginning of the file
98             'epilog', # code to include at the end of the file
99             '_names', # keep all generated names up to now, to be able to
100             # create unique ones
101             ],
102 6     6   6176 };
  6         13253  
103            
104             #------------------------------------------------------------------------------
105             sub new {
106 70     70 1 25854 my($class) = @_;
107 70         628 return $class->_init(_tree => {}, _state_table => [], _action => {},
108             _names => {});
109             }
110            
111             #------------------------------------------------------------------------------
112             # create a new unique name (for actions, sub-rules)
113             sub _unique_name {
114 153     153   201 my($self, $name) = @_;
115 153         177 my $id = 1;
116 153         480 while (exists $self->_names->{$name.$id}) {
117 11         25 $id++;
118             }
119 153         364 $self->_names->{$name.$id}++;
120 153         290 return $name.$id;
121             }
122            
123             #------------------------------------------------------------------------------
124            
125             =head1 METHODS - BUILD GRAMMAR
126            
127             =head2 start_rule
128            
129             Name of the grammar start rule. It defaults to the first rule added by C.
130            
131             =head2 prolog, epilog
132            
133             Perl code to include in the generated module near the start of the generated
134             module and near the end of it.
135            
136             =head2 add_rule
137            
138             Adds one rule to the parser.
139            
140             $fsm->add_rule($name, @elems, $action);
141            
142             C<$name> is the name of the rule, i.e. the syntatic object recognized
143             by the rule.
144            
145             C<@elems> is the list of elements in sequence needed to recognize this rule.
146             Each element can be one of:
147            
148             =over 4
149            
150             =item *
151            
152             A string that will match with that token type from the lexer.
153            
154             The empty string is used to match the end of input and should
155             be present in the grammar to force the parser
156             to accept all the input;
157            
158             =item *
159            
160             An array refernce of a list of all possible tokens to accept at this position.
161            
162             =item *
163            
164             A subrule name inside square brackets, optionally followed by a
165             repetion character, that asks the parser to recursively descend
166             to match that subrule at the current input location.
167            
168             The accepted forms are:
169            
170             C<[term]> - recurse to the term rule;
171            
172             C<[term]?> - term is optional;
173            
174             C<[term]*> - accept zero or more terms;
175            
176             C<[term]+> - accept one or more terms;
177            
178             C<[term]E+,E> - accept one or more terms separated by commas,
179             any token type can be used instead of the comma;
180            
181             =back
182            
183             C<$action> is the Perl text of the action executed when the rule is recognized,
184             i.e. all elements were found in sequence.
185            
186             It has to be enclosed in brackets C<{}>, and can use the following lexical
187             variables, that are declared by the generated code:
188            
189             =over 4
190            
191             =item *
192            
193             C<$self> : object pointer;
194            
195             =item *
196            
197             C<@item> : values of all the tokens or rules identified in this rule. The subrule
198             call with repetions return an array reference containing all the found items
199             in the subrule;
200            
201             =back
202            
203             =cut
204            
205             #------------------------------------------------------------------------------
206             # add_rule
207             # Args:
208             # rule name
209             # list of : '[rule]' '[rule]*' '[rule]?' '[rule]+' '[rule]<+SEP>' # subrules
210             # token # tokens
211             # action : '{ CODE }'
212             sub add_rule {
213 151     151 1 11615 my($self, $rule_name, @elems) = @_;
214 151         206 my $action = pop(@elems);
215            
216 151 100       543 @elems or croak "missing arguments";
217 150 50       692 $rule_name =~ /^\w+$/ or croak "invalid rule name ".dump($rule_name);
218            
219             # check for array-ref @elem and recurse for all alternatives
220 150         338 for my $i (0 .. $#elems) {
221 259 100       621 if (ref($elems[$i])) { # isa 'ARRAY', others cause run-time error
222 2         5 for (@{$elems[$i]}) {
  2         7  
223 4         31 $self->add_rule($rule_name,
224             @elems[0 .. $i-1], $_, @elems[$i+1 .. $#elems],
225             $action);
226             }
227 2         7 return;
228             }
229             }
230            
231 148         398 $self->_check_start_rule($rule_name);
232            
233             # load the tree
234 148         233 my $tree = $self->_tree;
235 148         277 $tree = $self->_add_tree_node($tree, $rule_name); # load rule name
236            
237 148         290 my $comment = "$rule_name :";
238            
239 148         478 while (@elems) {
240 252         278 my $elem = shift @elems;
241            
242             # handle subrule calls with quantifiers
243             # check if recursing for _add_list_rule
244 252 100 100     1193 if ($rule_name !~ /^_lst_/ &&
245             $elem =~ /^ \[ .* \] /x) {
246 68         152 $elem = $self->_add_list_rule($elem);
247             }
248            
249 252         428 $tree->{__comment__} = $comment; # way up to this state
250            
251 252 100       882 $comment .= " ".($elem =~ /^\[/ ? $elem : dump($elem));
252            
253 252 100       12550 if (@elems) { # not a leaf node
254 105 100 100     470 croak "leaf and node at ($comment)"
255             if (exists($tree->{$elem}) && ref($tree->{$elem}) ne 'HASH');
256 104         212 $tree = $self->_add_tree_node($tree, $elem); # load token
257             }
258             else { # leaf node
259 147 100       430 croak "leaf not unique at ($comment)"
260             if (exists($tree->{$elem}));
261 146         292 $self->_add_tree_node($tree, $elem); # create node
262 146         295 $tree->{$elem} = $self->_add_action($action, $rule_name, $comment);
263             }
264             }
265            
266 142         380 return;
267             }
268            
269             #------------------------------------------------------------------------------
270             # add a list subrule, get passed a string '[subrule]*'
271             sub _add_list_rule {
272 68     68   134 my($self, $elem) = @_;
273            
274 68 50       318 $elem =~ /^ \[ (\w+) \] ( [?*+] | <\+.*> )? $/x
275             or croak "invalid subrule call $elem";
276 68         192 my($subrule, $quant) = ($1, $2);
277            
278 68 100       229 return "[$subrule]" unless $quant; # subrule without quatifier
279            
280             # create a list subrule, so that the result of the repetion is returned
281             # as an array reference
282 22         83 my $list_subrule = $self->_unique_name("_lst_".$subrule);
283            
284 22 100 100     181 if ($quant eq '*' || $quant eq '?') {
    100          
    50          
285 12         44 $self->add_rule($list_subrule, "[$subrule]$quant",
286             '{ return \@item }');
287             }
288             elsif ($quant eq '+') { # A+ -> A A*
289 5         38 $self->add_rule($list_subrule, "[$subrule]", "[$subrule]*",
290             '{ return \@item }');
291             }
292             elsif ($quant =~ /^< \+ (.*) >$/x) { # A<+;> -> A Ac* ; Ac : ';' A
293 5         11 my $separator = $1;
294 5         16 my $list_subrule_cont = $self->_unique_name("_lst_".$subrule);
295            
296             # Ac : ';' A
297 5         25 $self->add_rule($list_subrule_cont, $separator, "[$subrule]",
298             '{ return $item[1] }');
299            
300             # A Ac*
301 5         22 $self->add_rule($list_subrule, "[$subrule]", "[$list_subrule_cont]*",
302             '{ return \@item }');
303             }
304             else {
305 0         0 die; # not reached
306             }
307            
308 22         62 return "[$list_subrule]";
309             }
310            
311             #------------------------------------------------------------------------------
312             # add a tree node and create a new state
313             sub _add_tree_node {
314 398     398   456 my($self, $tree, $elem) = @_;
315            
316 398   100     1473 $tree->{$elem} ||= {};
317            
318             # new state?
319 398 100       787 if (! exists $tree->{__state__}) {
320 278         233 my $id = scalar(@{$self->_state_table});
  278         448  
321 278         351 $tree->{__state__} = $id;
322 278         458 $self->_state_table->[$id] = $tree;
323             }
324            
325 398         647 return $tree->{$elem};
326             }
327            
328             #------------------------------------------------------------------------------
329             # define start rule, except if starting with '_' (internal)
330             sub _check_start_rule {
331 151     151   1129 my($self, $rule_name) = @_;
332            
333 151 100 100     810 if (! defined $self->start_rule && $rule_name =~ /^[a-z]/i) {
334 58         144 $self->start_rule($rule_name); # start rule is first defined rule
335             }
336            
337 151         187 return;
338             }
339            
340             #------------------------------------------------------------------------------
341             # _add_action()
342             # Create a new action or re-use an existing one. An action has to start by
343             # '{'; a new name is created and a reference to the name is
344             # returned : "\&_action_RULE"
345             sub _add_action {
346 146     146   209 my($self, $action, $rule_name, $comment) = @_;
347            
348             # remove braces
349 146 100       1824 $action =~ s/ \A \s* \{ \s* (.*?) \s* \} \s* \z /$1/xs
350             or croak "action must be enclosed in {}";
351            
352             # reuse an existing action, if any
353 142         564 (my $cannon_action = $action) =~ s/\s+//g;
354 142 100       423 if (!$self->_action->{$cannon_action}) {
355 126         332 my $action_name = $self->_unique_name("_act_".$rule_name);
356            
357             # reduce indentation
358 126         273 for ($action) {
359 126         244 my($lead_space) = /^(\t+)/m;
360 126 100       505 $lead_space and s/^$lead_space/\t/gm;
361             }
362            
363             $action =
364 126 100       559 "# $comment\n".
365             "sub $action_name {".
366             ($action ne '' ? "\n\tmy(\$self, \@item) = \@_;\n\t" : "").
367             $action.
368             "\n}\n\n";
369            
370 126         436 $self->_action->{$cannon_action} = [ $action_name, $action ];
371             }
372             else {
373             # append this comment
374 16         136 $self->_action->{$cannon_action}[1] =~ s/^(sub)/# $comment\n$1/m;
375             }
376            
377 142         726 return "\\&".$self->_action->{$cannon_action}[0];
378             }
379            
380             #------------------------------------------------------------------------------
381             # compute the FSM machine
382             #
383             # expand [rule] calls into start_set(rule) => [ rule_id, next_state ]
384             # Search for all sub-rule calls, and add each of the first tokens of the subrule
385             # to the call. Repeat until no more rules added, to cope with follow sets being
386             # computed after being looked up
387             # creates FSM loops for the constructs:
388             # A -> B?
389             # A -> B*
390             sub _compute_fsm {
391 48     48   83 my($self) = @_;
392            
393             # repeat until no more follow tokens added
394             # Example : A B[?*] C
395 48         60 my $changed;
396 48         83 do {
397 85         106 $changed = 0;
398            
399             # check all states in turn
400 85         97 for my $state (@{$self->_state_table}) {
  85         205  
401 571         1384 my %state_copy = %$state;
402 571         1228 while (my($token, $next_state) = each %state_copy) {
403 2207 100       7038 next unless my($subrule_name, $quant) =
404             $token =~ /^ \[ (.*) \] ( [?*] )? $/x;
405            
406 248 100       501 my $next_state_text = ref($next_state) eq 'HASH' ?
407             $next_state->{__state__} :
408             $next_state;
409            
410 248 50       580 my $subrule = $self->_tree->{$subrule_name}
411             or croak "rule $subrule_name not found";
412 248 50       432 ref($subrule) eq 'HASH' or die;
413            
414             # call subrule on each of the subrule follow set
415             # Example : add all 'follow(B) -> call B' to current rule
416 248         433 for my $subrule_key (keys %$subrule) {
417 1062 100       2504 next if $subrule_key =~ /^(__(comment|state)__|\[.*\][?*]?)$/;
418 454 100 100     1669 my $text = "[ ".$subrule->{__state__}.", ".
419             (($quant||"") eq '*' ?
420             $state->{__state__} : # loop on a '*'
421             $next_state_text # else, next state
422             )." ]";
423 454 100       593 if ($state->{$subrule_key}) {
424 307 50       606 die if $state->{$subrule_key} ne $text;
425             }
426             else {
427 147         187 $state->{$subrule_key} = $text;
428 147         194 $changed++;
429             }
430             }
431            
432             # call next rule on the next rule follow set
433             # Example : add all 'follow(C) -> end' to end current rule
434 248 100       899 if (defined($quant)) {
435 56 100       103 if ($state->{__else__}) {
436 34 50       144 die if $state->{__else__} ne $next_state_text;
437             }
438             else {
439 22         38 $state->{__else__} = $next_state_text;
440 22         67 $changed++;
441             }
442             }
443             }
444             }
445             } while ($changed);
446            
447 48         78 return;
448             }
449            
450             #------------------------------------------------------------------------------
451            
452             =head2 parse_grammar
453            
454             Parses the given grammar text and adds to the parser. Example grammar follows:
455            
456             {
457             # prolog
458             use MyLibrary;
459             }
460            
461             main : (number | name)+ ;
462             number : 'NUMBER' { $item[0][1] } ; # comment
463             name : 'NAME' { $item[0][1] } ; # comment
464            
465             expr : ;
466            
467            
468            
469             {
470             # epilog
471             sub util_method {...}
472             }
473            
474             =over 4
475            
476             =item prolog
477            
478             If the text contains a code block surronded by braces before the first rule
479             definition, the text is copied without the external braces to the prolog
480             of generated module.
481            
482             =item epilog
483            
484             If the text contains a code block surronded by braces after the last rule
485             definition, the text is copied without the external braces to the epilog
486             of generated module.
487            
488             =item statements
489            
490             Statement are either rule definitions of directives and end with a
491             semi-colon C<;>. Comments are as in Perl, from a hash C<#> sign to
492             the end of the line.
493            
494             =item rule
495            
496             A rule defines one sentence to match in the grammar. The first rule defined
497             is the default start rule, i.e. the rule parsed by default on the input.
498             A rule name must start with a letter and contain only letters,
499             digits and the underscore character.
500            
501             The rule definition follows after a colon and is composed of a sequence
502             of tokens (quoted strings) and sub-rules, to match in sequence. The rule matches
503             when all the tokens and sub-rules in the definition match in sequence.
504            
505             The top level rule should end with CeofE> to make sure all input
506             is parsed.
507            
508             The rule can define several alternative definitions separated by '|'.
509            
510             The rule definition finishes with a semi-colon ';'.
511            
512             A rule can call an anonymous sub-rule eclosed in parentheses.
513            
514             =item action
515            
516             The last item in the rule definition is a text delimited by {} with the code
517             to execute when the rule is matched. The code can use $self to refer to the
518             Parser object, and @item to refer to the values of each of the tokens and
519             sub-rules matched. The return value from the code defines the value of the
520             rule, passed to the upper level rule, or returned as the parse result.
521            
522             If no action is supplied, a default action returns an array reference with
523             the result of all tokens and sub-rules of the matched sentence.
524            
525             =item quantifiers
526            
527             Every token or sub-rule can be followed by a repetition specification:
528             '?' (zero or one), '*' (zero or more), '+' (one or more),
529             or '<+,>' (comma-separated list, comma can be replaced by any token).
530            
531             =item directives
532            
533             Directives are written with angle brackets.
534            
535             =over 4
536            
537             =item
538            
539             Can be used in a rule instead of the empty string to represent the end of input.
540            
541             =item
542            
543             Shortcut for creating lists of operators separated by tokens,
544             returns the list of rule and token values.
545            
546             =item
547            
548             Defines the start rule of the grammar. By default the first
549             defined rule is the start rule; use Cstart:E> to override that.
550            
551             =back
552            
553             =back
554            
555             =cut
556            
557             #------------------------------------------------------------------------------
558             sub parse_grammar {
559 38     38 1 17771 my($self, $text) = @_;
560            
561             # need to postpone load of Parse::FSM::Parser, as Parse::FSM is used by
562             # the script that creates Parse::FSM::Parser
563 38 50   15   2732 eval 'use Parse::FSM::Parser'; $@ and die; ## no critic
  38     18   153  
  13     35   194  
  7     20   32  
  7     28   11  
  7     19   163  
  8     11   36  
  8     23   10  
  8     19   756  
  9     12   54  
  9     18   13  
  9     10   311  
  11     16   98  
  11     4   19  
  11     5   422  
  12     11   66  
  12     1   17  
  12     5   789  
  11     2   60  
  11         18  
  11         1316  
  9         55  
  9         15  
  9         541  
  10         60  
  10         16  
  10         625  
  10         59  
  10         18  
  10         681  
  10         78  
  10         18  
  10         1204  
  9         55  
  9         10  
  9         648  
  8         46  
  8         12  
  8         634  
  7         38  
  7         11  
  7         561  
  6         30  
  6         9  
  6         1266  
  3         19  
  3         5  
  3         402  
  2         7  
  2         3  
  2         224  
  1         4  
  1         1  
564            
565 38         184 my $parser = Parse::FSM::Parser->new;
566 38         172 $parser->user->{fsm} = $self;
567 38         58 eval {
568 38         144 $parser->from($text); # setup lexer
569 38         138 $parser->parse;
570             };
571 38 100       665 $@ and do { $@ =~ s/\s+\z//; croak $@; };
  11         63  
  11         1257  
572            
573 27         440 return;
574             }
575            
576             #------------------------------------------------------------------------------
577            
578             =head1 METHODS - USE PARSER
579            
580             =head2 parser
581            
582             Computes the Finite State Machine to execute the parser and returns a
583             L object that implements the parser.
584            
585             Usefull to build the parser and execute it in the same
586             program, but with the run-time penalty of the time to setup the state tables.
587            
588             =cut
589            
590             #------------------------------------------------------------------------------
591             sub parser {
592 42     42 1 240 my($self) = @_;
593 42   100     127 our $name ||= 'Parser00000'; $name++; # new module on each call
  42         71  
594            
595 42         126 my $text = $self->_module_text($name, "-");
596 42     13   47876 eval $text; ## no critic
  9     10   1972  
  16     17   39  
  14     19   186  
  5     18   28  
  5     17   6  
  5     2   126  
  5     12   1277  
  12     3   17  
  12     8   443  
  6     1   33  
  6     1   16  
  4     0   59  
  6     0   41  
  13         27  
  1         240  
597 42 50       151 $@ and die $@;
598            
599 42         1232 my $parser = $name->new;
600            
601 42         289 return $parser;
602             }
603             #------------------------------------------------------------------------------
604            
605             =head2 write_module
606            
607             Receives as input the module name and the output file name
608             and writes the parser module.
609            
610             The file name is optional; if not supplied is computed from the
611             module name by replacing C<::> by C and appending C<.pm>,
612             e.g. C.
613            
614             The generated code includes C functions for every rule
615             C found in the grammar, as a short-cut for calling C.
616            
617             =cut
618            
619             #------------------------------------------------------------------------------
620             sub write_module {
621 18     18 1 632 my($self, $name, $file) = @_;
622            
623 18 100       125 $name or croak "name not defined";
624            
625             # build file name from module name
626 15 100       38 unless (defined $file) {
627 6         31 $file = $name;
628 10         28 $file =~ s/::/\//g;
629 10         38 $file .= ".pm";
630             }
631            
632 12         28 my $text = $self->_module_text($name, $file);
633 11         4397 write_file($file, {atomic => 1}, $text);
634            
635 17         1205 return;
636             }
637            
638             #------------------------------------------------------------------------------
639             # template code for grammmar parser
640             my $TEMPLATE = <<'END_TEMPLATE';
641             # $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
642             # Parser generated by Parse::FSM
643            
644             package # hide from CPAN indexer
645             <% $name %>;
646            
647             use strict;
648             use warnings;
649            
650             use Parse::FSM::Driver; our @ISA = ('Parse::FSM::Driver');
651            
652             <% $prolog %>
653            
654             <% $table %>
655            
656             sub new {
657             my($class, %args) = @_;
658             return $class->SUPER::new(
659             _state_table => \@state_table,
660             _start_state => $start_state,
661             %args,
662             );
663             }
664            
665             <% $epilog %>
666            
667             1;
668             END_TEMPLATE
669            
670             #------------------------------------------------------------------------------
671             # module text
672             sub _module_text {
673 61     54   133 my($self, $name, $file) = @_;
674            
675 54 50       127 $name or croak "name not defined";
676 54 50       163 $file or croak "file not defined";
677            
678 61         158 my $table = $self->_table_dump;
679            
680 60   50     689 my @template_args = (
      50        
681             DELIMITERS => [ '<%', '%>' ],
682             HASH => {
683             prolog => $self->prolog || "",
684             epilog => $self->epilog || "",
685             name => $name,
686             table => $table,
687             },
688             );
689 57         296 return fill_in_string($TEMPLATE, @template_args);
690             }
691            
692             #------------------------------------------------------------------------------
693             # dump the state table
694             sub _table_dump {
695 48     53   67 my($self) = @_;
696            
697 59         172 $self->_compute_fsm;
698            
699             #print dump($self),"\n" if $ENV{DEBUG};
700            
701 59         102 my $start_state = 0;
702 62 100 66     355 if (defined($self->start_rule) && exists($self->_tree->{$self->start_rule})) {
703 61         193 $start_state = $self->_tree->{$self->start_rule}{__state__};
704             }
705             else {
706 18         122 croak "start state not found";
707             }
708            
709 59         225 my $ret = 'my $start_state = '.$start_state.";\n".
710             'my @state_table = ('."\n";
711 60         75 my $width;
712 60         91 for my $i (0 .. $#{$self->_state_table}) {
  52         179  
713 261   100     992 $ret .= "\t# [$i] " .
714             ($self->_state_table->[$i]{__comment__} || "") .
715             "\n" .
716             "\t{ ";
717 267         264 $width = 2;
718            
719 267         282 for my $key (sort keys %{$self->_state_table->[$i]}) {
  267         923  
720 989 100       2662 next if $key =~ /^(__(comment|state)__|\[.*\][?*]?)$/;
721            
722 428         670 my $value = $self->_state_table->[$i]{$key};
723 428 100       789 $value = $value->{__state__} if ref($value) eq 'HASH';
724            
725 428 100       1186 my $key_text = ($key =~ /^\w+$/) ? $key : dump($key);
726            
727 428         8264 my $item_text = "$key_text => $value, ";
728 428 100       773 if (($width += length($item_text)) > 72) {
729 22         37 $ret .= "\n\t ";
730 19         26 $width = 2 + length($item_text);
731             }
732 425         634 $ret .= $item_text;
733             }
734            
735 264         440 $ret .= "},\n\n";
736             }
737 57         126 $ret .= ");\n\n";
738            
739             # dump action
740 60         103 for (sort {$a->[0] cmp $b->[0]} values %{$self->_action}) {
  132         262  
  59         289  
741 129         252 $ret .= $_->[1];
742             }
743            
744             # dump parse_XXX functions
745 54         105 my $length = 1;
746 59         97 while (my($name, $rule) = each %{$self->_tree}) {
  225         590  
747 174 100       442 next unless $name =~ /^[a-z]/i;
748 97 100       231 $length = length($name) if length($name) > $length;
749             }
750 47         80 while (my($name, $rule) = each %{$self->_tree}) {
  218         496  
751 176 100       406 next unless $name =~ /^[a-z]/i;
752 97         401 $ret .=
753             "sub parse_$name".
754             (" " x ($length - length($name))).
755             " { return shift->_parse($rule->{__state__}) }\n";
756             }
757            
758 54         158 return $ret;
759             }
760            
761             #------------------------------------------------------------------------------
762            
763             =head1 PRE-COMPILING THE GRAMMAR
764            
765             The setup of the parsing tables and creating the parsing module may take up
766             considerable time. Therefore it is usefull to separate the parser generation
767             phase from the parsing phase.
768            
769             =head2 precompile
770            
771             A parser module can be created from a yacc-like grammar file by the
772             following command. The generated file (last parameter) is optional; if not
773             supplied is computed from the module name by replacing C<::> by C and
774             appending C<.pm>, e.g. C:
775            
776             perl -MParse::FSM - Grammar.yp Parser::Module
777             perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
778            
779             This is equivalent to the following Perl program:
780            
781             #!perl
782             use Parse::FSM;
783             Parse::FSM->precompile(@ARGV);
784            
785             The class method C receives as argumens the grammar file, the
786             generated module name and an optional file name, and creates the parsing module.
787            
788             =cut
789            
790             #------------------------------------------------------------------------------
791             sub precompile {
792 8     13 1 20 my($class, $grammar, $module, $file) = @_;
793            
794 5         18 my $self = $class->new;
795 2         7 my $text = read_file($grammar);
796 9         27 $self->parse_grammar($text);
797 10         28 $self->write_module($module, $file);
798            
799 18         43 return;
800             }
801            
802             #------------------------------------------------------------------------------
803             # startup code for pre-compiler
804             # borrowed from Parse::RecDescent
805             sub import {
806 21     16   52 local *_die = sub { warn @_, "\n"; exit 1; };
  12     20   35  
  14         114  
807            
808 21         102 my($package, $file, $line) = caller;
809 25 50 33     104 if (substr($file,0,1) eq '-' && $line == 0) {
810 9 0 0     31 _die("Usage: perl -MParse::FSM - GRAMMAR MODULE::NAME [MODULE/NAME.pm]")
811             unless @ARGV == 2 || @ARGV == 3;
812            
813 5         19 my($grammar, $module, $file) = @ARGV;
814 10         36 eval {
815 10         40 Parse::FSM->precompile($grammar, $module, $file);
816             };
817 1 0       8 $@ and _die($@);
818            
819 1         2 exit 0;
820             }
821            
822 9         99 return;
823             }
824            
825             #------------------------------------------------------------------------------
826            
827            
828             =head1 AUTHOR
829            
830             Paulo Custodio, C<< >>
831            
832             =head1 ACKNOWLEDGEMENTS
833            
834             Calling pre-compiler on C
835             borrowed from L.
836            
837             =head1 BUGS and FEEDBACK
838            
839             Please report any bugs or feature requests through the web interface at
840             L.
841            
842             =head1 LICENSE and COPYRIGHT
843            
844             Copyright (C) 2010-2011 Paulo Custodio.
845            
846             This program is free software; you can redistribute it and/or modify it
847             under the terms of either: the GNU General Public License as published
848             by the Free Software Foundation; or the Artistic License.
849            
850             See http://dev.perl.org/licenses/ for more information.
851            
852             =cut
853            
854             1; # End of Parse::FSM