File Coverage

blib/lib/Parse/FSM.pm

Criterion	Covered	Total	%
statement	276	277	99.6
branch	81	96	84.3
condition	25	33	75.7
subroutine	54	56	96.4
pod	6	6	100.0
total	442	468	94.4

line	stmt	bran	cond	sub	pod	time	code
1							# $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2
3							package Parse::FSM;
4
5							#------------------------------------------------------------------------------
6
7							=head1 NAME
8
9							Parse::FSM - Deterministic top-down parser based on a Finite State Machine
10
11							=cut
12
13							#------------------------------------------------------------------------------
14
15	7			7		16076	use strict;
	7					7
	7					164
16	7			7		21	use warnings;
	7					7
	7					141
17
18	7			7		21	use Carp; our @CARP_NOT = ('Parse::FSM');
	7					9
	7					530
19	7			7		2014	use Data::Dump 'dump';
	7					19527
	7					324
20	7			7		3297	use Text::Template 'fill_in_string';
	7					14141
	7					334
21	7			7		2878	use File::Slurp;
	7					52457
	7					612
22
23							our $VERSION = '1.13';
24
25							#------------------------------------------------------------------------------
26
27							=head1 SYNOPSIS
28
29							use Parse::FSM;
30							$fsm = Parse::FSM->new;
31
32							$fsm->prolog($text);
33							$fsm->epilog($text);
34							$fsm->add_rule($name, @elems, $action);
35							$fsm->start_rule($name);
36
37							$fsm->parse_grammar($text);
38
39							$fsm->write_module($module);
40							$fsm->write_module($module, $file);
41
42							$parser = $fsm->parser; # isa Parse::FSM::Driver
43							$parser->input(\&lexer);
44							$result = $parser->parse;
45
46							# script
47							perl -MParse::FSM - Grammar.yp Parser::Module
48							perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
49
50							=head1 DESCRIPTION
51
52							This module compiles the Finite State Machine used by the
53							L parser module.
54
55							It can be used by a sequence of C calls, or by parsing a yacc-like
56							grammar in one go with C.
57
58							It can be used as a script to generate a module from a grammar file.
59
60							The result of compiling the parser can be used immediately by retrieving the
61							C object, or a pre-compiled module can be written to disk by
62							C. This module can then be used by the client code of the parser.
63
64							As usual in top-down parsers, left recursion is not supported
65							and generates an infinite loop. This parser is deterministic and does not implement backtracking.
66
67							=head1 METHODS - SETUP
68
69							=head2 new
70
71							Creates a new object.
72
73							=cut
74
75							#------------------------------------------------------------------------------
76							use Class::XSAccessor {
77	6					41	constructor => '_init',
78							accessors => [
79							'_tree', # parse tree
80							# Contains nested HASH tables with the decision tree
81							# used during parsing.
82							# Each node maps:
83							# token => next node / string with action code
84							# [subrule] => next node / string with action code
85							# [subrule]? => next node / string with action code
86							# [subrule]* => next node / string with action code
87							# __else__ => next node / string with action code
88							# The first level are the rule names.
89
90							'_state_table', # ARRAY that maps each state ID to the corresponding
91							# HASH table from tree.
92							# Copied to the generated parser module.
93
94							'_action', # map func text => [ sub name, sub text ]
95
96							'start_rule', # name start rule
97							'prolog', # code to include near the beginning of the file
98							'epilog', # code to include at the end of the file
99							'_names', # keep all generated names up to now, to be able to
100							# create unique ones
101							],
102	6			6		2347	};
	6					10009
103
104							#------------------------------------------------------------------------------
105							sub new {
106	70			70	1	20161	my($class) = @_;
107	70					333	return $class->_init(_tree => {}, _state_table => [], _action => {},
108							_names => {});
109							}
110
111							#------------------------------------------------------------------------------
112							# create a new unique name (for actions, sub-rules)
113							sub _unique_name {
114	153			153		135	my($self, $name) = @_;
115	153					118	my $id = 1;
116	153					335	while (exists $self->_names->{$name.$id}) {
117	11					20	$id++;
118							}
119	153					262	$self->_names->{$name.$id}++;
120	153					219	return $name.$id;
121							}
122
123							#------------------------------------------------------------------------------
124
125							=head1 METHODS - BUILD GRAMMAR
126
127							=head2 start_rule
128
129							Name of the grammar start rule. It defaults to the first rule added by C.
130
131							=head2 prolog, epilog
132
133							Perl code to include in the generated module near the start of the generated
134							module and near the end of it.
135
136							=head2 add_rule
137
138							Adds one rule to the parser.
139
140							$fsm->add_rule($name, @elems, $action);
141
142							C<$name> is the name of the rule, i.e. the syntactic object recognized
143							by the rule.
144
145							C<@elems> is the list of elements in sequence needed to recognize this rule.
146							Each element can be one of:
147
148							=over 4
149
150							=item *
151
152							A string that will match with that token type from the lexer.
153
154							The empty string is used to match the end of input and should
155							be present in the grammar to force the parser
156							to accept all the input;
157
158							=item *
159
160							An array reference of a list of all possible tokens to accept at this position.
161
162							=item *
163
164							A subrule name inside square brackets, optionally followed by a
165							repetition character that asks the parser to recursively descend
166							to match that subrule at the current input location.
167
168							The accepted forms are:
169
170							C<[term]> - recurse to the term rule;
171
172							C<[term]?> - term is optional;
173
174							C<[term]*> - accept zero or more terms;
175
176							C<[term]+> - accept one or more terms;
177
178							C<[term]E+,E> - accept one or more terms separated by commas,
179							any token type can be used instead of the comma;
180
181							=back
182
183							C<$action> is the Perl text of the action executed when the rule is recognized,
184							i.e. all elements were found in sequence.
185
186							It has to be enclosed in brackets C<{}>, and can use the following lexical
187							variables that are declared by the generated code:
188
189							=over 4
190
191							=item *
192
193							C<$self> : object pointer;
194
195							=item *
196
197							C<@item> : values of all the tokens or rules identified in this rule. The subrule
198							call with repetitions return an array reference containing all the found items
199							in the subrule;
200
201							=back
202
203							=cut
204
205							#------------------------------------------------------------------------------
206							# add_rule
207							# Args:
208							# rule name
209							# list of : '[rule]' '[rule]*' '[rule]?' '[rule]+' '[rule]<+SEP>' # subrules
210							# token # tokens
211							# action : '{ CODE }'
212							sub add_rule {
213	151			151	1	8695	my($self, $rule_name, @elems) = @_;
214	151					145	my $action = pop(@elems);
215
216	151	100				412	@elems or croak "missing arguments";
217	150	50				439	$rule_name =~ /^\w+$/ or croak "invalid rule name ".dump($rule_name);
218
219							# check for array-ref @elem and recurse for all alternatives
220	150					274	for my $i (0 .. $#elems) {
221	259	100				419	if (ref($elems[$i])) { # isa 'ARRAY', others cause run-time error
222	2					2	for (@{$elems[$i]}) {
	2					4
223	4					16	$self->add_rule($rule_name,
224							@elems[0 .. $i-1], $_, @elems[$i+1 .. $#elems],
225							$action);
226							}
227	2					5	return;
228							}
229							}
230
231	148					201	$self->_check_start_rule($rule_name);
232
233							# load the tree
234	148					145	my $tree = $self->_tree;
235	148					190	$tree = $self->_add_tree_node($tree, $rule_name); # load rule name
236
237	148					202	my $comment = "$rule_name :";
238
239	148					218	while (@elems) {
240	252					197	my $elem = shift @elems;
241
242							# handle subrule calls with quantifiers
243							# check if recursing for _add_list_rule
244	252	100	100			845	if ($rule_name !~ /^_lst_/ &&
245							$elem =~ /^ \[ .* \] /x) {
246	68					95	$elem = $self->_add_list_rule($elem);
247							}
248
249	252					318	$tree->{__comment__} = $comment; # way up to this state
250
251	252	100				564	$comment .= " ".($elem =~ /^\[/ ? $elem : dump($elem));
252
253	252	100				8303	if (@elems) { # not a leaf node
254							croak "leaf and node at ($comment)"
255	105	100	100			344	if (exists($tree->{$elem}) && ref($tree->{$elem}) ne 'HASH');
256	104					141	$tree = $self->_add_tree_node($tree, $elem); # load token
257							}
258							else { # leaf node
259							croak "leaf not unique at ($comment)"
260	147	100				406	if (exists($tree->{$elem}));
261	146					187	$self->_add_tree_node($tree, $elem); # create node
262	146					179	$tree->{$elem} = $self->_add_action($action, $rule_name, $comment);
263							}
264							}
265
266	142					267	return;
267							}
268
269							#------------------------------------------------------------------------------
270							# add a list subrule, get passed a string '[subrule]*'
271							sub _add_list_rule {
272	68			68		84	my($self, $elem) = @_;
273
274	68	50				217	$elem =~ /^ \[ (\w+) \] ( [?+] \| <\+.> )? $/x
275							or croak "invalid subrule call $elem";
276	68					119	my($subrule, $quant) = ($1, $2);
277
278	68	100				144	return "[$subrule]" unless $quant; # subrule without quatifier
279
280							# create a list subrule, so that the result of the repetition is returned
281							# as an array reference
282	22					40	my $list_subrule = $self->_unique_name("_lst_".$subrule);
283
284	22	100	100			125	if ($quant eq '*' \|\| $quant eq '?') {
		100
		50
285	12					25	$self->add_rule($list_subrule, "[$subrule]$quant",
286							'{ return \@item }');
287							}
288							elsif ($quant eq '+') { # A+ -> A A*
289	5					18	$self->add_rule($list_subrule, "[$subrule]", "[$subrule]*",
290							'{ return \@item }');
291							}
292							elsif ($quant =~ /^< \+ (.) >$/x) { # A<+;> -> A Ac ; Ac : ';' A
293	5					7	my $separator = $1;
294	5					11	my $list_subrule_cont = $self->_unique_name("_lst_".$subrule);
295
296							# Ac : ';' A
297	5					14	$self->add_rule($list_subrule_cont, $separator, "[$subrule]",
298							'{ return $item[1] }');
299
300							# A Ac*
301	5					14	$self->add_rule($list_subrule, "[$subrule]", "[$list_subrule_cont]*",
302							'{ return \@item }');
303							}
304							else {
305	0					0	die; # not reached
306							}
307
308	22					37	return "[$list_subrule]";
309							}
310
311							#------------------------------------------------------------------------------
312							# add a tree node and create a new state
313							sub _add_tree_node {
314	398			398		367	my($self, $tree, $elem) = @_;
315
316	398		100			1071	$tree->{$elem} \|\|= {};
317
318							# new state?
319	398	100				538	if (! exists $tree->{__state__}) {
320	278					178	my $id = scalar(@{$self->_state_table});
	278					359
321	278					243	$tree->{__state__} = $id;
322	278					350	$self->_state_table->[$id] = $tree;
323							}
324
325	398					515	return $tree->{$elem};
326							}
327
328							#------------------------------------------------------------------------------
329							# define start rule, except if starting with '_' (internal)
330							sub _check_start_rule {
331	151			151		1441	my($self, $rule_name) = @_;
332
333	151	100	100			526	if (! defined $self->start_rule && $rule_name =~ /^[a-z]/i) {
334	58					106	$self->start_rule($rule_name); # start rule is first defined rule
335							}
336
337	151					139	return;
338							}
339
340							#------------------------------------------------------------------------------
341							# _add_action()
342							# Create a new action or re-use an existing one. An action has to start by
343							# '{'; a new name is created and a reference to the name is
344							# returned : "\&_action_RULE"
345							sub _add_action {
346	146			146		155	my($self, $action, $rule_name, $comment) = @_;
347
348							# remove braces
349	146	100				1256	$action =~ s/ \A \s* \{ \s* (.?) \s \} \s* \z /$1/xs
350							or croak "action must be enclosed in {}";
351
352							# reuse an existing action, if any
353	142					379	(my $cannon_action = $action) =~ s/\s+//g;
354	142	100				272	if (!$self->_action->{$cannon_action}) {
355	126					226	my $action_name = $self->_unique_name("_act_".$rule_name);
356
357							# reduce indentation
358	126					164	for ($action) {
359	126					162	my($lead_space) = /^(\t+)/m;
360	126	100				322	$lead_space and s/^$lead_space/\t/gm;
361							}
362
363							$action =
364	126	100				374	"# $comment\n".
365							"sub $action_name {".
366							($action ne '' ? "\n\tmy(\$self, \@item) = \@_;\n\t" : "").
367							$action.
368							"\n}\n\n";
369
370	126					291	$self->_action->{$cannon_action} = [ $action_name, $action ];
371							}
372							else {
373							# append this comment
374	16					90	$self->_action->{$cannon_action}[1] =~ s/^(sub)/# $comment\n$1/m;
375							}
376
377	142					537	return "\\&".$self->_action->{$cannon_action}[0];
378							}
379
380							#------------------------------------------------------------------------------
381							# compute the FSM machine
382							#
383							# expand [rule] calls into start_set(rule) => [ rule_id, next_state ]
384							# Search for all sub-rule calls, and add each of the first tokens of the subrule
385							# to the call. Repeat until no more rules added, to cope with follow sets being
386							# computed after being looked up
387							# creates FSM loops for the constructs:
388							# A -> B?
389							# A -> B*
390							sub _compute_fsm {
391	48			48		47	my($self) = @_;
392
393							# repeat until no more follow tokens added
394							# Example : A B[?*] C
395	48					38	my $changed;
396	48					36	do {
397	85					52	$changed = 0;
398
399							# check all states in turn
400	85					55	for my $state (@{$self->_state_table}) {
	85					126
401	571					1132	my %state_copy = %$state;
402	571					905	while (my($token, $next_state) = each %state_copy) {
403	2207	100				5088	next unless my($subrule_name, $quant) =
404							$token =~ /^ \[ (.) \] ( [?] )? $/x;
405
406							my $next_state_text = ref($next_state) eq 'HASH' ?
407							$next_state->{__state__} :
408	248	100				340	$next_state;
409
410	248	50				420	my $subrule = $self->_tree->{$subrule_name}
411							or croak "rule $subrule_name not found";
412	248	50				291	ref($subrule) eq 'HASH' or die;
413
414							# call subrule on each of the subrule follow set
415							# Example : add all 'follow(B) -> call B' to current rule
416	248					341	for my $subrule_key (keys %$subrule) {
417	1062	100				1740	next if $subrule_key =~ /^(__(comment\|state)__\|\[.\][?]?)$/;
418							my $text = "[ ".$subrule->{__state__}.", ".
419							(($quant\|\|"") eq '*' ?
420							$state->{__state__} : # loop on a '*'
421	454	100	100			1355	$next_state_text # else, next state
422							)." ]";
423	454	100				417	if ($state->{$subrule_key}) {
424	307	50				459	die if $state->{$subrule_key} ne $text;
425							}
426							else {
427	147					125	$state->{$subrule_key} = $text;
428	147					131	$changed++;
429							}
430							}
431
432							# call next rule on the next rule follow set
433							# Example : add all 'follow(C) -> end' to end current rule
434	248	100				593	if (defined($quant)) {
435	56	100				63	if ($state->{__else__}) {
436	34	50				97	die if $state->{__else__} ne $next_state_text;
437							}
438							else {
439	22					34	$state->{__else__} = $next_state_text;
440	22					52	$changed++;
441							}
442							}
443							}
444							}
445							} while ($changed);
446
447	48					47	return;
448							}
449
450							#------------------------------------------------------------------------------
451
452							=head2 parse_grammar
453
454							Parses the given grammar text and adds to the parser. Example grammar follows:
455
456							{
457							# prolog
458							use MyLibrary;
459							}
460
461							main : (number \| name)+ ;
462							number : 'NUMBER' { $item[0][1] } ; # comment
463							name : 'NAME' { $item[0][1] } ; # comment
464
465							expr : ;
466
467
468
469							{
470							# epilog
471							sub util_method {...}
472							}
473
474							=over 4
475
476							=item prolog
477
478							If the text contains a code block surrounded by braces before the first rule
479							definition, the text is copied without the external braces to the prolog
480							of generated module.
481
482							=item epilog
483
484							If the text contains a code block surrounded by braces after the last rule
485							definition, the text is copied without the external braces to the epilog
486							of generated module.
487
488							=item statements
489
490							Statements are either rule definitions of directives and end with a
491							semi-colon C<;>. Comments are as in Perl, from a hash C<#> sign to
492							the end of the line.
493
494							=item rule
495
496							A rule defines one sentence to match in the grammar. The first rule defined
497							is the default start rule, i.e. the rule parsed by default on the input.
498							A rule name must start with a letter and contain only letters,
499							digits and the underscore character.
500
501							The rule definition follows after a colon and is composed of a sequence
502							of tokens (quoted strings) and sub-rules, to match in sequence. The rule matches
503							when all the tokens and sub-rules in the definition match in sequence.
504
505							The top level rule should end with CeofE> to make sure all input
506							is parsed.
507
508							The rule can define several alternative definitions separated by '\|'.
509
510							The rule definition finishes with a semi-colon ';'.
511
512							A rule can call an anonymous sub-rule enclosed in parentheses.
513
514							=item action
515
516							The last item in the rule definition is a text delimited by {} with the code
517							to execute when the rule is matched. The code can use $self to refer to the
518							Parser object, and @item to refer to the values of each of the tokens and
519							sub-rules matched. The return value from the code defines the value of the
520							rule, passed to the upper level rule, or returned as the parse result.
521
522							If no action is supplied, a default action returns an array reference with
523							the result of all tokens and sub-rules of the matched sentence.
524
525							=item quantifiers
526
527							Every token or sub-rule can be followed by a repetition specification:
528							'?' (zero or one), '*' (zero or more), '+' (one or more),
529							or '<+,>' (comma-separated list, comma can be replaced by any token).
530
531							=item directives
532
533							Directives are written with angle brackets.
534
535							=over 4
536
537							=item
538
539							Can be used in a rule instead of the empty string to represent the end of input.
540
541							=item
542
543							Shortcut for creating lists of operators separated by tokens,
544							returns the list of rule and token values.
545
546							=item
547
548							Defines the start rule of the grammar. By default the first
549							defined rule is the start rule; use Cstart:E> to override that.
550
551							=back
552
553							=back
554
555							=cut
556
557							#------------------------------------------------------------------------------
558							sub parse_grammar {
559	38			38	1	9994	my($self, $text) = @_;
560
561							# need to postpone load of Parse::FSM::Parser, as Parse::FSM is used by
562							# the script that creates Parse::FSM::Parser
563	38	50		10		2039	eval 'use Parse::FSM::Parser'; $@ and die; ## no critic
	38			19		99
	13			20		108
	14			29		37
	7			22		7
	7			19		117
	15			20		39
	15			11		20
	8			22		635
	9			15		39
	9			9		11
	9			17		236
	11			18		44
	11			6		11
	11			5		294
	12			3		42
	12			8		11
	12			10		644
	11			2		42
	11					12
	11					1007
	9					32
	9					9
	9					442
	10					38
	10					8
	10					456
	10					34
	10					11
	10					509
	10					39
	10					12
	10					989
	9					37
	9					10
	9					475
	8					28
	8					8
	8					478
	7					25
	7					9
	7					496
	6					20
	6					7
	6					946
	3					9
	3					7
	3					207
	2					6
	2					2
	2					182
	1					3
	1					1
564
565	38					109	my $parser = Parse::FSM::Parser->new;
566	38					80	$parser->user->{fsm} = $self;
567	38					38	eval {
568	38					68	$parser->from($text); # setup lexer
569	38					71	$parser->parse;
570							};
571	38	100				454	$@ and do { $@ =~ s/\s+\z//; croak $@; };
	11					46
	11					778
572
573	27					243	return;
574							}
575
576							#------------------------------------------------------------------------------
577
578							=head1 METHODS - USE PARSER
579
580							=head2 parser
581
582							Computes the Finite State Machine to execute the parser and returns a
583							L object that implements the parser.
584
585							Useful to build the parser and execute it in the same
586							program, but with the run-time penalty of the time to setup the state tables.
587
588							=cut
589
590							#------------------------------------------------------------------------------
591							sub parser {
592	42			42	1	120	my($self) = @_;
593	42		100			66	our $name \|\|= 'Parser00000'; $name++; # new module on each call
	42					41
594
595	42					64	my $text = $self->_module_text($name, "-");
596	42			16		31762	eval $text; ## no critic
	9			15		1031
	9			15		20
	5			11		112
	5			25		19
	5			16		6
	5			4		100
	12			1		1114
	12			3		20
	5			1		466
	4			8		17
	4			1		4
	4			0		42
	6			0		26
	6					7
	1					189
597	42	50				99	$@ and die $@;
598
599	42					947	my $parser = $name->new;
600
601	42					146	return $parser;
602							}
603							#------------------------------------------------------------------------------
604
605							=head2 write_module
606
607							Receives as input the module name and the output file name
608							and writes the parser module.
609
610							The file name is optional; if not supplied is computed from the
611							module name by replacing C<::> by C and appending C<.pm>,
612							e.g. C.
613
614							The generated code includes C functions for every rule
615							C found in the grammar, as a short-cut for calling C.
616
617							=cut
618
619							#------------------------------------------------------------------------------
620							sub write_module {
621	13			13	1	736	my($self, $name, $file) = @_;
622
623	13	100				94	$name or croak "name not defined";
624
625							# build file name from module name
626	22	100				45	unless (defined $file) {
627	13					35	$file = $name;
628	12					23	$file =~ s/::/\//g;
629	12					23	$file .= ".pm";
630							}
631
632	20					27	my $text = $self->_module_text($name, $file);
633	19					3424	write_file($file, {atomic => 1}, $text);
634
635	13					1013	return;
636							}
637
638							#------------------------------------------------------------------------------
639							# template code for grammmar parser
640							my $TEMPLATE = <<'END_TEMPLATE';
641							# $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
642							# Parser generated by Parse::FSM
643
644							package # hide from CPAN indexer
645							<% $name %>;
646
647							use strict;
648							use warnings;
649
650							use Parse::FSM::Driver; our @ISA = ('Parse::FSM::Driver');
651
652							<% $prolog %>
653
654							<% $table %>
655
656							sub new {
657							my($class, %args) = @_;
658							return $class->SUPER::new(
659							_state_table => \@state_table,
660							_start_state => $start_state,
661							%args,
662							);
663							}
664
665							<% $epilog %>
666
667							1;
668							END_TEMPLATE
669
670							#------------------------------------------------------------------------------
671							# module text
672							sub _module_text {
673	57			61		67	my($self, $name, $file) = @_;
674
675	58	50				84	$name or croak "name not defined";
676	58	100				99	$file or croak "file not defined";
677
678	64					81	my $table = $self->_table_dump;
679
680	63		50			395	my @template_args = (
			50
681							DELIMITERS => [ '<%', '%>' ],
682							HASH => {
683							prolog => $self->prolog \|\| "",
684							epilog => $self->epilog \|\| "",
685							name => $name,
686							table => $table,
687							},
688							);
689	62					146	return fill_in_string($TEMPLATE, @template_args);
690							}
691
692							#------------------------------------------------------------------------------
693							# dump the state table
694							sub _table_dump {
695	53			55		64	my($self) = @_;
696
697	57					66	$self->_compute_fsm;
698
699							#print dump($self),"\n" if $ENV{DEBUG};
700
701	57					56	my $start_state = 0;
702	55	100	66			202	if (defined($self->start_rule) && exists($self->_tree->{$self->start_rule})) {
703	54					99	$start_state = $self->_tree->{$self->start_rule}{__state__};
704							}
705							else {
706	24					104	croak "start state not found";
707							}
708
709	65					112	my $ret = 'my $start_state = '.$start_state.";\n".
710							'my @state_table = ('."\n";
711	59					55	my $width;
712	59					65	for my $i (0 .. $#{$self->_state_table}) {
	51					116
713							$ret .= "\t# [$i] " .
714	260		100			701	($self->_state_table->[$i]{__comment__} \|\| "") .
715							"\n" .
716							"\t{ ";
717	262					179	$width = 2;
718
719	262					181	for my $key (sort keys %{$self->_state_table->[$i]}) {
	263					681
720	985	100				1888	next if $key =~ /^(__(comment\|state)__\|\[.\][?]?)$/;
721
722	427					448	my $value = $self->_state_table->[$i]{$key};
723	427	100				572	$value = $value->{__state__} if ref($value) eq 'HASH';
724
725	430	100				824	my $key_text = ($key =~ /^\w+$/) ? $key : dump($key);
726
727	430					6313	my $item_text = "$key_text => $value, ";
728	430	100				551	if (($width += length($item_text)) > 72) {
729	24					29	$ret .= "\n\t ";
730	29					29	$width = 2 + length($item_text);
731							}
732	435					482	$ret .= $item_text;
733							}
734
735	259					296	$ret .= "},\n\n";
736							}
737	54					73	$ret .= ");\n\n";
738
739							# dump action
740	51					42	for (sort {$a->[0] cmp $b->[0]} values %{$self->_action}) {
	129					156
	54					161
741	127					163	$ret .= $_->[1];
742							}
743
744							# dump parse_XXX functions
745	55					53	my $length = 1;
746	55					62	while (my($name, $rule) = each %{$self->_tree}) {
	220					412
747	172	100				327	next unless $name =~ /^[a-z]/i;
748	93	100				154	$length = length($name) if length($name) > $length;
749							}
750	54					70	while (my($name, $rule) = each %{$self->_tree}) {
	218					370
751	170	100				277	next unless $name =~ /^[a-z]/i;
752	91					250	$ret .=
753							"sub parse_$name".
754							(" " x ($length - length($name))).
755							" { return shift->_parse($rule->{__state__}) }\n";
756							}
757
758	50					103	return $ret;
759							}
760
761							#------------------------------------------------------------------------------
762
763							=head1 PRE-COMPILING THE GRAMMAR
764
765							The setup of the parsing tables and creating the parsing module may take up
766							considerable time. Therefore it is useful to separate the parser generation
767							phase from the parsing phase.
768
769							=head2 precompile
770
771							A parser module can be created from a yacc-like grammar file by the
772							following command. The generated file (last parameter) is optional; if not
773							supplied is computed from the module name by replacing C<::> by C and
774							appending C<.pm>, e.g. C:
775
776							perl -MParse::FSM - Grammar.yp Parser::Module
777							perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
778
779							This is equivalent to the following Perl program:
780
781							#!perl
782							use Parse::FSM;
783							Parse::FSM->precompile(@ARGV);
784
785							The class method C receives as arguments the grammar file, the
786							generated module name and an optional file name, and creates the parsing module.
787
788							=cut
789
790							#------------------------------------------------------------------------------
791							sub precompile {
792	3			21	1	8	my($class, $grammar, $module, $file) = @_;
793
794	7					16	my $self = $class->new;
795	8					57	my $text = read_file($grammar);
796	7					19	$self->parse_grammar($text);
797	11					17	$self->write_module($module, $file);
798
799	17					34	return;
800							}
801
802							#------------------------------------------------------------------------------
803							# startup code for pre-compiler
804							# borrowed from Parse::RecDescent
805							sub import {
806	8			12		15	local *_die = sub { warn @_, "\n"; exit 1; };
	17			24		27
	32					102
807
808	23					47	my($package, $file, $line) = caller;
809	18	50	33			69	if (substr($file,0,1) eq '-' && $line == 0) {
810	9	0	0			21	_die("Usage: perl -MParse::FSM - GRAMMAR MODULE::NAME [MODULE/NAME.pm]")
811							unless @ARGV == 2 \|\| @ARGV == 3;
812
813	5					54	my($grammar, $module, $file) = @ARGV;
814	1					6	eval {
815	10					15	Parse::FSM->precompile($grammar, $module, $file);
816							};
817	10	0				23	$@ and _die($@);
818
819	10					16	exit 0;
820							}
821
822	16					86	return;
823							}
824
825							#------------------------------------------------------------------------------
826
827
828							=head1 AUTHOR
829
830							Paulo Custodio, C<< >>
831
832							=head1 ACKNOWLEDGEMENTS
833
834							Calling pre-compiler on C
835							borrowed from L.
836
837							=head1 BUGS and FEEDBACK
838
839							Please report any bugs or feature requests through the web interface at
840							L.
841
842							=head1 LICENSE and COPYRIGHT
843
844							Copyright (C) 2010-2011 Paulo Custodio.
845
846							This program is free software; you can redistribute it and/or modify it
847							under the terms of either: the GNU General Public License as published
848							by the Free Software Foundation; or the Artistic License.
849
850							See http://dev.perl.org/licenses/ for more information.
851
852							=cut
853
854							1; # End of Parse::FSM