File Coverage

blib/lib/Parse/FSM.pm

Criterion	Covered	Total	%
statement	276	277	99.6
branch	80	96	83.3
condition	25	33	75.7
subroutine	54	56	96.4
pod	6	6	100.0
total	441	468	94.2

line	stmt	bran	cond	sub	pod	time	code
1							# $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2
3							package Parse::FSM;
4
5							#------------------------------------------------------------------------------
6
7							=head1 NAME
8
9							Parse::FSM - Deterministic top-down parser based on a Finite State Machine
10
11							=cut
12
13							#------------------------------------------------------------------------------
14
15	7			7		25506	use strict;
	7					28
	7					278
16	7			7		30	use warnings;
	7					12
	7					234
17
18	7			7		31	use Carp; our @CARP_NOT = ('Parse::FSM');
	7					14
	7					710
19	7			7		2494	use Data::Dump 'dump';
	7					29501
	7					463
20	7			7		4215	use Text::Template 'fill_in_string';
	7					17950
	7					414
21	7			7		3373	use File::Slurp;
	7					64817
	7					723
22
23							our $VERSION = '1.11';
24
25							#------------------------------------------------------------------------------
26
27							=head1 SYNOPSIS
28
29							use Parse::FSM;
30							$fsm = Parse::FSM->new;
31
32							$fsm->prolog($text);
33							$fsm->epilog($text);
34							$fsm->add_rule($name, @elems, $action);
35							$fsm->start_rule($name);
36
37							$fsm->parse_grammar($text);
38
39							$fsm->write_module($module);
40							$fsm->write_module($module, $file);
41
42							$parser = $fsm->parser; # isa Parse::FSM::Driver
43							$parser->input(\&lexer);
44							$result = $parser->parse;
45
46							# script
47							perl -MParse::FSM - Grammar.yp Parser::Module
48							perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
49
50							=head1 DESCRIPTION
51
52							This module compiles the Finite State Machine used by the
53							L parser module.
54
55							It can be used by by a sequence of C calls, or by parsing a yacc-like
56							grammar in one go with C.
57
58							Can be used as a script to generate a module from a grammar file.
59
60							The result of compiling the parser can be used immediately by retrieving the
61							C object, or a pre-compiled module can be written to disk by
62							C. This module can then be used by the client code of the parser.
63
64							As usual in top-down parsers, left recursion is not supported
65							and generates an infinite loop. This parser is deterministic and does not implement backtracking.
66
67							=head1 METHODS - SETUP
68
69							=head2 new
70
71							Creates a new object.
72
73							=cut
74
75							#------------------------------------------------------------------------------
76							use Class::XSAccessor {
77	6					58	constructor => '_init',
78							accessors => [
79							'_tree', # parse tree
80							# Contains nested HASH tables with the decision tree
81							# used during parsing.
82							# Each node maps:
83							# token => next node / string with action code
84							# [subrule] => next node / string with action code
85							# [subrule]? => next node / string with action code
86							# [subrule]* => next node / string with action code
87							# __else__ => next node / string with action code
88							# The first level are the rule names.
89
90							'_state_table', # ARRAY that maps each state ID to the corresponding
91							# HASH table from tree.
92							# Copied to the generated parser module.
93
94							'_action', # map func text => [ sub name, sub text ]
95
96							'start_rule', # name start rule
97							'prolog', # code to include near the beginning of the file
98							'epilog', # code to include at the end of the file
99							'_names', # keep all generated names up to now, to be able to
100							# create unique ones
101							],
102	6			6		6176	};
	6					13253
103
104							#------------------------------------------------------------------------------
105							sub new {
106	70			70	1	25854	my($class) = @_;
107	70					628	return $class->_init(_tree => {}, _state_table => [], _action => {},
108							_names => {});
109							}
110
111							#------------------------------------------------------------------------------
112							# create a new unique name (for actions, sub-rules)
113							sub _unique_name {
114	153			153		201	my($self, $name) = @_;
115	153					177	my $id = 1;
116	153					480	while (exists $self->_names->{$name.$id}) {
117	11					25	$id++;
118							}
119	153					364	$self->_names->{$name.$id}++;
120	153					290	return $name.$id;
121							}
122
123							#------------------------------------------------------------------------------
124
125							=head1 METHODS - BUILD GRAMMAR
126
127							=head2 start_rule
128
129							Name of the grammar start rule. It defaults to the first rule added by C.
130
131							=head2 prolog, epilog
132
133							Perl code to include in the generated module near the start of the generated
134							module and near the end of it.
135
136							=head2 add_rule
137
138							Adds one rule to the parser.
139
140							$fsm->add_rule($name, @elems, $action);
141
142							C<$name> is the name of the rule, i.e. the syntatic object recognized
143							by the rule.
144
145							C<@elems> is the list of elements in sequence needed to recognize this rule.
146							Each element can be one of:
147
148							=over 4
149
150							=item *
151
152							A string that will match with that token type from the lexer.
153
154							The empty string is used to match the end of input and should
155							be present in the grammar to force the parser
156							to accept all the input;
157
158							=item *
159
160							An array refernce of a list of all possible tokens to accept at this position.
161
162							=item *
163
164							A subrule name inside square brackets, optionally followed by a
165							repetion character, that asks the parser to recursively descend
166							to match that subrule at the current input location.
167
168							The accepted forms are:
169
170							C<[term]> - recurse to the term rule;
171
172							C<[term]?> - term is optional;
173
174							C<[term]*> - accept zero or more terms;
175
176							C<[term]+> - accept one or more terms;
177
178							C<[term]E+,E> - accept one or more terms separated by commas,
179							any token type can be used instead of the comma;
180
181							=back
182
183							C<$action> is the Perl text of the action executed when the rule is recognized,
184							i.e. all elements were found in sequence.
185
186							It has to be enclosed in brackets C<{}>, and can use the following lexical
187							variables, that are declared by the generated code:
188
189							=over 4
190
191							=item *
192
193							C<$self> : object pointer;
194
195							=item *
196
197							C<@item> : values of all the tokens or rules identified in this rule. The subrule
198							call with repetions return an array reference containing all the found items
199							in the subrule;
200
201							=back
202
203							=cut
204
205							#------------------------------------------------------------------------------
206							# add_rule
207							# Args:
208							# rule name
209							# list of : '[rule]' '[rule]*' '[rule]?' '[rule]+' '[rule]<+SEP>' # subrules
210							# token # tokens
211							# action : '{ CODE }'
212							sub add_rule {
213	151			151	1	11615	my($self, $rule_name, @elems) = @_;
214	151					206	my $action = pop(@elems);
215
216	151	100				543	@elems or croak "missing arguments";
217	150	50				692	$rule_name =~ /^\w+$/ or croak "invalid rule name ".dump($rule_name);
218
219							# check for array-ref @elem and recurse for all alternatives
220	150					338	for my $i (0 .. $#elems) {
221	259	100				621	if (ref($elems[$i])) { # isa 'ARRAY', others cause run-time error
222	2					5	for (@{$elems[$i]}) {
	2					7
223	4					31	$self->add_rule($rule_name,
224							@elems[0 .. $i-1], $_, @elems[$i+1 .. $#elems],
225							$action);
226							}
227	2					7	return;
228							}
229							}
230
231	148					398	$self->_check_start_rule($rule_name);
232
233							# load the tree
234	148					233	my $tree = $self->_tree;
235	148					277	$tree = $self->_add_tree_node($tree, $rule_name); # load rule name
236
237	148					290	my $comment = "$rule_name :";
238
239	148					478	while (@elems) {
240	252					278	my $elem = shift @elems;
241
242							# handle subrule calls with quantifiers
243							# check if recursing for _add_list_rule
244	252	100	100			1193	if ($rule_name !~ /^_lst_/ &&
245							$elem =~ /^ \[ .* \] /x) {
246	68					152	$elem = $self->_add_list_rule($elem);
247							}
248
249	252					428	$tree->{__comment__} = $comment; # way up to this state
250
251	252	100				882	$comment .= " ".($elem =~ /^\[/ ? $elem : dump($elem));
252
253	252	100				12550	if (@elems) { # not a leaf node
254	105	100	100			470	croak "leaf and node at ($comment)"
255							if (exists($tree->{$elem}) && ref($tree->{$elem}) ne 'HASH');
256	104					212	$tree = $self->_add_tree_node($tree, $elem); # load token
257							}
258							else { # leaf node
259	147	100				430	croak "leaf not unique at ($comment)"
260							if (exists($tree->{$elem}));
261	146					292	$self->_add_tree_node($tree, $elem); # create node
262	146					295	$tree->{$elem} = $self->_add_action($action, $rule_name, $comment);
263							}
264							}
265
266	142					380	return;
267							}
268
269							#------------------------------------------------------------------------------
270							# add a list subrule, get passed a string '[subrule]*'
271							sub _add_list_rule {
272	68			68		134	my($self, $elem) = @_;
273
274	68	50				318	$elem =~ /^ \[ (\w+) \] ( [?+] \| <\+.> )? $/x
275							or croak "invalid subrule call $elem";
276	68					192	my($subrule, $quant) = ($1, $2);
277
278	68	100				229	return "[$subrule]" unless $quant; # subrule without quatifier
279
280							# create a list subrule, so that the result of the repetion is returned
281							# as an array reference
282	22					83	my $list_subrule = $self->_unique_name("_lst_".$subrule);
283
284	22	100	100			181	if ($quant eq '*' \|\| $quant eq '?') {
		100
		50
285	12					44	$self->add_rule($list_subrule, "[$subrule]$quant",
286							'{ return \@item }');
287							}
288							elsif ($quant eq '+') { # A+ -> A A*
289	5					38	$self->add_rule($list_subrule, "[$subrule]", "[$subrule]*",
290							'{ return \@item }');
291							}
292							elsif ($quant =~ /^< \+ (.) >$/x) { # A<+;> -> A Ac ; Ac : ';' A
293	5					11	my $separator = $1;
294	5					16	my $list_subrule_cont = $self->_unique_name("_lst_".$subrule);
295
296							# Ac : ';' A
297	5					25	$self->add_rule($list_subrule_cont, $separator, "[$subrule]",
298							'{ return $item[1] }');
299
300							# A Ac*
301	5					22	$self->add_rule($list_subrule, "[$subrule]", "[$list_subrule_cont]*",
302							'{ return \@item }');
303							}
304							else {
305	0					0	die; # not reached
306							}
307
308	22					62	return "[$list_subrule]";
309							}
310
311							#------------------------------------------------------------------------------
312							# add a tree node and create a new state
313							sub _add_tree_node {
314	398			398		456	my($self, $tree, $elem) = @_;
315
316	398		100			1473	$tree->{$elem} \|\|= {};
317
318							# new state?
319	398	100				787	if (! exists $tree->{__state__}) {
320	278					233	my $id = scalar(@{$self->_state_table});
	278					448
321	278					351	$tree->{__state__} = $id;
322	278					458	$self->_state_table->[$id] = $tree;
323							}
324
325	398					647	return $tree->{$elem};
326							}
327
328							#------------------------------------------------------------------------------
329							# define start rule, except if starting with '_' (internal)
330							sub _check_start_rule {
331	151			151		1129	my($self, $rule_name) = @_;
332
333	151	100	100			810	if (! defined $self->start_rule && $rule_name =~ /^[a-z]/i) {
334	58					144	$self->start_rule($rule_name); # start rule is first defined rule
335							}
336
337	151					187	return;
338							}
339
340							#------------------------------------------------------------------------------
341							# _add_action()
342							# Create a new action or re-use an existing one. An action has to start by
343							# '{'; a new name is created and a reference to the name is
344							# returned : "\&_action_RULE"
345							sub _add_action {
346	146			146		209	my($self, $action, $rule_name, $comment) = @_;
347
348							# remove braces
349	146	100				1824	$action =~ s/ \A \s* \{ \s* (.?) \s \} \s* \z /$1/xs
350							or croak "action must be enclosed in {}";
351
352							# reuse an existing action, if any
353	142					564	(my $cannon_action = $action) =~ s/\s+//g;
354	142	100				423	if (!$self->_action->{$cannon_action}) {
355	126					332	my $action_name = $self->_unique_name("_act_".$rule_name);
356
357							# reduce indentation
358	126					273	for ($action) {
359	126					244	my($lead_space) = /^(\t+)/m;
360	126	100				505	$lead_space and s/^$lead_space/\t/gm;
361							}
362
363							$action =
364	126	100				559	"# $comment\n".
365							"sub $action_name {".
366							($action ne '' ? "\n\tmy(\$self, \@item) = \@_;\n\t" : "").
367							$action.
368							"\n}\n\n";
369
370	126					436	$self->_action->{$cannon_action} = [ $action_name, $action ];
371							}
372							else {
373							# append this comment
374	16					136	$self->_action->{$cannon_action}[1] =~ s/^(sub)/# $comment\n$1/m;
375							}
376
377	142					726	return "\\&".$self->_action->{$cannon_action}[0];
378							}
379
380							#------------------------------------------------------------------------------
381							# compute the FSM machine
382							#
383							# expand [rule] calls into start_set(rule) => [ rule_id, next_state ]
384							# Search for all sub-rule calls, and add each of the first tokens of the subrule
385							# to the call. Repeat until no more rules added, to cope with follow sets being
386							# computed after being looked up
387							# creates FSM loops for the constructs:
388							# A -> B?
389							# A -> B*
390							sub _compute_fsm {
391	48			48		83	my($self) = @_;
392
393							# repeat until no more follow tokens added
394							# Example : A B[?*] C
395	48					60	my $changed;
396	48					83	do {
397	85					106	$changed = 0;
398
399							# check all states in turn
400	85					97	for my $state (@{$self->_state_table}) {
	85					205
401	571					1384	my %state_copy = %$state;
402	571					1228	while (my($token, $next_state) = each %state_copy) {
403	2207	100				7038	next unless my($subrule_name, $quant) =
404							$token =~ /^ \[ (.) \] ( [?] )? $/x;
405
406	248	100				501	my $next_state_text = ref($next_state) eq 'HASH' ?
407							$next_state->{__state__} :
408							$next_state;
409
410	248	50				580	my $subrule = $self->_tree->{$subrule_name}
411							or croak "rule $subrule_name not found";
412	248	50				432	ref($subrule) eq 'HASH' or die;
413
414							# call subrule on each of the subrule follow set
415							# Example : add all 'follow(B) -> call B' to current rule
416	248					433	for my $subrule_key (keys %$subrule) {
417	1062	100				2504	next if $subrule_key =~ /^(__(comment\|state)__\|\[.\][?]?)$/;
418	454	100	100			1669	my $text = "[ ".$subrule->{__state__}.", ".
419							(($quant\|\|"") eq '*' ?
420							$state->{__state__} : # loop on a '*'
421							$next_state_text # else, next state
422							)." ]";
423	454	100				593	if ($state->{$subrule_key}) {
424	307	50				606	die if $state->{$subrule_key} ne $text;
425							}
426							else {
427	147					187	$state->{$subrule_key} = $text;
428	147					194	$changed++;
429							}
430							}
431
432							# call next rule on the next rule follow set
433							# Example : add all 'follow(C) -> end' to end current rule
434	248	100				899	if (defined($quant)) {
435	56	100				103	if ($state->{__else__}) {
436	34	50				144	die if $state->{__else__} ne $next_state_text;
437							}
438							else {
439	22					38	$state->{__else__} = $next_state_text;
440	22					67	$changed++;
441							}
442							}
443							}
444							}
445							} while ($changed);
446
447	48					78	return;
448							}
449
450							#------------------------------------------------------------------------------
451
452							=head2 parse_grammar
453
454							Parses the given grammar text and adds to the parser. Example grammar follows:
455
456							{
457							# prolog
458							use MyLibrary;
459							}
460
461							main : (number \| name)+ ;
462							number : 'NUMBER' { $item[0][1] } ; # comment
463							name : 'NAME' { $item[0][1] } ; # comment
464
465							expr : ;
466
467
468
469							{
470							# epilog
471							sub util_method {...}
472							}
473
474							=over 4
475
476							=item prolog
477
478							If the text contains a code block surronded by braces before the first rule
479							definition, the text is copied without the external braces to the prolog
480							of generated module.
481
482							=item epilog
483
484							If the text contains a code block surronded by braces after the last rule
485							definition, the text is copied without the external braces to the epilog
486							of generated module.
487
488							=item statements
489
490							Statement are either rule definitions of directives and end with a
491							semi-colon C<;>. Comments are as in Perl, from a hash C<#> sign to
492							the end of the line.
493
494							=item rule
495
496							A rule defines one sentence to match in the grammar. The first rule defined
497							is the default start rule, i.e. the rule parsed by default on the input.
498							A rule name must start with a letter and contain only letters,
499							digits and the underscore character.
500
501							The rule definition follows after a colon and is composed of a sequence
502							of tokens (quoted strings) and sub-rules, to match in sequence. The rule matches
503							when all the tokens and sub-rules in the definition match in sequence.
504
505							The top level rule should end with CeofE> to make sure all input
506							is parsed.
507
508							The rule can define several alternative definitions separated by '\|'.
509
510							The rule definition finishes with a semi-colon ';'.
511
512							A rule can call an anonymous sub-rule eclosed in parentheses.
513
514							=item action
515
516							The last item in the rule definition is a text delimited by {} with the code
517							to execute when the rule is matched. The code can use $self to refer to the
518							Parser object, and @item to refer to the values of each of the tokens and
519							sub-rules matched. The return value from the code defines the value of the
520							rule, passed to the upper level rule, or returned as the parse result.
521
522							If no action is supplied, a default action returns an array reference with
523							the result of all tokens and sub-rules of the matched sentence.
524
525							=item quantifiers
526
527							Every token or sub-rule can be followed by a repetition specification:
528							'?' (zero or one), '*' (zero or more), '+' (one or more),
529							or '<+,>' (comma-separated list, comma can be replaced by any token).
530
531							=item directives
532
533							Directives are written with angle brackets.
534
535							=over 4
536
537							=item
538
539							Can be used in a rule instead of the empty string to represent the end of input.
540
541							=item
542
543							Shortcut for creating lists of operators separated by tokens,
544							returns the list of rule and token values.
545
546							=item
547
548							Defines the start rule of the grammar. By default the first
549							defined rule is the start rule; use Cstart:E> to override that.
550
551							=back
552
553							=back
554
555							=cut
556
557							#------------------------------------------------------------------------------
558							sub parse_grammar {
559	38			38	1	17771	my($self, $text) = @_;
560
561							# need to postpone load of Parse::FSM::Parser, as Parse::FSM is used by
562							# the script that creates Parse::FSM::Parser
563	38	50		15		2732	eval 'use Parse::FSM::Parser'; $@ and die; ## no critic
	38			18		153
	13			35		194
	7			20		32
	7			28		11
	7			19		163
	8			11		36
	8			23		10
	8			19		756
	9			12		54
	9			18		13
	9			10		311
	11			16		98
	11			4		19
	11			5		422
	12			11		66
	12			1		17
	12			5		789
	11			2		60
	11					18
	11					1316
	9					55
	9					15
	9					541
	10					60
	10					16
	10					625
	10					59
	10					18
	10					681
	10					78
	10					18
	10					1204
	9					55
	9					10
	9					648
	8					46
	8					12
	8					634
	7					38
	7					11
	7					561
	6					30
	6					9
	6					1266
	3					19
	3					5
	3					402
	2					7
	2					3
	2					224
	1					4
	1					1
564
565	38					184	my $parser = Parse::FSM::Parser->new;
566	38					172	$parser->user->{fsm} = $self;
567	38					58	eval {
568	38					144	$parser->from($text); # setup lexer
569	38					138	$parser->parse;
570							};
571	38	100				665	$@ and do { $@ =~ s/\s+\z//; croak $@; };
	11					63
	11					1257
572
573	27					440	return;
574							}
575
576							#------------------------------------------------------------------------------
577
578							=head1 METHODS - USE PARSER
579
580							=head2 parser
581
582							Computes the Finite State Machine to execute the parser and returns a
583							L object that implements the parser.
584
585							Usefull to build the parser and execute it in the same
586							program, but with the run-time penalty of the time to setup the state tables.
587
588							=cut
589
590							#------------------------------------------------------------------------------
591							sub parser {
592	42			42	1	240	my($self) = @_;
593	42		100			127	our $name \|\|= 'Parser00000'; $name++; # new module on each call
	42					71
594
595	42					126	my $text = $self->_module_text($name, "-");
596	42			13		47876	eval $text; ## no critic
	9			10		1972
	16			17		39
	14			19		186
	5			18		28
	5			17		6
	5			2		126
	5			12		1277
	12			3		17
	12			8		443
	6			1		33
	6			1		16
	4			0		59
	6			0		41
	13					27
	1					240
597	42	50				151	$@ and die $@;
598
599	42					1232	my $parser = $name->new;
600
601	42					289	return $parser;
602							}
603							#------------------------------------------------------------------------------
604
605							=head2 write_module
606
607							Receives as input the module name and the output file name
608							and writes the parser module.
609
610							The file name is optional; if not supplied is computed from the
611							module name by replacing C<::> by C and appending C<.pm>,
612							e.g. C.
613
614							The generated code includes C functions for every rule
615							C found in the grammar, as a short-cut for calling C.
616
617							=cut
618
619							#------------------------------------------------------------------------------
620							sub write_module {
621	18			18	1	632	my($self, $name, $file) = @_;
622
623	18	100				125	$name or croak "name not defined";
624
625							# build file name from module name
626	15	100				38	unless (defined $file) {
627	6					31	$file = $name;
628	10					28	$file =~ s/::/\//g;
629	10					38	$file .= ".pm";
630							}
631
632	12					28	my $text = $self->_module_text($name, $file);
633	11					4397	write_file($file, {atomic => 1}, $text);
634
635	17					1205	return;
636							}
637
638							#------------------------------------------------------------------------------
639							# template code for grammmar parser
640							my $TEMPLATE = <<'END_TEMPLATE';
641							# $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
642							# Parser generated by Parse::FSM
643
644							package # hide from CPAN indexer
645							<% $name %>;
646
647							use strict;
648							use warnings;
649
650							use Parse::FSM::Driver; our @ISA = ('Parse::FSM::Driver');
651
652							<% $prolog %>
653
654							<% $table %>
655
656							sub new {
657							my($class, %args) = @_;
658							return $class->SUPER::new(
659							_state_table => \@state_table,
660							_start_state => $start_state,
661							%args,
662							);
663							}
664
665							<% $epilog %>
666
667							1;
668							END_TEMPLATE
669
670							#------------------------------------------------------------------------------
671							# module text
672							sub _module_text {
673	61			54		133	my($self, $name, $file) = @_;
674
675	54	50				127	$name or croak "name not defined";
676	54	50				163	$file or croak "file not defined";
677
678	61					158	my $table = $self->_table_dump;
679
680	60		50			689	my @template_args = (
			50
681							DELIMITERS => [ '<%', '%>' ],
682							HASH => {
683							prolog => $self->prolog \|\| "",
684							epilog => $self->epilog \|\| "",
685							name => $name,
686							table => $table,
687							},
688							);
689	57					296	return fill_in_string($TEMPLATE, @template_args);
690							}
691
692							#------------------------------------------------------------------------------
693							# dump the state table
694							sub _table_dump {
695	48			53		67	my($self) = @_;
696
697	59					172	$self->_compute_fsm;
698
699							#print dump($self),"\n" if $ENV{DEBUG};
700
701	59					102	my $start_state = 0;
702	62	100	66			355	if (defined($self->start_rule) && exists($self->_tree->{$self->start_rule})) {
703	61					193	$start_state = $self->_tree->{$self->start_rule}{__state__};
704							}
705							else {
706	18					122	croak "start state not found";
707							}
708
709	59					225	my $ret = 'my $start_state = '.$start_state.";\n".
710							'my @state_table = ('."\n";
711	60					75	my $width;
712	60					91	for my $i (0 .. $#{$self->_state_table}) {
	52					179
713	261		100			992	$ret .= "\t# [$i] " .
714							($self->_state_table->[$i]{__comment__} \|\| "") .
715							"\n" .
716							"\t{ ";
717	267					264	$width = 2;
718
719	267					282	for my $key (sort keys %{$self->_state_table->[$i]}) {
	267					923
720	989	100				2662	next if $key =~ /^(__(comment\|state)__\|\[.\][?]?)$/;
721
722	428					670	my $value = $self->_state_table->[$i]{$key};
723	428	100				789	$value = $value->{__state__} if ref($value) eq 'HASH';
724
725	428	100				1186	my $key_text = ($key =~ /^\w+$/) ? $key : dump($key);
726
727	428					8264	my $item_text = "$key_text => $value, ";
728	428	100				773	if (($width += length($item_text)) > 72) {
729	22					37	$ret .= "\n\t ";
730	19					26	$width = 2 + length($item_text);
731							}
732	425					634	$ret .= $item_text;
733							}
734
735	264					440	$ret .= "},\n\n";
736							}
737	57					126	$ret .= ");\n\n";
738
739							# dump action
740	60					103	for (sort {$a->[0] cmp $b->[0]} values %{$self->_action}) {
	132					262
	59					289
741	129					252	$ret .= $_->[1];
742							}
743
744							# dump parse_XXX functions
745	54					105	my $length = 1;
746	59					97	while (my($name, $rule) = each %{$self->_tree}) {
	225					590
747	174	100				442	next unless $name =~ /^[a-z]/i;
748	97	100				231	$length = length($name) if length($name) > $length;
749							}
750	47					80	while (my($name, $rule) = each %{$self->_tree}) {
	218					496
751	176	100				406	next unless $name =~ /^[a-z]/i;
752	97					401	$ret .=
753							"sub parse_$name".
754							(" " x ($length - length($name))).
755							" { return shift->_parse($rule->{__state__}) }\n";
756							}
757
758	54					158	return $ret;
759							}
760
761							#------------------------------------------------------------------------------
762
763							=head1 PRE-COMPILING THE GRAMMAR
764
765							The setup of the parsing tables and creating the parsing module may take up
766							considerable time. Therefore it is usefull to separate the parser generation
767							phase from the parsing phase.
768
769							=head2 precompile
770
771							A parser module can be created from a yacc-like grammar file by the
772							following command. The generated file (last parameter) is optional; if not
773							supplied is computed from the module name by replacing C<::> by C and
774							appending C<.pm>, e.g. C:
775
776							perl -MParse::FSM - Grammar.yp Parser::Module
777							perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
778
779							This is equivalent to the following Perl program:
780
781							#!perl
782							use Parse::FSM;
783							Parse::FSM->precompile(@ARGV);
784
785							The class method C receives as argumens the grammar file, the
786							generated module name and an optional file name, and creates the parsing module.
787
788							=cut
789
790							#------------------------------------------------------------------------------
791							sub precompile {
792	8			13	1	20	my($class, $grammar, $module, $file) = @_;
793
794	5					18	my $self = $class->new;
795	2					7	my $text = read_file($grammar);
796	9					27	$self->parse_grammar($text);
797	10					28	$self->write_module($module, $file);
798
799	18					43	return;
800							}
801
802							#------------------------------------------------------------------------------
803							# startup code for pre-compiler
804							# borrowed from Parse::RecDescent
805							sub import {
806	21			16		52	local *_die = sub { warn @_, "\n"; exit 1; };
	12			20		35
	14					114
807
808	21					102	my($package, $file, $line) = caller;
809	25	50	33			104	if (substr($file,0,1) eq '-' && $line == 0) {
810	9	0	0			31	_die("Usage: perl -MParse::FSM - GRAMMAR MODULE::NAME [MODULE/NAME.pm]")
811							unless @ARGV == 2 \|\| @ARGV == 3;
812
813	5					19	my($grammar, $module, $file) = @ARGV;
814	10					36	eval {
815	10					40	Parse::FSM->precompile($grammar, $module, $file);
816							};
817	1	0				8	$@ and _die($@);
818
819	1					2	exit 0;
820							}
821
822	9					99	return;
823							}
824
825							#------------------------------------------------------------------------------
826
827
828							=head1 AUTHOR
829
830							Paulo Custodio, C<< >>
831
832							=head1 ACKNOWLEDGEMENTS
833
834							Calling pre-compiler on C
835							borrowed from L.
836
837							=head1 BUGS and FEEDBACK
838
839							Please report any bugs or feature requests through the web interface at
840							L.
841
842							=head1 LICENSE and COPYRIGHT
843
844							Copyright (C) 2010-2011 Paulo Custodio.
845
846							This program is free software; you can redistribute it and/or modify it
847							under the terms of either: the GNU General Public License as published
848							by the Free Software Foundation; or the Artistic License.
849
850							See http://dev.perl.org/licenses/ for more information.
851
852							=cut
853
854							1; # End of Parse::FSM