File Coverage

blib/lib/Language/FormulaEngine/Parser.pm

Criterion	Covered	Total	%
statement	246	253	97.2
branch	77	88	87.5
condition	27	35	77.1
subroutine	66	70	94.2
pod	33	33	100.0
total	449	479	93.7

line	stmt	bran	cond	sub	pod	time	code
1							package Language::FormulaEngine::Parser;
2	9			9		464892	use Moo;
	9					34224
	9					72
3	9			9		6605	use Carp;
	9					33
	9					588
4	9			9		994	use Try::Tiny;
	9					2659
	9					566
5	9			9		75	use List::Util qw( min max );
	9					23
	9					741
6							use Language::FormulaEngine::Parser::ContextUtil
7	9			9		4191	qw( calc_text_coordinates format_context_string format_context_multiline );
	9					112
	9					662
8	9			9		2039	use namespace::clean;
	9					39086
	9					61
9
10							# ABSTRACT: Create parse tree from an input string
11							our $VERSION = '0.07'; # VERSION
12
13
14							has parse_tree => ( is => 'rw' );
15							has error => ( is => 'rw' );
16							has functions => ( is => 'rw' );
17							has symbols => ( is => 'rw' );
18
19							sub parse {
20	283			283	1	57623	my ($self, $input)= @_;
21	283					807	$self->reset;
22	283					639	$self->{input}= $input;
23	283					839	pos( $self->{input} )= 0;
24							try {
25	283			283		11793	$self->next_token;
26	282					689	my $tree= $self->parse_expr;
27							# It is an error if there was un-processed input.
28	281	100				603	$self->token_type eq '0'
29							or die sprintf('Unexpected %s "%s" near %s',
30							$self->token_type, $self->token_value, $self->token_context);
31	280					973	$self->parse_tree($tree);
32							} catch {
33	3			3		55	chomp;
34	3					16	$self->error($_);
35	283					1803	};
36	283					5218	return $self->parse_tree;
37							}
38
39							sub reset {
40	283			283	1	493	my $self= shift;
41	283					1319	$self->parse_tree(undef);
42	283					588	$self->error(undef);
43	283					726	$self->functions({});
44	283					659	$self->symbols({});
45	283					490	delete @{$self}{'input','token_type','token_value','token_pos'};
	283					1020
46	283					506	$self;
47							}
48
49
50							sub deparse {
51	63			63	1	3734	my ($self, $node)= @_;
52	63	100				205	$node= $self->parse_tree unless @_ > 1;
53	63					168	$node->deparse($self);
54							}
55
56
57	0			0	1	0	sub input { shift->{input} }
58	0			0	1	0	sub input_pos { pos( shift->{input} ) }
59	338			338	1	1164	sub token_type { shift->{token_type} }
60	47			47	1	221	sub token_value { shift->{token_value} }
61	46			46	1	115	sub token_pos { shift->{token_pos} }
62
63
64							sub next_token {
65	2078			2078	1	48526	my $self= shift;
66
67							# If already reached end of input, throw an exception.
68							die "Can't call next_token after end of input"
69	2078	50	100			5827	if '0' eq ($self->{token_type}\|\|'');
70
71							# Detect the next token
72	2078					4219	my ($type, $val, $pos0, $pos1)= ('','');
73	2078					3921	while ($type eq '') {
74	2312		100			4901	$pos0= pos($self->{input}) \|\| 0;
75	2312					54467	($type, $val)= $self->scan_token;
76	2312		100			11930	$pos1= pos($self->{input}) \|\| 0;
77							# Check for end of buffer, even if it matched.
78	2312	100				4877	if ($pos1 >= length $self->{input}) {
79							#pos($self->{input})= $pos0; # rewind to start of token before growing buffer
80							#if ($self->_grow_buffer) {
81							# $log->trace("grow buffer succeeded");
82							# $type= '';
83							# next;
84							#}
85							#pos($self->{input})= $pos1; # restore actual position\
86							# If we didn't get a token or are ignoring this final token, then return the EOF token
87	577	100	100			1769	if (!defined $type \|\| $type eq '') {
88	291					498	$type= 0;
89	291					448	$val= '';
90	291					451	$pos0= $pos1;
91	291					521	last;
92							}
93							}
94	2021	100				3657	defined $type
95							or die "Unknown syntax at ".$self->token_context."\n";
96	2020	50				5107	$pos1 > $pos0
97							or croak "Tokenizer consumed zero characters";
98							}
99	2077					3349	@{$self}{'token_type','token_value','token_pos'}= ($type,$val,$pos0);
	2077					4860
100	2077					3734	return $type, $val;
101							}
102
103
104							sub consume_token {
105	971			971	1	18587	my $self= shift;
106							croak "Can't consume EOF"
107	971	100				2992	if $self->{token_type} eq '0';
108	961					1539	my $val= $self->{token_value};
109	961					2227	$self->next_token;
110	961					2342	return $val;
111							}
112
113							sub token_context {
114	2			2	1	6	my ($self, %args)= @_;
115							return format_context_multiline($self->{input}, $self->{token_pos}\|\|0, pos($self->{input})\|\|0, \%args)
116	2	50	0			6	if delete $args{multiline};
			0
117	2		100			25	return format_context_string($self->{input}, $self->{token_pos}\|\|0, pos($self->{input})\|\|0);
			100
118							}
119
120
121	739			739	1	1492	sub parse_expr { shift->parse_or_expr; }
122
123							sub parse_or_expr {
124	739			739	1	1172	my $self= shift;
125	739					1408	my $first= $self->parse_and_expr;
126	738	100				2031	return $first unless $self->{token_type} eq 'or';
127	2					6	my @or_expr= $first;
128	2					17	while ($self->{token_type} eq 'or') {
129	2					8	$self->next_token;
130	2					5	push @or_expr, $self->parse_and_expr;
131							}
132	2					7	return $self->new_call('or', \@or_expr);
133							}
134
135							sub parse_and_expr {
136	741			741	1	1195	my $self= shift;
137	741					1390	my $first= $self->parse_not_expr;
138	740	100				1757	return $first unless $self->{token_type} eq 'and';
139	8					21	my @and_expr= $first;
140	8					38	while ($self->{token_type} eq 'and') {
141	8					28	$self->next_token;
142	8					26	push @and_expr, $self->parse_not_expr;
143							}
144	8					29	return $self->new_call('and', \@and_expr);
145							}
146
147							sub parse_not_expr {
148	749			749	1	1125	my $self= shift;
149	749	100	66			2804	if ($self->{token_type} eq 'not' or $self->{token_type} eq '!') {
150	5					16	$self->next_token;
151	5					33	return $self->new_call('not', [ $self->parse_cmp_expr ]);
152							}
153	744					1523	return $self->parse_cmp_expr;
154							}
155
156							my %_cmp_ops= map { $_ => 1 } qw( > < >= <= != == );
157							sub parse_cmp_expr {
158	749			749	1	1111	my $self= shift;
159	749					1422	my $first= $self->parse_sum_expr;
160	748	100				2049	return $first unless $_cmp_ops{$self->{token_type}};
161	24					67	my @expr= $first;
162	24					72	while ($_cmp_ops{$self->{token_type}}) {
163	32					81	push @expr, $self->new_string($self->{token_type});
164	32					87	$self->next_token;
165	32					81	push @expr, $self->parse_sum_expr;
166							}
167	24					74	return $self->new_call('compare', \@expr);
168							}
169
170							sub parse_sum_expr {
171	781			781	1	1115	my $self= shift;
172	781					1473	my $first= $self->parse_prod_expr;
173	780	100	100			2800	return $first unless $self->{token_type} eq '+' or $self->{token_type} eq '-';
174	26					68	my @sum_expr= $first;
175	26		100			93	while ($self->{token_type} eq '+' or $self->{token_type} eq '-') {
176	35					89	my $negate= $self->consume_token eq '-';
177	35					104	my $operand= $self->parse_prod_expr;
178	35	100				181	push @sum_expr, $negate? $self->get_negative($operand) : $operand;
179							}
180	26					83	return $self->new_call('sum', \@sum_expr);
181							}
182
183							sub parse_prod_expr {
184	816			816	1	1271	my $self= shift;
185	816					1690	my $value= $self->parse_unit_expr;
186	815		100			3765	while ($self->{token_type} eq '*' or $self->{token_type} eq '/') {
187	39					110	my $op= $self->consume_token;
188	39					110	my $right= $self->parse_unit_expr;
189	39	100				194	$value= $self->new_call( $op eq '*'? 'mul' : 'div', [ $value, $right ] );
190							}
191	815					1426	return $value;
192							}
193
194							sub parse_unit_expr {
195	885			885	1	1308	my $self= shift;
196	885					1277	my $negate= 0;
197	885					1270	my $expr;
198
199	885	100				1830	if ($self->{token_type} eq '-') {
200	30					75	$self->next_token;
201	30					72	return $self->get_negative($self->parse_unit_expr);
202							}
203
204	855	100				1693	if ($self->{token_type} eq '(') {
205	13					40	$self->next_token;
206	13					43	my $args= $self->parse_list;
207							die "Expected ')' near ".$self->token_context."\n"
208	13	50				40	if $self->{token_type} ne ')';
209	13					36	$self->next_token;
210	13	100				58	return @$args > 1? $self->new_call('list', $args) : $args->[0];
211							}
212
213	842	100				1673	if ($self->{token_type} eq 'Number') {
214	323					673	return $self->new_number($self->consume_token);
215							}
216
217	519	100				1053	if ($self->{token_type} eq 'String') {
218	84					189	return $self->new_string($self->consume_token);
219							}
220
221	435	100				956	if ($self->{token_type} eq 'Identifier') {
222	434					811	my $id= $self->consume_token;
223	434	100				1050	if ($self->{token_type} eq '(') {
224	266					680	$self->next_token;
225	266	100				917	my $args= $self->{token_type} eq ')'? [] : $self->parse_list;
226							die "Expected ')' near ".$self->token_context."\n"
227	266	50				653	if $self->{token_type} ne ')';
228	266					672	$self->next_token;
229	266					616	return $self->new_call($id, $args);
230							}
231							else {
232	168					452	return $self->new_symbol($id);
233							}
234							}
235
236	1	50				10	if ($self->{token_type} eq '0') {
237	1					9	die "Expected expression, but reached end of input\n";
238							}
239
240	0					0	die "Unexpected token $self->{token_type} '$self->{token_value}' near ".$self->token_context."\n";
241							}
242
243							sub parse_list {
244	268			268	1	482	my $self= shift;
245	268					545	my @args= $self->parse_expr;
246	268					676	while ($self->{token_type} eq ',') {
247	189					454	$self->next_token;
248	189					430	push @args, $self->parse_expr;
249							}
250	268					641	return \@args;
251							}
252
253
254	8			8	1	54	sub cmp_operators { qw( = == != <> > >= < <= ), "\x{2260}", "\x{2264}", "\x{2265}" }
255	8			8	1	57	sub math_operators { qw( + - * / ) }
256	8			8	1	32	sub logic_operators { qw( and or not ! ) }
257	8			8	1	25	sub list_operators { ',', '(', ')' }
258							sub keyword_map {
259							return {
260	8			8	1	30	(map { $_ => $_ } cmp_operators, math_operators, logic_operators, list_operators),
	176					505
261							'=' => '==', '<>' => '!=', "\x{2260}" => '!=',
262							"\x{2264}" => '<=', "\x{2265}" => '>='
263							}
264							}
265							sub scanner_rules {
266	8			8	1	19	my $self= shift;
267	8					40	my $keywords= $self->keyword_map;
268							my $kw_regex= join '\|', map "\Q$_\E",
269	8					68	sort { length($b) <=> length($a) } # longest keywords get priority
	528					858
270							keys %$keywords;
271
272							# Perl 5.20.1 and 5.20.2 have a bug where regex comparisons on unicode strings can crash.
273							# It seems to damage the scalar $1, but copying it first fixes the problem.
274	8	50	33			92	my $kw_canonical= $] >= 5.020000 && $] < 5.020003? '$keywords->{lc(my $clone1= $1)}' : '$keywords->{lc $1}';
275							return (
276							# Pattern Name, Pattern, Token Type and Token Value
277	8					901	[ 'Whitespace', qr/(\s+)/, '"" => ""' ], # empty string causes next_token to loop
278							[ 'Decimal', qr/([0-9]*\.?[0-9]+(?:[eE][+-]?[0-9]+)?)\b/, 'Number => $1+0' ],
279							[ 'Hexadecimal', qr/0x([0-9A-Fa-f]+)/, 'Number => hex($1)' ],
280							[ 'Keywords', qr/($kw_regex)/, $kw_canonical.' => $1', { keywords => $keywords } ],
281							[ 'Identifiers', qr/([A-Za-z_][A-Za-z0-9_.]*)\b/, 'Identifier => $1' ],
282							# Single or double quoted string, using Pascal-style repeated quotes for escaping
283							[ 'StringLiteral', qr/(?:"((?:[^"]\|""))"\|'((?:[^']\|''))')/, q%
284							do{
285							my $str= defined $1? $1 : $2;
286							$str =~ s/""/"/g if defined $1;
287							$str =~ s/''/'/g if defined $2;
288							(String => $str)
289							}
290							%],
291							);
292							}
293
294							sub _build_scan_token_method_body {
295	8			8		28	my ($self, $rules)= @_;
296	8					242	return join('', map
297							' return ' . $_->[2] . ' if $self->{input} =~ /\G' . $_->[1] . "/gc;\n",
298							@$rules
299							).' return;' # return empty list of no rule matched
300							}
301
302							sub _build_scan_token_method {
303	8			8		27	my ($pkg, $method_name)= @_;
304	8	50				34	$pkg= ref $pkg if ref $pkg;
305	8	50				45	$method_name= 'scan_token' unless defined $method_name;
306	8					45	my @rules= $pkg->scanner_rules;
307							# collect variables which should be available to the code
308	8	100				42	my %vars= map { $_->[3]? %{ $_->[3] } : () } @rules;
	48					132
	8					33
309	8					81	my $code= join "\n",
310							(map 'my $'.$_.' = $vars{'.$_.'};', keys %vars),
311							"sub ${pkg}::$method_name {",
312							' my $self= shift;',
313							$pkg->_build_scan_token_method_body(\@rules),
314							"}\n";
315							# closure needed for 5.8 and 5.10 which complain about using a lexical
316							# in a sub declared at package scope.
317	9			9		27161	no warnings 'redefine','closure';
	9					29
	9					10620
318	8	50		2312	1	4272	eval "$code; 1" or die $@ . " for generated scanner code:\n".$code;
	2312	100				5593
	2312	100				7271
	2075	100				6913
	1747	100				3965
	1745	100				7434
	821	100				3140
	376	100				1309
	87	100				310
	87	100				250
	87					248
	87					328
	289					732
319	8					108	return $pkg->can('scan_token');
320							}
321
322	2			2	1	12	sub scan_token { my $m= $_[0]->_build_scan_token_method; goto $m; };
	2					51
323
324
325							sub Language::FormulaEngine::Parser::Node::Call::new {
326	13			13		30	my ($class, $name, $params)= @_;
327	13					51	bless [ $name, $params ], $class;
328							}
329	7			7		21	sub Language::FormulaEngine::Parser::Node::Call::is_constant { 0 }
330	414			414		1071	sub Language::FormulaEngine::Parser::Node::Call::function_name { $_[0][0] }
331	444			444		1576	sub Language::FormulaEngine::Parser::Node::Call::parameters { $_[0][1] }
332							sub Language::FormulaEngine::Parser::Node::Call::evaluate {
333	169			169		1245	my ($self, $namespace)= @_;
334	169					459	$namespace->evaluate_call($self);
335							}
336							sub Language::FormulaEngine::Parser::Node::Call::simplify {
337	15			15		49	my ($node, $namespace)= @_;
338	15					74	$namespace->simplify_call($node)
339							}
340							sub Language::FormulaEngine::Parser::Node::Call::deparse {
341	23			23		67	my ($node, $parser)= @_;
342							return $node->function_name . (
343	23					53	!@{$node->parameters}? '()'
344	23	100				46	: '( ' .join(', ', map $parser->deparse($_), @{$node->parameters}). ' )'
	21					40
345							)
346							}
347
348							sub new_call {
349	377			377	1	759	my ($self, $fn, $params)= @_;
350	377					1182	$self->functions->{$fn}++; # record dependency on this function
351	377					2288	bless [ $fn, $params ], 'Language::FormulaEngine::Parser::Node::Call';
352							}
353
354
355							sub Language::FormulaEngine::Parser::Node::symbol::new {
356	0			0		0	my ($class, $name)= @_;
357	0					0	bless \$name, $class;
358							}
359
360	7			7		22	sub Language::FormulaEngine::Parser::Node::Symbol::is_constant { 0 }
361	121			121		195	sub Language::FormulaEngine::Parser::Node::Symbol::symbol_name { ${$_[0]} }
	121					464
362							sub Language::FormulaEngine::Parser::Node::Symbol::evaluate {
363	66			66		126	my ($self, $namespace)= @_;
364	66					225	$namespace->get_value($$self);
365							}
366							sub Language::FormulaEngine::Parser::Node::Symbol::simplify {
367	15			15		41	my ($self, $namespace)= @_;
368	15					50	return $namespace->simplify_symref($self);
369							}
370							sub Language::FormulaEngine::Parser::Node::Symbol::deparse {
371	24			24		75	shift->symbol_name;
372							}
373
374							sub new_symbol {
375	168			168	1	363	my ($self, $name)= @_;
376	168					517	$self->symbols->{$name}++; # record dependency on this variable
377	168					544	bless \$name, 'Language::FormulaEngine::Parser::Node::Symbol';
378							}
379
380
381							sub Language::FormulaEngine::Parser::Node::String::new {
382	0			0		0	my ($class, $value)= @_;
383	0					0	bless \$value, $class;
384							}
385
386	1			1		4	sub Language::FormulaEngine::Parser::Node::String::is_constant { 1 }
387	62			62		114	sub Language::FormulaEngine::Parser::Node::String::string_value { ${$_[0]} }
	62					199
388	52			52		95	sub Language::FormulaEngine::Parser::Node::String::evaluate { ${$_[0]} }
	52					192
389	1			1		2	sub Language::FormulaEngine::Parser::Node::String::simplify { $_[0] }
390							sub _str_escape {
391	6			6		10	my $str= shift;
392	6					15	$str =~ s/'/''/g;
393	6					68	"'$str'";
394							}
395							sub Language::FormulaEngine::Parser::Node::String::deparse {
396	6			6		28	_str_escape(shift->string_value);
397							}
398
399							sub new_string {
400	116			116	1	282	my ($self, $text)= @_;
401	116					352	bless \$text, 'Language::FormulaEngine::Parser::Node::String'
402							}
403
404
405							sub Language::FormulaEngine::Parser::Node::Number::new {
406	18			18		47	my ($class, $value)= @_;
407	18					32	$value= 0+$value;
408	18					80	bless \$value, $class;
409							}
410
411	25			25		93	sub Language::FormulaEngine::Parser::Node::Number::is_constant { 1 }
412	219			219		304	sub Language::FormulaEngine::Parser::Node::Number::number_value { ${$_[0]} }
	219					1245
413	159			159		260	sub Language::FormulaEngine::Parser::Node::Number::evaluate { ${$_[0]} }
	159					532
414	6			6		24	sub Language::FormulaEngine::Parser::Node::Number::simplify { $_[0] }
415	18			18		48	sub Language::FormulaEngine::Parser::Node::Number::deparse { shift->number_value }
416
417							sub new_number {
418	359			359	1	644	my $value= $_[1]+0;
419	359					1142	bless \$value, 'Language::FormulaEngine::Parser::Node::Number'
420							}
421
422
423							sub get_negative {
424	41			41	1	85	my ($self, $node)= @_;
425	41	100				242	return $self->new_number(-$node->number_value) if $node->can('number_value');
426	5	50	66			30	return $node->parameters->[0] if $node->can('function_name') and $node->function_name eq 'negative';
427	5					21	return $self->new_call('negative', [$node]);
428							}
429
430							1;
431
432							__END__
433
434							=pod
435
436							=encoding UTF-8
437
438							=head1 NAME
439
440							Language::FormulaEngine::Parser - Create parse tree from an input string
441
442							=head1 VERSION
443
444							version 0.07
445
446							=head1 SYNOPSIS
447
448							my $parse_tree= Language::FormulaEngine::Parser->new->parse($string);
449
450							=head1 DESCRIPTION
451
452							This class scans tokens from an input string and builds a parse tree. In compiler terminology,
453							it is both a Scanner and Parser. It performs a top-down recursive descent parse, because this
454							is easy and gives good error messages. It only parses strings, but leaves room for subclasses
455							to implement streaming. By default, the parser simply applies a Grammar to the input, without
456							checking whether the functions or variables exist, but can be subclassed to do more detailed
457							analysis during the parse.
458
459							The generated parse tree is made up of Function nodes (each infix operator is converted to a
460							named function) and each Function node may contain Symbols, Strings, Numbers, and other
461							Function nodes. The parse tree can be passed to the Evaluator for instant execution, or passed
462							to the Compiler to generate an optimized perl coderef. The parse tree is lightweight, and does
463							not include token/context information; this could also be added by a subclass.
464
465							=head1 PUBLIC API
466
467							=head2 parse
468
469							Parse a new input text, updating all derived attributes with the result of the operation.
470							It returns the value of L</parse_tree> (which is undef if the parse failed).
471							On failure, the exception is stored in L</error> and other attributes like L</token_pos> may
472							contain useful diagnostic information.
473
474							=head2 parse_tree
475
476							This holds the generated parse tree, or C<undef> if the parse failed. See L</"Parse Nodes">.
477
478							=head2 error
479
480							This is C<undef> if the parse succeeded, else an error message describing the syntax that ended
481							the parse.
482
483							=head2 functions
484
485							A set (hashref) of all function names encountered during the parse.
486
487							=head2 symbols
488
489							A set (hashref) of all non-function symbols encountered. (variables, constnts, etc.)
490
491							=head2 reset
492
493							Clear the results of the previous parse, to re-use the object. Returns C<$self> for chaining.
494
495							=head2 deparse
496
497							my $formula_text= $parser->deparse($tree);
498
499							Return a canonical formula text for the parse tree, or a parse tree that you supply.
500
501							=head1 EXTENSIBLE API
502
503							These methods and attributes are documented for purposes of subclassing the parser.
504
505							=head2 input
506
507							The input string being scanned.
508							Code within the parser should access this as C<< $self->{input} >> for efficiency.
509
510							=head2 input_pos
511
512							Shortcut for C<< pos($self->{input}) >>.
513
514							=head2 token_type
515
516							Type of current token scanned from C<input>.
517							Code within the parser should access this as C<< $self->{token_type} >> for efficiency.
518
519							=head2 token_value
520
521							Value of current token scanned from C<input>, with escape sequences and etc resolved to a
522							sensible perl value.
523							Code within the parser should access this as C<< $self->{token_value} >> for efficiency.
524
525							=head2 token_pos
526
527							An offset within C<input> where this token started.
528							Code within the parser should access this as C<< $self->{token_pos} >> for efficiency.
529
530							=head2 next_token
531
532							Advance to the next token, replacing the values of C<token_> variables and updating
533							C<input_pos>. Returns the token_type, of which all are true except EOF which has a
534							type of C<0>, so this also means the function returns true if it parsed a token and
535							false if it reached EOF. It dies if no token could be parsed.
536							If you call next_token again after the eof token, it throws an exception.
537
538							This method is a wrapper around L</scan_token>. Override that method to add new token types.
539
540							=head2 scan_token
541
542							Pattern-match the next token, and either return C<< $type => $value >> or an empty list if
543							the syntax is invalid. This is intended to be overridden by subclasses.
544
545							=head2 consume_token
546
547							return $self->consume_token if $self->{token_type} eq $desired_type;
548
549							This is a shorthand for returning the current C<token_value> while also calling C<next_token>.
550
551							=head2 token_context
552
553							my $text= $self->token_context(%options);
554
555							Default behavior generates a string like:
556
557							"'blah blah' on line 15, char 12"
558
559							Passing C<< token_context(multiline => 1) >> generates a string like
560
561							"Expected something else at line 15, char 16\n" .
562							"blah blah blah token blah blah\n" .
563							" ^^^^^\n"
564
565							Multiline additionally takes arguments as described in
566							L<Language::FormulaEngine::Parser::ContextUtil/format_context_multiline>.
567
568							=head1 GRAMMAR
569
570							=head2 Parse Rules
571
572							The default grammar implements the following rules:
573
574							expr ::= or_expr
575							or_expr ::= and_expr ( 'or' and_expr )*
576							and_expr ::= not_expr ( 'and' not_expr )*
577							not_expr ::= ( 'not' \| '!' ) cmp_expr \| cmp_expr
578							cmp_expr ::= sum_expr ( ( '=' \| '==' \| '<>' \| '\u2260' \| '<' \| '<=' \| '>' \| '>=' ) sum_expr )*
579							sum_expr ::= prod_expr ( ('+' \| '-') prod_expr )*
580							prod_expr ::= ( unit_expr ('' \| '/') ) unit_expr
581							unit_expr ::= '-' unit_expr \| Identifier '(' list ')' \| '(' (expr\|list) ')' \| Identifier \| Number \| String
582							list ::= expr ( ',' expr )* ','?
583
584							C<ident>, C<num>, C<str>, and all the punctuation symbols are tokens.
585
586							The parser uses a Recursive Descent algorithm implemented as the following method calls.
587							Each method consumes tokens from C<< $self >> and return a L</"PARSE NODES">:
588
589							=over
590
591							=item parse_expr
592
593							=item parse_or_expr
594
595							=item parse_and_expr
596
597							=item parse_not_expr
598
599							=item parse_cmp_expr
600
601							=item parse_sum_expr
602
603							=item parse_prod_expr
604
605							=item parse_unit_expr
606
607							=item parse_list
608
609							=back
610
611							=head2 Token Types
612
613							=over
614
615							=item C<'Number'>
616
617							All the common decimal representations of integers and floating point numbers
618							which perl can parse. Optional decimals and decimal point followed by decimals
619							and optional exponent, ending at either the end of the input or a non-alphanumeric.
620
621							=item C<'String'>
622
623							A single-quoted or double-quoted string, treating a double occurrence of the quote
624							character to mean a literal quote character. ("Pascal style")
625
626							'apostrophes are''nt hard'
627
628							There are no escape sequences though, so to get control characters or awkward unicode
629							into a string you need something like:
630
631							concat("smile ",char(0x263A))
632
633							which depends on those functions being available in the namespace.
634
635							=item Keywords...
636
637							Keywords include the "word" tokens like 'OR', but also every text literal seen in a parse rule
638							such as operators and punctuation.
639							The C<token_type> of the keyword is the canonical version of the keyword, and the C<token_value>
640							is the actual text that was captured. The pattern matches the longest keyword possible.
641
642							=item C<'Identifier'>
643
644							Any alpha (or underscore) followed by any run of alphanumerics,
645							(including underscore and period).
646
647							=back
648
649							=head2 Customizing the Token Scanner
650
651							The tokens are parsed using a series of regex tests. The regexes and the code that handles a
652							match of that regex are found in package attribute L</scanner_rules>. These regexes and code
653							fragments get lazily compiled into a package method on the first use (per package).
654							Meanwhile, several of those regex are built from other package attributes.
655
656							=over
657
658							=item scanner_rules
659
660							This package method returns a list (not arrayref) of ordered elements of the form
661							C<< [ $name, $regex, $code_fragment, \%vars ] >>. You can subclass this method to inspect
662							the rules (probably based on C<$name>) and replace the regexes, or alter the handler code,
663							or add/remove your own rules. The regexes are attempted in the order they appear in this
664							list. You do not need to use "\G" or "/gc" on these regexes because those are added
665							automatically during compilation.
666
667							=item keyword_map
668
669							This package method returns a hashref of all known keywords, mapped to their canonical form.
670							So for instance, a key of C<< '<>' >> with a value of C<< '!=' >>. These tokens automatically
671							become the scanner rule named C<Keywords>. In turn, the contents of this hashref include
672							the L</cmp_operators>, L</math_operators>, L</logic_operators>, and L</list_operators> which
673							can be overridden separately.
674
675							This method is called once during the compilation of L</scan_token>, and the result is then
676							made into a constant and referenced by the compiled method, so dynamic changes to the output
677							of this method will be ignored.
678
679							=item cmp_operators
680
681							Package method that returns a list of comparison operators, like '<', '>=', etc.
682
683							=item math_operators
684
685							Package method that returns a list of math operators, like '*', '+', etc.
686
687							=item logic_operators
688
689							Package method that returns a list of keywords like 'and', 'or', etc.
690
691							=item list_operators
692
693							Package method that returns a list of '(', ')', ','
694
695							=back
696
697							=head2 Parse Nodes
698
699							The parse tree takes a minimalist approach to node classification. In this default
700							implementation, number values, string values, and symbolic references have just a simple
701							wrapper around the value, and function calls are just a pair of function name and list of
702							arguments. All language operators are represented as function calls.
703
704							A blessed node only needs to support one method: C<< ->evaluate($namespace) >>.
705
706							The class name of the blessed nodes should be ignored. A function is anything which
707							C<< can("function_name") >>, a string is anything which C<< can("string_value") >>, a number is
708							anything which C<< can("number_value") >> and a symbolic reference is anything which
709							C<< can("symbolic_name") >>.
710
711							Subclasses of Parser should implemnt new node types as needed. You probable also need to
712							update L</deparse>.
713
714							The parser rules (C<parse_X_expr> methods) create nodes by the following methods on the Parser
715							class, so that you can easily subclass C<Parser> and override which class of node is getting
716							created.
717
718							=over
719
720							=item new_call
721
722							$node= $parser->new_call( $function_name, $parameters );
723
724							Generate a node for a function call. The returned node has attributes C<function_name>
725							and C<parameters>
726
727							=item new_symbol
728
729							$node= $parser->new_symbol($symbol_name);
730
731							A reference to a symbolic value (i.e. variable or constant).
732							It has one attribute C<symbol_name>.
733
734							=item new_string
735
736							$node= $parser->new_string($string_value);
737
738							A string literal. It has an attribute C<string_value> holding the raw value.
739
740							=item new_number
741
742							$plain_scalar= $parser->new_number($value);
743
744							A numeric constant. It has an attribute C<number_value> holding the raw value.
745
746							=item get_negative
747
748							$negative_node= $parser->get_negative( $node );
749
750							Utility method to get the "opposite of" a parse node. By default, this wraps it with the
751							function C<'negative'>, unless it already was that function then it unwraps the parameter.
752							It performs simple negation on numbers.
753
754							=back
755
756							=head1 AUTHOR
757
758							Michael Conrad <mconrad@intellitree.com>
759
760							=head1 COPYRIGHT AND LICENSE
761
762							This software is copyright (c) 2023 by Michael Conrad, IntelliTree Solutions llc.
763
764							This is free software; you can redistribute it and/or modify it under
765							the same terms as the Perl 5 programming language system itself.
766
767							=cut