File Coverage

blib/lib/Language/FormulaEngine/Parser.pm

Criterion	Covered	Total	%
statement	246	253	97.2
branch	77	88	87.5
condition	27	35	77.1
subroutine	66	70	94.2
pod	33	33	100.0
total	449	479	93.7

line	stmt	bran	cond	sub	pod	time	code
1							package Language::FormulaEngine::Parser;
2	9			9		466036	use Moo;
	9					33873
	9					78
3	9			9		6842	use Carp;
	9					20
	9					636
4	9			9		1132	use Try::Tiny;
	9					2702
	9					594
5	9			9		72	use List::Util qw( min max );
	9					30
	9					783
6							use Language::FormulaEngine::Parser::ContextUtil
7	9			9		4362	qw( calc_text_coordinates format_context_string format_context_multiline );
	9					27
	9					708
8	9			9		1942	use namespace::clean;
	9					38570
	9					70
9
10							# ABSTRACT: Create parse tree from an input string
11							our $VERSION = '0.08'; # VERSION
12
13
14							has parse_tree => ( is => 'rw' );
15							has error => ( is => 'rw' );
16							has functions => ( is => 'rw' );
17							has symbols => ( is => 'rw' );
18
19							sub parse {
20	283			283	1	63747	my ($self, $input)= @_;
21	283					876	$self->reset;
22	283					573	$self->{input}= $input;
23	283					863	pos( $self->{input} )= 0;
24							try {
25	283			283		11925	$self->next_token;
26	282					643	my $tree= $self->parse_expr;
27							# It is an error if there was un-processed input.
28	281	100				652	$self->token_type eq '0'
29							or die sprintf('Unexpected %s "%s" near %s',
30							$self->token_type, $self->token_value, $self->token_context);
31	280					962	$self->parse_tree($tree);
32							} catch {
33	3			3		67	chomp;
34	3					17	$self->error($_);
35	283					1870	};
36	283					5134	return $self->parse_tree;
37							}
38
39							sub reset {
40	283			283	1	547	my $self= shift;
41	283					1365	$self->parse_tree(undef);
42	283					648	$self->error(undef);
43	283					759	$self->functions({});
44	283					695	$self->symbols({});
45	283					471	delete @{$self}{'input','token_type','token_value','token_pos'};
	283					1099
46	283					516	$self;
47							}
48
49
50							sub deparse {
51	63			63	1	3702	my ($self, $node)= @_;
52	63	100				150	$node= $self->parse_tree unless @_ > 1;
53	63					142	$node->deparse($self);
54							}
55
56
57	0			0	1	0	sub input { shift->{input} }
58	0			0	1	0	sub input_pos { pos( shift->{input} ) }
59	338			338	1	1206	sub token_type { shift->{token_type} }
60	47			47	1	228	sub token_value { shift->{token_value} }
61	46			46	1	114	sub token_pos { shift->{token_pos} }
62
63
64							sub next_token {
65	2078			2078	1	50258	my $self= shift;
66
67							# If already reached end of input, throw an exception.
68							die "Can't call next_token after end of input"
69	2078	50	100			5960	if '0' eq ($self->{token_type}\|\|'');
70
71							# Detect the next token
72	2078					4057	my ($type, $val, $pos0, $pos1)= ('','');
73	2078					4004	while ($type eq '') {
74	2312		100			4815	$pos0= pos($self->{input}) \|\| 0;
75	2312					55489	($type, $val)= $self->scan_token;
76	2312		100			11833	$pos1= pos($self->{input}) \|\| 0;
77							# Check for end of buffer, even if it matched.
78	2312	100				4765	if ($pos1 >= length $self->{input}) {
79							#pos($self->{input})= $pos0; # rewind to start of token before growing buffer
80							#if ($self->_grow_buffer) {
81							# $log->trace("grow buffer succeeded");
82							# $type= '';
83							# next;
84							#}
85							#pos($self->{input})= $pos1; # restore actual position\
86							# If we didn't get a token or are ignoring this final token, then return the EOF token
87	577	100	100			1774	if (!defined $type \|\| $type eq '') {
88	291					461	$type= 0;
89	291					466	$val= '';
90	291					414	$pos0= $pos1;
91	291					504	last;
92							}
93							}
94	2021	100				3837	defined $type
95							or die "Unknown syntax at ".$self->token_context."\n";
96	2020	50				4938	$pos1 > $pos0
97							or croak "Tokenizer consumed zero characters";
98							}
99	2077					3503	@{$self}{'token_type','token_value','token_pos'}= ($type,$val,$pos0);
	2077					4973
100	2077					3763	return $type, $val;
101							}
102
103
104							sub consume_token {
105	971			971	1	19225	my $self= shift;
106							croak "Can't consume EOF"
107	971	100				3020	if $self->{token_type} eq '0';
108	961					1544	my $val= $self->{token_value};
109	961					2122	$self->next_token;
110	961					2301	return $val;
111							}
112
113							sub token_context {
114	2			2	1	5	my ($self, %args)= @_;
115							return format_context_multiline($self->{input}, $self->{token_pos}\|\|0, pos($self->{input})\|\|0, \%args)
116	2	50	0			7	if delete $args{multiline};
			0
117	2		100			19	return format_context_string($self->{input}, $self->{token_pos}\|\|0, pos($self->{input})\|\|0);
			100
118							}
119
120
121	739			739	1	1622	sub parse_expr { shift->parse_or_expr; }
122
123							sub parse_or_expr {
124	739			739	1	1109	my $self= shift;
125	739					1438	my $first= $self->parse_and_expr;
126	738	100				2119	return $first unless $self->{token_type} eq 'or';
127	2					5	my @or_expr= $first;
128	2					18	while ($self->{token_type} eq 'or') {
129	2					11	$self->next_token;
130	2					9	push @or_expr, $self->parse_and_expr;
131							}
132	2					6	return $self->new_call('or', \@or_expr);
133							}
134
135							sub parse_and_expr {
136	741			741	1	1166	my $self= shift;
137	741					1367	my $first= $self->parse_not_expr;
138	740	100				1873	return $first unless $self->{token_type} eq 'and';
139	8					26	my @and_expr= $first;
140	8					38	while ($self->{token_type} eq 'and') {
141	8					33	$self->next_token;
142	8					26	push @and_expr, $self->parse_not_expr;
143							}
144	8					46	return $self->new_call('and', \@and_expr);
145							}
146
147							sub parse_not_expr {
148	749			749	1	1052	my $self= shift;
149	749	100	66			2801	if ($self->{token_type} eq 'not' or $self->{token_type} eq '!') {
150	5					17	$self->next_token;
151	5					12	return $self->new_call('not', [ $self->parse_cmp_expr ]);
152							}
153	744					1495	return $self->parse_cmp_expr;
154							}
155
156							my %_cmp_ops= map { $_ => 1 } qw( > < >= <= != == );
157							sub parse_cmp_expr {
158	749			749	1	1212	my $self= shift;
159	749					1446	my $first= $self->parse_sum_expr;
160	748	100				2198	return $first unless $_cmp_ops{$self->{token_type}};
161	24					64	my @expr= $first;
162	24					87	while ($_cmp_ops{$self->{token_type}}) {
163	32					124	push @expr, $self->new_string($self->{token_type});
164	32					99	$self->next_token;
165	32					86	push @expr, $self->parse_sum_expr;
166							}
167	24					84	return $self->new_call('compare', \@expr);
168							}
169
170							sub parse_sum_expr {
171	781			781	1	1192	my $self= shift;
172	781					1444	my $first= $self->parse_prod_expr;
173	780	100	100			2884	return $first unless $self->{token_type} eq '+' or $self->{token_type} eq '-';
174	26					65	my @sum_expr= $first;
175	26		100			89	while ($self->{token_type} eq '+' or $self->{token_type} eq '-') {
176	35					85	my $negate= $self->consume_token eq '-';
177	35					103	my $operand= $self->parse_prod_expr;
178	35	100				195	push @sum_expr, $negate? $self->get_negative($operand) : $operand;
179							}
180	26					74	return $self->new_call('sum', \@sum_expr);
181							}
182
183							sub parse_prod_expr {
184	816			816	1	1221	my $self= shift;
185	816					1553	my $value= $self->parse_unit_expr;
186	815		100			3408	while ($self->{token_type} eq '*' or $self->{token_type} eq '/') {
187	39					93	my $op= $self->consume_token;
188	39					97	my $right= $self->parse_unit_expr;
189	39	100				211	$value= $self->new_call( $op eq '*'? 'mul' : 'div', [ $value, $right ] );
190							}
191	815					1436	return $value;
192							}
193
194							sub parse_unit_expr {
195	885			885	1	1346	my $self= shift;
196	885					1237	my $negate= 0;
197	885					1211	my $expr;
198
199	885	100				1820	if ($self->{token_type} eq '-') {
200	30					71	$self->next_token;
201	30					102	return $self->get_negative($self->parse_unit_expr);
202							}
203
204	855	100				1659	if ($self->{token_type} eq '(') {
205	13					42	$self->next_token;
206	13					54	my $args= $self->parse_list;
207							die "Expected ')' near ".$self->token_context."\n"
208	13	50				47	if $self->{token_type} ne ')';
209	13					36	$self->next_token;
210	13	100				63	return @$args > 1? $self->new_call('list', $args) : $args->[0];
211							}
212
213	842	100				1654	if ($self->{token_type} eq 'Number') {
214	323					611	return $self->new_number($self->consume_token);
215							}
216
217	519	100				1038	if ($self->{token_type} eq 'String') {
218	84					184	return $self->new_string($self->consume_token);
219							}
220
221	435	100				921	if ($self->{token_type} eq 'Identifier') {
222	434					753	my $id= $self->consume_token;
223	434	100				1074	if ($self->{token_type} eq '(') {
224	266					730	$self->next_token;
225	266	100				1368	my $args= $self->{token_type} eq ')'? [] : $self->parse_list;
226							die "Expected ')' near ".$self->token_context."\n"
227	266	50				695	if $self->{token_type} ne ')';
228	266					655	$self->next_token;
229	266					684	return $self->new_call($id, $args);
230							}
231							else {
232	168					452	return $self->new_symbol($id);
233							}
234							}
235
236	1	50				12	if ($self->{token_type} eq '0') {
237	1					7	die "Expected expression, but reached end of input\n";
238							}
239
240	0					0	die "Unexpected token $self->{token_type} '$self->{token_value}' near ".$self->token_context."\n";
241							}
242
243							sub parse_list {
244	268			268	1	483	my $self= shift;
245	268					549	my @args= $self->parse_expr;
246	268					664	while ($self->{token_type} eq ',') {
247	189					493	$self->next_token;
248	189					467	push @args, $self->parse_expr;
249							}
250	268					568	return \@args;
251							}
252
253
254	8			8	1	53	sub cmp_operators { qw( = == != <> > >= < <= ), "\x{2260}", "\x{2264}", "\x{2265}" }
255	8			8	1	35	sub math_operators { qw( + - * / ) }
256	8			8	1	27	sub logic_operators { qw( and or not ! ) }
257	8			8	1	26	sub list_operators { ',', '(', ')' }
258							sub keyword_map {
259							return {
260	8			8	1	40	(map { $_ => $_ } cmp_operators, math_operators, logic_operators, list_operators),
	176					591
261							'=' => '==', '<>' => '!=', "\x{2260}" => '!=',
262							"\x{2264}" => '<=', "\x{2265}" => '>='
263							}
264							}
265							sub scanner_rules {
266	8			8	1	25	my $self= shift;
267	8					30	my $keywords= $self->keyword_map;
268							my $kw_regex= join '\|', map "\Q$_\E",
269	8					82	sort { length($b) <=> length($a) } # longest keywords get priority
	524					906
270							keys %$keywords;
271
272							# Perl 5.20.1 and 5.20.2 have a bug where regex comparisons on unicode strings can crash.
273							# It seems to damage the scalar $1, but copying it first fixes the problem.
274	8	50	33			95	my $kw_canonical= $] >= 5.020000 && $] < 5.020003? '$keywords->{lc(my $clone1= $1)}' : '$keywords->{lc $1}';
275							return (
276							# Pattern Name, Pattern, Token Type and Token Value
277	8					935	[ 'Whitespace', qr/(\s+)/, '"" => ""' ], # empty string causes next_token to loop
278							[ 'Decimal', qr/([0-9]*\.?[0-9]+(?:[eE][+-]?[0-9]+)?)\b/, 'Number => $1+0' ],
279							[ 'Hexadecimal', qr/0x([0-9A-Fa-f]+)/, 'Number => hex($1)' ],
280							[ 'Keywords', qr/($kw_regex)/, $kw_canonical.' => $1', { keywords => $keywords } ],
281							[ 'Identifiers', qr/([A-Za-z_][A-Za-z0-9_.]*)\b/, 'Identifier => $1' ],
282							# Single or double quoted string, using Pascal-style repeated quotes for escaping
283							[ 'StringLiteral', qr/(?:"((?:[^"]\|""))"\|'((?:[^']\|''))')/, q%
284							do{
285							my $str= defined $1? $1 : $2;
286							$str =~ s/""/"/g if defined $1;
287							$str =~ s/''/'/g if defined $2;
288							(String => $str)
289							}
290							%],
291							);
292							}
293
294							sub _build_scan_token_method_body {
295	8			8		31	my ($self, $rules)= @_;
296	8					240	return join('', map
297							' return ' . $_->[2] . ' if $self->{input} =~ /\G' . $_->[1] . "/gc;\n",
298							@$rules
299							).' return;' # return empty list of no rule matched
300							}
301
302							sub _build_scan_token_method {
303	8			8		26	my ($pkg, $method_name)= @_;
304	8	50				43	$pkg= ref $pkg if ref $pkg;
305	8	50				54	$method_name= 'scan_token' unless defined $method_name;
306	8					50	my @rules= $pkg->scanner_rules;
307							# collect variables which should be available to the code
308	8	100				38	my %vars= map { $_->[3]? %{ $_->[3] } : () } @rules;
	48					133
	8					38
309	8					91	my $code= join "\n",
310							(map 'my $'.$_.' = $vars{'.$_.'};', keys %vars),
311							"sub ${pkg}::$method_name {",
312							' my $self= shift;',
313							$pkg->_build_scan_token_method_body(\@rules),
314							"}\n";
315							# closure needed for 5.8 and 5.10 which complain about using a lexical
316							# in a sub declared at package scope.
317	9			9		27585	no warnings 'redefine','closure';
	9					86
	9					10517
318	8	50		2312	1	4258	eval "$code; 1" or die $@ . " for generated scanner code:\n".$code;
	2312	100				5385
	2312	100				7079
	2075	100				6637
	1747	100				3784
	1745	100				7202
	821	100				3208
	376	100				1256
	87	100				314
	87	100				267
	87					246
	87					337
	289					762
319	8					129	return $pkg->can('scan_token');
320							}
321
322	2			2	1	13	sub scan_token { my $m= $_[0]->_build_scan_token_method; goto $m; };
	2					52
323
324
325							sub Language::FormulaEngine::Parser::Node::Call::new {
326	13			13		31	my ($class, $name, $params)= @_;
327	13					52	bless [ $name, $params ], $class;
328							}
329	7			7		24	sub Language::FormulaEngine::Parser::Node::Call::is_constant { 0 }
330	414			414		1141	sub Language::FormulaEngine::Parser::Node::Call::function_name { $_[0][0] }
331	444			444		1635	sub Language::FormulaEngine::Parser::Node::Call::parameters { $_[0][1] }
332							sub Language::FormulaEngine::Parser::Node::Call::evaluate {
333	169			169		1222	my ($self, $namespace)= @_;
334	169					513	$namespace->evaluate_call($self);
335							}
336							sub Language::FormulaEngine::Parser::Node::Call::simplify {
337	15			15		48	my ($node, $namespace)= @_;
338	15					63	$namespace->simplify_call($node)
339							}
340							sub Language::FormulaEngine::Parser::Node::Call::deparse {
341	23			23		74	my ($node, $parser)= @_;
342							return $node->function_name . (
343	23					47	!@{$node->parameters}? '()'
344	23	100				48	: '( ' .join(', ', map $parser->deparse($_), @{$node->parameters}). ' )'
	21					39
345							)
346							}
347
348							sub new_call {
349	377			377	1	834	my ($self, $fn, $params)= @_;
350	377					1289	$self->functions->{$fn}++; # record dependency on this function
351	377					1491	bless [ $fn, $params ], 'Language::FormulaEngine::Parser::Node::Call';
352							}
353
354
355							sub Language::FormulaEngine::Parser::Node::symbol::new {
356	0			0		0	my ($class, $name)= @_;
357	0					0	bless \$name, $class;
358							}
359
360	7			7		22	sub Language::FormulaEngine::Parser::Node::Symbol::is_constant { 0 }
361	121			121		196	sub Language::FormulaEngine::Parser::Node::Symbol::symbol_name { ${$_[0]} }
	121					451
362							sub Language::FormulaEngine::Parser::Node::Symbol::evaluate {
363	66			66		139	my ($self, $namespace)= @_;
364	66					198	$namespace->get_value($$self);
365							}
366							sub Language::FormulaEngine::Parser::Node::Symbol::simplify {
367	15			15		55	my ($self, $namespace)= @_;
368	15					46	return $namespace->simplify_symref($self);
369							}
370							sub Language::FormulaEngine::Parser::Node::Symbol::deparse {
371	24			24		90	shift->symbol_name;
372							}
373
374							sub new_symbol {
375	168			168	1	395	my ($self, $name)= @_;
376	168					548	$self->symbols->{$name}++; # record dependency on this variable
377	168					549	bless \$name, 'Language::FormulaEngine::Parser::Node::Symbol';
378							}
379
380
381							sub Language::FormulaEngine::Parser::Node::String::new {
382	0			0		0	my ($class, $value)= @_;
383	0					0	bless \$value, $class;
384							}
385
386	1			1		4	sub Language::FormulaEngine::Parser::Node::String::is_constant { 1 }
387	62			62		93	sub Language::FormulaEngine::Parser::Node::String::string_value { ${$_[0]} }
	62					225
388	52			52		88	sub Language::FormulaEngine::Parser::Node::String::evaluate { ${$_[0]} }
	52					231
389	1			1		2	sub Language::FormulaEngine::Parser::Node::String::simplify { $_[0] }
390							sub _str_escape {
391	6			6		9	my $str= shift;
392	6					14	$str =~ s/'/''/g;
393	6					23	"'$str'";
394							}
395							sub Language::FormulaEngine::Parser::Node::String::deparse {
396	6			6		13	_str_escape(shift->string_value);
397							}
398
399							sub new_string {
400	116			116	1	263	my ($self, $text)= @_;
401	116					381	bless \$text, 'Language::FormulaEngine::Parser::Node::String'
402							}
403
404
405							sub Language::FormulaEngine::Parser::Node::Number::new {
406	18			18		40	my ($class, $value)= @_;
407	18					32	$value= 0+$value;
408	18					83	bless \$value, $class;
409							}
410
411	25			25		81	sub Language::FormulaEngine::Parser::Node::Number::is_constant { 1 }
412	219			219		349	sub Language::FormulaEngine::Parser::Node::Number::number_value { ${$_[0]} }
	219					1297
413	159			159		253	sub Language::FormulaEngine::Parser::Node::Number::evaluate { ${$_[0]} }
	159					522
414	6			6		13	sub Language::FormulaEngine::Parser::Node::Number::simplify { $_[0] }
415	18			18		39	sub Language::FormulaEngine::Parser::Node::Number::deparse { shift->number_value }
416
417							sub new_number {
418	359			359	1	646	my $value= $_[1]+0;
419	359					1300	bless \$value, 'Language::FormulaEngine::Parser::Node::Number'
420							}
421
422
423							sub get_negative {
424	41			41	1	93	my ($self, $node)= @_;
425	41	100				218	return $self->new_number(-$node->number_value) if $node->can('number_value');
426	5	50	66			50	return $node->parameters->[0] if $node->can('function_name') and $node->function_name eq 'negative';
427	5					21	return $self->new_call('negative', [$node]);
428							}
429
430							1;
431
432							__END__
433
434							=pod
435
436							=encoding UTF-8
437
438							=head1 NAME
439
440							Language::FormulaEngine::Parser - Create parse tree from an input string
441
442							=head1 VERSION
443
444							version 0.08
445
446							=head1 SYNOPSIS
447
448							my $parse_tree= Language::FormulaEngine::Parser->new->parse($string);
449
450							=head1 DESCRIPTION
451
452							This class scans tokens from an input string and builds a parse tree. In compiler terminology,
453							it is both a Scanner and Parser. It performs a top-down recursive descent parse, because this
454							is easy and gives good error messages. It only parses strings, but leaves room for subclasses
455							to implement streaming. By default, the parser simply applies a Grammar to the input, without
456							checking whether the functions or variables exist, but can be subclassed to do more detailed
457							analysis during the parse.
458
459							The generated parse tree is made up of Function nodes (each infix operator is converted to a
460							named function) and each Function node may contain Symbols, Strings, Numbers, and other
461							Function nodes. The parse tree can be passed to the Evaluator for instant execution, or passed
462							to the Compiler to generate an optimized perl coderef. The parse tree is lightweight, and does
463							not include token/context information; this could also be added by a subclass.
464
465							=head1 PUBLIC API
466
467							=head2 parse
468
469							Parse a new input text, updating all derived attributes with the result of the operation.
470							It returns the value of L</parse_tree> (which is undef if the parse failed).
471							On failure, the exception is stored in L</error> and other attributes like L</token_pos> may
472							contain useful diagnostic information.
473
474							=head2 parse_tree
475
476							This holds the generated parse tree, or C<undef> if the parse failed. See L</"Parse Nodes">.
477
478							=head2 error
479
480							This is C<undef> if the parse succeeded, else an error message describing the syntax that ended
481							the parse.
482
483							=head2 functions
484
485							A set (hashref) of all function names encountered during the parse.
486
487							=head2 symbols
488
489							A set (hashref) of all non-function symbols encountered. (variables, constnts, etc.)
490
491							=head2 reset
492
493							Clear the results of the previous parse, to re-use the object. Returns C<$self> for chaining.
494
495							=head2 deparse
496
497							my $formula_text= $parser->deparse($tree);
498
499							Return a canonical formula text for the parse tree, or a parse tree that you supply.
500
501							=head1 EXTENSIBLE API
502
503							These methods and attributes are documented for purposes of subclassing the parser.
504
505							=head2 input
506
507							The input string being scanned.
508							Code within the parser should access this as C<< $self->{input} >> for efficiency.
509
510							=head2 input_pos
511
512							Shortcut for C<< pos($self->{input}) >>.
513
514							=head2 token_type
515
516							Type of current token scanned from C<input>.
517							Code within the parser should access this as C<< $self->{token_type} >> for efficiency.
518
519							=head2 token_value
520
521							Value of current token scanned from C<input>, with escape sequences and etc resolved to a
522							sensible perl value.
523							Code within the parser should access this as C<< $self->{token_value} >> for efficiency.
524
525							=head2 token_pos
526
527							An offset within C<input> where this token started.
528							Code within the parser should access this as C<< $self->{token_pos} >> for efficiency.
529
530							=head2 next_token
531
532							Advance to the next token, replacing the values of C<token_> variables and updating
533							C<input_pos>. Returns the token_type, of which all are true except EOF which has a
534							type of C<0>, so this also means the function returns true if it parsed a token and
535							false if it reached EOF. It dies if no token could be parsed.
536							If you call next_token again after the eof token, it throws an exception.
537
538							This method is a wrapper around L</scan_token>. Override that method to add new token types.
539
540							=head2 scan_token
541
542							Pattern-match the next token, and either return C<< $type => $value >> or an empty list if
543							the syntax is invalid. This is intended to be overridden by subclasses.
544
545							=head2 consume_token
546
547							return $self->consume_token if $self->{token_type} eq $desired_type;
548
549							This is a shorthand for returning the current C<token_value> while also calling C<next_token>.
550
551							=head2 token_context
552
553							my $text= $self->token_context(%options);
554
555							Default behavior generates a string like:
556
557							"'blah blah' on line 15, char 12"
558
559							Passing C<< token_context(multiline => 1) >> generates a string like
560
561							"Expected something else at line 15, char 16\n" .
562							"blah blah blah token blah blah\n" .
563							" ^^^^^\n"
564
565							Multiline additionally takes arguments as described in
566							L<Language::FormulaEngine::Parser::ContextUtil/format_context_multiline>.
567
568							=head1 GRAMMAR
569
570							=head2 Parse Rules
571
572							The default grammar implements the following rules:
573
574							expr ::= or_expr
575							or_expr ::= and_expr ( 'or' and_expr )*
576							and_expr ::= not_expr ( 'and' not_expr )*
577							not_expr ::= ( 'not' \| '!' ) cmp_expr \| cmp_expr
578							cmp_expr ::= sum_expr ( ( '=' \| '==' \| '<>' \| '\u2260' \| '<' \| '<=' \| '>' \| '>=' ) sum_expr )*
579							sum_expr ::= prod_expr ( ('+' \| '-') prod_expr )*
580							prod_expr ::= ( unit_expr ('' \| '/') ) unit_expr
581							unit_expr ::= '-' unit_expr \| Identifier '(' list ')' \| '(' (expr\|list) ')' \| Identifier \| Number \| String
582							list ::= expr ( ',' expr )* ','?
583
584							C<ident>, C<num>, C<str>, and all the punctuation symbols are tokens.
585
586							The parser uses a Recursive Descent algorithm implemented as the following method calls.
587							Each method consumes tokens from C<< $self >> and return a L</"PARSE NODES">:
588
589							=over
590
591							=item parse_expr
592
593							=item parse_or_expr
594
595							=item parse_and_expr
596
597							=item parse_not_expr
598
599							=item parse_cmp_expr
600
601							=item parse_sum_expr
602
603							=item parse_prod_expr
604
605							=item parse_unit_expr
606
607							=item parse_list
608
609							=back
610
611							=head2 Token Types
612
613							=over
614
615							=item C<'Number'>
616
617							All the common decimal representations of integers and floating point numbers
618							which perl can parse. Optional decimals and decimal point followed by decimals
619							and optional exponent, ending at either the end of the input or a non-alphanumeric.
620
621							=item C<'String'>
622
623							A single-quoted or double-quoted string, treating a double occurrence of the quote
624							character to mean a literal quote character. ("Pascal style")
625
626							'apostrophes are''nt hard'
627
628							There are no escape sequences though, so to get control characters or awkward unicode
629							into a string you need something like:
630
631							concat("smile ",char(0x263A))
632
633							which depends on those functions being available in the namespace.
634
635							=item Keywords...
636
637							Keywords include the "word" tokens like 'OR', but also every text literal seen in a parse rule
638							such as operators and punctuation.
639							The C<token_type> of the keyword is the canonical version of the keyword, and the C<token_value>
640							is the actual text that was captured. The pattern matches the longest keyword possible.
641
642							=item C<'Identifier'>
643
644							Any alpha (or underscore) followed by any run of alphanumerics,
645							(including underscore and period).
646
647							=back
648
649							=head2 Customizing the Token Scanner
650
651							The tokens are parsed using a series of regex tests. The regexes and the code that handles a
652							match of that regex are found in package attribute L</scanner_rules>. These regexes and code
653							fragments get lazily compiled into a package method on the first use (per package).
654							Meanwhile, several of those regex are built from other package attributes.
655
656							=over
657
658							=item scanner_rules
659
660							This package method returns a list (not arrayref) of ordered elements of the form
661							C<< [ $name, $regex, $code_fragment, \%vars ] >>. You can subclass this method to inspect
662							the rules (probably based on C<$name>) and replace the regexes, or alter the handler code,
663							or add/remove your own rules. The regexes are attempted in the order they appear in this
664							list. You do not need to use "\G" or "/gc" on these regexes because those are added
665							automatically during compilation.
666
667							=item keyword_map
668
669							This package method returns a hashref of all known keywords, mapped to their canonical form.
670							So for instance, a key of C<< '<>' >> with a value of C<< '!=' >>. These tokens automatically
671							become the scanner rule named C<Keywords>. In turn, the contents of this hashref include
672							the L</cmp_operators>, L</math_operators>, L</logic_operators>, and L</list_operators> which
673							can be overridden separately.
674
675							This method is called once during the compilation of L</scan_token>, and the result is then
676							made into a constant and referenced by the compiled method, so dynamic changes to the output
677							of this method will be ignored.
678
679							=item cmp_operators
680
681							Package method that returns a list of comparison operators, like '<', '>=', etc.
682
683							=item math_operators
684
685							Package method that returns a list of math operators, like '*', '+', etc.
686
687							=item logic_operators
688
689							Package method that returns a list of keywords like 'and', 'or', etc.
690
691							=item list_operators
692
693							Package method that returns a list of '(', ')', ','
694
695							=back
696
697							=head2 Parse Nodes
698
699							The parse tree takes a minimalist approach to node classification. In this default
700							implementation, number values, string values, and symbolic references have just a simple
701							wrapper around the value, and function calls are just a pair of function name and list of
702							arguments. All language operators are represented as function calls.
703
704							A blessed node only needs to support one method: C<< ->evaluate($namespace) >>.
705
706							The class name of the blessed nodes should be ignored. A function is anything which
707							C<< can("function_name") >>, a string is anything which C<< can("string_value") >>, a number is
708							anything which C<< can("number_value") >> and a symbolic reference is anything which
709							C<< can("symbolic_name") >>.
710
711							Subclasses of Parser should implemnt new node types as needed. You probable also need to
712							update L</deparse>.
713
714							The parser rules (C<parse_X_expr> methods) create nodes by the following methods on the Parser
715							class, so that you can easily subclass C<Parser> and override which class of node is getting
716							created.
717
718							=over
719
720							=item new_call
721
722							$node= $parser->new_call( $function_name, $parameters );
723
724							Generate a node for a function call. The returned node has attributes C<function_name>
725							and C<parameters>
726
727							=item new_symbol
728
729							$node= $parser->new_symbol($symbol_name);
730
731							A reference to a symbolic value (i.e. variable or constant).
732							It has one attribute C<symbol_name>.
733
734							=item new_string
735
736							$node= $parser->new_string($string_value);
737
738							A string literal. It has an attribute C<string_value> holding the raw value.
739
740							=item new_number
741
742							$plain_scalar= $parser->new_number($value);
743
744							A numeric constant. It has an attribute C<number_value> holding the raw value.
745
746							=item get_negative
747
748							$negative_node= $parser->get_negative( $node );
749
750							Utility method to get the "opposite of" a parse node. By default, this wraps it with the
751							function C<'negative'>, unless it already was that function then it unwraps the parameter.
752							It performs simple negation on numbers.
753
754							=back
755
756							=head1 AUTHOR
757
758							Michael Conrad <mconrad@intellitree.com>
759
760							=head1 COPYRIGHT AND LICENSE
761
762							This software is copyright (c) 2023 by Michael Conrad, IntelliTree Solutions llc.
763
764							This is free software; you can redistribute it and/or modify it under
765							the same terms as the Perl 5 programming language system itself.
766
767							=cut