File Coverage

blib/lib/Math/Expression/Evaluator/Lexer.pm

Criterion	Covered	Total	%
statement	42	42	100.0
branch	14	14	100.0
condition			n/a
subroutine	6	6	100.0
pod	1	1	100.0
total	63	63	100.0

line	stmt	bran	sub	pod	time	code
1						package Math::Expression::Evaluator::Lexer;
2	17		17		28244	use warnings;
	17				33
	17				463
3	17		17		81	use strict;
	17				31
	17				505
4	17		17		96	use Carp qw(confess);
	17				25
	17				1219
5	17		17		16987	use Data::Dumper;
	17				165113
	17				9011
6
7						=head1 NAME
8
9						Math::Expression::Evaluator::Lexer - Simple Lexer
10
11						=head1 SYNOPSIS
12
13						use Math::Expression::Evaluator::Lexer qw(lex);
14						# suppose you want to parse simple math expressions
15						my @input_tokens = (
16						['Int', qr/[+-]?\d+/ ],
17						['Op', qr{[+-/*]} ],
18						['Brace_Open', qr/\(/ ],
19						['Brace_Close', qr/\)/ ],
20						['Whitespace', qr/\s+/, sub { return; }],
21						);
22						my $text = "-12 * (3+4)";
23						my $out_tokens = lex($text, \@input_tokens);
24						for (@$out_tokens){
25						my ($name, $text, $pos) = @$_;
26						print "Found Token $name: $text (string pos: $pos)\n";
27						}
28
29						=head1 DESCRIPTION
30
31						Math::Expression::Evaluator::Lexer is a simple lexer that breaks up a text
32						into tokens, depending on the input tokens you provide
33
34						=head1 METHODS
35
36						=over 2
37
38						=item lex
39
40						The only exported routine is lex, which expects input text as its
41						first argument and a array ref to list of input tokens.
42
43						Each input token consists of a token name (which you can choose freely),
44						a regex which matches the desired token, and optionally a reference to
45						a functions that takes the matched token text as its argument. The
46						token text is replaced by the return value of that function. If the
47						function returns undef, that token will not be included in the list
48						of output tokens. The regex should either fail or match at least one
49						character; zero-width matches utterly confuse the lexer, and are disallowed.
50
51						lex() returns an array ref to a list of output tokens, each output
52						token is a reference to a list which contains the token name, the matched
53						text, the string position (in characters, counted from the start of
54						the input string, zero based) and the line number.
55
56						Note that C puts parentheses around the entire regex, so if you
57						want to use backreferences, the numbering of the capturing group is changed.
58
59						=back
60
61						=head1 COPYRIGHT AND LICENSE
62
63						Copyright (C) 2007 by Moritz Lenz, L,
64						L.
65
66						This Program and its Documentation is free software. You may distribute it
67						under the same terms as perl itself.
68
69						However all code examples are to be public domain, so you can use it in any
70						way you want to.
71
72						=cut
73
74						require Exporter;
75						our @ISA = qw(Exporter);
76						our @EXPORT_OK = qw(lex);
77
78						our %EXPORT_TAGS = (":all" => \@EXPORT_OK);
79
80						sub lex {
81	297		297	1	1204	my ($text, $tokens) = @_;
82	297	100			1111	confess("passed undefined value to lex()") unless defined $text;
83	296				489	my $l = length $text;
84	296	100			618	return [] unless $l;
85
86	295				478	my ($last_line_number, $last_pos) = (0, 0);
87						my $pos_and_line_number = sub {
88	1348		1348		1605	my $pos = shift;
89	1348				2957	$last_line_number +=
90						(substr($text, $last_pos, $pos - $last_pos) =~ tr/\n//);
91	1348				1714	$last_pos = $pos;
92	1348				4654	return ($pos, $last_line_number + 1);
93	295				1400	};
94
95	295				412	my @res;
96
97						# avoid 'Use of uninitialized value in numeric lt (<)' warnings:
98	295				724	pos($text) = 0;
99
100	295				904	while (pos($text) < $l){
101	963				1176	my $matched = 0;
102						REGEXES:
103	963				1649	for (@$tokens){
104	11513				17113	my $re = $_->[1];
105						# failed regex matches reset pos() unless the /c modifier
106						# is present
107	11513	100			211789	if ($text =~ m/\G($re)/gc){
108	1582				2128	$matched = 1;
109	1582				3070	my $match = $1;
110	1582	100			3229	if (length $match == 0){
111	1				185	confess("Each token has to require at least one "
112						. "character; Rule $_->[0] matched Zero!\n");
113						}
114
115						# safe information before callbacks can modify $match
116						# and thus length($match)
117	1581				2381	my $pos = pos($text) - length($match);
118
119	1581	100			3297	if ($_->[2]){
120	234				808	$match = $_->[2]->($match);
121						}
122	1581	100			3139	if (defined $match){
123	1348				2946	push @res, [
124						$_->[0],
125						$match,
126						$pos_and_line_number->($pos),
127						];
128						}
129	1581				5726	next REGEXES;
130						}
131						}
132	962	100			3498	if ($matched == 0){
133	1				120	confess("No token matched input text <$text> at position " . pos($text));
134						}
135						}
136	293				2658	return \@res;
137						}
138
139						1;
140
141						# vim: sw=4 ts=4 expandtab