File Coverage

blib/lib/Pegex/Parser.pm

Criterion	Covered	Total	%
statement	146	207	70.5
branch	91	156	58.3
condition	25	56	44.6
subroutine	16	23	69.5
pod	0	16	0.0
total	278	458	60.7

line	stmt	bran	cond	sub	pod	time	code
1							package Pegex::Parser;
2	11			11		135566	use Pegex::Base;
	11					29
	11					60
3	11			11		71	no warnings qw( recursion );
	11					29
	11					404
4
5	11			11		4217	use Pegex::Input;
	11					29
	11					356
6	11			11		4441	use Pegex::Optimizer;
	11					26
	11					323
7	11			11		70	use Scalar::Util;
	11					22
	11					9908
8
9							has grammar => (required => 1);
10							has receiver => ();
11							has input => ();
12
13							has recursion_count => 0;
14							has iteration_count => 0;
15
16							has debug => ();
17							has debug_indent => ();
18							has debug_color => ();
19							has debug_got_color => ();
20							has debug_not_color => ();
21
22							has recursion_limit => ();
23							has recursion_warn_limit => ();
24							has iteration_limit => ();
25
26							sub BUILD {
27	21			21	0	62	my ($self) = @_;
28
29	21		50			120	$self->{throw_on_error} \|\|= 1;
30
31							$self->{debug} =
32							defined($ENV{PERL_PEGEX_DEBUG}) ? $ENV{PERL_PEGEX_DEBUG} :
33							defined($Pegex::Parser::Debug) ? $Pegex::Parser::Debug : 0
34	21	50				130	unless defined($self->{debug});
		50
		100
35
36							$self->{debug_indent} =
37							defined($ENV{PERL_PEGEX_DEBUG_INDENT}) ? $ENV{PERL_PEGEX_DEBUG_INDENT} :
38							defined($Pegex::Parser::DebugIndent) ? $Pegex::Parser::DebugIndent : 1
39	21	50				125	unless defined($self->{debug_indent});
		50
		50
40							$self->{debug_indent} = 1 if (
41							not length $self->{debug_indent}
42							or $self->{debug_indent} =~ tr/0-9//c
43	21	50	33			210	or $self->{debug_indent} < 0
			33
44							);
45
46	21	100				100	if ($self->{debug}) {
47							$self->{debug_color} =
48							defined($ENV{PERL_PEGEX_DEBUG_COLOR}) ? $ENV{PERL_PEGEX_DEBUG_COLOR} :
49							defined($Pegex::Parser::DebugColor) ? $Pegex::Parser::DebugColor : 1
50	2	50				17	unless defined($self->{debug_color});
		50
		50
51	2					6	my ($got, $not);
52							($self->{debug_color}, $got, $not) =
53	2					10	split / , /, $self->{debug_color};
54	2		50			12	$got \|\|= 'bright_green';
55	2		50			10	$not \|\|= 'bright_red';
56	2					12	$_ = [split ' ', $_] for ($got, $not);
57	2					6	$self->{debug_got_color} = $got;
58	2					6	$self->{debug_not_color} = $not;
59	2	50				9	my $c = defined($self->{debug_color}) ? $self->{debug_color} : 1;
60							$self->{debug_color} =
61	2	0				24	$c eq 'always' ? 1 :
		50
		50
		50
		50
62							$c eq 'auto' ? (-t STDERR ? 1 : 0) :
63							$c eq 'never' ? 0 :
64							$c =~ /^\d+$/ ? $c : 0;
65	2	50				14	if ($self->{debug_color}) {
66	2					1496	require Term::ANSIColor;
67	2	50				18119	if ($Term::ANSIColor::VERSION < 3.00) {
68	0					0	s/^bright_// for
69	0					0	@{$self->{debug_got_color}},
70	0					0	@{$self->{debug_not_color}};
71							}
72							}
73							}
74							$self->{recursion_limit} =
75							defined($ENV{PERL_PEGEX_RECURSION_LIMIT}) ? $ENV{PERL_PEGEX_RECURSION_LIMIT} :
76							defined($Pegex::Parser::RecursionLimit) ? $Pegex::Parser::RecursionLimit : 0
77	21	50				149	unless defined($self->{recursion_limit});
		50
		50
78							$self->{recursion_warn_limit} =
79							defined($ENV{PERL_PEGEX_RECURSION_WARN_LIMIT}) ? $ENV{PERL_PEGEX_RECURSION_WARN_LIMIT} :
80							defined($Pegex::Parser::RecursionWarnLimit) ? $Pegex::Parser::RecursionWarnLimit : 0
81	21	50				127	unless defined($self->{recursion_warn_limit});
		50
		50
82							$self->{iteration_limit} =
83							defined($ENV{PERL_PEGEX_ITERATION_LIMIT}) ? $ENV{PERL_PEGEX_ITERATION_LIMIT} :
84							defined($Pegex::Parser::IterationLimit) ? $Pegex::Parser::IterationLimit : 0
85	21	50				161	unless defined($self->{iteration_limit});
		50
		50
86							}
87
88							# XXX Add an optional $position argument. Default to 0. This is the position
89							# to start parsing. Set position and farthest below to this value. Allows for
90							# sub-parsing. Need to somehow return the finishing position of a subparse.
91							# Maybe this all goes in a subparse() method.
92							sub parse {
93	19			19	0	74	my ($self, $input, $start) = @_;
94
95	19	50				52	$start =~ s/-/_/g if $start;
96
97	19					47	$self->{position} = 0;
98	19					39	$self->{farthest} = 0;
99
100	19	100				122	$self->{input} = (not ref $input)
101							? Pegex::Input->new(string => $input)
102							: $input;
103
104							$self->{input}->open
105	19	50				139	unless $self->{input}{_is_open};
106	19					78	$self->{buffer} = $self->{input}->read;
107	19					48	$self->{last_line_pos} = 0;
108	19					73	$self->{last_line} = 1;
109
110	19		66			217	$self->{grammar}{tree} \|\|= $self->{grammar}->make_tree;
111
112							my $start_rule_ref = $start \|\|
113							$self->{grammar}{tree}{'+toprule'} \|\|
114	19	50	33			248	$self->{grammar}{tree}{'TOP'} & 'TOP' or
115							die "No starting rule for Pegex::Parser::parse";
116
117							die "No 'receiver'. Can't parse"
118	19	50				103	unless $self->{receiver};
119
120							my $optimizer = Pegex::Optimizer->new(
121							parser => $self,
122							grammar => $self->{grammar},
123							receiver => $self->{receiver},
124	19					128	);
125
126	19					105	$optimizer->optimize_grammar($start_rule_ref);
127
128							# Add circular ref and weaken it.
129	19					44	$self->{receiver}{parser} = $self;
130	19					124	Scalar::Util::weaken($self->{receiver}{parser});
131
132	19	50				167	if ($self->{receiver}->can("initial")) {
133	0					0	$self->{rule} = $start_rule_ref;
134	0					0	$self->{parent} = {};
135	0					0	$self->{receiver}->initial();
136							}
137
138	19					62	local *match_next;
139							{
140	11			11		95	no warnings 'redefine';
	11					25
	11					11064
	19					31
141							*match_next = (
142							$self->{recursion_warn_limit} or
143							$self->{recursion_limit} or
144							$self->{iteration_limit}
145	19	50	33			215	) ? \&match_next_with_limit :
146							\&match_next_normal;
147							}
148
149	19	100				88	my $match = $self->debug ? do {
150	1					4	my $method = $optimizer->make_trace_wrapper(\&match_ref);
151	1					5	$self->$method($start_rule_ref, {'+asr' => 0});
152							} : $self->match_ref($start_rule_ref, {});
153
154	19					108	$self->{input}->close;
155
156	19	50	33			75	if (not $match or $self->{position} < length ${$self->{buffer}}) {
	19					73
157	0					0	$self->throw_error("Parse document failed for some reason");
158	0					0	return; # In case $self->throw_on_error is off
159							}
160
161	19	100				159	if ($self->{receiver}->can("final")) {
162	17					47	$self->{rule} = $start_rule_ref;
163	17					41	$self->{parent} = {};
164	17					64	$match = [ $self->{receiver}->final(@$match) ];
165							}
166
167	19					188	$match->[0];
168							}
169
170							sub match_next_normal {
171	3442			3442	0	5111	my ($self, $next) = @_;
172
173							my ($rule, $method, $kind, $min, $max, $assertion) =
174	3442					4139	@{$next}{'rule', 'method', 'kind', '+min', '+max', '+asr'};
	3442					6605
175
176							my ($position, $match, $count) =
177	3442					5815	($self->{position}, [], 0);
178
179	3442					5738	while (my $return = $method->($self, $rule, $next)) {
180	1376	50				2570	$position = $self->{position} unless $assertion;
181	1376					1594	$count++;
182	1376					2116	push @$match, @$return;
183	1376	100				2652	last if $max == 1;
184							}
185	3442	100	100			8981	if (not $count and $min == 0 and $kind eq 'all') {
			100
186	125					249	$match = [[]];
187							}
188	3442	100				5275	if ($max != 1) {
189	116	50				211	if ($next->{-flat}) {
190	0	0				0	$match = [ map { (ref($_) eq 'ARRAY') ? (@$_) : ($_) } @$match ];
	0					0
191							}
192							else {
193	116					189	$match = [$match]
194							}
195							}
196	3442		66			8069	my $result = ($count >= $min and (not $max or $count <= $max))
197							^ ($assertion == -1);
198	3442	100	100			7017	if (not($result) or $assertion) {
199							$self->{farthest} = $position
200	1985	50				3518	if ($self->{position} = $position) > $self->{farthest};
201							}
202
203	3442	50				9314	($result ? $next->{'-skip'} ? [] : $match : 0);
		100
204							}
205
206							sub match_next_with_limit {
207	0			0	0	0	my ($self, $next) = @_;
208
209							sub limit_msg {
210	0			0	0	0	"Deep recursion ($_[0] levels) on Pegex::Parser::match_next\n";
211							}
212
213	0					0	$self->{iteration_count}++;
214	0					0	$self->{recursion_count}++;
215
216	0	0	0			0	if (
		0	0
		0	0
217							$self->{recursion_limit} and
218							$self->{recursion_count} >= $self->{recursion_limit}
219	0					0	) { die limit_msg $self->{recursion_count} }
220							elsif (
221							$self->{recursion_warn_limit} and
222							not ($self->{recursion_count} % $self->{recursion_warn_limit})
223	0					0	) { warn limit_msg $self->{recursion_count} }
224							elsif (
225							$self->{iteration_limit} and
226							$self->{iteration_count} > $self->{iteration_limit}
227	0					0	) { die "Pegex iteration limit of $self->{iteration_limit} reached." }
228
229	0					0	my $result = $self->match_next_normal($next);
230
231	0					0	$self->{recursion_count}--;
232
233	0					0	return $result;
234							}
235
236							sub match_rule {
237	1			1	0	10	my ($self, $position, $match) = (@_, []);
238	1					3	$self->{position} = $position;
239							$self->{farthest} = $position
240	1	50				5	if $position > $self->{farthest};
241	1	50				3	$match = [ $match ] if @$match > 1;
242	1					3	my ($ref, $parent) = @{$self}{'rule', 'parent'};
	1					3
243	1	50				4	my $rule = $self->{grammar}{tree}{$ref}
244							or die "No rule defined for '$ref'";
245
246	1					4	[ $rule->{action}->($self->{receiver}, @$match) ];
247							}
248
249							sub match_ref {
250	1474			1474	0	2211	my ($self, $ref, $parent) = @_;
251	1474	50				3125	my $rule = $self->{grammar}{tree}{$ref}
252							or die "No rule defined for '$ref'";
253	1474	100				2573	my $match = $self->match_next($rule) or return;
254	595	100				1130	return $Pegex::Constant::Dummy unless $rule->{action};
255	572					824	@{$self}{'rule', 'parent'} = ($ref, $parent);
	572					1030
256
257							# XXX Possible API mismatch.
258							# Not sure if we should "splat" the $match.
259	572					1585	[ $rule->{action}->($self->{receiver}, @$match) ];
260							}
261
262							sub match_rgx {
263	1287			1287	0	1817	my ($self, $regexp) = @_;
264	1287					1614	my $buffer = $self->{buffer};
265
266	1287					2583	pos($$buffer) = $self->{position};
267	1287	100				7352	$$buffer =~ /$regexp/g or return;
268
269	401					752	$self->{position} = pos($$buffer);
270
271							$self->{farthest} = $self->{position}
272	401	100				830	if $self->{position} > $self->{farthest};
273
274	11			11		87	no strict 'refs';
	11					26
	11					12231
275	401					1722	my $captures = [ map $$_, 1..$#+ ];
276	401	100				990	$captures = [ $captures ] if $#+ > 1;
277
278	401					895	return $captures;
279							}
280
281							sub match_all {
282	571			571	0	843	my ($self, $list) = @_;
283	571					742	my $position = $self->{position};
284	571					765	my $set = [];
285	571					746	my $len = 0;
286	571					870	for my $elem (@$list) {
287	1036	100				2048	if (my $match = $self->match_next($elem)) {
288	701	50	66			1993	if (not ($elem->{'+asr'} or $elem->{'-skip'})) {
289	700					1163	push @$set, @$match;
290	700					1272	$len++;
291							}
292							}
293							else {
294							$self->{farthest} = $position
295	335	50				632	if ($self->{position} = $position) > $self->{farthest};
296	335					735	return;
297							}
298							}
299	236	50				561	$set = [ $set ] if $len > 1;
300	236					496	return $set;
301							}
302
303							sub match_any {
304	241			241	0	390	my ($self, $list) = @_;
305	241					401	for my $elem (@$list) {
306	932	100				1740	if (my $match = $self->match_next($elem)) {
307	162					367	return $match;
308							}
309							}
310	79					156	return;
311							}
312
313							sub match_err {
314	0			0	0	0	my ($self, $error) = @_;
315	0					0	$self->throw_error($error);
316							}
317
318							sub trace {
319	46			46	0	91	my ($self, $action) = @_;
320	46	100				127	my $indent = ($action =~ /^try_/) ? 1 : 0;
321	46		100			97	$self->{indent} \|\|= 0;
322	46	100				80	$self->{indent}-- unless $indent;
323
324							$action = (
325							$action =~ m/got_/ ?
326							Term::ANSIColor::colored($self->{debug_got_color}, $action) :
327							$action =~ m/not_/ ?
328							Term::ANSIColor::colored($self->{debug_not_color}, $action) :
329							$action
330	46	50				194	) if $self->{debug_color};
		100
		50
331
332	46					2134	print STDERR ' ' x ($self->{indent} * $self->{debug_indent});
333	46	100				171	$self->{indent}++ if $indent;
334	46					54	my $snippet = substr(${$self->{buffer}}, $self->{position});
	46					177
335	46	100				119	$snippet = substr($snippet, 0, 30) . "..."
336							if length $snippet > 30;
337	46					174	$snippet =~ s/\n/\\n/g;
338	46	100				600	print STDERR sprintf("%-30s", $action) .
339							($indent ? " >$snippet<\n" : "\n");
340							}
341
342							sub throw_error {
343	0			0	0		my ($self, $msg) = @_;
344	0						$@ = $self->{error} = $self->format_error($msg);
345	0	0					return undef unless $self->{throw_on_error};
346	0						require Carp;
347	0						Carp::croak($self->{error});
348							}
349
350							sub format_error {
351	0			0	0		my ($self, $msg) = @_;
352	0						my $buffer = $self->{buffer};
353	0						my $position = $self->{farthest};
354	0						my $real_pos = $self->{position};
355
356	0						my $line = $self->line($position);
357	0						my $column = $position - rindex($$buffer, "\n", $position);
358
359	0	0					my $pretext = substr(
		0
360							$$buffer,
361							$position < 50 ? 0 : $position - 50,
362							$position < 50 ? $position : 50
363							);
364	0						my $context = substr($$buffer, $position, 50);
365	0						$pretext =~ s/.*\n//gs;
366	0						$context =~ s/\n/\\n/g;
367
368	0						return <<"...";
369							Error parsing Pegex document:
370							msg: $msg
371							line: $line
372							column: $column
373							context: $pretext$context
374	0						${\ (' ' x (length($pretext) + 10) . '^')}
375							position: $position ($real_pos pre-lookahead)
376							...
377							}
378
379							# TODO Move this to a Parser helper role/subclass
380							sub line_column {
381	0			0	0		my ($self, $position) = @_;
382	0		0				$position \|\|= $self->{position};
383	0						my $buffer = $self->{buffer};
384	0						my $line = $self->line($position);
385	0						my $column = $position - rindex($$buffer, "\n", $position);
386	0						return [$line, $column];
387							}
388
389							sub line {
390	0			0	0		my ($self, $position) = @_;
391	0		0				$position \|\|= $self->{position};
392	0						my $buffer = $self->{buffer};
393	0						my $last_line = $self->{last_line};
394	0						my $last_line_pos = $self->{last_line_pos};
395	0						my $len = $position - $last_line_pos;
396	0	0					if ($len == 0) {
397	0						return $last_line;
398							}
399	0						my $line;
400	0	0					if ($len < 0) {
401	0						$line = $last_line - scalar substr($$buffer, $position, -$len) =~ tr/\n//;
402							} else {
403	0						$line = $last_line + scalar substr($$buffer, $last_line_pos, $len) =~ tr/\n//;
404							}
405	0						$self->{last_line} = $line;
406	0						$self->{last_line_pos} = $position;
407	0						return $line;
408							}
409
410							# XXX Need to figure out what uses this. (sample.t)
411							{
412							package Pegex::Constant;
413							our $Null = [];
414							our $Dummy = [];
415							}
416
417							1;