line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Pegex::Parser; |
2
|
1
|
|
|
1
|
|
6
|
use Pegex::Base; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
8
|
|
3
|
|
|
|
|
|
|
|
4
|
1
|
|
|
1
|
|
3525
|
use Pegex::Input; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
46
|
|
5
|
1
|
|
|
1
|
|
740
|
use Pegex::Optimizer; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
32
|
|
6
|
1
|
|
|
1
|
|
8
|
use Scalar::Util; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1430
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
has grammar => (required => 1); |
9
|
|
|
|
|
|
|
has receiver => (); |
10
|
|
|
|
|
|
|
has input => (); |
11
|
|
|
|
|
|
|
has debug => ( |
12
|
|
|
|
|
|
|
exists($ENV{PERL_PEGEX_DEBUG}) ? $ENV{PERL_PEGEX_DEBUG} : |
13
|
|
|
|
|
|
|
defined($Pegex::Parser::Debug) ? $Pegex::Parser::Debug : |
14
|
|
|
|
|
|
|
0 |
15
|
|
|
|
|
|
|
); |
16
|
|
|
|
|
|
|
sub BUILD { |
17
|
13
|
|
|
13
|
0
|
2032
|
my ($self) = @_; |
18
|
13
|
|
50
|
|
|
87
|
$self->{throw_on_error} ||= 1; |
19
|
|
|
|
|
|
|
# $self->{rule} = undef; |
20
|
|
|
|
|
|
|
# $self->{parent} = undef; |
21
|
|
|
|
|
|
|
# $self->{error} = undef; |
22
|
|
|
|
|
|
|
# $self->{position} = undef; |
23
|
|
|
|
|
|
|
# $self->{farthest} = undef; |
24
|
|
|
|
|
|
|
} |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# XXX Add an optional $position argument. Default to 0. This is the position |
27
|
|
|
|
|
|
|
# to start parsing. Set position and farthest below to this value. Allows for |
28
|
|
|
|
|
|
|
# sub-parsing. Need to somehow return the finishing position of a subparse. |
29
|
|
|
|
|
|
|
# Maybe this all goes in a subparse() method. |
30
|
|
|
|
|
|
|
sub parse { |
31
|
14
|
|
|
14
|
0
|
221
|
my ($self, $input, $start) = @_; |
32
|
|
|
|
|
|
|
|
33
|
14
|
100
|
|
|
|
63
|
$start =~ s/-/_/g if $start; |
34
|
|
|
|
|
|
|
|
35
|
14
|
|
|
|
|
32
|
$self->{position} = 0; |
36
|
14
|
|
|
|
|
36
|
$self->{farthest} = 0; |
37
|
|
|
|
|
|
|
|
38
|
14
|
50
|
|
|
|
102
|
$self->{input} = (not ref $input) |
39
|
|
|
|
|
|
|
? Pegex::Input->new(string => $input) |
40
|
|
|
|
|
|
|
: $input; |
41
|
|
|
|
|
|
|
|
42
|
14
|
50
|
|
|
|
809
|
$self->{input}->open |
43
|
|
|
|
|
|
|
unless $self->{input}{_is_open}; |
44
|
14
|
|
|
|
|
59
|
$self->{buffer} = $self->{input}->read; |
45
|
|
|
|
|
|
|
|
46
|
14
|
50
|
|
|
|
57
|
die "No 'grammar'. Can't parse" |
47
|
|
|
|
|
|
|
unless $self->{grammar}; |
48
|
|
|
|
|
|
|
|
49
|
14
|
|
66
|
|
|
113
|
$self->{grammar}{tree} ||= $self->{grammar}->make_tree; |
50
|
|
|
|
|
|
|
|
51
|
14
|
50
|
33
|
|
|
2633
|
my $start_rule_ref = $start || |
52
|
|
|
|
|
|
|
$self->{grammar}{tree}{'+toprule'} || |
53
|
|
|
|
|
|
|
$self->{grammar}{tree}{'TOP'} & 'TOP' or |
54
|
|
|
|
|
|
|
die "No starting rule for Pegex::Parser::parse"; |
55
|
|
|
|
|
|
|
|
56
|
14
|
50
|
|
|
|
78
|
die "No 'receiver'. Can't parse" |
57
|
|
|
|
|
|
|
unless $self->{receiver}; |
58
|
|
|
|
|
|
|
|
59
|
14
|
|
|
|
|
128
|
my $optimizer = Pegex::Optimizer->new( |
60
|
|
|
|
|
|
|
parser => $self, |
61
|
|
|
|
|
|
|
grammar => $self->{grammar}, |
62
|
|
|
|
|
|
|
receiver => $self->{receiver}, |
63
|
|
|
|
|
|
|
); |
64
|
|
|
|
|
|
|
|
65
|
14
|
|
|
|
|
858
|
$optimizer->optimize_grammar($start_rule_ref); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Add circular ref and weaken it. |
68
|
14
|
|
|
|
|
47
|
$self->{receiver}{parser} = $self; |
69
|
14
|
|
|
|
|
93
|
Scalar::Util::weaken($self->{receiver}{parser}); |
70
|
|
|
|
|
|
|
|
71
|
14
|
50
|
|
|
|
106
|
if ($self->{receiver}->can("initial")) { |
72
|
0
|
|
|
|
|
0
|
$self->{rule} = $start_rule_ref; |
73
|
0
|
|
|
|
|
0
|
$self->{parent} = {}; |
74
|
0
|
|
|
|
|
0
|
$self->{receiver}->initial(); |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
14
|
50
|
|
|
|
55
|
my $match = $self->debug ? do { |
78
|
0
|
|
|
|
|
0
|
my $method = $optimizer->make_trace_wrapper(\&match_ref); |
79
|
0
|
|
|
|
|
0
|
$self->$method($start_rule_ref, {'+asr' => 0}); |
80
|
|
|
|
|
|
|
} : $self->match_ref($start_rule_ref, {}); |
81
|
|
|
|
|
|
|
|
82
|
14
|
|
|
|
|
85
|
$self->{input}->close; |
83
|
|
|
|
|
|
|
|
84
|
14
|
50
|
33
|
|
|
54
|
if (not $match or $self->{position} < length ${$self->{buffer}}) { |
|
14
|
|
|
|
|
65
|
|
85
|
0
|
|
|
|
|
0
|
$self->throw_error("Parse document failed for some reason"); |
86
|
0
|
|
|
|
|
0
|
return; # In case $self->throw_on_error is off |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
14
|
50
|
|
|
|
104
|
if ($self->{receiver}->can("final")) { |
90
|
14
|
|
|
|
|
29
|
$self->{rule} = $start_rule_ref; |
91
|
14
|
|
|
|
|
26
|
$self->{parent} = {}; |
92
|
14
|
|
|
|
|
75
|
$match = [ $self->{receiver}->final(@$match) ]; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
14
|
|
|
|
|
1080
|
$match->[0]; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub match_next { |
99
|
1503
|
|
|
1503
|
0
|
1890
|
my ($self, $next) = @_; |
100
|
|
|
|
|
|
|
|
101
|
1503
|
|
|
|
|
4072
|
my ($rule, $method, $kind, $min, $max, $assertion) = |
102
|
1503
|
|
|
|
|
1886
|
@{$next}{'rule', 'method', 'kind', '+min', '+max', '+asr'}; |
103
|
|
|
|
|
|
|
|
104
|
1503
|
|
|
|
|
3304
|
my ($position, $match, $count) = |
105
|
|
|
|
|
|
|
($self->{position}, [], 0); |
106
|
|
|
|
|
|
|
|
107
|
1503
|
|
|
|
|
2826
|
while (my $return = $method->($self, $rule, $next)) { |
108
|
502
|
50
|
|
|
|
1372
|
$position = $self->{position} unless $assertion; |
109
|
502
|
|
|
|
|
525
|
$count++; |
110
|
502
|
|
|
|
|
948
|
push @$match, @$return; |
111
|
502
|
100
|
|
|
|
2801
|
last if $max == 1; |
112
|
|
|
|
|
|
|
} |
113
|
1503
|
100
|
100
|
|
|
5913
|
if (not $count and $min == 0 and $kind eq 'all') { |
|
|
|
100
|
|
|
|
|
114
|
4
|
|
|
|
|
13
|
$match = [[]]; |
115
|
|
|
|
|
|
|
} |
116
|
1503
|
100
|
|
|
|
2889
|
if ($max != 1) { |
117
|
40
|
100
|
|
|
|
134
|
if ($next->{-flat}) { |
118
|
4
|
50
|
|
|
|
9
|
$match = [ map { (ref($_) eq 'ARRAY') ? (@$_) : ($_) } @$match ]; |
|
4
|
|
|
|
|
18
|
|
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
else { |
121
|
36
|
|
|
|
|
123
|
$match = [$match] |
122
|
|
|
|
|
|
|
} |
123
|
40
|
50
|
|
|
|
125
|
$self->{farthest} = $position |
124
|
|
|
|
|
|
|
if ($self->{position} = $position) > $self->{farthest}; |
125
|
|
|
|
|
|
|
} |
126
|
1503
|
|
66
|
|
|
4665
|
my $result = ($count >= $min and (not $max or $count <= $max)) |
127
|
|
|
|
|
|
|
^ ($assertion == -1); |
128
|
1503
|
100
|
100
|
|
|
4385
|
if (not($result) or $assertion) { |
129
|
978
|
50
|
|
|
|
2645
|
$self->{farthest} = $position |
130
|
|
|
|
|
|
|
if ($self->{position} = $position) > $self->{farthest}; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
1503
|
100
|
|
|
|
6908
|
($result ? $next->{'-skip'} ? [] : $match : 0); |
|
|
100
|
|
|
|
|
|
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub match_rule { |
137
|
0
|
|
|
0
|
0
|
0
|
my ($self, $position, $match) = (@_, []); |
138
|
0
|
|
|
|
|
0
|
$self->{position} = $position; |
139
|
0
|
0
|
|
|
|
0
|
$self->{farthest} = $position |
140
|
|
|
|
|
|
|
if $position > $self->{farthest}; |
141
|
0
|
0
|
|
|
|
0
|
$match = [ $match ] if @$match > 1; |
142
|
0
|
|
|
|
|
0
|
my ($ref, $parent) = @{$self}{'rule', 'parent'}; |
|
0
|
|
|
|
|
0
|
|
143
|
0
|
0
|
|
|
|
0
|
my $rule = $self->{grammar}{tree}{$ref} |
144
|
|
|
|
|
|
|
or die "No rule defined for '$ref'"; |
145
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
0
|
[ $rule->{action}->($self->{receiver}, @$match) ]; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub match_ref { |
150
|
754
|
|
|
754
|
0
|
1146
|
my ($self, $ref, $parent) = @_; |
151
|
754
|
50
|
|
|
|
2940
|
my $rule = $self->{grammar}{tree}{$ref} |
152
|
|
|
|
|
|
|
or die "No rule defined for '$ref'"; |
153
|
754
|
100
|
|
|
|
1530
|
my $match = $self->match_next($rule) or return; |
154
|
252
|
50
|
|
|
|
564
|
return $Pegex::Constant::Dummy unless $rule->{action}; |
155
|
252
|
|
|
|
|
329
|
@{$self}{'rule', 'parent'} = ($ref, $parent); |
|
252
|
|
|
|
|
825
|
|
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# XXX Possible API mismatch. |
158
|
|
|
|
|
|
|
# Not sure if we should "splat" the $match. |
159
|
252
|
|
|
|
|
967
|
[ $rule->{action}->($self->{receiver}, @$match) ]; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub match_rgx { |
163
|
508
|
|
|
508
|
0
|
657
|
my ($self, $regexp) = @_; |
164
|
508
|
|
|
|
|
717
|
my $buffer = $self->{buffer}; |
165
|
|
|
|
|
|
|
|
166
|
508
|
|
|
|
|
1343
|
pos($$buffer) = $self->{position}; |
167
|
508
|
100
|
|
|
|
5928
|
$$buffer =~ /$regexp/g or return; |
168
|
|
|
|
|
|
|
|
169
|
123
|
|
|
|
|
208
|
$self->{position} = pos($$buffer); |
170
|
|
|
|
|
|
|
|
171
|
123
|
50
|
|
|
|
315
|
$self->{farthest} = $self->{position} |
172
|
|
|
|
|
|
|
if $self->{position} > $self->{farthest}; |
173
|
|
|
|
|
|
|
|
174
|
1
|
|
|
1
|
|
14
|
no strict 'refs'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1044
|
|
175
|
123
|
|
|
|
|
560
|
my $captures = [ map $$_, 1..$#+ ]; |
176
|
123
|
50
|
|
|
|
320
|
$captures = [ $captures ] if $#+ > 1; |
177
|
|
|
|
|
|
|
|
178
|
123
|
|
|
|
|
372
|
return $captures; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub match_all { |
182
|
164
|
|
|
164
|
0
|
212
|
my ($self, $list) = @_; |
183
|
164
|
|
|
|
|
220
|
my $position = $self->{position}; |
184
|
164
|
|
|
|
|
247
|
my $set = []; |
185
|
164
|
|
|
|
|
186
|
my $len = 0; |
186
|
164
|
|
|
|
|
274
|
for my $elem (@$list) { |
187
|
316
|
100
|
|
|
|
656
|
if (my $match = $self->match_next($elem)) { |
188
|
214
|
100
|
100
|
|
|
941
|
if (not ($elem->{'+asr'} or $elem->{'-skip'})) { |
189
|
191
|
|
|
|
|
343
|
push @$set, @$match; |
190
|
191
|
|
|
|
|
460
|
$len++; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
else { |
194
|
102
|
50
|
|
|
|
240
|
$self->{farthest} = $position |
195
|
|
|
|
|
|
|
if ($self->{position} = $position) > $self->{farthest}; |
196
|
102
|
|
|
|
|
323
|
return; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
} |
199
|
62
|
50
|
|
|
|
201
|
$set = [ $set ] if $len > 1; |
200
|
62
|
|
|
|
|
174
|
return $set; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub match_any { |
204
|
129
|
|
|
129
|
0
|
177
|
my ($self, $list) = @_; |
205
|
129
|
|
|
|
|
377
|
for my $elem (@$list) { |
206
|
433
|
100
|
|
|
|
933
|
if (my $match = $self->match_next($elem)) { |
207
|
79
|
|
|
|
|
257
|
return $match; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
} |
210
|
50
|
|
|
|
|
161
|
return; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub match_err { |
214
|
0
|
|
|
0
|
0
|
|
my ($self, $error) = @_; |
215
|
0
|
|
|
|
|
|
$self->throw_error($error); |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub trace { |
219
|
0
|
|
|
0
|
0
|
|
my ($self, $action) = @_; |
220
|
0
|
0
|
|
|
|
|
my $indent = ($action =~ /^try_/) ? 1 : 0; |
221
|
0
|
|
0
|
|
|
|
$self->{indent} ||= 0; |
222
|
0
|
0
|
|
|
|
|
$self->{indent}-- unless $indent; |
223
|
0
|
|
|
|
|
|
print STDERR ' ' x $self->{indent}; |
224
|
0
|
0
|
|
|
|
|
$self->{indent}++ if $indent; |
225
|
0
|
|
|
|
|
|
my $snippet = substr(${$self->{buffer}}, $self->{position}); |
|
0
|
|
|
|
|
|
|
226
|
0
|
0
|
|
|
|
|
$snippet = substr($snippet, 0, 30) . "..." |
227
|
|
|
|
|
|
|
if length $snippet > 30; |
228
|
0
|
|
|
|
|
|
$snippet =~ s/\n/\\n/g; |
229
|
0
|
0
|
|
|
|
|
print STDERR sprintf("%-30s", $action) . |
230
|
|
|
|
|
|
|
($indent ? " >$snippet<\n" : "\n"); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub throw_error { |
234
|
0
|
|
|
0
|
0
|
|
my ($self, $msg) = @_; |
235
|
0
|
|
|
|
|
|
$@ = $self->{error} = $self->format_error($msg); |
236
|
0
|
0
|
|
|
|
|
return undef unless $self->{throw_on_error}; |
237
|
0
|
|
|
|
|
|
require Carp; |
238
|
0
|
|
|
|
|
|
Carp::croak($self->{error}); |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub format_error { |
242
|
0
|
|
|
0
|
0
|
|
my ($self, $msg) = @_; |
243
|
0
|
|
|
|
|
|
my $buffer = $self->{buffer}; |
244
|
0
|
|
|
|
|
|
my $position = $self->{farthest}; |
245
|
0
|
|
|
|
|
|
my $real_pos = $self->{position}; |
246
|
|
|
|
|
|
|
|
247
|
0
|
|
|
|
|
|
my $line = @{[substr($$buffer, 0, $position) =~ /(\n)/g]} + 1; |
|
0
|
|
|
|
|
|
|
248
|
0
|
|
|
|
|
|
my $column = $position - rindex($$buffer, "\n", $position); |
249
|
|
|
|
|
|
|
|
250
|
0
|
0
|
|
|
|
|
my $pretext = substr( |
|
|
0
|
|
|
|
|
|
251
|
|
|
|
|
|
|
$$buffer, |
252
|
|
|
|
|
|
|
$position < 50 ? 0 : $position - 50, |
253
|
|
|
|
|
|
|
$position < 50 ? $position : 50 |
254
|
|
|
|
|
|
|
); |
255
|
0
|
|
|
|
|
|
my $context = substr($$buffer, $position, 50); |
256
|
0
|
|
|
|
|
|
$pretext =~ s/.*\n//gs; |
257
|
0
|
|
|
|
|
|
$context =~ s/\n/\\n/g; |
258
|
|
|
|
|
|
|
|
259
|
0
|
|
|
|
|
|
return <<"..."; |
260
|
|
|
|
|
|
|
Error parsing Pegex document: |
261
|
0
|
|
|
|
|
|
msg: $msg |
262
|
|
|
|
|
|
|
line: $line |
263
|
|
|
|
|
|
|
column: $column |
264
|
|
|
|
|
|
|
context: $pretext$context |
265
|
|
|
|
|
|
|
${\ (' ' x (length($pretext) + 10) . '^')} |
266
|
|
|
|
|
|
|
position: $position ($real_pos pre-lookahead) |
267
|
|
|
|
|
|
|
... |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# TODO Move this to a Parser helper role/subclass |
271
|
|
|
|
|
|
|
sub line_column { |
272
|
0
|
|
|
0
|
0
|
|
my ($self, $position) = @_; |
273
|
0
|
|
0
|
|
|
|
$position ||= $self->{position}; |
274
|
0
|
|
|
|
|
|
my $buffer = $self->{buffer}; |
275
|
0
|
|
|
|
|
|
my $line = @{[substr($$buffer, 0, $position) =~ /(\n)/g]} + 1; |
|
0
|
|
|
|
|
|
|
276
|
0
|
|
|
|
|
|
my $column = $position - rindex($$buffer, "\n", $position); |
277
|
0
|
|
|
|
|
|
return [$line, $position]; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# XXX Need to figure out what uses this. (sample.t) |
281
|
|
|
|
|
|
|
{ |
282
|
|
|
|
|
|
|
package Pegex::Constant; |
283
|
|
|
|
|
|
|
our $Null = []; |
284
|
|
|
|
|
|
|
our $Dummy = []; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
1; |