line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Parse::Earley; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
#Parse::Earley |
4
|
|
|
|
|
|
|
#By Luke Palmer |
5
|
|
|
|
|
|
|
#Copyright (C) 2002, Luke Palmer. All rights reserved. |
6
|
|
|
|
|
|
|
#This module is free software. It may used, redistributed, and/or modified |
7
|
|
|
|
|
|
|
#under the terms of the Perl Artistic Licence: |
8
|
|
|
|
|
|
|
# http://www.perl.com/perl/misc/Artistic.html |
9
|
|
|
|
|
|
|
|
10
|
1
|
|
|
|
|
111
|
use Text::Balanced qw( extract_quotelike extract_codeblock |
11
|
1
|
|
|
1
|
|
17579
|
extract_bracketed extract_multiple ); |
|
1
|
|
|
|
|
22902
|
|
12
|
1
|
|
|
1
|
|
7
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
49
|
|
13
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
3253
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '0.15'; |
17
|
|
|
|
|
|
|
our $DEBUG; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
sub new($) |
20
|
|
|
|
|
|
|
{ |
21
|
6
|
|
|
6
|
0
|
664
|
my $self = bless { |
22
|
|
|
|
|
|
|
rules => { }, |
23
|
|
|
|
|
|
|
sets => { }, # Sparse array by pos() |
24
|
|
|
|
|
|
|
set => [ ], # Not an index, rather, something to be pushed |
25
|
|
|
|
|
|
|
ncset => [ ], |
26
|
|
|
|
|
|
|
skip => qr/\s*/, |
27
|
|
|
|
|
|
|
no_code => undef, |
28
|
|
|
|
|
|
|
} => shift |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub pushset(\$$) |
32
|
|
|
|
|
|
|
{ |
33
|
|
|
|
|
|
|
# rule lhs dot pos ref |
34
|
180
|
|
|
180
|
0
|
232
|
my ($self, $set) = @_; |
35
|
180
|
|
|
|
|
206
|
my $change = 0; |
36
|
180
|
|
|
|
|
291
|
for my $state (@$set) { |
37
|
181
|
|
|
|
|
329
|
my $s = $self->{sets}{$state->{pos}}; |
38
|
|
|
|
|
|
|
# unless (grep { $_->{rule} == $state->{rule} && |
39
|
|
|
|
|
|
|
# $_->{lhs} eq $state->{lhs} && |
40
|
|
|
|
|
|
|
# $_->{dot} == $state->{dot} && |
41
|
|
|
|
|
|
|
# $_->{pos} == $state->{pos} && |
42
|
|
|
|
|
|
|
# $_->{ref} == $state->{ref} } |
43
|
181
|
100
|
|
|
|
184
|
unless (grep { $_ == $state } |
|
278
|
|
|
|
|
738
|
|
|
181
|
|
|
|
|
396
|
|
44
|
|
|
|
|
|
|
@{$self->{sets}{$state->{pos}}} ) { |
45
|
150
|
|
|
|
|
155
|
push @{$self->{sets}{$state->{pos}}}, $state; |
|
150
|
|
|
|
|
318
|
|
46
|
150
|
|
|
|
|
438
|
$change++; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
} |
49
|
180
|
|
|
|
|
489
|
return $change; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# XXX: This is a I simple processor: make it better |
53
|
|
|
|
|
|
|
sub grammar(\$$) |
54
|
|
|
|
|
|
|
{ |
55
|
6
|
|
|
6
|
0
|
828
|
my ($self, $g) = @_; |
56
|
6
|
|
|
|
|
11
|
local $_ = $g; # XXX: Pseudo hack. I don't know why I can't |
57
|
|
|
|
|
|
|
# extract_multiple($g, ...) |
58
|
|
|
|
|
|
|
my @toks = extract_multiple($_, [ |
59
|
|
|
|
|
|
|
qr/[a-zA-Z_]\w*\s*:/, # LHS |
60
|
|
|
|
|
|
|
qr/[a-zA-Z_]\w*/, # Nonterminal |
61
|
134
|
|
|
134
|
|
21704
|
sub { scalar extract_quotelike($_); }, |
62
|
122
|
|
|
122
|
|
5771
|
sub { scalar extract_bracketed($_, '<>') }, |
63
|
121
|
|
|
121
|
|
7417
|
sub { scalar extract_codeblock($_, '{}') }, |
64
|
6
|
|
|
|
|
101
|
qr/\|/, |
65
|
|
|
|
|
|
|
qr/#.*/m, |
66
|
|
|
|
|
|
|
]); |
67
|
6
|
|
|
|
|
731
|
my @rulebuf; |
68
|
|
|
|
|
|
|
my $curule; |
69
|
0
|
|
|
|
|
0
|
my $errors; |
70
|
0
|
|
|
|
|
0
|
my $noskip_f; |
71
|
0
|
|
|
|
|
0
|
my $lineno; |
72
|
|
|
|
|
|
|
|
73
|
6
|
|
|
|
|
13
|
for (@toks) |
74
|
|
|
|
|
|
|
{ |
75
|
125
|
|
|
|
|
396
|
$lineno++ for (/\n/g); |
76
|
|
|
|
|
|
|
|
77
|
125
|
|
|
|
|
263
|
my %cp = (line => $lineno); |
78
|
125
|
100
|
|
|
|
251
|
if ($noskip_f) { |
79
|
1
|
|
|
|
|
2
|
$noskip_f = 0; |
80
|
1
|
|
|
|
|
5
|
%cp = (%cp, noskip => 1); |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
125
|
|
|
|
|
261
|
s/^\s+//; |
84
|
125
|
|
|
|
|
177
|
s/\s+$//; |
85
|
|
|
|
|
|
|
|
86
|
125
|
100
|
100
|
|
|
1050
|
if (/^(\w+)\s*:$/) { # LHS |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
87
|
18
|
|
|
|
|
21
|
push @{$self->{rules}{$curule}}, [ @rulebuf ]; |
|
18
|
|
|
|
|
89
|
|
88
|
18
|
|
|
|
|
31
|
undef @rulebuf; |
89
|
18
|
|
|
|
|
80
|
$curule = $1; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
elsif (/^\|$/) { |
92
|
6
|
|
|
|
|
13
|
push @{$self->{rules}{$curule}}, [ @rulebuf ]; |
|
6
|
|
|
|
|
22
|
|
93
|
6
|
|
|
|
|
17
|
undef @rulebuf; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
elsif (/^(\w+)$/) { # Nonterminal |
96
|
24
|
|
|
|
|
173
|
push @rulebuf, { %cp, |
97
|
|
|
|
|
|
|
match => $1, |
98
|
|
|
|
|
|
|
type => 'nonterminal' }; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
elsif (/^['"]/) { # Terminal |
101
|
7
|
|
|
|
|
514
|
push @rulebuf, { %cp, |
102
|
|
|
|
|
|
|
match => eval "$_", |
103
|
|
|
|
|
|
|
type => 'literal' }; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
elsif (/^q/) { |
106
|
0
|
|
|
|
|
0
|
push @rulebuf, { %cp, |
107
|
|
|
|
|
|
|
match => eval "$_", |
108
|
|
|
|
|
|
|
type => 'literal' }; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
elsif (/^\/(.*)\/$/ || /^m.(.*).$/) { |
111
|
5
|
|
|
|
|
58
|
push @rulebuf, { %cp, |
112
|
|
|
|
|
|
|
match => qr/$1/, |
113
|
|
|
|
|
|
|
type => 'regex' }; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
elsif (/^<\s*(.*)\s*>$/) { |
116
|
1
|
|
|
|
|
3
|
my $dir = $1; |
117
|
1
|
50
|
|
|
|
5
|
if ($dir eq 'noskip') { |
118
|
1
|
|
|
|
|
3
|
$noskip_f = 1; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
else { |
121
|
0
|
|
|
|
|
0
|
$errors .= "Unrecognized directive: <$dir> at line $lineno\n"; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
elsif (/^{/) { |
125
|
0
|
0
|
|
|
|
0
|
if ($self->{no_code}) { |
|
|
0
|
|
|
|
|
|
126
|
0
|
|
|
|
|
0
|
$errors .= "Code not allowed\n"; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
elsif (@rulebuf) { |
129
|
0
|
|
|
|
|
0
|
$rulebuf[$#rulebuf]{code} = eval "sub $_"; |
130
|
0
|
0
|
|
|
|
0
|
$errors .= "$@\n" if $@; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
else { |
133
|
0
|
|
|
|
|
0
|
$errors .= "Condition does not follow anything at line $lineno\n"; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
elsif (/^#/ || /^\s*$/) { |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
else { |
139
|
0
|
|
|
|
|
0
|
$errors .= "Unrecognized pattern '$_' at line $lineno\n"; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
} |
142
|
6
|
|
|
|
|
11
|
push @{$self->{rules}{$curule}}, [ @rulebuf ]; |
|
6
|
|
|
|
|
20
|
|
143
|
6
|
50
|
|
|
|
14
|
if ($errors) { |
144
|
0
|
|
|
|
|
0
|
croak $errors; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
else { |
147
|
6
|
|
|
|
|
39
|
return 1; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub start(\$$) |
152
|
|
|
|
|
|
|
{ |
153
|
7
|
|
|
7
|
0
|
36
|
my ($self, $rule) = @_; |
154
|
7
|
|
|
|
|
8
|
my @newset; |
155
|
7
|
|
|
|
|
17
|
$self->{sets} = { }; |
156
|
7
|
|
|
|
|
26
|
$self->{set} = [ ]; |
157
|
7
|
|
|
|
|
34
|
$self->{ncset} = [ ]; |
158
|
7
|
|
|
|
|
14
|
for (@{$self->{rules}{$rule}}) { |
|
7
|
|
|
|
|
53
|
|
159
|
7
|
|
|
|
|
40
|
push @newset, { rule => $_, |
160
|
|
|
|
|
|
|
lhs => $rule, |
161
|
|
|
|
|
|
|
dot => 0, |
162
|
|
|
|
|
|
|
pos => 0, |
163
|
|
|
|
|
|
|
ref => 0 }; |
164
|
|
|
|
|
|
|
} |
165
|
7
|
|
|
|
|
12
|
push @{$self->{ncset}}, @newset; |
|
7
|
|
|
|
|
16
|
|
166
|
7
|
|
|
|
|
31
|
$self->pushset(\@newset); |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub advance(\$$) |
170
|
|
|
|
|
|
|
{ |
171
|
52
|
|
|
52
|
0
|
221
|
my ($self, $str) = @_; |
172
|
|
|
|
|
|
|
|
173
|
52
|
|
|
|
|
114
|
$self->pushset($self->{ncset}); |
174
|
52
|
|
|
|
|
95
|
$self->{set} = $self->{ncset}; |
175
|
52
|
|
|
|
|
108
|
$self->{ncset} = [ ]; |
176
|
|
|
|
|
|
|
|
177
|
52
|
|
|
|
|
58
|
for (@{$self->{set}}) { |
|
52
|
|
|
|
|
109
|
|
178
|
|
|
|
|
|
|
#This is the main huffmanized switch. |
179
|
|
|
|
|
|
|
#The heart of the algorithm is here. |
180
|
|
|
|
|
|
|
|
181
|
174
|
|
|
|
|
474
|
my $p = $_->{rule}[$_->{dot}]; |
182
|
174
|
100
|
|
|
|
290
|
if ($p) { |
183
|
96
|
100
|
|
|
|
180
|
if ($p->{type} eq 'nonterminal') { |
184
|
43
|
|
|
|
|
156
|
$self->predict($_); |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
else { |
187
|
53
|
|
|
|
|
115
|
$self->scan($_, $str); |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
else { |
191
|
78
|
|
|
|
|
148
|
$self->complete($_); |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
52
|
50
|
|
|
|
226
|
if ($DEBUG) { |
196
|
0
|
|
|
|
|
0
|
my $displen = 7; |
197
|
0
|
|
|
|
|
0
|
my $sp = ' ' x 4; |
198
|
0
|
|
|
|
|
0
|
for (qw(set ncset)) { |
199
|
0
|
0
|
|
|
|
0
|
print /nc/ ? 'advance(): NEXT SET' : 'advance(): CURRENT SET'; |
200
|
0
|
|
|
|
|
0
|
print "\n"; |
201
|
0
|
|
|
|
|
0
|
for my $state (@{$self->{$_}}) { |
|
0
|
|
|
|
|
0
|
|
202
|
0
|
|
|
|
|
0
|
print $sp; |
203
|
0
|
|
|
|
|
0
|
my (@p); |
204
|
0
|
0
|
|
|
|
0
|
if ($state->{pos} < $displen) { |
205
|
0
|
|
|
|
|
0
|
$p[0] = substr($str, 0, $state->{pos}); |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
else { |
208
|
0
|
|
|
|
|
0
|
$p[0] = substr($str, $state->{pos}-$displen, $displen); |
209
|
|
|
|
|
|
|
} |
210
|
0
|
|
|
|
|
0
|
$p[1] = substr($str, $state->{pos}, 5); |
211
|
0
|
0
|
0
|
|
|
0
|
s/(.)/ord($1)<32 || ord($1)>127 ? '.' : $1/seg for @p; |
|
0
|
|
|
|
|
0
|
|
212
|
0
|
|
|
|
|
0
|
printf "\%${displen}s * \%-${displen}s\%s", $p[0], $p[1], $sp; |
213
|
0
|
|
|
|
|
0
|
print "($state->{pos}) $state->{lhs}: "; |
214
|
0
|
|
0
|
|
|
0
|
for (my $i= 0; $i < @{$state->{rule}} || $i <= $state->{dot}; $i++){ |
|
0
|
|
|
|
|
0
|
|
215
|
0
|
0
|
|
|
|
0
|
if ($state->{dot} == $i) { |
216
|
0
|
|
|
|
|
0
|
print "* "; |
217
|
|
|
|
|
|
|
} |
218
|
0
|
0
|
|
|
|
0
|
if (exists $state->{rule}[$i]) { |
219
|
0
|
|
|
|
|
0
|
my $t = $state->{rule}[$i]{type}; |
220
|
0
|
|
|
|
|
0
|
my $p = $state->{rule}[$i]{match}; |
221
|
0
|
0
|
|
|
|
0
|
if ($t eq 'literal') { |
|
|
0
|
|
|
|
|
|
222
|
0
|
|
|
|
|
0
|
print "'$p' " |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
elsif ($t eq 'regex') { |
225
|
0
|
|
|
|
|
0
|
$p =~ s/^.*?://; # Get rid of the qr// stuff |
226
|
0
|
|
|
|
|
0
|
$p =~ s/\)$//; |
227
|
0
|
|
|
|
|
0
|
print "/$p/ " |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
else { |
231
|
0
|
|
|
|
|
0
|
print "$p "; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
} |
235
|
0
|
|
|
|
|
0
|
print "($state->{ref})\n"; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
#This function checks for matching the entire input. Sub matches |
244
|
|
|
|
|
|
|
#are seldom of use, and so they are discarded (as they make the |
245
|
|
|
|
|
|
|
#parse graph needlessly huge). |
246
|
|
|
|
|
|
|
sub matches(\$$$) |
247
|
|
|
|
|
|
|
{ |
248
|
7
|
|
|
7
|
0
|
14
|
my ($self, $str, $rule) = @_; |
249
|
7
|
|
|
|
|
71
|
$str =~ s/$self->{skip}$//; |
250
|
7
|
100
|
|
|
|
43
|
my $cset = $self->{sets}{length($str)} or return; |
251
|
6
|
|
|
|
|
10
|
return grep { $_->{lhs} eq $rule && |
|
6
|
|
|
|
|
52
|
|
252
|
26
|
100
|
66
|
|
|
117
|
$_->{dot} == @{$_->{rule}} && |
253
|
|
|
|
|
|
|
$_->{ref} == 0 } |
254
|
|
|
|
|
|
|
@$cset; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
sub matches_all(\$$$) |
258
|
|
|
|
|
|
|
{ |
259
|
6
|
|
|
6
|
0
|
42
|
my ($self, $str, $rule) = @_; |
260
|
6
|
|
|
|
|
12
|
my $cset = $self->{ncset}; |
261
|
6
|
50
|
|
|
|
15
|
unless (@$cset) { |
262
|
6
|
|
|
|
|
21
|
return $self->matches($str, $rule); |
263
|
|
|
|
|
|
|
} |
264
|
0
|
|
|
|
|
0
|
return; |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub fails(\$$$) |
268
|
|
|
|
|
|
|
{ |
269
|
1
|
|
|
1
|
0
|
7
|
my ($self, $str, $rule) = @_; |
270
|
1
|
50
|
|
|
|
4
|
if ($self->matches($str, $rule)) { |
271
|
0
|
|
|
|
|
0
|
return 0; |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
else { |
274
|
1
|
50
|
|
|
|
2
|
return @{$self->{set}} ? 0 : 1; |
|
1
|
|
|
|
|
7
|
|
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub predict(\$$) |
279
|
|
|
|
|
|
|
{ |
280
|
43
|
|
|
43
|
0
|
59
|
my ($self, $state) = @_; |
281
|
43
|
|
|
|
|
64
|
my $cset = $self->{set}; |
282
|
43
|
|
|
|
|
75
|
my $p = $state->{rule}[$state->{dot}]; |
283
|
|
|
|
|
|
|
|
284
|
43
|
50
|
|
|
|
105
|
unless ($self->{rules}{$p->{match}}) { |
285
|
0
|
|
|
|
|
0
|
croak "No definition for nonterminal '$p->{match}'\n"; |
286
|
|
|
|
|
|
|
} |
287
|
43
|
|
|
|
|
51
|
my @newset = @{$self->{rules}{$p->{match}}}; |
|
43
|
|
|
|
|
114
|
|
288
|
62
|
|
|
|
|
69
|
@newset = map { |
289
|
43
|
|
|
|
|
70
|
my $m = $_; |
290
|
62
|
100
|
66
|
|
|
99
|
unless (grep { $_->{rule} == $m && |
|
126
|
100
|
100
|
|
|
629
|
|
|
|
|
66
|
|
|
|
|
291
|
|
|
|
|
|
|
$_->{lhs} eq $p->{match} && |
292
|
|
|
|
|
|
|
$_->{dot} == 0 && |
293
|
|
|
|
|
|
|
$_->{pos} == $state->{pos} && |
294
|
|
|
|
|
|
|
$_->{ref} == $state->{pos} } @$cset) { |
295
|
43
|
|
|
|
|
244
|
{ rule => $m, |
296
|
|
|
|
|
|
|
lhs => $p->{match}, |
297
|
|
|
|
|
|
|
dot => 0, |
298
|
|
|
|
|
|
|
pos => $state->{pos}, |
299
|
|
|
|
|
|
|
ref => $state->{pos} } |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
else { |
302
|
|
|
|
|
|
|
() |
303
|
19
|
|
|
|
|
39
|
} |
304
|
|
|
|
|
|
|
} @newset; |
305
|
43
|
|
|
|
|
60
|
push @$cset, @newset; |
306
|
43
|
|
|
|
|
95
|
$self->pushset(\@newset); |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
sub scan(\$$$) |
311
|
|
|
|
|
|
|
{ |
312
|
53
|
|
|
53
|
0
|
74
|
my ($self, $state, $str) = @_; |
313
|
53
|
|
|
|
|
114
|
my $cset = $self->{set}; |
314
|
53
|
|
|
|
|
73
|
my $skipos = $state->{pos}; |
315
|
53
|
|
|
|
|
89
|
my $p = $state->{rule}[$state->{dot}]; |
316
|
|
|
|
|
|
|
|
317
|
53
|
100
|
|
|
|
129
|
unless ($state->{rule}[$state->{dot}]{noskip}) { |
318
|
47
|
|
|
|
|
108
|
pos $str = $skipos; |
319
|
47
|
|
|
|
|
226
|
$str =~ /\G$self->{skip}/g; # Terminal Seperator! |
320
|
47
|
|
|
|
|
98
|
$skipos = pos $str; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
|
323
|
53
|
100
|
|
|
|
139
|
if ($p->{type} eq 'literal') { |
|
|
50
|
|
|
|
|
|
324
|
29
|
|
|
|
|
61
|
my $tok = substr($str, $skipos, length $p->{match}); |
325
|
29
|
|
|
|
|
37
|
my $res = 1; |
326
|
29
|
50
|
|
|
|
61
|
if ($p->{code}) { |
327
|
0
|
|
|
|
|
0
|
local $_ = $tok; |
328
|
0
|
|
|
|
|
0
|
$res = eval { $p->{code}() }; |
|
0
|
|
|
|
|
0
|
|
329
|
0
|
0
|
|
|
|
0
|
croak "$@ near line $p->{line} of grammar\n" if $@; |
330
|
|
|
|
|
|
|
} |
331
|
29
|
100
|
66
|
|
|
142
|
if ($res and $tok eq $p->{match}){ |
332
|
19
|
|
|
|
|
129
|
my $push = { |
333
|
|
|
|
|
|
|
rule => $state->{rule}, |
334
|
|
|
|
|
|
|
lhs => $state->{lhs}, |
335
|
|
|
|
|
|
|
dot => $state->{dot}+1, |
336
|
|
|
|
|
|
|
pos => $skipos+length $p->{match}, |
337
|
|
|
|
|
|
|
ref => $state->{ref}, |
338
|
|
|
|
|
|
|
tok => $tok , |
339
|
|
|
|
|
|
|
left => [ $state ] }; |
340
|
19
|
0
|
0
|
|
|
27
|
unless (grep { |
|
0
|
50
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
|
|
341
|
19
|
|
|
|
|
57
|
$_->{rule} == $push->{rule} && |
342
|
|
|
|
|
|
|
$_->{lhs} eq $push->{lhs} && |
343
|
|
|
|
|
|
|
$_->{dot} == $push->{dot} && |
344
|
|
|
|
|
|
|
$_->{pos} == $push->{pos} && |
345
|
|
|
|
|
|
|
$_->{ref} == $push->{ref} } @{$self->{ncset}} ) { |
346
|
19
|
|
|
|
|
21
|
push @{$self->{ncset}}, $push; |
|
19
|
|
|
|
|
91
|
|
347
|
|
|
|
|
|
|
# push @{$self->{sets}{$push->{pos}}}, $push; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
elsif ($p->{type} eq 'regex') { |
352
|
24
|
|
|
|
|
38
|
pos $str = $skipos; |
353
|
24
|
50
|
|
|
|
273
|
if ($str =~ /\G($p->{match})/g) { |
354
|
24
|
|
|
|
|
51
|
my $tok = $1; |
355
|
24
|
50
|
|
|
|
53
|
if ($p->{code}) { |
356
|
0
|
|
|
|
|
0
|
local $_ = $tok; |
357
|
0
|
|
|
|
|
0
|
my $res = eval { $p->{code}() }; |
|
0
|
|
|
|
|
0
|
|
358
|
0
|
0
|
|
|
|
0
|
croak "$@ near line $p->{line} of grammar\n" if $@; |
359
|
0
|
0
|
|
|
|
0
|
return unless $res; # Should make this some sort of break |
360
|
|
|
|
|
|
|
} |
361
|
24
|
|
|
|
|
148
|
my $push = { |
362
|
|
|
|
|
|
|
rule => $state->{rule}, |
363
|
|
|
|
|
|
|
lhs => $state->{lhs}, |
364
|
|
|
|
|
|
|
dot => $state->{dot}+1, |
365
|
|
|
|
|
|
|
pos => pos $str, |
366
|
|
|
|
|
|
|
ref => $state->{ref}, |
367
|
|
|
|
|
|
|
tok => $tok, |
368
|
|
|
|
|
|
|
left => [ $state ] }; |
369
|
24
|
0
|
0
|
|
|
35
|
unless (grep { |
|
0
|
50
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
|
|
370
|
24
|
|
|
|
|
68
|
$_->{rule} == $push->{rule} && |
371
|
|
|
|
|
|
|
$_->{lhs} eq $push->{lhs} && |
372
|
|
|
|
|
|
|
$_->{dot} == $push->{dot} && |
373
|
|
|
|
|
|
|
$_->{pos} == $push->{pos} && |
374
|
|
|
|
|
|
|
$_->{ref} == $push->{ref} } @{$self->{ncset}} ) { |
375
|
24
|
|
|
|
|
24
|
push @{$self->{ncset}}, $push; |
|
24
|
|
|
|
|
46
|
|
376
|
24
|
|
|
|
|
27
|
push @{$self->{sets}{$push->{pos}}}, $push; |
|
24
|
|
|
|
|
167
|
|
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
sub complete(\$$) |
383
|
|
|
|
|
|
|
{ |
384
|
78
|
|
|
78
|
0
|
109
|
my ($self, $state) = @_; |
385
|
78
|
|
|
|
|
117
|
my $cset = $self->{set}; |
386
|
286
|
|
33
|
|
|
1579
|
my @newset = grep { |
387
|
78
|
|
|
|
|
185
|
(exists $_->{rule}[$_->{dot}] && |
388
|
|
|
|
|
|
|
$_->{rule}[$_->{dot}]{match}) eq $state->{lhs} } |
389
|
78
|
|
|
|
|
81
|
@{$self->{sets}{$state->{ref}}}; |
390
|
78
|
|
|
|
|
107
|
my @reval; |
391
|
83
|
|
|
|
|
89
|
@newset = map { |
392
|
78
|
|
|
|
|
125
|
my $m = $_; |
393
|
83
|
|
|
|
|
86
|
my @g; |
394
|
83
|
100
|
66
|
|
|
107
|
unless (@g = grep { $_->{rule} == $m->{rule} && |
|
149
|
100
|
100
|
|
|
775
|
|
|
|
|
66
|
|
|
|
|
395
|
|
|
|
|
|
|
$_->{lhs} eq $m->{lhs} && |
396
|
|
|
|
|
|
|
$_->{dot} == $m->{dot}+1 && |
397
|
|
|
|
|
|
|
$_->{pos} == $state->{pos} && |
398
|
|
|
|
|
|
|
$_->{ref} == $m->{ref} } @$cset) { |
399
|
|
|
|
|
|
|
|
400
|
81
|
|
|
|
|
551
|
my $push = { rule => $m->{rule}, |
401
|
|
|
|
|
|
|
lhs => $m->{lhs}, |
402
|
|
|
|
|
|
|
dot => $m->{dot}+1, |
403
|
|
|
|
|
|
|
pos => $state->{pos}, |
404
|
|
|
|
|
|
|
ref => $m->{ref}, |
405
|
|
|
|
|
|
|
down => [ $state ], |
406
|
|
|
|
|
|
|
left => [ $m ], |
407
|
|
|
|
|
|
|
}; |
408
|
81
|
50
|
|
|
|
218
|
if ($m->{rule}[$m->{dot}]{code}) { |
409
|
0
|
|
|
|
|
0
|
local $_ = $state; |
410
|
0
|
|
|
|
|
0
|
my $res = eval { $m->{rule}[$m->{dot}]{code}() }; |
|
0
|
|
|
|
|
0
|
|
411
|
0
|
0
|
|
|
|
0
|
croak "$@ near line $m->{rule}[$m->{dot}]{line} " |
412
|
|
|
|
|
|
|
."of grammar\n" if $@; |
413
|
0
|
0
|
|
|
|
0
|
$res ? $push : () |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
else { |
416
|
81
|
|
|
|
|
211
|
$push |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
else { |
420
|
2
|
|
|
|
|
6
|
for (@g) { |
421
|
2
|
50
|
33
|
|
|
3
|
unless (grep { $_ == $state } @{$_->{down}} and |
|
2
|
|
|
|
|
11
|
|
|
2
|
|
|
|
|
5
|
|
|
0
|
|
|
|
|
0
|
|
422
|
0
|
|
|
|
|
0
|
grep { $_ == $m } @{$_->{left}}) { |
423
|
2
|
|
|
|
|
3
|
my $succ = 1; |
424
|
2
|
50
|
|
|
|
8
|
if ($m->{rule}[$m->{dot}]{code}) { |
425
|
0
|
|
|
|
|
0
|
my $left; |
426
|
0
|
|
|
|
|
0
|
local $_ = $state; |
427
|
0
|
|
|
|
|
0
|
$succ = eval { $m->{rule}[$m->{dot}]{code}() }; |
|
0
|
|
|
|
|
0
|
|
428
|
0
|
0
|
|
|
|
0
|
croak "$@ near line $m->{rule}[$m->{dot}]{line} " |
429
|
|
|
|
|
|
|
."of grammar\n" if $@; |
430
|
|
|
|
|
|
|
} |
431
|
2
|
50
|
|
|
|
6
|
if ($succ) { |
432
|
2
|
|
|
|
|
3
|
push @{$_->{down}}, $state; |
|
2
|
|
|
|
|
5
|
|
433
|
2
|
|
|
|
|
3
|
push @{$_->{left}}, $m; |
|
2
|
|
|
|
|
7
|
|
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
} |
437
|
|
|
|
|
|
|
() |
438
|
2
|
|
|
|
|
4
|
} |
439
|
|
|
|
|
|
|
} @newset; |
440
|
78
|
|
|
|
|
120
|
push @$cset, @newset; |
441
|
78
|
|
|
|
|
176
|
$self->pushset(\@newset); |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
1; |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
__END__ |