line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# You may distribute under the terms of either the GNU General Public License |
2
|
|
|
|
|
|
|
# or the Artistic License (the same terms as Perl itself) |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# (C) Paul Evans, 2010-2022 -- leonerd@leonerd.org.uk |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package Parser::MGC 0.21; |
7
|
|
|
|
|
|
|
|
8
|
33
|
|
|
33
|
|
1581975
|
use v5.14; |
|
33
|
|
|
|
|
311
|
|
9
|
33
|
|
|
33
|
|
143
|
use warnings; |
|
33
|
|
|
|
|
51
|
|
|
33
|
|
|
|
|
810
|
|
10
|
|
|
|
|
|
|
|
11
|
33
|
|
|
33
|
|
146
|
use Carp; |
|
33
|
|
|
|
|
72
|
|
|
33
|
|
|
|
|
1799
|
|
12
|
33
|
|
|
33
|
|
14004
|
use Feature::Compat::Try; |
|
33
|
|
|
|
|
9114
|
|
|
33
|
|
|
|
|
120
|
|
13
|
|
|
|
|
|
|
|
14
|
33
|
|
|
33
|
|
78752
|
use Scalar::Util qw( blessed ); |
|
33
|
|
|
|
|
64
|
|
|
33
|
|
|
|
|
3495
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
C - build simple recursive-descent parsers |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package My::Grammar::Parser; |
23
|
|
|
|
|
|
|
use base qw( Parser::MGC ); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub parse |
26
|
|
|
|
|
|
|
{ |
27
|
|
|
|
|
|
|
my $self = shift; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$self->sequence_of( sub { |
30
|
|
|
|
|
|
|
$self->any_of( |
31
|
|
|
|
|
|
|
sub { $self->token_int }, |
32
|
|
|
|
|
|
|
sub { $self->token_string }, |
33
|
|
|
|
|
|
|
sub { \$self->token_ident }, |
34
|
|
|
|
|
|
|
sub { $self->scope_of( "(", \&parse, ")" ) } |
35
|
|
|
|
|
|
|
); |
36
|
|
|
|
|
|
|
} ); |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $parser = My::Grammar::Parser->new; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $tree = $parser->from_file( $ARGV[0] ); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
... |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 DESCRIPTION |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
This base class provides a low-level framework for building recursive-descent |
48
|
|
|
|
|
|
|
parsers that consume a given input string from left to right, returning a |
49
|
|
|
|
|
|
|
parse structure. It takes its name from the C regexps used to implement |
50
|
|
|
|
|
|
|
the token parsing behaviour. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
It provides a number of token-parsing methods, which each extract a |
53
|
|
|
|
|
|
|
grammatical token from the string. It also provides wrapping methods that can |
54
|
|
|
|
|
|
|
be used to build up a possibly-recursive grammar structure, by applying a |
55
|
|
|
|
|
|
|
structure around other parts of parsing code. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head2 Backtracking |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Each method, both token and structural, atomically either consumes a prefix of |
60
|
|
|
|
|
|
|
the string and returns its result, or fails and consumes nothing. This makes |
61
|
|
|
|
|
|
|
it simple to implement grammars that require backtracking. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Several structure-forming methods have some form of "optional" behaviour; they |
64
|
|
|
|
|
|
|
can optionally consume some amount of input or take some particular choice, |
65
|
|
|
|
|
|
|
but if the code invoked inside that subsequently fails, the structure can |
66
|
|
|
|
|
|
|
backtrack and take some different behaviour. This is usually what is required |
67
|
|
|
|
|
|
|
when testing whether the structure of the input string matches some part of |
68
|
|
|
|
|
|
|
the grammar that is optional, or has multiple choices. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
However, once the choice of grammar has been made, it is often useful to be |
71
|
|
|
|
|
|
|
able to fix on that one choice, thus making subsequent failures propagate up |
72
|
|
|
|
|
|
|
rather than taking that alternative behaviour. Control of this backtracking |
73
|
|
|
|
|
|
|
is given by the C method; and careful use of this method is one of the |
74
|
|
|
|
|
|
|
key advantages that C has over more simple parsing using single |
75
|
|
|
|
|
|
|
regexps alone. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 Stall Detection |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Most of the methods in this class have bounded execution time, but some |
80
|
|
|
|
|
|
|
methods (L and L) repeatedly recuse into other code |
81
|
|
|
|
|
|
|
to build up a list of results until some ending condition is reached. A |
82
|
|
|
|
|
|
|
possible class of bug is that whatever they recurse into might successfully |
83
|
|
|
|
|
|
|
match an empty string, and thus make no progress. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
These methods will automatically detect this situation if they repeatedly |
86
|
|
|
|
|
|
|
encounter the same string position more than a certain number of times (given |
87
|
|
|
|
|
|
|
by the C argument). If this count is reached, the entire parse |
88
|
|
|
|
|
|
|
attempt will be aborted by the L method. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=cut |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=cut |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head2 new |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
$parser = Parser::MGC->new( %args ) |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
Returns a new instance of a C object. This must be called on a |
101
|
|
|
|
|
|
|
subclass that provides method of the name provided as C, by default |
102
|
|
|
|
|
|
|
called C. |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Takes the following named arguments |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=over 8 |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item toplevel => STRING |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Name of the toplevel method to use to start the parse from. If not supplied, |
111
|
|
|
|
|
|
|
will try to use a method called C. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item patterns => HASH |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Keys in this hash should map to quoted regexp (C) references, to |
116
|
|
|
|
|
|
|
override the default patterns used to match tokens. See C below |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item accept_0o_oct => BOOL |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
If true, the C method will also accept integers with a C<0o> prefix |
121
|
|
|
|
|
|
|
as octal. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item stallcount => INT |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
I |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
The number of times that the stall-detector would have to see the same |
128
|
|
|
|
|
|
|
position before it aborts the parse attempt. If not supplied, a default of |
129
|
|
|
|
|
|
|
C<10> will apply. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=back |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=cut |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head1 PATTERNS |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
The following pattern names are recognised. They may be passed to the |
138
|
|
|
|
|
|
|
constructor in the C hash, or provided as a class method under the |
139
|
|
|
|
|
|
|
name C>. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=over 4 |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=item * ws |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Pattern used to skip whitespace between tokens. Defaults to C[\s\n\t]+/> |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=item * comment |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Pattern used to skip comments between tokens. Undefined by default. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item * int |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Pattern used to parse an integer by C. Defaults to |
154
|
|
|
|
|
|
|
C-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/>. If C is given, then |
155
|
|
|
|
|
|
|
this will be expanded to match C0o[0-7]+/> as well. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item * float |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Pattern used to parse a floating-point number by C. Defaults to |
160
|
|
|
|
|
|
|
C-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i>. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item * ident |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Pattern used to parse an identifier by C. Defaults to |
165
|
|
|
|
|
|
|
C[[:alpha:]_]\w*/> |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=item * string_delim |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Pattern used to delimit a string by C. Defaults to C["']/>. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=back |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
my @patterns = qw( |
176
|
|
|
|
|
|
|
ws |
177
|
|
|
|
|
|
|
comment |
178
|
|
|
|
|
|
|
int |
179
|
|
|
|
|
|
|
float |
180
|
|
|
|
|
|
|
ident |
181
|
|
|
|
|
|
|
string_delim |
182
|
|
|
|
|
|
|
); |
183
|
|
|
|
|
|
|
|
184
|
33
|
|
|
33
|
|
235
|
use constant pattern_ws => qr/[\s\n\t]+/; |
|
33
|
|
|
|
|
63
|
|
|
33
|
|
|
|
|
1754
|
|
185
|
33
|
|
|
33
|
|
153
|
use constant pattern_comment => undef; |
|
33
|
|
|
|
|
64
|
|
|
33
|
|
|
|
|
2446
|
|
186
|
33
|
|
|
33
|
|
198
|
use constant pattern_int => qr/-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/; |
|
33
|
|
|
|
|
62
|
|
|
33
|
|
|
|
|
2536
|
|
187
|
33
|
|
|
33
|
|
198
|
use constant pattern_float => qr/-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i; |
|
33
|
|
|
|
|
60
|
|
|
33
|
|
|
|
|
3500
|
|
188
|
33
|
|
|
33
|
|
202
|
use constant pattern_ident => qr/[[:alpha:]_]\w*/; |
|
33
|
|
|
|
|
133
|
|
|
33
|
|
|
|
|
1793
|
|
189
|
33
|
|
|
33
|
|
166
|
use constant pattern_string_delim => qr/["']/; |
|
33
|
|
|
|
|
49
|
|
|
33
|
|
|
|
|
1272
|
|
190
|
|
|
|
|
|
|
|
191
|
33
|
|
|
33
|
|
150
|
use constant DEFAULT_STALLCOUNT => 10; |
|
33
|
|
|
|
|
57
|
|
|
33
|
|
|
|
|
128183
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub new |
194
|
|
|
|
|
|
|
{ |
195
|
48
|
|
|
48
|
1
|
2834
|
my $class = shift; |
196
|
48
|
|
|
|
|
120
|
my %args = @_; |
197
|
|
|
|
|
|
|
|
198
|
48
|
|
100
|
|
|
287
|
my $toplevel = $args{toplevel} || "parse"; |
199
|
|
|
|
|
|
|
|
200
|
48
|
50
|
|
|
|
325
|
$class->can( $toplevel ) or |
201
|
|
|
|
|
|
|
croak "Expected to be a subclass that can ->$toplevel"; |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
my $self = bless { |
204
|
|
|
|
|
|
|
toplevel => $toplevel, |
205
|
|
|
|
|
|
|
patterns => {}, |
206
|
|
|
|
|
|
|
scope_level => 0, |
207
|
48
|
|
50
|
|
|
368
|
stallcount => $args{stallcount} // DEFAULT_STALLCOUNT, |
208
|
|
|
|
|
|
|
}, $class; |
209
|
|
|
|
|
|
|
|
210
|
48
|
|
100
|
|
|
264
|
$self->{patterns}{$_} = $args{patterns}{$_} || $self->${\"pattern_$_"} for @patterns; |
211
|
|
|
|
|
|
|
|
212
|
48
|
100
|
|
|
|
157
|
if( $args{accept_0o_oct} ) { |
213
|
1
|
|
|
|
|
30
|
$self->{patterns}{int} = qr/0o[0-7]+|$self->{patterns}{int}/; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
48
|
100
|
|
|
|
151
|
if( defined $self->{patterns}{comment} ) { |
217
|
1
|
|
|
|
|
35
|
$self->{patterns}{_skip} = qr/$self->{patterns}{ws}|$self->{patterns}{comment}/; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
else { |
220
|
47
|
|
|
|
|
107
|
$self->{patterns}{_skip} = $self->{patterns}{ws}; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
48
|
|
|
|
|
236
|
return $self; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head1 SUBCLASSING METHODS |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
The following optional methods may be defined by subclasses, to customise |
229
|
|
|
|
|
|
|
their parsing. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head2 on_parse_start |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
$parser->on_parse_start |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
I |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
If defined, is invoked by the C method that begins a new parse |
238
|
|
|
|
|
|
|
operation, just before invoking the toplevel structure method. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head2 on_parse_end |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
$result = $parser->on_parse_end( $result ) |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
I |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
If defined, is invoked by the C method once it has finished the |
247
|
|
|
|
|
|
|
toplevel structure method. This is passed the tentative result from the |
248
|
|
|
|
|
|
|
structure method, and whatever it returns becomes the result of the C |
249
|
|
|
|
|
|
|
method itself. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=cut |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=head1 METHODS |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=cut |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head2 from_string |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
$result = $parser->from_string( $str ) |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
Parse the given literal string and return the result from the toplevel method. |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=cut |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
sub from_string |
266
|
|
|
|
|
|
|
{ |
267
|
167
|
|
|
167
|
1
|
30088
|
my $self = shift; |
268
|
167
|
|
|
|
|
325
|
my ( $str ) = @_; |
269
|
|
|
|
|
|
|
|
270
|
167
|
|
|
|
|
312
|
$self->{str} = $str; |
271
|
|
|
|
|
|
|
|
272
|
167
|
|
|
|
|
394
|
pos $self->{str} = 0; |
273
|
|
|
|
|
|
|
|
274
|
167
|
100
|
|
|
|
733
|
if( my $code = $self->can( "on_parse_start" ) ) { |
275
|
2
|
|
|
|
|
6
|
$self->$code; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
167
|
|
|
|
|
264
|
my $toplevel = $self->{toplevel}; |
279
|
167
|
|
|
|
|
452
|
my $result = $self->$toplevel; |
280
|
|
|
|
|
|
|
|
281
|
148
|
100
|
|
|
|
462
|
$self->at_eos or |
282
|
|
|
|
|
|
|
$self->fail( "Expected end of input" ); |
283
|
|
|
|
|
|
|
|
284
|
143
|
100
|
|
|
|
620
|
if( my $code = $self->can( "on_parse_end" ) ) { |
285
|
2
|
|
|
|
|
5
|
$result = $self->$code( $result ); |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
143
|
|
|
|
|
697
|
return $result; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head2 from_file |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
$result = $parser->from_file( $file, %opts ) |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
Parse the given file, which may be a pathname in a string, or an opened IO |
296
|
|
|
|
|
|
|
handle, and return the result from the toplevel method. |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
The following options are recognised: |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=over 8 |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=item binmode => STRING |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
If set, applies the given binmode to the filehandle before reading. Typically |
305
|
|
|
|
|
|
|
this can be used to set the encoding of the file. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
$parser->from_file( $file, binmode => ":encoding(UTF-8)" ) |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=back |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=cut |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
sub from_file |
314
|
|
|
|
|
|
|
{ |
315
|
3
|
|
|
3
|
1
|
1975
|
my $self = shift; |
316
|
3
|
|
|
|
|
8
|
my ( $file, %opts ) = @_; |
317
|
|
|
|
|
|
|
|
318
|
3
|
50
|
|
|
|
11
|
defined $file or croak "Expected a filename to ->from_file"; |
319
|
|
|
|
|
|
|
|
320
|
3
|
|
|
|
|
16
|
$self->{filename} = $file; |
321
|
|
|
|
|
|
|
|
322
|
3
|
|
|
|
|
4
|
my $fh; |
323
|
3
|
100
|
|
|
|
11
|
if( ref $file ) { |
324
|
2
|
|
|
|
|
4
|
$fh = $file; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
else { |
327
|
1
|
50
|
|
|
|
40
|
open $fh, "<", $file or die "Cannot open $file for reading - $!"; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
3
|
50
|
|
|
|
10
|
binmode $fh, $opts{binmode} if $opts{binmode}; |
331
|
|
|
|
|
|
|
|
332
|
3
|
|
|
|
|
4
|
$self->from_string( do { local $/; <$fh>; } ); |
|
3
|
|
|
|
|
12
|
|
|
3
|
|
|
|
|
96
|
|
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head2 filename |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
$filename = $parser->filename |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
I |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Returns the name of the file currently being parsed, if invoked from within |
342
|
|
|
|
|
|
|
L. |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=cut |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
sub filename |
347
|
|
|
|
|
|
|
{ |
348
|
44
|
|
|
44
|
1
|
61
|
my $self = shift; |
349
|
44
|
|
|
|
|
93
|
return $self->{filename}; |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=head2 from_reader |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
$result = $parser->from_reader( \&reader ) |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
I |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Parse the input which is read by the C function. This function will be |
359
|
|
|
|
|
|
|
called in scalar context to generate portions of string to parse, being passed |
360
|
|
|
|
|
|
|
the C<$parser> object. The function should return C when it has no more |
361
|
|
|
|
|
|
|
string to return. |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
$reader->( $parser ) |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
Note that because it is not generally possible to detect exactly when more |
366
|
|
|
|
|
|
|
input may be required due to failed regexp parsing, the reader function is |
367
|
|
|
|
|
|
|
only invoked during searching for skippable whitespace. This makes it suitable |
368
|
|
|
|
|
|
|
for reading lines of a file in the common case where lines are considered as |
369
|
|
|
|
|
|
|
skippable whitespace, or for reading lines of input interactively from a |
370
|
|
|
|
|
|
|
user. It cannot be used in all cases (for example, reading fixed-size buffers |
371
|
|
|
|
|
|
|
from a file) because two successive invocations may split a single token |
372
|
|
|
|
|
|
|
across the buffer boundaries, and cause parse failures. |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=cut |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub from_reader |
377
|
|
|
|
|
|
|
{ |
378
|
1
|
|
|
1
|
1
|
13
|
my $self = shift; |
379
|
1
|
|
|
|
|
2
|
my ( $reader ) = @_; |
380
|
|
|
|
|
|
|
|
381
|
1
|
|
|
|
|
3
|
local $self->{reader} = $reader; |
382
|
|
|
|
|
|
|
|
383
|
1
|
|
|
|
|
2
|
$self->{str} = ""; |
384
|
1
|
|
|
|
|
4
|
pos $self->{str} = 0; |
385
|
|
|
|
|
|
|
|
386
|
1
|
|
|
|
|
4
|
my $result = $self->parse; |
387
|
|
|
|
|
|
|
|
388
|
1
|
50
|
|
|
|
2
|
$self->at_eos or |
389
|
|
|
|
|
|
|
$self->fail( "Expected end of input" ); |
390
|
|
|
|
|
|
|
|
391
|
1
|
|
|
|
|
9
|
return $result; |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=head2 pos |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
$pos = $parser->pos |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
I |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Returns the current parse position, as a character offset from the beginning |
401
|
|
|
|
|
|
|
of the file or string. |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=cut |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
sub pos |
406
|
|
|
|
|
|
|
{ |
407
|
291
|
|
|
291
|
1
|
358
|
my $self = shift; |
408
|
291
|
|
|
|
|
611
|
return pos $self->{str}; |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=head2 take |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
$str = $parser->take( $len ) |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
I |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Returns the next C<$len> characters directly from the input, prior to any |
418
|
|
|
|
|
|
|
whitespace or comment skipping. This does I take account of any |
419
|
|
|
|
|
|
|
end-of-scope marker that may be pending. It is intended for use by parsers of |
420
|
|
|
|
|
|
|
partially-binary protocols, or other situations in which it would be incorrect |
421
|
|
|
|
|
|
|
for the end-of-scope marker to take effect at this time. |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=cut |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
sub take |
426
|
|
|
|
|
|
|
{ |
427
|
57
|
|
|
57
|
1
|
79
|
my $self = shift; |
428
|
57
|
|
|
|
|
84
|
my ( $len ) = @_; |
429
|
|
|
|
|
|
|
|
430
|
57
|
|
|
|
|
102
|
my $start = pos( $self->{str} ); |
431
|
|
|
|
|
|
|
|
432
|
57
|
|
|
|
|
109
|
pos( $self->{str} ) += $len; |
433
|
|
|
|
|
|
|
|
434
|
57
|
|
|
|
|
265
|
return substr( $self->{str}, $start, $len ); |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=head2 where |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
( $lineno, $col, $text ) = $parser->where |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
Returns the current parse position, as a line and column number, and |
442
|
|
|
|
|
|
|
the entire current line of text. The first line is numbered 1, and the first |
443
|
|
|
|
|
|
|
column is numbered 0. |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=cut |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
sub where |
448
|
|
|
|
|
|
|
{ |
449
|
51
|
|
|
51
|
1
|
89
|
my $self = shift; |
450
|
51
|
|
|
|
|
98
|
my ( $pos ) = @_; |
451
|
|
|
|
|
|
|
|
452
|
51
|
100
|
|
|
|
131
|
defined $pos or $pos = pos $self->{str}; |
453
|
|
|
|
|
|
|
|
454
|
51
|
|
|
|
|
93
|
my $str = $self->{str}; |
455
|
|
|
|
|
|
|
|
456
|
51
|
|
|
|
|
64
|
my $sol = $pos; |
457
|
51
|
100
|
100
|
|
|
240
|
$sol-- if $sol > 0 and substr( $str, $sol, 1 ) =~ m/^[\r\n]$/; |
458
|
51
|
|
100
|
|
|
340
|
$sol-- while $sol > 0 and substr( $str, $sol-1, 1 ) !~ m/^[\r\n]$/; |
459
|
|
|
|
|
|
|
|
460
|
51
|
|
|
|
|
76
|
my $eol = $pos; |
461
|
51
|
|
100
|
|
|
561
|
$eol++ while $eol < length($str) and substr( $str, $eol, 1 ) !~ m/^[\r\n]$/; |
462
|
|
|
|
|
|
|
|
463
|
51
|
|
|
|
|
125
|
my $line = substr( $str, $sol, $eol - $sol ); |
464
|
|
|
|
|
|
|
|
465
|
51
|
|
|
|
|
80
|
my $col = $pos - $sol; |
466
|
51
|
|
|
|
|
129
|
my $lineno = ( () = substr( $str, 0, $pos ) =~ m/\n/g ) + 1; |
467
|
|
|
|
|
|
|
|
468
|
51
|
|
|
|
|
174
|
return ( $lineno, $col, $line ); |
469
|
|
|
|
|
|
|
} |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=head2 fail |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=head2 fail_from |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
$parser->fail( $message ) |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
$parser->fail_from( $pos, $message ) |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
I since version 0.09.> |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
Aborts the current parse attempt with the given message string. The failure |
482
|
|
|
|
|
|
|
message will include the line and column position, and the line of input that |
483
|
|
|
|
|
|
|
failed at the current parse position (C), or a position earlier obtained |
484
|
|
|
|
|
|
|
using the C method (C). |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
This failure will propagate up to the inner-most structure parsing method that |
487
|
|
|
|
|
|
|
has not been committed; or will cause the entire parser to fail if there are |
488
|
|
|
|
|
|
|
no further options to take. |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=cut |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub fail |
493
|
|
|
|
|
|
|
{ |
494
|
283
|
|
|
283
|
1
|
360
|
my $self = shift; |
495
|
283
|
|
|
|
|
415
|
my ( $message ) = @_; |
496
|
283
|
|
|
|
|
541
|
$self->fail_from( $self->pos, $message ); |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
sub fail_from |
500
|
|
|
|
|
|
|
{ |
501
|
283
|
|
|
283
|
1
|
362
|
my $self = shift; |
502
|
283
|
|
|
|
|
398
|
my ( $pos, $message ) = @_; |
503
|
283
|
|
|
|
|
573
|
die Parser::MGC::Failure->new( $message, $self, $pos ); |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# On perl 5.32 onwards we can use the nicer `isa` infix operator |
507
|
|
|
|
|
|
|
# Problem is it won't even parse correctly on older perls so we'll have to go |
508
|
|
|
|
|
|
|
# the long way around |
509
|
|
|
|
|
|
|
*_isa_failure = ( $^V ge v5.32 ) |
510
|
|
|
|
|
|
|
? do { eval 'use experimental "isa"; sub { $_[0] isa Parser::MGC::Failure }' // die $@ } |
511
|
|
|
|
|
|
|
: do { require Scalar::Util; |
512
|
264
|
100
|
|
264
|
|
2631
|
sub { Scalar::Util::blessed($_[0]) and $_[0]->isa( "Parser::MGC::Failure" ) } }; |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=head2 die |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=head2 die_from |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
$parser->die( $message ) |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
$parser->die_from( $pos, $message ) |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
I |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Throws an exception that propagates as normal for C, entirely out of the |
525
|
|
|
|
|
|
|
entire parser and to the caller of the toplevel C method that invoked |
526
|
|
|
|
|
|
|
it, bypassing all of the back-tracking logic. |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
This is much like using core's C directly, except that the message string |
529
|
|
|
|
|
|
|
will include the line and column position, and the line of input that the |
530
|
|
|
|
|
|
|
parser was working on, as it does in the L method. |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
This method is intended for reporting fatal errors where the parsed input was |
533
|
|
|
|
|
|
|
correctly recognised at a grammar level, but is requesting something that |
534
|
|
|
|
|
|
|
cannot be fulfilled semantically. |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=cut |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
sub die :method |
539
|
|
|
|
|
|
|
{ |
540
|
2
|
|
|
2
|
1
|
7
|
my $self = shift; |
541
|
2
|
|
|
|
|
4
|
my ( $message ) = @_; |
542
|
2
|
|
|
|
|
6
|
$self->die_from( $self->pos, $message ); |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
sub die_from |
546
|
|
|
|
|
|
|
{ |
547
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
548
|
2
|
|
|
|
|
5
|
my ( $pos, $message ) = @_; |
549
|
|
|
|
|
|
|
# Convenient just to use the ->STRING method of a Failure object but don't |
550
|
|
|
|
|
|
|
# throw it directly |
551
|
2
|
|
|
|
|
8
|
die Parser::MGC::Failure->new( $message, $self, $pos )->STRING; |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=head2 at_eos |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
$eos = $parser->at_eos |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
Returns true if the input string is at the end of the string. |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=cut |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
sub at_eos |
563
|
|
|
|
|
|
|
{ |
564
|
641
|
|
|
641
|
1
|
757
|
my $self = shift; |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
# Save pos() before skipping ws so we don't break the substring_before method |
567
|
641
|
|
|
|
|
860
|
my $pos = pos $self->{str}; |
568
|
|
|
|
|
|
|
|
569
|
641
|
|
|
|
|
1153
|
$self->skip_ws; |
570
|
|
|
|
|
|
|
|
571
|
641
|
|
|
|
|
714
|
my $at_eos; |
572
|
641
|
100
|
|
|
|
1324
|
if( pos( $self->{str} ) >= length $self->{str} ) { |
|
|
100
|
|
|
|
|
|
573
|
177
|
|
|
|
|
247
|
$at_eos = 1; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
elsif( defined $self->{endofscope} ) { |
576
|
114
|
|
|
|
|
417
|
$at_eos = $self->{str} =~ m/\G$self->{endofscope}/; |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
else { |
579
|
350
|
|
|
|
|
405
|
$at_eos = 0; |
580
|
|
|
|
|
|
|
} |
581
|
|
|
|
|
|
|
|
582
|
641
|
|
|
|
|
1096
|
pos( $self->{str} ) = $pos; |
583
|
|
|
|
|
|
|
|
584
|
641
|
|
|
|
|
1370
|
return $at_eos; |
585
|
|
|
|
|
|
|
} |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=head2 scope_level |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
$level = $parser->scope_level |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
I |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
Returns the number of nested C calls that have been made. |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=cut |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
sub scope_level |
598
|
|
|
|
|
|
|
{ |
599
|
5
|
|
|
5
|
1
|
7
|
my $self = shift; |
600
|
5
|
|
|
|
|
34
|
return $self->{scope_level}; |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=head2 include_string |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
$result = $parser->include_string( $str, %opts ) |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
I |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
Parses a given string into the existing parser object. |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
The current parser state is moved aside from the duration of this method, and |
612
|
|
|
|
|
|
|
is replaced by the given string. Then the toplevel parser method (or a |
613
|
|
|
|
|
|
|
different as specified) is invoked over it. Its result is returned by this |
614
|
|
|
|
|
|
|
method. |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
This would typically be used to handle some sort of "include" or "macro |
617
|
|
|
|
|
|
|
expansion" ability, by injecting new content in as if the current parse |
618
|
|
|
|
|
|
|
location had encountered it. Other than the internal parser state, other |
619
|
|
|
|
|
|
|
object fields are not altered, so whatever effects the invoked parsing methods |
620
|
|
|
|
|
|
|
will have on it can continue to inspect and alter it as required. |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
The following options are recognised: |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
=over 8 |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=item filename => STRING |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
If set, provides a filename (or other descriptive text) to pretend for the |
629
|
|
|
|
|
|
|
source of this string. It need not be a real file on the filesystem; it could |
630
|
|
|
|
|
|
|
for example explain the source of the string in some other way. It is the |
631
|
|
|
|
|
|
|
value reported by the L method and printed in failure messages. |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=item toplevel => STRING | CODE |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
If set, provides the toplevel parser method to use within this inclusion, |
636
|
|
|
|
|
|
|
overriding the object's defined default. |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
=back |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
=cut |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
sub include_string |
643
|
|
|
|
|
|
|
{ |
644
|
5
|
|
|
5
|
1
|
22
|
my $self = shift; |
645
|
5
|
|
|
|
|
8
|
my ( $str, %opts ) = @_; |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
# local'ize everything out of the way |
648
|
5
|
|
|
|
|
7
|
local @{$self}{qw( str filename reader )}; |
|
5
|
|
|
|
|
33
|
|
649
|
|
|
|
|
|
|
|
650
|
5
|
|
|
|
|
20
|
$self->{str} = $str; |
651
|
5
|
|
|
|
|
12
|
pos($self->{str}) = 0; |
652
|
|
|
|
|
|
|
|
653
|
5
|
|
|
|
|
8
|
$self->{filename} = $opts{filename}; |
654
|
|
|
|
|
|
|
|
655
|
5
|
|
33
|
|
|
16
|
my $toplevel = $opts{toplevel} // $self->{toplevel}; |
656
|
5
|
|
|
|
|
15
|
my $result = $self->$toplevel; |
657
|
|
|
|
|
|
|
|
658
|
5
|
|
|
|
|
41
|
return $result; |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=head1 STRUCTURE-FORMING METHODS |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
The following methods may be used to build a grammatical structure out of the |
664
|
|
|
|
|
|
|
defined basic token-parsing methods. Each takes at least one code reference, |
665
|
|
|
|
|
|
|
which will be passed the actual C<$parser> object as its first argument. |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
Anywhere that a code reference is expected also permits a plain string giving |
668
|
|
|
|
|
|
|
the name of a method to invoke. This is sufficient in many simple cases, such |
669
|
|
|
|
|
|
|
as |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
$self->any_of( |
672
|
|
|
|
|
|
|
'token_int', |
673
|
|
|
|
|
|
|
'token_string', |
674
|
|
|
|
|
|
|
... |
675
|
|
|
|
|
|
|
); |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=cut |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=head2 maybe |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
$ret = $parser->maybe( $code ) |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
Attempts to execute the given C<$code> in scalar context, and returns what it |
684
|
|
|
|
|
|
|
returned, accepting that it might fail. C<$code> may either be a CODE |
685
|
|
|
|
|
|
|
reference or a method name given as a string. |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
If the code fails (either by calling C itself, or by propagating a |
688
|
|
|
|
|
|
|
failure from another method it invoked) before it has invoked C, then |
689
|
|
|
|
|
|
|
none of the input string will be consumed; the current parsing position will |
690
|
|
|
|
|
|
|
be restored. C will be returned in this case. |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
If it calls C then any subsequent failure will be propagated to the |
693
|
|
|
|
|
|
|
caller, rather than returning C. |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
This may be considered to be similar to the C> regexp qualifier. |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
sub parse_declaration |
698
|
|
|
|
|
|
|
{ |
699
|
|
|
|
|
|
|
my $self = shift; |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
[ $self->parse_type, |
702
|
|
|
|
|
|
|
$self->token_ident, |
703
|
|
|
|
|
|
|
$self->maybe( sub { |
704
|
|
|
|
|
|
|
$self->expect( "=" ); |
705
|
|
|
|
|
|
|
$self->parse_expression |
706
|
|
|
|
|
|
|
} ), |
707
|
|
|
|
|
|
|
]; |
708
|
|
|
|
|
|
|
} |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=cut |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
sub maybe |
713
|
|
|
|
|
|
|
{ |
714
|
4
|
|
|
4
|
1
|
27
|
my $self = shift; |
715
|
4
|
|
|
|
|
12
|
my ( $code ) = @_; |
716
|
|
|
|
|
|
|
|
717
|
4
|
|
|
|
|
8
|
my $pos = pos $self->{str}; |
718
|
|
|
|
|
|
|
|
719
|
4
|
|
|
|
|
4
|
my $committed = 0; |
720
|
4
|
|
|
0
|
|
12
|
local $self->{committer} = sub { $committed++ }; |
|
0
|
|
|
|
|
0
|
|
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
try { |
723
|
|
|
|
|
|
|
return $self->$code; |
724
|
|
|
|
|
|
|
} |
725
|
4
|
|
|
|
|
8
|
catch ( $e ) { |
726
|
|
|
|
|
|
|
pos($self->{str}) = $pos; |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
die $e if $committed or not _isa_failure( $e ); |
729
|
|
|
|
|
|
|
return undef; |
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
=head2 scope_of |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
$ret = $parser->scope_of( $start, $code, $stop ) |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
Expects to find the C<$start> pattern, then attempts to execute the given |
738
|
|
|
|
|
|
|
C<$code>, then expects to find the C<$stop> pattern. Returns whatever the |
739
|
|
|
|
|
|
|
code returned. C<$code> may either be a CODE reference of a method name given |
740
|
|
|
|
|
|
|
as a string. |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
While the code is being executed, the C<$stop> pattern will be used by the |
743
|
|
|
|
|
|
|
token parsing methods as an end-of-scope marker; causing them to raise a |
744
|
|
|
|
|
|
|
failure if called at the end of a scope. |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
sub parse_block |
747
|
|
|
|
|
|
|
{ |
748
|
|
|
|
|
|
|
my $self = shift; |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
$self->scope_of( "{", 'parse_statements', "}" ); |
751
|
|
|
|
|
|
|
} |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
If the C<$start> pattern is undefined, it is presumed the caller has already |
754
|
|
|
|
|
|
|
checked for this. This is useful when the stop pattern needs to be calculated |
755
|
|
|
|
|
|
|
based on the start pattern. |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
sub parse_bracketed |
758
|
|
|
|
|
|
|
{ |
759
|
|
|
|
|
|
|
my $self = shift; |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
my $delim = $self->expect( qr/[\(\[\<\{]/ ); |
762
|
|
|
|
|
|
|
$delim =~ tr/([<{/)]>}/; |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
$self->scope_of( undef, 'parse_body', $delim ); |
765
|
|
|
|
|
|
|
} |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
This method does not have any optional parts to it; any failures are |
768
|
|
|
|
|
|
|
immediately propagated to the caller. |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
=cut |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
sub scope_of |
773
|
|
|
|
|
|
|
{ |
774
|
19
|
|
|
19
|
1
|
87
|
my $self = shift; |
775
|
19
|
|
|
|
|
69
|
$self->_scope_of( 0, @_ ); |
776
|
|
|
|
|
|
|
} |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
sub _scope_of |
779
|
|
|
|
|
|
|
{ |
780
|
73
|
|
|
73
|
|
88
|
my $self = shift; |
781
|
73
|
|
|
|
|
131
|
my ( $commit_if_started, $start, $code, $stop ) = @_; |
782
|
|
|
|
|
|
|
|
783
|
73
|
50
|
|
|
|
403
|
ref $stop or $stop = qr/\Q$stop/; |
784
|
|
|
|
|
|
|
|
785
|
73
|
100
|
|
|
|
235
|
$self->expect( $start ) if defined $start; |
786
|
|
|
|
|
|
|
|
787
|
34
|
100
|
|
|
|
108
|
$self->commit if $commit_if_started; |
788
|
|
|
|
|
|
|
|
789
|
34
|
|
|
|
|
105
|
local $self->{endofscope} = $stop; |
790
|
34
|
|
|
|
|
74
|
local $self->{scope_level} = $self->{scope_level} + 1; |
791
|
|
|
|
|
|
|
|
792
|
34
|
|
|
|
|
104
|
my $ret = $self->$code; |
793
|
|
|
|
|
|
|
|
794
|
31
|
|
|
|
|
103
|
$self->expect( $stop ); |
795
|
|
|
|
|
|
|
|
796
|
30
|
|
|
|
|
176
|
return $ret; |
797
|
|
|
|
|
|
|
} |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=head2 committed_scope_of |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
$ret = $parser->committed_scope_of( $start, $code, $stop ) |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
I |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
A variant of L that calls L after a successful match of |
806
|
|
|
|
|
|
|
the start pattern. This is usually what you want if using C from |
807
|
|
|
|
|
|
|
within an C choice, if no other alternative following this one could |
808
|
|
|
|
|
|
|
possibly match if the start pattern has. |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=cut |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
sub committed_scope_of |
813
|
|
|
|
|
|
|
{ |
814
|
54
|
|
|
54
|
1
|
71
|
my $self = shift; |
815
|
54
|
|
|
|
|
95
|
$self->_scope_of( 1, @_ ); |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=head2 list_of |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
$ret = $parser->list_of( $sep, $code ) |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Expects to find a list of instances of something parsed by C<$code>, |
823
|
|
|
|
|
|
|
separated by the C<$sep> pattern. Returns an ARRAY ref containing a list of |
824
|
|
|
|
|
|
|
the return values from the C<$code>. A single trailing delimiter is allowed, |
825
|
|
|
|
|
|
|
and does not affect the return value. C<$code> may either be a CODE reference |
826
|
|
|
|
|
|
|
or a method name given as a string. It is called in list context, and whatever |
827
|
|
|
|
|
|
|
values it returns are appended to the eventual result - similar to perl's |
828
|
|
|
|
|
|
|
C |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
This method does not consider it an error if the returned list is empty; that |
831
|
|
|
|
|
|
|
is, that the scope ended before any item instances were parsed from it. |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
sub parse_numbers |
834
|
|
|
|
|
|
|
{ |
835
|
|
|
|
|
|
|
my $self = shift; |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
$self->list_of( ",", 'token_int' ); |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
If the code fails (either by invoking C itself, or by propagating a |
841
|
|
|
|
|
|
|
failure from another method it invoked) before it has invoked C on a |
842
|
|
|
|
|
|
|
particular item, then the item is aborted and the parsing position will be |
843
|
|
|
|
|
|
|
restored to the beginning of that failed item. The list of results from |
844
|
|
|
|
|
|
|
previous successful attempts will be returned. |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
If it calls C within an item then any subsequent failure for that item |
847
|
|
|
|
|
|
|
will cause the entire C to fail, propagating that to the caller. |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
=cut |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
sub list_of |
852
|
|
|
|
|
|
|
{ |
853
|
83
|
|
|
83
|
1
|
139
|
my $self = shift; |
854
|
83
|
|
|
|
|
124
|
my ( $sep, $code ) = @_; |
855
|
|
|
|
|
|
|
|
856
|
83
|
100
|
33
|
|
|
230
|
ref $sep or $sep = qr/\Q$sep/ if defined $sep; |
857
|
|
|
|
|
|
|
|
858
|
83
|
|
|
|
|
95
|
my $committed; |
859
|
83
|
|
|
14
|
|
251
|
local $self->{committer} = sub { $committed++ }; |
|
14
|
|
|
|
|
21
|
|
860
|
|
|
|
|
|
|
|
861
|
83
|
|
|
|
|
119
|
my @ret; |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
my @lastpos; |
864
|
|
|
|
|
|
|
|
865
|
83
|
|
|
|
|
172
|
while( !$self->at_eos ) { |
866
|
157
|
|
|
|
|
188
|
$committed = 0; |
867
|
157
|
|
|
|
|
188
|
my $pos = pos $self->{str}; |
868
|
|
|
|
|
|
|
|
869
|
157
|
|
|
|
|
202
|
push @lastpos, $pos; |
870
|
157
|
100
|
|
|
|
293
|
if( @lastpos > $self->{stallcount} ) { |
871
|
1
|
|
|
|
|
2
|
shift @lastpos; |
872
|
1
|
50
|
|
|
|
7
|
$self->die( ref($self) . " failed to make progress" ) if $lastpos[0] == $pos; |
873
|
|
|
|
|
|
|
} |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
try { |
876
|
|
|
|
|
|
|
push @ret, $self->$code; |
877
|
|
|
|
|
|
|
next; |
878
|
|
|
|
|
|
|
} |
879
|
|
|
|
|
|
|
catch ( $e ) { |
880
|
|
|
|
|
|
|
pos($self->{str}) = $pos; |
881
|
|
|
|
|
|
|
die $e if $committed or not _isa_failure( $e ); |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
last; |
884
|
|
|
|
|
|
|
} |
885
|
156
|
|
|
|
|
268
|
} |
886
|
|
|
|
|
|
|
continue { |
887
|
132
|
100
|
|
|
|
299
|
if( defined $sep ) { |
888
|
32
|
|
|
|
|
62
|
$self->skip_ws; |
889
|
32
|
100
|
|
|
|
149
|
$self->{str} =~ m/\G$sep/gc or last; |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
|
893
|
81
|
|
|
|
|
358
|
return \@ret; |
894
|
|
|
|
|
|
|
} |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
=head2 sequence_of |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
$ret = $parser->sequence_of( $code ) |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
A shortcut for calling C with an empty string as separator; expects |
901
|
|
|
|
|
|
|
to find at least one instance of something parsed by C<$code>, separated only |
902
|
|
|
|
|
|
|
by skipped whitespace. |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
This may be considered to be similar to the C<+> or C<*> regexp qualifiers. |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
sub parse_statements |
907
|
|
|
|
|
|
|
{ |
908
|
|
|
|
|
|
|
my $self = shift; |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
$self->sequence_of( 'parse_statement' ); |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
The interaction of failures in the code and the C method is identical |
914
|
|
|
|
|
|
|
to that of C. |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
=cut |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
sub sequence_of |
919
|
|
|
|
|
|
|
{ |
920
|
67
|
|
|
67
|
1
|
223
|
my $self = shift; |
921
|
67
|
|
|
|
|
96
|
my ( $code ) = @_; |
922
|
|
|
|
|
|
|
|
923
|
67
|
|
|
|
|
156
|
$self->list_of( undef, $code ); |
924
|
|
|
|
|
|
|
} |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
=head2 any_of |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
$ret = $parser->any_of( @codes ) |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
I |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
Expects that one of the given code instances can parse something from the |
933
|
|
|
|
|
|
|
input, returning what it returned. Each code instance may indicate a failure |
934
|
|
|
|
|
|
|
to parse by calling the C method or otherwise propagating a failure. |
935
|
|
|
|
|
|
|
Each code instance may either be a CODE reference or a method name given as a |
936
|
|
|
|
|
|
|
string. |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
This may be considered to be similar to the C<|> regexp operator for forming |
939
|
|
|
|
|
|
|
alternations of possible parse trees. |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
sub parse_statement |
942
|
|
|
|
|
|
|
{ |
943
|
|
|
|
|
|
|
my $self = shift; |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
$self->any_of( |
946
|
|
|
|
|
|
|
sub { $self->parse_declaration; $self->expect(";") }, |
947
|
|
|
|
|
|
|
sub { $self->parse_expression; $self->expect(";") }, |
948
|
|
|
|
|
|
|
sub { $self->parse_block }, |
949
|
|
|
|
|
|
|
); |
950
|
|
|
|
|
|
|
} |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
If the code for a given choice fails (either by invoking C itself, or by |
953
|
|
|
|
|
|
|
propagating a failure from another method it invoked) before it has invoked |
954
|
|
|
|
|
|
|
C itself, then the parsing position restored and the next choice will |
955
|
|
|
|
|
|
|
be attempted. |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
If it calls C then any subsequent failure for that choice will cause |
958
|
|
|
|
|
|
|
the entire C to fail, propagating that to the caller and no further |
959
|
|
|
|
|
|
|
choices will be attempted. |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
If none of the choices match then a simple failure message is printed: |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
Found nothing parseable |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
As this is unlikely to be helpful to users, a better message can be provided |
966
|
|
|
|
|
|
|
by the final choice instead. Don't forget to C before printing the |
967
|
|
|
|
|
|
|
failure message, or it won't count. |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
$self->any_of( |
970
|
|
|
|
|
|
|
'token_int', |
971
|
|
|
|
|
|
|
'token_string', |
972
|
|
|
|
|
|
|
..., |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
sub { $self->commit; $self->fail( "Expected an int or string" ) } |
975
|
|
|
|
|
|
|
); |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
=cut |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
sub any_of |
980
|
|
|
|
|
|
|
{ |
981
|
234
|
|
|
234
|
1
|
498
|
my $self = shift; |
982
|
|
|
|
|
|
|
|
983
|
234
|
|
|
|
|
418
|
while( @_ ) { |
984
|
460
|
|
|
|
|
578
|
my $code = shift; |
985
|
460
|
|
|
|
|
555
|
my $pos = pos $self->{str}; |
986
|
|
|
|
|
|
|
|
987
|
460
|
|
|
|
|
467
|
my $committed = 0; |
988
|
460
|
|
|
55
|
|
1190
|
local $self->{committer} = sub { $committed++ }; |
|
55
|
|
|
|
|
114
|
|
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
try { |
991
|
|
|
|
|
|
|
return $self->$code; |
992
|
|
|
|
|
|
|
} |
993
|
460
|
|
|
|
|
713
|
catch ( $e ) { |
994
|
|
|
|
|
|
|
pos( $self->{str} ) = $pos; |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
die $e if $committed or not _isa_failure( $e ); |
997
|
|
|
|
|
|
|
} |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
|
1000
|
12
|
|
|
|
|
26
|
$self->fail( "Found nothing parseable" ); |
1001
|
|
|
|
|
|
|
} |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
sub one_of { |
1004
|
0
|
|
|
0
|
0
|
0
|
croak "Parser::MGC->one_of is deprecated; use ->any_of instead"; |
1005
|
|
|
|
|
|
|
} |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
=head2 commit |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
$parser->commit |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
Calling this method will cancel the backtracking behaviour of the innermost |
1012
|
|
|
|
|
|
|
C, C, C, or C structure forming method. |
1013
|
|
|
|
|
|
|
That is, if later code then calls C, the exception will be propagated |
1014
|
|
|
|
|
|
|
out of C, no further list items will be attempted by C or |
1015
|
|
|
|
|
|
|
C, and no further code blocks will be attempted by C. |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
Typically this will be called once the grammatical structure alter has been |
1018
|
|
|
|
|
|
|
determined, ensuring that any further failures are raised as real exceptions, |
1019
|
|
|
|
|
|
|
rather than by attempting other alternatives. |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
sub parse_statement |
1022
|
|
|
|
|
|
|
{ |
1023
|
|
|
|
|
|
|
my $self = shift; |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
$self->any_of( |
1026
|
|
|
|
|
|
|
... |
1027
|
|
|
|
|
|
|
sub { |
1028
|
|
|
|
|
|
|
$self->scope_of( "{", |
1029
|
|
|
|
|
|
|
sub { $self->commit; $self->parse_statements; }, |
1030
|
|
|
|
|
|
|
"}" ), |
1031
|
|
|
|
|
|
|
}, |
1032
|
|
|
|
|
|
|
); |
1033
|
|
|
|
|
|
|
} |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
Though in this common pattern, L may be used instead. |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
=cut |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
sub commit |
1040
|
|
|
|
|
|
|
{ |
1041
|
69
|
|
|
69
|
1
|
108
|
my $self = shift; |
1042
|
69
|
50
|
|
|
|
128
|
if( $self->{committer} ) { |
1043
|
69
|
|
|
|
|
105
|
$self->{committer}->(); |
1044
|
|
|
|
|
|
|
} |
1045
|
|
|
|
|
|
|
else { |
1046
|
0
|
|
|
|
|
0
|
croak "Cannot commit except within a backtrack-able structure"; |
1047
|
|
|
|
|
|
|
} |
1048
|
|
|
|
|
|
|
} |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
=head1 TOKEN PARSING METHODS |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
The following methods attempt to consume some part of the input string, to be |
1053
|
|
|
|
|
|
|
used as part of the parsing process. |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
=cut |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
sub skip_ws |
1058
|
|
|
|
|
|
|
{ |
1059
|
1357
|
|
|
1357
|
0
|
1484
|
my $self = shift; |
1060
|
|
|
|
|
|
|
|
1061
|
1357
|
|
|
|
|
1672
|
my $pattern = $self->{patterns}{_skip}; |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
{ |
1064
|
1357
|
|
|
|
|
1386
|
1 while $self->{str} =~ m/\G$pattern/gc; |
|
1360
|
|
|
|
|
5808
|
|
1065
|
|
|
|
|
|
|
|
1066
|
1360
|
100
|
|
|
|
2955
|
return if pos( $self->{str} ) < length $self->{str}; |
1067
|
|
|
|
|
|
|
|
1068
|
237
|
100
|
|
|
|
545
|
return unless $self->{reader}; |
1069
|
|
|
|
|
|
|
|
1070
|
4
|
|
|
|
|
6
|
my $more = $self->{reader}->( $self ); |
1071
|
4
|
100
|
|
|
|
16
|
if( defined $more ) { |
1072
|
3
|
|
|
|
|
4
|
my $pos = pos( $self->{str} ); |
1073
|
3
|
|
|
|
|
6
|
$self->{str} .= $more; |
1074
|
3
|
|
|
|
|
4
|
pos( $self->{str} ) = $pos; |
1075
|
|
|
|
|
|
|
|
1076
|
3
|
|
|
|
|
8
|
redo; |
1077
|
|
|
|
|
|
|
} |
1078
|
|
|
|
|
|
|
|
1079
|
1
|
|
|
|
|
2
|
undef $self->{reader}; |
1080
|
1
|
|
|
|
|
2
|
return; |
1081
|
|
|
|
|
|
|
} |
1082
|
|
|
|
|
|
|
} |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
=head2 expect |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
$str = $parser->expect( $literal ) |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
$str = $parser->expect( qr/pattern/ ) |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
@groups = $parser->expect( qr/pattern/ ) |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
Expects to find a literal string or regexp pattern match, and consumes it. |
1093
|
|
|
|
|
|
|
In scalar context, this method returns the string that was captured. In list |
1094
|
|
|
|
|
|
|
context it returns the matching substring and the contents of any subgroups |
1095
|
|
|
|
|
|
|
contained in the pattern. |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
This method will raise a parse error (by calling C) if the regexp fails |
1098
|
|
|
|
|
|
|
to match. Note that if the pattern could match an empty string (such as for |
1099
|
|
|
|
|
|
|
example C), the pattern will always match, even if it has to match an |
1100
|
|
|
|
|
|
|
empty string. This method will not consider a failure if the regexp matches |
1101
|
|
|
|
|
|
|
with zero-width. |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
=head2 maybe_expect |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
$str = $parser->maybe_expect( ... ) |
1106
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
@groups = $parser->maybe_expect( ... ) |
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
I |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
A convenient shortcut equivalent to calling C within C, but |
1112
|
|
|
|
|
|
|
implemented more efficiently, avoiding the exception-handling set up by |
1113
|
|
|
|
|
|
|
C. Returns C or an empty list if the match fails. |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
=cut |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
sub maybe_expect |
1118
|
|
|
|
|
|
|
{ |
1119
|
400
|
|
|
400
|
1
|
501
|
my $self = shift; |
1120
|
400
|
|
|
|
|
496
|
my ( $expect ) = @_; |
1121
|
|
|
|
|
|
|
|
1122
|
400
|
100
|
|
|
|
655
|
ref $expect or $expect = qr/\Q$expect/; |
1123
|
|
|
|
|
|
|
|
1124
|
400
|
|
|
|
|
715
|
$self->skip_ws; |
1125
|
400
|
100
|
|
|
|
3767
|
$self->{str} =~ m/\G$expect/gc or return; |
1126
|
|
|
|
|
|
|
|
1127
|
228
|
100
|
|
|
|
1153
|
return substr( $self->{str}, $-[0], $+[0]-$-[0] ) if !wantarray; |
1128
|
40
|
100
|
|
|
|
102
|
return map { defined $-[$_] ? substr( $self->{str}, $-[$_], $+[$_]-$-[$_] ) : undef } 0 .. $#+; |
|
60
|
|
|
|
|
361
|
|
1129
|
|
|
|
|
|
|
} |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
sub expect |
1132
|
|
|
|
|
|
|
{ |
1133
|
384
|
|
|
384
|
1
|
657
|
my $self = shift; |
1134
|
384
|
|
|
|
|
582
|
my ( $expect ) = @_; |
1135
|
|
|
|
|
|
|
|
1136
|
384
|
100
|
|
|
|
2258
|
ref $expect or $expect = qr/\Q$expect/; |
1137
|
|
|
|
|
|
|
|
1138
|
384
|
100
|
|
|
|
746
|
if( wantarray ) { |
1139
|
60
|
100
|
|
|
|
138
|
my @ret = $self->maybe_expect( $expect ) or |
1140
|
|
|
|
|
|
|
$self->fail( "Expected $expect" ); |
1141
|
38
|
|
|
|
|
129
|
return @ret; |
1142
|
|
|
|
|
|
|
} |
1143
|
|
|
|
|
|
|
else { |
1144
|
324
|
100
|
|
|
|
573
|
defined( my $ret = $self->maybe_expect( $expect ) ) or |
1145
|
|
|
|
|
|
|
$self->fail( "Expected $expect" ); |
1146
|
185
|
|
|
|
|
476
|
return $ret; |
1147
|
|
|
|
|
|
|
} |
1148
|
|
|
|
|
|
|
} |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
=head2 substring_before |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
$str = $parser->substring_before( $literal ) |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
$str = $parser->substring_before( qr/pattern/ ) |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
I |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
Expects to possibly find a literal string or regexp pattern match. If it finds |
1159
|
|
|
|
|
|
|
such, consume all the input text before but excluding this match, and return |
1160
|
|
|
|
|
|
|
it. If it fails to find a match before the end of the current scope, consumes |
1161
|
|
|
|
|
|
|
all the input text until the end of scope and return it. |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
This method does not consume the part of input that matches, only the text |
1164
|
|
|
|
|
|
|
before it. It is not considered a failure if the substring before this match |
1165
|
|
|
|
|
|
|
is empty. If a non-empty match is required, use the C method: |
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
sub token_nonempty_part |
1168
|
|
|
|
|
|
|
{ |
1169
|
|
|
|
|
|
|
my $self = shift; |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
my $str = $parser->substring_before( "," ); |
1172
|
|
|
|
|
|
|
length $str or $self->fail( "Expected a string fragment before ," ); |
1173
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
return $str; |
1175
|
|
|
|
|
|
|
} |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
Note that unlike most of the other token parsing methods, this method does not |
1178
|
|
|
|
|
|
|
consume either leading or trailing whitespace around the substring. It is |
1179
|
|
|
|
|
|
|
expected that this method would be used as part a parser to read quoted |
1180
|
|
|
|
|
|
|
strings, or similar cases where whitespace should be preserved. |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
=head2 nonempty_substring_before |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
$str = $parser->nonempty_substring_before( $literal ) |
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
$str = $parser->nonempty_substring_before( qr/pattern/ ) |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
I |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
A variant of L which fails if the matched part is empty. |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
The example above could have been written: |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
sub token_nonempty_part |
1195
|
|
|
|
|
|
|
{ |
1196
|
|
|
|
|
|
|
my $self = shift; |
1197
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
return $parser->nonempty_substring_before( "," ); |
1199
|
|
|
|
|
|
|
} |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
This is often useful for breaking out of repeating loops; e.g. |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
sub token_escaped_string |
1204
|
|
|
|
|
|
|
{ |
1205
|
|
|
|
|
|
|
my $self = shift; |
1206
|
|
|
|
|
|
|
$self->expect( '"' ); |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
my $ret = ""; |
1209
|
|
|
|
|
|
|
1 while $self->any_of( |
1210
|
|
|
|
|
|
|
sub { $ret .= $self->nonempty_substring_before( qr/%|$/m ); 1 } |
1211
|
|
|
|
|
|
|
sub { my $escape = ( $self->expect( qr/%(.)/ ) )[1]; |
1212
|
|
|
|
|
|
|
$ret .= _handle_escape( $escape ); |
1213
|
|
|
|
|
|
|
1 }, |
1214
|
|
|
|
|
|
|
sub { 0 }, |
1215
|
|
|
|
|
|
|
) |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
return $ret; |
1218
|
|
|
|
|
|
|
} |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
=cut |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
sub _substring_before |
1223
|
|
|
|
|
|
|
{ |
1224
|
52
|
|
|
52
|
|
60
|
my $self = shift; |
1225
|
52
|
|
|
|
|
80
|
my ( $expect, $fail_if_empty ) = @_; |
1226
|
|
|
|
|
|
|
|
1227
|
52
|
100
|
|
|
|
192
|
ref $expect or $expect = qr/\Q$expect/; |
1228
|
|
|
|
|
|
|
|
1229
|
52
|
100
|
|
|
|
195
|
my $endre = ( defined $self->{endofscope} ) ? |
1230
|
|
|
|
|
|
|
qr/$expect|$self->{endofscope}/ : |
1231
|
|
|
|
|
|
|
$expect; |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
# NO skip_ws |
1234
|
|
|
|
|
|
|
|
1235
|
52
|
|
|
|
|
70
|
my $start = pos $self->{str}; |
1236
|
52
|
|
|
|
|
56
|
my $end; |
1237
|
52
|
100
|
|
|
|
487
|
if( $self->{str} =~ m/\G(?s:.*?)($endre)/ ) { |
1238
|
42
|
|
|
|
|
109
|
$end = $-[1]; |
1239
|
|
|
|
|
|
|
} |
1240
|
|
|
|
|
|
|
else { |
1241
|
10
|
|
|
|
|
19
|
$end = length $self->{str}; |
1242
|
|
|
|
|
|
|
} |
1243
|
|
|
|
|
|
|
|
1244
|
52
|
100
|
66
|
|
|
126
|
$self->fail( "Expected to find a non-empty substring before $expect" ) |
1245
|
|
|
|
|
|
|
if $fail_if_empty and $end == $start; |
1246
|
|
|
|
|
|
|
|
1247
|
51
|
|
|
|
|
131
|
return $self->take( $end - $start ); |
1248
|
|
|
|
|
|
|
} |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
sub substring_before |
1251
|
|
|
|
|
|
|
{ |
1252
|
51
|
|
|
51
|
1
|
84
|
my $self = shift; |
1253
|
51
|
|
|
|
|
95
|
return $self->_substring_before( $_[0], 0 ); |
1254
|
|
|
|
|
|
|
} |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
sub nonempty_substring_before |
1257
|
|
|
|
|
|
|
{ |
1258
|
1
|
|
|
1
|
1
|
8
|
my $self = shift; |
1259
|
1
|
|
|
|
|
3
|
return $self->_substring_before( $_[0], 1 ); |
1260
|
|
|
|
|
|
|
} |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
=head2 generic_token |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
$val = $parser->generic_token( $name, $re, $convert ) |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
I |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
Expects to find a token matching the precompiled regexp C<$re>. If provided, |
1269
|
|
|
|
|
|
|
the C<$convert> CODE reference can be used to convert the string into a more |
1270
|
|
|
|
|
|
|
convenient form. C<$name> is used in the failure message if the pattern fails |
1271
|
|
|
|
|
|
|
to match. |
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
If provided, the C<$convert> function will be passed the parser and the |
1274
|
|
|
|
|
|
|
matching substring; the value it returns is returned from C. |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
$convert->( $parser, $substr ) |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
If not provided, the substring will be returned as it stands. |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
This method is mostly provided for subclasses to define their own token types. |
1281
|
|
|
|
|
|
|
For example: |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
sub token_hex |
1284
|
|
|
|
|
|
|
{ |
1285
|
|
|
|
|
|
|
my $self = shift; |
1286
|
|
|
|
|
|
|
$self->generic_token( hex => qr/[0-9A-F]{2}h/, sub { hex $_[1] } ); |
1287
|
|
|
|
|
|
|
} |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
=cut |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
sub generic_token |
1292
|
|
|
|
|
|
|
{ |
1293
|
230
|
|
|
230
|
1
|
269
|
my $self = shift; |
1294
|
230
|
|
|
|
|
376
|
my ( $name, $re, $convert ) = @_; |
1295
|
|
|
|
|
|
|
|
1296
|
230
|
50
|
|
|
|
372
|
$self->fail( "Expected $name" ) if $self->at_eos; |
1297
|
|
|
|
|
|
|
|
1298
|
230
|
|
|
|
|
453
|
$self->skip_ws; |
1299
|
230
|
100
|
|
|
|
3179
|
$self->{str} =~ m/\G$re/gc or |
1300
|
|
|
|
|
|
|
$self->fail( "Expected $name" ); |
1301
|
|
|
|
|
|
|
|
1302
|
169
|
|
|
|
|
777
|
my $match = substr( $self->{str}, $-[0], $+[0] - $-[0] ); |
1303
|
|
|
|
|
|
|
|
1304
|
169
|
100
|
|
|
|
579
|
return $convert ? $convert->( $self, $match ) : $match; |
1305
|
|
|
|
|
|
|
} |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
sub _token_generic |
1308
|
|
|
|
|
|
|
{ |
1309
|
226
|
|
|
226
|
|
286
|
my $self = shift; |
1310
|
226
|
|
|
|
|
564
|
my %args = @_; |
1311
|
|
|
|
|
|
|
|
1312
|
226
|
|
|
|
|
341
|
my $name = $args{name}; |
1313
|
226
|
50
|
|
|
|
470
|
my $re = $args{pattern} ? $self->{patterns}{ $args{pattern} } : $args{re}; |
1314
|
226
|
|
|
|
|
254
|
my $convert = $args{convert}; |
1315
|
|
|
|
|
|
|
|
1316
|
226
|
|
|
|
|
416
|
$self->generic_token( $name, $re, $convert ); |
1317
|
|
|
|
|
|
|
} |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
=head2 token_int |
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
$int = $parser->token_int |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
Expects to find an integer in decimal, octal or hexadecimal notation, and |
1324
|
|
|
|
|
|
|
consumes it. Negative integers, preceeded by C<->, are also recognised. |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
=cut |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
sub token_int |
1329
|
|
|
|
|
|
|
{ |
1330
|
134
|
|
|
134
|
1
|
280
|
my $self = shift; |
1331
|
|
|
|
|
|
|
$self->_token_generic( |
1332
|
|
|
|
|
|
|
name => "int", |
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
pattern => "int", |
1335
|
|
|
|
|
|
|
convert => sub { |
1336
|
104
|
|
|
104
|
|
145
|
my $int = $_[1]; |
1337
|
104
|
100
|
|
|
|
210
|
my $sign = ( $int =~ s/^-// ) ? -1 : 1; |
1338
|
|
|
|
|
|
|
|
1339
|
104
|
|
|
|
|
130
|
$int =~ s/^0o/0/; |
1340
|
|
|
|
|
|
|
|
1341
|
104
|
100
|
|
|
|
216
|
return $sign * oct $int if $int =~ m/^0/; |
1342
|
99
|
|
|
|
|
683
|
return $sign * $int; |
1343
|
|
|
|
|
|
|
}, |
1344
|
134
|
|
|
|
|
459
|
); |
1345
|
|
|
|
|
|
|
} |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
=head2 token_float |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
$float = $parser->token_float |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
I |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
Expects to find a number expressed in floating-point notation; a sequence of |
1354
|
|
|
|
|
|
|
digits possibly prefixed by C<->, possibly containing a decimal point, |
1355
|
|
|
|
|
|
|
possibly followed by an exponent specified by C followed by an integer. The |
1356
|
|
|
|
|
|
|
numerical value is then returned. |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
=cut |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
sub token_float |
1361
|
|
|
|
|
|
|
{ |
1362
|
20
|
|
|
20
|
1
|
52
|
my $self = shift; |
1363
|
|
|
|
|
|
|
$self->_token_generic( |
1364
|
|
|
|
|
|
|
name => "float", |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
pattern => "float", |
1367
|
18
|
|
|
18
|
|
119
|
convert => sub { $_[1] + 0 }, |
1368
|
20
|
|
|
|
|
77
|
); |
1369
|
|
|
|
|
|
|
} |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
=head2 token_number |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
$number = $parser->token_number |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
I |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
Expects to find a number expressed in either of the above forms. |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
=cut |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
sub token_number |
1382
|
|
|
|
|
|
|
{ |
1383
|
7
|
|
|
7
|
1
|
26
|
my $self = shift; |
1384
|
7
|
|
|
|
|
18
|
$self->any_of( \&token_float, \&token_int ); |
1385
|
|
|
|
|
|
|
} |
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
=head2 token_string |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
$str = $parser->token_string |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
Expects to find a quoted string, and consumes it. The string should be quoted |
1392
|
|
|
|
|
|
|
using C<"> or C<'> quote marks. |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
The content of the quoted string can contain character escapes similar to |
1395
|
|
|
|
|
|
|
those accepted by C or Perl. Specifically, the following forms are recognised: |
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
\a Bell ("alert") |
1398
|
|
|
|
|
|
|
\b Backspace |
1399
|
|
|
|
|
|
|
\e Escape |
1400
|
|
|
|
|
|
|
\f Form feed |
1401
|
|
|
|
|
|
|
\n Newline |
1402
|
|
|
|
|
|
|
\r Return |
1403
|
|
|
|
|
|
|
\t Horizontal Tab |
1404
|
|
|
|
|
|
|
\0, \012 Octal character |
1405
|
|
|
|
|
|
|
\x34, \x{5678} Hexadecimal character |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
C's C<\v> for vertical tab is not supported as it is rarely used in practice |
1408
|
|
|
|
|
|
|
and it collides with Perl's C<\v> regexp escape. Perl's C<\c> for forming other |
1409
|
|
|
|
|
|
|
control characters is also not supported. |
1410
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
=cut |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
my %escapes = ( |
1414
|
|
|
|
|
|
|
a => "\a", |
1415
|
|
|
|
|
|
|
b => "\b", |
1416
|
|
|
|
|
|
|
e => "\e", |
1417
|
|
|
|
|
|
|
f => "\f", |
1418
|
|
|
|
|
|
|
n => "\n", |
1419
|
|
|
|
|
|
|
r => "\r", |
1420
|
|
|
|
|
|
|
t => "\t", |
1421
|
|
|
|
|
|
|
); |
1422
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
sub token_string |
1424
|
|
|
|
|
|
|
{ |
1425
|
53
|
|
|
53
|
1
|
136
|
my $self = shift; |
1426
|
|
|
|
|
|
|
|
1427
|
53
|
100
|
|
|
|
151
|
$self->fail( "Expected string" ) if $self->at_eos; |
1428
|
|
|
|
|
|
|
|
1429
|
52
|
|
|
|
|
77
|
my $pos = pos $self->{str}; |
1430
|
|
|
|
|
|
|
|
1431
|
52
|
|
|
|
|
101
|
$self->skip_ws; |
1432
|
52
|
100
|
|
|
|
448
|
$self->{str} =~ m/\G($self->{patterns}{string_delim})/gc or |
1433
|
|
|
|
|
|
|
$self->fail( "Expected string delimiter" ); |
1434
|
|
|
|
|
|
|
|
1435
|
32
|
|
|
|
|
88
|
my $delim = $1; |
1436
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
$self->{str} =~ m/ |
1438
|
|
|
|
|
|
|
\G( |
1439
|
|
|
|
|
|
|
(?: |
1440
|
|
|
|
|
|
|
\\[0-7]{1,3} # octal escape |
1441
|
|
|
|
|
|
|
|\\x[0-9A-F]{2} # 2-digit hex escape |
1442
|
|
|
|
|
|
|
|\\x\{[0-9A-F]+\} # {}-delimited hex escape |
1443
|
|
|
|
|
|
|
|\\. # symbolic escape |
1444
|
|
|
|
|
|
|
|[^\\$delim]+ # plain chunk |
1445
|
|
|
|
|
|
|
)*? |
1446
|
|
|
|
|
|
|
)$delim/gcix or |
1447
|
32
|
50
|
|
|
|
835
|
pos($self->{str}) = $pos, $self->fail( "Expected contents of string" ); |
1448
|
|
|
|
|
|
|
|
1449
|
32
|
|
|
|
|
85
|
my $string = $1; |
1450
|
|
|
|
|
|
|
|
1451
|
32
|
|
|
|
|
101
|
$string =~ s<\\(?:([0-7]{1,3})|x([0-9A-F]{2})|x\{([0-9A-F]+)\}|(.))> |
1452
|
|
|
|
|
|
|
[defined $1 ? chr oct $1 : |
1453
|
|
|
|
|
|
|
defined $2 ? chr hex $2 : |
1454
|
11
|
50
|
|
|
|
79
|
defined $3 ? chr hex $3 : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
exists $escapes{$4} ? $escapes{$4} : $4]egi; |
1456
|
32
|
|
|
|
|
129
|
|
1457
|
|
|
|
|
|
|
return $string; |
1458
|
|
|
|
|
|
|
} |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
=head2 token_ident |
1461
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
$ident = $parser->token_ident |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
Expects to find an identifier, and consumes it. |
1465
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
=cut |
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
sub token_ident |
1469
|
72
|
|
|
72
|
1
|
111
|
{ |
1470
|
72
|
|
|
|
|
119
|
my $self = shift; |
1471
|
|
|
|
|
|
|
$self->_token_generic( |
1472
|
|
|
|
|
|
|
name => "ident", |
1473
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
pattern => "ident", |
1475
|
|
|
|
|
|
|
); |
1476
|
|
|
|
|
|
|
} |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
=head2 token_kw |
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
$keyword = $parser->token_kw( @keywords ) |
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
Expects to find a keyword, and consumes it. A keyword is defined as an |
1483
|
|
|
|
|
|
|
identifier which is exactly one of the literal values passed in. |
1484
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
=cut |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
sub token_kw |
1488
|
2
|
|
|
2
|
1
|
11
|
{ |
1489
|
2
|
|
|
|
|
4
|
my $self = shift; |
1490
|
|
|
|
|
|
|
my @acceptable = @_; |
1491
|
2
|
|
|
|
|
6
|
|
1492
|
|
|
|
|
|
|
$self->skip_ws; |
1493
|
2
|
|
|
|
|
3
|
|
1494
|
|
|
|
|
|
|
my $pos = pos $self->{str}; |
1495
|
2
|
50
|
|
|
|
5
|
|
1496
|
|
|
|
|
|
|
defined( my $kw = $self->token_ident ) or |
1497
|
|
|
|
|
|
|
return undef; |
1498
|
4
|
|
|
|
|
16
|
|
1499
|
2
|
100
|
|
|
|
4
|
grep { $_ eq $kw } @acceptable or |
1500
|
|
|
|
|
|
|
pos($self->{str}) = $pos, $self->fail( "Expected any of ".join( ", ", @acceptable ) ); |
1501
|
1
|
|
|
|
|
3
|
|
1502
|
|
|
|
|
|
|
return $kw; |
1503
|
|
|
|
|
|
|
} |
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
package # hide from indexer |
1506
|
|
|
|
|
|
|
Parser::MGC::Failure; |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
sub new |
1509
|
285
|
|
|
285
|
|
345
|
{ |
1510
|
285
|
|
|
|
|
450
|
my $class = shift; |
1511
|
285
|
|
|
|
|
421
|
my $self = bless {}, $class; |
|
285
|
|
|
|
|
1279
|
|
1512
|
285
|
|
|
|
|
1184
|
@{$self}{qw( message parser pos )} = @_; |
1513
|
|
|
|
|
|
|
return $self; |
1514
|
|
|
|
|
|
|
} |
1515
|
33
|
|
|
33
|
|
27633
|
|
|
33
|
|
|
|
|
21615
|
|
|
33
|
|
|
|
|
205
|
|
1516
|
|
|
|
|
|
|
use overload '""' => "STRING"; |
1517
|
|
|
|
|
|
|
sub STRING |
1518
|
44
|
|
|
44
|
|
3607
|
{ |
1519
|
|
|
|
|
|
|
my $self = shift; |
1520
|
44
|
|
|
|
|
92
|
|
1521
|
44
|
|
|
|
|
163
|
my $parser = $self->{parser}; |
1522
|
|
|
|
|
|
|
my ( $linenum, $col, $text ) = $parser->where( $self->{pos} ); |
1523
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
# Column number only counts characters. There may be tabs in there. |
1525
|
|
|
|
|
|
|
# Rather than trying to calculate the visual column number, just print the |
1526
|
|
|
|
|
|
|
# indentation as it stands. |
1527
|
44
|
|
|
|
|
99
|
|
1528
|
44
|
|
|
|
|
178
|
my $indent = substr( $text, 0, $col ); |
1529
|
|
|
|
|
|
|
$indent =~ s/[^ \t]/ /g; # blank out all the non-whitespace |
1530
|
44
|
|
|
|
|
170
|
|
1531
|
44
|
100
|
100
|
|
|
167
|
my $filename = $parser->filename; |
1532
|
|
|
|
|
|
|
my $in_file = ( defined $filename and !ref $filename ) |
1533
|
|
|
|
|
|
|
? "in $filename " : ""; |
1534
|
44
|
|
|
|
|
462
|
|
1535
|
|
|
|
|
|
|
return "$self->{message} ${in_file}on line $linenum at:\n" . |
1536
|
|
|
|
|
|
|
"$text\n" . |
1537
|
|
|
|
|
|
|
"$indent^\n"; |
1538
|
|
|
|
|
|
|
} |
1539
|
|
|
|
|
|
|
|
1540
|
33
|
|
|
33
|
|
6768
|
# Provide fallback operators for cmp, eq, etc... |
|
33
|
|
|
|
|
76
|
|
|
33
|
|
|
|
|
164
|
|
1541
|
|
|
|
|
|
|
use overload fallback => 1; |
1542
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
=head1 EXAMPLES |
1544
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
=head2 Accumulating Results Using Variables |
1546
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
Although the structure-forming methods all return a value, obtained from their |
1548
|
|
|
|
|
|
|
nested parsing code, it can sometimes be more convenient to use a variable to |
1549
|
|
|
|
|
|
|
accumulate a result in instead. For example, consider the following parser |
1550
|
|
|
|
|
|
|
method, designed to parse a set of C assignments, such as might |
1551
|
|
|
|
|
|
|
be found in a configuration file, or YAML/JSON-style mapping value. |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
sub parse_dict |
1554
|
|
|
|
|
|
|
{ |
1555
|
|
|
|
|
|
|
my $self = shift; |
1556
|
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
|
my %ret; |
1558
|
|
|
|
|
|
|
$self->list_of( ",", sub { |
1559
|
|
|
|
|
|
|
my $key = $self->token_ident; |
1560
|
|
|
|
|
|
|
exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" ); |
1561
|
|
|
|
|
|
|
|
1562
|
|
|
|
|
|
|
$self->expect( ":" ); |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
$ret{$key} = $self->parse_value; |
1565
|
|
|
|
|
|
|
} ); |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
return \%ret |
1568
|
|
|
|
|
|
|
} |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
Instead of using the return value from C, this method accumulates |
1571
|
|
|
|
|
|
|
values in the C<%ret> hash, eventually returning a reference to it as its |
1572
|
|
|
|
|
|
|
result. Because of this, it can perform some error checking while it parses; |
1573
|
|
|
|
|
|
|
namely, rejecting duplicate keys. |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
=head1 TODO |
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
=over 4 |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=item * |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
Make unescaping of string constants more customisable. Possibly consider |
1582
|
|
|
|
|
|
|
instead a C using a loop over C. |
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
=item * |
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
Easy ability for subclasses to define more token types as methods. Perhaps |
1587
|
|
|
|
|
|
|
provide a class method such as |
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
__PACKAGE__->has_token( hex => qr/[0-9A-F]+/i, sub { hex $_[1] } ); |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
=item * |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
Investigate how well C can cope with buffer splitting across |
1594
|
|
|
|
|
|
|
other tokens than simply skippable whitespace |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
=back |
1597
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
=head1 AUTHOR |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
Paul Evans |
1601
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
=cut |
1603
|
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
|
0x55AA; |