line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: Lexer.pm,v 1.6 2013/07/26 01:57:26 Paulo Exp $
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Asm::Preproc::Lexer;
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Asm::Preproc::Lexer - Iterator to split input in tokens
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=cut
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
14
|
|
|
|
|
|
|
|
15
|
1
|
|
|
1
|
|
1753
|
use strict;
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
41
|
|
16
|
1
|
|
|
1
|
|
5
|
use warnings;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
17
|
|
|
|
|
|
|
|
18
|
1
|
|
|
1
|
|
6
|
use Carp;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
60
|
|
19
|
1
|
|
|
1
|
|
673
|
use Text::Template 'fill_in_string';
|
|
1
|
|
|
|
|
3525
|
|
|
1
|
|
|
|
|
66
|
|
20
|
1
|
|
|
1
|
|
8
|
use Asm::Preproc::Line;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
21
|
1
|
|
|
1
|
|
27
|
use Asm::Preproc::Token;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our $VERSION = '1.03';
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
use Asm::Preproc::Lexer;
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
my @tokens = (
|
32
|
|
|
|
|
|
|
BLANKS => qr/\s+/, sub {()},
|
33
|
|
|
|
|
|
|
COMMENT => [qr/\/\*/, qr/\*\//],
|
34
|
|
|
|
|
|
|
undef,
|
35
|
|
|
|
|
|
|
QSTR => [qr/'/], sub { my($type, $value) = @_;
|
36
|
|
|
|
|
|
|
[$type,
|
37
|
|
|
|
|
|
|
substr($value, 1, length($value)-2)] },
|
38
|
|
|
|
|
|
|
QQSTR => [qr/"/, qr/"/],
|
39
|
|
|
|
|
|
|
NUM => qr/\d+/,
|
40
|
|
|
|
|
|
|
ID => qr/[a-z]+/, sub { my($type, $value) = @_;
|
41
|
|
|
|
|
|
|
[$type, $value] },
|
42
|
|
|
|
|
|
|
SYM => qr/(.)/, sub { [$1, $1] },
|
43
|
|
|
|
|
|
|
);
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $lex = Asm::Preproc::Lexer->new;
|
46
|
|
|
|
|
|
|
$lex->make_lexer(@tokens);
|
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
my $lex2 = $lex->clone;
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
$lex->from(sub {}, @lines); # read Asm::Preproc::Line from iterator
|
51
|
|
|
|
|
|
|
my $token = $lex->next; # isa Asm::Preproc::Token
|
52
|
|
|
|
|
|
|
my $token = $lex->();
|
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
This module implements a sub-class of
|
57
|
|
|
|
|
|
|
L
|
58
|
|
|
|
|
|
|
to read text from iterators and split the text in tokens,
|
59
|
|
|
|
|
|
|
according to the specification given to
|
60
|
|
|
|
|
|
|
C constructor.
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
The objects are L compatible,
|
63
|
|
|
|
|
|
|
i.e. they can be used as an argument to C.
|
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
The tokenizer reads L objects and
|
66
|
|
|
|
|
|
|
splits them in L objects on each
|
67
|
|
|
|
|
|
|
C call. C returns C on end of input.
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head1 FUNCTIONS
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 new
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Creates a new tokenizer object, subclass of
|
74
|
|
|
|
|
|
|
L.
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
C must be called to create the tokenizer code before the
|
77
|
|
|
|
|
|
|
iterator can be used.
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head2 make_lexer
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Creates a new tokenizer object for the given token specification.
|
82
|
|
|
|
|
|
|
Each token is specified by the following elements:
|
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=over 4
|
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=item type
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
String to identify the token type, unused if the token is discarded (see
|
89
|
|
|
|
|
|
|
C and C above).
|
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=item regexp
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
One of:
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=over 4
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=item 1
|
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
A single regular expression to match the token at the current input position.
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=item 2
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
A list of one regular expression, to match delimited tokens that use the
|
104
|
|
|
|
|
|
|
same delimiter for the start and the end.
|
105
|
|
|
|
|
|
|
The token can span multiple lines.
|
106
|
|
|
|
|
|
|
See see C above for an example for multi-line single-quoted strings.
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item 3
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
A list of two regular expressions, to match the start
|
111
|
|
|
|
|
|
|
of the token at the current input position, and the end of the token.
|
112
|
|
|
|
|
|
|
The token can span multiple lines.
|
113
|
|
|
|
|
|
|
See see C above for an example for multi-line comments.
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=back
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
The regular expression is matched where the previous match finished,
|
118
|
|
|
|
|
|
|
and each sub-expression cannot span multiple lines.
|
119
|
|
|
|
|
|
|
Parentheses may be used to capture sub-expressions in C<$1>, C<$2>, etc.
|
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
It is considered an error, and the tokeninzer dies with an error message
|
122
|
|
|
|
|
|
|
when reading input, if some input cannot be recognized by any of the
|
123
|
|
|
|
|
|
|
given C espressions. Therefore the C token above contains the
|
124
|
|
|
|
|
|
|
catch-all expression C.
|
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item transform (optional)
|
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
The optional code reference is a transform subroutine. It receives
|
129
|
|
|
|
|
|
|
the C and C of the recognized token, and returns one of:
|
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=over 4
|
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=item 1
|
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
An array ref with two elements C<[$type, $value]>,
|
136
|
|
|
|
|
|
|
the new C and C to be
|
137
|
|
|
|
|
|
|
returned in the L object.
|
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=item 2
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
An empty array C<()> to signal that this token shall be dicarded.
|
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=back
|
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
As an optimization, the transform subroutine code reference may be
|
146
|
|
|
|
|
|
|
set to C, to signal that the token will be dicarded
|
147
|
|
|
|
|
|
|
and there is no use in accumulating it while matching.
|
148
|
|
|
|
|
|
|
This is usefull to discard comments upfront, instead of
|
149
|
|
|
|
|
|
|
collecting the whole comment, and then pass it to the transform subroutine
|
150
|
|
|
|
|
|
|
just to be discarded afterwards.
|
151
|
|
|
|
|
|
|
See see C above for an example of usage.
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=back
|
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 clone
|
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Creates a copy of this tokenizer object without compiling a new
|
158
|
|
|
|
|
|
|
lexing subroutine. The copied object has all pending input cleared.
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=cut
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
163
|
1
|
|
|
1
|
|
73
|
use base 'Iterator::Simple::Lookahead', 'Class::Accessor';
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
821
|
|
164
|
|
|
|
|
|
|
__PACKAGE__->mk_accessors(
|
165
|
|
|
|
|
|
|
'_lexer', # lexer iterator
|
166
|
|
|
|
|
|
|
'_input', # input iterator
|
167
|
|
|
|
|
|
|
'_line', # current line being processed
|
168
|
|
|
|
|
|
|
'_text', # text being parsed
|
169
|
|
|
|
|
|
|
);
|
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub new {
|
172
|
7
|
|
|
7
|
1
|
3570
|
my($class) = @_;
|
173
|
7
|
|
|
1
|
|
64
|
return $class->_new( sub { return } ); # dummy lexer
|
|
1
|
|
|
|
|
10
|
|
174
|
|
|
|
|
|
|
}
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub clone {
|
177
|
1
|
|
|
1
|
1
|
4
|
my($self) = @_;
|
178
|
1
|
|
|
|
|
4
|
return ref($self)->_new( $self->_lexer );
|
179
|
|
|
|
|
|
|
}
|
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# used by new and clone
|
182
|
|
|
|
|
|
|
sub _new {
|
183
|
8
|
|
|
8
|
|
31
|
my($class, $lexer) = @_;
|
184
|
|
|
|
|
|
|
|
185
|
8
|
|
|
|
|
42
|
my $self = $class->SUPER::new; # init iterator
|
186
|
8
|
|
|
|
|
102
|
$self->_lexer( $lexer );
|
187
|
8
|
|
|
|
|
165
|
$self->_input( Iterator::Simple::Lookahead->new );
|
188
|
8
|
|
|
|
|
139
|
$self->_line( undef );
|
189
|
8
|
|
|
|
|
83
|
$self->_text( "" );
|
190
|
|
|
|
|
|
|
|
191
|
8
|
|
|
|
|
168
|
return $self;
|
192
|
|
|
|
|
|
|
};
|
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
195
|
|
|
|
|
|
|
# compile the lexing subroutine
|
196
|
|
|
|
|
|
|
sub make_lexer {
|
197
|
6
|
|
|
6
|
1
|
24
|
my($self, @tokens) = @_;
|
198
|
6
|
100
|
|
|
|
203
|
@tokens or croak "tokens expected";
|
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# closure for each token attributes, indexed by token sequence nr
|
201
|
5
|
|
|
|
|
28
|
my @type; # token type
|
202
|
|
|
|
|
|
|
my @start_re; # match start of token
|
203
|
5
|
|
|
|
|
0
|
my @end_re; # match end of token
|
204
|
5
|
|
|
|
|
0
|
my @transform; # transform subroutine
|
205
|
5
|
|
|
|
|
0
|
my @discard; # true to discard multi-line token
|
206
|
5
|
|
|
|
|
0
|
my @comment; # comment to show all options of each token branch
|
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# parse the @tokens list
|
209
|
5
|
|
|
|
|
16
|
for (my $id = 0; @tokens; $id++) {
|
210
|
|
|
|
|
|
|
# read type
|
211
|
15
|
|
|
|
|
28
|
$type[$id] = shift @tokens;
|
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
# read regexp
|
214
|
15
|
100
|
|
|
|
130
|
my $re = shift @tokens or croak "regexp expected";
|
215
|
|
|
|
|
|
|
|
216
|
14
|
100
|
|
|
|
39
|
if (ref $re eq 'Regexp') {
|
|
|
50
|
|
|
|
|
|
217
|
11
|
|
|
|
|
17
|
$start_re[$id] = $re;
|
218
|
|
|
|
|
|
|
}
|
219
|
|
|
|
|
|
|
elsif (ref $re eq 'ARRAY') {
|
220
|
3
|
100
|
|
|
|
10
|
@$re == 1 and push @$re, $re->[0];
|
221
|
3
|
50
|
|
|
|
6
|
@$re == 2 or croak "invalid regexp list";
|
222
|
3
|
|
|
|
|
8
|
($start_re[$id], $end_re[$id]) = @$re;
|
223
|
|
|
|
|
|
|
}
|
224
|
|
|
|
|
|
|
else {
|
225
|
0
|
|
|
|
|
0
|
croak "invalid regexp";
|
226
|
|
|
|
|
|
|
}
|
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# read transform, define discard
|
229
|
14
|
100
|
|
|
|
28
|
if (@tokens) {
|
230
|
13
|
100
|
|
|
|
35
|
if (! defined($tokens[0])) {
|
|
|
100
|
|
|
|
|
|
231
|
1
|
|
|
|
|
2
|
$discard[$id] = 1;
|
232
|
1
|
|
|
|
|
2
|
shift @tokens;
|
233
|
|
|
|
|
|
|
}
|
234
|
|
|
|
|
|
|
elsif (ref($tokens[0]) eq 'CODE') {
|
235
|
8
|
|
|
|
|
15
|
$transform[$id] = shift @tokens;
|
236
|
|
|
|
|
|
|
}
|
237
|
|
|
|
|
|
|
}
|
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
# comment
|
240
|
14
|
100
|
|
|
|
31
|
$comment[$id] = join(' ', map {defined($_) ? $_ : ''}
|
|
84
|
|
|
|
|
182
|
|
241
|
|
|
|
|
|
|
$id,
|
242
|
|
|
|
|
|
|
$type[$id],
|
243
|
|
|
|
|
|
|
$start_re[$id],
|
244
|
|
|
|
|
|
|
$end_re[$id],
|
245
|
|
|
|
|
|
|
$transform[$id],
|
246
|
|
|
|
|
|
|
$discard[$id]);
|
247
|
14
|
|
|
|
|
53
|
$comment[$id] =~ s/\n/\\n/g;
|
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
}
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# LEXER code
|
252
|
4
|
|
|
|
|
21
|
my $template_data = {
|
253
|
|
|
|
|
|
|
end_re => \@end_re,
|
254
|
|
|
|
|
|
|
transform => \@transform,
|
255
|
|
|
|
|
|
|
discard => \@discard,
|
256
|
|
|
|
|
|
|
comment => \@comment,
|
257
|
|
|
|
|
|
|
};
|
258
|
4
|
|
|
|
|
12
|
my @template_args = (
|
259
|
|
|
|
|
|
|
DELIMITERS => [ '<%', '%>' ],
|
260
|
|
|
|
|
|
|
HASH => $template_data,
|
261
|
|
|
|
|
|
|
);
|
262
|
|
|
|
|
|
|
|
263
|
4
|
|
|
|
|
17
|
my $code = fill_in_string(<<'END_CODE', @template_args);
|
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
sub {
|
266
|
|
|
|
|
|
|
my($self) = @_;
|
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
for ($self->{_text}) {
|
269
|
|
|
|
|
|
|
LINE:
|
270
|
|
|
|
|
|
|
while (1) { # read lines
|
271
|
|
|
|
|
|
|
while ((pos()||0) >= length()) { # last line consumed
|
272
|
|
|
|
|
|
|
$self->_read_line or return undef;
|
273
|
|
|
|
|
|
|
}
|
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
TOKEN:
|
276
|
|
|
|
|
|
|
while (1) { # read tokens
|
277
|
|
|
|
|
|
|
my $token_line = $self->_line; # start of token line
|
278
|
|
|
|
|
|
|
my $pos0 = pos()||0; # position before match
|
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# need to read new line
|
281
|
|
|
|
|
|
|
if (/ \G \z /gcx) {
|
282
|
|
|
|
|
|
|
next LINE;
|
283
|
|
|
|
|
|
|
}
|
284
|
|
|
|
|
|
|
END_CODE
|
285
|
|
|
|
|
|
|
|
286
|
4
|
|
|
|
|
3268
|
for my $id (0 .. $#type) {
|
287
|
14
|
|
|
|
|
13745
|
$template_data->{id} = $id;
|
288
|
|
|
|
|
|
|
$template_data->{LINE_BLOCK}
|
289
|
14
|
|
|
|
|
37
|
= fill_in_string(<<'END_CODE', @template_args);
|
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
BLOCK:
|
292
|
|
|
|
|
|
|
while (1) { # read multi-line block
|
293
|
|
|
|
|
|
|
<% $discard[$id] ? '' : '$pos0 = pos()||0;' %>
|
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
# need to read new line
|
296
|
|
|
|
|
|
|
if (/ \G \z /gcx) {
|
297
|
|
|
|
|
|
|
$self->_read_line
|
298
|
|
|
|
|
|
|
or $token_line->error(
|
299
|
|
|
|
|
|
|
"unbalanced token at: ".$value);
|
300
|
|
|
|
|
|
|
}
|
301
|
|
|
|
|
|
|
# end
|
302
|
|
|
|
|
|
|
elsif (/ \G (?s: .*?) $end_re[<% $id %>] /gcx) {
|
303
|
|
|
|
|
|
|
<% $discard[$id] ? '' :
|
304
|
|
|
|
|
|
|
'$value .= $self->_capture($pos0);' %>
|
305
|
|
|
|
|
|
|
last BLOCK; # collected whole token
|
306
|
|
|
|
|
|
|
}
|
307
|
|
|
|
|
|
|
# consume all
|
308
|
|
|
|
|
|
|
else {
|
309
|
|
|
|
|
|
|
pos() = length();
|
310
|
|
|
|
|
|
|
<% $discard[$id] ? '' :
|
311
|
|
|
|
|
|
|
'$value .= $self->_capture($pos0);' %>
|
312
|
|
|
|
|
|
|
}
|
313
|
|
|
|
|
|
|
}
|
314
|
|
|
|
|
|
|
END_CODE
|
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
$template_data->{TRANSFORM}
|
317
|
14
|
|
|
|
|
17707
|
= fill_in_string(<<'END_CODE', @template_args);
|
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# call transform routine
|
320
|
|
|
|
|
|
|
my $ret = $transform[<% $id %>]->($type, $value);
|
321
|
|
|
|
|
|
|
next unless $ret; # discard token
|
322
|
|
|
|
|
|
|
($type, $value) = @$ret;
|
323
|
|
|
|
|
|
|
END_CODE
|
324
|
14
|
|
|
|
|
9014
|
$code .= fill_in_string(<<'END_CODE', @template_args);
|
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# <% $comment[$id] %>
|
327
|
|
|
|
|
|
|
elsif (/ \G $start_re[<% $id %>] /gcx) {
|
328
|
|
|
|
|
|
|
my($type, $value) = <%
|
329
|
|
|
|
|
|
|
'' %> ($type[<% $id %>], $self->_capture($pos0));
|
330
|
|
|
|
|
|
|
<% $end_re[$id] ? $LINE_BLOCK : '' %>
|
331
|
|
|
|
|
|
|
<% $transform[$id] ? $TRANSFORM : '' %>
|
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
<% $discard[$id] ? 'next;' : '' %>
|
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
return Asm::Preproc::Token->new(
|
336
|
|
|
|
|
|
|
$type, $value, $token_line);
|
337
|
|
|
|
|
|
|
}
|
338
|
|
|
|
|
|
|
END_CODE
|
339
|
|
|
|
|
|
|
}
|
340
|
|
|
|
|
|
|
|
341
|
4
|
|
|
|
|
5352
|
$code .= fill_in_string(<<'END_CODE', @template_args);
|
342
|
|
|
|
|
|
|
# no token recognized, consume rest of line and die
|
343
|
|
|
|
|
|
|
else {
|
344
|
|
|
|
|
|
|
pos() = length();
|
345
|
|
|
|
|
|
|
$token_line->error("no token recognized at: ".
|
346
|
|
|
|
|
|
|
substr($_, $pos0));
|
347
|
|
|
|
|
|
|
}
|
348
|
|
|
|
|
|
|
}
|
349
|
|
|
|
|
|
|
}
|
350
|
|
|
|
|
|
|
}
|
351
|
|
|
|
|
|
|
};
|
352
|
|
|
|
|
|
|
END_CODE
|
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
#warn $code;
|
355
|
4
|
|
|
|
|
4406
|
my $lexer = eval $code;
|
356
|
4
|
50
|
|
|
|
17
|
$@ and croak "$code\n$@";
|
357
|
|
|
|
|
|
|
|
358
|
4
|
|
|
|
|
28
|
$self->_lexer( $lexer );
|
359
|
|
|
|
|
|
|
}
|
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
362
|
|
|
|
|
|
|
# get the next line from _input, save in _line, _rtext
|
363
|
|
|
|
|
|
|
sub _read_line {
|
364
|
33
|
|
|
33
|
|
61
|
my($self) = @_;
|
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# get one line
|
367
|
33
|
|
|
|
|
74
|
my $line = $self->_input->next;
|
368
|
33
|
|
|
|
|
2869
|
my $text = ""; # default: no text to parse
|
369
|
|
|
|
|
|
|
|
370
|
33
|
100
|
|
|
|
98
|
if (defined $line) {
|
371
|
|
|
|
|
|
|
# convert to Asm::Preproc::Line if needed
|
372
|
23
|
100
|
|
|
|
91
|
ref($line) or $line = Asm::Preproc::Line->new($line);
|
373
|
23
|
|
|
|
|
56
|
$text = $line->text;
|
374
|
23
|
50
|
|
|
|
269
|
$text = "" unless defined $text; # make sure we have something
|
375
|
|
|
|
|
|
|
}
|
376
|
|
|
|
|
|
|
|
377
|
33
|
|
|
|
|
87
|
$self->_line( $line ); # line to return at each token
|
378
|
33
|
|
|
|
|
379
|
$self->{_text} = $text; # text to parse - need to reset pos()
|
379
|
|
|
|
|
|
|
|
380
|
33
|
|
|
|
|
821
|
return $line;
|
381
|
|
|
|
|
|
|
}
|
382
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
383
|
|
|
|
|
|
|
# capture the last match
|
384
|
|
|
|
|
|
|
sub _capture {
|
385
|
73
|
|
|
73
|
|
140
|
my($self, $pos0) = @_;
|
386
|
73
|
|
|
|
|
1433
|
return substr($_, $pos0, pos() - $pos0);
|
387
|
|
|
|
|
|
|
}
|
388
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=head2 from
|
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
Inserts the given input at the head of the input queue to the tokenizer.
|
393
|
|
|
|
|
|
|
The input is either a list of L
|
394
|
|
|
|
|
|
|
objects, or an interator function that returns a
|
395
|
|
|
|
|
|
|
L object on each call.
|
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
The input list and interator can also return plain scalar strings, that
|
398
|
|
|
|
|
|
|
are converted to L on the fly, but
|
399
|
|
|
|
|
|
|
the information on input file location for error messages will not be available.
|
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
The new inserted input is processed before continuing with whatever was
|
402
|
|
|
|
|
|
|
already in the queue.
|
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=cut
|
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
407
|
|
|
|
|
|
|
sub from {
|
408
|
13
|
|
|
13
|
1
|
3935
|
my($self, @input) = @_;
|
409
|
13
|
|
|
|
|
38
|
$self->_input->unget(@input);
|
410
|
13
|
|
|
52
|
|
679
|
$self->unget( sub { $self->_lexer->($self) } );
|
|
52
|
|
|
|
|
5500
|
|
411
|
|
|
|
|
|
|
}
|
412
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 peek
|
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
Peek the Nth element from the stream, inherited from
|
417
|
|
|
|
|
|
|
L.
|
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=head2 next
|
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
Retrieve the next token from the input strean as a
|
422
|
|
|
|
|
|
|
L object, inherited from
|
423
|
|
|
|
|
|
|
L.
|
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head1 AUTHOR, BUGS, SUPPORT, LICENSE, COPYRIGHT
|
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
See L.
|
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=cut
|
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
1;
|