line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Perl::Tokenizer; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
335193
|
use utf8; |
|
5
|
|
|
|
|
126
|
|
|
5
|
|
|
|
|
29
|
|
4
|
5
|
|
|
5
|
|
245
|
use 5.018; |
|
5
|
|
|
|
|
23
|
|
5
|
5
|
|
|
5
|
|
32
|
use strict; |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
123
|
|
6
|
5
|
|
|
5
|
|
29
|
use warnings; |
|
5
|
|
|
|
|
13
|
|
|
5
|
|
|
|
|
188
|
|
7
|
|
|
|
|
|
|
|
8
|
5
|
|
|
5
|
|
31
|
no warnings "experimental::smartmatch"; |
|
5
|
|
|
|
|
10
|
|
|
5
|
|
|
|
|
28772
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
require Exporter; |
11
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
12
|
|
|
|
|
|
|
our @EXPORT = qw(perl_tokens); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our $VERSION = '0.08'; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=encoding utf8 |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 NAME |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Perl::Tokenizer - A tiny Perl code tokenizer. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 VERSION |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
Version 0.08 |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=cut |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
my $make_esc_delim = sub { |
29
|
|
|
|
|
|
|
if ($_[0] eq '\\') { |
30
|
|
|
|
|
|
|
return qr{\\(.*?)\\}s; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
my $delim = quotemeta shift; |
34
|
|
|
|
|
|
|
qr{$delim([^$delim\\]*+(?>\\.|[^$delim\\]+)*+)$delim}s; |
35
|
|
|
|
|
|
|
}; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
my $make_end_delim = sub { |
38
|
|
|
|
|
|
|
if ($_[0] eq '\\') { |
39
|
|
|
|
|
|
|
return qr{.*?\\}s; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
my $delim = quotemeta shift; |
43
|
|
|
|
|
|
|
qr{[^$delim\\]*+(?>\\.|[^$delim\\]+)*+$delim}s; |
44
|
|
|
|
|
|
|
}; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
my %bdelims; |
47
|
|
|
|
|
|
|
foreach my $d ([qw~< >~], [qw~( )~], [qw~{ }~], [qw~[ ]~]) { |
48
|
|
|
|
|
|
|
my @ed = map { quotemeta } @{$d}; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
$bdelims{$d->[0]} = qr{ |
51
|
|
|
|
|
|
|
$ed[0] |
52
|
|
|
|
|
|
|
(?> |
53
|
|
|
|
|
|
|
[^$ed[0]$ed[1]\\]+ |
54
|
|
|
|
|
|
|
| |
55
|
|
|
|
|
|
|
\\. |
56
|
|
|
|
|
|
|
| |
57
|
|
|
|
|
|
|
(??{$bdelims{$d->[0]}}) |
58
|
|
|
|
|
|
|
)* |
59
|
|
|
|
|
|
|
$ed[1] |
60
|
|
|
|
|
|
|
}xs; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# string - single quote |
64
|
|
|
|
|
|
|
my $str_sq = $make_esc_delim->(q{'}); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# string - double quote |
67
|
|
|
|
|
|
|
my $str_dq = $make_esc_delim->(q{"}); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# backtick - backquote |
70
|
|
|
|
|
|
|
my $str_bq = $make_esc_delim->(q{`}); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# regex - // |
73
|
|
|
|
|
|
|
my $match_re = $make_esc_delim->(q{/}); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# glob/readline |
76
|
|
|
|
|
|
|
my $glob = $bdelims{'<'}; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Cache regular expressions that are generated dynamically |
79
|
|
|
|
|
|
|
my %cache_esc; |
80
|
|
|
|
|
|
|
my %cache_end; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Double pairs |
83
|
|
|
|
|
|
|
my $dpairs = qr{ |
84
|
|
|
|
|
|
|
(?= |
85
|
|
|
|
|
|
|
(?(?<=\s) |
86
|
|
|
|
|
|
|
(.) |
87
|
|
|
|
|
|
|
| |
88
|
|
|
|
|
|
|
(\W) |
89
|
|
|
|
|
|
|
) |
90
|
|
|
|
|
|
|
) |
91
|
|
|
|
|
|
|
(??{$bdelims{$+} // ($cache_esc{$+} //= $make_esc_delim->($+))}) |
92
|
|
|
|
|
|
|
}x; |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# Double pairs -- comments |
95
|
|
|
|
|
|
|
my $dcomm = qr{ |
96
|
|
|
|
|
|
|
\s* (?>(?<=\s)\# (?-s:.*) \s*)* |
97
|
|
|
|
|
|
|
}x; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# Quote-like balanced (q{}, m//) |
100
|
|
|
|
|
|
|
my $make_single_q_balanced = sub { |
101
|
|
|
|
|
|
|
my $name = shift; |
102
|
|
|
|
|
|
|
qr{ |
103
|
|
|
|
|
|
|
$name |
104
|
|
|
|
|
|
|
$dcomm |
105
|
|
|
|
|
|
|
$dpairs |
106
|
|
|
|
|
|
|
}x; |
107
|
|
|
|
|
|
|
}; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Quote-like balanced (q{}, m//) |
110
|
|
|
|
|
|
|
my %single_q; |
111
|
|
|
|
|
|
|
foreach my $name (qw(q qq qr qw qx m)) { |
112
|
|
|
|
|
|
|
$single_q{$name} = $make_single_q_balanced->($name); |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# First of balanced pairs |
116
|
|
|
|
|
|
|
my $bbpair = qr~[<\[\{\(]~; |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
my $make_double_q_balanced = sub { |
119
|
|
|
|
|
|
|
my $name = shift; |
120
|
|
|
|
|
|
|
qr{ |
121
|
|
|
|
|
|
|
$name |
122
|
|
|
|
|
|
|
$dcomm |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
(?(?=$bbpair) # balanced pairs (e.g.: s{}//) |
125
|
|
|
|
|
|
|
$dpairs |
126
|
|
|
|
|
|
|
$dcomm |
127
|
|
|
|
|
|
|
$dpairs |
128
|
|
|
|
|
|
|
| # or: single delims (e.g.: s///) |
129
|
|
|
|
|
|
|
$dpairs |
130
|
|
|
|
|
|
|
(??{$cache_end{$+} //= $make_end_delim->($+)}) |
131
|
|
|
|
|
|
|
) |
132
|
|
|
|
|
|
|
}x; |
133
|
|
|
|
|
|
|
}; |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Double quote-like balanced (s{}{}, s///) |
136
|
|
|
|
|
|
|
my %double_q; |
137
|
|
|
|
|
|
|
foreach my $name (qw(tr s y)) { |
138
|
|
|
|
|
|
|
$double_q{$name} = $make_double_q_balanced->($name); |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
my $number = qr{(?=\.?[0-9])[0-9_]*(?:\.(?!\.)[0-9_]*)?(?:[Ee](?:[+-]?[0-9_]+))?}; |
142
|
|
|
|
|
|
|
my $hex_num = qr{0x[_0-9A-Fa-f]*}; |
143
|
|
|
|
|
|
|
my $binary_num = qr{0b[_01]*}; |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
my $var_name = qr{(?>\w+|(?>::)+|'(?=\w))++}; |
146
|
|
|
|
|
|
|
my $vstring = qr{\b(?:v[0-9]+(?>\.[0-9][0-9_]*+)*+ | [0-9][0-9_]*(?>\.[0-9][0-9_]*){2,})\b}x; |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# HERE-DOC beginning |
149
|
|
|
|
|
|
|
my $bhdoc = qr{ |
150
|
|
|
|
|
|
|
<<(?>\h*(?>$str_sq|$str_dq)|\\?+(\w+)) |
151
|
|
|
|
|
|
|
}x; |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
my $tr_flags = qr{[rcds]*}; |
154
|
|
|
|
|
|
|
my $match_flags = qr{[mnsixpogcdual]*}; |
155
|
|
|
|
|
|
|
my $substitution_flags = qr{[mnsixpogcerdual]*}; |
156
|
|
|
|
|
|
|
my $compiled_regex_flags = qr{[mnsixpodual]*}; |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
my @postfix_operators = qw( ++ -- ); |
159
|
|
|
|
|
|
|
my @prec_operators = qw ( ... .. -> ++ -- =~ <=> \\ ? ~~ ~. ~ : >> >= > << <= < == != ! ); |
160
|
|
|
|
|
|
|
my @assignment_operators = qw( && || // ** % ^. ^ &. & |. | * + - = / . << >> ); |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
my $operators = do { |
163
|
|
|
|
|
|
|
local $" = '|'; |
164
|
|
|
|
|
|
|
qr{@{[map{quotemeta} @prec_operators, @assignment_operators]}}; |
165
|
|
|
|
|
|
|
}; |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
my $postfix_operators = do { |
168
|
|
|
|
|
|
|
local $" = '|'; |
169
|
|
|
|
|
|
|
qr{@{[map{quotemeta} @postfix_operators]}}; |
170
|
|
|
|
|
|
|
}; |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
my $assignment_operators = do { |
173
|
|
|
|
|
|
|
local $" = '|'; |
174
|
|
|
|
|
|
|
qr{@{[map{($_ eq '=') ? '=(?!=)' : "\Q$_=\E"} @assignment_operators]}}; |
175
|
|
|
|
|
|
|
}; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
my @special_var_names = (qw( \\ | + / ~ ! @ $ % ^ & * ( ) } < > : ; " ` ' ? = - [ ] . ), '#', ','); |
178
|
|
|
|
|
|
|
my $special_var_names = do { |
179
|
|
|
|
|
|
|
local $" = '|'; |
180
|
|
|
|
|
|
|
qr{@{[map {quotemeta} @special_var_names]}}; |
181
|
|
|
|
|
|
|
}; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
my $bracket_var = qr~(?=\s*\{)(?!\s*\{\s*(?:\^?$var_name|$special_var_names|\{)\s*\})~; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
#<<< |
186
|
|
|
|
|
|
|
my $perl_keywords = |
187
|
|
|
|
|
|
|
qr/(?:CORE::)?(?>(a(?:(?:ccept|larm|tan2|bs|nd))|b(?:in(?:(?:mode|d))|less|reak)|c(?:aller|h(?:dir|mod |
188
|
|
|
|
|
|
|
|o(?:(?:m?p|wn))|r(?:oot)?)|lose(?:dir)?|mp|o(?:n(?:(?:tinue|nect))|s)|rypt)|d(?:bm(?:(?:close|open))| |
189
|
|
|
|
|
|
|
e(?:f(?:(?:ault|ined))|lete)|ie|ump|o)|e(?:ach|ls(?:(?:if|e))|nd(?:grent|hostent|netent|p(?:(?:roto|w) |
190
|
|
|
|
|
|
|
ent)|servent)|of|val|x(?:ec|i(?:(?:sts|t))|p)|q)|f(?:c(?:ntl)?|ileno|lock|or(?:(?:each|m(?:(?:line|at) |
191
|
|
|
|
|
|
|
)|k))?)|g(?:e(?:t(?:gr(?:(?:ent|gid|nam))|host(?:by(?:(?:addr|name))|ent)|login|net(?:by(?:(?:addr|nam |
192
|
|
|
|
|
|
|
e))|ent)|p(?:eername|grp|pid|r(?:iority|oto(?:byn(?:(?:umber|ame))|ent))|w(?:(?:ent|nam|uid)))|s(?:erv |
193
|
|
|
|
|
|
|
(?:by(?:(?:name|port))|ent)|ock(?:(?:name|opt)))|c))?|iven|lob|mtime|oto|rep|t)|hex|i(?:mport|n(?:(?:d |
194
|
|
|
|
|
|
|
ex|t))|octl|sa|f)|join|k(?:(?:eys|ill))|l(?:ast|c(?:first)?|e(?:ngth)?|i(?:(?:sten|nk))|o(?:c(?:al(?:t |
195
|
|
|
|
|
|
|
ime)?|k)|g)|stat|t)|m(?:ap|kdir|sg(?:(?:ctl|get|rcv|snd))|y)|n(?:e(?:xt)?|ot?)|o(?:ct|pen(?:dir)?|rd?| |
196
|
|
|
|
|
|
|
ur)|p(?:ack(?:age)?|ipe|o[ps]|r(?:(?:ototype|intf?))|ush)|quotemeta|r(?:and|e(?:ad(?:(?:(?:lin[ek]|pip |
197
|
|
|
|
|
|
|
e|dir)))?|cv|do|name|quire|set|turn|verse|winddir|f)|index|mdir)|s(?:ay|calar|e(?:ek(?:dir)?|lect|m(?: |
198
|
|
|
|
|
|
|
(?:ctl|get|op))|nd|t(?:grent|hostent|netent|p(?:grp|r(?:(?:iority|otoent))|went)|s(?:(?:erven|ockop)t) |
199
|
|
|
|
|
|
|
))|h(?:ift|m(?:(?:write|read|ctl|get))|utdown)|in|leep|o(?:cket(?:pair)?|rt)|p(?:li(?:(?:ce|t))|rintf) |
200
|
|
|
|
|
|
|
|qrt|rand|t(?:(?:ate?|udy))|ub(?:str)?|y(?:mlink|s(?:(?:write|call|open|read|seek|tem))))|t(?:ell(?:di |
201
|
|
|
|
|
|
|
r)?|i(?:(?:mes?|ed?))|runcate)|u(?:c(?:first)?|mask|n(?:def|l(?:(?:ess|ink))|pack|shift|ti[el])|se|tim |
202
|
|
|
|
|
|
|
e)|v(?:(?:alues|ec))|w(?:a(?:it(?:pid)?|ntarray|rn)|h(?:(?:ile|en))|rite)|xor|BEGIN|END|INIT|CHECK))\b |
203
|
|
|
|
|
|
|
/x; |
204
|
|
|
|
|
|
|
#>>> |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
my $perl_filetests = qr/\-[ABCMORSTWXbcdefgkloprstuwxz]/; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub perl_tokens(&$) { |
209
|
66
|
|
|
66
|
1
|
18092
|
my ($callback, $code) = @_; |
210
|
|
|
|
|
|
|
|
211
|
66
|
50
|
|
|
|
209
|
ref($callback) eq 'CODE' |
212
|
|
|
|
|
|
|
or die "usage: perl_tokens {...} \$code;"; |
213
|
|
|
|
|
|
|
|
214
|
66
|
|
|
|
|
109
|
my $variable = 0; |
215
|
66
|
|
|
|
|
89
|
my $flat = 0; |
216
|
66
|
|
|
|
|
89
|
my $regex = 1; |
217
|
66
|
|
|
|
|
97
|
my $canpod = 1; |
218
|
66
|
|
|
|
|
92
|
my $proto = 0; |
219
|
66
|
|
|
|
|
93
|
my $format = 0; |
220
|
66
|
|
|
|
|
129
|
my $expect_format = 0; |
221
|
66
|
|
|
|
|
97
|
my $postfix_op = 0; |
222
|
66
|
|
|
|
|
84
|
my @heredoc_eofs; |
223
|
|
|
|
|
|
|
|
224
|
66
|
|
|
|
|
96
|
given ($code) { |
225
|
|
|
|
|
|
|
{ |
226
|
66
|
|
66
|
|
|
92
|
when (($expect_format == 1) && /\G(?=\R)/) { |
|
351
|
|
|
|
|
972
|
|
227
|
2
|
50
|
|
|
|
14
|
if (/.*?\R\.\h*(?=\R|\z)/cgs) { |
228
|
2
|
|
|
|
|
8
|
$callback->('vertical_space', $-[0], $-[0] + 1); |
229
|
2
|
|
|
|
|
811
|
$callback->('format', $-[0] + 1, $+[0]); |
230
|
2
|
|
|
|
|
836
|
$expect_format = 0; |
231
|
2
|
|
|
|
|
4
|
$canpod = 1; |
232
|
2
|
|
|
|
|
3
|
$regex = 1; |
233
|
2
|
|
|
|
|
3
|
$postfix_op = 0; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
else { |
236
|
0
|
0
|
|
|
|
0
|
if (/\G(.)/cgs) { |
237
|
0
|
|
|
|
|
0
|
$callback->('unknown_char', $-[0], $+[0]); |
238
|
0
|
|
|
|
|
0
|
redo; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
} |
241
|
2
|
|
|
|
|
3
|
redo; |
242
|
|
|
|
|
|
|
} |
243
|
349
|
|
100
|
|
|
922
|
when ($#heredoc_eofs >= 0 && /\G(?=\R)/) { |
244
|
3
|
|
|
|
|
9
|
my $token = shift @heredoc_eofs; |
245
|
3
|
50
|
|
|
|
98
|
if (m{\G.*?\R\Q$token\E(?=\R|\z)}sgc) { |
246
|
3
|
|
|
|
|
22
|
$callback->('vertical_space', $-[0], $-[0] + 1); |
247
|
3
|
|
|
|
|
1493
|
$callback->('heredoc', $-[0] + 1, $+[0]); |
248
|
|
|
|
|
|
|
} |
249
|
3
|
|
|
|
|
1490
|
redo; |
250
|
|
|
|
|
|
|
} |
251
|
346
|
|
100
|
|
|
3092
|
when (($regex == 1 || /\G(?!<<[0-9])/) && m{\G$bhdoc}gc) { |
|
|
|
100
|
|
|
|
|
252
|
3
|
|
|
|
|
22
|
$callback->('heredoc_beg', $-[0], $+[0]); |
253
|
3
|
|
|
|
|
1433
|
push @heredoc_eofs, $+; |
254
|
3
|
|
|
|
|
8
|
$regex = 0; |
255
|
3
|
|
|
|
|
7
|
$canpod = 0; |
256
|
3
|
|
|
|
|
10
|
redo; |
257
|
|
|
|
|
|
|
} |
258
|
343
|
|
66
|
|
|
932
|
when ($canpod == 1 && /\G^=[a-zA-Z]/cgm) { |
259
|
0
|
0
|
|
|
|
0
|
/\G.*?\R=cut\h*(?=\R|\z)/cgs || /\G.*\z/cgs; |
260
|
0
|
|
|
|
|
0
|
$callback->('pod', $-[0] - 2, $+[0]); |
261
|
0
|
|
|
|
|
0
|
redo; |
262
|
|
|
|
|
|
|
} |
263
|
343
|
|
|
|
|
907
|
when (/\G(?=\s)/) { |
264
|
40
|
|
|
|
|
145
|
when (/\G\h+/cg) { |
265
|
37
|
|
|
|
|
187
|
$callback->('horizontal_space', $-[0], $+[0]); |
266
|
37
|
|
|
|
|
18902
|
redo; |
267
|
|
|
|
|
|
|
} |
268
|
3
|
|
|
|
|
13
|
when (/\G\v+/cg) { |
269
|
3
|
|
|
|
|
23
|
$callback->('vertical_space', $-[0], $+[0]); |
270
|
3
|
|
|
|
|
1360
|
redo; |
271
|
|
|
|
|
|
|
} |
272
|
0
|
|
|
|
|
0
|
when (/\G\s+/cg) { |
273
|
0
|
|
|
|
|
0
|
$callback->({other_space => [$-[0], $+[0]]}); |
274
|
0
|
|
|
|
|
0
|
redo; |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
303
|
|
|
|
|
467
|
when ($variable > 0) { |
278
|
22
|
|
33
|
|
|
228
|
when ((m{\G$var_name}gco || m{\G(?<=\$)\#$var_name}gco)) { |
279
|
22
|
|
|
|
|
138
|
$callback->('var_name', $-[0], $+[0]); |
280
|
22
|
|
|
|
|
10116
|
$regex = 0; |
281
|
22
|
|
|
|
|
50
|
$variable = 0; |
282
|
22
|
|
|
|
|
42
|
$canpod = 0; |
283
|
22
|
100
|
|
|
|
114
|
$flat = /\G(?=\s*\{)/ ? 1 : 0; |
284
|
22
|
|
|
|
|
60
|
redo; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
when ( |
287
|
|
|
|
|
|
|
( |
288
|
|
|
|
|
|
|
m{\G(?!\$+$var_name)}o && ( m~\G(?:\s+|#?)\{\s*(?:$var_name|$special_var_names|[#{])\s*\}~goc |
289
|
|
|
|
|
|
|
|| m{\G(?:\^\w+|#(?!\{)|$special_var_names)}gco |
290
|
|
|
|
|
|
|
|| /\G#/cg) |
291
|
|
|
|
|
|
|
) |
292
|
0
|
|
0
|
|
|
0
|
) { |
|
|
|
0
|
|
|
|
|
293
|
0
|
|
|
|
|
0
|
$callback->('special_var_name', $-[0], $+[0]); |
294
|
0
|
|
|
|
|
0
|
$regex = 0; |
295
|
0
|
|
|
|
|
0
|
$canpod = 0; |
296
|
0
|
|
|
|
|
0
|
$variable = 0; |
297
|
0
|
0
|
|
|
|
0
|
$flat = /\G(?
|
298
|
0
|
|
|
|
|
0
|
redo; |
299
|
|
|
|
|
|
|
} |
300
|
0
|
|
|
|
|
0
|
continue; |
301
|
|
|
|
|
|
|
} |
302
|
281
|
|
|
|
|
478
|
when (/\G#.*/cg) { |
303
|
2
|
|
|
|
|
14
|
$callback->('comment', $-[0], $+[0]); |
304
|
2
|
|
|
|
|
881
|
redo; |
305
|
|
|
|
|
|
|
} |
306
|
279
|
|
100
|
|
|
1070
|
when (($regex == 1 and not($postfix_op)) or /\G(?=[\@\$])/) { |
|
|
|
100
|
|
|
|
|
307
|
148
|
|
|
|
|
274
|
when (/\G\$/cg) { |
308
|
21
|
|
|
|
|
100
|
$callback->('scalar_sigil', $-[0], $+[0]); |
309
|
21
|
50
|
|
|
|
10066
|
/\G$bracket_var/o || ++$variable; |
310
|
21
|
|
|
|
|
52
|
$regex = 0; |
311
|
21
|
|
|
|
|
44
|
$canpod = 0; |
312
|
21
|
|
|
|
|
33
|
$flat = 1; |
313
|
21
|
|
|
|
|
53
|
redo; |
314
|
|
|
|
|
|
|
} |
315
|
127
|
|
|
|
|
208
|
when (/\G\@/cg) { |
316
|
1
|
|
|
|
|
7
|
$callback->('array_sigil', $-[0], $+[0]); |
317
|
1
|
50
|
|
|
|
566
|
/\G$bracket_var/o || ++$variable; |
318
|
1
|
|
|
|
|
4
|
$regex = 0; |
319
|
1
|
|
|
|
|
2
|
$canpod = 0; |
320
|
1
|
|
|
|
|
2
|
$flat = 1; |
321
|
1
|
|
|
|
|
2
|
redo; |
322
|
|
|
|
|
|
|
} |
323
|
126
|
|
|
|
|
196
|
when (/\G\%/cg) { |
324
|
0
|
|
|
|
|
0
|
$callback->('hash_sigil', $-[0], $+[0]); |
325
|
0
|
0
|
|
|
|
0
|
/\G$bracket_var/o || ++$variable; |
326
|
0
|
|
|
|
|
0
|
$regex = 0; |
327
|
0
|
|
|
|
|
0
|
$canpod = 0; |
328
|
0
|
|
|
|
|
0
|
$flat = 1; |
329
|
0
|
|
|
|
|
0
|
redo; |
330
|
|
|
|
|
|
|
} |
331
|
126
|
|
|
|
|
188
|
when (/\G\*/cg) { |
332
|
0
|
|
|
|
|
0
|
$callback->('glob_sigil', $-[0], $+[0]); |
333
|
0
|
0
|
|
|
|
0
|
/\G$bracket_var/o || ++$variable; |
334
|
0
|
|
|
|
|
0
|
$regex = 0; |
335
|
0
|
|
|
|
|
0
|
$canpod = 0; |
336
|
0
|
|
|
|
|
0
|
$flat = 1; |
337
|
0
|
|
|
|
|
0
|
redo; |
338
|
|
|
|
|
|
|
} |
339
|
126
|
|
|
|
|
217
|
when (/\G&/cg) { |
340
|
0
|
|
|
|
|
0
|
$callback->('ampersand_sigil', $-[0], $+[0]); |
341
|
0
|
0
|
|
|
|
0
|
/\G$bracket_var/o || ++$variable; |
342
|
0
|
|
|
|
|
0
|
$regex = 0; |
343
|
0
|
|
|
|
|
0
|
$canpod = 0; |
344
|
0
|
|
|
|
|
0
|
$flat = 1; |
345
|
0
|
|
|
|
|
0
|
redo; |
346
|
|
|
|
|
|
|
} |
347
|
126
|
|
|
|
|
174
|
continue; |
348
|
|
|
|
|
|
|
} |
349
|
257
|
|
33
|
|
|
464
|
when ($proto == 1 && /\G\(.*?\)/cgs) { |
350
|
0
|
|
|
|
|
0
|
$callback->('sub_proto', $-[0], $+[0]); |
351
|
0
|
|
|
|
|
0
|
$proto = 0; |
352
|
0
|
|
|
|
|
0
|
$canpod = 0; |
353
|
0
|
|
|
|
|
0
|
$regex = 0; |
354
|
0
|
|
|
|
|
0
|
redo; |
355
|
|
|
|
|
|
|
} |
356
|
257
|
|
|
|
|
373
|
when (/\G\(/cg) { |
357
|
2
|
|
|
|
|
17
|
$callback->('parenthesis_open', $-[0], $+[0]); |
358
|
2
|
|
|
|
|
961
|
$regex = 1; |
359
|
2
|
|
|
|
|
7
|
$flat = 0; |
360
|
2
|
|
|
|
|
5
|
$canpod = 0; |
361
|
2
|
|
|
|
|
8
|
redo; |
362
|
|
|
|
|
|
|
} |
363
|
255
|
|
|
|
|
376
|
when (/\G\)/cg) { |
364
|
2
|
|
|
|
|
12
|
$callback->('parenthesis_close', $-[0], $+[0]); |
365
|
2
|
|
|
|
|
952
|
$regex = 0; |
366
|
2
|
|
|
|
|
5
|
$canpod = 0; |
367
|
2
|
|
|
|
|
7
|
$flat = 0; |
368
|
2
|
|
|
|
|
8
|
redo; |
369
|
|
|
|
|
|
|
} |
370
|
253
|
|
|
|
|
374
|
when (/\G\{/cg) { |
371
|
4
|
|
|
|
|
25
|
$callback->('curly_bracket_open', $-[0], $+[0]); |
372
|
4
|
|
|
|
|
1865
|
$regex = 1; |
373
|
4
|
|
|
|
|
12
|
$proto = 0; |
374
|
4
|
|
|
|
|
14
|
redo; |
375
|
|
|
|
|
|
|
} |
376
|
249
|
|
|
|
|
338
|
when (/\G\}/cg) { |
377
|
4
|
|
|
|
|
27
|
$callback->('curly_bracket_close', $-[0], $+[0]); |
378
|
4
|
|
|
|
|
2182
|
$flat = 0; |
379
|
4
|
|
|
|
|
14
|
$canpod = 1; |
380
|
4
|
|
|
|
|
11
|
redo; |
381
|
|
|
|
|
|
|
} |
382
|
245
|
|
|
|
|
330
|
when (/\G\[/cg) { |
383
|
2
|
|
|
|
|
13
|
$callback->('right_bracket_open', $-[0], $+[0]); |
384
|
2
|
|
|
|
|
1006
|
$regex = 1; |
385
|
2
|
|
|
|
|
8
|
$postfix_op = 0; |
386
|
2
|
|
|
|
|
5
|
$flat = 0; |
387
|
2
|
|
|
|
|
6
|
$canpod = 0; |
388
|
2
|
|
|
|
|
8
|
redo; |
389
|
|
|
|
|
|
|
} |
390
|
243
|
|
|
|
|
314
|
when (/\G\]/cg) { |
391
|
2
|
|
|
|
|
10
|
$callback->('right_bracket_close', $-[0], $+[0]); |
392
|
2
|
|
|
|
|
976
|
$regex = 0; |
393
|
2
|
|
|
|
|
5
|
$canpod = 0; |
394
|
2
|
|
|
|
|
6
|
$flat = 0; |
395
|
2
|
|
|
|
|
7
|
redo; |
396
|
|
|
|
|
|
|
} |
397
|
241
|
|
|
|
|
321
|
when ($proto == 0) { |
398
|
241
|
|
100
|
|
|
465
|
when ($canpod == 1 && /\Gformat\b/cg) { |
399
|
2
|
|
|
|
|
9
|
$callback->('keyword', $-[0], $+[0]); |
400
|
2
|
|
|
|
|
913
|
$regex = 0; |
401
|
2
|
|
|
|
|
3
|
$canpod = 0; |
402
|
2
|
|
|
|
|
3
|
$format = 1; |
403
|
2
|
|
|
|
|
4
|
redo; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
when ( |
406
|
|
|
|
|
|
|
( |
407
|
|
|
|
|
|
|
$flat == 0 || ($flat == 1 |
408
|
|
|
|
|
|
|
&& (/\G(?!\w+\h*\})/)) |
409
|
|
|
|
|
|
|
) |
410
|
|
|
|
|
|
|
&& m{\G(?)$perl_keywords}gco |
411
|
239
|
|
66
|
|
|
4339
|
) { |
|
|
|
100
|
|
|
|
|
412
|
5
|
|
|
|
|
21
|
my $name = $1; |
413
|
5
|
|
|
|
|
27
|
my @pos = ($-[0], $+[0]); |
414
|
5
|
|
|
|
|
23
|
my $is_bare_word = /\G(?=\h*=>)/; |
415
|
5
|
50
|
|
|
|
27
|
$callback->(($is_bare_word ? 'bare_word' : 'keyword'), @pos); |
416
|
|
|
|
|
|
|
|
417
|
5
|
50
|
33
|
|
|
2952
|
if ($name eq 'sub' and not $is_bare_word) { |
418
|
0
|
|
|
|
|
0
|
$proto = 1; |
419
|
0
|
|
|
|
|
0
|
$regex = 0; |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
else { |
422
|
5
|
|
|
|
|
14
|
$regex = 1; |
423
|
5
|
|
|
|
|
13
|
$postfix_op = 0; |
424
|
|
|
|
|
|
|
} |
425
|
5
|
|
|
|
|
14
|
$canpod = 0; |
426
|
5
|
|
|
|
|
21
|
redo; |
427
|
|
|
|
|
|
|
} |
428
|
234
|
|
|
|
|
316
|
continue; |
429
|
|
|
|
|
|
|
} |
430
|
234
|
|
33
|
|
|
913
|
when (/\G(?!(?>tr|[ysm]|q[rwxq]?)\h*=>)/ && /\G(?)/) { |
431
|
|
|
|
|
|
|
|
432
|
234
|
100
|
100
|
|
|
898
|
($flat == 1 && /\G(?=[a-z]+\h*\})/) || /\G((?<=\{)|(?<=\{\h))(?=[a-z]+\h*\})/ ? continue : (); |
433
|
|
|
|
|
|
|
|
434
|
231
|
|
|
|
|
601
|
when (m{\G $double_q{s} $substitution_flags }gcxo) { |
435
|
4
|
|
|
|
|
24
|
$callback->('substitution', $-[0], $+[0]); |
436
|
4
|
|
|
|
|
1967
|
$regex = 0; |
437
|
4
|
|
|
|
|
9
|
$canpod = 0; |
438
|
4
|
|
|
|
|
8
|
redo; |
439
|
|
|
|
|
|
|
} |
440
|
227
|
|
|
|
|
875
|
when (m{\G (?> $double_q{tr} | $double_q{y} ) $tr_flags }gxco) { |
441
|
2
|
|
|
|
|
12
|
$callback->('transliteration', $-[0], $+[0]); |
442
|
2
|
|
|
|
|
1299
|
$regex = 0; |
443
|
2
|
|
|
|
|
3
|
$canpod = 0; |
444
|
2
|
|
|
|
|
6
|
redo; |
445
|
|
|
|
|
|
|
} |
446
|
225
|
|
100
|
|
|
1021
|
when ((m{\G $single_q{m} $match_flags }gcxo || ($regex == 1 && m{\G $match_re $match_flags }gcxo))) { |
|
|
|
100
|
|
|
|
|
447
|
4
|
|
|
|
|
28
|
$callback->('match_regex', $-[0], $+[0]); |
448
|
4
|
|
|
|
|
1879
|
$regex = 0; |
449
|
4
|
|
|
|
|
7
|
$canpod = 0; |
450
|
4
|
|
|
|
|
9
|
redo; |
451
|
|
|
|
|
|
|
} |
452
|
221
|
|
|
|
|
443
|
when (m{\G $single_q{qr} $compiled_regex_flags }gcxo) { |
453
|
1
|
|
|
|
|
6
|
$callback->('compiled_regex', $-[0], $+[0]); |
454
|
1
|
|
|
|
|
585
|
$regex = 0; |
455
|
1
|
|
|
|
|
3
|
$canpod = 0; |
456
|
1
|
|
|
|
|
4
|
redo; |
457
|
|
|
|
|
|
|
} |
458
|
220
|
|
|
|
|
356
|
when (m{\G$single_q{q}}gco) { |
459
|
0
|
|
|
|
|
0
|
$callback->('q_string', $-[0], $+[0]); |
460
|
0
|
|
|
|
|
0
|
$regex = 0; |
461
|
0
|
|
|
|
|
0
|
$canpod = 0; |
462
|
0
|
|
|
|
|
0
|
redo; |
463
|
|
|
|
|
|
|
} |
464
|
220
|
|
|
|
|
325
|
when (m{\G$single_q{qq}}gco) { |
465
|
0
|
|
|
|
|
0
|
$callback->('qq_string', $-[0], $+[0]); |
466
|
0
|
|
|
|
|
0
|
$regex = 0; |
467
|
0
|
|
|
|
|
0
|
$canpod = 0; |
468
|
0
|
|
|
|
|
0
|
redo; |
469
|
|
|
|
|
|
|
} |
470
|
220
|
|
|
|
|
347
|
when (m{\G$single_q{qw}}gco) { |
471
|
0
|
|
|
|
|
0
|
$callback->('qw_string', $-[0], $+[0]); |
472
|
0
|
|
|
|
|
0
|
$regex = 0; |
473
|
0
|
|
|
|
|
0
|
$canpod = 0; |
474
|
0
|
|
|
|
|
0
|
redo; |
475
|
|
|
|
|
|
|
} |
476
|
220
|
|
|
|
|
325
|
when (m{\G$single_q{qx}}gco) { |
477
|
0
|
|
|
|
|
0
|
$callback->('qx_string', $-[0], $+[0]); |
478
|
0
|
|
|
|
|
0
|
$regex = 0; |
479
|
0
|
|
|
|
|
0
|
$canpod = 0; |
480
|
0
|
|
|
|
|
0
|
redo; |
481
|
|
|
|
|
|
|
} |
482
|
220
|
|
|
|
|
316
|
continue; |
483
|
|
|
|
|
|
|
} |
484
|
223
|
|
|
|
|
461
|
when (m{\G$str_dq}gco) { |
485
|
10
|
|
|
|
|
36
|
$callback->('double_quoted_string', $-[0], $+[0]); |
486
|
10
|
|
|
|
|
4076
|
$regex = 0; |
487
|
10
|
|
|
|
|
14
|
$canpod = 0; |
488
|
10
|
|
|
|
|
14
|
$flat = 0; |
489
|
10
|
|
|
|
|
20
|
redo; |
490
|
|
|
|
|
|
|
} |
491
|
213
|
|
|
|
|
339
|
when (m{\G$str_sq}gco) { |
492
|
0
|
|
|
|
|
0
|
$callback->('single_quoted_string', $-[0], $+[0]); |
493
|
0
|
|
|
|
|
0
|
$regex = 0; |
494
|
0
|
|
|
|
|
0
|
$canpod = 0; |
495
|
0
|
|
|
|
|
0
|
$flat = 0; |
496
|
0
|
|
|
|
|
0
|
redo; |
497
|
|
|
|
|
|
|
} |
498
|
213
|
|
|
|
|
340
|
when (m{\G$str_bq}gco) { |
499
|
0
|
|
|
|
|
0
|
$callback->('backtick', $-[0], $+[0]); |
500
|
0
|
|
|
|
|
0
|
$regex = 0; |
501
|
0
|
|
|
|
|
0
|
$canpod = 0; |
502
|
0
|
|
|
|
|
0
|
$flat = 0; |
503
|
0
|
|
|
|
|
0
|
redo; |
504
|
|
|
|
|
|
|
} |
505
|
213
|
|
|
|
|
303
|
when (/\G;/cg) { |
506
|
4
|
|
|
|
|
29
|
$callback->('semicolon', $-[0], $+[0]); |
507
|
4
|
|
|
|
|
2241
|
$canpod = 1; |
508
|
4
|
|
|
|
|
15
|
$regex = 1; |
509
|
4
|
|
|
|
|
36
|
$postfix_op = 0; |
510
|
4
|
|
|
|
|
22
|
$proto = 0; |
511
|
4
|
|
|
|
|
10
|
$flat = 0; |
512
|
4
|
|
|
|
|
13
|
redo; |
513
|
|
|
|
|
|
|
} |
514
|
209
|
|
|
|
|
261
|
when (/\G=>/cg) { |
515
|
0
|
|
|
|
|
0
|
$callback->('fat_comma', $-[0], $+[0]); |
516
|
0
|
|
|
|
|
0
|
$regex = 1; |
517
|
0
|
|
|
|
|
0
|
$postfix_op = 0; |
518
|
0
|
|
|
|
|
0
|
$canpod = 0; |
519
|
0
|
|
|
|
|
0
|
$flat = 0; |
520
|
0
|
|
|
|
|
0
|
redo; |
521
|
|
|
|
|
|
|
} |
522
|
209
|
|
|
|
|
296
|
when (/\G,/cg) { |
523
|
3
|
|
|
|
|
15
|
$callback->('comma', $-[0], $+[0]); |
524
|
3
|
|
|
|
|
1465
|
$regex = 1; |
525
|
3
|
|
|
|
|
10
|
$postfix_op = 0; |
526
|
3
|
|
|
|
|
7
|
$canpod = 0; |
527
|
3
|
|
|
|
|
6
|
$flat = 0; |
528
|
3
|
|
|
|
|
9
|
redo; |
529
|
|
|
|
|
|
|
} |
530
|
206
|
|
|
|
|
436
|
when (m{\G$vstring}gco) { |
531
|
0
|
|
|
|
|
0
|
$callback->('v_string', $-[0], $+[0]); |
532
|
0
|
|
|
|
|
0
|
$regex = 0; |
533
|
0
|
|
|
|
|
0
|
$canpod = 0; |
534
|
0
|
|
|
|
|
0
|
redo; |
535
|
|
|
|
|
|
|
} |
536
|
206
|
|
|
|
|
334
|
when (m{\G$perl_filetests\b}gco) { |
537
|
1
|
|
|
|
|
6
|
my @pos = ($-[0], $+[0]); |
538
|
1
|
|
|
|
|
5
|
my $is_bare_word = /\G(?=\h*=>)/; |
539
|
1
|
50
|
|
|
|
8
|
$callback->(($is_bare_word ? 'bare_word' : 'file_test'), @pos); |
540
|
1
|
50
|
|
|
|
480
|
if ($is_bare_word) { |
541
|
0
|
|
|
|
|
0
|
$canpod = 0; |
542
|
0
|
|
|
|
|
0
|
$regex = 0; |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
else { |
545
|
1
|
|
|
|
|
3
|
$regex = 1; # ambiguous, but possible |
546
|
1
|
|
|
|
|
4
|
$postfix_op = 0; |
547
|
1
|
|
|
|
|
3
|
$canpod = 0; |
548
|
|
|
|
|
|
|
} |
549
|
1
|
|
|
|
|
4
|
redo; |
550
|
|
|
|
|
|
|
} |
551
|
205
|
|
|
|
|
321
|
when (/\G(?=__)/) { |
552
|
0
|
|
|
|
|
0
|
when (m{\G__(?>DATA|END)__\b\h*+(?!=>).*\z}gcs) { |
553
|
0
|
|
|
|
|
0
|
$callback->('data', $-[0], $+[0]); |
554
|
0
|
|
|
|
|
0
|
redo; |
555
|
|
|
|
|
|
|
} |
556
|
0
|
|
|
|
|
0
|
when (m{\G__(?>SUB|FILE|PACKAGE|LINE)__\b(?!\h*+=>)}gc) { |
557
|
0
|
|
|
|
|
0
|
$callback->('special_keyword', $-[0], $+[0]); |
558
|
0
|
|
|
|
|
0
|
$canpod = 0; |
559
|
0
|
|
|
|
|
0
|
$regex = 0; |
560
|
0
|
|
|
|
|
0
|
redo; |
561
|
|
|
|
|
|
|
} |
562
|
0
|
|
|
|
|
0
|
continue; |
563
|
|
|
|
|
|
|
} |
564
|
205
|
|
100
|
|
|
699
|
when ($regex == 1 && /\G(?
|
|
|
|
100
|
|
|
|
|
565
|
1
|
|
|
|
|
5
|
$callback->('glob_readline', $-[0], $+[0]); |
566
|
1
|
|
|
|
|
473
|
$regex = 0; |
567
|
1
|
|
|
|
|
4
|
$canpod = 0; |
568
|
1
|
|
|
|
|
3
|
redo; |
569
|
|
|
|
|
|
|
} |
570
|
204
|
|
|
|
|
535
|
when (m{\G$assignment_operators}gco) { |
571
|
16
|
|
|
|
|
67
|
$callback->('assignment_operator', $-[0], $+[0]); |
572
|
16
|
100
|
|
|
|
6906
|
if ($format) { |
573
|
2
|
50
|
|
|
|
12
|
if (substr($_, $-[0], $+[0] - $-[0]) eq '=') { |
574
|
2
|
|
|
|
|
4
|
$format = 0; |
575
|
2
|
|
|
|
|
3
|
$expect_format = 1; |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
} |
578
|
16
|
|
|
|
|
33
|
$regex = 1; |
579
|
16
|
|
|
|
|
24
|
$canpod = 0; |
580
|
16
|
|
|
|
|
23
|
$flat = 0; |
581
|
16
|
|
|
|
|
34
|
redo; |
582
|
|
|
|
|
|
|
} |
583
|
188
|
|
|
|
|
241
|
when (/\G->/cg) { |
584
|
0
|
|
|
|
|
0
|
$callback->('dereference_operator', $-[0], $+[0]); |
585
|
0
|
|
|
|
|
0
|
$regex = 0; |
586
|
0
|
|
|
|
|
0
|
$canpod = 0; |
587
|
0
|
|
|
|
|
0
|
$flat = 1; |
588
|
0
|
|
|
|
|
0
|
redo; |
589
|
|
|
|
|
|
|
} |
590
|
188
|
|
66
|
|
|
764
|
when (m{\G$operators}gco || /\Gx(?=[0-9\W])/cg) { |
591
|
50
|
|
|
|
|
191
|
$callback->('operator', $-[0], $+[0]); |
592
|
50
|
100
|
|
|
|
22051
|
if (substr($_, $-[0], ($+[0] - $-[0])) =~ /^$postfix_operators\z/) { |
593
|
5
|
|
|
|
|
14
|
$postfix_op = 1; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
else { |
596
|
45
|
|
|
|
|
86
|
$postfix_op = 0; |
597
|
|
|
|
|
|
|
} |
598
|
50
|
|
|
|
|
99
|
$canpod = 0; |
599
|
50
|
|
|
|
|
69
|
$regex = 1; |
600
|
50
|
|
|
|
|
68
|
$flat = 0; |
601
|
50
|
|
|
|
|
105
|
redo; |
602
|
|
|
|
|
|
|
} |
603
|
138
|
|
|
|
|
233
|
when (m{\G$hex_num}gco) { |
604
|
0
|
|
|
|
|
0
|
$callback->('hex_number', $-[0], $+[0]); |
605
|
0
|
|
|
|
|
0
|
$regex = 0; |
606
|
0
|
|
|
|
|
0
|
$canpod = 0; |
607
|
0
|
|
|
|
|
0
|
redo; |
608
|
|
|
|
|
|
|
} |
609
|
138
|
|
|
|
|
208
|
when (m{\G$binary_num}gco) { |
610
|
0
|
|
|
|
|
0
|
$callback->('binary_number', $-[0], $+[0]); |
611
|
0
|
|
|
|
|
0
|
$regex = 0; |
612
|
0
|
|
|
|
|
0
|
$canpod = 0; |
613
|
0
|
|
|
|
|
0
|
redo; |
614
|
|
|
|
|
|
|
} |
615
|
138
|
|
|
|
|
460
|
when (m{\G$number}gco) { |
616
|
66
|
|
|
|
|
243
|
$callback->('number', $-[0], $+[0]); |
617
|
66
|
|
|
|
|
28399
|
$regex = 0; |
618
|
66
|
|
|
|
|
95
|
$canpod = 0; |
619
|
66
|
|
|
|
|
128
|
redo; |
620
|
|
|
|
|
|
|
} |
621
|
72
|
|
|
|
|
113
|
when (m{\GSTD(?>OUT|ERR|IN)\b}gc) { |
622
|
2
|
|
|
|
|
12
|
$callback->('special_fh', $-[0], $+[0]); |
623
|
2
|
|
|
|
|
891
|
$regex = 1; |
624
|
2
|
|
|
|
|
6
|
$postfix_op = 0; |
625
|
2
|
|
|
|
|
3
|
$canpod = 0; |
626
|
2
|
|
|
|
|
6
|
redo; |
627
|
|
|
|
|
|
|
} |
628
|
70
|
|
|
|
|
264
|
when (m{\G$var_name}gco) { |
629
|
4
|
50
|
|
|
|
40
|
$callback->(($proto == 1 ? 'sub_name' : 'bare_word'), $-[0], $+[0]); |
630
|
4
|
|
|
|
|
1880
|
$regex = 0; |
631
|
4
|
|
|
|
|
12
|
$canpod = 0; |
632
|
4
|
|
|
|
|
8
|
$flat = 0; |
633
|
4
|
|
|
|
|
15
|
redo; |
634
|
|
|
|
|
|
|
} |
635
|
66
|
|
|
|
|
160
|
when (/\G\z/cg) { # all done |
636
|
66
|
|
|
|
|
138
|
break; |
637
|
|
|
|
|
|
|
} |
638
|
0
|
|
|
|
|
0
|
default { |
639
|
0
|
0
|
|
|
|
0
|
if (/\G(.)/cgs) { |
640
|
0
|
|
|
|
|
0
|
$callback->('unknown_char', $-[0], $+[0]); |
641
|
0
|
|
|
|
|
0
|
redo; |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
} |
645
|
|
|
|
|
|
|
} |
646
|
|
|
|
|
|
|
|
647
|
66
|
|
|
|
|
1837
|
return pos($code); |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
1; |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=head1 SYNOPSIS |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
use Perl::Tokenizer; |
655
|
|
|
|
|
|
|
my $code = 'my $num = 42;'; |
656
|
|
|
|
|
|
|
perl_tokens { print "@_\n" } $code; |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=head1 DESCRIPTION |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Perl::Tokenizer is a tiny tokenizer which splits a given Perl code into a list of tokens, using the power of regular expressions. |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=head1 SUBROUTINES |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=over 4 |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=item perl_tokens(&$) |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
This function takes a callback subroutine and a string. The subroutine is called for each token in real-time. |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
perl_tokens { |
671
|
|
|
|
|
|
|
my ($token, $pos_beg, $pos_end) = @_; |
672
|
|
|
|
|
|
|
... |
673
|
|
|
|
|
|
|
} $code; |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
The positions are absolute to the string. |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=back |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=head2 EXPORT |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
The function B is exported by default. This is the only function provided by this module. |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
=head1 TOKENS |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
The standard token names that are available are: |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
format .................. Format text |
688
|
|
|
|
|
|
|
heredoc_beg ............. The beginning of a here-document ('<<"EOT"') |
689
|
|
|
|
|
|
|
heredoc ................. The content of a here-document |
690
|
|
|
|
|
|
|
pod ..................... An inline POD document, until '=cut' or end of the file |
691
|
|
|
|
|
|
|
horizontal_space ........ Horizontal whitespace (matched by /\h/) |
692
|
|
|
|
|
|
|
vertical_space .......... Vertical whitespace (matched by /\v/) |
693
|
|
|
|
|
|
|
other_space ............. Whitespace that is neither vertical nor horizontal (matched by /\s/) |
694
|
|
|
|
|
|
|
var_name ................ Alphanumeric name of a variable (excluding the sigil) |
695
|
|
|
|
|
|
|
special_var_name ........ Non-alphanumeric name of a variable, such as $/ or $^H (excluding the sigil) |
696
|
|
|
|
|
|
|
sub_name ................ Subroutine name |
697
|
|
|
|
|
|
|
sub_proto ............... Subroutine prototype |
698
|
|
|
|
|
|
|
comment ................. A #-to-newline comment (excluding the newline) |
699
|
|
|
|
|
|
|
scalar_sigil ............ The sigil of a scalar variable: '$' |
700
|
|
|
|
|
|
|
array_sigil ............. The sigil of an array variable: '@' |
701
|
|
|
|
|
|
|
hash_sigil .............. The sigil of a hash variable: '%' |
702
|
|
|
|
|
|
|
glob_sigil .............. The sigil of a glob symbol: '*' |
703
|
|
|
|
|
|
|
ampersand_sigil ......... The sigil of a subroutine call: '&' |
704
|
|
|
|
|
|
|
parenthesis_open ........ Open parenthesis: '(' |
705
|
|
|
|
|
|
|
parenthesis_close ....... Closed parenthesis: ')' |
706
|
|
|
|
|
|
|
right_bracket_open ...... Open right bracket: '[' |
707
|
|
|
|
|
|
|
right_bracket_close ..... Closed right bracket: ']' |
708
|
|
|
|
|
|
|
curly_bracket_open ...... Open curly bracket: '{' |
709
|
|
|
|
|
|
|
curly_bracket_close ..... Closed curly bracket: '}' |
710
|
|
|
|
|
|
|
substitution ............ Regex substitution: s/.../.../ |
711
|
|
|
|
|
|
|
transliteration.......... Transliteration: tr/.../.../ or y/.../.../ |
712
|
|
|
|
|
|
|
match_regex ............. Regex in matching context: m/.../ |
713
|
|
|
|
|
|
|
compiled_regex .......... Quoted compiled regex: qr/.../ |
714
|
|
|
|
|
|
|
q_string ................ Single quoted string: q/.../ |
715
|
|
|
|
|
|
|
qq_string ............... Double quoted string: qq/.../ |
716
|
|
|
|
|
|
|
qw_string ............... List of quoted words: qw/.../ |
717
|
|
|
|
|
|
|
qx_string ............... System command quoted string: qx/.../ |
718
|
|
|
|
|
|
|
backtick ................ Backtick system command quoted string: `...` |
719
|
|
|
|
|
|
|
single_quoted_string .... Single quoted string, as: '...' |
720
|
|
|
|
|
|
|
double_quoted_string .... Double quoted string, as: "..." |
721
|
|
|
|
|
|
|
bare_word ............... Unquoted string |
722
|
|
|
|
|
|
|
glob_readline ........... or |
723
|
|
|
|
|
|
|
v_string ................ Version string: "vX" or "X.X.X" |
724
|
|
|
|
|
|
|
file_test ............... File test operator (-X), such as: "-d", "-e", etc... |
725
|
|
|
|
|
|
|
data .................... The content of `__DATA__` or `__END__` sections |
726
|
|
|
|
|
|
|
keyword ................. Regular Perl keyword, such as: `if`, `else`, etc... |
727
|
|
|
|
|
|
|
special_keyword ......... Special Perl keyword, such as: `__PACKAGE__`, `__FILE__`, etc... |
728
|
|
|
|
|
|
|
comma ................... Comma: ',' |
729
|
|
|
|
|
|
|
fat_comma ............... Fat comma: '=>' |
730
|
|
|
|
|
|
|
operator ................ Primitive operator, such as: '+', '||', etc... |
731
|
|
|
|
|
|
|
assignment_operator ..... '=' or any assignment operator: '+=', '||=', etc... |
732
|
|
|
|
|
|
|
dereference_operator .... Arrow dereference operator: '->' |
733
|
|
|
|
|
|
|
hex_number .............. Hexadecimal literal number: 0x... |
734
|
|
|
|
|
|
|
binary_number ........... Binary literal number: 0b... |
735
|
|
|
|
|
|
|
number .................. Decimal literal number, such as 42, 3.1e4, etc... |
736
|
|
|
|
|
|
|
special_fh .............. Special file-handle name, such as 'STDIN', 'STDOUT', etc... |
737
|
|
|
|
|
|
|
unknown_char ............ Unknown or unexpected character |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head1 EXAMPLE |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
For this code: |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
my $num = 42; |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
it generates the following tokens: |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
# TOKEN POS |
748
|
|
|
|
|
|
|
( keyword => ( 0, 2) ) |
749
|
|
|
|
|
|
|
( horizontal_space => ( 2, 3) ) |
750
|
|
|
|
|
|
|
( scalar_sigil => ( 3, 4) ) |
751
|
|
|
|
|
|
|
( var_name => ( 4, 7) ) |
752
|
|
|
|
|
|
|
( horizontal_space => ( 7, 8) ) |
753
|
|
|
|
|
|
|
( assignment_operator => ( 8, 9) ) |
754
|
|
|
|
|
|
|
( horizontal_space => ( 9, 10) ) |
755
|
|
|
|
|
|
|
( number => (10, 12) ) |
756
|
|
|
|
|
|
|
( semicolon => (12, 13) ) |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=head1 REPOSITORY |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
L |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
=head1 AUTHOR |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
Daniel "Trizen" Șuteu, Etrizenx@gmail.comE |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
Copyright (C) 2013-2017 |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
771
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.22.0 or, |
772
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
=cut |