line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# tokenize PHP source files |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
package PHP::Decode::Tokenizer; |
5
|
|
|
|
|
|
|
|
6
|
7
|
|
|
7
|
|
68950
|
use strict; |
|
7
|
|
|
|
|
28
|
|
|
7
|
|
|
|
|
194
|
|
7
|
7
|
|
|
7
|
|
34
|
use warnings; |
|
7
|
|
|
|
|
17
|
|
|
7
|
|
|
|
|
21848
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '1.47'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# Initialize PHP::Decode::Tokenizer |
12
|
|
|
|
|
|
|
# {inscript} - set to indicate already inside of script |
13
|
|
|
|
|
|
|
# {warn} - warning message handler |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
sub new { |
16
|
796
|
|
|
796
|
1
|
2911
|
my ($class, %args) = @_; |
17
|
|
|
|
|
|
|
my $self = bless { |
18
|
|
|
|
|
|
|
inscript => 0, # look for start of script |
19
|
|
|
|
0
|
|
|
warn => sub { }, |
20
|
796
|
|
|
|
|
5182
|
%args, # might override preceding keys |
21
|
|
|
|
|
|
|
}, $class; |
22
|
796
|
|
|
|
|
3769
|
return $self; |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub inc_linenum { |
26
|
1060
|
|
|
1060
|
0
|
2762
|
my ($self, $s) = @_; |
27
|
|
|
|
|
|
|
|
28
|
1060
|
|
|
|
|
1724
|
my $off = 0; |
29
|
1060
|
|
|
|
|
3905
|
while (($off = index($s, "\n", $off)) != -1) { |
30
|
4
|
|
|
|
|
12
|
$self->add_white("\n"); |
31
|
4
|
|
|
|
|
9
|
$off++; |
32
|
|
|
|
|
|
|
} |
33
|
1060
|
|
|
|
|
1739
|
return; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# extract variables from interpolated strings |
37
|
|
|
|
|
|
|
# |
38
|
|
|
|
|
|
|
sub expand_str { |
39
|
215
|
|
|
215
|
0
|
475
|
my ($self, $s, $off) = @_; |
40
|
215
|
|
|
|
|
358
|
my @word = (); |
41
|
215
|
|
|
|
|
289
|
my $cont; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# http://php.net/manual/en/language.types.string.php#language.types.string.parsing |
44
|
|
|
|
|
|
|
# for variable names see: https://www.php.net/manual/en/language.variables.basics.php |
45
|
|
|
|
|
|
|
# |
46
|
215
|
|
|
|
|
355
|
while (1) { |
47
|
269
|
100
|
|
|
|
1050
|
unless ($s =~ /\G([^\\\$\{]*)([\\\$\{]|$)/sgc) { |
48
|
1
|
|
|
|
|
4
|
last; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
#print ">>> '$1' '$2'\n"; |
51
|
|
|
|
|
|
|
#$self->inc_linenum($1); |
52
|
268
|
|
|
|
|
692
|
push(@word, $1); |
53
|
268
|
100
|
|
|
|
584
|
if ($2 ne "\\") { |
54
|
|
|
|
|
|
|
# http://php.net/manual/en/language.types.string.php#language.types.string.parsing |
55
|
249
|
100
|
|
|
|
739
|
if ($2 eq '$') { |
|
|
100
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# expand simple variable |
57
|
22
|
100
|
|
|
|
95
|
if ($s =~ /\G([a-zA-Z_\x80-\xff][\w\x80-\xff]*)((\:\:|\-\>)([a-zA-Z_\x80-\xff][\w\x80-\xff]*))?/sgc) { |
58
|
19
|
|
|
|
|
43
|
my $w = join('', @word); |
59
|
19
|
100
|
|
|
|
45
|
if (defined $cont) { |
60
|
4
|
|
|
|
|
16
|
$self->add('.'); |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
#if ($w ne '') { |
63
|
19
|
|
|
|
|
82
|
$self->add_str($w); |
64
|
19
|
|
|
|
|
99
|
$self->add('.'); |
65
|
|
|
|
|
|
|
#} |
66
|
|
|
|
|
|
|
# just simple object references here |
67
|
|
|
|
|
|
|
# |
68
|
19
|
|
|
|
|
78
|
$self->add_var($1, $off); |
69
|
19
|
50
|
33
|
|
|
69
|
if (defined $2 && ($2 ne '')) { |
70
|
0
|
|
|
|
|
0
|
$self->add($3); |
71
|
0
|
|
|
|
|
0
|
$self->add_sym($4, $off); |
72
|
|
|
|
|
|
|
} |
73
|
19
|
100
|
|
|
|
54
|
if ($s =~ /\G(\[)/sgc) { |
74
|
4
|
50
|
|
|
|
24
|
if ($s =~ /\G([^"]*?)\]/sgc) { |
75
|
4
|
|
|
|
|
14
|
$self->add_open('['); |
76
|
|
|
|
|
|
|
# todo: split index special chars |
77
|
4
|
|
|
|
|
8
|
my $i = $1; |
78
|
4
|
100
|
|
|
|
18
|
if ($i =~ /^([0-9]+)$/) { |
|
|
50
|
|
|
|
|
|
79
|
2
|
|
|
|
|
7
|
$self->add_num($1); |
80
|
|
|
|
|
|
|
} elsif ($i =~ /^'(.*)'$/) { |
81
|
0
|
|
|
|
|
0
|
$self->add_str($1); |
82
|
|
|
|
|
|
|
} else { |
83
|
2
|
|
|
|
|
6
|
$self->add_sym($i, $off); |
84
|
|
|
|
|
|
|
} |
85
|
4
|
|
|
|
|
10
|
$self->add_close(']'); |
86
|
|
|
|
|
|
|
} else { |
87
|
0
|
|
|
|
|
0
|
push(@word, '['); |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
} |
90
|
19
|
|
|
|
|
28
|
$cont = 1; |
91
|
19
|
|
|
|
|
40
|
@word = (); |
92
|
|
|
|
|
|
|
} else { |
93
|
3
|
|
|
|
|
8
|
push(@word, '$'); |
94
|
|
|
|
|
|
|
} |
95
|
22
|
|
|
|
|
35
|
next; |
96
|
|
|
|
|
|
|
} elsif ($2 eq '{') { |
97
|
|
|
|
|
|
|
# expand complex variable |
98
|
13
|
100
|
|
|
|
41
|
if ($s =~ /\G(\$)/sgc) { |
99
|
5
|
50
|
|
|
|
22
|
if ($s =~ /\G([^"]*?)\}/sgc) { |
100
|
5
|
|
|
|
|
11
|
my $w = join('', @word); |
101
|
5
|
100
|
|
|
|
13
|
if (defined $cont) { |
102
|
1
|
|
|
|
|
5
|
$self->add('.'); |
103
|
|
|
|
|
|
|
} |
104
|
5
|
100
|
|
|
|
12
|
if ($w ne '') { |
105
|
4
|
|
|
|
|
15
|
$self->add_str($w); |
106
|
4
|
|
|
|
|
11
|
$self->add('.'); |
107
|
|
|
|
|
|
|
} |
108
|
5
|
|
|
|
|
12
|
my $v = $1; |
109
|
5
|
50
|
|
|
|
28
|
if ($v =~ /^([a-zA-Z_\x80-\xff][\w\x80-\xff]*)((\:\:|\-\>)([a-zA-Z_\x80-\xff][\w\x80-\xff]*))?(\[(.*?)\])?$/) { |
110
|
|
|
|
|
|
|
# split object references |
111
|
|
|
|
|
|
|
# |
112
|
5
|
|
|
|
|
18
|
$self->add_var($1, $off); |
113
|
5
|
50
|
33
|
|
|
16
|
if (defined $2 && ($2 ne '')) { |
114
|
0
|
|
|
|
|
0
|
$self->add($3); |
115
|
0
|
|
|
|
|
0
|
$self->add_sym($4, $off); |
116
|
|
|
|
|
|
|
} |
117
|
5
|
100
|
|
|
|
12
|
if (defined $5) { |
118
|
2
|
|
|
|
|
8
|
my $i = $6; |
119
|
2
|
|
|
|
|
10
|
$self->add_open('['); |
120
|
|
|
|
|
|
|
# todo: split var special chars |
121
|
2
|
50
|
|
|
|
12
|
if ($i =~ /^([0-9]+)$/) { |
|
|
0
|
|
|
|
|
|
122
|
2
|
|
|
|
|
6
|
$self->add_num($1); |
123
|
|
|
|
|
|
|
} elsif ($i =~ /^'(.*)'$/) { |
124
|
0
|
|
|
|
|
0
|
$self->add_str($1); |
125
|
|
|
|
|
|
|
} else { |
126
|
0
|
|
|
|
|
0
|
$self->add_sym($i, $off); |
127
|
|
|
|
|
|
|
} |
128
|
2
|
|
|
|
|
14
|
$self->add_close(']'); |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
} else { |
131
|
0
|
|
|
|
|
0
|
$self->add_var($v, $off); |
132
|
|
|
|
|
|
|
} |
133
|
5
|
|
|
|
|
7
|
$cont = 1; |
134
|
5
|
|
|
|
|
14
|
@word = (); |
135
|
|
|
|
|
|
|
} else { |
136
|
0
|
|
|
|
|
0
|
push(@word, '{$'); |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
} else { |
139
|
8
|
|
|
|
|
14
|
push(@word, '{'); |
140
|
|
|
|
|
|
|
} |
141
|
13
|
|
|
|
|
26
|
next; |
142
|
|
|
|
|
|
|
} |
143
|
214
|
|
|
|
|
368
|
last; |
144
|
|
|
|
|
|
|
} |
145
|
19
|
|
|
|
|
62
|
$s =~ /\G(.)/sgc; |
146
|
|
|
|
|
|
|
# escape characters in double quoted strings: |
147
|
|
|
|
|
|
|
# http://php.net/manual/de/language.types.string.php |
148
|
|
|
|
|
|
|
# http://perldoc.perl.org/perlrebackslash.html |
149
|
|
|
|
|
|
|
# |
150
|
19
|
50
|
66
|
|
|
184
|
if ($1 eq 'n') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
151
|
0
|
|
|
|
|
0
|
push(@word, "\n"); |
152
|
|
|
|
|
|
|
} elsif ($1 eq 'r') { |
153
|
0
|
|
|
|
|
0
|
push(@word, "\r"); |
154
|
|
|
|
|
|
|
} elsif ($1 eq 't') { |
155
|
0
|
|
|
|
|
0
|
push(@word, "\t"); |
156
|
|
|
|
|
|
|
} elsif ($1 eq 'v') { |
157
|
0
|
|
|
|
|
0
|
push(@word, chr(0x0b)); |
158
|
|
|
|
|
|
|
} elsif ($1 eq 'f') { |
159
|
0
|
|
|
|
|
0
|
push(@word, "\f"); |
160
|
|
|
|
|
|
|
} elsif ($1 eq "\\") { |
161
|
5
|
|
|
|
|
10
|
push(@word, "\\"); |
162
|
|
|
|
|
|
|
} elsif ($1 eq '$') { |
163
|
0
|
|
|
|
|
0
|
push(@word, '$'); |
164
|
|
|
|
|
|
|
} elsif ($1 eq '"') { |
165
|
6
|
|
|
|
|
12
|
push(@word, '"'); |
166
|
|
|
|
|
|
|
} elsif ($1 eq 'x') { |
167
|
5
|
|
|
|
|
19
|
$s =~ /\G([0-9a-fA-F]{1,2})/sgc; |
168
|
5
|
50
|
|
|
|
15
|
if (defined $1) { |
169
|
5
|
|
|
|
|
19
|
push(@word, chr(hex($1))); |
170
|
|
|
|
|
|
|
} else { |
171
|
0
|
|
|
|
|
0
|
push(@word, "\\".'x'); |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
} elsif (($1 ge '0') && ($1 le '7')) { |
174
|
2
|
|
|
|
|
7
|
my $o = $1; |
175
|
2
|
|
|
|
|
8
|
$s =~ /\G([0-7]{0,2})/sgc; |
176
|
2
|
|
|
|
|
6
|
$o .= $1; |
177
|
2
|
|
|
|
|
12
|
push(@word, chr(oct($o))); |
178
|
|
|
|
|
|
|
} else { |
179
|
1
|
|
|
|
|
5
|
push(@word, "\\".$1); |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
} |
182
|
215
|
|
|
|
|
467
|
my $w = join('', @word); |
183
|
215
|
100
|
|
|
|
415
|
if (defined $cont) { |
184
|
19
|
100
|
|
|
|
51
|
if ($w ne '') { |
185
|
8
|
|
|
|
|
27
|
$self->add('.'); |
186
|
8
|
|
|
|
|
53
|
$self->add_str($w, $off); |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
} else { |
189
|
196
|
|
|
|
|
557
|
$self->add_str($w, $off); |
190
|
|
|
|
|
|
|
} |
191
|
215
|
|
|
|
|
531
|
return; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# split into tokens, unquote quoted fields, and allow escapes |
195
|
|
|
|
|
|
|
# |
196
|
|
|
|
|
|
|
sub tokenize_line { |
197
|
786
|
|
|
786
|
1
|
3051
|
my ($self, $line, $quote) = @_; |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# use regex to tokenize (https://perldoc.perl.org/perlrequick) |
200
|
|
|
|
|
|
|
# |
201
|
|
|
|
|
|
|
# The most interesting behaviour of /PAT/g is when not in list context, |
202
|
|
|
|
|
|
|
# but in scalar context. In that case, the next regexp can continue |
203
|
|
|
|
|
|
|
# where the previous one left off, using the \G anchor. Conceptually, |
204
|
|
|
|
|
|
|
# it is the same as the ^ anchor, which anchors at the beginning of the |
205
|
|
|
|
|
|
|
# string - except now it anchors on the current value of pos(), for |
206
|
|
|
|
|
|
|
# this string, which is at the end of where the previous pattern |
207
|
|
|
|
|
|
|
# matched. The /c modifier prevents reset of the pos pointer # to 0 |
208
|
|
|
|
|
|
|
# when the match fails. |
209
|
|
|
|
|
|
|
# |
210
|
|
|
|
|
|
|
# Such a lexer could look like this: |
211
|
|
|
|
|
|
|
# |
212
|
|
|
|
|
|
|
# $_ = 'unquoted text: "quoted text."'; |
213
|
|
|
|
|
|
|
# while (1) { |
214
|
|
|
|
|
|
|
# /\G\s*(?=\S)/gc or last; |
215
|
|
|
|
|
|
|
# if (/\G(\w+)/gc) { |
216
|
|
|
|
|
|
|
# print "Found word: $1\n"; |
217
|
|
|
|
|
|
|
# } elsif (/\G(['"])/gc) { |
218
|
|
|
|
|
|
|
# print "Found quote: $1\n"; |
219
|
|
|
|
|
|
|
# } elsif (/\G([.,;:!?])/gc) { |
220
|
|
|
|
|
|
|
# print "Found punctuation: $1\n"; |
221
|
|
|
|
|
|
|
# } else { |
222
|
|
|
|
|
|
|
# /\G(?=(\S+))/gc; |
223
|
|
|
|
|
|
|
# printf "Unknown token: %s +(pos %d)", $1, pos; |
224
|
|
|
|
|
|
|
# } |
225
|
|
|
|
|
|
|
# } |
226
|
|
|
|
|
|
|
# |
227
|
786
|
|
|
|
|
1707
|
$_ = $line; |
228
|
786
|
|
|
|
|
1190
|
WORD: while (1) { |
229
|
18419
|
100
|
|
|
|
34939
|
if (!$self->{inscript}) { |
230
|
18
|
|
|
|
|
32
|
my @seq = (); |
231
|
18
|
|
|
|
|
28
|
my $sym; |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# parse everything between ?> and as string |
234
|
|
|
|
|
|
|
# and convert to 'echo #str;' |
235
|
|
|
|
|
|
|
# |
236
|
18
|
|
|
|
|
25
|
NOSCRIPT: while (1) { |
237
|
32
|
100
|
|
|
|
146
|
if (defined $quote) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
238
|
2
|
|
|
|
|
3
|
my @word = (); |
239
|
2
|
100
|
|
|
|
7
|
if ($quote eq '\'') { |
240
|
1
|
|
|
|
|
3
|
while (1) { |
241
|
3
|
50
|
|
|
|
40
|
unless (/\G([^$quote\\]*)([$quote\\])/sgc) { |
242
|
0
|
|
|
|
|
0
|
push(@seq, $quote); |
243
|
|
|
|
|
|
|
#push(@seq, join("", @word)); |
244
|
0
|
|
|
|
|
0
|
push(@seq, @word); |
245
|
0
|
|
|
|
|
0
|
$self->add_noscript(join('', @seq), pos); |
246
|
0
|
|
|
|
|
0
|
last NOSCRIPT; |
247
|
|
|
|
|
|
|
} |
248
|
3
|
|
|
|
|
12
|
$self->inc_linenum($1); |
249
|
3
|
|
|
|
|
7
|
push(@word, $1); |
250
|
3
|
100
|
|
|
|
12
|
last if ($2 ne "\\"); |
251
|
2
|
|
|
|
|
6
|
/\G(.)/sgc; |
252
|
2
|
|
|
|
|
6
|
push(@word, "\\".$1); |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
} else { |
255
|
1
|
|
|
|
|
2
|
while (1) { |
256
|
|
|
|
|
|
|
# strings are expanded single quoted, so |
257
|
|
|
|
|
|
|
# escape single quotes in other strings |
258
|
|
|
|
|
|
|
# |
259
|
3
|
50
|
|
|
|
41
|
unless (/\G([^$quote'\\]*)([$quote'\\])/sgc) { |
260
|
0
|
|
|
|
|
0
|
push(@seq, $quote); |
261
|
|
|
|
|
|
|
#push(@seq, join("", @word)); |
262
|
0
|
|
|
|
|
0
|
push(@seq, @word); |
263
|
0
|
|
|
|
|
0
|
$self->add_noscript(join('', @seq), pos); |
264
|
0
|
|
|
|
|
0
|
last NOSCRIPT; |
265
|
|
|
|
|
|
|
} |
266
|
3
|
|
|
|
|
11
|
$self->inc_linenum($1); |
267
|
3
|
|
|
|
|
7
|
push(@word, $1); |
268
|
3
|
50
|
|
|
|
9
|
if ($2 eq '\'') { |
269
|
0
|
|
|
|
|
0
|
push(@word, "\\".$2); |
270
|
0
|
|
|
|
|
0
|
next; |
271
|
|
|
|
|
|
|
} |
272
|
3
|
100
|
|
|
|
9
|
last if ($2 ne "\\"); |
273
|
2
|
|
|
|
|
6
|
/\G(.)/sgc; |
274
|
2
|
|
|
|
|
5
|
push(@word, "\\".$1); |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
2
|
|
|
|
|
4
|
push(@seq, $quote); |
278
|
2
|
|
|
|
|
6
|
push(@seq, @word); |
279
|
2
|
|
|
|
|
4
|
push(@seq, $quote); |
280
|
2
|
|
|
|
|
4
|
$quote = undef; |
281
|
|
|
|
|
|
|
} elsif (/\G(["'`])/sgc) { |
282
|
2
|
|
|
|
|
4
|
$quote = $1; |
283
|
|
|
|
|
|
|
} elsif (/\G(\\)/sgc) { |
284
|
0
|
|
|
|
|
0
|
/\G(.)/sgc; |
285
|
0
|
0
|
|
|
|
0
|
if (defined $1) { |
286
|
0
|
|
|
|
|
0
|
push(@seq, "\\".$1); |
287
|
|
|
|
|
|
|
} else { |
288
|
0
|
|
|
|
|
0
|
$self->add_noscript(join('', @seq), pos); |
289
|
0
|
|
|
|
|
0
|
last; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
} elsif (/\G(<)/sgc) { |
292
|
9
|
|
|
|
|
21
|
my $cur = pos; |
293
|
9
|
100
|
|
|
|
38
|
if (/\G(\?php)/sgci) { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
294
|
3
|
|
|
|
|
11
|
my $s = join('', @seq); |
295
|
3
|
50
|
|
|
|
27
|
if ($s ne '') { |
296
|
0
|
|
|
|
|
0
|
$self->add_noscript($s, $cur-2); |
297
|
|
|
|
|
|
|
} |
298
|
3
|
|
|
|
|
21
|
$self->add_script_start('
|
299
|
3
|
|
|
|
|
8
|
$self->{inscript} = 1; |
300
|
3
|
|
|
|
|
7
|
last; |
301
|
|
|
|
|
|
|
} elsif (/\G(\?)/sgc) { |
302
|
6
|
|
|
|
|
15
|
my $s = join('', @seq); |
303
|
6
|
100
|
|
|
|
17
|
if ($s ne '') { |
304
|
3
|
|
|
|
|
12
|
$self->add_noscript($s, $cur-2); |
305
|
|
|
|
|
|
|
} |
306
|
6
|
|
|
|
|
24
|
$self->add_script_start('', $cur-1); |
307
|
6
|
|
|
|
|
12
|
$self->{inscript} = 1; |
308
|
6
|
|
|
|
|
15
|
last; |
309
|
|
|
|
|
|
|
} elsif (/\G(\?=)/sgc) { |
310
|
|
|
|
|
|
|
# short_open_tag for echo since php-5.4 always avail |
311
|
0
|
|
|
|
|
0
|
my $s = join('', @seq); |
312
|
0
|
0
|
|
|
|
0
|
if ($s ne '') { |
313
|
0
|
|
|
|
|
0
|
$self->add_noscript($s, $cur-2); |
314
|
|
|
|
|
|
|
} |
315
|
0
|
|
|
|
|
0
|
$self->add_script_start('', $cur-1); |
316
|
0
|
|
|
|
|
0
|
$self->add_sym('echo'); |
317
|
0
|
|
|
|
|
0
|
$self->{inscript} = 1; |
318
|
0
|
|
|
|
|
0
|
last; |
319
|
|
|
|
|
|
|
} elsif (/\G(script\s*language\s*=\s*["']php["']\s*>)/sgci) { |
320
|
|
|
|
|
|
|
# |
321
|
|
|
|
|
|
|
# |
322
|
0
|
|
|
|
|
0
|
my $s = join('', @seq); |
323
|
0
|
0
|
|
|
|
0
|
if ($s ne '') { |
324
|
0
|
|
|
|
|
0
|
$self->add_noscript($s, $cur-2); |
325
|
|
|
|
|
|
|
} |
326
|
0
|
|
|
|
|
0
|
$self->add_script_start('
|
327
|
0
|
|
|
|
|
0
|
$self->{inscript} = 2; |
328
|
0
|
|
|
|
|
0
|
last; |
329
|
|
|
|
|
|
|
} else { |
330
|
0
|
|
|
|
|
0
|
push(@seq, '<'); |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
} else { |
333
|
19
|
100
|
|
|
|
54
|
unless (/\G([^"'`'<\\]+)/sgc) { |
334
|
|
|
|
|
|
|
#/\G(.)/sgc; |
335
|
|
|
|
|
|
|
#printf ">> WARN: parse end after: [0x%02x] %s\n", ord($1), $self->tok_dump(); |
336
|
9
|
|
|
|
|
22
|
my $s = join('', @seq); |
337
|
9
|
100
|
|
|
|
24
|
if ($s ne '') { |
338
|
5
|
|
|
|
|
19
|
$self->add_noscript($s, pos); |
339
|
|
|
|
|
|
|
} |
340
|
9
|
|
|
|
|
20
|
last; |
341
|
|
|
|
|
|
|
} |
342
|
10
|
|
|
|
|
29
|
push(@seq, $1); |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
} |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
#printf ">>>>> [pos: %d] %s\n", pos, $self->tok_dump(); |
347
|
18419
|
|
|
|
|
25657
|
my $cur = pos; |
348
|
|
|
|
|
|
|
|
349
|
18419
|
100
|
|
|
|
121685
|
if (defined $quote) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
350
|
|
|
|
|
|
|
# handle quotes |
351
|
|
|
|
|
|
|
# |
352
|
1014
|
|
|
|
|
1698
|
my @word = (); |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
# before php 5.3 php silently truncated strings |
355
|
|
|
|
|
|
|
# after a 0-byte (0-byte poisioning) |
356
|
|
|
|
|
|
|
# https://bugs.php.net/bug.php?id=39863 |
357
|
|
|
|
|
|
|
# |
358
|
1014
|
100
|
|
|
|
2166
|
if ($quote eq "\"") { |
359
|
|
|
|
|
|
|
# In the first pass, just scan for the end of the string, and |
360
|
|
|
|
|
|
|
# in the second pass expand variables and other escape codes. |
361
|
|
|
|
|
|
|
# (keep escapes intact here) |
362
|
|
|
|
|
|
|
# |
363
|
213
|
|
|
|
|
366
|
while (1) { |
364
|
232
|
50
|
|
|
|
1371
|
unless (/\G([^$quote\\]*)([$quote\\])/sgc) { |
365
|
0
|
|
|
|
|
0
|
last WORD; |
366
|
|
|
|
|
|
|
} |
367
|
232
|
|
|
|
|
713
|
$self->inc_linenum($1); |
368
|
232
|
|
|
|
|
497
|
push(@word, $1); |
369
|
232
|
100
|
|
|
|
718
|
last if ($2 ne "\\"); |
370
|
19
|
|
|
|
|
65
|
/\G(.)/sgc; |
371
|
19
|
|
|
|
|
56
|
push(@word, "\\".$1); |
372
|
|
|
|
|
|
|
} |
373
|
213
|
|
|
|
|
710
|
$self->expand_str(join('', @word), $cur); |
374
|
|
|
|
|
|
|
} else { |
375
|
801
|
|
|
|
|
1189
|
while (1) { |
376
|
815
|
50
|
|
|
|
4186
|
unless (/\G([^$quote\\]*)([$quote\\])/sgc) { |
377
|
0
|
|
|
|
|
0
|
last WORD; |
378
|
|
|
|
|
|
|
} |
379
|
815
|
|
|
|
|
2733
|
$self->inc_linenum($1); |
380
|
815
|
|
|
|
|
1661
|
push(@word, $1); |
381
|
815
|
100
|
|
|
|
2328
|
last if ($2 ne "\\"); |
382
|
14
|
|
|
|
|
77
|
/\G(.)/sgc; |
383
|
14
|
100
|
100
|
|
|
57
|
if (($1 eq '\\') || ($1 eq $quote)) { |
384
|
9
|
|
|
|
|
21
|
push(@word, $1); |
385
|
|
|
|
|
|
|
} else { |
386
|
5
|
|
|
|
|
19
|
push(@word, "\\".$1); |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
} |
389
|
801
|
|
|
|
|
2778
|
$self->add_str(join('', @word), $cur); |
390
|
|
|
|
|
|
|
} |
391
|
1014
|
|
|
|
|
2091
|
$quote = undef; |
392
|
|
|
|
|
|
|
} elsif (/\G(["'`])/sgc) { |
393
|
1014
|
|
|
|
|
2065
|
$quote = $1; |
394
|
|
|
|
|
|
|
} elsif (/\G(\/)/sgc) { |
395
|
9
|
100
|
|
|
|
41
|
if (/\G(\*)/sgc) { |
|
|
100
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# parse /* ... */ comments |
397
|
1
|
50
|
|
|
|
6
|
unless (/\G(.*?)\*\//sgc) { |
398
|
0
|
|
|
|
|
0
|
$quote = '*'; |
399
|
0
|
|
|
|
|
0
|
last WORD; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
# insert as string without newlines |
402
|
1
|
|
|
|
|
4
|
my $s = $1; |
403
|
1
|
|
|
|
|
5
|
$self->inc_linenum($s); |
404
|
1
|
|
|
|
|
3
|
$s =~ s/[\r\n]/ /g; |
405
|
1
|
|
|
|
|
6
|
$self->add_comment($s, pos); |
406
|
|
|
|
|
|
|
} elsif (/\G(\/)/gc) { |
407
|
|
|
|
|
|
|
# skip comments |
408
|
|
|
|
|
|
|
# parse // comments (up to ?> or line end) |
409
|
|
|
|
|
|
|
# |
410
|
|
|
|
|
|
|
# ?> tags have a higher priority and stop the comment |
411
|
|
|
|
|
|
|
# http://php.net/manual/en/language.basic-syntax.comments.php |
412
|
|
|
|
|
|
|
# |
413
|
7
|
50
|
|
|
|
35
|
unless (/\G(.*?)(\?>|\n|\r\n|\0|$)/gc) { |
414
|
0
|
|
|
|
|
0
|
last WORD; |
415
|
|
|
|
|
|
|
} |
416
|
7
|
|
|
|
|
30
|
$self->add_comment($1, pos); |
417
|
7
|
100
|
|
|
|
35
|
if ($2 eq '?>') { |
|
|
50
|
|
|
|
|
|
418
|
1
|
|
|
|
|
5
|
$self->add_script_end('?>', pos); |
419
|
1
|
|
|
|
|
4
|
$self->{inscript} = 0; |
420
|
|
|
|
|
|
|
} elsif ($2 eq "\n") { |
421
|
0
|
|
|
|
|
0
|
$self->add_white($2); |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
} else { |
424
|
1
|
|
|
|
|
5
|
$self->add('/'); |
425
|
|
|
|
|
|
|
} |
426
|
|
|
|
|
|
|
} elsif (/\G(#)/gc) { |
427
|
|
|
|
|
|
|
#if (/\G((str|num|const|arr|fun|class|trait|call|elem|expr|stmt|blk|ref|obj|scope|ns)\d+)/gc) { |
428
|
|
|
|
|
|
|
# # parse inserted #str symbols (might be useful for sub evals) |
429
|
|
|
|
|
|
|
# $self->add('#'.$1); |
430
|
|
|
|
|
|
|
#} else { |
431
|
|
|
|
|
|
|
# parse # comments (up to ?> or line end) |
432
|
|
|
|
|
|
|
# |
433
|
2
|
50
|
|
|
|
12
|
unless (/\G(.*?)(\?>|\n|\r\n|\0)/gc) { |
434
|
2
|
|
|
|
|
5
|
last WORD; |
435
|
|
|
|
|
|
|
} |
436
|
0
|
|
|
|
|
0
|
$self->add_comment($1, pos); |
437
|
0
|
0
|
|
|
|
0
|
if ($2 eq '?>') { |
438
|
0
|
|
|
|
|
0
|
$self->add_script_end('?>', pos); |
439
|
0
|
|
|
|
|
0
|
$self->{inscript} = 0; |
440
|
0
|
0
|
|
|
|
0
|
if (/\G(\n|\r\n)/sgc) { |
441
|
0
|
|
|
|
|
0
|
$self->add_white("\n"); |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
#} |
445
|
|
|
|
|
|
|
} elsif (/\G(\n)/sgc) { |
446
|
|
|
|
|
|
|
# skip whitespace token |
447
|
2
|
|
|
|
|
6
|
$self->add_white($1); |
448
|
|
|
|
|
|
|
} elsif (/\G([^\S\n]+)/sgc) { |
449
|
|
|
|
|
|
|
# skip whitespace token |
450
|
3489
|
|
|
|
|
8522
|
$self->add_white($1); |
451
|
|
|
|
|
|
|
} elsif (/\G([\x01-\x1f\x7f])/sgc) { |
452
|
|
|
|
|
|
|
# skip non printable token |
453
|
0
|
|
|
|
|
0
|
$self->add_white($1); |
454
|
|
|
|
|
|
|
} elsif (/\G(<)/sgc) { |
455
|
41
|
50
|
33
|
|
|
423
|
if (/\G(\?php)/sgci) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
456
|
0
|
0
|
|
|
|
0
|
if ($self->tok_count() > 0) { |
457
|
0
|
|
|
|
|
0
|
$self->add_bad_open('
|
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
} elsif (/\G(\?)/sgc) { |
460
|
0
|
0
|
|
|
|
0
|
if ($self->tok_count() > 0) { |
461
|
0
|
|
|
|
|
0
|
$self->add_bad_open(''); |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
} elsif (/\G(\?=)/sgc) { |
464
|
0
|
0
|
|
|
|
0
|
if ($self->tok_count() > 0) { |
465
|
0
|
|
|
|
|
0
|
$self->add_bad_open('='); |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
} elsif (/\G(script\s*language\s*=\s*["']php["']\s*>)/sgci) { |
468
|
0
|
0
|
|
|
|
0
|
if ($self->tok_count() > 0) { |
469
|
0
|
|
|
|
|
0
|
$self->add_bad_open(' |
473
|
|
|
|
|
|
|
# |
474
|
0
|
|
|
|
|
0
|
$self->add_script_end('?>', pos); |
475
|
0
|
|
|
|
|
0
|
$self->{inscript} = 0; |
476
|
0
|
0
|
|
|
|
0
|
if (/\G(\n|\r\n)/sgc) { |
477
|
0
|
|
|
|
|
0
|
$self->add_white("\n"); |
478
|
|
|
|
|
|
|
} |
479
|
|
|
|
|
|
|
} elsif (/\G(<<)/sgc) { |
480
|
|
|
|
|
|
|
# heredoc or nowdoc |
481
|
|
|
|
|
|
|
# http://php.net/manual/de/language.types.string.php |
482
|
|
|
|
|
|
|
# |
483
|
7
|
50
|
|
7
|
|
4292
|
if (/\G([\w\pL]+)(\n|\r\n)/sgc) { |
|
7
|
0
|
|
|
|
104
|
|
|
7
|
|
|
|
|
136
|
|
|
2
|
|
|
|
|
11
|
|
484
|
2
|
|
|
|
|
5
|
my $e = $1; |
485
|
2
|
|
|
|
|
6
|
$self->inc_linenum("\n"); |
486
|
2
|
50
|
|
|
|
46
|
unless (/\G(.*?\n)$e(\;)?(\n|\r\n|\0|$)/sgc) { |
487
|
0
|
|
|
|
|
0
|
last WORD; |
488
|
|
|
|
|
|
|
} |
489
|
2
|
|
|
|
|
8
|
$self->inc_linenum($1); |
490
|
2
|
|
|
|
|
11
|
$self->expand_str($1, $cur); |
491
|
|
|
|
|
|
|
#$self->add_str($1); |
492
|
2
|
|
|
|
|
6
|
$self->inc_linenum($3); |
493
|
|
|
|
|
|
|
} elsif (/\G\'([\w\pL]+)\'(\n|\r\n)/sgc) { |
494
|
0
|
|
|
|
|
0
|
my $e = $1; |
495
|
0
|
|
|
|
|
0
|
$self->inc_linenum("\n"); |
496
|
0
|
0
|
|
|
|
0
|
unless (/\G(.*?\n)$e(\;)?(\n|\r\n|\0|$)/sgc) { |
497
|
0
|
|
|
|
|
0
|
last WORD; |
498
|
|
|
|
|
|
|
} |
499
|
0
|
|
|
|
|
0
|
$self->inc_linenum($1); |
500
|
0
|
|
|
|
|
0
|
$self->add_str($1); |
501
|
0
|
|
|
|
|
0
|
$self->inc_linenum($3); |
502
|
|
|
|
|
|
|
} else { |
503
|
0
|
|
|
|
|
0
|
$self->add('<<<'); |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
} else { |
506
|
39
|
|
|
|
|
105
|
$self->add('<'); |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
} elsif (/\G(\?)/sgc) { |
509
|
38
|
100
|
|
|
|
102
|
if (/\G(>)/sgc) { |
510
|
|
|
|
|
|
|
# http://php.net/manual/en/language.basic-syntax.instruction-separation.php |
511
|
|
|
|
|
|
|
# the closing tag includes an optional immediately following newline |
512
|
|
|
|
|
|
|
# |
513
|
14
|
|
|
|
|
56
|
$self->add_script_end('?>', pos); |
514
|
14
|
|
|
|
|
34
|
$self->{inscript} = 0; |
515
|
14
|
50
|
|
|
|
51
|
if (/\G(\n|\r\n)/sgc) { |
516
|
0
|
|
|
|
|
0
|
$self->add_white("\n"); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
} else { |
519
|
24
|
|
|
|
|
63
|
$self->add('?'); |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
} elsif (/\G(\0)/sgc) { |
522
|
0
|
|
|
|
|
0
|
$self->add_script_end($1, pos); |
523
|
0
|
|
|
|
|
0
|
$self->{inscript} = 0; |
524
|
|
|
|
|
|
|
} elsif (/\G([\[\(\{])/sgc) { |
525
|
2265
|
|
|
|
|
6112
|
$self->add_open($1, $cur); |
526
|
|
|
|
|
|
|
} elsif (/\G([\}\)\]])/sgc) { |
527
|
2257
|
|
|
|
|
6172
|
$self->add_close($1, $cur); |
528
|
|
|
|
|
|
|
} elsif (/\G([>\;\=\,\.\:\&\-\+\|\^\~\%\!\\])/sgc) { |
529
|
|
|
|
|
|
|
# backslash is php namespace separator (not escape) |
530
|
|
|
|
|
|
|
# |
531
|
2948
|
|
|
|
|
8196
|
$self->add($1); |
532
|
|
|
|
|
|
|
} elsif (/\G(\$)/sgc) { |
533
|
|
|
|
|
|
|
# variable $var or '${' |
534
|
|
|
|
|
|
|
# |
535
|
1840
|
100
|
|
|
|
4555
|
if (/\G([a-zA-Z_\x80-\xff][\w\x80-\xff]*)/sgc) { |
536
|
1756
|
|
|
|
|
4867
|
$self->add_var($1, $cur); |
537
|
|
|
|
|
|
|
} else { |
538
|
84
|
|
|
|
|
280
|
$self->add('$'); |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
} elsif (/\G(\@)/sgc) { |
541
|
|
|
|
|
|
|
# '@' is allowed in php-identifiers |
542
|
|
|
|
|
|
|
# it suppresses error messages |
543
|
|
|
|
|
|
|
# |
544
|
1
|
|
|
|
|
7
|
$self->add_white('@'); |
545
|
|
|
|
|
|
|
} elsif (/\G([0-9]+)/sgc) { |
546
|
534
|
|
|
|
|
1187
|
my $v = $1; |
547
|
534
|
100
|
100
|
|
|
1765
|
if (($v eq '0') && /\G([xX][0-9a-fA-F]+)/sgc) { |
548
|
4
|
|
|
|
|
20
|
$self->add_num('0'.$1); # hex |
549
|
|
|
|
|
|
|
} else { |
550
|
530
|
100
|
|
|
|
1242
|
if (/\G(\.[0-9]+)/sgc) { |
551
|
6
|
|
|
|
|
14
|
$v .= $1; # float |
552
|
|
|
|
|
|
|
} |
553
|
530
|
50
|
|
|
|
1165
|
if (/\G([eE][\+\-]?[0-9]+)/sgc) { |
554
|
0
|
|
|
|
|
0
|
$v .= $1; # exponent |
555
|
|
|
|
|
|
|
} |
556
|
530
|
|
|
|
|
1454
|
$self->add_num($v); # dec/oct/float |
557
|
|
|
|
|
|
|
} |
558
|
|
|
|
|
|
|
} elsif (/\G([\w\x80-\xff]+)/sgc) { |
559
|
2178
|
|
|
|
|
5646
|
$self->add_sym($1, $cur); |
560
|
|
|
|
|
|
|
} else { |
561
|
787
|
100
|
|
|
|
1681
|
unless (/\G([^"'`<>\\\/#\s\w\[\]\(\)\{\}\$\?\;\=\,\.\:\&\-\+\|\^\~\%\!]+)/sgc) { |
562
|
|
|
|
|
|
|
#/\G(.)/sgc; |
563
|
|
|
|
|
|
|
#printf ">> WARN: parse end after: [0x%02x] %s\n", ord($1), $self->tok_dump(); |
564
|
784
|
|
|
|
|
1372
|
last; |
565
|
|
|
|
|
|
|
} |
566
|
3
|
|
|
|
|
16
|
$self->add($1); |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
} |
569
|
786
|
100
|
|
|
|
1834
|
if ($self->{inscript}) { |
570
|
777
|
|
|
|
|
2425
|
$self->add_script_end('', pos); |
571
|
|
|
|
|
|
|
} |
572
|
786
|
|
|
|
|
2466
|
return $quote; |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
# set up method stubs |
576
|
|
|
|
|
|
|
# (when just the add()-method is overridden, then all |
577
|
|
|
|
|
|
|
# other handlers call this subclass method) |
578
|
|
|
|
|
|
|
# |
579
|
|
|
|
0
|
0
|
|
sub add { } |
580
|
4
|
|
|
4
|
|
12
|
sub _add { my ($self, $sym) = @_; $self->add($sym); return; } |
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
21
|
|
581
|
|
|
|
|
|
|
*add_open = \&_add; |
582
|
|
|
|
|
|
|
*add_close = \&_add; |
583
|
|
|
|
|
|
|
*add_white = \&_add; |
584
|
|
|
|
|
|
|
*add_comment = \&_add; |
585
|
|
|
|
|
|
|
*add_sym = \&_add; |
586
|
|
|
|
|
|
|
*add_var = \&_add; |
587
|
|
|
|
|
|
|
*add_str = \&_add; |
588
|
|
|
|
|
|
|
*add_num = \&_add; |
589
|
|
|
|
|
|
|
*add_script_start = \&_add; |
590
|
|
|
|
|
|
|
*add_script_end = \&_add; |
591
|
|
|
|
|
|
|
*add_noscript = \&_add; |
592
|
|
|
|
|
|
|
*add_bad_open = \&_add; |
593
|
|
|
|
|
|
|
|
594
|
0
|
|
|
0
|
0
|
0
|
sub tok_dump { return ''; } |
595
|
0
|
|
|
0
|
0
|
0
|
sub tok_count { return 0; } |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
sub DESTROY { |
598
|
753
|
|
|
753
|
|
450899
|
my $self = shift; |
599
|
753
|
|
|
|
|
10853
|
return; |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
1; |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
__END__ |