line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::StripTags; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
HTML::StripTags - Strip HTML or XML tags from a string with Perl like PHP's strip_tags() does |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use HTML::StripTags qw(strip_tags); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$string = 'Hallo beautiful world!'; |
12
|
|
|
|
|
|
|
$allowed_tags = ''; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
print strip_tags( $string, $allowed_tags ); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 DESCRIPTION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
HTML::StripTags provides the function strip_tags() that can strip all HTML or XML tags from a string except the given allowed tags. |
19
|
|
|
|
|
|
|
This is a Perl port of the PHP function strip_tags() based on PHP 5.3.3. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SECURITY NOTE |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
Please note: As per L PHP's strip_tags() is a very basic and unsafe method, so it's strongly recommended not to use it for cleaning up user input! |
24
|
|
|
|
|
|
|
As HTML::StripTags uses the same state machine, the same applies to this module. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 METHODS |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=cut |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
#------------------------- Pragmas --------------------------------------------- |
31
|
15
|
|
|
15
|
|
13833
|
use strict; |
|
15
|
|
|
|
|
29
|
|
|
15
|
|
|
|
|
480
|
|
32
|
15
|
|
|
15
|
|
83
|
use warnings; |
|
15
|
|
|
|
|
23
|
|
|
15
|
|
|
|
|
479
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#------------------------- Libs ------------------------------------------------ |
35
|
15
|
|
|
15
|
|
75
|
use Carp; |
|
15
|
|
|
|
|
39
|
|
|
15
|
|
|
|
|
1340
|
|
36
|
15
|
|
|
15
|
|
77
|
use Exporter; |
|
15
|
|
|
|
|
23
|
|
|
15
|
|
|
|
|
538
|
|
37
|
15
|
|
|
15
|
|
16411
|
use Switch 'fallthrough'; |
|
15
|
|
|
|
|
855632
|
|
|
15
|
|
|
|
|
103
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our $VERSION = '1.01'; |
40
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
41
|
|
|
|
|
|
|
our @EXPORT_OK = qw(strip_tags); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head2 strip_tags() |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
A simple little state-machine to strip out html and php tags |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
State 0 is the output state, state 1 means we are inside a |
48
|
|
|
|
|
|
|
normal html tag, state 2 means we are inside a php tag, state 3 |
49
|
|
|
|
|
|
|
means we are inside a " |
|
24
|
|
|
|
|
220
|
|
187
|
24
|
100
|
66
|
|
|
177
|
if ($p >= 2 && $buf[$p-1] eq '-' && $buf[$p-2] eq '-') { |
|
|
|
66
|
|
|
|
|
188
|
23
|
|
|
|
|
27
|
$in_q = $state = 0; |
189
|
23
|
|
|
|
|
29
|
@tbuf = (); |
190
|
|
|
|
|
|
|
} |
191
|
24
|
|
|
|
|
29
|
last; |
192
|
0
|
|
|
|
|
0
|
} |
|
24
|
|
|
|
|
120
|
|
|
0
|
|
|
|
|
0
|
|
193
|
3
|
50
|
|
|
|
53
|
case qr/\d/ { |
|
3
|
|
|
|
|
98
|
|
194
|
3
|
|
|
|
|
9
|
push @rbuf, $c; |
195
|
3
|
|
|
|
|
6
|
last; |
196
|
0
|
|
|
|
|
0
|
} |
|
3
|
|
|
|
|
20
|
|
|
0
|
|
|
|
|
0
|
|
197
|
|
|
|
|
|
|
} |
198
|
429
|
|
|
|
|
569
|
last; |
199
|
0
|
|
|
|
|
0
|
} |
|
440
|
|
|
|
|
1677
|
|
|
0
|
|
|
|
|
0
|
|
200
|
4733
|
100
|
|
|
|
61977
|
case m/\'|\"/ { |
|
45
|
|
|
|
|
860
|
|
201
|
45
|
50
|
100
|
|
|
630
|
if ($state == 4) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# Inside |
203
|
0
|
|
|
|
|
0
|
last; |
204
|
|
|
|
|
|
|
} elsif (($state == 2) && $buf[$p-1] ne '\\') { |
205
|
|
|
|
|
|
|
# Inside PHP |
206
|
2
|
100
|
|
|
|
7
|
if ($lc eq $c) { |
|
|
50
|
|
|
|
|
|
207
|
1
|
|
|
|
|
2
|
$lc = "\0"; |
208
|
|
|
|
|
|
|
} elsif ($lc ne '\\') { |
209
|
1
|
|
|
|
|
3
|
$lc = $c; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
} elsif ($state == 0) { |
212
|
|
|
|
|
|
|
# Outside a tag |
213
|
9
|
|
|
|
|
20
|
push @rbuf, $c; |
214
|
|
|
|
|
|
|
} elsif ($allow && $state == 1) { |
215
|
|
|
|
|
|
|
# Inside a tag |
216
|
18
|
|
|
|
|
29
|
push @tbuf, $c; |
217
|
|
|
|
|
|
|
} |
218
|
45
|
50
|
66
|
|
|
595
|
if ($state && $p != 0 && ($state == 1 || $buf[$p-1] ne '\\') && (!$in_q || $buf[$p] eq $in_q)) { |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
219
|
32
|
100
|
|
|
|
73
|
if ($in_q) { |
220
|
16
|
|
|
|
|
25
|
$in_q = 0; |
221
|
|
|
|
|
|
|
} else { |
222
|
16
|
|
|
|
|
34
|
$in_q = $buf[$p]; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
} |
225
|
45
|
|
|
|
|
60
|
last; |
226
|
0
|
|
|
|
|
0
|
} |
|
45
|
|
|
|
|
306
|
|
|
0
|
|
|
|
|
0
|
|
227
|
4688
|
100
|
|
|
|
89040
|
case '!' { |
|
36
|
|
|
|
|
339
|
|
228
|
|
|
|
|
|
|
# JavaScript & Other HTML scripting languages |
229
|
36
|
100
|
66
|
|
|
165
|
if ($state == 1 && $buf[$p-1] eq '<') { |
230
|
23
|
|
|
|
|
26
|
$state = 3; |
231
|
23
|
|
|
|
|
29
|
$lc = $c; |
232
|
|
|
|
|
|
|
} else { |
233
|
13
|
50
|
0
|
|
|
34
|
if ($state == 0) { |
|
|
0
|
|
|
|
|
|
234
|
13
|
|
|
|
|
28
|
push @rbuf, $c; |
235
|
|
|
|
|
|
|
} elsif ($allow && $state == 1) { |
236
|
0
|
|
|
|
|
0
|
push @tbuf, $c; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
} |
239
|
36
|
|
|
|
|
44
|
last; |
240
|
0
|
|
|
|
|
0
|
} |
|
36
|
|
|
|
|
158
|
|
|
0
|
|
|
|
|
0
|
|
241
|
4652
|
100
|
|
|
|
52965
|
case '-' { |
|
100
|
|
|
|
|
825
|
|
242
|
100
|
100
|
66
|
|
|
507
|
if ($state == 3 && $p >= 2 && $buf[$p-1] eq '-' && $buf[$p-2] eq '!') { |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
243
|
|
|
|
|
|
|
# |
244
|
23
|
|
|
|
|
28
|
$state = 4; |
245
|
|
|
|
|
|
|
} else { |
246
|
77
|
|
|
|
|
88
|
next; |
247
|
|
|
|
|
|
|
} |
248
|
23
|
|
|
|
|
24
|
last; |
249
|
77
|
|
|
|
|
425
|
} |
|
23
|
|
|
|
|
91
|
|
|
77
|
|
|
|
|
73
|
|
250
|
4629
|
100
|
|
|
|
54226
|
case '?' { |
|
118
|
|
|
|
|
1062
|
|
251
|
118
|
100
|
100
|
|
|
515
|
if ($state == 1 && $buf[$p-1] eq '<') { |
252
|
|
|
|
|
|
|
# opened PHP tag |
253
|
53
|
|
|
|
|
75
|
$br=0; |
254
|
53
|
|
|
|
|
60
|
$state=2; |
255
|
53
|
|
|
|
|
69
|
last; |
256
|
|
|
|
|
|
|
} |
257
|
65
|
|
|
|
|
340
|
} |
|
53
|
|
|
|
|
279
|
|
|
65
|
|
|
|
|
84
|
|
258
|
4576
|
100
|
|
|
|
83812
|
case m/E|e/ { |
|
297
|
|
|
|
|
5183
|
|
259
|
|
|
|
|
|
|
# !DOCTYPE exception |
260
|
297
|
0
|
33
|
|
|
1025
|
if ($state==3 && $p > 6 |
|
|
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
261
|
|
|
|
|
|
|
&& lc($buf[$p-1]) eq 'p' |
262
|
|
|
|
|
|
|
&& lc($buf[$p-2]) eq 'y' |
263
|
|
|
|
|
|
|
&& lc($buf[$p-3]) eq 't' |
264
|
|
|
|
|
|
|
&& lc($buf[$p-4]) eq 'c' |
265
|
|
|
|
|
|
|
&& lc($buf[$p-5]) eq 'o' |
266
|
|
|
|
|
|
|
&& lc($buf[$p-6]) eq 'd') { |
267
|
|
|
|
|
|
|
# we're not in a |