line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::StripScripts::Regex; |
2
|
3
|
|
|
3
|
|
92302
|
use strict; |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
120
|
|
3
|
3
|
|
|
3
|
|
17
|
use warnings; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
163
|
|
4
|
|
|
|
|
|
|
our $VERSION = '0.02'; |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
HTML::StripScripts::Regex - XSS filter using a regular expression |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 VERSION |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Version 0.02 |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 SYNOPSIS |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
This class subclasses L, and adds an input method |
17
|
|
|
|
|
|
|
based on a regular expression. See L. |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use HTML::StripScripts::Regex; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $hss = HTML::StripScripts::Regex->new({ Context => 'Inline' }); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
$hss->input("hello, world!"); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
print $hss->filtered_document; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Using a regular expression to parse HTML is error prone and inefficient |
28
|
|
|
|
|
|
|
for large documents. If L is available then |
29
|
|
|
|
|
|
|
L should be used in preference to this module. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 METHODS |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This subclass adds the following methods to those of L. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=over |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=item input ( TEXT ) |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Parses an HTML document and runs it through the filter. TEXT must be the |
40
|
|
|
|
|
|
|
entire HTML document to be filtered, as a single flat string. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=cut |
43
|
|
|
|
|
|
|
|
44
|
3
|
|
|
3
|
|
3664
|
use HTML::StripScripts; |
|
3
|
|
|
|
|
48066
|
|
|
3
|
|
|
|
|
148
|
|
45
|
3
|
|
|
3
|
|
41
|
use base qw(HTML::StripScripts); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
1538
|
|
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub input { |
48
|
124
|
|
|
124
|
1
|
108137
|
my ($self, $text) = @_; |
49
|
|
|
|
|
|
|
|
50
|
124
|
|
|
|
|
413
|
$self->input_start_document; |
51
|
|
|
|
|
|
|
|
52
|
124
|
|
|
|
|
3128
|
while ( $text =~ m[ |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# or constructs, |
55
|
|
|
|
|
|
|
# in which everything between the tags counts as |
56
|
|
|
|
|
|
|
# CDATA. |
57
|
|
|
|
|
|
|
(?: <(script|style).*?> (.*?) \1> ) | |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# An HTML comment |
60
|
|
|
|
|
|
|
( ) | |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# A processing instruction |
63
|
|
|
|
|
|
|
( <\?.*?> ) | |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# A declaration |
66
|
|
|
|
|
|
|
( <\!.*?> ) | |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# A start tag |
69
|
|
|
|
|
|
|
( <[a-z0-9]+\b(?:[^>'"]|"[^"]*"|'[^']*')*> ) | |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# An end tag |
72
|
|
|
|
|
|
|
( [a-z0-9]+> ) | |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Some non-tag text. We eat '<' only if it's |
75
|
|
|
|
|
|
|
# the first character, since a '<' as the |
76
|
|
|
|
|
|
|
# first character can't be the start of a well |
77
|
|
|
|
|
|
|
# formed tag or one of the patterns above would |
78
|
|
|
|
|
|
|
# have matched. |
79
|
|
|
|
|
|
|
( .[^<]* ) |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
]igsx ) { |
82
|
|
|
|
|
|
|
|
83
|
452
|
100
|
|
|
|
31879
|
if ( defined $1 ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
84
|
2
|
|
|
|
|
9
|
$self->input_start("<$1>"); |
85
|
2
|
|
|
|
|
57
|
$self->input_text($2); |
86
|
2
|
|
|
|
|
17
|
$self->input_end("$1>"); |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
elsif ( defined $3 ) { |
89
|
7
|
|
|
|
|
42
|
$self->input_comment($3); |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
elsif ( defined $4 ) { |
92
|
3
|
|
|
|
|
20
|
$self->input_process($4); |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
elsif ( defined $5 ) { |
95
|
4
|
|
|
|
|
27
|
$self->input_declaration($5); |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
elsif ( defined $6 ) { |
98
|
182
|
|
|
|
|
522
|
$self->input_start($6); |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
elsif ( defined $7 ) { |
101
|
56
|
|
|
|
|
458
|
$self->input_end($7); |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
elsif ( defined $8 ) { |
104
|
198
|
|
|
|
|
787
|
$self->input_text($8); |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
else { |
107
|
0
|
|
|
|
|
0
|
die 'regex failed to act as expected'; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
124
|
|
|
|
|
10123
|
$self->input_end_document; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=back |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 SUBCLASSING |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
The C class is subclassable, in exactly the same |
120
|
|
|
|
|
|
|
way as C. See L for |
121
|
|
|
|
|
|
|
details. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head1 SEE ALSO |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
L, L, L |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head1 AUTHOR |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Nick Cleaton, C<< >> |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Copyright 2009 Nick Cleaton, all rights reserved. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
136
|
|
|
|
|
|
|
the same terms as Perl itself. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=cut |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
1; |