line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MojoMojo::Formatter::Defang; |
2
|
27
|
|
|
27
|
|
96248
|
use strict; |
|
27
|
|
|
|
|
67
|
|
|
27
|
|
|
|
|
673
|
|
3
|
27
|
|
|
27
|
|
128
|
use warnings; |
|
27
|
|
|
|
|
61
|
|
|
27
|
|
|
|
|
634
|
|
4
|
27
|
|
|
27
|
|
809
|
use parent qw/MojoMojo::Formatter/; |
|
27
|
|
|
|
|
496
|
|
|
27
|
|
|
|
|
135
|
|
5
|
27
|
|
|
27
|
|
14134
|
use MojoMojo::Declaw; |
|
27
|
|
|
|
|
136
|
|
|
27
|
|
|
|
|
1518
|
|
6
|
27
|
|
|
27
|
|
640
|
use URI; |
|
27
|
|
|
|
|
4953
|
|
|
27
|
|
|
|
|
1293
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 NAME |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
MojoMojo::Formatter::Defang - Scrub user HTML and XSS |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 DESCRIPTION |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
This formatter makes sure only a safe range of tags are |
15
|
|
|
|
|
|
|
allowed, using L<MojoMojo::Defang>; It also tries to remove XSS attempts. |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 METHODS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head2 format_content_order |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
Format order can be 1-99. The Defang formatter runs on 16, just after the main |
22
|
|
|
|
|
|
|
formatter, in order to catch direct user input. Defang trusts the main formatter |
23
|
|
|
|
|
|
|
and all subsequently ran plugins to not output unsafe HTML. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=cut |
26
|
|
|
|
|
|
|
|
27
|
1860
|
|
|
1860
|
1
|
6001
|
sub format_content_order { 16 } |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head2 defang_tags_callback |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
Callback for custom handling specific HTML tags |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=cut |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
sub defang_tags_callback { |
36
|
|
|
|
|
|
|
my ( |
37
|
30
|
|
|
30
|
1
|
97
|
$c, $defang, $open_angle, |
38
|
|
|
|
|
|
|
$lc_tag, $is_end_tag, $attribute_hash, |
39
|
|
|
|
|
|
|
$close_angle, $html_r, $out_r |
40
|
|
|
|
|
|
|
) = @_; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Explicitly whitelist this tag, although unsafe |
43
|
30
|
100
|
|
|
|
96
|
return 0 if $lc_tag eq 'embed'; |
44
|
28
|
100
|
|
|
|
78
|
return 0 if $lc_tag eq 'object'; |
45
|
26
|
100
|
|
|
|
79
|
return 0 if $lc_tag eq 'param'; |
46
|
20
|
50
|
|
|
|
89
|
return 0 if $lc_tag eq 'pre'; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# I am not sure what to do with this tag, so process as |
49
|
|
|
|
|
|
|
# HTML::Defang normally would |
50
|
|
|
|
|
|
|
#return 2 if $lc_tag eq 'img'; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head2 defang_url_callback |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Callback for custom handling URLs in HTML attributes as well as |
56
|
|
|
|
|
|
|
styletag/attribute declarations |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=cut |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub defang_url_callback { |
61
|
124
|
|
|
124
|
1
|
408
|
my ( $c, $defang, $lc_tag, $lc_attr_key, $attr_val_r, $attribute_hash, |
62
|
|
|
|
|
|
|
$html_r ) |
63
|
|
|
|
|
|
|
= @_; |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Do not allow javascript to start a URL in tag attributes or stylesheets |
66
|
124
|
100
|
|
|
|
486
|
return 1 if $$attr_val_r =~ /^javascript/i; |
67
|
|
|
|
|
|
|
# Do not allow bypassing of protocol |
68
|
120
|
100
|
|
|
|
435
|
return 1 if $$attr_val_r =~ m{^//}i; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Explicitly defang this URL in tag attributes or stylesheets |
71
|
117
|
50
|
|
|
|
448
|
return 1 if $$attr_val_r =~ /youporn.com/i; |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Allow URL's otherwise |
74
|
117
|
|
|
|
|
6572
|
return 0; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 defang_css_callback |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Callback for custom handling style tags/attributes. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=cut |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub defang_css_callback { |
84
|
4
|
|
|
4
|
1
|
17
|
my ( $c, $defang, $selectors, $selector_rules, $tag, $is_attr ) = @_; |
85
|
4
|
|
|
|
|
10
|
my $i = 0; |
86
|
4
|
|
|
|
|
14
|
foreach (@$selectors) { |
87
|
4
|
|
|
|
|
10
|
my $selector_rule = $$selector_rules[$i]; |
88
|
4
|
|
|
|
|
12
|
foreach my $key_value_rules (@$selector_rule) { |
89
|
4
|
|
|
|
|
11
|
foreach my $key_value_rule (@$key_value_rules) { |
90
|
6
|
|
|
|
|
18
|
my ( $key, $value ) = @$key_value_rule; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Comment out any ’!important’ directive |
93
|
6
|
50
|
|
|
|
21
|
$$key_value_rule[2] = 1 if $value =~ '!important'; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Comment out any ’position=fixed;’ declaration |
96
|
6
|
50
|
33
|
|
|
30
|
$$key_value_rule[2] = 1 |
97
|
|
|
|
|
|
|
if $key =~ 'position' && $value =~ 'fixed'; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
} |
100
|
4
|
|
|
|
|
15
|
$i++; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head2 defang_attribs_callback |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Callback for custom handling HTML tag attributes. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=cut |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub defang_attribs_callback { |
111
|
61
|
|
|
61
|
1
|
210
|
my ( $c, $defang, $lc_tag, $lc_attr_key, $attr_val_r, $html_r ) = @_; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# if $lc_attr_key eq 'value'; |
114
|
|
|
|
|
|
|
# Initial Defang effort on attributes applies specifically to 'src' |
115
|
61
|
100
|
|
|
|
222
|
if ( $lc_attr_key eq 'src' ) { |
116
|
16
|
|
|
|
|
127
|
my $src_uri_object = URI->new($$attr_val_r); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# Allow src URI's from configuration. |
119
|
16
|
|
|
|
|
7642
|
my @allowed_src_regex; |
120
|
|
|
|
|
|
|
# Tests may not have a $c |
121
|
16
|
100
|
|
|
|
52
|
if ( defined $c ) { |
122
|
|
|
|
|
|
|
|
123
|
7
|
50
|
|
|
|
44
|
if ( exists $c->stash->{allowed_src_regexes} ) { |
124
|
0
|
|
|
|
|
0
|
@allowed_src_regex = @{ $c->stash->{allowed_src_regexes} }; |
|
0
|
|
|
|
|
0
|
|
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
else { |
127
|
7
|
|
|
|
|
594
|
my $allowed_src = $c->config->{allowed}{src}; |
128
|
|
|
|
|
|
|
my @allowed_src = |
129
|
7
|
50
|
|
|
|
804
|
ref $allowed_src ? @{$allowed_src} : ($allowed_src); |
|
7
|
|
|
|
|
32
|
|
130
|
7
|
50
|
|
|
|
34
|
@allowed_src_regex = map { qr/$_/ } @allowed_src if $allowed_src[0]; |
|
21
|
|
|
|
|
330
|
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# TODO: Shouldn't this be using pref cache? |
133
|
7
|
|
|
|
|
37
|
$c->stash->{allowed_src_regexes} = \@allowed_src_regex; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
} |
136
|
16
|
|
|
|
|
490
|
for my $allowed_src_regex (@allowed_src_regex) { |
137
|
19
|
100
|
|
|
|
85
|
if ( $$attr_val_r =~ $allowed_src_regex ) { |
138
|
1
|
|
|
|
|
5
|
return 0; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# When $c and src uri authority are defined we want to make sure |
144
|
|
|
|
|
|
|
# it matches the server of the img src. i.e. we allow images from the |
145
|
|
|
|
|
|
|
# local server whether the URI is relative or absolute.. |
146
|
15
|
100
|
100
|
|
|
105
|
if ( defined $c && defined $src_uri_object->authority ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
147
|
4
|
50
|
|
|
|
199
|
if ( $c->request->uri->authority eq $src_uri_object->authority ) { |
148
|
0
|
|
|
|
|
0
|
return 0; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
else { |
151
|
4
|
|
|
|
|
214
|
return 1; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
# We have an authority but no context. |
155
|
|
|
|
|
|
|
# Probably means we're testing with just the Defang formatter |
156
|
|
|
|
|
|
|
# instead of the Full formatter chain. |
157
|
|
|
|
|
|
|
# We will defang any src's left with an authority (defang_src) |
158
|
|
|
|
|
|
|
# since the approved ones were already allowed in above. |
159
|
|
|
|
|
|
|
elsif ( defined $src_uri_object->authority ) { |
160
|
5
|
|
|
|
|
580
|
return 1; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
# Explicitly defang javascript in img src. |
163
|
|
|
|
|
|
|
elsif ( $$attr_val_r =~ m{javascript}i ) { |
164
|
3
|
|
|
|
|
82
|
return 1; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
else { |
167
|
3
|
|
|
|
|
100
|
return 0; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
} |
170
|
45
|
|
|
|
|
151
|
return 0; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head2 format_content |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Calls the formatter. Takes a ref to the content as well as the |
176
|
|
|
|
|
|
|
context object. |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
sub format_content { |
181
|
141
|
|
|
141
|
1
|
414462
|
my ( $self, $content, $c ) = @_; |
182
|
|
|
|
|
|
|
|
183
|
141
|
|
|
|
|
2167
|
my $defang = MojoMojo::Declaw->new( |
184
|
|
|
|
|
|
|
context => $c, |
185
|
|
|
|
|
|
|
fix_mismatched_tags => 1, |
186
|
|
|
|
|
|
|
tags_to_callback => [qw/br embed object param img/], |
187
|
|
|
|
|
|
|
tags_callback => \&defang_tags_callback, |
188
|
|
|
|
|
|
|
url_callback => \&defang_url_callback, |
189
|
|
|
|
|
|
|
css_callback => \&defang_css_callback, |
190
|
|
|
|
|
|
|
attribs_to_callback => [qw(src value title)], |
191
|
|
|
|
|
|
|
attribs_callback => \&defang_attribs_callback, |
192
|
|
|
|
|
|
|
); |
193
|
|
|
|
|
|
|
|
194
|
141
|
|
|
|
|
801
|
$$content = $defang->defang($$content); |
195
|
141
|
|
|
|
|
1217
|
return; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head1 SEE ALSO |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
L<MojoMojo>, L<Module::Pluggable::Ordered>, L<MojoMojo::Defang> |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head1 AUTHORS |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Marcus Ramberg <mramberg@cpan.org> |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head1 LICENSE |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
This library is free software. You can redistribute it and/or modify |
209
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=cut |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
1; |