File Coverage

blib/lib/Catalyst/Plugin/HTML/Scrubber.pm
Criterion Covered Total %
statement 74 79 93.6
branch 48 62 77.4
condition 6 6 100.0
subroutine 13 13 100.0
pod 1 3 33.3
total 142 163 87.1


line stmt bran cond sub pod time code
1             package Catalyst::Plugin::HTML::Scrubber;
2             $Catalyst::Plugin::HTML::Scrubber::VERSION = '0.07';
3 5     5   9096654 use Moose;
  5         51  
  5         41  
4 5     5   36482 use namespace::autoclean;
  5         13  
  5         68  
5              
6             with 'Catalyst::ClassData';
7              
8 5     5   458 use MRO::Compat;
  5         12  
  5         177  
9 5     5   3085 use HTML::Scrubber;
  5         14810  
  5         5738  
10              
11             __PACKAGE__->mk_classdata('_scrubber');
12              
13             sub setup {
14 4     4 1 1279611 my $c = shift;
15              
16 4         35 my $conf = $c->config->{scrubber};
17 4 50       524 if (ref $conf eq 'ARRAY') {
    100          
18 0         0 $c->_scrubber(HTML::Scrubber->new(@$conf));
19             } elsif (ref $conf eq 'HASH') {
20             $c->config->{scrubber}{auto} = 1
21 3 50       15 unless defined $c->config->{scrubber}{auto};
22 3         293 $c->_scrubber(HTML::Scrubber->new(@{$conf->{params}}));
  3         36  
23             } else {
24 1         9 $c->_scrubber(HTML::Scrubber->new());
25             }
26              
27 4         882 return $c->maybe::next::method(@_);
28             }
29              
30             sub execute {
31 141     141 0 724203 my $c = shift;
32              
33 141         647 $c->maybe::next::method(@_);
34              
35 141         77287 my $conf = $c->config->{scrubber};
36              
37             # There are two ways to configure the plugin, it seems; giving a hashref
38             # of params under `scrubber`, with any params intended for HTML::Scrubber
39             # under the vaguely-named `params` key, or an arrayref of params intended
40             # to be passed straight to HTML::Scrubber - save html_scrub() from knowing
41             # about that by abstracting that nastyness away:
42 141 100 100     13130 if (ref $conf ne 'HASH' || $conf->{auto}) {
43 105 100       402 $c->html_scrub(ref($conf) eq 'HASH' ? $conf : {});
44             }
45             }
46              
47             sub html_scrub {
48 107     107 0 669 my ($c, $conf) = @_;
49              
50             # Firstly, if an entry in ignore_urls matches, then we don't want to
51             # scrub anything for this request...
52 107 100       285 return if ($c->_req_path_exempt_from_scrubbing($conf));
53              
54             # If there's body_data - for e.g. a POSTed JSON body that was decoded -
55             # then we need to walk through it, scrubbing as appropriate; don't call
56             # body_data unless the content type is one there's a data handler for
57             # though, otherwise we'll trigger an exception (see GH#4)
58 95 100       308 if (exists $c->req->data_handlers->{ $c->req->content_type }) {
59 77 50       12845 if (my $body_data = $c->request->body_data) {
60 77         25064 $c->_scrub_recurse($conf, $c->request->body_data);
61             }
62             }
63              
64             # And if Catalyst::Controller::REST is in use so we have $req->data,
65             # then scrub that too
66 95 100       5210 if ($c->request->can('data')) {
67 9         297 my $data = $c->request->data;
68 9 50       712 if ($data) {
69 9         193 $c->_scrub_recurse($conf, $c->request->data);
70             }
71             }
72              
73             # Normal query/POST body parameters:
74 95         2944 $c->_scrub_recurse($conf, $c->request->parameters);
75              
76             }
77              
78             # Recursively scrub param values...
79             sub _scrub_recurse {
80 229     229   8381 my ($c, $conf, $data) = @_;
81              
82             # If the thing we've got is a hashref, walk over its keys, checking
83             # whether we should ignore, otherwise, do the needful
84 229 100       605 if (ref $data eq 'HASH') {
    50          
    0          
85 205         708 for my $key (keys %$data) {
86 256 100       626 if (!$c->_should_scrub_param($conf, $key)) {
87 48         162 next;
88             }
89              
90             # OK, it's fine to fettle with this key - if its value is
91             # a ref, recurse, otherwise, scrub
92 208 100       585 if (my $ref = ref $data->{$key}) {
93             $c->_scrub_recurse($conf, $data->{$key})
94 48 50       210 if defined $data->{$key};
95             } else {
96             # Alright, non-ref value, so scrub it
97             # FIXME why did we have to have this ref-ref handling fun?
98             #$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
99             $data->{$key} = $c->_scrub_value($conf, $data->{$key})
100 160 50       625 if defined $data->{$key};
101             }
102             }
103             } elsif (ref $data eq 'ARRAY') {
104 24         48 for (@$data) {
105 48 50       112 if (ref $_) {
106 0         0 $c->_scrub_recurse($conf, $_);
107             } else {
108 48 50       144 $_ = $c->_scrub_value($conf, $_) if defined $_;
109             }
110             }
111             } elsif (ref $data eq 'CODE') {
112 0         0 $c->log->debug("Can't scrub a coderef!");
113             } else {
114             # This shouldn't happen, as we should always start with a ref,
115             # and non-ref hash/array values should have been handled above.
116 0         0 $c->log->debug("Non-ref to scrub - should this happen?");
117             }
118             }
119              
120              
121             # Wrap HTML::Scrubber's scrub() so we can decode HTML entities if needed
122             sub _scrub_value {
123 208     208   454 my ($c, $conf, $value) = @_;
124              
125 208 50       458 return $value unless defined $value;
126            
127 208         601 $value = $c->_scrubber->scrub($value);
128              
129 208 100       21022 if ($conf->{no_encode_entities}) {
130 12         74 $value = HTML::Entities::decode_entities($value);
131             }
132 208         834 return $value;
133             }
134              
135             sub _should_scrub_param {
136 256     256   577 my ($c, $conf, $param) = @_;
137             # If we only want to operate on certain params, do that checking
138             # now...
139 256 100 100     1057 if ($conf && $conf->{ignore_params}) {
140 216         647 my $ignore_params = $c->config->{scrubber}{ignore_params};
141 216 50       18199 if (ref $ignore_params ne 'ARRAY') {
142 0         0 $ignore_params = [ $ignore_params ];
143             }
144 216         470 for my $ignore_param (@$ignore_params) {
145 396 100       867 if (ref $ignore_param eq 'Regexp') {
146 216 100       1070 return if $param =~ $ignore_param;
147             } else {
148 180 100       531 return if $param eq $ignore_param;
149             }
150             }
151             }
152              
153             # If we've not bailed above, we didn't match any ignore_params
154             # entries, or didn't have any, so we do want to scrub
155 208         501 return 1;
156             }
157              
158              
159             sub _req_path_exempt_from_scrubbing {
160 107     107   204 my ($c, $conf) = @_;
161 107 100       329 return unless exists $conf->{ignore_paths};
162              
163 72         200 my $req_path = $c->req->path;
164 72 50       6844 $req_path = "/$req_path" unless $req_path =~ m{^/};
165 72         146 for my $ignore (@{ $conf->{ignore_paths} }) {
  72         206  
166 138 100       340 if (ref $ignore eq 'Regexp') {
167 66 100       364 return 1 if $req_path =~ $ignore;
168             } else {
169 72 100       194 return 1 if $req_path eq $ignore;
170             }
171             }
172             }
173              
174             # Incredibly nasty monkey-patch to rewind filehandle before parsing - see
175             # https://github.com/perl-catalyst/catalyst-runtime/pull/186
176             # First, get the default handlers hashref:
177             my $default_data_handlers = Catalyst->default_data_handlers();
178              
179             # Wrap the coderef for application/json in one that rewinds the filehandle
180             # first:
181             my $orig_json_handler = $default_data_handlers->{'application/json'};
182             $default_data_handlers->{'application/json'} = sub {
183             $_[0]->seek(0,0); # rewind $fh arg
184             $orig_json_handler->(@_);
185             };
186              
187              
188             {
189             # and now replace the original default_data_handlers() with a version that
190             # returns our modified handlers
191 5     5   50 no warnings 'redefine';
  5         19  
  5         505  
192             *Catalyst::default_data_handlers = sub {
193 4     4   10559 return $default_data_handlers;
194             };
195             }
196              
197             __PACKAGE__->meta->make_immutable;
198              
199             1;
200             __END__
201              
202              
203             =head1 NAME
204              
205             Catalyst::Plugin::HTML::Scrubber - Catalyst plugin for scrubbing/sanitizing incoming parameters
206              
207             =head1 SYNOPSIS
208              
209             use Catalyst qw[HTML::Scrubber];
210              
211             MyApp->config(
212             scrubber => {
213             auto => 1, # automatically run on request
214              
215             # Exempt certain parameter names from scrubbing
216             ignore_params => [ qr/_html$/, 'article_body' ],
217              
218             # Don't scrub at all for certain URL paths:
219             ignore_paths => [
220             '/foo',
221             qr{^/foo/.+},
222             ],
223              
224             # HTML::Scrubber will HTML-encode some chars, e.g. angle
225             # brackets. If you don't want that, enable this setting and
226             # the scrubbed values will be unencoded.
227             no_decode_entities => 0,
228            
229             # The following are options to HTML::Scrubber
230             params => [
231             default => 0,
232             comment => 0,
233             script => 0,
234             process => 0,
235             allow => [qw [ br hr b a h1]],
236             ],
237             },
238             );
239              
240             =head1 DESCRIPTION
241              
242             On request, sanitize HTML tags in all params (with the ability to exempt
243             some if needed), to protect against XSS (cross-site scripting) attacks and
244             other unwanted things.
245              
246              
247             =head1 EXTENDED METHODS
248              
249             =over 4
250              
251             =item setup
252              
253             See SYNOPSIS for how to configure the plugin, both with its own configuration
254             (e.g. whether to automatically run, whether to exempt certain fields) and
255             passing on any options from L<HTML::Scrubber> to control exactly what
256             scrubbing happens.
257              
258             =item dispatch
259              
260             Sanitize HTML tags in all parameters (unless `ignore_params` exempts them) -
261             this includes normal POST params, and serialised data (e.g. a POSTed JSON body)
262             accessed via `$c->req->body_data` or `$c->req->data`.
263              
264             =back
265              
266             =head1 SEE ALSO
267              
268             L<Catalyst>, L<HTML::Scrubber>.
269              
270             =head1 AUTHOR
271              
272             Hideo Kimura, << <hide@hide-k.net> >> original author
273              
274             David Precious (BIGPRESH), C<< <davidp@preshweb.co.uk> >> maintainer since 2023-07-17
275              
276             =head1 COPYRIGHT AND LICENSE
277              
278             Copyright (C) 2005 by Hideo Kimura
279              
280             This library is free software; you can redistribute it and/or modify
281             it under the same terms as Perl itself.
282              
283             =cut