line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Perlanet::Trait::Scrubber; |
2
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
3908
|
use strict; |
|
7
|
|
|
|
|
16
|
|
|
7
|
|
|
|
|
199
|
|
4
|
7
|
|
|
7
|
|
36
|
use warnings; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
174
|
|
5
|
|
|
|
|
|
|
|
6
|
7
|
|
|
7
|
|
32
|
use Moose::Role; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
48
|
|
7
|
7
|
|
|
7
|
|
37920
|
use namespace::autoclean; |
|
7
|
|
|
|
|
21
|
|
|
7
|
|
|
|
|
66
|
|
8
|
|
|
|
|
|
|
|
9
|
7
|
|
|
7
|
|
4628
|
use HTML::Scrubber; |
|
7
|
|
|
|
|
15860
|
|
|
7
|
|
|
|
|
1788
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Perlanet::Trait::Scrubber - clean posts with HTML::Scrubber before aggregating |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 DESCRIPTION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Before adding a post to the aggregated feed, it will first be cleaned with |
18
|
|
|
|
|
|
|
L<HTML::Scrubber>. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 scrubber |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
An instance of L<HTML::Scrubber> used to remove unwanted content from |
25
|
|
|
|
|
|
|
the feed entries. For default settings see source of Perlanet.pm. |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=cut |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
has 'scrubber' => ( |
30
|
|
|
|
|
|
|
is => 'rw', |
31
|
|
|
|
|
|
|
lazy_build => 1 |
32
|
|
|
|
|
|
|
); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub _build_scrubber { |
35
|
0
|
|
|
0
|
|
|
my $self = shift; |
36
|
|
|
|
|
|
|
|
37
|
0
|
|
|
|
|
|
my %scrub_rules = ( |
38
|
|
|
|
|
|
|
img => { |
39
|
|
|
|
|
|
|
src => qr{^https?://}, # only URL with http:// |
40
|
|
|
|
|
|
|
alt => 1, # alt attributes allowed |
41
|
|
|
|
|
|
|
align => 1, # allow align on images |
42
|
|
|
|
|
|
|
style => 1, |
43
|
|
|
|
|
|
|
width => 1, |
44
|
|
|
|
|
|
|
height => 1, |
45
|
|
|
|
|
|
|
'*' => 0, # deny all others |
46
|
|
|
|
|
|
|
}, |
47
|
|
|
|
|
|
|
style => 0, |
48
|
|
|
|
|
|
|
script => 0, |
49
|
|
|
|
|
|
|
span => { |
50
|
|
|
|
|
|
|
id => 0, # blogger(?) includes spans with id attribute |
51
|
|
|
|
|
|
|
}, |
52
|
|
|
|
|
|
|
a => { |
53
|
|
|
|
|
|
|
href => 1, |
54
|
|
|
|
|
|
|
'*' => 0, |
55
|
|
|
|
|
|
|
}, |
56
|
|
|
|
|
|
|
); |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Definitions for HTML::Scrub |
59
|
0
|
|
|
|
|
|
my %scrub_def = ( |
60
|
|
|
|
|
|
|
'*' => 1, |
61
|
|
|
|
|
|
|
'href' => qr{^(?!(?:java)?script)}i, |
62
|
|
|
|
|
|
|
'src' => qr{^(?!(?:java)?script)}i, |
63
|
|
|
|
|
|
|
'cite' => '(?i-xsm:^(?!(?:java)?script))', |
64
|
|
|
|
|
|
|
'language' => 0, |
65
|
|
|
|
|
|
|
'name' => 1, |
66
|
|
|
|
|
|
|
'value' => 1, |
67
|
|
|
|
|
|
|
'onblur' => 0, |
68
|
|
|
|
|
|
|
'onchange' => 0, |
69
|
|
|
|
|
|
|
'onclick' => 0, |
70
|
|
|
|
|
|
|
'ondblclick' => 0, |
71
|
|
|
|
|
|
|
'onerror' => 0, |
72
|
|
|
|
|
|
|
'onfocus' => 0, |
73
|
|
|
|
|
|
|
'onkeydown' => 0, |
74
|
|
|
|
|
|
|
'onkeypress' => 0, |
75
|
|
|
|
|
|
|
'onkeyup' => 0, |
76
|
|
|
|
|
|
|
'onload' => 0, |
77
|
|
|
|
|
|
|
'onmousedown' => 0, |
78
|
|
|
|
|
|
|
'onmousemove' => 0, |
79
|
|
|
|
|
|
|
'onmouseout' => 0, |
80
|
|
|
|
|
|
|
'onmouseover' => 0, |
81
|
|
|
|
|
|
|
'onmouseup' => 0, |
82
|
|
|
|
|
|
|
'onreset' => 0, |
83
|
|
|
|
|
|
|
'onselect' => 0, |
84
|
|
|
|
|
|
|
'onsubmit' => 0, |
85
|
|
|
|
|
|
|
'onunload' => 0, |
86
|
|
|
|
|
|
|
'src' => 1, |
87
|
|
|
|
|
|
|
'type' => 1, |
88
|
|
|
|
|
|
|
'style' => 1, |
89
|
|
|
|
|
|
|
'class' => 0, |
90
|
|
|
|
|
|
|
'id' => 0, |
91
|
|
|
|
|
|
|
'frameborder' => 0, |
92
|
|
|
|
|
|
|
'border' => 0, |
93
|
|
|
|
|
|
|
); |
94
|
|
|
|
|
|
|
|
95
|
0
|
|
|
|
|
|
my $scrub = HTML::Scrubber->new; |
96
|
0
|
|
|
|
|
|
$scrub->rules(%scrub_rules); |
97
|
0
|
|
|
|
|
|
$scrub->default(1, \%scrub_def); |
98
|
|
|
|
|
|
|
|
99
|
0
|
|
|
|
|
|
return $scrub; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
around 'clean_html' => sub { |
103
|
|
|
|
|
|
|
my $orig = shift; |
104
|
|
|
|
|
|
|
my ($self, $html) = @_; |
105
|
|
|
|
|
|
|
$html = $self->$orig($html); |
106
|
|
|
|
|
|
|
my $scrubbed = $self->scrubber->scrub($html); |
107
|
|
|
|
|
|
|
return $scrubbed; |
108
|
|
|
|
|
|
|
}; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 AUTHOR |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Dave Cross, <dave@mag-sol.com> |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Copyright (c) 2010 by Magnum Solutions Ltd. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
119
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.10.0 or, |
120
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=cut |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
1; |