line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Perlanet::Trait::Scrubber; |
2
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
2266
|
use strict; |
|
6
|
|
|
|
|
9
|
|
|
6
|
|
|
|
|
156
|
|
4
|
6
|
|
|
6
|
|
23
|
use warnings; |
|
6
|
|
|
|
|
7
|
|
|
6
|
|
|
|
|
116
|
|
5
|
|
|
|
|
|
|
|
6
|
6
|
|
|
6
|
|
44
|
use 5.6.0; |
|
6
|
|
|
|
|
15
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.58'; |
8
|
|
|
|
|
|
|
|
9
|
6
|
|
|
6
|
|
20
|
use Moose::Role; |
|
6
|
|
|
|
|
7
|
|
|
6
|
|
|
|
|
29
|
|
10
|
6
|
|
|
6
|
|
18715
|
use namespace::autoclean; |
|
6
|
|
|
|
|
8
|
|
|
6
|
|
|
|
|
34
|
|
11
|
|
|
|
|
|
|
|
12
|
6
|
|
|
6
|
|
2947
|
use HTML::Scrubber; |
|
6
|
|
|
|
|
10183
|
|
|
6
|
|
|
|
|
1429
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Perlanet::Trait::Scrubber - clean posts with HTML::Scrubber before aggregating |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Before adding a post to the aggregated feed, it will first be cleaned with |
21
|
|
|
|
|
|
|
L<HTML::Scrubber>. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 scrubber |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
An instance of L<HTML::Scrubber> used to remove unwanted content from |
28
|
|
|
|
|
|
|
the feed entries. For default settings see source of Perlanet.pm. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=cut |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
has 'scrubber' => ( |
33
|
|
|
|
|
|
|
is => 'rw', |
34
|
|
|
|
|
|
|
lazy_build => 1 |
35
|
|
|
|
|
|
|
); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub _build_scrubber { |
38
|
0
|
|
|
0
|
|
|
my $self = shift; |
39
|
|
|
|
|
|
|
|
40
|
0
|
|
|
|
|
|
my %scrub_rules = ( |
41
|
|
|
|
|
|
|
img => { |
42
|
|
|
|
|
|
|
src => qr{^https?://}, # only URL with http:// |
43
|
|
|
|
|
|
|
alt => 1, # alt attributes allowed |
44
|
|
|
|
|
|
|
align => 1, # allow align on images |
45
|
|
|
|
|
|
|
style => 1, |
46
|
|
|
|
|
|
|
width => 1, |
47
|
|
|
|
|
|
|
height => 1, |
48
|
|
|
|
|
|
|
'*' => 0, # deny all others |
49
|
|
|
|
|
|
|
}, |
50
|
|
|
|
|
|
|
style => 0, |
51
|
|
|
|
|
|
|
script => 0, |
52
|
|
|
|
|
|
|
span => { |
53
|
|
|
|
|
|
|
id => 0, # blogger(?) includes spans with id attribute |
54
|
|
|
|
|
|
|
}, |
55
|
|
|
|
|
|
|
a => { |
56
|
|
|
|
|
|
|
href => 1, |
57
|
|
|
|
|
|
|
'*' => 0, |
58
|
|
|
|
|
|
|
}, |
59
|
|
|
|
|
|
|
); |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Definitions for HTML::Scrub |
62
|
0
|
|
|
|
|
|
my %scrub_def = ( |
63
|
|
|
|
|
|
|
'*' => 1, |
64
|
|
|
|
|
|
|
'href' => qr{^(?!(?:java)?script)}i, |
65
|
|
|
|
|
|
|
'src' => qr{^(?!(?:java)?script)}i, |
66
|
|
|
|
|
|
|
'cite' => '(?i-xsm:^(?!(?:java)?script))', |
67
|
|
|
|
|
|
|
'language' => 0, |
68
|
|
|
|
|
|
|
'name' => 1, |
69
|
|
|
|
|
|
|
'value' => 1, |
70
|
|
|
|
|
|
|
'onblur' => 0, |
71
|
|
|
|
|
|
|
'onchange' => 0, |
72
|
|
|
|
|
|
|
'onclick' => 0, |
73
|
|
|
|
|
|
|
'ondblclick' => 0, |
74
|
|
|
|
|
|
|
'onerror' => 0, |
75
|
|
|
|
|
|
|
'onfocus' => 0, |
76
|
|
|
|
|
|
|
'onkeydown' => 0, |
77
|
|
|
|
|
|
|
'onkeypress' => 0, |
78
|
|
|
|
|
|
|
'onkeyup' => 0, |
79
|
|
|
|
|
|
|
'onload' => 0, |
80
|
|
|
|
|
|
|
'onmousedown' => 0, |
81
|
|
|
|
|
|
|
'onmousemove' => 0, |
82
|
|
|
|
|
|
|
'onmouseout' => 0, |
83
|
|
|
|
|
|
|
'onmouseover' => 0, |
84
|
|
|
|
|
|
|
'onmouseup' => 0, |
85
|
|
|
|
|
|
|
'onreset' => 0, |
86
|
|
|
|
|
|
|
'onselect' => 0, |
87
|
|
|
|
|
|
|
'onsubmit' => 0, |
88
|
|
|
|
|
|
|
'onunload' => 0, |
89
|
|
|
|
|
|
|
'src' => 1, |
90
|
|
|
|
|
|
|
'type' => 1, |
91
|
|
|
|
|
|
|
'style' => 1, |
92
|
|
|
|
|
|
|
'class' => 0, |
93
|
|
|
|
|
|
|
'id' => 0, |
94
|
|
|
|
|
|
|
'frameborder' => 0, |
95
|
|
|
|
|
|
|
'border' => 0, |
96
|
|
|
|
|
|
|
); |
97
|
|
|
|
|
|
|
|
98
|
0
|
|
|
|
|
|
my $scrub = HTML::Scrubber->new; |
99
|
0
|
|
|
|
|
|
$scrub->rules(%scrub_rules); |
100
|
0
|
|
|
|
|
|
$scrub->default(1, \%scrub_def); |
101
|
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
return $scrub; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
around 'clean_html' => sub { |
106
|
|
|
|
|
|
|
my $orig = shift; |
107
|
|
|
|
|
|
|
my ($self, $html) = @_; |
108
|
|
|
|
|
|
|
$html = $self->$orig($html); |
109
|
|
|
|
|
|
|
my $scrubbed = $self->scrubber->scrub($html); |
110
|
|
|
|
|
|
|
return $scrubbed; |
111
|
|
|
|
|
|
|
}; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head1 AUTHOR |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Dave Cross, <dave@mag-sol.com> |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Copyright (c) 2010 by Magnum Solutions Ltd. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
122
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.10.0 or, |
123
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=cut |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
1; |