line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: Adnix.pm,v 1.1.1.1 2004/06/19 09:37:19 cosimo Exp $ |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
HTTP::Proxy::BodyFilter::Adnix - Automatically block advertising images with custom regexes |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use HTTP::Proxy::BodyFilter::Adnix |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# Use default blacklist rules and default placeholder image |
12
|
|
|
|
|
|
|
$proxy->push_filter( |
13
|
|
|
|
|
|
|
mime => 'image/*', |
14
|
|
|
|
|
|
|
response => HTTP::Proxy::BodyFilter::Adnix->new(); |
15
|
|
|
|
|
|
|
); |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# OR ... |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# You must supply your custom rules for filtering |
20
|
|
|
|
|
|
|
my $filter = HTTP::Proxy::BodyFilter::Adnix->new( |
21
|
|
|
|
|
|
|
deny => [ 'spammingserver.com', 'WeSpamYou.org', ... ], |
22
|
|
|
|
|
|
|
image => 'http://www.mydomain.com/mylogo.png' |
23
|
|
|
|
|
|
|
); |
24
|
|
|
|
|
|
|
$proxy->push_filter( |
25
|
|
|
|
|
|
|
mime => 'image/*', |
26
|
|
|
|
|
|
|
response => $filter |
27
|
|
|
|
|
|
|
); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 ABSTRACT |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
This class acts as a plugin filter module for HTTP::Proxy distribution. |
32
|
|
|
|
|
|
|
Its purpose is to block advertising (but you can define it) images |
33
|
|
|
|
|
|
|
to avoid wasting bandwidth for these images. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 DESCRIPTION |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
C filter module is based on |
38
|
|
|
|
|
|
|
C class that is part of C distribution. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
This filter tries to detect advertising images into your HTTP requests, |
41
|
|
|
|
|
|
|
and it replaces them with an image of your choice. |
42
|
|
|
|
|
|
|
Detection is done through a set of regular expression you can customize. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
If you're wondering where the name C comes from, you should |
45
|
|
|
|
|
|
|
really read the wonderful book B by Carl Sagan. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
For more detailed information on C, see |
48
|
|
|
|
|
|
|
its documentation on CPAN. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 EXPORT |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
None by default. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 METHODS |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
package HTTP::Proxy::BodyFilter::Adnix; |
59
|
|
|
|
|
|
|
|
60
|
1
|
|
|
1
|
|
7485
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
162
|
|
61
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
68
|
|
62
|
1
|
|
|
1
|
|
6
|
use base qw(HTTP::Proxy::BodyFilter); |
|
1
|
|
|
|
|
13
|
|
|
1
|
|
|
|
|
959
|
|
63
|
1
|
|
|
1
|
|
311
|
use vars qw($VERSION $IMAGE); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
708
|
|
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
$VERSION = '0.01'; |
66
|
|
|
|
|
|
|
our @UGLY_IMAGE = qw( |
67
|
|
|
|
|
|
|
5089 474e 0a0d 0a1a 0000 0d00 4849 5244 0000 1000 0000 1000 0608 0000 1f00 fff3 |
68
|
|
|
|
|
|
|
0061 0000 6206 474b 0044 00ff 00ff a0ff a7bd 0093 0000 7009 5948 0073 0b00 0012 |
69
|
|
|
|
|
|
|
0b00 0112 ddd2 fc7e 0000 0700 4974 454d d307 140c 240b ee31 e796 0095 0000 4935 |
70
|
|
|
|
|
|
|
4144 7854 639c 4060 0680 6628 4520 898c 4281 88c0 9a64 9403 803e 4f24 8c50 1762 |
71
|
|
|
|
|
|
|
8320 8606 0881 d183 1c40 440d d074 30c0 8110 0008 da6b 9616 bfd8 6986 0000 0000 |
72
|
|
|
|
|
|
|
4549 444e 42ae 8260 |
73
|
|
|
|
|
|
|
); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head2 init() |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Internal. Gets called on filter initialization. |
78
|
|
|
|
|
|
|
Accepts the options that customize filter behaviour. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=over 4 |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=item image |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
String. Filesystem path to PNG image to be used as a placeholder for all detected |
85
|
|
|
|
|
|
|
advertising images. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item deny |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
Array reference. Must contain all regular expressions that block images. |
90
|
|
|
|
|
|
|
This means that if the current image matches any of these regular expressions, |
91
|
|
|
|
|
|
|
it will be blocked. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=back |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub init |
98
|
|
|
|
|
|
|
{ |
99
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
100
|
0
|
|
|
|
|
|
my %opt; |
101
|
|
|
|
|
|
|
|
102
|
0
|
0
|
|
|
|
|
if( @_ % 1 == 0 ) { |
103
|
0
|
|
|
|
|
|
%opt = @_; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
else { |
106
|
0
|
|
|
|
|
|
croak "You must supply key => value options"; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Set path of placeholder image |
110
|
0
|
0
|
|
|
|
|
if( exists $opt{image} ) { |
111
|
0
|
|
|
|
|
|
$self->{_image} = $opt{image}; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
0
|
|
0
|
|
|
|
$self->{_image} ||= '/usr/local/share/replaced.png'; |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Set regex blacklist |
117
|
0
|
0
|
0
|
|
|
|
if( exists $opt{deny} && ref $opt{deny} eq 'ARRAY' ) { |
118
|
0
|
|
|
|
|
|
$self->{_denylist} = $opt{deny}; |
119
|
|
|
|
|
|
|
} |
120
|
0
|
|
0
|
|
|
|
$self->{_denylist} ||= [ map { qr($_) } |
|
0
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
'ad[vs\.]', |
122
|
|
|
|
|
|
|
'adv?server', |
123
|
|
|
|
|
|
|
'468x60', |
124
|
|
|
|
|
|
|
'doubleclick\.net', |
125
|
|
|
|
|
|
|
'promot[ie]', |
126
|
|
|
|
|
|
|
]; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# Set regex whitelist (empty at start) |
129
|
0
|
0
|
0
|
|
|
|
if( exists $opt{allow} && ref $opt{allow} eq 'ARRAY' ) { |
130
|
0
|
|
|
|
|
|
$self->{_allowlist} = $opt{allow}; |
131
|
|
|
|
|
|
|
} |
132
|
0
|
|
0
|
|
|
|
$self->{_allowlist} ||= []; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Do I need this? TODO must ask Philippe |
135
|
0
|
|
|
|
|
|
$self->{rw} = delete $opt{rw}; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head2 filter() |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
This is where the hard work gets done. |
143
|
|
|
|
|
|
|
Every image is matched against a set of regexes and if it matches B of |
144
|
|
|
|
|
|
|
these, it is B downloaded and B by the |
145
|
|
|
|
|
|
|
placeholder image. |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
The intention here is to save bandwidth and to remove those annoying |
148
|
|
|
|
|
|
|
banners. |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=cut |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub filter |
153
|
|
|
|
|
|
|
{ |
154
|
0
|
|
|
0
|
1
|
|
my($self, $headers, $message) = @_; |
155
|
0
|
|
|
|
|
|
my $uri = $message->uri(); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# "DECLINE" non-image urls |
158
|
|
|
|
|
|
|
#return 0 unless $uri =~ /\.(gif|jpe?g|png)/i; |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# Load placeholder image if not yet done |
161
|
0
|
0
|
|
|
|
|
if( ! $IMAGE ) { |
162
|
0
|
|
|
|
|
|
$self->_loadImage(); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
0
|
|
|
|
|
|
foreach( @{ $self->{_denylist} } ) { |
|
0
|
|
|
|
|
|
|
166
|
0
|
|
|
|
|
|
my $re = $_; |
167
|
0
|
0
|
|
|
|
|
if( $uri =~ $re ) { |
168
|
0
|
|
|
|
|
|
$self->proxy()->log( '', '', 'blocked ad image('.$uri.')' ); |
169
|
0
|
0
|
|
|
|
|
my $response = HTTP::Response->new( |
170
|
|
|
|
|
|
|
200, |
171
|
|
|
|
|
|
|
'OK', |
172
|
|
|
|
|
|
|
HTTP::Headers->new( |
173
|
|
|
|
|
|
|
Content_Type => ( $self->{_image} =~ /\.(gif|png|jpg)/i ? qq{image/$1} : 'image/png' ), |
174
|
|
|
|
|
|
|
Content_Length => -s $IMAGE, |
175
|
|
|
|
|
|
|
), |
176
|
|
|
|
|
|
|
$IMAGE |
177
|
|
|
|
|
|
|
); |
178
|
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
|
$self->proxy()->response($response); |
180
|
0
|
|
|
|
|
|
last; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
0
|
|
|
|
|
|
return 1; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=head2 _loadImage() |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
Internal function. Tries to load the image to be used as a placeholder |
192
|
|
|
|
|
|
|
for all advertising images. If no remote/local image can be loaded, |
193
|
|
|
|
|
|
|
an hardcoded binary PNG image is used. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=cut |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub _loadImage |
198
|
|
|
|
|
|
|
{ |
199
|
0
|
|
|
0
|
|
|
my $self = $_[0]; |
200
|
0
|
|
|
|
|
|
my $loaded = 0; |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
#$self->{_image} = lc $self->{_image}; |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# If image is specified as URL, try to load it |
205
|
0
|
0
|
0
|
|
|
|
if( ($self->{_image} =~ m|^[Hh][Tt][Tt][Pp]://|) || ($self->{_image} =~ m|^[Ff][Tt][Pp]://|) ) { |
206
|
0
|
|
|
|
|
|
eval { |
207
|
1
|
|
|
1
|
|
783
|
use LWP::Simple (); |
|
1
|
|
|
|
|
112664
|
|
|
1
|
|
|
|
|
430
|
|
208
|
0
|
|
|
|
|
|
$IMAGE = LWP::Simple::get($self->{_image}); |
209
|
0
|
0
|
0
|
|
|
|
$loaded = 1 if defined $IMAGE && length($IMAGE) > 0; |
210
|
|
|
|
|
|
|
}; |
211
|
0
|
0
|
|
|
|
|
if( ! $loaded ) { |
212
|
0
|
|
|
|
|
|
croak "Can't load image $$self{_image}"; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
else { |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# Try to load image from disk |
219
|
0
|
0
|
|
|
|
|
if( open IMG, $self->{_image} ) { |
220
|
0
|
|
|
|
|
|
binmode(IMG); |
221
|
0
|
|
|
|
|
|
local $/ = undef; |
222
|
0
|
|
|
|
|
|
$IMAGE = ; |
223
|
0
|
|
0
|
|
|
|
$loaded = (length($IMAGE) > 0) && close(IMG); |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
|
228
|
0
|
0
|
|
|
|
|
if( $loaded ) { |
229
|
|
|
|
|
|
|
#printf STDERR 'loaded replace image from %s (%d bytes)'."\n", $self->{_image}, length($IMAGE); |
230
|
|
|
|
|
|
|
# Here proxy object is not yet prepared |
231
|
0
|
|
|
|
|
|
$self->proxy()->log('', '', sprintf('loaded replace image from %s (%d bytes)', $self->{_image}, length($IMAGE))); |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
else { |
234
|
|
|
|
|
|
|
# If all else failed, load static binary PNG data |
235
|
0
|
|
|
|
|
|
$IMAGE = ""; |
236
|
|
|
|
|
|
|
|
237
|
0
|
|
|
|
|
|
for( @UGLY_IMAGE ) { |
238
|
0
|
|
|
|
|
|
my($b1,$b2) = unpack('A2 A2',$_); |
239
|
0
|
|
|
|
|
|
$b1 = chr hex $b1; |
240
|
0
|
|
|
|
|
|
$b2 = chr hex $b2; |
241
|
0
|
|
|
|
|
|
$IMAGE .= $b2 . $b1; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
0
|
|
|
|
|
|
return $loaded; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
1; |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
# |
251
|
|
|
|
|
|
|
# END OF MODULE |
252
|
|
|
|
|
|
|
# |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=head1 SEE ALSO |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
For more information, you should read C distribution documentation. |
258
|
|
|
|
|
|
|
If you find this class useful or want to report complaints or bugs, please |
259
|
|
|
|
|
|
|
do it through the good CPAN bug report system on http://rt.cpan.org. |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
This class has been derived from original work by Philippe "Book" Bruhat, |
262
|
|
|
|
|
|
|
author of L distribution. Go check out his good work! |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=head1 AUTHOR |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
Cosimo Streppone Ecosimo@cpan.orgE |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
Copyright 2004 by Cosimo Streppone |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
273
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=cut |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
1; |
278
|
|
|
|
|
|
|
|