line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
package URBL::Prepare; |
3
|
|
|
|
|
|
|
|
4
|
1
|
|
|
1
|
|
590
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
35
|
|
5
|
|
|
|
|
|
|
#use diagnostics; |
6
|
1
|
|
|
1
|
|
788
|
use AutoLoader 'AUTOLOAD'; |
|
1
|
|
|
|
|
1399
|
|
|
1
|
|
|
|
|
4
|
|
7
|
1
|
|
|
1
|
|
26
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
931
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
$VERSION = do { my @r = (q$Revision: 0.09 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
sub loadcache; |
12
|
0
|
|
|
0
|
0
|
0
|
sub Destroy {}; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
URPL::Prepare -- prepare hostname for URBL domain lookup |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
require URBL::Prepare; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $ubp = new URBL::Prepare; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
$tlds = $blessed->cachetlds($localfilelistptr); |
25
|
|
|
|
|
|
|
$whitelist = $blessed->cachewhite($localfilelistptr); |
26
|
|
|
|
|
|
|
$domain = $blessed->urbldomain($hostname); |
27
|
|
|
|
|
|
|
$response_code = $proto->loadcache($url,$localfile); |
28
|
|
|
|
|
|
|
($response,$message) = $proto->loadcache($url,$localfile); |
29
|
|
|
|
|
|
|
$rv = $blessed->urblblack($hostname); |
30
|
|
|
|
|
|
|
$rv = $blessed->urblwhite($hostname); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 DESCRIPTION |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=over 2 |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=item * my $urbl = new URBL::Prepare; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
This method returns a blessed reference to an empty hash. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
For use with other modules: |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
require URBL::Prepare; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
@ISA = qw(URBL::Prepare); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=back |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=cut |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub new { |
51
|
1
|
|
|
1
|
1
|
13
|
my $proto = shift; |
52
|
1
|
|
50
|
|
|
10
|
my $class = ref $proto || $proto || __PACKAGE__; |
53
|
1
|
|
|
|
|
4
|
bless {}, $class; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head1 URBL Preparation for lookup methods |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
The following three methods are for facilitating URBL lookups. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
SEE: http://www.uribl.com/about.shtml |
61
|
|
|
|
|
|
|
and http://www.surbl.org/guidelines |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=over 2 |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item * $tldlist = $blessed->cachetlds($localfilelistptr); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
This method opens local files in "file list" and extracts the tld's found |
68
|
|
|
|
|
|
|
therein. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
input: ptr to array of local/file/path/names |
71
|
|
|
|
|
|
|
return: array ptr to list of tld's |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
NOTE: place level 3 tld's ahead of level 2 tld's |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=cut |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# do level3 tld's first |
78
|
|
|
|
|
|
|
sub cachetlds { |
79
|
3
|
|
|
3
|
1
|
99
|
my($bls,$files) = @_; |
80
|
3
|
|
|
|
|
5
|
my @tldlist; |
81
|
3
|
|
|
|
|
7
|
foreach my $infile (@$files) { |
82
|
2
|
|
|
|
|
3
|
my $tldf; |
83
|
2
|
50
|
|
|
|
81
|
next unless open $tldf, $infile; |
84
|
2
|
|
|
|
|
2172
|
foreach (<$tldf>) { |
85
|
4120
|
|
|
|
|
4389
|
chomp; |
86
|
4120
|
50
|
|
|
|
9605
|
next unless $_ =~ /\S/; |
87
|
4120
|
|
|
|
|
8980
|
$_ =~ s/\./\\./g; |
88
|
4120
|
|
|
|
|
8222
|
push @tldlist, lc $_; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
} |
91
|
3
|
|
|
|
|
19
|
$bls->{-urblpreparebad} = \@tldlist; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item * $whitelist = $blessed->cachewhite($localfilelistptr); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
This method opens local file(s) in "file list" and extracts the domains |
97
|
|
|
|
|
|
|
found therein. |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
See http://wiki.apache.org/spamassassin/DnsBlocklists and |
100
|
|
|
|
|
|
|
http://spamassasin.googlecode.com/svn-history/r6/trunk/share/spamassassin/ |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Note:: these URL's may change |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
input: ptr to array of local/file/path/names |
105
|
|
|
|
|
|
|
return: array ptr to whitelist domain names |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=cut |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub cachewhite { |
110
|
3
|
|
|
3
|
1
|
68
|
my($bls,$files) = @_; |
111
|
3
|
|
|
|
|
4
|
my @whitelist; |
112
|
3
|
|
|
|
|
7
|
foreach my $infile (@$files) { |
113
|
1
|
|
|
|
|
1
|
my $wfile; |
114
|
1
|
50
|
|
|
|
43
|
next unless open $wfile, $infile; |
115
|
1
|
|
|
|
|
109
|
foreach(<$wfile>) { |
116
|
160
|
100
|
|
|
|
430
|
next unless $_ =~ /uridnsbl_skip_domain\s+(.+)/; |
117
|
51
|
|
|
|
|
231
|
(my $white = $1) =~ s/\./\\./g; |
118
|
51
|
|
|
|
|
62
|
chomp $white; |
119
|
51
|
|
|
|
|
193
|
my @wtmp = split /\s+/, lc $white; |
120
|
51
|
|
|
|
|
147
|
push @whitelist, @wtmp; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
} |
123
|
3
|
|
|
|
|
23
|
$bls->{-urblpreparewhite} = \@whitelist; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item * $blacklist = $blessed->cacheblack($localfilelistptr); |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
This method opens local file(s) in "file list" and extracts the domains found |
129
|
|
|
|
|
|
|
therein. The domains may be space seperated many to a line. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
input: ptr to array of local/file/path/names |
132
|
|
|
|
|
|
|
return: ptr to blacklist domain names |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub cacheblack { |
137
|
0
|
|
|
0
|
1
|
0
|
my($bls,$files) = @_; |
138
|
0
|
|
|
|
|
0
|
my @blacklist; |
139
|
0
|
|
|
|
|
0
|
foreach my $infile (@$files) { |
140
|
0
|
|
|
|
|
0
|
my $bkfile; |
141
|
0
|
0
|
|
|
|
0
|
next unless open $bkfile, $infile; |
142
|
0
|
|
|
|
|
0
|
foreach(<$bkfile>) { |
143
|
0
|
|
|
|
|
0
|
chomp; |
144
|
0
|
|
|
|
|
0
|
(my $black = $_) =~ s/\./\\./g; |
145
|
0
|
|
|
|
|
0
|
my @btmp = split /\s+/, lc $black; |
146
|
0
|
|
|
|
|
0
|
push @blacklist, @btmp; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
} |
149
|
0
|
|
|
|
|
0
|
$bls->{-urblprepareblack} = \@blacklist; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=item * $domain = $blessed->urbldomain($hostname) |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
This method extracts a domain name to check against an SURBL. If the |
155
|
|
|
|
|
|
|
hostname is whitelisted, the return value is false, otherwise a domain name |
156
|
|
|
|
|
|
|
is returned. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
input: hostname |
159
|
|
|
|
|
|
|
return: false if whitelisted |
160
|
|
|
|
|
|
|
else domain to check against SURBL |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
NOTE: optionally white or tld testing will be bypassed if the pointer |
163
|
|
|
|
|
|
|
is undefined or points to an empty array. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=cut |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Implementation Guidelines |
168
|
|
|
|
|
|
|
# |
169
|
|
|
|
|
|
|
# http://www.surbl.org/guidelines |
170
|
|
|
|
|
|
|
# |
171
|
|
|
|
|
|
|
# Extract base (registered) domains from those URIs. This includes removing |
172
|
|
|
|
|
|
|
# all leading host names, subdomains, www., randomized subdomains, etc. In |
173
|
|
|
|
|
|
|
# order to determine the level of domain to check, use our tables of two level |
174
|
|
|
|
|
|
|
# and three level TLDs. Originally these were CCTLDs, but they now also |
175
|
|
|
|
|
|
|
# include some frequently abused hosting domains. (Note that these files only |
176
|
|
|
|
|
|
|
# rarely update. Please don't retrieve them more often than once per day.) The |
177
|
|
|
|
|
|
|
# usage is: |
178
|
|
|
|
|
|
|
# |
179
|
|
|
|
|
|
|
# For any domain on the three level list, check it at the fourth level. |
180
|
|
|
|
|
|
|
# For any domain on the two level list, check it at the third level. |
181
|
|
|
|
|
|
|
# For any other domain, check it at the second level. |
182
|
|
|
|
|
|
|
# |
183
|
|
|
|
|
|
|
# For example, any domain found in the two level list should have a third |
184
|
|
|
|
|
|
|
# level domain name (such as foo.co.uk) checked against SURBLs. Domains not |
185
|
|
|
|
|
|
|
# specifically on the two level list (such as foo.com or foo.fr) should be |
186
|
|
|
|
|
|
|
# checked at two levels. Please do not check at levels other than these as |
187
|
|
|
|
|
|
|
# doing so will cause unnecessary queries that won't result in matches. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
sub urbldomain { |
190
|
5
|
|
|
5
|
1
|
151
|
my $bls = shift; |
191
|
5
|
|
|
|
|
17
|
my $host = lc shift; |
192
|
5
|
|
50
|
|
|
31
|
my $white = $bls->{-urblpreparewhite} || []; |
193
|
5
|
|
50
|
|
|
21
|
my $tlds = $bls->{-urblpreparebad} || []; |
194
|
|
|
|
|
|
|
|
195
|
5
|
|
|
|
|
14
|
foreach(@$white) { |
196
|
1008
|
100
|
|
|
|
7689
|
return undef if $host =~ /$_$/; # whitelisted? |
197
|
|
|
|
|
|
|
} |
198
|
4
|
|
|
|
|
23
|
foreach (@$tlds) { |
199
|
11590
|
100
|
|
|
|
139879
|
if ($host =~ /([^\.]+\.$_)$/) { |
200
|
2
|
|
|
|
|
11
|
($host = $1) =~ s/\\//g; |
201
|
2
|
|
|
|
|
15
|
return $host; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
} |
204
|
2
|
|
|
|
|
34
|
$host =~ /([^\.]+\.[^\.]+)$/; |
205
|
2
|
|
|
|
|
25
|
return $1; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=item * $rv = $blessed->urblblack($hostname) |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
This method check if a hostname is found within the local black list(s). |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
input: hostname |
213
|
|
|
|
|
|
|
return: domain found, else false |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=cut |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub urblblack { |
218
|
0
|
|
|
0
|
1
|
|
my $bls = shift; |
219
|
0
|
|
|
|
|
|
my $host = lc shift; |
220
|
0
|
|
0
|
|
|
|
my $tlds = $bls->{-urblprepareblack} || []; |
221
|
0
|
|
|
|
|
|
foreach (@$tlds) { |
222
|
0
|
0
|
|
|
|
|
if ($host =~ /$_$/) { |
223
|
0
|
|
|
|
|
|
($host = $_) =~ s/\\//g; |
224
|
0
|
|
|
|
|
|
return $host; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
} |
227
|
0
|
|
|
|
|
|
return undef; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=item * $rv = $blessed->urbwhite($hostname) |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
This method check if a hostname is found within the local white list. |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
input: hostname |
235
|
|
|
|
|
|
|
return: domain found, else false |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=cut |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
sub urblwhite { |
240
|
0
|
|
|
0
|
0
|
|
my $bls = shift; |
241
|
0
|
|
|
|
|
|
my $host = lc shift; |
242
|
0
|
|
0
|
|
|
|
my $white = $bls->{-urblpreparewhite} || []; |
243
|
0
|
|
|
|
|
|
foreach (@$white) { |
244
|
0
|
0
|
|
|
|
|
if ($host =~ /$_$/) { |
245
|
0
|
|
|
|
|
|
($host = $_) =~ s/\\//g; |
246
|
0
|
|
|
|
|
|
return $host; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
} |
249
|
0
|
|
|
|
|
|
return undef; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
1; |
253
|
|
|
|
|
|
|
__END__ |