line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Net::Google::SafeBrowsing2; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
22768
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
4
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
24
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
4
|
use Carp; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
116
|
|
7
|
1
|
|
|
1
|
|
936
|
use LWP::UserAgent; |
|
1
|
|
|
|
|
58658
|
|
|
1
|
|
|
|
|
32
|
|
8
|
1
|
|
|
1
|
|
10
|
use URI; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
21
|
|
9
|
1
|
|
|
1
|
|
1100
|
use Digest::SHA qw(sha256); |
|
1
|
|
|
|
|
4381
|
|
|
1
|
|
|
|
|
112
|
|
10
|
1
|
|
|
1
|
|
11
|
use List::Util qw(first); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
53
|
|
11
|
1
|
|
|
1
|
|
792
|
use Text::Trim; |
|
1
|
|
|
|
|
634
|
|
|
1
|
|
|
|
|
71
|
|
12
|
1
|
|
|
1
|
|
787
|
use Digest::HMAC_SHA1 qw(hmac_sha1 hmac_sha1_hex); |
|
1
|
|
|
|
|
1864
|
|
|
1
|
|
|
|
|
60
|
|
13
|
1
|
|
|
1
|
|
867
|
use MIME::Base64::URLSafe; |
|
1
|
|
|
|
|
1989
|
|
|
1
|
|
|
|
|
62
|
|
14
|
1
|
|
|
1
|
|
6
|
use MIME::Base64; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
15
|
1
|
|
|
1
|
|
792
|
use String::HexConvert; |
|
1
|
|
|
|
|
284
|
|
|
1
|
|
|
|
|
36
|
|
16
|
1
|
|
|
1
|
|
980
|
use File::Slurp; |
|
1
|
|
|
|
|
17937
|
|
|
1
|
|
|
|
|
96
|
|
17
|
1
|
|
|
1
|
|
1542
|
use IO::Socket::SSL 'inet4' ; |
|
1
|
|
|
|
|
82798
|
|
|
1
|
|
|
|
|
13
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
20
|
1
|
|
|
1
|
|
327
|
use Exporter 'import'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
89
|
|
21
|
|
|
|
|
|
|
our @EXPORT = qw(DATABASE_RESET MAC_ERROR MAC_KEY_ERROR INTERNAL_ERROR SERVER_ERROR NO_UPDATE NO_DATA SUCCESSFUL MALWARE PHISHING); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our $VERSION = '1.11'; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
BEGIN { |
26
|
1
|
|
|
1
|
|
39
|
IO::Socket::SSL::set_ctx_defaults( |
27
|
|
|
|
|
|
|
verify_mode => Net::SSLeay->VERIFY_PEER(), |
28
|
|
|
|
|
|
|
); |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 NAME |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
Net::Google::SafeBrowsing2 - Perl extension for the Google Safe Browsing v2 API. (Google Safe Browsing v1 has been deprecated by Google.) |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 SYNOPSIS |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use Net::Google::SafeBrowsing2; |
39
|
|
|
|
|
|
|
use Net::Google::SafeBrowsing2::Sqlite; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $storage = Net::Google::SafeBrowsing2::Sqlite->new(file => 'google-v2.db'); |
42
|
|
|
|
|
|
|
my $gsb = Net::Google::SafeBrowsing2->new( |
43
|
|
|
|
|
|
|
key => "my key", |
44
|
|
|
|
|
|
|
storage => $storage, |
45
|
|
|
|
|
|
|
); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$gsb->update(); |
48
|
|
|
|
|
|
|
my $match = $gsb->lookup(url => 'http://www.gumblar.cn/'); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
if ($match eq MALWARE) { |
51
|
|
|
|
|
|
|
print "http://www.gumblar.cn/ is flagged as a dangerous site\n"; |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
$storage->close(); |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head1 DESCRIPTION |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Net::Google::SafeBrowsing2 implements the Google Safe Browsing v2 API. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The library passes most of the unit tests listed in the API documentation. See the documentation (L) for more details about the failed tests. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
The Google Safe Browsing database must be stored and managed locally. L uses Sqlite as the storage back-end, L uses MySQL. Other storage mechanisms (databases, memory, etc.) can be added and used transparently with this module. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
You may want to look at "Google Safe Browsing v2: Implementation Notes" (L), a collection of notes and real-world numbers about the API. This is intended for people who want to learn more about the API, whether as a user or to make their own implementation. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
The source code is available on github at L. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
If you do not need to inspect more than 10,000 URLs a day, you can use L with the Google Safe Browsing v2 Lookup API which does not require to store and maintain a local database. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
IMPORTANT: If you start with an empty database, you will need to perform several updates to retrieve all the Google Safe Browsing information. This may require up to 24 hours. This is a limitation of the Google API, not of this module. See "Google Safe Browsing v2: Implementation Notes" at L. |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head1 CONSTANTS |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Several constants are exported by this module: |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=over 4 |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=item DATABASE_RESET |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Google requested to reset (empty) the local database. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=item MAC_ERROR |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
The replies from Google could not be validated with the MAC keys. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=item MAC_KEY_ERROR |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
The request for the MAC keys failed. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item INTERNAL_ERROR |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
An internal error occurred. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item SERVER_ERROR |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
The server sent an error back to the client. |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=item NO_UPDATE |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
No update was performed, probably because it is too early to make a new request to Google Safe Browsing. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item NO_DATA |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
No data was sent back by Google to the client, probably because the database is up to date. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item SUCCESSFUL |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
The operation was successful. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item MALWARE |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Name of the Malware list in Google Safe Browsing (shortcut to 'goog-malware-shavar') |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item PHISHING |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Name of the Phishing list in Google Safe Browsing (shortcut to 'googpub-phish-shavar') |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=back |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
use constant { |
123
|
1
|
|
|
|
|
239
|
DATABASE_RESET => -6, |
124
|
|
|
|
|
|
|
MAC_ERROR => -5, |
125
|
|
|
|
|
|
|
MAC_KEY_ERROR => -4, |
126
|
|
|
|
|
|
|
INTERNAL_ERROR => -3, # internal/parsing error |
127
|
|
|
|
|
|
|
SERVER_ERROR => -2, # Server sent an error back |
128
|
|
|
|
|
|
|
NO_UPDATE => -1, # no update (too early) |
129
|
|
|
|
|
|
|
NO_DATA => 0, # no data sent |
130
|
|
|
|
|
|
|
SUCCESSFUL => 1, # data sent |
131
|
|
|
|
|
|
|
MALWARE => 'goog-malware-shavar', |
132
|
|
|
|
|
|
|
PHISHING => 'googpub-phish-shavar', |
133
|
|
|
|
|
|
|
FULL_HASH_TIME => 45 * 60, |
134
|
|
|
|
|
|
|
INTERVAL_FULL_HASH_TIME => 'INTERVAL 45 MINUTE', |
135
|
1
|
|
|
1
|
|
124
|
}; |
|
1
|
|
|
|
|
4
|
|
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=over 4 |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=back |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=head2 new() |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
Create a Net::Google::SafeBrowsing2 object |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
my $gsb = Net::Google::SafeBrowsing2->new( |
149
|
|
|
|
|
|
|
key => "my key", |
150
|
|
|
|
|
|
|
storage => Net::Google::SafeBrowsing2::Sqlite->new(file => 'google-v2.db'), |
151
|
|
|
|
|
|
|
debug => 0, |
152
|
|
|
|
|
|
|
mac => 0, |
153
|
|
|
|
|
|
|
list => MALWARE, |
154
|
|
|
|
|
|
|
); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Arguments |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=over 4 |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=item server |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Safe Browsing Server. https://safebrowsing.clients.google.com/safebrowsing/ by default |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item mac_server |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Safe Browsing MAC Server. https://sb-ssl.google.com/safebrowsing/ by default |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=item key |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
Required. Your Google Safe browsing API key |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=item storage |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Required. Object which handle the storage for the Google Safe Browsing database. See L for more details. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=item list |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Optional. The Google Safe Browsing list to handle. By default, handles both MALWARE and PHISHING. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=item mac |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Optional. Set to 1 to enable Message Authentication Code (MAC). 0 (disabled) by default. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=item debug |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Optional. Set to 1 to enable debugging. 0 (disabled) by default. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
The debug output maybe quite large and can slow down significantly the update and lookup functions. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=item errors |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Optional. Set to 1 to show errors to STDOUT. 0 (disabled by default). |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=item perf |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Optional. Set to 1 to show performance information. |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item version |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Optional. Google Safe Browsing version. 2.2 by default |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=back |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=cut |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub new { |
207
|
1
|
|
|
1
|
1
|
568
|
my ($class, %args) = @_; |
208
|
|
|
|
|
|
|
|
209
|
1
|
|
|
|
|
17
|
my $self = { # default arguments |
210
|
|
|
|
|
|
|
server => 'https://safebrowsing.clients.google.com/safebrowsing/', |
211
|
|
|
|
|
|
|
mac_server => 'https://sb-ssl.google.com/safebrowsing/', |
212
|
|
|
|
|
|
|
list => ['googpub-phish-shavar', 'goog-malware-shavar'], |
213
|
|
|
|
|
|
|
key => '', |
214
|
|
|
|
|
|
|
version => '2.2', |
215
|
|
|
|
|
|
|
debug => 0, |
216
|
|
|
|
|
|
|
errors => 0, |
217
|
|
|
|
|
|
|
last_error => '', |
218
|
|
|
|
|
|
|
mac => 0, |
219
|
|
|
|
|
|
|
perf => 0, |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
%args, |
222
|
|
|
|
|
|
|
}; |
223
|
|
|
|
|
|
|
|
224
|
1
|
50
|
|
|
|
4
|
if (! exists $self->{storage}) { |
225
|
1
|
|
|
1
|
|
710
|
use Net::Google::SafeBrowsing2::Storage; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
9842
|
|
226
|
1
|
|
|
|
|
11
|
$self->{storage} = Net::Google::SafeBrowsing2::Storage->new(); |
227
|
|
|
|
|
|
|
} |
228
|
1
|
50
|
|
|
|
5
|
if (ref $self->{list} ne 'ARRAY') { |
229
|
0
|
|
|
|
|
0
|
$self->{list} = [$self->{list}]; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
1
|
50
|
|
|
|
8
|
bless $self, $class or croak "Can't bless $class: $!"; |
233
|
1
|
|
|
|
|
3
|
return $self; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head1 PUBLIC FUNCTIONS |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=over 4 |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=back |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=head2 update() |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
Perform a database update. |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
$gsb->update(); |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Return the status of the update (see the list of constants above): INTERNAL_ERROR, SERVER_ERROR, NO_UPDATE, NO_DATA or SUCCESSFUL |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
This function can handle two lists at the same time. If one of the list should not be updated, it will automatically skip it and update the other one. It is faster to update two lists at once rather than doing them one by one. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
NOTE: If you start with an empty database, you will need to perform several updates to retrieve all the Google Safe Browsing information. This may require up to 24 hours. This is a limitation of the Google API, not of this module. See "Google Safe Browsing v2: Implementation Notes" at L. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
Arguments |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=over 4 |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=item list |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Optional. Update a specific list. Use the list(s) from new() by default. |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=item mac |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
Optional. Set to 1 to enable Message Authentication Code (MAC). Use the value from new() by default. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=item force |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
Optional. Force the update (1). Disabled by default (0). |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Be careful if you set this option to 1 as too frequent updates might result in the blacklisting of your API key. |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=back |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=cut |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub update { |
279
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
280
|
|
|
|
|
|
|
# my @lists = @{[$args{list}]} || @{$self->{list}} || croak "Missing list name\n"; |
281
|
0
|
|
|
|
|
0
|
my $list = $args{list}; |
282
|
0
|
|
0
|
|
|
0
|
my $force = $args{force} || 0; |
283
|
0
|
|
0
|
|
|
0
|
my $mac = $args{mac} || $self->{mac} || 0; |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
|
286
|
0
|
|
|
|
|
0
|
my @lists = @{$self->{list}}; |
|
0
|
|
|
|
|
0
|
|
287
|
0
|
0
|
|
|
|
0
|
@lists = @{[$args{list}]} if (defined $list); |
|
0
|
|
|
|
|
0
|
|
288
|
|
|
|
|
|
|
|
289
|
0
|
|
|
|
|
0
|
my $result = 0; |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# Too early to update? |
292
|
0
|
|
|
|
|
0
|
my $start = time(); |
293
|
0
|
|
|
|
|
0
|
my $i = 0; |
294
|
0
|
|
|
|
|
0
|
while ($i < scalar @lists) { |
295
|
0
|
|
|
|
|
0
|
my $list = $lists[$i]; |
296
|
0
|
|
|
|
|
0
|
my $info = $self->{storage}->last_update(list => $list); |
297
|
|
|
|
|
|
|
|
298
|
0
|
0
|
0
|
|
|
0
|
if ($info->{'time'} + $info->{'wait'} > time && $force == 0) { |
299
|
0
|
|
|
|
|
0
|
$self->debug("Too early to update $list\n"); |
300
|
0
|
|
|
|
|
0
|
splice(@lists, $i, 1); |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
else { |
303
|
0
|
|
|
|
|
0
|
$self->debug("OK to update $list: " . time() . "/" . ($info->{'time'} + $info->{'wait'}) . "\n"); |
304
|
0
|
|
|
|
|
0
|
$i++; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
0
|
0
|
|
|
|
0
|
if (scalar @lists == 0) { |
309
|
0
|
|
|
|
|
0
|
$self->debug("Too early to update any list\n"); |
310
|
0
|
|
|
|
|
0
|
return NO_UPDATE; |
311
|
|
|
|
|
|
|
} |
312
|
0
|
|
|
|
|
0
|
$self->perf("OK to update check: " . (time() - $start) . "s\n"); |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
# MAC? |
315
|
0
|
|
|
|
|
0
|
my $client_key = ''; |
316
|
0
|
|
|
|
|
0
|
my $wrapped_key = ''; |
317
|
|
|
|
|
|
|
|
318
|
0
|
0
|
|
|
|
0
|
if ($mac) { |
319
|
0
|
|
|
|
|
0
|
($client_key, $wrapped_key) = $self->get_mac_keys(); |
320
|
|
|
|
|
|
|
|
321
|
0
|
0
|
0
|
|
|
0
|
if ($client_key eq '' || $wrapped_key eq '') { |
322
|
0
|
|
|
|
|
0
|
return MAC_KEY_ERROR; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
|
328
|
0
|
|
|
|
|
0
|
my $ua = $self->ua; |
329
|
|
|
|
|
|
|
|
330
|
0
|
|
|
|
|
0
|
my $url = $self->{server} . "downloads?client=api&apikey=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; |
331
|
0
|
0
|
|
|
|
0
|
$url .= "&wrkey=$wrapped_key" if ($mac); |
332
|
|
|
|
|
|
|
|
333
|
0
|
|
|
|
|
0
|
my $body = ''; |
334
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
335
|
|
|
|
|
|
|
# Report existng chunks |
336
|
0
|
|
|
|
|
0
|
$start = time(); |
337
|
0
|
|
|
|
|
0
|
my $a_range = $self->create_range(numbers => [$self->{storage}->get_add_chunks_nums(list => $list)]); |
338
|
0
|
|
|
|
|
0
|
my $s_range = $self->create_range(numbers => [$self->{storage}->get_sub_chunks_nums(list => $list)]); |
339
|
0
|
|
|
|
|
0
|
$self->perf("Create add and sub ranges: " . (time() - $start) . "s\n"); |
340
|
|
|
|
|
|
|
|
341
|
0
|
|
|
|
|
0
|
my $chunks_list = ''; |
342
|
0
|
0
|
|
|
|
0
|
if ($a_range ne '') { |
343
|
0
|
|
|
|
|
0
|
$chunks_list .= "a:$a_range"; |
344
|
|
|
|
|
|
|
} |
345
|
0
|
0
|
|
|
|
0
|
if ($s_range ne '') { |
346
|
0
|
0
|
|
|
|
0
|
$chunks_list .= ":" if ($a_range ne ''); |
347
|
0
|
|
|
|
|
0
|
$chunks_list .= "s:$s_range"; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
0
|
|
|
|
|
0
|
$body .= "$list;$chunks_list"; |
351
|
0
|
0
|
|
|
|
0
|
$body .= ":mac" if ($mac); |
352
|
0
|
|
|
|
|
0
|
$body .= "\n"; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
0
|
|
|
|
|
0
|
my $start_req = time(); |
356
|
0
|
|
|
|
|
0
|
my $res = $ua->post($url, Content => $body); |
357
|
0
|
|
|
|
|
0
|
$self->perf("$body\n"); |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# $self->debug($res->request->as_string . "\n" . $res->as_string . "\n"); |
360
|
0
|
0
|
|
|
|
0
|
$self->debug($res->request->as_string . "\n") if ($self->{debug}); |
361
|
0
|
0
|
|
|
|
0
|
$self->debug($res->as_string . "\n") if ($self->{debug}); |
362
|
0
|
|
|
|
|
0
|
my $duration_req = time() - $start_req; |
363
|
|
|
|
|
|
|
|
364
|
0
|
0
|
|
|
|
0
|
if (! $res->is_success) { |
365
|
0
|
|
|
|
|
0
|
$self->error("Request failed\n"); |
366
|
|
|
|
|
|
|
|
367
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
368
|
0
|
|
|
|
|
0
|
$self->update_error('time' => time(), list => $list); |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
0
|
|
|
|
|
0
|
return SERVER_ERROR; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
0
|
|
|
|
|
0
|
my $last_update = time; |
375
|
0
|
|
|
|
|
0
|
my $wait = 0; |
376
|
|
|
|
|
|
|
|
377
|
0
|
|
|
|
|
0
|
my @redirections = (); |
378
|
0
|
|
|
|
|
0
|
my $del_add_duration = 0; |
379
|
0
|
|
|
|
|
0
|
my $del_sub_duration = 0; |
380
|
0
|
|
|
|
|
0
|
my $add_range_info = ''; |
381
|
|
|
|
|
|
|
|
382
|
0
|
|
|
|
|
0
|
my @lines = split/\s/, $res->decoded_content; |
383
|
0
|
|
|
|
|
0
|
$list = ''; |
384
|
0
|
|
|
|
|
0
|
foreach my $line (@lines) { |
385
|
0
|
0
|
0
|
|
|
0
|
if ($line =~ /n:\s*(\d+)\s*$/) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
386
|
0
|
|
|
|
|
0
|
$self->debug("Next poll: $1 seconds\n"); |
387
|
0
|
|
|
|
|
0
|
$wait = $1; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
elsif ($line =~ /i:\s*(\S+)\s*$/) { |
390
|
0
|
|
|
|
|
0
|
$self->debug("List: $1\n"); |
391
|
0
|
|
|
|
|
0
|
$list = $1; |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
elsif ($line =~ /u:\s*(\S+),(\S+)\s*$/) { |
394
|
0
|
|
|
|
|
0
|
$self->debug("Redirection: $1\n"); |
395
|
0
|
|
|
|
|
0
|
$self->debug("MAC: $2\n"); |
396
|
0
|
|
|
|
|
0
|
push(@redirections, [$1, $list, $2]); |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
elsif ($line =~ /u:\s*(\S+)\s*$/) { |
399
|
0
|
|
|
|
|
0
|
$self->debug("Redirection: $1\n"); |
400
|
0
|
|
|
|
|
0
|
push(@redirections, [$1, $list, '']); |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
elsif ($line =~ /ad:(\S+)$/) { |
403
|
0
|
|
|
|
|
0
|
$self->debug("Delete Add Chunks: $1\n"); |
404
|
|
|
|
|
|
|
|
405
|
0
|
|
|
|
|
0
|
my $del_add_start = time(); |
406
|
0
|
|
|
|
|
0
|
$add_range_info = $1 . " $list"; |
407
|
0
|
|
|
|
|
0
|
my @nums = $self->expand_range(range => $1); |
408
|
0
|
|
|
|
|
0
|
$self->{storage}->delete_add_ckunks(chunknums => [@nums], list => $list); |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# Delete full hash as well |
411
|
0
|
|
|
|
|
0
|
$self->{storage}->delete_full_hashes(chunknums => [@nums], list => $list); |
412
|
0
|
|
|
|
|
0
|
$del_add_duration = time() - $del_add_start; |
413
|
|
|
|
|
|
|
|
414
|
0
|
|
|
|
|
0
|
$result = 1; |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
elsif ($line =~ /sd:(\S+)$/) { |
417
|
0
|
|
|
|
|
0
|
$self->debug("Delete Sub Chunks: $1\n"); |
418
|
|
|
|
|
|
|
|
419
|
0
|
|
|
|
|
0
|
my $del_sub_start = time(); |
420
|
0
|
|
|
|
|
0
|
my @nums = $self->expand_range(range => $1); |
421
|
0
|
|
|
|
|
0
|
$self->{storage}->delete_sub_ckunks(chunknums => [@nums], list => $list); |
422
|
0
|
|
|
|
|
0
|
$del_add_duration = time() - $del_sub_start; |
423
|
|
|
|
|
|
|
|
424
|
0
|
|
|
|
|
0
|
$result = 1; |
425
|
|
|
|
|
|
|
} |
426
|
|
|
|
|
|
|
elsif ($line =~ /m:(\S+)$/ && $mac) { |
427
|
0
|
|
|
|
|
0
|
my $hmac = $1; |
428
|
0
|
|
|
|
|
0
|
$self->debug("MAC of request: $hmac\n"); |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
# Remove this line for data |
431
|
0
|
|
|
|
|
0
|
my $data = $res->decoded_content; |
432
|
0
|
|
|
|
|
0
|
$data =~ s/^m:(\S+)\n//g; |
433
|
|
|
|
|
|
|
|
434
|
0
|
0
|
|
|
|
0
|
if (! $self->validate_data_mac(data => $data, key => $client_key, digest => $hmac) ) { |
435
|
0
|
|
|
|
|
0
|
$self->error("MAC error on main request\n"); |
436
|
|
|
|
|
|
|
|
437
|
0
|
|
|
|
|
0
|
return MAC_ERROR; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
} |
440
|
|
|
|
|
|
|
elsif ($line =~ /e:pleaserekey/ && $mac) { |
441
|
0
|
|
|
|
|
0
|
$self->debug("MAC key has been expired\n"); |
442
|
|
|
|
|
|
|
|
443
|
0
|
|
|
|
|
0
|
$self->{storage}->delete_mac_keys(); |
444
|
0
|
|
|
|
|
0
|
return $self->update(list => $list, force => $force, mac => $mac); |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
elsif ($line =~ /r:pleasereset/) { |
447
|
0
|
|
|
|
|
0
|
$self->debug("Database must be reset\n"); |
448
|
|
|
|
|
|
|
|
449
|
0
|
|
|
|
|
0
|
$self->{storage}->reset(list => $list); |
450
|
|
|
|
|
|
|
|
451
|
0
|
|
|
|
|
0
|
return DATABASE_RESET; |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
} |
454
|
0
|
|
|
|
|
0
|
$self->debug("\n"); |
455
|
0
|
|
|
|
|
0
|
$self->perf("Handle first request: " . (time() - $last_update) . "s (POST: ${duration_req}s, DEL add: ${del_add_duration}s, DEL sub: ${del_sub_duration}s, ADD range: ${add_range_info})\n"); |
456
|
|
|
|
|
|
|
|
457
|
0
|
0
|
|
|
|
0
|
$result = 1 if (scalar @redirections > 0); |
458
|
|
|
|
|
|
|
|
459
|
0
|
|
|
|
|
0
|
$self->perf("Parse redirections: "); |
460
|
0
|
|
|
|
|
0
|
foreach my $data (@redirections) { |
461
|
0
|
|
|
|
|
0
|
$start = time(); |
462
|
0
|
|
|
|
|
0
|
my $redirection = $data->[0]; |
463
|
0
|
|
|
|
|
0
|
$list = $data->[1]; |
464
|
0
|
|
|
|
|
0
|
my $hmac = $data->[2]; |
465
|
|
|
|
|
|
|
|
466
|
0
|
|
|
|
|
0
|
$self->debug("Checking redirection https://$redirection ($list)\n"); |
467
|
0
|
|
|
|
|
0
|
$res = $ua->get("https://$redirection"); |
468
|
0
|
0
|
|
|
|
0
|
if (! $res->is_success) { |
469
|
0
|
|
|
|
|
0
|
$self->error("Request to $redirection failed\n"); |
470
|
|
|
|
|
|
|
|
471
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
472
|
0
|
|
|
|
|
0
|
$self->update_error('time' => $last_update, list => $list); |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
0
|
|
|
|
|
0
|
return SERVER_ERROR; |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
0
|
0
|
|
|
|
0
|
$self->debug(substr($res->as_string, 0, 250) . "\n\n") if ($self->{debug}); |
479
|
0
|
0
|
|
|
|
0
|
$self->debug(substr($res->content, 0, 250) . "\n\n") if ($self->{debug}); |
480
|
|
|
|
|
|
|
|
481
|
0
|
|
|
|
|
0
|
my $data = $res->content; |
482
|
0
|
0
|
0
|
|
|
0
|
if ($mac && ! $self->validate_data_mac(data => $data, key => $client_key, digest => $hmac) ) { |
483
|
0
|
|
|
|
|
0
|
$self->error("MAC error on redirection\n"); |
484
|
0
|
|
|
|
|
0
|
$self->debug("Length of data: " . length($data) . "\n"); |
485
|
|
|
|
|
|
|
|
486
|
0
|
|
|
|
|
0
|
return MAC_ERROR; |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
|
489
|
0
|
|
|
|
|
0
|
my $result = $self->parse_data(data => $data, list => $list); |
490
|
0
|
0
|
|
|
|
0
|
if ($result != SUCCESSFUL) { |
491
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
492
|
0
|
|
|
|
|
0
|
$self->update_error('time' => $last_update, list => $list); |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
|
495
|
0
|
|
|
|
|
0
|
return $result; |
496
|
|
|
|
|
|
|
} |
497
|
0
|
|
|
|
|
0
|
$self->perf((time() - $start) . "s "); |
498
|
|
|
|
|
|
|
} |
499
|
0
|
|
|
|
|
0
|
$self->perf("\n"); |
500
|
|
|
|
|
|
|
|
501
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
502
|
0
|
|
|
|
|
0
|
$self->debug("List update: $last_update $wait $list\n"); |
503
|
0
|
|
|
|
|
0
|
$self->{storage}->updated('time' => $last_update, 'wait' => $wait, list => $list); |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
0
|
return $result; # ok |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=head2 import_chunks() |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Import add and sub chunks from a file. |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
my $result = $gsb->import_chunks(list => MALWARE, file => 'malware.dat'); |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
Return the status of the import: INTERNAL_ERROR or SUCCESSFUL. |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
This function should be used to initialize an empty back-end storage. |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
Arguments |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=over 4 |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=item list |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
Required. Google list to use. |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=item file |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
Required. File that contains the list of chunks. This file can be created with the C function inherited from C. |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=back |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=cut |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
sub import_chunks { |
537
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
538
|
0
|
|
0
|
|
|
0
|
my $list = $args{list} || ''; |
539
|
0
|
|
0
|
|
|
0
|
my $file = $args{file} || "$list.dat"; |
540
|
|
|
|
|
|
|
|
541
|
0
|
|
|
|
|
0
|
my $data = read_file($file, { binmode => ':raw' }); |
542
|
|
|
|
|
|
|
|
543
|
0
|
|
|
|
|
0
|
return $self->parse_data(data => $data, list => $list); |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=head2 lookup() |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
Lookup a URL against the Google Safe Browsing database. |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
my $match = $gsb->lookup(url => 'http://www.gumblar.cn'); |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
Returns the name of the list if there is any match, returns an empty string otherwise. |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
Arguments |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
=over 4 |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=item list |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
Optional. Lookup against a specific list. Use the list(s) from new() by default. |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
=item url |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
Required. URL to lookup. |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=back |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=cut |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
sub lookup { |
572
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
573
|
0
|
|
0
|
|
|
0
|
my $list = $args{list} || ''; |
574
|
0
|
|
0
|
|
|
0
|
my $url = $args{url} || return ''; |
575
|
|
|
|
|
|
|
|
576
|
0
|
|
|
|
|
0
|
my @lists = @{$self->{list}}; |
|
0
|
|
|
|
|
0
|
|
577
|
0
|
0
|
|
|
|
0
|
@lists = @{[$args{list}]} if ($list ne ''); |
|
0
|
|
|
|
|
0
|
|
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
# TODO: create our own URI management for canonicalization |
581
|
|
|
|
|
|
|
# fix for http:///foo.com (3 ///) |
582
|
0
|
|
|
|
|
0
|
$url =~ s/^(https?:\/\/)\/+/$1/; |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
|
586
|
0
|
|
|
|
|
0
|
my $uri = URI->new($url)->canonical; |
587
|
|
|
|
|
|
|
|
588
|
0
|
|
|
|
|
0
|
my $domain = $uri->host; |
589
|
|
|
|
|
|
|
|
590
|
0
|
|
|
|
|
0
|
my @hosts = $self->canonical_domain_suffixes($domain); # only top-3 in this case |
591
|
|
|
|
|
|
|
|
592
|
0
|
|
|
|
|
0
|
foreach my $host (@hosts) { |
593
|
0
|
|
|
|
|
0
|
$self->debug("Domain for key: $domain => $host\n"); |
594
|
0
|
|
|
|
|
0
|
my $suffix = $self->prefix("$host/"); # Don't forget trailing hash |
595
|
0
|
|
|
|
|
0
|
$self->debug("Host key: " . $self->hex_to_ascii($suffix) . "\n"); |
596
|
|
|
|
|
|
|
|
597
|
0
|
|
|
|
|
0
|
my $match = $self->lookup_suffix(lists => [@lists], url => $url, suffix => $suffix); |
598
|
0
|
0
|
|
|
|
0
|
return $match if ($match ne ''); |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
|
601
|
0
|
|
|
|
|
0
|
return ''; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=head2 get_lists() |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
Returns the name of all the Google Safe Browsing lists |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
my $@lists = $gsb->get_lists(); |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
NOTE: this function is useless in practice because Google includes some lists which cannot be used by the Google Safe Browsing API, like lists used by the Google toolbar. |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
=cut |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
sub get_lists { |
617
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
618
|
|
|
|
|
|
|
|
619
|
0
|
|
|
|
|
0
|
my $url = $self->{server} . "list?client=api&apikey=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; |
620
|
|
|
|
|
|
|
|
621
|
0
|
|
|
|
|
0
|
my $res = $self->ua->get($url); |
622
|
|
|
|
|
|
|
|
623
|
0
|
|
|
|
|
0
|
return split/\s/, $res->decoded_content; # 1 list per line |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=head2 last_error() |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
Get/Set the last error message. |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
print "Last error: ", $gsb->last_error(), "\n"; |
632
|
|
|
|
|
|
|
$gsb->last_error(''); # Reset last error |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
NOTE: the last error message might not come from the last call. Returns an empty string if no errors. |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
=cut |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
sub last_error { |
639
|
0
|
|
|
0
|
1
|
0
|
my ($self, $message) = @_; |
640
|
|
|
|
|
|
|
|
641
|
0
|
0
|
|
|
|
0
|
if (defined $message) { |
642
|
0
|
|
|
|
|
0
|
$self->{last_error} = $message; |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
else { |
645
|
0
|
|
|
|
|
0
|
return $self->{last_error}; |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=pod |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=head1 PRIVATE FUNCTIONS |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
These functions are not intended to be used externally. |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=over 4 |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=back |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
=head2 lookup_suffix() |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
Lookup a host prefix. |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=cut |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
sub lookup_suffix { |
667
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
668
|
0
|
|
0
|
|
|
0
|
my $lists = $args{lists} || croak "Missing lists\n"; |
669
|
0
|
|
0
|
|
|
0
|
my $url = $args{url} || return ''; |
670
|
0
|
|
0
|
|
|
0
|
my $suffix = $args{suffix} || return ''; |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
# Calculcate prefixes |
673
|
0
|
|
|
|
|
0
|
my @full_hashes = $self->full_hashes($url); # Get the prefixes from the first 4 bytes |
674
|
0
|
|
|
|
|
0
|
my @full_hashes_prefix = map (substr($_, 0, 4), @full_hashes); |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
# Local lookup |
677
|
0
|
|
|
|
|
0
|
my @add_chunks = $self->local_lookup_suffix(lists => $lists, url => $url, suffix => $suffix, full_hashes_prefix => [@full_hashes_prefix]); |
678
|
0
|
0
|
|
|
|
0
|
if (scalar @add_chunks == 0) { |
679
|
0
|
|
|
|
|
0
|
$self->debug("No hit in local lookup\n"); |
680
|
0
|
|
|
|
|
0
|
return ''; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
# Check against full hashes |
685
|
0
|
|
|
|
|
0
|
my $found = ''; |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
# get stored full hashes |
688
|
0
|
|
|
|
|
0
|
foreach my $add_chunk (@add_chunks) { |
689
|
|
|
|
|
|
|
|
690
|
0
|
|
|
|
|
0
|
my @hashes = $self->{storage}->get_full_hashes( chunknum => $add_chunk->{chunknum}, timestamp => time() - FULL_HASH_TIME, list => $add_chunk->{list}); |
691
|
|
|
|
|
|
|
|
692
|
0
|
|
|
|
|
0
|
$self->debug("Full hashes already stored for chunk " . $add_chunk->{chunknum} . ": " . scalar @hashes . "\n"); |
693
|
0
|
|
|
|
|
0
|
foreach my $full_hash (@full_hashes) { |
694
|
0
|
|
|
|
|
0
|
foreach my $hash (@hashes) { |
695
|
0
|
0
|
0
|
0
|
|
0
|
if ($hash eq $full_hash && defined first { $add_chunk->{list} eq $_ } @$lists) { |
|
0
|
|
|
|
|
0
|
|
696
|
0
|
|
|
|
|
0
|
$self->debug("Full hash was found in storage: " . $self->hex_to_ascii($hash) . "\n"); |
697
|
0
|
|
|
|
|
0
|
$found = $add_chunk->{list}; |
698
|
0
|
|
|
|
|
0
|
last; |
699
|
|
|
|
|
|
|
} |
700
|
|
|
|
|
|
|
# elsif ($hash ne $full_hash) { |
701
|
|
|
|
|
|
|
# $self->debug($self->hex_to_ascii($hash) . " ne " . $self->hex_to_ascii($full_hash) . "\n\n"); |
702
|
|
|
|
|
|
|
# } |
703
|
|
|
|
|
|
|
} |
704
|
0
|
0
|
|
|
|
0
|
last if ($found ne ''); |
705
|
|
|
|
|
|
|
} |
706
|
0
|
0
|
|
|
|
0
|
last if ($found ne ''); |
707
|
|
|
|
|
|
|
} |
708
|
|
|
|
|
|
|
|
709
|
0
|
0
|
|
|
|
0
|
return $found if ($found ne ''); |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
# ask for new hashes |
713
|
|
|
|
|
|
|
# TODO: make sure we don't keep asking for the same over and over |
714
|
0
|
|
0
|
|
|
0
|
my @hashes = $self->request_full_hash(prefixes => [ map($_->{prefix} || $_->{hostkey}, @add_chunks) ]); |
715
|
0
|
|
|
|
|
0
|
$self->{storage}->add_full_hashes(full_hashes => [@hashes], timestamp => time()); |
716
|
|
|
|
|
|
|
|
717
|
0
|
|
|
|
|
0
|
foreach my $full_hash (@full_hashes) { |
718
|
0
|
|
|
0
|
|
0
|
my $hash = first { $_->{hash} eq $full_hash} @hashes; |
|
0
|
|
|
|
|
0
|
|
719
|
0
|
0
|
|
|
|
0
|
next if (! defined $hash); |
720
|
|
|
|
|
|
|
|
721
|
0
|
|
|
0
|
|
0
|
my $list = first { $hash->{list} eq $_ } @$lists; |
|
0
|
|
|
|
|
0
|
|
722
|
|
|
|
|
|
|
|
723
|
0
|
0
|
0
|
|
|
0
|
if (defined $hash && defined $list) { |
724
|
|
|
|
|
|
|
# $self->debug($self->hex_to_ascii($hash->{hash}) . " eq " . $self->hex_to_ascii($full_hash) . "\n\n"); |
725
|
|
|
|
|
|
|
|
726
|
0
|
|
|
|
|
0
|
$self->debug("Match: " . $self->hex_to_ascii($full_hash) . "\n"); |
727
|
|
|
|
|
|
|
|
728
|
0
|
|
|
|
|
0
|
return $hash->{list}; |
729
|
|
|
|
|
|
|
} |
730
|
|
|
|
|
|
|
# elsif (defined $hash) { |
731
|
|
|
|
|
|
|
# $self->debug("hash: " . $self->hex_to_ascii($hash->{hash}) . "\n"); |
732
|
|
|
|
|
|
|
# $self->debug("list: " . $hash->{list} . "\n"); |
733
|
|
|
|
|
|
|
# } |
734
|
|
|
|
|
|
|
} |
735
|
|
|
|
|
|
|
|
736
|
0
|
|
|
|
|
0
|
$self->debug("No match\n"); |
737
|
0
|
|
|
|
|
0
|
return ''; |
738
|
|
|
|
|
|
|
} |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
=head2 lookup_suffix() |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
Lookup a host prefix in the local database only. |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
=cut |
745
|
|
|
|
|
|
|
sub local_lookup_suffix { |
746
|
0
|
|
|
0
|
0
|
0
|
my ($self, %args) = @_; |
747
|
0
|
|
0
|
|
|
0
|
my $lists = $args{lists} || croak "Missing lists\n"; |
748
|
0
|
|
0
|
|
|
0
|
my $url = $args{url} || return (); |
749
|
0
|
|
0
|
|
|
0
|
my $suffix = $args{suffix} || return (); |
750
|
0
|
|
0
|
|
|
0
|
my $full_hashe_list = $args{full_hashes} || []; |
751
|
0
|
|
0
|
|
|
0
|
my $full_hashes_prefix_list = $args{full_hashes_prefix} || []; |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
# Step 1: get all add chunks for this host key |
755
|
|
|
|
|
|
|
# Do it for all lists |
756
|
0
|
|
|
|
|
0
|
my @add_chunks = $self->{storage}->get_add_chunks(hostkey => $suffix); |
757
|
|
|
|
|
|
|
# return scalar @add_chunks; |
758
|
0
|
0
|
|
|
|
0
|
if (scalar @add_chunks == 0) { # no match |
759
|
0
|
|
|
|
|
0
|
$self->debug("No host key\n"); |
760
|
0
|
|
|
|
|
0
|
return @add_chunks; |
761
|
|
|
|
|
|
|
} |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
# Step 2: calculcate prefixes |
764
|
|
|
|
|
|
|
# Get the prefixes from the first 4 bytes |
765
|
0
|
|
|
|
|
0
|
my @full_hashes_prefix = @{$full_hashes_prefix_list}; |
|
0
|
|
|
|
|
0
|
|
766
|
0
|
0
|
|
|
|
0
|
if (scalar @full_hashes_prefix == 0) { |
767
|
0
|
|
|
|
|
0
|
my @full_hashes = @{$full_hashe_list}; |
|
0
|
|
|
|
|
0
|
|
768
|
0
|
0
|
|
|
|
0
|
@full_hashes = $self->full_hashes($url) if (scalar @full_hashes == 0); |
769
|
|
|
|
|
|
|
|
770
|
0
|
|
|
|
|
0
|
@full_hashes_prefix = map (substr($_, 0, 4), @full_hashes); |
771
|
|
|
|
|
|
|
} |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
# Step 3: filter out add_chunks with prefix |
774
|
0
|
|
|
|
|
0
|
my $i = 0; |
775
|
0
|
|
|
|
|
0
|
while ($i < scalar @add_chunks) { |
776
|
0
|
0
|
|
|
|
0
|
if ($add_chunks[$i]->{prefix} ne '') { |
777
|
0
|
|
|
|
|
0
|
my $found = 0; |
778
|
0
|
|
|
|
|
0
|
foreach my $hash_prefix (@full_hashes_prefix) { |
779
|
0
|
0
|
|
|
|
0
|
if ( $add_chunks[$i]->{prefix} eq $hash_prefix) { |
780
|
0
|
|
|
|
|
0
|
$found = 1; |
781
|
0
|
|
|
|
|
0
|
last; |
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
# else { |
784
|
|
|
|
|
|
|
# $self->debug( $self->hex_to_ascii($add_chunks[$i]->{prefix}) . " ne " . $self->hex_to_ascii($hash_prefix) . "\n" ); |
785
|
|
|
|
|
|
|
# } |
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
|
788
|
0
|
0
|
|
|
|
0
|
if ($found == 0) { |
789
|
0
|
|
|
|
|
0
|
$self->debug("No prefix found\n"); |
790
|
0
|
|
|
|
|
0
|
splice(@add_chunks, $i, 1); |
791
|
|
|
|
|
|
|
} |
792
|
|
|
|
|
|
|
else { |
793
|
0
|
|
|
|
|
0
|
$i++; |
794
|
|
|
|
|
|
|
} |
795
|
|
|
|
|
|
|
} |
796
|
|
|
|
|
|
|
else { |
797
|
0
|
|
|
|
|
0
|
$i++; |
798
|
|
|
|
|
|
|
} |
799
|
|
|
|
|
|
|
} |
800
|
0
|
0
|
|
|
|
0
|
if (scalar @add_chunks == 0) { |
801
|
0
|
|
|
|
|
0
|
$self->debug("No prefix match for any host key\n"); |
802
|
0
|
|
|
|
|
0
|
return @add_chunks; |
803
|
|
|
|
|
|
|
} |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
# Step 4: get all sub chunks for this host key |
807
|
0
|
|
|
|
|
0
|
my @sub_chunks = $self->{storage}->get_sub_chunks(hostkey => $suffix); |
808
|
|
|
|
|
|
|
|
809
|
0
|
|
|
|
|
0
|
foreach my $sub_chunk (@sub_chunks) { |
810
|
0
|
|
|
|
|
0
|
my $i = 0; |
811
|
0
|
|
|
|
|
0
|
while ($i < scalar @add_chunks) { |
812
|
0
|
|
|
|
|
0
|
my $add_chunk = $add_chunks[$i]; |
813
|
|
|
|
|
|
|
|
814
|
0
|
0
|
0
|
|
|
0
|
if ($add_chunk->{chunknum} != $sub_chunk->{addchunknum} || $add_chunk->{list} ne $sub_chunk->{list}) { |
815
|
0
|
|
|
|
|
0
|
$i++; |
816
|
0
|
|
|
|
|
0
|
next; |
817
|
|
|
|
|
|
|
} |
818
|
|
|
|
|
|
|
|
819
|
0
|
0
|
|
|
|
0
|
if ($sub_chunk->{prefix} eq $add_chunk->{prefix}) { |
820
|
0
|
|
|
|
|
0
|
splice(@add_chunks, $i, 1); |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
else { |
823
|
0
|
|
|
|
|
0
|
$i++; |
824
|
|
|
|
|
|
|
} |
825
|
|
|
|
|
|
|
} |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
|
828
|
0
|
0
|
|
|
|
0
|
if (scalar @add_chunks == 0) { |
829
|
0
|
|
|
|
|
0
|
$self->debug("All add_chunks have been removed by sub_chunks\n"); |
830
|
|
|
|
|
|
|
} |
831
|
|
|
|
|
|
|
|
832
|
0
|
|
|
|
|
0
|
return @add_chunks; |
833
|
|
|
|
|
|
|
} |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
=head2 local_lookup() |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
Lookup a URL against the local Google Safe Browsing database URL. This should be used for debugging purpose only. See the lookup for normal use. |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
my $match = $gsb->local_lookup(url => 'http://www.gumblar.cn'); |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
Returns the name of the list if there is any match, returns an empty string otherwise. |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
Arguments |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
=over 4 |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
=item list |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
Optional. Lookup against a specific list. Use the list(s) from new() by default. |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=item url |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
Required. URL to lookup. |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
=back |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
=cut |
858
|
|
|
|
|
|
|
sub local_lookup { |
859
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
860
|
0
|
|
0
|
|
|
0
|
my $list = $args{list} || ''; |
861
|
0
|
|
0
|
|
|
0
|
my $url = $args{url} || return ''; |
862
|
|
|
|
|
|
|
|
863
|
0
|
|
|
|
|
0
|
my @lists = @{$self->{list}}; |
|
0
|
|
|
|
|
0
|
|
864
|
0
|
0
|
|
|
|
0
|
@lists = @{[$args{list}]} if ($list ne ''); |
|
0
|
|
|
|
|
0
|
|
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
# TODO: create our own URI management for canonicalization |
868
|
|
|
|
|
|
|
# fix for http:///foo.com (3 ///) |
869
|
0
|
|
|
|
|
0
|
$url =~ s/^(https?:\/\/)\/+/$1/; |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
|
873
|
0
|
|
|
|
|
0
|
my $uri = URI->new($url)->canonical; |
874
|
|
|
|
|
|
|
|
875
|
0
|
|
|
|
|
0
|
my $domain = $uri->host; |
876
|
|
|
|
|
|
|
|
877
|
0
|
|
|
|
|
0
|
my @hosts = $self->canonical_domain_suffixes($domain); # only top-3 in this case |
878
|
|
|
|
|
|
|
|
879
|
0
|
|
|
|
|
0
|
foreach my $host (@hosts) { |
880
|
0
|
|
|
|
|
0
|
$self->debug("Domain for key: $domain => $host\n"); |
881
|
0
|
|
|
|
|
0
|
my $suffix = $self->prefix("$host/"); # Don't forget trailing hash |
882
|
0
|
|
|
|
|
0
|
$self->debug("Host key: " . $self->hex_to_ascii($suffix) . "\n"); |
883
|
|
|
|
|
|
|
|
884
|
0
|
|
|
|
|
0
|
my @matches = $self->local_lookup_suffix(lists => [@lists], url => $url, suffix => $suffix); |
885
|
|
|
|
|
|
|
# return $matches[0]->{list} if (scalar @matches > 0); |
886
|
0
|
0
|
|
|
|
0
|
return $matches[0]->{list} . " " . $matches[0]->{chunknum} if (scalar @matches > 0); |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
|
889
|
0
|
|
|
|
|
0
|
return ''; |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
=head2 request_key() |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
Request the Message Authentication Code (MAC) keys |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
=cut |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
sub get_mac_keys { |
900
|
0
|
|
|
0
|
0
|
0
|
my ($self, %args) = @_; |
901
|
|
|
|
|
|
|
|
902
|
0
|
|
|
|
|
0
|
my $keys = $self->{storage}->get_mac_keys(); |
903
|
|
|
|
|
|
|
|
904
|
0
|
0
|
0
|
|
|
0
|
if ($keys->{client_key} eq '' || $keys->{wrapped_key} eq '') { |
905
|
0
|
|
|
|
|
0
|
my ($client_key, $wrapped_key) = $self->request_mac_keys(); |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
# $self->debug("Client key: $client_key\n"); |
908
|
0
|
|
|
|
|
0
|
$self->{storage}->add_mac_keys(client_key => $client_key, wrapped_key => $wrapped_key); |
909
|
|
|
|
|
|
|
|
910
|
0
|
|
|
|
|
0
|
return ($client_key, $wrapped_key); |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
|
913
|
0
|
|
|
|
|
0
|
return ($keys->{client_key}, $keys->{wrapped_key}); |
914
|
|
|
|
|
|
|
} |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
=head2 request_mac_keys() |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
Request the Message Authentication Code (MAC) keys from Google. |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
=cut |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
sub request_mac_keys { |
924
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
925
|
|
|
|
|
|
|
|
926
|
0
|
|
|
|
|
0
|
my $client_key = ''; |
927
|
0
|
|
|
|
|
0
|
my $wrapped_key = ''; |
928
|
|
|
|
|
|
|
|
929
|
0
|
|
|
|
|
0
|
my $url = $self->{mac_server} . "newkey?client=api&apikey=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; |
930
|
|
|
|
|
|
|
|
931
|
0
|
|
|
|
|
0
|
my $res = $self->ua->get($url); |
932
|
|
|
|
|
|
|
|
933
|
0
|
0
|
|
|
|
0
|
if (! $res->is_success) { |
934
|
0
|
|
|
|
|
0
|
$self->error("Key request failed: " . $res->code . "\n"); |
935
|
0
|
|
|
|
|
0
|
return ($client_key, $wrapped_key); |
936
|
|
|
|
|
|
|
} |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
|
940
|
0
|
|
|
|
|
0
|
my $data = $res->decoded_content; |
941
|
0
|
0
|
|
|
|
0
|
if ($data =~ s/^clientkey:(\d+)://mi) { |
942
|
0
|
|
|
|
|
0
|
my $length = $1; |
943
|
0
|
|
|
|
|
0
|
$self->debug("MAC client key length: $length\n"); |
944
|
0
|
|
|
|
|
0
|
$client_key = substr($data, 0, $length, ''); |
945
|
0
|
|
|
|
|
0
|
$self->debug("MAC client key: $client_key\n"); |
946
|
|
|
|
|
|
|
|
947
|
0
|
|
|
|
|
0
|
substr($data, 0, 1, ''); # remove \n |
948
|
|
|
|
|
|
|
|
949
|
0
|
0
|
|
|
|
0
|
if ($data =~ s/^wrappedkey:(\d+)://mi) { |
950
|
0
|
|
|
|
|
0
|
$length = $1; |
951
|
0
|
|
|
|
|
0
|
$self->debug("MAC wrapped key length: $length\n"); |
952
|
0
|
|
|
|
|
0
|
$wrapped_key = substr($data, 0, $length, ''); |
953
|
0
|
|
|
|
|
0
|
$self->debug("MAC wrapped key: $wrapped_key\n"); |
954
|
|
|
|
|
|
|
|
955
|
0
|
|
|
|
|
0
|
return (decode_base64($client_key), $wrapped_key); |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
else { |
958
|
0
|
|
|
|
|
0
|
return ('', ''); |
959
|
|
|
|
|
|
|
} |
960
|
|
|
|
|
|
|
} |
961
|
|
|
|
|
|
|
|
962
|
0
|
|
|
|
|
0
|
return ($client_key, $wrapped_key); |
963
|
|
|
|
|
|
|
} |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
=head2 validate_data_mac() |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
Validate data against the MAC keys. |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
=cut |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
sub validate_data_mac { |
972
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
973
|
0
|
|
0
|
|
|
0
|
my $data = $args{data} || ''; |
974
|
0
|
|
0
|
|
|
0
|
my $key = $args{key} || ''; |
975
|
0
|
|
0
|
|
|
0
|
my $digest = $args{digest} || ''; |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
# my $hash = urlsafe_b64encode trim hmac_sha1($data, decode_base64($key)); |
979
|
|
|
|
|
|
|
# my $hash = urlsafe_b64encode (trim (hmac_sha1($data, decode_base64($key)))); |
980
|
0
|
|
|
|
|
0
|
my $hash = urlsafe_b64encode(hmac_sha1($data, $key)); |
981
|
0
|
|
|
|
|
0
|
$hash .= '='; |
982
|
|
|
|
|
|
|
|
983
|
0
|
|
|
|
|
0
|
$self->debug("$hash / $digest\n"); |
984
|
|
|
|
|
|
|
# $self->debug(urlsafe_b64encode(hmac_sha1($data, decode_base64($key))) . "\n"); |
985
|
|
|
|
|
|
|
# $self->debug(urlsafe_b64encode(trim(hmac_sha1($data, decode_base64($key)))) . "\n"); |
986
|
|
|
|
|
|
|
|
987
|
0
|
|
|
|
|
0
|
return ($hash eq $digest); |
988
|
|
|
|
|
|
|
} |
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
=head2 update_error() |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
Handle server errors during a database update. |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
=cut |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
sub update_error { |
997
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
998
|
0
|
|
0
|
|
|
0
|
my $time = $args{'time'} || time; |
999
|
0
|
|
0
|
|
|
0
|
my $list = $args{'list'} || ''; |
1000
|
|
|
|
|
|
|
|
1001
|
0
|
|
|
|
|
0
|
my $info = $self->{storage}->last_update(list => $list); |
1002
|
0
|
0
|
|
|
|
0
|
$info->{errors} = 0 if (! exists $info->{errors}); |
1003
|
0
|
|
|
|
|
0
|
my $errors = $info->{errors} + 1; |
1004
|
0
|
|
|
|
|
0
|
my $wait = 0; |
1005
|
|
|
|
|
|
|
|
1006
|
0
|
0
|
|
|
|
0
|
$wait = $errors == 1 ? 60 |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
: $errors == 2 ? int(30 * 60 * (rand(1) + 1)) # 30-60 mins |
1008
|
|
|
|
|
|
|
: $errors == 3 ? int(60 * 60 * (rand(1) + 1)) # 60-120 mins |
1009
|
|
|
|
|
|
|
: $errors == 4 ? int(2 * 60 * 60 * (rand(1) + 1)) # 120-240 mins |
1010
|
|
|
|
|
|
|
: $errors == 5 ? int(4 * 60 * 60 * (rand(1) + 1)) # 240-480 mins |
1011
|
|
|
|
|
|
|
: $errors > 5 ? 480 * 60 |
1012
|
|
|
|
|
|
|
: 0; |
1013
|
|
|
|
|
|
|
|
1014
|
0
|
|
|
|
|
0
|
$self->{storage}->update_error('time' => $time, list => $list, 'wait' => $wait, errors => $errors); |
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
=head2 lookup_whitelist() |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
Lookup a host prefix and suffix in the whitelist (s chunks) |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
=cut |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
sub lookup_whitelist { |
1026
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
1027
|
0
|
|
0
|
|
|
0
|
my $suffix = $args{suffix} || return 0; |
1028
|
0
|
|
0
|
|
|
0
|
my $prefix = $args{prefix} || ''; |
1029
|
0
|
|
0
|
|
|
0
|
my $chuknum = $args{chunknum} || return 0; |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
|
1032
|
0
|
|
|
|
|
0
|
foreach my $schunknum (keys %{ $self->{s_chunks} }) { |
|
0
|
|
|
|
|
0
|
|
1033
|
0
|
|
|
|
|
0
|
foreach my $chunk ( @{ $self->{s_chunks}->{$schunknum} }) { |
|
0
|
|
|
|
|
0
|
|
1034
|
0
|
0
|
0
|
|
|
0
|
if ($chunk->{host} eq $suffix && ($chunk->{prefix} eq $prefix || $chunk->{prefix} eq '') && $chunk->{add_chunknum} == $chuknum) { |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
1035
|
0
|
|
|
|
|
0
|
return 1; |
1036
|
|
|
|
|
|
|
} |
1037
|
|
|
|
|
|
|
} |
1038
|
|
|
|
|
|
|
} |
1039
|
|
|
|
|
|
|
|
1040
|
0
|
|
|
|
|
0
|
return 0; |
1041
|
|
|
|
|
|
|
} |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
=head2 ua() |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
Create LWP::UserAgent to make HTTP requests to Google. |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
=cut |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
sub ua { |
1051
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
1052
|
|
|
|
|
|
|
|
1053
|
0
|
0
|
|
|
|
0
|
if (! exists $self->{ua}) { |
1054
|
0
|
|
|
|
|
0
|
my $ua = LWP::UserAgent->new; |
1055
|
0
|
|
|
|
|
0
|
$ua->timeout(60); |
1056
|
|
|
|
|
|
|
|
1057
|
0
|
|
|
|
|
0
|
$self->{ua} = $ua; |
1058
|
|
|
|
|
|
|
} |
1059
|
|
|
|
|
|
|
|
1060
|
0
|
|
|
|
|
0
|
return $self->{ua}; |
1061
|
|
|
|
|
|
|
} |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
=head2 parse_s() |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
Parse data from a rediration (add asnd sub chunk information). |
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
=cut |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
sub parse_data { |
1071
|
0
|
|
|
0
|
0
|
0
|
my ($self, %args) = @_; |
1072
|
0
|
|
0
|
|
|
0
|
my $data = $args{data} || ''; |
1073
|
0
|
|
0
|
|
|
0
|
my $list = $args{list} || ''; |
1074
|
|
|
|
|
|
|
|
1075
|
0
|
|
|
|
|
0
|
my $chunk_num = 0; |
1076
|
0
|
|
|
|
|
0
|
my $hash_length = 0; |
1077
|
0
|
|
|
|
|
0
|
my $chunk_length = 0; |
1078
|
|
|
|
|
|
|
|
1079
|
0
|
|
|
|
|
0
|
while (length $data > 0) { |
1080
|
|
|
|
|
|
|
# print "Length 1: ", length $data, "\n"; # 58748 |
1081
|
|
|
|
|
|
|
|
1082
|
0
|
|
|
|
|
0
|
my $type = substr($data, 0, 2, ''); # s:34321:4:137 |
1083
|
|
|
|
|
|
|
# print "Length 1.5: ", length $data, "\n"; # 58746 -2 |
1084
|
|
|
|
|
|
|
|
1085
|
0
|
0
|
|
|
|
0
|
if ($data =~ /^(\d+):(\d+):(\d+)\n/sgi) { |
1086
|
0
|
|
|
|
|
0
|
$chunk_num = $1; |
1087
|
0
|
|
|
|
|
0
|
$hash_length = $2; |
1088
|
0
|
|
|
|
|
0
|
$chunk_length = $3; |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
# shorten data |
1091
|
0
|
|
|
|
|
0
|
substr($data, 0, length($chunk_num) + length($hash_length) + length($chunk_length) + 3, ''); |
1092
|
|
|
|
|
|
|
# print "Remove ", length($chunk_num) + length($hash_length) + length($chunk_length) + 3, "\n"; |
1093
|
|
|
|
|
|
|
# print "Length 2: ", length $data, "\n"; # 58741 -5 |
1094
|
|
|
|
|
|
|
|
1095
|
0
|
|
|
|
|
0
|
my $encoded = substr($data, 0, $chunk_length, ''); |
1096
|
|
|
|
|
|
|
# print "Length 3: ", length $data, "\n"; # 58604 -137 |
1097
|
|
|
|
|
|
|
|
1098
|
0
|
0
|
|
|
|
0
|
if ($type eq 's:') { |
|
|
0
|
|
|
|
|
|
1099
|
0
|
|
|
|
|
0
|
my @chunks = $self->parse_s(value => $encoded, hash_length => $hash_length); |
1100
|
|
|
|
|
|
|
|
1101
|
0
|
|
|
|
|
0
|
$self->{storage}->add_chunks(type => 's', chunknum => $chunk_num, chunks => [@chunks], list => $list); # Must happen all at once => not 100% sure |
1102
|
|
|
|
|
|
|
} |
1103
|
|
|
|
|
|
|
elsif ($type eq 'a:') { |
1104
|
0
|
|
|
|
|
0
|
my @chunks = $self->parse_a(value => $encoded, hash_length => $hash_length); |
1105
|
0
|
|
|
|
|
0
|
$self->{storage}->add_chunks(type => 'a', chunknum => $chunk_num, chunks => [@chunks], list => $list); # Must happen all at once => not 100% sure |
1106
|
|
|
|
|
|
|
} |
1107
|
|
|
|
|
|
|
else { |
1108
|
0
|
|
|
|
|
0
|
$self->error("Incorrect chunk type: $type, should be a: or s:\n"); |
1109
|
0
|
|
|
|
|
0
|
return INTERNAL_ERROR;# failed |
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
|
1112
|
0
|
|
|
|
|
0
|
$self->debug("$type$chunk_num:$hash_length:$chunk_length OK\n"); |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
} |
1115
|
|
|
|
|
|
|
else { |
1116
|
0
|
|
|
|
|
0
|
$self->error("could not parse header\n"); |
1117
|
0
|
|
|
|
|
0
|
return INTERNAL_ERROR;# failed |
1118
|
|
|
|
|
|
|
} |
1119
|
|
|
|
|
|
|
} |
1120
|
|
|
|
|
|
|
|
1121
|
0
|
|
|
|
|
0
|
return SUCCESSFUL; |
1122
|
|
|
|
|
|
|
} |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
=head2 parse_s() |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
Parse s chunks information for a database update. |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
=cut |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
sub parse_s { |
1132
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
1133
|
0
|
|
0
|
|
|
0
|
my $value = $args{value} || return (); |
1134
|
0
|
|
0
|
|
|
0
|
my $hash_length = $args{hash_length} || 4; |
1135
|
|
|
|
|
|
|
|
1136
|
0
|
|
|
|
|
0
|
my @data = (); |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
|
1139
|
0
|
|
|
|
|
0
|
while (length $value > 0) { |
1140
|
|
|
|
|
|
|
# my $host = $self->hex_to_ascii( substr($value, 0, 4, '') ); # Host hash |
1141
|
0
|
|
|
|
|
0
|
my $host = substr($value, 0, 4, ''); # HEX |
1142
|
|
|
|
|
|
|
# print "\t Host key: $host\n"; |
1143
|
|
|
|
|
|
|
|
1144
|
0
|
|
|
|
|
0
|
my $count = substr($value, 0, 1, ''); # hex value |
1145
|
0
|
|
|
|
|
0
|
$count = ord($count); |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
# my $add_chunk_num_hex; |
1148
|
|
|
|
|
|
|
|
1149
|
0
|
0
|
|
|
|
0
|
if ($count == 0) { # ADDCHUNKNUM only |
1150
|
|
|
|
|
|
|
# $self->debug("\nadd_chuknum: " . substr($value, 0, 4) . " => "); |
1151
|
0
|
|
|
|
|
0
|
my $add_chunknum = hex($self->hex_to_ascii( substr($value, 0, 4, '') ) ); #chunk num |
1152
|
|
|
|
|
|
|
# $self->debug("$add_chunknum\n"); |
1153
|
|
|
|
|
|
|
|
1154
|
0
|
|
|
|
|
0
|
push(@data, { host => $host, add_chunknum => $add_chunknum, prefix => '' }); |
1155
|
|
|
|
|
|
|
|
1156
|
0
|
0
|
|
|
|
0
|
if ($self->{debug}) { |
1157
|
0
|
|
|
|
|
0
|
$self->debug("\t" . $self->hex_to_ascii($host) . " $add_chunknum\n"); |
1158
|
|
|
|
|
|
|
} |
1159
|
|
|
|
|
|
|
} |
1160
|
|
|
|
|
|
|
else { # ADDCHUNKNUM + PREFIX |
1161
|
0
|
|
|
|
|
0
|
for(my $i = 0; $i < $count; $i++) { |
1162
|
|
|
|
|
|
|
# my $add_chunknum = $self->hex_to_ascii( substr($value, 0, 4, '') ); #chunk num - ACII |
1163
|
|
|
|
|
|
|
# $self->debug("\nadd_chuknum: " . substr($value, 0, 4) . " => "); |
1164
|
0
|
|
|
|
|
0
|
my $add_chunknum = hex($self->hex_to_ascii( substr($value, 0, 4, '') )); # DEC |
1165
|
|
|
|
|
|
|
# $self->debug("$add_chunknum\n"); |
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
# my $prefix = $self->hex_to_ascii( substr($value, 0, $hash_length, '') ); # ASCII |
1168
|
0
|
|
|
|
|
0
|
my $prefix = substr($value, 0, $hash_length, ''); # HEX |
1169
|
|
|
|
|
|
|
|
1170
|
0
|
|
|
|
|
0
|
push(@data, { host => $host, add_chunknum => $add_chunknum, prefix => $prefix }); |
1171
|
|
|
|
|
|
|
|
1172
|
0
|
0
|
|
|
|
0
|
if ($self->{debug}) { |
1173
|
0
|
|
|
|
|
0
|
$self->debug("\t" . $self->hex_to_ascii($host) . " $add_chunknum " . $self->hex_to_ascii($prefix) . "\n"); |
1174
|
|
|
|
|
|
|
} |
1175
|
|
|
|
|
|
|
} |
1176
|
|
|
|
|
|
|
} |
1177
|
|
|
|
|
|
|
} |
1178
|
|
|
|
|
|
|
|
1179
|
0
|
|
|
|
|
0
|
return @data; |
1180
|
|
|
|
|
|
|
} |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
=head2 parse_a() |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
Parse a chunks information for a database update. |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
=cut |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
sub parse_a { |
1190
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
1191
|
0
|
|
0
|
|
|
0
|
my $value = $args{value} || return (); |
1192
|
0
|
|
0
|
|
|
0
|
my $hash_length = $args{hash_length} || 4; |
1193
|
|
|
|
|
|
|
|
1194
|
0
|
|
|
|
|
0
|
my @data = (); |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
|
1197
|
0
|
|
|
|
|
0
|
while (length $value > 0) { |
1198
|
|
|
|
|
|
|
# my $host = $self->hex_to_ascii( substr($value, 0, 4, '') ); # Host hash |
1199
|
0
|
|
|
|
|
0
|
my $host = substr($value, 0, 4, ''); # HEX |
1200
|
|
|
|
|
|
|
# print "\t Host key: $host\n"; |
1201
|
|
|
|
|
|
|
|
1202
|
0
|
|
|
|
|
0
|
my $count = substr($value, 0, 1, ''); # hex value |
1203
|
0
|
|
|
|
|
0
|
$count = ord($count); |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
|
1206
|
0
|
0
|
|
|
|
0
|
if ($count > 0) { # ADDCHUNKNUM only |
1207
|
0
|
|
|
|
|
0
|
for(my $i = 0; $i < $count; $i++) { |
1208
|
|
|
|
|
|
|
# my $prefix = $self->hex_to_ascii( substr($value, 0, $hash_length, '') ); # ASCII |
1209
|
0
|
|
|
|
|
0
|
my $prefix = substr($value, 0, $hash_length, ''); # HEX |
1210
|
|
|
|
|
|
|
|
1211
|
0
|
|
|
|
|
0
|
push(@data, { host => $host, prefix => $prefix }); |
1212
|
|
|
|
|
|
|
|
1213
|
0
|
0
|
|
|
|
0
|
if ($self->{debug}) { |
1214
|
0
|
|
|
|
|
0
|
$self->debug("\t" . $self->hex_to_ascii($host) . " " . $self->hex_to_ascii($prefix) . "\n"); |
1215
|
|
|
|
|
|
|
} |
1216
|
|
|
|
|
|
|
} |
1217
|
|
|
|
|
|
|
} |
1218
|
|
|
|
|
|
|
else { |
1219
|
0
|
|
|
|
|
0
|
push(@data, { host => $host, prefix => '' }); |
1220
|
|
|
|
|
|
|
|
1221
|
0
|
0
|
|
|
|
0
|
if ($self->{debug}) { |
1222
|
0
|
|
|
|
|
0
|
$self->debug("\t" . $self->hex_to_ascii($host) . "\n"); |
1223
|
|
|
|
|
|
|
} |
1224
|
|
|
|
|
|
|
} |
1225
|
|
|
|
|
|
|
} |
1226
|
|
|
|
|
|
|
|
1227
|
0
|
|
|
|
|
0
|
return @data; |
1228
|
|
|
|
|
|
|
} |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
=head2 hex_to_ascii() |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
Transform hexadecimal strings to printable ASCII strings. Used mainly for debugging. |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
print $gsb->hex_to_ascii('hex value'); |
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
=cut |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
sub hex_to_ascii { |
1240
|
2
|
|
|
2
|
1
|
7
|
my ($self, $hex) = @_; |
1241
|
|
|
|
|
|
|
|
1242
|
2
|
|
|
|
|
11
|
return String::HexConvert::ascii_to_hex($hex); |
1243
|
|
|
|
|
|
|
# my $ascii = ''; |
1244
|
|
|
|
|
|
|
# |
1245
|
|
|
|
|
|
|
# while (length $hex > 0) { |
1246
|
|
|
|
|
|
|
# $ascii .= sprintf("%02x", ord( substr($hex, 0, 1, '') ) ); |
1247
|
|
|
|
|
|
|
# } |
1248
|
|
|
|
|
|
|
# |
1249
|
|
|
|
|
|
|
# return $ascii; |
1250
|
|
|
|
|
|
|
} |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
=head2 ascii_to_hex() |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
Transform ASCII strings to hexadecimal strings. |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
=cut |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
sub ascii_to_hex { |
1260
|
4
|
|
|
4
|
1
|
382
|
my ($self, $ascii) = @_; |
1261
|
|
|
|
|
|
|
|
1262
|
4
|
|
|
|
|
7
|
my $hex = ''; |
1263
|
4
|
|
|
|
|
28
|
for (my $i = 0; $i < int(length($ascii) / 2); $i++) { |
1264
|
16
|
|
|
|
|
52
|
$hex .= chr hex( substr($ascii, $i * 2, 2) ); |
1265
|
|
|
|
|
|
|
} |
1266
|
|
|
|
|
|
|
|
1267
|
4
|
|
|
|
|
23
|
return $hex; |
1268
|
|
|
|
|
|
|
} |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
=head2 debug() |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
Print debug output. |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
=cut |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
sub debug { |
1277
|
0
|
|
|
0
|
1
|
0
|
my ($self, $message) = @_; |
1278
|
|
|
|
|
|
|
|
1279
|
0
|
0
|
|
|
|
0
|
print $message if ($self->{debug} > 0); |
1280
|
|
|
|
|
|
|
} |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
=head2 error() |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
Print error message. |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
=cut |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
sub error { |
1290
|
0
|
|
|
0
|
1
|
0
|
my ($self, $message) = @_; |
1291
|
|
|
|
|
|
|
|
1292
|
0
|
0
|
0
|
|
|
0
|
print "ERROR - ", $message if ($self->{debug} > 0 || $self->{errors} > 0); |
1293
|
0
|
|
|
|
|
0
|
$self->{last_error} = $message; |
1294
|
|
|
|
|
|
|
} |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
=head2 error() |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
Print performance message. |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
=cut |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
sub perf { |
1304
|
0
|
|
|
0
|
1
|
0
|
my ($self, $message) = @_; |
1305
|
|
|
|
|
|
|
|
1306
|
0
|
0
|
|
|
|
0
|
print $message if ($self->{perf} > 0); |
1307
|
|
|
|
|
|
|
} |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=head2 canonical_domain_suffixes() |
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
Find all suffixes for a domain. |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
=cut |
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
sub canonical_domain_suffixes { |
1316
|
3
|
|
|
3
|
1
|
2654
|
my ($self, $domain) = @_; |
1317
|
|
|
|
|
|
|
|
1318
|
3
|
|
|
|
|
5
|
my @domains = (); |
1319
|
|
|
|
|
|
|
|
1320
|
3
|
50
|
|
|
|
16
|
if ($domain =~ /^\d+\.\d+\.\d+\.\d+$/) { # loose check for IP address, should be enough |
1321
|
0
|
|
|
|
|
0
|
return ($domain); |
1322
|
|
|
|
|
|
|
} |
1323
|
|
|
|
|
|
|
|
1324
|
3
|
|
|
|
|
11
|
my @parts = split/\./, $domain; # take 3 components |
1325
|
3
|
100
|
|
|
|
9
|
if (scalar @parts >= 3) { |
1326
|
2
|
|
|
|
|
6
|
@parts = splice (@parts, -3, 3); |
1327
|
|
|
|
|
|
|
|
1328
|
2
|
|
|
|
|
6
|
push(@domains, join('.', @parts)); |
1329
|
|
|
|
|
|
|
|
1330
|
2
|
|
|
|
|
4
|
splice(@parts, 0, 1); |
1331
|
|
|
|
|
|
|
} |
1332
|
|
|
|
|
|
|
|
1333
|
3
|
|
|
|
|
6
|
push(@domains, join('.', @parts)); |
1334
|
|
|
|
|
|
|
|
1335
|
3
|
|
|
|
|
13
|
return @domains; |
1336
|
|
|
|
|
|
|
} |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
=head2 canonical_domain() |
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
Find all canonical domains a domain. |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
=cut |
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
sub canonical_domain { |
1346
|
3
|
|
|
3
|
1
|
66
|
my ($self, $domain) = @_; |
1347
|
|
|
|
|
|
|
|
1348
|
3
|
|
|
|
|
7
|
my @domains = ($domain); |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
|
1351
|
3
|
50
|
|
|
|
11
|
if ($domain =~ /^\d+\.\d+\.\d+\.\d+$/) { # loose check for IP address, should be enough |
1352
|
0
|
|
|
|
|
0
|
return @domains; |
1353
|
|
|
|
|
|
|
} |
1354
|
|
|
|
|
|
|
|
1355
|
3
|
|
|
|
|
15
|
my @parts = split/\./, $domain; |
1356
|
3
|
|
|
|
|
6
|
splice(@parts, 0, -6); # take 5 top most compoments |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
|
1359
|
3
|
|
|
|
|
10
|
while (scalar @parts > 2) { |
1360
|
6
|
|
|
|
|
7
|
shift @parts; |
1361
|
6
|
|
|
|
|
21
|
push(@domains, join(".", @parts) ); |
1362
|
|
|
|
|
|
|
} |
1363
|
|
|
|
|
|
|
|
1364
|
3
|
|
|
|
|
13
|
return @domains; |
1365
|
|
|
|
|
|
|
} |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
=head2 canonical_path() |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
Find all canonical paths for a URL. |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
=cut |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
sub canonical_path { |
1374
|
3
|
|
|
3
|
1
|
40
|
my ($self, $path) = @_; |
1375
|
|
|
|
|
|
|
|
1376
|
3
|
|
|
|
|
5
|
my @paths = ($path); # return full path |
1377
|
|
|
|
|
|
|
|
1378
|
3
|
100
|
|
|
|
11
|
if ($path =~ /\?/) { |
1379
|
1
|
|
|
|
|
6
|
$path =~ s/\?.*$//; |
1380
|
|
|
|
|
|
|
|
1381
|
1
|
|
|
|
|
3
|
push(@paths, $path); |
1382
|
|
|
|
|
|
|
} |
1383
|
|
|
|
|
|
|
|
1384
|
3
|
|
|
|
|
13
|
my @parts = split /\//, $path; |
1385
|
3
|
|
|
|
|
5
|
my $previous = ''; |
1386
|
3
|
|
66
|
|
|
19
|
while (scalar @parts > 1 && scalar @paths < 6) { |
1387
|
3
|
|
|
|
|
5
|
my $val = shift(@parts); |
1388
|
3
|
|
|
|
|
7
|
$previous .= "$val/"; |
1389
|
|
|
|
|
|
|
|
1390
|
3
|
|
|
|
|
12
|
push(@paths, $previous); |
1391
|
|
|
|
|
|
|
} |
1392
|
|
|
|
|
|
|
|
1393
|
3
|
|
|
|
|
11
|
return @paths; |
1394
|
|
|
|
|
|
|
} |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
=head2 canonical() |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
Find all canonical URLs for a URL. |
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
=cut |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
sub canonical { |
1403
|
3
|
|
|
3
|
1
|
7062
|
my ($self, $url) = @_; |
1404
|
|
|
|
|
|
|
|
1405
|
3
|
|
|
|
|
5
|
my @urls = (); |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
# my $uri = URI->new($url)->canonical; |
1408
|
3
|
|
|
|
|
10
|
my $uri = $self->canonical_uri($url); |
1409
|
3
|
|
|
|
|
150
|
my @domains = $self->canonical_domain($uri->host); |
1410
|
3
|
|
|
|
|
19
|
my @paths = $self->canonical_path($uri->path_query); |
1411
|
|
|
|
|
|
|
|
1412
|
3
|
|
|
|
|
9
|
foreach my $domain (@domains) { |
1413
|
9
|
|
|
|
|
10
|
foreach my $path (@paths) { |
1414
|
20
|
|
|
|
|
43
|
push(@urls, "$domain$path"); |
1415
|
|
|
|
|
|
|
} |
1416
|
|
|
|
|
|
|
} |
1417
|
|
|
|
|
|
|
|
1418
|
3
|
|
|
|
|
21
|
return @urls; |
1419
|
|
|
|
|
|
|
} |
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
=head2 canonical_uri() |
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
Create a canonical URI. |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
NOTE: URI cannot handle all the test cases provided by Google. This method is a hack to pass most of the test. A few tests are still failing. The proper way to handle URL canonicalization according to Google would be to create a new module to handle URLs. However, I believe most real-life cases are handled correctly by this function. |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
=cut |
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
sub canonical_uri { |
1431
|
39
|
|
|
39
|
1
|
14190
|
my ($self, $url) = @_; |
1432
|
|
|
|
|
|
|
|
1433
|
39
|
|
|
|
|
104
|
$url = trim $url; |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
# Special case for \t \r \n |
1436
|
39
|
|
|
|
|
933
|
while ($url =~ s/^([^?]+)[\r\t\n]/$1/sgi) { } |
1437
|
|
|
|
|
|
|
|
1438
|
39
|
|
|
|
|
136
|
my $uri = URI->new($url)->canonical; # does not deal with directory traversing |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
# $self->debug("0. $url => " . $uri->as_string . "\n"); |
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
|
1443
|
39
|
100
|
66
|
|
|
16841
|
if (! $uri->scheme() || $uri->scheme() eq '') { |
1444
|
3
|
|
|
|
|
38
|
$uri = URI->new("http://$url")->canonical; |
1445
|
|
|
|
|
|
|
} |
1446
|
|
|
|
|
|
|
|
1447
|
39
|
|
|
|
|
1504
|
$uri->fragment(''); |
1448
|
|
|
|
|
|
|
|
1449
|
39
|
|
|
|
|
480
|
my $escape = $uri->as_string; |
1450
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
# Reduce double // to single / in path |
1452
|
39
|
|
|
|
|
297
|
while ($escape =~ s/^([a-z]+:\/\/[^?]+)\/\//$1\//sgi) { } |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
# Remove empty fragment |
1456
|
39
|
|
|
|
|
123
|
$escape =~ s/#$//; |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
# canonial does not handle ../ |
1459
|
|
|
|
|
|
|
# $self->debug("\t$escape\n"); |
1460
|
39
|
|
|
|
|
130
|
while($escape =~ s/([^\/])\/([^\/]+)\/\.\.([\/?].*)$/$1$3/gi) { } |
1461
|
39
|
|
|
|
|
91
|
while($escape =~ s/([^\/])\/([^\/]+)\/\.\.$/$1/gi) { } |
1462
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
# May have removed ending / |
1464
|
|
|
|
|
|
|
# $self->debug("\t$escape\n"); |
1465
|
39
|
100
|
|
|
|
136
|
$escape .= "/" if ($escape =~ /^[a-z]+:\/\/[^\/\?]+$/); |
1466
|
39
|
|
|
|
|
107
|
$escape =~ s/^([a-z]+:\/\/[^\/]+)(\?.*)$/$1\/$2/gi; |
1467
|
|
|
|
|
|
|
# $self->debug("\t$escape\n"); |
1468
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
# other weird case if domain = digits only, try to translate it to IP address |
1470
|
39
|
100
|
|
|
|
106
|
if ((my $domain = URI->new($escape)->host) =~/^\d+$/) { |
1471
|
3
|
|
|
|
|
230
|
my $ip = Socket::inet_ntoa(Socket::inet_aton($domain)); |
1472
|
|
|
|
|
|
|
|
1473
|
3
|
|
|
|
|
12
|
$uri = URI->new($escape); |
1474
|
3
|
|
|
|
|
148
|
$uri->host($ip); |
1475
|
|
|
|
|
|
|
|
1476
|
3
|
|
|
|
|
213
|
$escape = $uri->as_string; |
1477
|
|
|
|
|
|
|
} |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
# $self->debug("1. $url => $escape\n"); |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
# Try to escape the path again |
1482
|
39
|
|
|
|
|
2482
|
$url = $escape; |
1483
|
39
|
|
|
|
|
87
|
while (($escape = URI::Escape::uri_unescape($url)) ne $escape) { # wrong for %23 -> # |
1484
|
0
|
|
|
|
|
0
|
$url = $escape; |
1485
|
|
|
|
|
|
|
} |
1486
|
|
|
|
|
|
|
# while (($escape = URI->new($url)->canonical->as_string) ne $escape) { # breask more unit tests than previous |
1487
|
|
|
|
|
|
|
# $url = $escape; |
1488
|
|
|
|
|
|
|
# } |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
# Fix for %23 -> # |
1491
|
39
|
|
|
|
|
366
|
while($escape =~ s/#/%23/sgi) { } |
1492
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
# $self->debug("2. $url => $escape\n"); |
1494
|
|
|
|
|
|
|
|
1495
|
|
|
|
|
|
|
# Fix over escaping |
1496
|
39
|
|
|
|
|
97
|
while($escape =~ s/^([^?]+)%%(%.*)$/$1%25%25$2/sgi) { } |
1497
|
39
|
|
|
|
|
87
|
while($escape =~ s/^([^?]+)%%/$1%25%25/sgi) { } |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
# URI has issues with % in domains, it gets the host wrong |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
# 1. fix the host |
1502
|
|
|
|
|
|
|
# $self->debug("Domain: " . URI->new($escape)->host . "\n"); |
1503
|
39
|
|
|
|
|
46
|
my $exception = 0; |
1504
|
39
|
|
|
|
|
182
|
while ($escape =~ /^[a-z]+:\/\/[^\/]*([^a-z0-9%_.-\/:])[^\/]*(\/.*)$/) { |
1505
|
3
|
|
|
|
|
4
|
my $source = $1; |
1506
|
3
|
|
|
|
|
10
|
my $target = sprintf("%02x", ord($source)); |
1507
|
|
|
|
|
|
|
|
1508
|
3
|
|
|
|
|
46
|
$escape =~ s/^([a-z]+:\/\/[^\/]*)\Q$source\E/$1%\Q$target\E/; |
1509
|
|
|
|
|
|
|
|
1510
|
3
|
|
|
|
|
15
|
$exception = 1; |
1511
|
|
|
|
|
|
|
} |
1512
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
# 2. need to parse the path again |
1514
|
39
|
50
|
66
|
|
|
103
|
if ($exception && $escape =~ /^[a-z]+:\/\/[^\/]+\/(.+)/) { |
1515
|
0
|
|
|
|
|
0
|
my $source = $1; |
1516
|
0
|
|
|
|
|
0
|
my $target = URI::Escape::uri_unescape($source); |
1517
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
# print "Source: $source\n"; |
1519
|
0
|
|
|
|
|
0
|
while ($target ne URI::Escape::uri_unescape($target)) { |
1520
|
0
|
|
|
|
|
0
|
$target = URI::Escape::uri_unescape($target); |
1521
|
|
|
|
|
|
|
} |
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
|
1524
|
0
|
|
|
|
|
0
|
$escape =~ s/\/\Q$source\E/\/$target/; |
1525
|
|
|
|
|
|
|
|
1526
|
0
|
|
|
|
|
0
|
while ($escape =~ s/#/%23/sgi) { } # fragement has been removed earlier |
1527
|
0
|
|
|
|
|
0
|
while ($escape =~ s/^([a-z]+:\/\/[^\/]+\/.*)%5e/$1\&/sgi) { } # not in the host name |
1528
|
|
|
|
|
|
|
# while ($escape =~ s/%5e/&/sgi) { } |
1529
|
|
|
|
|
|
|
|
1530
|
0
|
|
|
|
|
0
|
while ($escape =~ s/%([^0-9a-f]|.[^0-9a-f])/%25$1/sgi) { } |
1531
|
|
|
|
|
|
|
} |
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
# $self->debug("$url => $escape\n"); |
1534
|
|
|
|
|
|
|
# $self->debug(URI->new($escape)->as_string . "\n"); |
1535
|
|
|
|
|
|
|
|
1536
|
39
|
|
|
|
|
115
|
return URI->new($escape); |
1537
|
|
|
|
|
|
|
} |
1538
|
|
|
|
|
|
|
|
1539
|
|
|
|
|
|
|
=head2 canonical() |
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
Return all possible full hashes for a URL. |
1542
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
=cut |
1544
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
sub full_hashes { |
1546
|
0
|
|
|
0
|
0
|
0
|
my ($self, $url) = @_; |
1547
|
|
|
|
|
|
|
|
1548
|
0
|
|
|
|
|
0
|
my @urls = $self->canonical($url); |
1549
|
0
|
|
|
|
|
0
|
my @hashes = (); |
1550
|
|
|
|
|
|
|
|
1551
|
0
|
|
|
|
|
0
|
foreach my $url (@urls) { |
1552
|
|
|
|
|
|
|
# $self->debug("$url\n"); |
1553
|
0
|
|
|
|
|
0
|
push(@hashes, sha256($url)); |
1554
|
|
|
|
|
|
|
# $self->debug("$url " . $self->hex_to_ascii(sha256($url)) . "\n"); |
1555
|
|
|
|
|
|
|
} |
1556
|
|
|
|
|
|
|
|
1557
|
0
|
|
|
|
|
0
|
return @hashes; |
1558
|
|
|
|
|
|
|
} |
1559
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
=head2 prefix() |
1561
|
|
|
|
|
|
|
|
1562
|
|
|
|
|
|
|
Return a hash prefix. The size of the prefix is set to 4 bytes. |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
=cut |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
sub prefix { |
1567
|
4
|
|
|
4
|
1
|
397
|
my ($self, $string) = @_; |
1568
|
|
|
|
|
|
|
|
1569
|
4
|
|
|
|
|
9164
|
return substr(sha256($string), 0, 4); |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
=head2 request_full_hash() |
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
Request full full hashes for specific prefixes from Google. |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
=cut |
1577
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
sub request_full_hash { |
1579
|
0
|
|
|
0
|
1
|
|
my ($self, %args) = @_; |
1580
|
0
|
|
0
|
|
|
|
my $prefixes = $args{prefixes} || return (); |
1581
|
0
|
|
0
|
|
|
|
my $size = $args{size} || length $prefixes->[0]; |
1582
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
# # Handle errors |
1584
|
0
|
|
|
|
|
|
my $i = 0; |
1585
|
0
|
|
|
|
|
|
my $errors; |
1586
|
|
|
|
|
|
|
my $delay = sub { |
1587
|
0
|
|
|
0
|
|
|
my $time = shift; |
1588
|
0
|
0
|
|
|
|
|
if ((time() - $errors->{timestamp}) < $time) { |
1589
|
0
|
|
|
|
|
|
splice(@$prefixes, $i, 1); |
1590
|
|
|
|
|
|
|
} |
1591
|
|
|
|
|
|
|
else { |
1592
|
0
|
|
|
|
|
|
$i++; |
1593
|
|
|
|
|
|
|
} |
1594
|
0
|
|
|
|
|
|
}; |
1595
|
|
|
|
|
|
|
|
1596
|
0
|
|
|
|
|
|
while ($i < scalar @$prefixes) { |
1597
|
0
|
|
|
|
|
|
my $prefix = $prefixes->[$i]; |
1598
|
|
|
|
|
|
|
|
1599
|
0
|
|
|
|
|
|
$errors = $self->{storage}->get_full_hash_error(prefix => $prefix); |
1600
|
0
|
0
|
0
|
|
|
|
if (defined $errors && $errors->{errors} > 2) { # 2 errors is OK |
1601
|
0
|
0
|
|
|
|
|
$errors->{errors} == 3 ? $delay->(30 * 60) # 30 minutes |
|
|
0
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
: $errors->{errors} == 4 ? $delay->(60 * 60) # 1 hour |
1603
|
|
|
|
|
|
|
: $delay->(2 * 60 * 60); # 2 hours |
1604
|
|
|
|
|
|
|
} |
1605
|
|
|
|
|
|
|
else { |
1606
|
0
|
|
|
|
|
|
$i++; |
1607
|
|
|
|
|
|
|
} |
1608
|
|
|
|
|
|
|
} |
1609
|
|
|
|
|
|
|
|
1610
|
0
|
|
|
|
|
|
my $url = $self->{server} . "gethash?client=api&apikey=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; |
1611
|
|
|
|
|
|
|
|
1612
|
0
|
|
|
|
|
|
my $prefix_list = join('', @$prefixes); |
1613
|
0
|
|
|
|
|
|
my $header = "$size:" . scalar @$prefixes * $size; |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
# print @{$args{prefixes}}, "\n"; |
1616
|
|
|
|
|
|
|
# print $$prefixes[0], "\n"; return; |
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
|
1619
|
0
|
|
|
|
|
|
my $res = $self->ua->post($url, Content => "$header\n$prefix_list"); |
1620
|
|
|
|
|
|
|
|
1621
|
0
|
0
|
|
|
|
|
if (! $res->is_success) { |
1622
|
0
|
|
|
|
|
|
$self->error("Full hash request failed\n"); |
1623
|
0
|
|
|
|
|
|
$self->debug($res->as_string . "\n"); |
1624
|
|
|
|
|
|
|
|
1625
|
0
|
|
|
|
|
|
foreach my $prefix (@$prefixes) { |
1626
|
0
|
|
|
|
|
|
my $errors = $self->{storage}->get_full_hash_error(prefix => $prefix); |
1627
|
0
|
0
|
0
|
|
|
|
if (defined $errors && ( |
|
|
|
0
|
|
|
|
|
1628
|
|
|
|
|
|
|
$errors->{errors} >=2 # backoff mode |
1629
|
|
|
|
|
|
|
|| $errors->{errors} == 1 && (time() - $errors->{timestamp}) > 5 * 60)) { # 5 minutes |
1630
|
0
|
|
|
|
|
|
$self->{storage}->full_hash_error(prefix => $prefix, timestamp => time()); # more complicate than this, need to check time between 2 errors |
1631
|
|
|
|
|
|
|
} |
1632
|
|
|
|
|
|
|
} |
1633
|
|
|
|
|
|
|
|
1634
|
0
|
|
|
|
|
|
return (); |
1635
|
|
|
|
|
|
|
} |
1636
|
|
|
|
|
|
|
else { |
1637
|
0
|
|
|
|
|
|
$self->debug("Full hash request OK\n"); |
1638
|
|
|
|
|
|
|
|
1639
|
0
|
|
|
|
|
|
foreach my $prefix (@$prefixes) { |
1640
|
0
|
|
|
|
|
|
$self->{storage}->full_hash_ok(prefix => $prefix, timestamp => time()); |
1641
|
|
|
|
|
|
|
} |
1642
|
|
|
|
|
|
|
} |
1643
|
|
|
|
|
|
|
|
1644
|
0
|
|
|
|
|
|
$self->debug($res->request->as_string . "\n"); |
1645
|
0
|
|
|
|
|
|
$self->debug($res->as_string . "\n"); |
1646
|
|
|
|
|
|
|
# $self->debug(substr($res->content, 0, 250), "\n\n"); |
1647
|
|
|
|
|
|
|
|
1648
|
0
|
|
|
|
|
|
return $self->parse_full_hashes($res->content); |
1649
|
|
|
|
|
|
|
} |
1650
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
=head2 parse_full_hashes() |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
Process the request for full hashes from Google. |
1654
|
|
|
|
|
|
|
|
1655
|
|
|
|
|
|
|
=cut |
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
sub parse_full_hashes { |
1658
|
0
|
|
|
0
|
1
|
|
my ($self, $data) = @_; |
1659
|
|
|
|
|
|
|
|
1660
|
0
|
|
|
|
|
|
my @hashes = (); |
1661
|
|
|
|
|
|
|
|
1662
|
|
|
|
|
|
|
# goog-malware-shavar:22428:32\nHEX |
1663
|
0
|
|
|
|
|
|
while (length $data > 0) { |
1664
|
0
|
0
|
|
|
|
|
if ($data !~ /^[a-z-]+:\d+:\d+\n/) { |
1665
|
0
|
|
|
|
|
|
$self->error("list not found\n"); |
1666
|
0
|
|
|
|
|
|
return (); |
1667
|
|
|
|
|
|
|
} |
1668
|
0
|
|
|
|
|
|
$data =~ s/^([a-z-]+)://; |
1669
|
0
|
|
|
|
|
|
my $list = $1; |
1670
|
|
|
|
|
|
|
|
1671
|
0
|
|
|
|
|
|
$data =~ s/^(\d+)://; |
1672
|
0
|
|
|
|
|
|
my $chunknum = $1; |
1673
|
|
|
|
|
|
|
|
1674
|
0
|
|
|
|
|
|
$data =~ s/^(\d+)\n//; |
1675
|
0
|
|
|
|
|
|
my $length = $1; |
1676
|
|
|
|
|
|
|
|
1677
|
0
|
|
|
|
|
|
my $current = 0; |
1678
|
0
|
|
|
|
|
|
while ($current < $length) { |
1679
|
0
|
|
|
|
|
|
my $hash = substr($data, 0, 32, ''); |
1680
|
0
|
|
|
|
|
|
push(@hashes, { hash => $hash, chunknum => $chunknum, list => $list }); |
1681
|
|
|
|
|
|
|
|
1682
|
0
|
|
|
|
|
|
$current += 32; |
1683
|
|
|
|
|
|
|
} |
1684
|
|
|
|
|
|
|
} |
1685
|
|
|
|
|
|
|
|
1686
|
0
|
|
|
|
|
|
return @hashes; |
1687
|
|
|
|
|
|
|
} |
1688
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
=head2 get_a_range() |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
Get the list of a chunks ranges for a list update. |
1692
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
=cut |
1694
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
sub get_a_range { |
1696
|
0
|
|
|
0
|
1
|
|
my ($self, %args) = @_; |
1697
|
0
|
|
0
|
|
|
|
my $list = $args{'list'} || ''; |
1698
|
|
|
|
|
|
|
|
1699
|
0
|
|
|
|
|
|
my @nums = $self->{storage}->get_add_chunks_nums(); # trust storage to torder list, or reorder it here? |
1700
|
|
|
|
|
|
|
|
1701
|
0
|
|
|
|
|
|
return $self->create_range(numbers => [@nums]); |
1702
|
|
|
|
|
|
|
} |
1703
|
|
|
|
|
|
|
|
1704
|
|
|
|
|
|
|
=head2 get_s_range() |
1705
|
|
|
|
|
|
|
|
1706
|
|
|
|
|
|
|
Get the list of s chunks ranges for a list update. |
1707
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
=cut |
1709
|
|
|
|
|
|
|
|
1710
|
|
|
|
|
|
|
sub get_s_range { |
1711
|
0
|
|
|
0
|
1
|
|
my ($self, %args) = @_; |
1712
|
0
|
|
0
|
|
|
|
my $list = $args{'list'} || ''; |
1713
|
|
|
|
|
|
|
|
1714
|
0
|
|
|
|
|
|
my @nums = $self->{storage}->get_sub_chunks_nums(); # trust storage to torder list, or reorder it here? |
1715
|
|
|
|
|
|
|
|
1716
|
0
|
|
|
|
|
|
return $self->create_range(numbers => [@nums]); |
1717
|
|
|
|
|
|
|
} |
1718
|
|
|
|
|
|
|
|
1719
|
|
|
|
|
|
|
=head2 create_range() |
1720
|
|
|
|
|
|
|
|
1721
|
|
|
|
|
|
|
Create a list of ranges (1-3, 5, 7-11) from a list of numbers. |
1722
|
|
|
|
|
|
|
|
1723
|
|
|
|
|
|
|
=cut |
1724
|
|
|
|
|
|
|
|
1725
|
|
|
|
|
|
|
sub create_range { |
1726
|
0
|
|
|
0
|
1
|
|
my ($self, %args) = @_; |
1727
|
0
|
|
0
|
|
|
|
my $numbers = $args{numbers} || []; # should already be ordered |
1728
|
|
|
|
|
|
|
|
1729
|
0
|
0
|
|
|
|
|
return '' if (scalar @$numbers == 0); |
1730
|
|
|
|
|
|
|
|
1731
|
0
|
|
|
|
|
|
my $range = $$numbers[0]; |
1732
|
0
|
|
|
|
|
|
my $new_range = 0; |
1733
|
0
|
|
|
|
|
|
for(my $i = 1; $i < scalar @$numbers; $i++) { |
1734
|
|
|
|
|
|
|
# next if ($$numbers[$i] == $$numbers[$i-1]); # should not happen |
1735
|
|
|
|
|
|
|
|
1736
|
0
|
0
|
|
|
|
|
if ($$numbers[$i] != $$numbers[$i-1] + 1) { |
|
|
0
|
|
|
|
|
|
1737
|
0
|
0
|
0
|
|
|
|
$range .= $$numbers[$i-1] if ($i > 1 && $new_range == 1); |
1738
|
0
|
|
|
|
|
|
$range .= ',' . $$numbers[$i]; |
1739
|
|
|
|
|
|
|
|
1740
|
0
|
|
|
|
|
|
$new_range = 0 |
1741
|
|
|
|
|
|
|
} |
1742
|
|
|
|
|
|
|
elsif ($new_range == 0) { |
1743
|
0
|
|
|
|
|
|
$range .= "-"; |
1744
|
0
|
|
|
|
|
|
$new_range = 1; |
1745
|
|
|
|
|
|
|
} |
1746
|
|
|
|
|
|
|
} |
1747
|
0
|
0
|
|
|
|
|
$range .= $$numbers[scalar @$numbers - 1] if ($new_range == 1); |
1748
|
|
|
|
|
|
|
|
1749
|
0
|
|
|
|
|
|
return $range; |
1750
|
|
|
|
|
|
|
} |
1751
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
=head2 expand_range() |
1753
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
Explode list of ranges (1-3, 5, 7-11) into a list of numbers (1,2,3,5,7,8,9,10,11). |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
=cut |
1757
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
sub expand_range { |
1759
|
0
|
|
|
0
|
1
|
|
my ($self, %args) = @_; |
1760
|
0
|
|
0
|
|
|
|
my $range = $args{range} || return (); |
1761
|
|
|
|
|
|
|
|
1762
|
0
|
|
|
|
|
|
my @list = (); |
1763
|
0
|
|
|
|
|
|
my @elements = split /,/, $range; |
1764
|
|
|
|
|
|
|
|
1765
|
0
|
|
|
|
|
|
foreach my $data (@elements) { |
1766
|
0
|
0
|
|
|
|
|
if ($data =~ /^\d+$/) { # single number |
|
|
0
|
|
|
|
|
|
1767
|
0
|
|
|
|
|
|
push(@list, $data); |
1768
|
|
|
|
|
|
|
} |
1769
|
|
|
|
|
|
|
elsif ($data =~ /^(\d+)-(\d+)$/) { |
1770
|
0
|
|
|
|
|
|
my $start = $1; |
1771
|
0
|
|
|
|
|
|
my $end = $2; |
1772
|
|
|
|
|
|
|
|
1773
|
0
|
|
|
|
|
|
for(my $i = $start; $i <= $end; $i++) { |
1774
|
0
|
|
|
|
|
|
push(@list, $i); |
1775
|
|
|
|
|
|
|
} |
1776
|
|
|
|
|
|
|
} |
1777
|
|
|
|
|
|
|
} |
1778
|
|
|
|
|
|
|
|
1779
|
0
|
|
|
|
|
|
return @list; |
1780
|
|
|
|
|
|
|
} |
1781
|
|
|
|
|
|
|
|
1782
|
|
|
|
|
|
|
|
1783
|
|
|
|
|
|
|
=head1 CHANGELOG |
1784
|
|
|
|
|
|
|
|
1785
|
|
|
|
|
|
|
=over 4 |
1786
|
|
|
|
|
|
|
|
1787
|
|
|
|
|
|
|
=item 1.11 |
1788
|
|
|
|
|
|
|
|
1789
|
|
|
|
|
|
|
Add dependency on IO::Socket::SSL. |
1790
|
|
|
|
|
|
|
Remove dependency on Net::IPAddress. |
1791
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
=item 1.10 |
1793
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
Force IPv4 to solve bug on CentOS. |
1795
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
=item 1.09 |
1797
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
Use HTTPS to access safebrowsing.clients.google.com/. |
1799
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
=item 1.07 |
1801
|
|
|
|
|
|
|
|
1802
|
|
|
|
|
|
|
Add C feature to import add chunks and sub chunks from a file. |
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
=item 1.05 |
1805
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
No code change. Move C to PRIVATE FUNCTIONS to avoid confusions. |
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
=item 1.04 |
1809
|
|
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
Introduce L. Remind people that Google Safe Browsing v1 has been deprecated by Google. |
1811
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
=item 1.03 |
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
The source code is available on github at L. |
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
=item 1.02 |
1817
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
Fix uninitialized $self->{errors} variable |
1819
|
|
|
|
|
|
|
|
1820
|
|
|
|
|
|
|
=item 1.01 |
1821
|
|
|
|
|
|
|
|
1822
|
|
|
|
|
|
|
Use String::HexConvert for faster hex_to_ascii. |
1823
|
|
|
|
|
|
|
|
1824
|
|
|
|
|
|
|
=item 1.0 |
1825
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
Separate the error output from the debug output. |
1827
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
=item 0.9 |
1829
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
Fix bug with local whitelisting (sub chunks). Fix the parsing of full hashes. |
1831
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
=item 0.8 |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
Reduce the number of full hash requests. |
1835
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
=item 0.7 |
1837
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
Add local_lookup to perform a lookup against the local database only. This function should be used for debugging purpose only. |
1839
|
|
|
|
|
|
|
Small code optimizations. |
1840
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
=item 0.6 |
1842
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
Handle local database reset. |
1844
|
|
|
|
|
|
|
|
1845
|
|
|
|
|
|
|
=item 0.5 |
1846
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
Update documentation. |
1848
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
=item 0.4 |
1850
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
Speed update the database update. The first update went down from 20 minutes to 20 minutes. |
1852
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
=item 0.3 |
1854
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
Fix typos in the documentation. |
1856
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
Remove dependency on Switch (thanks to Curtis Jewel). |
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
Fix value of FULL_HASH_TIME. |
1860
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
=item 0.2 |
1862
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
Add support for Message Authentication Code (MAC) |
1864
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
=back |
1866
|
|
|
|
|
|
|
|
1867
|
|
|
|
|
|
|
=head1 SEE ALSO |
1868
|
|
|
|
|
|
|
|
1869
|
|
|
|
|
|
|
Source code available at L. |
1870
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
See L, L and L for information on storing and managing the Google Safe Browsing database. |
1872
|
|
|
|
|
|
|
|
1873
|
|
|
|
|
|
|
Google Safe Browsing v2 API: L |
1874
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
L (Google Safe Browsing v1) is deprecated by Google since 12/01/2011. |
1876
|
|
|
|
|
|
|
|
1877
|
|
|
|
|
|
|
L (Google Safe Browsing v2) will deprecated by Google on 12/01/2014. |
1878
|
|
|
|
|
|
|
|
1879
|
|
|
|
|
|
|
=head1 AUTHOR |
1880
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
Julien Sobrier, Ejsobrier@zscaler.comE or Ejulien@sobrier.netE |
1882
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
1884
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
Copyright (C) 2012 by Julien Sobrier |
1886
|
|
|
|
|
|
|
|
1887
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
1888
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.8.8 or, |
1889
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
1890
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
=cut |
1893
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
1; |