line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Net::Google::SafeBrowsing3; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
16826
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
4
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
26
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
4
|
use Carp; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
85
|
|
7
|
1
|
|
|
1
|
|
673
|
use LWP::UserAgent; |
|
1
|
|
|
|
|
83232
|
|
|
1
|
|
|
|
|
46
|
|
8
|
1
|
|
|
1
|
|
11
|
use URI; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
33
|
|
9
|
1
|
|
|
1
|
|
1629
|
use Digest::SHA qw(sha256); |
|
1
|
|
|
|
|
3319
|
|
|
1
|
|
|
|
|
87
|
|
10
|
1
|
|
|
1
|
|
6
|
use List::Util qw(first); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
55
|
|
11
|
1
|
|
|
1
|
|
434
|
use Text::Trim; |
|
1
|
|
|
|
|
462
|
|
|
1
|
|
|
|
|
50
|
|
12
|
1
|
|
|
1
|
|
679
|
use MIME::Base64::URLSafe; |
|
1
|
|
|
|
|
1272
|
|
|
1
|
|
|
|
|
75
|
|
13
|
1
|
|
|
1
|
|
6
|
use MIME::Base64; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
38
|
|
14
|
1
|
|
|
1
|
|
498
|
use String::HexConvert; |
|
1
|
|
|
|
|
255
|
|
|
1
|
|
|
|
|
36
|
|
15
|
1
|
|
|
1
|
|
721
|
use IO::Socket::SSL 'inet4'; |
|
1
|
|
|
|
|
99497
|
|
|
1
|
|
|
|
|
12
|
|
16
|
1
|
|
|
1
|
|
1084
|
use Google::ProtocolBuffers; |
|
1
|
|
|
|
|
62467
|
|
|
1
|
|
|
|
|
34
|
|
17
|
1
|
|
|
1
|
|
22
|
use Data::Dumper; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
49
|
|
18
|
|
|
|
|
|
|
|
19
|
1
|
|
|
1
|
|
4
|
use Exporter 'import'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
61
|
|
20
|
|
|
|
|
|
|
our @EXPORT = qw(DATABASE_RESET INTERNAL_ERROR SERVER_ERROR NO_UPDATE NO_DATA SUCCESSFUL MALWARE PHISHING UNWANTED LANDING DISTRIBUTION); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
BEGIN { |
24
|
1
|
|
|
1
|
|
6
|
IO::Socket::SSL::set_ctx_defaults( |
25
|
|
|
|
|
|
|
# verify_mode => Net::SSLeay->VERIFY_PEER(), |
26
|
|
|
|
|
|
|
SSL_verify_mode => 0, |
27
|
|
|
|
|
|
|
); |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
our $VERSION = '0.5'; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Google::ProtocolBuffers->parse(" |
33
|
|
|
|
|
|
|
message ChunkData { |
34
|
|
|
|
|
|
|
required int32 chunk_number = 1; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
// The chunk type is either an add or sub chunk. |
37
|
|
|
|
|
|
|
enum ChunkType { |
38
|
|
|
|
|
|
|
ADD = 0; |
39
|
|
|
|
|
|
|
SUB = 1; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
optional ChunkType chunk_type = 2 [default = ADD]; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
// Prefix type which currently is either 4B or 32B. The default is set |
44
|
|
|
|
|
|
|
// to the prefix length, so it doesn't have to be set at all for most |
45
|
|
|
|
|
|
|
// chunks. |
46
|
|
|
|
|
|
|
enum PrefixType { |
47
|
|
|
|
|
|
|
PREFIX_4B = 0; |
48
|
|
|
|
|
|
|
FULL_32B = 1; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
optional PrefixType prefix_type = 3 [default = PREFIX_4B]; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
// Stores all SHA256 add or sub prefixes or full-length hashes. The number |
53
|
|
|
|
|
|
|
// of hashes can be inferred from the length of the hashes string and the |
54
|
|
|
|
|
|
|
// prefix type above. |
55
|
|
|
|
|
|
|
optional bytes hashes = 4; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
// Sub chunks also encode one add chunk number for every hash stored above. |
58
|
|
|
|
|
|
|
repeated int32 add_numbers = 5 [packed = true]; |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
", |
62
|
|
|
|
|
|
|
{create_accessors => 0 } |
63
|
|
|
|
|
|
|
); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Google::ProtocolBuffers->parse(" |
66
|
|
|
|
|
|
|
message MalwarePatternType { |
67
|
|
|
|
|
|
|
enum PATTERN_TYPE { |
68
|
|
|
|
|
|
|
LANDING = 1; |
69
|
|
|
|
|
|
|
DISTRIBUTION = 2; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
required PATTERN_TYPE pattern_type = 1; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
", |
75
|
|
|
|
|
|
|
{create_accessors => 0 } |
76
|
|
|
|
|
|
|
); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# TODO ################################################### |
79
|
|
|
|
|
|
|
#Todo: request full hashes: seperate 32bytes for 4bytes |
80
|
|
|
|
|
|
|
# Todo: optimize lookup_suffix, 1 search for all lists |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 NAME |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Net::Google::SafeBrowsing3 - Perl extension for the Google Safe Browsing v3 API. (Google Safe Browsing v2 has been deprecated by Google.) |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 SYNOPSIS |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
use Net::Google::SafeBrowsing3; |
89
|
|
|
|
|
|
|
use Net::Google::SafeBrowsing3::Sqlite; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
my $storage = Net::Google::SafeBrowsing3::Sqlite->new(file => 'google-v3.db'); |
92
|
|
|
|
|
|
|
my $gsb = Net::Google::SafeBrowsing3->new( |
93
|
|
|
|
|
|
|
key => "my key", |
94
|
|
|
|
|
|
|
storage => $storage, |
95
|
|
|
|
|
|
|
); |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
$gsb->update(); |
98
|
|
|
|
|
|
|
my $match = $gsb->lookup(url => 'http://www.gumblar.cn/'); |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
if ($match eq MALWARE) { |
101
|
|
|
|
|
|
|
print "http://www.gumblar.cn/ is flagged as a dangerous site\n"; |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
$storage->close(); |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 DESCRIPTION |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Net::Google::SafeBrowsing3 implements the Google Safe Browsing v3 API. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
The library passes most of the unit tests listed in the API documentation. See the documentation (L) for more details about the failed tests. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
The Google Safe Browsing database must be stored and managed locally. L uses Sqlite as the storage back-end, L uses MySQL. Other storage mechanisms (databases, memory, etc.) can be added and used transparently with this module. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
The source code is available on github at L. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
If you do not need to inspect more than 10,000 URLs a day, you can use L with the Google Safe Browsing v2 Lookup API which does not require to store and maintain a local database. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
IMPORTANT: If you start with an empty database, you will need to perform several updates to retrieve all the Google Safe Browsing information. This may require up to 24 hours. This is a limitation of the Google API, not of this module. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
IMPORTANT: Google Safe Browsing v3 requires a different key than v2. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head1 CONSTANTS |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Several constants are exported by this module: |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=over 4 |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=item DATABASE_RESET |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Google requested to reset (empty) the local database. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=item INTERNAL_ERROR |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
An internal error occurred. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=item SERVER_ERROR |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
The server sent an error back to the client. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item NO_UPDATE |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
No update was performed, probably because it is too early to make a new request to Google Safe Browsing. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=item NO_DATA |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
No data was sent back by Google to the client, probably because the database is up to date. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item SUCCESSFUL |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
The operation was successful. |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item MALWARE |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Name of the Malware list in Google Safe Browsing (shortcut to 'goog-malware-shavar') |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item PHISHING |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Name of the Phishing list in Google Safe Browsing (shortcut to 'googpub-phish-shavar') |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=item LANDING |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Landing site. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=item DISTRIBUTION |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Distribution site. |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=back |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
use constant { |
174
|
1
|
|
|
|
|
181
|
DATABASE_RESET => -6, |
175
|
|
|
|
|
|
|
INTERNAL_ERROR => -3, # internal/parsing error |
176
|
|
|
|
|
|
|
SERVER_ERROR => -2, # Server sent an error back |
177
|
|
|
|
|
|
|
NO_UPDATE => -1, # no update (too early) |
178
|
|
|
|
|
|
|
NO_DATA => 0, # no data sent |
179
|
|
|
|
|
|
|
SUCCESSFUL => 1, # data sent |
180
|
|
|
|
|
|
|
MALWARE => 'goog-malware-shavar', |
181
|
|
|
|
|
|
|
PHISHING => 'googpub-phish-shavar', |
182
|
|
|
|
|
|
|
UNWANTED => 'goog-unwanted-shavar', |
183
|
|
|
|
|
|
|
LANDING => 1, # Metadata goog-malware-shavar |
184
|
|
|
|
|
|
|
DISTRIBUTION => 2, # Metadata goog-malware-shavar |
185
|
1
|
|
|
1
|
|
128
|
}; |
|
1
|
|
|
|
|
1
|
|
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=over 4 |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=back |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head2 new() |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Create a Net::Google::SafeBrowsing3 object |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
my $gsb = Net::Google::SafeBrowsing3->new( |
199
|
|
|
|
|
|
|
key => "my key", |
200
|
|
|
|
|
|
|
storage => Net::Google::SafeBrowsing3::Sqlite->new(file => 'google-v3.db'), |
201
|
|
|
|
|
|
|
debug => 0, |
202
|
|
|
|
|
|
|
list => MALWARE, |
203
|
|
|
|
|
|
|
); |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
Arguments |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=over 4 |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=item server |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Safe Browsing Server. https://safebrowsing.google.com/safebrowsing/ by default |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=item key |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
Required. Your Google Safe browsing API key |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=item storage |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Required. Object which handle the storage for the Google Safe Browsing database. See L for more details. |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=item list |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Optional. The Google Safe Browsing list to handle. By default, handles both MALWARE and PHISHING. |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=item debug |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
Optional. Set to 1 to enable debugging. 0 (disabled) by default. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
The debug output maybe quite large and can slow down significantly the update and lookup functions. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=item errors |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
Optional. Set to 1 to show errors to STDOUT. 0 (disabled by default). |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=item perf |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Optional. Set to 1 to show performance information. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=item version |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
Optional. Google Safe Browsing version. 3.0 by default |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=back |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=cut |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub new { |
248
|
1
|
|
|
1
|
1
|
792
|
my ($class, %args) = @_; |
249
|
|
|
|
|
|
|
|
250
|
1
|
|
|
|
|
18
|
my $self = { # default arguments |
251
|
|
|
|
|
|
|
server => 'https://safebrowsing.google.com/safebrowsing/', |
252
|
|
|
|
|
|
|
list => [PHISHING, MALWARE, UNWANTED], |
253
|
|
|
|
|
|
|
key => '', |
254
|
|
|
|
|
|
|
version => '3.0', |
255
|
|
|
|
|
|
|
debug => 0, |
256
|
|
|
|
|
|
|
errors => 0, |
257
|
|
|
|
|
|
|
last_error => '', |
258
|
|
|
|
|
|
|
perf => 0, |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
%args, |
261
|
|
|
|
|
|
|
}; |
262
|
|
|
|
|
|
|
|
263
|
1
|
50
|
|
|
|
39
|
if (! exists $self->{storage}) { |
264
|
1
|
|
|
1
|
|
495
|
use Net::Google::SafeBrowsing3::Storage; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5276
|
|
265
|
1
|
|
|
|
|
13
|
$self->{storage} = Net::Google::SafeBrowsing3::Storage->new(); |
266
|
|
|
|
|
|
|
} |
267
|
1
|
50
|
|
|
|
7
|
if (ref $self->{list} ne 'ARRAY') { |
268
|
0
|
|
|
|
|
0
|
$self->{list} = [$self->{list}]; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
1
|
50
|
|
|
|
9
|
bless $self, $class or croak "Can't bless $class: $!"; |
272
|
1
|
|
|
|
|
4
|
return $self; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=head1 PUBLIC FUNCTIONS |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=over 4 |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=back |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head2 update() |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Perform a database update. |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
$gsb->update(); |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
Return the status of the update (see the list of constants above): INTERNAL_ERROR, SERVER_ERROR, NO_UPDATE, NO_DATA or SUCCESSFUL |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
This function can handle two lists at the same time. If one of the list should not be updated, it will automatically skip it and update the other one. It is faster to update two lists at once rather than doing them one by one. |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
Arguments |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=over 4 |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=item list |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
Optional. Update a specific list. Use the list(s) from new() by default. |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=item force |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
Optional. Force the update (1). Disabled by default (0). |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Be careful if you set this option to 1 as too frequent updates might result in the blacklisting of your API key. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=back |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=cut |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
sub update { |
312
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
313
|
0
|
|
|
|
|
0
|
my $list = $args{list}; |
314
|
0
|
|
0
|
|
|
0
|
my $force = $args{force} || 0; |
315
|
|
|
|
|
|
|
|
316
|
0
|
|
|
|
|
0
|
my @lists = @{$self->{list}}; |
|
0
|
|
|
|
|
0
|
|
317
|
0
|
0
|
|
|
|
0
|
@lists = @{[$args{list}]} if (defined $list); |
|
0
|
|
|
|
|
0
|
|
318
|
|
|
|
|
|
|
|
319
|
0
|
|
|
|
|
0
|
my $result = 0; |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
# Too early to update? |
322
|
0
|
|
|
|
|
0
|
my $start = time(); |
323
|
0
|
|
|
|
|
0
|
my $i = 0; |
324
|
0
|
|
|
|
|
0
|
while ($i < scalar @lists) { |
325
|
0
|
|
|
|
|
0
|
my $list = $lists[$i]; |
326
|
0
|
|
|
|
|
0
|
my $info = $self->{storage}->last_update(list => $list); |
327
|
|
|
|
|
|
|
|
328
|
0
|
0
|
0
|
|
|
0
|
if ($info->{'time'} + $info->{'wait'} > time && $force == 0) { |
329
|
0
|
|
|
|
|
0
|
$self->debug("Too early to update $list\n"); |
330
|
0
|
|
|
|
|
0
|
splice(@lists, $i, 1); |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
else { |
333
|
0
|
|
|
|
|
0
|
$self->debug("OK to update $list: " . time() . "/" . ($info->{'time'} + $info->{'wait'}) . "\n"); |
334
|
0
|
|
|
|
|
0
|
$i++; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
0
|
0
|
|
|
|
0
|
if (scalar @lists == 0) { |
339
|
0
|
|
|
|
|
0
|
$self->debug("Too early to update any list\n"); |
340
|
0
|
|
|
|
|
0
|
return NO_UPDATE; |
341
|
|
|
|
|
|
|
} |
342
|
0
|
|
|
|
|
0
|
$self->perf("OK to update check: " . (time() - $start) . "s\n"); |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
|
345
|
0
|
|
|
|
|
0
|
my $ua = $self->ua; |
346
|
|
|
|
|
|
|
|
347
|
0
|
|
|
|
|
0
|
my $url = $self->{server} . "downloads?client=api&key=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; |
348
|
|
|
|
|
|
|
|
349
|
0
|
|
|
|
|
0
|
my $body = ''; |
350
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
351
|
|
|
|
|
|
|
# Report existng chunks |
352
|
0
|
|
|
|
|
0
|
$start = time(); |
353
|
0
|
|
|
|
|
0
|
my $a_range = $self->create_range(numbers => [$self->{storage}->get_add_chunks_nums(list => $list)]); |
354
|
0
|
|
|
|
|
0
|
my $s_range = $self->create_range(numbers => [$self->{storage}->get_sub_chunks_nums(list => $list)]); |
355
|
0
|
|
|
|
|
0
|
$self->perf("Create add and sub ranges: " . (time() - $start) . "s\n"); |
356
|
|
|
|
|
|
|
|
357
|
0
|
|
|
|
|
0
|
my $chunks_list = ''; |
358
|
0
|
0
|
|
|
|
0
|
if ($a_range ne '') { |
359
|
0
|
|
|
|
|
0
|
$chunks_list .= "a:$a_range"; |
360
|
|
|
|
|
|
|
} |
361
|
0
|
0
|
|
|
|
0
|
if ($s_range ne '') { |
362
|
0
|
0
|
|
|
|
0
|
$chunks_list .= ":" if ($a_range ne ''); |
363
|
0
|
|
|
|
|
0
|
$chunks_list .= "s:$s_range"; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
0
|
|
|
|
|
0
|
$body .= "$list;$chunks_list"; |
367
|
0
|
|
|
|
|
0
|
$body .= "\n"; |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
|
370
|
0
|
|
|
|
|
0
|
my $start_req = time(); |
371
|
0
|
|
|
|
|
0
|
my $res = $ua->post($url, Content => $body); |
372
|
0
|
|
|
|
|
0
|
$self->perf("$body\n"); |
373
|
|
|
|
|
|
|
|
374
|
0
|
|
|
|
|
0
|
$self->debug($res->request->as_string . "\n" . $res->as_string . "\n"); |
375
|
0
|
0
|
|
|
|
0
|
$self->debug($res->request->as_string . "\n") if ($self->{debug}); |
376
|
0
|
0
|
|
|
|
0
|
$self->debug($res->as_string . "\n") if ($self->{debug}); |
377
|
0
|
|
|
|
|
0
|
my $duration_req = time() - $start_req; |
378
|
|
|
|
|
|
|
|
379
|
0
|
0
|
|
|
|
0
|
if (! $res->is_success) { |
380
|
0
|
|
|
|
|
0
|
$self->error("Request failed\n"); |
381
|
|
|
|
|
|
|
|
382
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
383
|
0
|
|
|
|
|
0
|
$self->update_error('time' => time(), list => $list); |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
0
|
return SERVER_ERROR; |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
0
|
|
|
|
|
0
|
my $last_update = time; |
390
|
0
|
|
|
|
|
0
|
my $wait = 0; |
391
|
|
|
|
|
|
|
|
392
|
0
|
|
|
|
|
0
|
my @redirections = (); |
393
|
0
|
|
|
|
|
0
|
my $del_add_duration = 0; |
394
|
0
|
|
|
|
|
0
|
my $del_sub_duration = 0; |
395
|
0
|
|
|
|
|
0
|
my $add_range_info = ''; |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# API doc: Clients must clear cached full-length hashes each time they send an update request. |
398
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
399
|
0
|
|
|
|
|
0
|
$self->{storage}->reset_full_hashes(list => $list); |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
0
|
|
|
|
|
0
|
my @lines = split/\s/, $res->decoded_content; |
403
|
0
|
|
|
|
|
0
|
$list = ''; |
404
|
0
|
|
|
|
|
0
|
foreach my $line (@lines) { |
405
|
0
|
0
|
|
|
|
0
|
if ($line =~ /n:\s*(\d+)\s*$/) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
406
|
0
|
|
|
|
|
0
|
$self->debug("Next poll: $1 seconds\n"); |
407
|
0
|
|
|
|
|
0
|
$wait = $1; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
elsif ($line =~ /i:\s*(\S+)\s*$/) { |
410
|
0
|
|
|
|
|
0
|
$self->debug("List: $1\n"); |
411
|
0
|
|
|
|
|
0
|
$list = $1; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
elsif ($line =~ /u:\s*(\S+),(\S+)\s*$/) { |
414
|
0
|
|
|
|
|
0
|
$self->debug("Redirection: $1\n"); |
415
|
0
|
|
|
|
|
0
|
$self->debug("MAC: $2\n"); |
416
|
0
|
|
|
|
|
0
|
push(@redirections, [$1, $list, $2]); |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
elsif ($line =~ /u:\s*(\S+)\s*$/) { |
419
|
0
|
|
|
|
|
0
|
$self->debug("Redirection: $1\n"); |
420
|
0
|
|
|
|
|
0
|
push(@redirections, [$1, $list, '']); |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
elsif ($line =~ /ad:(\S+)$/) { |
423
|
0
|
|
|
|
|
0
|
$self->debug("Delete Add Chunks: $1\n"); |
424
|
|
|
|
|
|
|
|
425
|
0
|
|
|
|
|
0
|
my $del_add_start = time(); |
426
|
0
|
|
|
|
|
0
|
$add_range_info = $1 . " $list"; |
427
|
0
|
|
|
|
|
0
|
my @nums = $self->expand_range(range => $1); |
428
|
0
|
|
|
|
|
0
|
$self->{storage}->delete_add_ckunks(chunknums => [@nums], list => $list); |
429
|
|
|
|
|
|
|
|
430
|
0
|
|
|
|
|
0
|
$result = 1; |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
elsif ($line =~ /sd:(\S+)$/) { |
433
|
0
|
|
|
|
|
0
|
$self->debug("Delete Sub Chunks: $1\n"); |
434
|
|
|
|
|
|
|
|
435
|
0
|
|
|
|
|
0
|
my $del_sub_start = time(); |
436
|
0
|
|
|
|
|
0
|
my @nums = $self->expand_range(range => $1); |
437
|
0
|
|
|
|
|
0
|
$self->{storage}->delete_sub_ckunks(chunknums => [@nums], list => $list); |
438
|
0
|
|
|
|
|
0
|
$del_add_duration = time() - $del_sub_start; |
439
|
|
|
|
|
|
|
|
440
|
0
|
|
|
|
|
0
|
$result = 1; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
elsif ($line =~ /r:pleasereset/) { |
443
|
0
|
|
|
|
|
0
|
$self->debug("Database must be reset\n"); |
444
|
|
|
|
|
|
|
|
445
|
0
|
|
|
|
|
0
|
$self->{storage}->reset(list => $list); |
446
|
|
|
|
|
|
|
|
447
|
0
|
|
|
|
|
0
|
return DATABASE_RESET; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
} |
450
|
0
|
|
|
|
|
0
|
$self->debug("\n"); |
451
|
0
|
|
|
|
|
0
|
$self->perf("Handle first request: " . (time() - $last_update) . "s (POST: ${duration_req}s, DEL add: ${del_add_duration}s, DEL sub: ${del_sub_duration}s, ADD range: ${add_range_info})\n"); |
452
|
|
|
|
|
|
|
|
453
|
0
|
0
|
|
|
|
0
|
$result = 1 if (scalar @redirections > 0); |
454
|
|
|
|
|
|
|
|
455
|
0
|
|
|
|
|
0
|
$self->perf("Parse redirections: "); |
456
|
0
|
|
|
|
|
0
|
foreach my $data (@redirections) { |
457
|
0
|
|
|
|
|
0
|
$start = time(); |
458
|
0
|
|
|
|
|
0
|
my $redirection = $data->[0]; |
459
|
0
|
|
|
|
|
0
|
$list = $data->[1]; |
460
|
|
|
|
|
|
|
|
461
|
0
|
|
|
|
|
0
|
$self->debug("Checking redirection https://$redirection ($list)\n"); |
462
|
0
|
|
|
|
|
0
|
$res = $ua->get("https://$redirection"); |
463
|
0
|
0
|
|
|
|
0
|
if (! $res->is_success) { |
464
|
0
|
|
|
|
|
0
|
$self->error("Request to $redirection failed\n"); |
465
|
|
|
|
|
|
|
|
466
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
467
|
0
|
|
|
|
|
0
|
$self->update_error('time' => $last_update, list => $list); |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
|
470
|
0
|
|
|
|
|
0
|
return SERVER_ERROR; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
|
473
|
0
|
0
|
|
|
|
0
|
$self->debug(substr($res->as_string, 0, 250) . "\n\n") if ($self->{debug}); |
474
|
0
|
0
|
|
|
|
0
|
$self->debug(substr($res->content, 0, 250) . "\n\n") if ($self->{debug}); |
475
|
|
|
|
|
|
|
|
476
|
0
|
|
|
|
|
0
|
my $data = $res->content; |
477
|
|
|
|
|
|
|
|
478
|
0
|
|
|
|
|
0
|
my $result = $self->parse_data(data => $data, list => $list); |
479
|
0
|
0
|
|
|
|
0
|
if ($result != SUCCESSFUL) { |
480
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
481
|
0
|
|
|
|
|
0
|
$self->update_error('time' => $last_update, list => $list); |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
|
484
|
0
|
|
|
|
|
0
|
return $result; |
485
|
|
|
|
|
|
|
} |
486
|
0
|
|
|
|
|
0
|
$self->perf((time() - $start) . "s "); |
487
|
|
|
|
|
|
|
} |
488
|
0
|
|
|
|
|
0
|
$self->perf("\n"); |
489
|
|
|
|
|
|
|
|
490
|
0
|
|
|
|
|
0
|
foreach my $list (@lists) { |
491
|
0
|
|
|
|
|
0
|
$self->debug("List update: $last_update $wait $list\n"); |
492
|
0
|
|
|
|
|
0
|
$self->{storage}->updated('time' => $last_update, 'wait' => $wait, list => $list); |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
|
495
|
0
|
|
|
|
|
0
|
return $result; # ok |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head2 lookup() |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Lookup a URL against the Google Safe Browsing database. |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
my $match = $gsb->lookup(url => 'http://www.gumblar.cn'); |
503
|
|
|
|
|
|
|
my ($match, $type) = $gsb->lookup(url => 'http://www.gumblar.cn'); |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
In scalar context, returns the name of the list if there is any match, returns an empty string otherwise. |
506
|
|
|
|
|
|
|
In array context, return the name of the list (empty if no match) and the type of malware site (0 if no type specified) |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
Arguments |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=over 4 |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=item list |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Optional. Lookup against a specific list. Use the list(s) from new() by default. |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=item url |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Required. URL to lookup. |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=back |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=cut |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
sub lookup { |
525
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
526
|
0
|
|
0
|
|
|
0
|
my $list = $args{list} || ''; |
527
|
0
|
|
0
|
|
|
0
|
my $url = $args{url} || return ''; |
528
|
|
|
|
|
|
|
|
529
|
0
|
|
|
|
|
0
|
my @lists = @{$self->{list}}; |
|
0
|
|
|
|
|
0
|
|
530
|
0
|
0
|
|
|
|
0
|
@lists = @{[$args{list}]} if ($list ne ''); |
|
0
|
|
|
|
|
0
|
|
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# TODO: create our own URI management for canonicalization |
534
|
|
|
|
|
|
|
# fix for http:///foo.com (3 ///) |
535
|
0
|
|
|
|
|
0
|
$url =~ s/^(https?:\/\/)\/+/$1/; |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
|
538
|
0
|
|
|
|
|
0
|
my $uri = URI->new($url)->canonical; |
539
|
0
|
|
|
|
|
0
|
my ($match, $type) = $self->lookup_suffix(lists => [@lists], url => $url); |
540
|
0
|
0
|
|
|
|
0
|
return ($match, $type) if (wantarray); |
541
|
0
|
|
|
|
|
0
|
return $match |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=pod |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=head1 PRIVATE FUNCTIONS |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
These functions are not intended to be used externally. |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
=over 4 |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=back |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=head2 lookup_suffix() |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
Lookup a host prefix. |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=cut |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
sub lookup_suffix { |
568
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
569
|
0
|
|
0
|
|
|
0
|
my $lists = $args{lists} || croak "Missing lists\n"; |
570
|
0
|
|
0
|
|
|
0
|
my $url = $args{url} || return ''; |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
# Calculate prefixes |
573
|
0
|
|
|
|
|
0
|
my @full_hashes = $self->full_hashes($url); |
574
|
0
|
|
|
|
|
0
|
my @full_hashes_prefix = map (substr($_, 0, 4), @full_hashes); # Get the prefixes from the first 4 bytes |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
# Local lookup |
577
|
0
|
|
|
|
|
0
|
my @add_chunks = $self->local_lookup_suffix(lists => $lists, url => $url, full_hashes => [@full_hashes], full_hashes_prefix => [@full_hashes_prefix]); |
578
|
0
|
0
|
|
|
|
0
|
if (scalar @add_chunks == 0) { |
579
|
0
|
|
|
|
|
0
|
$self->debug("No hit in local lookup\n"); |
580
|
0
|
0
|
|
|
|
0
|
return ('', 0) if (wantarray); |
581
|
0
|
|
|
|
|
0
|
return ''; |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
# get stored full hashes |
586
|
0
|
|
|
|
|
0
|
foreach my $hash (@full_hashes) { |
587
|
0
|
|
|
|
|
0
|
foreach my $list (@$lists) { |
588
|
0
|
|
|
|
|
0
|
my @hashes = $self->{storage}->get_full_hashes(hash => $hash, list => $list); |
589
|
|
|
|
|
|
|
|
590
|
0
|
0
|
|
|
|
0
|
if (scalar @hashes > 0) { |
591
|
0
|
|
|
|
|
0
|
$self->debug("Full hashes found: ", scalar(@hashes), "\n"); |
592
|
0
|
|
|
|
|
0
|
my $result = pop(@hashes); |
593
|
|
|
|
|
|
|
|
594
|
0
|
0
|
0
|
|
|
0
|
return ($list, $result->{type} || 0) if (wantarray); |
595
|
0
|
|
|
|
|
0
|
return $list; |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
} |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
# ask for new hashes |
602
|
|
|
|
|
|
|
# TODO: make sure we don't keep asking for the same over and over |
603
|
0
|
|
|
|
|
0
|
my @hashes = $self->request_full_hash(prefixes => [ map($_->{prefix}, @add_chunks) ]); |
604
|
0
|
|
|
|
|
0
|
$self->{storage}->add_full_hashes(full_hashes => [@hashes], timestamp => time()); |
605
|
|
|
|
|
|
|
|
606
|
0
|
|
|
|
|
0
|
foreach my $full_hash (@full_hashes) { |
607
|
0
|
|
|
0
|
|
0
|
my $hash = first { $_->{hash} eq $full_hash} @hashes; |
|
0
|
|
|
|
|
0
|
|
608
|
0
|
0
|
|
|
|
0
|
next if (! defined $hash); |
609
|
|
|
|
|
|
|
|
610
|
0
|
|
|
0
|
|
0
|
my $list = first { $hash->{list} eq $_ } @$lists; |
|
0
|
|
|
|
|
0
|
|
611
|
|
|
|
|
|
|
|
612
|
0
|
0
|
0
|
|
|
0
|
if (defined $hash && defined $list) { |
613
|
|
|
|
|
|
|
# $self->debug($self->hex_to_ascii($hash->{hash}) . " eq " . $self->hex_to_ascii($full_hash) . "\n\n"); |
614
|
|
|
|
|
|
|
|
615
|
0
|
|
|
|
|
0
|
$self->debug("Match: " . $self->hex_to_ascii($full_hash) . "\n"); |
616
|
|
|
|
|
|
|
|
617
|
0
|
0
|
0
|
|
|
0
|
return ($hash->{list}, $hash->{type} || 0) if (wantarray); |
618
|
0
|
|
|
|
|
0
|
return $hash->{list}; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
# elsif (defined $hash) { |
621
|
|
|
|
|
|
|
# $self->debug("hash: " . $self->hex_to_ascii($hash->{hash}) . "\n"); |
622
|
|
|
|
|
|
|
# $self->debug("list: " . $hash->{list} . "\n"); |
623
|
|
|
|
|
|
|
# } |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
0
|
|
|
|
|
0
|
$self->debug("No match\n"); |
627
|
0
|
0
|
|
|
|
0
|
return ('', 0) if (wantarray); |
628
|
0
|
|
|
|
|
0
|
return ''; |
629
|
|
|
|
|
|
|
} |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=head2 local_lookup_suffix() |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
Lookup a host prefix in the local database only. |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=cut |
636
|
|
|
|
|
|
|
sub local_lookup_suffix { |
637
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
638
|
0
|
|
0
|
|
|
0
|
my $lists = $args{lists} || croak "Missing lists\n"; |
639
|
0
|
|
0
|
|
|
0
|
my $url = $args{url} || return (); |
640
|
0
|
|
0
|
|
|
0
|
my $full_hashe_list = $args{full_hashes} || []; |
641
|
0
|
|
0
|
|
|
0
|
my $full_hashes_prefix_list = $args{full_hashes_prefix} || []; |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
# Step 1: calculate prefixes if not provided |
646
|
|
|
|
|
|
|
# Get the prefixes from the first 4 bytes |
647
|
0
|
|
|
|
|
0
|
my @full_hashes = @{$full_hashe_list}; |
|
0
|
|
|
|
|
0
|
|
648
|
0
|
|
|
|
|
0
|
my @full_hashes_prefix = @{$full_hashes_prefix_list}; |
|
0
|
|
|
|
|
0
|
|
649
|
0
|
0
|
|
|
|
0
|
if (scalar @full_hashes_prefix == 0) { |
650
|
0
|
0
|
|
|
|
0
|
@full_hashes = $self->full_hashes($url) if (scalar @full_hashes == 0); |
651
|
|
|
|
|
|
|
|
652
|
0
|
|
|
|
|
0
|
@full_hashes_prefix = map (substr($_, 0, 4), @full_hashes); |
653
|
|
|
|
|
|
|
} |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# Step 2: get all add chunks for these suffixes |
656
|
|
|
|
|
|
|
# Do it for all lists |
657
|
0
|
|
|
|
|
0
|
my @add_chunks = (); |
658
|
0
|
|
|
|
|
0
|
foreach my $prefix (@full_hashes_prefix, @full_hashes) { |
659
|
0
|
|
|
|
|
0
|
push(@add_chunks, $self->{storage}->get_add_chunks(prefix => $prefix)); |
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
|
662
|
0
|
0
|
|
|
|
0
|
if (scalar @add_chunks == 0) { # no match |
663
|
0
|
|
|
|
|
0
|
$self->debug("No prefix found\n"); |
664
|
0
|
|
|
|
|
0
|
return @add_chunks; |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
# Step 3: get all sub chunks for this host key |
669
|
0
|
|
|
|
|
0
|
my @sub_chunks = (); |
670
|
0
|
|
|
|
|
0
|
foreach my $prefix (@full_hashes_prefix, @full_hashes) { |
671
|
0
|
|
|
|
|
0
|
push(@sub_chunks, $self->{storage}->get_sub_chunks(hostkey => $prefix)); |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
|
674
|
0
|
|
|
|
|
0
|
foreach my $sub_chunk (@sub_chunks) { |
675
|
0
|
|
|
|
|
0
|
my $i = 0; |
676
|
0
|
|
|
|
|
0
|
while ($i < scalar @add_chunks) { |
677
|
0
|
|
|
|
|
0
|
my $add_chunk = $add_chunks[$i]; |
678
|
|
|
|
|
|
|
|
679
|
0
|
0
|
0
|
|
|
0
|
if ($add_chunk->{chunknum} != $sub_chunk->{addchunknum} || $add_chunk->{list} ne $sub_chunk->{list}) { |
680
|
0
|
|
|
|
|
0
|
$i++; |
681
|
0
|
|
|
|
|
0
|
next; |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
|
684
|
0
|
0
|
|
|
|
0
|
if ($sub_chunk->{prefix} eq $add_chunk->{prefix}) { |
685
|
0
|
|
|
|
|
0
|
splice(@add_chunks, $i, 1); |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
else { |
688
|
0
|
|
|
|
|
0
|
$i++; |
689
|
|
|
|
|
|
|
} |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
} |
692
|
|
|
|
|
|
|
|
693
|
0
|
0
|
|
|
|
0
|
if (scalar @add_chunks == 0) { |
694
|
0
|
|
|
|
|
0
|
$self->debug("All add_chunks have been removed by sub_chunks\n"); |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
|
697
|
0
|
|
|
|
|
0
|
return @add_chunks; |
698
|
|
|
|
|
|
|
} |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=head2 update_error() |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
Handle server errors during a database update. |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=cut |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
sub update_error { |
708
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
709
|
0
|
|
0
|
|
|
0
|
my $time = $args{'time'} || time; |
710
|
0
|
|
0
|
|
|
0
|
my $list = $args{'list'} || ''; |
711
|
|
|
|
|
|
|
|
712
|
0
|
|
|
|
|
0
|
my $info = $self->{storage}->last_update(list => $list); |
713
|
0
|
0
|
|
|
|
0
|
$info->{errors} = 0 if (! exists $info->{errors}); |
714
|
0
|
|
|
|
|
0
|
my $errors = $info->{errors} + 1; |
715
|
0
|
|
|
|
|
0
|
my $wait = 0; |
716
|
|
|
|
|
|
|
|
717
|
0
|
0
|
|
|
|
0
|
$wait = $errors == 1 ? 60 |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
718
|
|
|
|
|
|
|
: $errors == 2 ? int(30 * 60 * (rand(1) + 1)) # 30-60 mins |
719
|
|
|
|
|
|
|
: $errors == 3 ? int(60 * 60 * (rand(1) + 1)) # 60-120 mins |
720
|
|
|
|
|
|
|
: $errors == 4 ? int(2 * 60 * 60 * (rand(1) + 1)) # 120-240 mins |
721
|
|
|
|
|
|
|
: $errors == 5 ? int(4 * 60 * 60 * (rand(1) + 1)) # 240-480 mins |
722
|
|
|
|
|
|
|
: $errors > 5 ? 480 * 60 |
723
|
|
|
|
|
|
|
: 0; |
724
|
|
|
|
|
|
|
|
725
|
0
|
|
|
|
|
0
|
$self->{storage}->update_error('time' => $time, list => $list, 'wait' => $wait, errors => $errors); |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
} |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=head2 ua() |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
Create LWP::UserAgent to make HTTP requests to Google. |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=cut |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
sub ua { |
737
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
738
|
|
|
|
|
|
|
|
739
|
0
|
0
|
|
|
|
0
|
if (! exists $self->{ua}) { |
740
|
0
|
|
|
|
|
0
|
my $ua = LWP::UserAgent->new; |
741
|
0
|
|
|
|
|
0
|
$ua->timeout(60); |
742
|
|
|
|
|
|
|
|
743
|
0
|
|
|
|
|
0
|
$self->{ua} = $ua; |
744
|
|
|
|
|
|
|
} |
745
|
|
|
|
|
|
|
|
746
|
0
|
|
|
|
|
0
|
return $self->{ua}; |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=head2 parse_data() |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Parse data from a redirection (add and sub chunk information). |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
=cut |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
sub parse_data { |
757
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
758
|
0
|
|
0
|
|
|
0
|
my $data = $args{data} || ''; |
759
|
0
|
|
0
|
|
|
0
|
my $list = $args{list} || ''; |
760
|
|
|
|
|
|
|
|
761
|
0
|
|
|
|
|
0
|
my $chunk_num = 0; |
762
|
0
|
|
|
|
|
0
|
my $hash_length = 0; |
763
|
0
|
|
|
|
|
0
|
my $chunk_length = 0; |
764
|
|
|
|
|
|
|
|
765
|
0
|
|
|
|
|
0
|
while (length $data > 0) { |
766
|
|
|
|
|
|
|
# my $length = substr($data, 0, 4); # HEX |
767
|
0
|
|
|
|
|
0
|
my $length = hex $self->hex_to_ascii( substr($data, 0, 4, '') ); |
768
|
0
|
|
|
|
|
0
|
$self->debug("Length: $length\n"); |
769
|
0
|
|
|
|
|
0
|
my $chunk = substr($data, 0, $length, ''); |
770
|
0
|
|
|
|
|
0
|
my $data = ChunkData->decode($chunk); |
771
|
0
|
|
|
|
|
0
|
$self->debug(Dumper($data), "\n"); |
772
|
|
|
|
|
|
|
|
773
|
0
|
0
|
0
|
|
|
0
|
if (! exists($data->{chunk_type}) || $data->{chunk_type} == 0) { |
774
|
0
|
|
|
|
|
0
|
my @chunks = $self->parse_a(chunk => $data); |
775
|
0
|
|
|
|
|
0
|
$self->{storage}->add_chunks(type => 'a', chunknum => $data->{chunk_number}, chunks => [@chunks], list => $list); |
776
|
|
|
|
|
|
|
} |
777
|
|
|
|
|
|
|
else { |
778
|
0
|
|
|
|
|
0
|
my @chunks = $self->parse_s(chunk => $data); |
779
|
0
|
|
|
|
|
0
|
$self->{storage}->add_chunks(type => 's', chunknum => $data->{chunk_number}, chunks => [@chunks], list => $list); |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
|
|
783
|
0
|
|
|
|
|
0
|
return SUCCESSFUL; |
784
|
|
|
|
|
|
|
} |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
=head2 parse_s() |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
Parse s chunks information for a database update. |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
=cut |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
sub parse_s { |
793
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
794
|
0
|
|
0
|
|
|
0
|
my $chunk = $args{chunk} || return (); |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
# { |
797
|
|
|
|
|
|
|
# 'add_numbers' => [ |
798
|
|
|
|
|
|
|
# 161383, |
799
|
|
|
|
|
|
|
# 156609, |
800
|
|
|
|
|
|
|
# 161686, |
801
|
|
|
|
|
|
|
# 159174, |
802
|
|
|
|
|
|
|
# 166040, |
803
|
|
|
|
|
|
|
# 164187 |
804
|
|
|
|
|
|
|
# ], |
805
|
|
|
|
|
|
|
# 'chunk_type' => 1, |
806
|
|
|
|
|
|
|
# 'chunk_number' => 158095, |
807
|
|
|
|
|
|
|
# 'hashes' => ' _*���F�E����A��;;v����i' |
808
|
|
|
|
|
|
|
# } |
809
|
|
|
|
|
|
|
|
810
|
0
|
|
|
|
|
0
|
my @data = (); |
811
|
0
|
|
0
|
|
|
0
|
my $prefix_type = $chunk->{prefix_type} || 0; |
812
|
0
|
|
0
|
|
|
0
|
my $prefix = $chunk->{hashes} || ''; # HEX |
813
|
0
|
|
|
|
|
0
|
$self->debug("Hashes length: ", length($prefix), "\n"); |
814
|
0
|
0
|
|
|
|
0
|
$self->debug("Hashes: ", $self->hex_to_ascii($prefix), "\n") if ($self->{debug}); |
815
|
|
|
|
|
|
|
|
816
|
0
|
|
|
|
|
0
|
my $hash_length = 4; |
817
|
0
|
0
|
|
|
|
0
|
$hash_length = 32 if ($prefix_type == 1); |
818
|
0
|
|
|
|
|
0
|
my @hashes = (); |
819
|
0
|
|
|
|
|
0
|
while(length($prefix) > 0) { |
820
|
0
|
|
|
|
|
0
|
push(@hashes, substr($prefix, 0, $hash_length, '')); |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
|
823
|
0
|
|
|
|
|
0
|
for(my $i = 0; $i < scalar @{ $chunk->{add_numbers} }; $i++) { |
|
0
|
|
|
|
|
0
|
|
824
|
0
|
|
|
|
|
0
|
push(@data, { add_chunknum => ${ $chunk->{add_numbers} }[$i], prefix => $hashes[$i] }); |
|
0
|
|
|
|
|
0
|
|
825
|
|
|
|
|
|
|
} |
826
|
|
|
|
|
|
|
|
827
|
0
|
|
|
|
|
0
|
return @data; |
828
|
|
|
|
|
|
|
} |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
=head2 parse_a() |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
Parse a chunks information for a database update. |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
=cut |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
sub parse_a { |
838
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
839
|
0
|
|
0
|
|
|
0
|
my $chunk = $args{chunk} || return (); |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
# { |
842
|
|
|
|
|
|
|
# 'chunk_number' => 166146, |
843
|
|
|
|
|
|
|
# 'hashes' => 'Z[�$~�����w5���B�;0����z;�E&�ʳY�H$`-' |
844
|
|
|
|
|
|
|
# } |
845
|
|
|
|
|
|
|
|
846
|
0
|
|
|
|
|
0
|
my @data = (); |
847
|
0
|
|
0
|
|
|
0
|
my $prefix_type = $chunk->{prefix_type} || 0; |
848
|
0
|
|
0
|
|
|
0
|
my $prefix = $chunk->{hashes} || ''; # HEX |
849
|
|
|
|
|
|
|
|
850
|
0
|
|
|
|
|
0
|
my $hash_length = 4; |
851
|
0
|
0
|
|
|
|
0
|
$hash_length = 32 if ($prefix_type == 1); |
852
|
|
|
|
|
|
|
|
853
|
0
|
|
|
|
|
0
|
while(length($prefix) > 0) { |
854
|
0
|
|
|
|
|
0
|
push(@data, { prefix => substr($prefix, 0, $hash_length, '') }); |
855
|
|
|
|
|
|
|
} |
856
|
|
|
|
|
|
|
|
857
|
0
|
|
|
|
|
0
|
return @data; |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
=head2 hex_to_ascii() |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
Transform hexadecimal strings to printable ASCII strings. Used mainly for debugging. |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
print $gsb->hex_to_ascii('hex value'); |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
=cut |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
sub hex_to_ascii { |
869
|
2
|
|
|
2
|
1
|
9
|
my ($self, $hex) = @_; |
870
|
|
|
|
|
|
|
|
871
|
2
|
|
|
|
|
9
|
return String::HexConvert::ascii_to_hex($hex); |
872
|
|
|
|
|
|
|
} |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
=head2 ascii_to_hex() |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
Transform ASCII strings to hexadecimal strings. |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
=cut |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
sub ascii_to_hex { |
882
|
1
|
|
|
1
|
1
|
471
|
my ($self, $ascii) = @_; |
883
|
|
|
|
|
|
|
|
884
|
1
|
|
|
|
|
2
|
my $hex = ''; |
885
|
1
|
|
|
|
|
8
|
for (my $i = 0; $i < int(length($ascii) / 2); $i++) { |
886
|
4
|
|
|
|
|
13
|
$hex .= chr hex( substr($ascii, $i * 2, 2) ); |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
|
889
|
1
|
|
|
|
|
4
|
return $hex; |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=head2 debug() |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
Print debug output. |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
=cut |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
sub debug { |
899
|
0
|
|
|
0
|
1
|
0
|
my ($self, @messages) = @_; |
900
|
|
|
|
|
|
|
|
901
|
0
|
0
|
|
|
|
0
|
print join('', @messages) if ($self->{debug} > 0); |
902
|
|
|
|
|
|
|
} |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
=head2 error() |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
Print error message. |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
=cut |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
sub error { |
912
|
0
|
|
|
0
|
1
|
0
|
my ($self, $message) = @_; |
913
|
|
|
|
|
|
|
|
914
|
0
|
0
|
0
|
|
|
0
|
print "ERROR - ", $message if ($self->{debug} > 0 || $self->{errors} > 0); |
915
|
0
|
|
|
|
|
0
|
$self->{last_error} = $message; |
916
|
|
|
|
|
|
|
} |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
=head2 perf() |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
Print performance message. |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
=cut |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
sub perf { |
926
|
0
|
|
|
0
|
1
|
0
|
my ($self, $message) = @_; |
927
|
|
|
|
|
|
|
|
928
|
0
|
0
|
|
|
|
0
|
print $message if ($self->{perf} > 0); |
929
|
|
|
|
|
|
|
} |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
=head2 canonical_domain() |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
Find all canonical domains a domain. |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
=cut |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
sub canonical_domain { |
939
|
11
|
|
|
11
|
1
|
6494
|
my ($self, $domain) = @_; |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
# Remove all leading and trailing dots. |
942
|
11
|
|
|
|
|
28
|
$domain =~ s/^\.+//; |
943
|
11
|
|
|
|
|
32
|
$domain =~ s/\.+$//; |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
# Replace consecutive dots with a single dot. |
946
|
11
|
|
|
|
|
47
|
while ($domain =~ s/\.\.+/\./g) { } |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
# Lowercase the whole string. |
949
|
11
|
|
|
|
|
20
|
$domain = lc $domain; |
950
|
|
|
|
|
|
|
|
951
|
11
|
|
|
|
|
25
|
my @domains = ($domain); |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
|
954
|
11
|
100
|
|
|
|
32
|
if ($domain =~ /^\d+\.\d+\.\d+\.\d+$/) { # loose check for IP address, should be enough |
955
|
1
|
|
|
|
|
3
|
return @domains; |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
|
958
|
10
|
|
|
|
|
36
|
my @parts = split/\./, $domain; |
959
|
10
|
|
|
|
|
21
|
splice(@parts, 0, -6); # take 5 top most compoments |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
|
962
|
10
|
|
|
|
|
27
|
while (scalar @parts > 2) { |
963
|
9
|
|
|
|
|
9
|
shift @parts; |
964
|
9
|
|
|
|
|
27
|
push(@domains, join(".", @parts) ); |
965
|
|
|
|
|
|
|
} |
966
|
|
|
|
|
|
|
|
967
|
10
|
|
|
|
|
42
|
return @domains; |
968
|
|
|
|
|
|
|
} |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
=head2 canonical_path() |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
Find all canonical paths for a URL. |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
=cut |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
sub canonical_path { |
977
|
4
|
|
|
4
|
1
|
45
|
my ($self, $path) = @_; |
978
|
|
|
|
|
|
|
|
979
|
4
|
|
|
|
|
6
|
my @paths = ($path); # return full path |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
# without query string |
982
|
4
|
100
|
|
|
|
17
|
if ($path =~ /\?/) { |
983
|
1
|
|
|
|
|
4
|
$path =~ s/\?.*$//; |
984
|
|
|
|
|
|
|
|
985
|
1
|
|
|
|
|
2
|
push(@paths, $path); |
986
|
|
|
|
|
|
|
} |
987
|
|
|
|
|
|
|
|
988
|
4
|
|
|
|
|
14
|
my @parts = split /\//, $path; |
989
|
4
|
50
|
|
|
|
11
|
if (scalar @parts > 4) { |
990
|
0
|
|
|
|
|
0
|
@parts = splice(@parts, -4, 4); |
991
|
|
|
|
|
|
|
} |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
# if (scalar @parts == 0) { |
994
|
|
|
|
|
|
|
# push(@paths, "/"); |
995
|
|
|
|
|
|
|
# } |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
|
998
|
4
|
|
|
|
|
7
|
my $previous = ''; |
999
|
4
|
|
|
|
|
12
|
while (scalar @parts > 1) { |
1000
|
4
|
|
|
|
|
6
|
my $val = shift(@parts); |
1001
|
4
|
|
|
|
|
7
|
$previous .= "$val/"; |
1002
|
|
|
|
|
|
|
|
1003
|
4
|
|
|
|
|
11
|
push(@paths, $previous); |
1004
|
|
|
|
|
|
|
} |
1005
|
|
|
|
|
|
|
|
1006
|
4
|
|
|
|
|
16
|
return @paths; |
1007
|
|
|
|
|
|
|
} |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
=head2 canonical() |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
Find all canonical URLs for a URL. |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
=cut |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
sub canonical { |
1016
|
4
|
|
|
4
|
1
|
10944
|
my ($self, $url) = @_; |
1017
|
|
|
|
|
|
|
|
1018
|
4
|
|
|
|
|
13
|
my @urls = (); |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
# my $uri = URI->new($url)->canonical; |
1021
|
4
|
|
|
|
|
11
|
my $uri = $self->canonical_uri($url); |
1022
|
4
|
|
|
|
|
186
|
my @domains = $self->canonical_domain($uri->host); |
1023
|
4
|
|
|
|
|
21
|
my @paths = $self->canonical_path($uri->path_query); |
1024
|
|
|
|
|
|
|
|
1025
|
4
|
|
|
|
|
9
|
foreach my $domain (@domains) { |
1026
|
10
|
|
|
|
|
11
|
foreach my $path (@paths) { |
1027
|
22
|
|
|
|
|
45
|
push(@urls, "$domain$path"); |
1028
|
|
|
|
|
|
|
} |
1029
|
|
|
|
|
|
|
} |
1030
|
|
|
|
|
|
|
|
1031
|
4
|
|
|
|
|
26
|
return @urls; |
1032
|
|
|
|
|
|
|
} |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
=head2 canonical_uri() |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
Create a canonical URI. |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
NOTE: URI cannot handle all the test cases provided by Google. This method is a hack to pass most of the test. A few tests are still failing. The proper way to handle URL canonicalization according to Google would be to create a new module to handle URLs. However, I believe most real-life cases are handled correctly by this function. |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
=cut |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
sub canonical_uri { |
1044
|
40
|
|
|
40
|
1
|
19353
|
my ($self, $url) = @_; |
1045
|
|
|
|
|
|
|
|
1046
|
40
|
|
|
|
|
114
|
$url = trim $url; |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
# Special case for \t \r \n |
1049
|
40
|
|
|
|
|
1002
|
while ($url =~ s/^([^?]+)[\r\t\n]/$1/sgi) { } |
1050
|
|
|
|
|
|
|
|
1051
|
40
|
|
|
|
|
163
|
my $uri = URI->new($url)->canonical; # does not deal with directory traversing |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
# $self->debug("0. $url => " . $uri->as_string . "\n"); |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
|
1056
|
40
|
100
|
66
|
|
|
13695
|
if (! $uri->scheme() || $uri->scheme() eq '') { |
1057
|
3
|
|
|
|
|
34
|
$uri = URI->new("http://$url")->canonical; |
1058
|
|
|
|
|
|
|
} |
1059
|
|
|
|
|
|
|
|
1060
|
40
|
|
|
|
|
1291
|
$uri->fragment(''); |
1061
|
|
|
|
|
|
|
|
1062
|
40
|
|
|
|
|
444
|
my $escape = $uri->as_string; |
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
# Reduce double // to single / in path |
1065
|
40
|
|
|
|
|
287
|
while ($escape =~ s/^([a-z]+:\/\/[^?]+)\/\//$1\//sgi) { } |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
# Remove empty fragment |
1069
|
40
|
|
|
|
|
128
|
$escape =~ s/#$//; |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
# canonial does not handle ../ |
1072
|
|
|
|
|
|
|
# $self->debug("\t$escape\n"); |
1073
|
40
|
|
|
|
|
128
|
while($escape =~ s/([^\/])\/([^\/]+)\/\.\.([\/?].*)$/$1$3/gi) { } |
1074
|
40
|
|
|
|
|
88
|
while($escape =~ s/([^\/])\/([^\/]+)\/\.\.$/$1/gi) { } |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
# May have removed ending / |
1077
|
|
|
|
|
|
|
# $self->debug("\t$escape\n"); |
1078
|
40
|
100
|
|
|
|
127
|
$escape .= "/" if ($escape =~ /^[a-z]+:\/\/[^\/\?]+$/); |
1079
|
40
|
|
|
|
|
94
|
$escape =~ s/^([a-z]+:\/\/[^\/]+)(\?.*)$/$1\/$2/gi; |
1080
|
|
|
|
|
|
|
# $self->debug("\t$escape\n"); |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
# other weird case if domain = digits only, try to translate it to IP address |
1083
|
40
|
100
|
|
|
|
103
|
if ((my $domain = URI->new($escape)->host) =~/^\d+$/) { |
1084
|
3
|
|
|
|
|
187
|
my $ip = Socket::inet_ntoa(Socket::inet_aton($domain)); |
1085
|
|
|
|
|
|
|
|
1086
|
3
|
|
|
|
|
9
|
$uri = URI->new($escape); |
1087
|
3
|
|
|
|
|
110
|
$uri->host($ip); |
1088
|
|
|
|
|
|
|
|
1089
|
3
|
|
|
|
|
184
|
$escape = $uri->as_string; |
1090
|
|
|
|
|
|
|
} |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
# $self->debug("1. $url => $escape\n"); |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
# Try to escape the path again |
1095
|
40
|
|
|
|
|
2357
|
$url = $escape; |
1096
|
40
|
|
|
|
|
76
|
while (($escape = URI::Escape::uri_unescape($url)) ne $escape) { # wrong for %23 -> # |
1097
|
0
|
|
|
|
|
0
|
$url = $escape; |
1098
|
|
|
|
|
|
|
} |
1099
|
|
|
|
|
|
|
# while (($escape = URI->new($url)->canonical->as_string) ne $escape) { # breask more unit tests than previous |
1100
|
|
|
|
|
|
|
# $url = $escape; |
1101
|
|
|
|
|
|
|
# } |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
# Fix for %23 -> # |
1104
|
40
|
|
|
|
|
322
|
while($escape =~ s/#/%23/sgi) { } |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
# $self->debug("2. $url => $escape\n"); |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
# Fix over escaping |
1109
|
40
|
|
|
|
|
100
|
while($escape =~ s/^([^?]+)%%(%.*)$/$1%25%25$2/sgi) { } |
1110
|
40
|
|
|
|
|
74
|
while($escape =~ s/^([^?]+)%%/$1%25%25/sgi) { } |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
# URI has issues with % in domains, it gets the host wrong |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
# 1. fix the host |
1115
|
|
|
|
|
|
|
# $self->debug("Domain: " . URI->new($escape)->host . "\n"); |
1116
|
40
|
|
|
|
|
42
|
my $exception = 0; |
1117
|
40
|
|
|
|
|
164
|
while ($escape =~ /^[a-z]+:\/\/[^\/]*([^a-z0-9%_.-\/:])[^\/]*(\/.*)$/) { |
1118
|
3
|
|
|
|
|
5
|
my $source = $1; |
1119
|
3
|
|
|
|
|
11
|
my $target = sprintf("%02x", ord($source)); |
1120
|
|
|
|
|
|
|
|
1121
|
3
|
|
|
|
|
51
|
$escape =~ s/^([a-z]+:\/\/[^\/]*)\Q$source\E/$1%\Q$target\E/; |
1122
|
|
|
|
|
|
|
|
1123
|
3
|
|
|
|
|
14
|
$exception = 1; |
1124
|
|
|
|
|
|
|
} |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
# 2. need to parse the path again |
1127
|
40
|
50
|
66
|
|
|
94
|
if ($exception && $escape =~ /^[a-z]+:\/\/[^\/]+\/(.+)/) { |
1128
|
0
|
|
|
|
|
0
|
my $source = $1; |
1129
|
0
|
|
|
|
|
0
|
my $target = URI::Escape::uri_unescape($source); |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
# print "Source: $source\n"; |
1132
|
0
|
|
|
|
|
0
|
while ($target ne URI::Escape::uri_unescape($target)) { |
1133
|
0
|
|
|
|
|
0
|
$target = URI::Escape::uri_unescape($target); |
1134
|
|
|
|
|
|
|
} |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
|
1137
|
0
|
|
|
|
|
0
|
$escape =~ s/\/\Q$source\E/\/$target/; |
1138
|
|
|
|
|
|
|
|
1139
|
0
|
|
|
|
|
0
|
while ($escape =~ s/#/%23/sgi) { } # fragement has been removed earlier |
1140
|
0
|
|
|
|
|
0
|
while ($escape =~ s/^([a-z]+:\/\/[^\/]+\/.*)%5e/$1\&/sgi) { } # not in the host name |
1141
|
|
|
|
|
|
|
# while ($escape =~ s/%5e/&/sgi) { } |
1142
|
|
|
|
|
|
|
|
1143
|
0
|
|
|
|
|
0
|
while ($escape =~ s/%([^0-9a-f]|.[^0-9a-f])/%25$1/sgi) { } |
1144
|
|
|
|
|
|
|
} |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
# $self->debug("$url => $escape\n"); |
1147
|
|
|
|
|
|
|
# $self->debug(URI->new($escape)->as_string . "\n"); |
1148
|
|
|
|
|
|
|
|
1149
|
40
|
|
|
|
|
99
|
return URI->new($escape); |
1150
|
|
|
|
|
|
|
} |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
=head2 full_hashes() |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
Return all possible full hashes for a URL. |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
=cut |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
sub full_hashes { |
1159
|
0
|
|
|
0
|
1
|
|
my ($self, $url) = @_; |
1160
|
|
|
|
|
|
|
|
1161
|
0
|
|
|
|
|
|
my @urls = $self->canonical($url); |
1162
|
0
|
|
|
|
|
|
my @hashes = (); |
1163
|
|
|
|
|
|
|
|
1164
|
0
|
|
|
|
|
|
foreach my $url (@urls) { |
1165
|
|
|
|
|
|
|
# $self->debug("$url\n"); |
1166
|
0
|
|
|
|
|
|
push(@hashes, sha256($url)); |
1167
|
0
|
|
|
|
|
|
$self->debug("$url " . $self->hex_to_ascii(sha256($url)) . "\n"); |
1168
|
|
|
|
|
|
|
} |
1169
|
|
|
|
|
|
|
|
1170
|
0
|
|
|
|
|
|
return @hashes; |
1171
|
|
|
|
|
|
|
} |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
=head2 request_full_hash() |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
Request full full hashes for specific prefixes from Google. |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
=cut |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
sub request_full_hash { |
1180
|
0
|
|
|
0
|
1
|
|
my ($self, %args) = @_; |
1181
|
0
|
|
0
|
|
|
|
my $prefixes = $args{prefixes} || return (); |
1182
|
0
|
|
0
|
|
|
|
my $size = $args{size} || length $prefixes->[0]; |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
# # Handle errors |
1185
|
0
|
|
|
|
|
|
my $i = 0; |
1186
|
0
|
|
|
|
|
|
my $errors; |
1187
|
|
|
|
|
|
|
my $delay = sub { |
1188
|
0
|
|
|
0
|
|
|
my $time = shift; |
1189
|
0
|
0
|
|
|
|
|
if ((time() - $errors->{timestamp}) < $time) { |
1190
|
0
|
|
|
|
|
|
splice(@$prefixes, $i, 1); |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
else { |
1193
|
0
|
|
|
|
|
|
$i++; |
1194
|
|
|
|
|
|
|
} |
1195
|
0
|
|
|
|
|
|
}; |
1196
|
|
|
|
|
|
|
|
1197
|
0
|
|
|
|
|
|
while ($i < scalar @$prefixes) { |
1198
|
0
|
|
|
|
|
|
my $prefix = $prefixes->[$i]; |
1199
|
|
|
|
|
|
|
|
1200
|
0
|
|
|
|
|
|
$errors = $self->{storage}->get_full_hash_error(prefix => $prefix); |
1201
|
0
|
0
|
0
|
|
|
|
if (defined $errors && $errors->{errors} > 2) { # 2 errors is OK |
1202
|
0
|
0
|
|
|
|
|
$errors->{errors} == 3 ? $delay->(30 * 60) # 30 minutes |
|
|
0
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
: $errors->{errors} == 4 ? $delay->(60 * 60) # 1 hour |
1204
|
|
|
|
|
|
|
: $delay->(2 * 60 * 60); # 2 hours |
1205
|
|
|
|
|
|
|
} |
1206
|
|
|
|
|
|
|
else { |
1207
|
0
|
|
|
|
|
|
$i++; |
1208
|
|
|
|
|
|
|
} |
1209
|
|
|
|
|
|
|
} |
1210
|
|
|
|
|
|
|
|
1211
|
0
|
|
|
|
|
|
my $url = $self->{server} . "gethash?client=api&key=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; |
1212
|
|
|
|
|
|
|
|
1213
|
0
|
|
|
|
|
|
my $prefix_list = join('', @$prefixes); |
1214
|
0
|
|
|
|
|
|
my $header = "$size:" . scalar @$prefixes * $size; |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
# print @{$args{prefixes}}, "\n"; |
1217
|
|
|
|
|
|
|
# print $$prefixes[0], "\n"; return; |
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
|
1220
|
0
|
|
|
|
|
|
my $res = $self->ua->post($url, Content => "$header\n$prefix_list"); |
1221
|
|
|
|
|
|
|
|
1222
|
0
|
0
|
|
|
|
|
if (! $res->is_success) { |
1223
|
0
|
|
|
|
|
|
$self->error("Full hash request failed\n"); |
1224
|
0
|
|
|
|
|
|
$self->debug($res->as_string . "\n"); |
1225
|
|
|
|
|
|
|
|
1226
|
0
|
|
|
|
|
|
foreach my $prefix (@$prefixes) { |
1227
|
0
|
|
|
|
|
|
my $errors = $self->{storage}->get_full_hash_error(prefix => $prefix); |
1228
|
0
|
0
|
0
|
|
|
|
if (defined $errors && ( |
|
|
|
0
|
|
|
|
|
1229
|
|
|
|
|
|
|
$errors->{errors} >=2 # backoff mode |
1230
|
|
|
|
|
|
|
|| $errors->{errors} == 1 && (time() - $errors->{timestamp}) > 5 * 60)) { # 5 minutes |
1231
|
0
|
|
|
|
|
|
$self->{storage}->full_hash_error(prefix => $prefix, timestamp => time()); # more complicate than this, need to check time between 2 errors |
1232
|
|
|
|
|
|
|
} |
1233
|
|
|
|
|
|
|
} |
1234
|
|
|
|
|
|
|
|
1235
|
0
|
|
|
|
|
|
return (); |
1236
|
|
|
|
|
|
|
} |
1237
|
|
|
|
|
|
|
else { |
1238
|
0
|
|
|
|
|
|
$self->debug("Full hash request OK\n"); |
1239
|
|
|
|
|
|
|
|
1240
|
0
|
|
|
|
|
|
foreach my $prefix (@$prefixes) { |
1241
|
0
|
|
|
|
|
|
$self->{storage}->full_hash_ok(prefix => $prefix, timestamp => time()); |
1242
|
|
|
|
|
|
|
} |
1243
|
|
|
|
|
|
|
} |
1244
|
|
|
|
|
|
|
|
1245
|
0
|
|
|
|
|
|
$self->debug($res->request->as_string . "\n"); |
1246
|
0
|
|
|
|
|
|
$self->debug($res->as_string . "\n"); |
1247
|
|
|
|
|
|
|
# $self->debug(substr($res->content, 0, 250), "\n\n"); |
1248
|
|
|
|
|
|
|
|
1249
|
0
|
|
|
|
|
|
return $self->parse_full_hashes($res->content); |
1250
|
|
|
|
|
|
|
} |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
=head2 parse_full_hashes() |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
Process the request for full hashes from Google. |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
=cut |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
sub parse_full_hashes { |
1259
|
0
|
|
|
0
|
1
|
|
my ($self, $data) = @_; |
1260
|
|
|
|
|
|
|
|
1261
|
0
|
|
|
|
|
|
my @hashes = (); |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
# 900 |
1264
|
|
|
|
|
|
|
# goog-malware-shavar:32:2:m |
1265
|
|
|
|
|
|
|
# 01234567890123456789012345678901987654321098765432109876543210982 |
1266
|
|
|
|
|
|
|
# AA3 |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
# cache life time |
1269
|
0
|
|
|
|
|
|
my $life = 0; |
1270
|
0
|
0
|
|
|
|
|
if ($data =~ s/^(\d+)\n//) { |
1271
|
0
|
|
|
|
|
|
$life = $1; |
1272
|
0
|
|
|
|
|
|
$self->debug("Full hash life time: ", $life, "\n"); |
1273
|
|
|
|
|
|
|
} |
1274
|
|
|
|
|
|
|
else { |
1275
|
0
|
|
|
|
|
|
$self->error("Life time not found\n"); |
1276
|
|
|
|
|
|
|
} |
1277
|
|
|
|
|
|
|
|
1278
|
0
|
|
|
|
|
|
while (length $data > 0) { |
1279
|
0
|
0
|
|
|
|
|
if ($data !~ /^[a-z-]+:\d+:\d+(:m)?\n/) { |
1280
|
0
|
|
|
|
|
|
$self->error("list not found\n"); |
1281
|
0
|
|
|
|
|
|
return (); |
1282
|
|
|
|
|
|
|
} |
1283
|
0
|
|
|
|
|
|
$data =~ s/^([a-z-]+)://; |
1284
|
0
|
|
|
|
|
|
my $list = $1; |
1285
|
|
|
|
|
|
|
|
1286
|
0
|
|
|
|
|
|
$data =~ s/^(\d+)://; |
1287
|
0
|
|
|
|
|
|
my $length = $1; |
1288
|
0
|
|
|
|
|
|
$self->debug("Full hash length: ", $length, "\n"); |
1289
|
|
|
|
|
|
|
|
1290
|
0
|
|
|
|
|
|
$data =~ s/^(\d+)//; |
1291
|
0
|
|
|
|
|
|
my $num = $1; |
1292
|
|
|
|
|
|
|
|
1293
|
0
|
|
|
|
|
|
$self->debug("Number of full hashes returned: ", $num, "\n"); |
1294
|
|
|
|
|
|
|
|
1295
|
0
|
|
|
|
|
|
my $metadata = 0; |
1296
|
0
|
0
|
|
|
|
|
if ($data =~ s/:m$//) { |
1297
|
0
|
|
|
|
|
|
$metadata = 1; |
1298
|
|
|
|
|
|
|
} |
1299
|
|
|
|
|
|
|
|
1300
|
0
|
|
|
|
|
|
my $current = 0; |
1301
|
0
|
|
|
|
|
|
my @local_hashes = (); |
1302
|
0
|
|
|
|
|
|
while ($current < $num) { |
1303
|
0
|
|
|
|
|
|
my $hash = substr($data, 0, $length, ''); |
1304
|
0
|
|
|
|
|
|
push(@local_hashes, { hash => $hash, list => $list, life => $life, type => 0 }); |
1305
|
|
|
|
|
|
|
|
1306
|
0
|
|
|
|
|
|
$current ++; |
1307
|
|
|
|
|
|
|
} |
1308
|
|
|
|
|
|
|
|
1309
|
0
|
0
|
|
|
|
|
if ($metadata) { |
1310
|
0
|
|
|
|
|
|
my $count = 0; |
1311
|
0
|
|
|
|
|
|
while ($data =~ s/(\d+)\n//) { |
1312
|
0
|
|
|
|
|
|
my $meta_length = $1; |
1313
|
|
|
|
|
|
|
|
1314
|
0
|
|
|
|
|
|
my $info = substr($data, 0, $meta_length, ''); |
1315
|
0
|
|
|
|
|
|
$self->debug("Metadata: $info"); |
1316
|
0
|
|
|
|
|
|
my $extra = MalwarePatternType->decode($info); |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
# update the type |
1319
|
0
|
|
|
|
|
|
my $hash = $local_hashes[$count]; |
1320
|
0
|
|
|
|
|
|
$hash->{type} = $extra->{pattern_type}; |
1321
|
0
|
|
|
|
|
|
$local_hashes[$count] = $hash; |
1322
|
|
|
|
|
|
|
|
1323
|
0
|
|
|
|
|
|
$count++; |
1324
|
|
|
|
|
|
|
} |
1325
|
|
|
|
|
|
|
} |
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
|
1328
|
0
|
|
|
|
|
|
push(@hashes, @local_hashes); |
1329
|
|
|
|
|
|
|
} |
1330
|
|
|
|
|
|
|
|
1331
|
0
|
|
|
|
|
|
$self->debug("Number of hashes: ", scalar(@hashes), "\n"); |
1332
|
0
|
|
|
|
|
|
return @hashes; |
1333
|
|
|
|
|
|
|
} |
1334
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
=head2 create_range() |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
Create a list of ranges (1-3, 5, 7-11) from a list of numbers. |
1339
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
=cut |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
sub create_range { |
1343
|
0
|
|
|
0
|
1
|
|
my ($self, %args) = @_; |
1344
|
0
|
|
0
|
|
|
|
my $numbers = $args{numbers} || []; # should already be ordered |
1345
|
|
|
|
|
|
|
|
1346
|
0
|
0
|
|
|
|
|
return '' if (scalar @$numbers == 0); |
1347
|
|
|
|
|
|
|
|
1348
|
0
|
|
|
|
|
|
my $range = $$numbers[0]; |
1349
|
0
|
|
|
|
|
|
my $new_range = 0; |
1350
|
0
|
|
|
|
|
|
for(my $i = 1; $i < scalar @$numbers; $i++) { |
1351
|
|
|
|
|
|
|
# next if ($$numbers[$i] == $$numbers[$i-1]); # should not happen |
1352
|
|
|
|
|
|
|
|
1353
|
0
|
0
|
|
|
|
|
if ($$numbers[$i] != $$numbers[$i-1] + 1) { |
|
|
0
|
|
|
|
|
|
1354
|
0
|
0
|
0
|
|
|
|
$range .= $$numbers[$i-1] if ($i > 1 && $new_range == 1); |
1355
|
0
|
|
|
|
|
|
$range .= ',' . $$numbers[$i]; |
1356
|
|
|
|
|
|
|
|
1357
|
0
|
|
|
|
|
|
$new_range = 0 |
1358
|
|
|
|
|
|
|
} |
1359
|
|
|
|
|
|
|
elsif ($new_range == 0) { |
1360
|
0
|
|
|
|
|
|
$range .= "-"; |
1361
|
0
|
|
|
|
|
|
$new_range = 1; |
1362
|
|
|
|
|
|
|
} |
1363
|
|
|
|
|
|
|
} |
1364
|
0
|
0
|
|
|
|
|
$range .= $$numbers[scalar @$numbers - 1] if ($new_range == 1); |
1365
|
|
|
|
|
|
|
|
1366
|
0
|
|
|
|
|
|
return $range; |
1367
|
|
|
|
|
|
|
} |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
=head2 expand_range() |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
Explode list of ranges (1-3, 5, 7-11) into a list of numbers (1,2,3,5,7,8,9,10,11). |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
=cut |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
sub expand_range { |
1376
|
0
|
|
|
0
|
1
|
|
my ($self, %args) = @_; |
1377
|
0
|
|
0
|
|
|
|
my $range = $args{range} || return (); |
1378
|
|
|
|
|
|
|
|
1379
|
0
|
|
|
|
|
|
my @list = (); |
1380
|
0
|
|
|
|
|
|
my @elements = split /,/, $range; |
1381
|
|
|
|
|
|
|
|
1382
|
0
|
|
|
|
|
|
foreach my $data (@elements) { |
1383
|
0
|
0
|
|
|
|
|
if ($data =~ /^\d+$/) { # single number |
|
|
0
|
|
|
|
|
|
1384
|
0
|
|
|
|
|
|
push(@list, $data); |
1385
|
|
|
|
|
|
|
} |
1386
|
|
|
|
|
|
|
elsif ($data =~ /^(\d+)-(\d+)$/) { |
1387
|
0
|
|
|
|
|
|
my $start = $1; |
1388
|
0
|
|
|
|
|
|
my $end = $2; |
1389
|
|
|
|
|
|
|
|
1390
|
0
|
|
|
|
|
|
for(my $i = $start; $i <= $end; $i++) { |
1391
|
0
|
|
|
|
|
|
push(@list, $i); |
1392
|
|
|
|
|
|
|
} |
1393
|
|
|
|
|
|
|
} |
1394
|
|
|
|
|
|
|
} |
1395
|
|
|
|
|
|
|
|
1396
|
0
|
|
|
|
|
|
return @list; |
1397
|
|
|
|
|
|
|
} |
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
1; |
1400
|
|
|
|
|
|
|
__END__ |