line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTTP::UserAgentString::Parser; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
HTTP::UserAgentStringParser - User-Agent string parser |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $p = HTTP::UserAgentString::Parser->new(); |
10
|
|
|
|
|
|
|
my $ua = $p->parse("Opera/9.80 (X11; Linux x86_64; U; en) Presto/2.9.168 Version/11.50"); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
if ($ua->isRobot) { |
13
|
|
|
|
|
|
|
print "It's a robot: ", $ua->name, "\n"; |
14
|
|
|
|
|
|
|
} else { |
15
|
|
|
|
|
|
|
print "It's a browser: ", $ua->name, " - version: ", $ua->version, "\n"; |
16
|
|
|
|
|
|
|
} |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
C is a Perl API for user-agent-string.info. It |
21
|
|
|
|
|
|
|
can be used to parse user agent strings and determine whether the agent is a robot, |
22
|
|
|
|
|
|
|
a normal browser, mobile browser, e-mail client. It can also tell browser version, |
23
|
|
|
|
|
|
|
company that makes it, home page URL. In most of the cases it can also tell in which |
24
|
|
|
|
|
|
|
OS the browser is running. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
HTTP::UserAgentString::Parser will download the .ini file provided by user-agent-string.info |
27
|
|
|
|
|
|
|
which contains all the information to do the parsing. The file will be cached by default |
28
|
|
|
|
|
|
|
for 7 days. After that time, it will check whether a new version was released. The |
29
|
|
|
|
|
|
|
default cache time can be modified, as well as the cache path (default is /tmp). A |
30
|
|
|
|
|
|
|
cache reload can also be forced. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
In order to parse a string, a parse() method is provided which returns an object |
33
|
|
|
|
|
|
|
of classes HTTP::UserAgentString::Browser or HTTP::UserAgentString::Robot. Both classes |
34
|
|
|
|
|
|
|
have accesors to determine agent capabilities. In case the string does not match any known |
35
|
|
|
|
|
|
|
browser or robot, undef() is returned. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
$p = HTTP::UserAgentString::Parser->new(%opts) |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Valid options are: |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
cache_max_age: in seconds (default is 7 days) |
44
|
|
|
|
|
|
|
cache_dir: path must be writeable - default is /tmp |
45
|
|
|
|
|
|
|
parse_cache_size: size of parsing cache in number of elements. Default is 100_000 |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 METHODS |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=over 4 |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=item $agent = $p->parse($string) |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Parses a User-Agent string and returns a HTTP::UserAgentString::Browser or |
54
|
|
|
|
|
|
|
HTTP::UserAgentString::Robot object, or undef() if no matches where found. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=item $p->updateDB($force) |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Updates the cache file from user-agent-string.info. If force is false or undef(), the |
59
|
|
|
|
|
|
|
check is only executed if the cache file has expired. If force is true, the method |
60
|
|
|
|
|
|
|
checks whether there is a new file and downloads it accordingly. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=item $p->getCurrentVersion() |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Retrieves the current database version from user-agent-string.info. Returns the version |
65
|
|
|
|
|
|
|
number or undef() if an error occurs. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item $p->getCachedVersion() |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Returns the version of the cached .ini file, or undef() if there is no cached file. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=item $p->cache_file() |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Local path to the cached .ini file. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item $p->version_file() |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Local path to file that contains the version of the cached .ini file. |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=back |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 SEE ALSO |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
See L and L for description |
84
|
|
|
|
|
|
|
of the objects returned by parse(). |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 COPYRIGHT |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Copyright (c) 2011 Nicolas Moldavsky (http://www.e-planning.net/) |
89
|
|
|
|
|
|
|
This is free software. You can redistribute it or modify it under the terms of the |
90
|
|
|
|
|
|
|
Perl license |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=cut |
93
|
|
|
|
|
|
|
|
94
|
4
|
|
|
4
|
|
1066799
|
use strict; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
153
|
|
95
|
4
|
|
|
4
|
|
22
|
use Carp (); |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
64
|
|
96
|
4
|
|
|
4
|
|
4371
|
use LWP::UserAgent; |
|
4
|
|
|
|
|
212307
|
|
|
4
|
|
|
|
|
145
|
|
97
|
4
|
|
|
4
|
|
42
|
use File::Spec; |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
117
|
|
98
|
4
|
|
|
4
|
|
24
|
use Digest::MD5; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
168
|
|
99
|
4
|
|
|
4
|
|
2457
|
use HTTP::UserAgentString::Browser; |
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
114
|
|
100
|
4
|
|
|
4
|
|
2125
|
use HTTP::UserAgentString::Robot; |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
162
|
|
101
|
4
|
|
|
4
|
|
2375
|
use HTTP::UserAgentString::OS; |
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
11783
|
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
our $VERSION = '0.6.1'; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
my @REQUIRED_SECS = qw(robots os browser browser_type browser_reg browser_os os_reg); |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
my $REGEX_SECS = { 'browser_reg' => 1, 'os_reg' => 1 }; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my $INI_URL = 'http://user-agent-string.info/rpc/get_data.php?key=free&format=ini'; |
110
|
|
|
|
|
|
|
my $VER_URL = 'http://user-agent-string.info/rpc/get_data.php?key=free&format=ini&ver=y'; |
111
|
|
|
|
|
|
|
my $MD5_URL = 'http://user-agent-string.info/rpc/get_data.php?format=ini&md5=y'; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
my $DEFAULT_CACHE_DIR = '/tmp'; |
114
|
|
|
|
|
|
|
my $DEFAULT_CACHE_MAX_AGE = 7 * 86400; |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
my $DEFAULT_PARSE_CACHE_SIZE = 100000; |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
my $INI_FILE = 'uas.ini'; |
119
|
|
|
|
|
|
|
my $VER_FILE = 'uas.version'; |
120
|
|
|
|
|
|
|
|
121
|
8
|
|
|
8
|
0
|
176
|
sub cache_dir($) { $_[0]->{cache_dir} } |
122
|
0
|
|
|
0
|
0
|
0
|
sub parse_cache_count($) { $_[0]->{parse_cache_count} } |
123
|
4
|
|
|
4
|
0
|
14
|
sub cache_max_age($) { $_[0]->{cache_max_age} } |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub cache_file($) { |
126
|
8
|
|
|
8
|
1
|
19
|
my $self = shift; |
127
|
8
|
|
|
|
|
30
|
return File::Spec->catfile($self->cache_dir, $INI_FILE); |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
sub version_file($) { |
130
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
131
|
0
|
|
|
|
|
0
|
return File::Spec->catfile($self->cache_dir, $VER_FILE); |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub getCurrentVersion($) { |
135
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
136
|
0
|
|
|
|
|
0
|
my $lwp = LWP::UserAgent->new(); |
137
|
0
|
|
|
|
|
0
|
$lwp->env_proxy(); |
138
|
0
|
|
|
|
|
0
|
my $res = $lwp->get($VER_URL); |
139
|
0
|
0
|
|
|
|
0
|
if ($res->is_success) { |
140
|
0
|
|
|
|
|
0
|
return $res->content; |
141
|
|
|
|
|
|
|
} else { |
142
|
0
|
|
|
|
|
0
|
Carp::carp( "Can't get current file version from $VER_URL: " . $res->status_line . "\n"); |
143
|
0
|
|
|
|
|
0
|
return undef(); |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
sub getCachedVersion($) { |
148
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
149
|
0
|
|
|
|
|
0
|
my $path = $self->version_file; |
150
|
0
|
0
|
|
|
|
0
|
if (-f $path) { |
151
|
0
|
0
|
|
|
|
0
|
if (open(my $fh, "<", $path)) { |
152
|
0
|
|
|
|
|
0
|
my $version = <$fh>; |
153
|
0
|
|
|
|
|
0
|
close($fh); |
154
|
0
|
|
|
|
|
0
|
return $version; |
155
|
|
|
|
|
|
|
} else { |
156
|
0
|
|
|
|
|
0
|
Carp::carp("Can't open $path: $!\n"); |
157
|
0
|
|
|
|
|
0
|
return undef(); |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
} else { |
160
|
0
|
|
|
|
|
0
|
return undef(); |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub _writeCacheFile($$$) { |
166
|
0
|
|
|
0
|
|
0
|
my ($self, $filename, $content) = @_; |
167
|
|
|
|
|
|
|
|
168
|
0
|
0
|
|
|
|
0
|
if (open(my $fh, ">", $filename)) { |
169
|
0
|
0
|
|
|
|
0
|
if (print $fh $content) { |
170
|
0
|
0
|
|
|
|
0
|
if (close($fh)) { |
171
|
0
|
|
|
|
|
0
|
return 1; |
172
|
|
|
|
|
|
|
} else { |
173
|
0
|
|
|
|
|
0
|
Carp::carp("Can't close $filename: $!\n"); |
174
|
0
|
|
|
|
|
0
|
return 0; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
} else { |
177
|
0
|
|
|
|
|
0
|
Carp::carp("Can't write to $filename: $!\n"); |
178
|
0
|
|
|
|
|
0
|
return 0; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
} else { |
181
|
0
|
|
|
|
|
0
|
Carp::carp("Can't open $filename for writing: $!\n"); |
182
|
0
|
|
|
|
|
0
|
return 0; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub _updateCache($$$) { |
187
|
0
|
|
|
0
|
|
0
|
my ($self, $inidata, $version) = @_; |
188
|
|
|
|
|
|
|
|
189
|
0
|
|
0
|
|
|
0
|
return ($self->_writeCacheFile($self->cache_file, $inidata) and $self->_writeCacheFile($self->version_file, $version)); |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
sub _downloadDB($$) { |
193
|
0
|
|
|
0
|
|
0
|
my ($self, $current_version) = @_; |
194
|
0
|
|
|
|
|
0
|
my $lwp = LWP::UserAgent->new(); |
195
|
0
|
|
|
|
|
0
|
$lwp->env_proxy(); |
196
|
0
|
|
|
|
|
0
|
my $res_ini = $lwp->get($INI_URL); |
197
|
0
|
0
|
|
|
|
0
|
if ($res_ini->is_success) { |
198
|
0
|
|
|
|
|
0
|
my $inidata = $res_ini->content; |
199
|
0
|
|
|
|
|
0
|
my $res_md5 = $lwp->get($MD5_URL); |
200
|
0
|
0
|
|
|
|
0
|
if ($res_md5->is_success) { |
201
|
0
|
|
|
|
|
0
|
my $expected_hash = $res_md5->content; |
202
|
0
|
|
|
|
|
0
|
my $ctx = Digest::MD5->new(); |
203
|
0
|
|
|
|
|
0
|
$ctx->add($inidata); |
204
|
0
|
|
|
|
|
0
|
my $hash = $ctx->hexdigest(); |
205
|
0
|
0
|
|
|
|
0
|
if ($hash eq $expected_hash) { |
206
|
|
|
|
|
|
|
# Write files to disk |
207
|
0
|
|
|
|
|
0
|
return $self->_updateCache($inidata, $current_version); |
208
|
|
|
|
|
|
|
} else { |
209
|
0
|
|
|
|
|
0
|
Carp::carp("MD5 digest does not match - expected=$expected_hash; calculate=$hash\n"); |
210
|
0
|
|
|
|
|
0
|
return 0; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
} else { |
213
|
0
|
|
|
|
|
0
|
Carp::carp("Can't get MD5 from $MD5_URL: " . $res_md5->status_line . "\n"); |
214
|
0
|
|
|
|
|
0
|
return 0; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
} else { |
217
|
0
|
|
|
|
|
0
|
Carp::carp("Can't get .ini from $INI_URL: " . $res_ini->status_line . "\n"); |
218
|
0
|
|
|
|
|
0
|
return 0; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub updateDB($;$) { |
223
|
4
|
|
|
4
|
1
|
9
|
my ($self, $force) = @_; |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# Check if cache file needs to be updated according to max_age |
226
|
|
|
|
|
|
|
|
227
|
4
|
|
|
|
|
21
|
my $cache_file = $self->cache_file; |
228
|
|
|
|
|
|
|
|
229
|
4
|
|
|
|
|
11
|
my $do_check; |
230
|
4
|
50
|
|
|
|
138
|
if (! -f $cache_file) { |
231
|
0
|
|
|
|
|
0
|
$do_check = 1; |
232
|
|
|
|
|
|
|
} else { |
233
|
4
|
|
|
|
|
96
|
my @stat = stat($cache_file); |
234
|
4
|
50
|
|
|
|
18
|
if (@stat) { |
235
|
4
|
|
|
|
|
9
|
my $mtime = $stat[9]; |
236
|
4
|
|
|
|
|
36
|
my $limit = time() - $self->cache_max_age; |
237
|
4
|
|
|
|
|
14
|
$do_check = ($mtime < $limit); |
238
|
|
|
|
|
|
|
} else { |
239
|
0
|
|
|
|
|
0
|
Carp::carp("Can't stat() $cache_file: $!\n"); |
240
|
0
|
|
|
|
|
0
|
return undef(); |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
4
|
50
|
33
|
|
|
31
|
if ($do_check or $force) { |
245
|
0
|
|
|
|
|
0
|
my $current_version = $self->getCurrentVersion(); |
246
|
0
|
|
|
|
|
0
|
my $cache_version = $self->getCachedVersion(); |
247
|
0
|
0
|
0
|
|
|
0
|
if (defined($current_version) and ((! defined($cache_version)) or ($current_version gt $cache_version))) { |
|
|
|
0
|
|
|
|
|
248
|
0
|
|
|
|
|
0
|
return $self->_downloadDB($current_version); |
249
|
|
|
|
|
|
|
} else { |
250
|
0
|
|
|
|
|
0
|
return -1; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
} else { |
253
|
4
|
|
|
|
|
41
|
return -1; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub _compileRegexes($$) { |
259
|
8
|
|
|
8
|
|
21
|
my ($self, $regexes) = @_; |
260
|
|
|
|
|
|
|
|
261
|
8
|
|
|
|
|
25
|
foreach my $ir (@$regexes) { |
262
|
3392
|
|
|
|
|
5449
|
my $r = $ir->[0]; |
263
|
3392
|
|
|
|
|
265326
|
my $regex = eval "qr" . $r; |
264
|
3392
|
50
|
|
|
|
9773
|
if (defined($regex)) { |
265
|
3392
|
|
|
|
|
6654
|
$ir->[2] = $r; |
266
|
3392
|
|
|
|
|
7604
|
$ir->[0] = $regex; |
267
|
|
|
|
|
|
|
} else { |
268
|
0
|
|
|
|
|
0
|
Carp::carp("Invalid regex: " . $ir->[0] . "($@)\n"); |
269
|
0
|
|
|
|
|
0
|
return 0; |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
8
|
|
|
|
|
69
|
return 1; |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
sub _loadDB($) { |
277
|
4
|
|
|
4
|
|
9
|
my $self = shift; |
278
|
4
|
|
|
|
|
15
|
my $file = $self->cache_file; |
279
|
4
|
50
|
|
|
|
178
|
if (open(my $fh, "<", $file)) { |
280
|
4
|
|
|
|
|
6
|
my $cursec; |
281
|
4
|
|
|
|
|
8
|
my $nline = 1; |
282
|
4
|
|
|
|
|
7
|
my $lastvalues; |
283
|
|
|
|
|
|
|
my $lastid; |
284
|
4
|
|
|
|
|
169
|
while (<$fh>) { |
285
|
74808
|
|
|
|
|
79232
|
$nline++; |
286
|
74808
|
100
|
|
|
|
149443
|
next if (/^;/); |
287
|
74652
|
|
|
|
|
83058
|
chop; |
288
|
74652
|
100
|
|
|
|
347044
|
if (/^\[([\w_]+)\]\s*$/) { |
|
|
50
|
|
|
|
|
|
289
|
36
|
100
|
|
|
|
89
|
if (defined($lastvalues)) { |
290
|
8
|
|
|
|
|
22
|
push(@{$self->{$cursec}}, $lastvalues); |
|
8
|
|
|
|
|
85
|
|
291
|
|
|
|
|
|
|
} |
292
|
36
|
|
|
|
|
81
|
$cursec = $1; |
293
|
36
|
|
|
|
|
56
|
$lastid = undef(); |
294
|
36
|
|
|
|
|
168
|
$lastvalues = undef(); |
295
|
|
|
|
|
|
|
} elsif (/^(\d+)\[\] = "(.*)"\s*$/) { |
296
|
74616
|
|
|
|
|
162728
|
my ($id, $value) = ($1, $2); |
297
|
74616
|
100
|
|
|
|
129349
|
if ($REGEX_SECS->{$cursec}) { |
298
|
6784
|
100
|
100
|
|
|
26612
|
if (defined($lastid) and ($id == $lastid)) { |
299
|
3392
|
|
|
|
|
16087
|
push(@$lastvalues, $value); |
300
|
|
|
|
|
|
|
} else { |
301
|
3392
|
100
|
|
|
|
5871
|
push(@{$self->{$cursec}}, $lastvalues) if (defined($lastid)); |
|
3384
|
|
|
|
|
6732
|
|
302
|
3392
|
|
|
|
|
4497
|
$lastid = $id; |
303
|
3392
|
|
|
|
|
16687
|
$lastvalues = [ $value ]; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
} else { |
306
|
67832
|
|
|
|
|
63429
|
push(@{$self->{$cursec}[$id]}, $value); |
|
67832
|
|
|
|
|
351306
|
|
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
} else { |
309
|
0
|
|
|
|
|
0
|
Carp::carp("Invalid format in line $nline: $_\n"); |
310
|
0
|
|
|
|
|
0
|
return 0; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
} |
313
|
4
|
50
|
|
|
|
21
|
if (defined($lastvalues)) { |
314
|
0
|
|
|
|
|
0
|
push(@{$self->{$cursec}}, $lastvalues); |
|
0
|
|
|
|
|
0
|
|
315
|
|
|
|
|
|
|
} |
316
|
4
|
|
|
|
|
137
|
close($fh); |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# Check that we have all required sections |
319
|
4
|
|
|
|
|
18
|
foreach my $sec (@REQUIRED_SECS) { |
320
|
28
|
|
|
|
|
73
|
my $a = $self->{$sec}; |
321
|
28
|
50
|
33
|
|
|
274
|
if (! defined($a) or (! @$a)) { |
322
|
0
|
|
|
|
|
0
|
Carp::carp("Section $a is not present in $file"); |
323
|
0
|
|
|
|
|
0
|
return 0; |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# Compile regexes |
328
|
4
|
|
|
|
|
26
|
foreach my $key (keys %$REGEX_SECS) { |
329
|
8
|
50
|
|
|
|
67
|
$self->_compileRegexes($self->{$key}) or return 0; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# Index for robots |
334
|
4
|
|
|
|
|
27
|
$self->{robot_index} = {}; |
335
|
4
|
|
|
|
|
12
|
my @r; |
336
|
4
|
|
|
|
|
12
|
foreach my $robot (grep { defined($_) } @{$self->{robots}}) { |
|
57588
|
|
|
|
|
86678
|
|
|
4
|
|
|
|
|
984
|
|
337
|
5620
|
|
|
|
|
10858
|
my $os_id = $robot->[7]; |
338
|
5620
|
|
|
|
|
5687
|
my $os; |
339
|
5620
|
50
|
33
|
|
|
12479
|
if ($os_id and defined($self->{os}[$os_id])) { |
340
|
0
|
|
|
|
|
0
|
$os = HTTP::UserAgentString::OS->new($self->{os}[$os_id]); |
341
|
|
|
|
|
|
|
} |
342
|
5620
|
|
|
|
|
17776
|
my $bot = HTTP::UserAgentString::Robot->new($robot, $os); |
343
|
5620
|
|
|
|
|
8748
|
push(@r, $bot); |
344
|
5620
|
|
|
|
|
27800
|
$self->{robot_index}{$robot->[0]} = $bot; |
345
|
|
|
|
|
|
|
} |
346
|
4
|
|
|
|
|
21
|
$self->{robots} = \@r; |
347
|
|
|
|
|
|
|
|
348
|
4
|
|
|
|
|
6413
|
$self->{parse_cache} = {}; |
349
|
4
|
|
|
|
|
21
|
$self->{parse_cache_count} = 0; |
350
|
|
|
|
|
|
|
|
351
|
4
|
|
|
|
|
75
|
return 1; |
352
|
|
|
|
|
|
|
} else { |
353
|
0
|
|
|
|
|
0
|
Carp::carp("Can't open $file for reading: $!\n"); |
354
|
0
|
|
|
|
|
0
|
return 0; |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
sub new($;%) { |
359
|
4
|
|
|
4
|
0
|
97
|
my ($pkg, %opts) = @_; |
360
|
|
|
|
|
|
|
|
361
|
4
|
|
|
|
|
15
|
foreach my $key (qw(cache_max_age parse_cache_size)) { |
362
|
8
|
|
|
|
|
16
|
my $val = $opts{$key}; |
363
|
8
|
50
|
33
|
|
|
37
|
if (defined($val) and ($val !~ /^\d+$/)) { |
364
|
0
|
|
|
|
|
0
|
Carp::carp("$key must be an integer!\n"); |
365
|
0
|
|
|
|
|
0
|
return undef(); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
4
|
50
|
|
|
|
24
|
if ($opts{cache_dir}) { |
370
|
0
|
0
|
|
|
|
0
|
if (! -d $opts{cache_dir}) { |
371
|
0
|
|
|
|
|
0
|
Carp::carp($opts{cache_dir} . " is not a valid directory: $!"); |
372
|
0
|
|
|
|
|
0
|
return undef(); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
4
|
50
|
33
|
|
|
81
|
my $self = bless({ |
|
|
|
33
|
|
|
|
|
377
|
|
|
|
|
|
|
cache_dir => $opts{cache_dir} || $DEFAULT_CACHE_DIR, |
378
|
|
|
|
|
|
|
cache_max_age => $opts{cache_max_age} || $DEFAULT_CACHE_MAX_AGE, |
379
|
|
|
|
|
|
|
parse_cache_size => defined($opts{parse_cache_size}) ? $opts{parse_cache_size} : $DEFAULT_PARSE_CACHE_SIZE |
380
|
|
|
|
|
|
|
}, $pkg); |
381
|
|
|
|
|
|
|
|
382
|
4
|
50
|
33
|
|
|
24
|
if ($self->updateDB and $self->_loadDB()) { |
383
|
4
|
|
|
|
|
36
|
return $self; |
384
|
|
|
|
|
|
|
} else { |
385
|
0
|
|
|
|
|
0
|
return undef(); |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
2
|
|
|
2
|
0
|
10
|
sub robots($) { $_[0]->{robots} } |
390
|
2
|
|
|
2
|
0
|
761
|
sub browser_reg($) { $_[0]->{browser_reg} } |
391
|
2
|
|
|
2
|
0
|
11
|
sub os_reg($) { $_[0]->{os_reg} } |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
sub getBrowser($$) { |
394
|
0
|
|
|
0
|
0
|
0
|
my ($self, $browser_id) = @_; |
395
|
0
|
|
|
|
|
0
|
my $bos = $self->{browser_os}[$browser_id]; |
396
|
0
|
|
|
|
|
0
|
my $os; |
397
|
0
|
0
|
|
|
|
0
|
$os = $self->getOS($bos->[0]) if (defined($bos)); |
398
|
0
|
|
|
|
|
0
|
return HTTP::UserAgentString::Browser->new($self->{browser}[$browser_id], "", "", $os); |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
sub getOS($$) { |
402
|
0
|
|
|
0
|
0
|
0
|
my ($self, $os_id) = @_; |
403
|
0
|
|
|
|
|
0
|
return HTTP::UserAgentString::OS->new($self->{os}[$os_id]); |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# Real parsing with no cache checking |
407
|
|
|
|
|
|
|
sub _parse($$) { |
408
|
3
|
|
|
3
|
|
7
|
my ($self, $string) = @_; |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# First we check whether it is a robot |
411
|
3
|
100
|
|
|
|
22
|
if (defined(my $robot = $self->{robot_index}{$string})) { |
412
|
1
|
|
|
|
|
3
|
return $robot; |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
# Now we check browser regexes |
416
|
2
|
|
|
|
|
4
|
my $idx = 0; |
417
|
2
|
|
|
|
|
5
|
foreach my $br (grep { defined($_) } @{$self->{browser_reg}}) { |
|
1258
|
|
|
|
|
1430
|
|
|
2
|
|
|
|
|
13
|
|
418
|
359
|
|
|
|
|
488
|
my ($regex, $browser_id) = @$br; |
419
|
359
|
100
|
|
|
|
1452
|
if ($string =~ $regex) { |
420
|
2
|
|
|
|
|
10
|
my $version = $1; |
421
|
2
|
|
|
|
|
12
|
my $browser = $self->{browser}[$browser_id]; |
422
|
2
|
|
|
|
|
6
|
my $typeDesc; |
423
|
2
|
|
|
|
|
6
|
my $type = $browser->[0]; |
424
|
2
|
50
|
|
|
|
13
|
if (defined($self->{browser_type}[$type])) { |
425
|
2
|
|
|
|
|
7
|
$typeDesc = $self->{browser_type}[$type][0]; |
426
|
|
|
|
|
|
|
} |
427
|
2
|
|
|
|
|
9
|
my $bos = $self->{browser_os}[$browser_id]; |
428
|
2
|
|
|
|
|
5
|
my $os_id; |
429
|
2
|
50
|
|
|
|
7
|
$os_id = $bos->[0] if (defined($bos)); |
430
|
2
|
|
|
|
|
5
|
my $os; |
431
|
|
|
|
|
|
|
|
432
|
2
|
50
|
|
|
|
10
|
if (! defined($os_id)) { |
433
|
|
|
|
|
|
|
# Use regexes to search lookup OS |
434
|
2
|
|
|
|
|
7
|
OS: foreach my $or (grep { defined($_) } @{$self->{os_reg}}) { |
|
438
|
|
|
|
|
527
|
|
|
2
|
|
|
|
|
8
|
|
435
|
223
|
|
|
|
|
377
|
my ($osregex, $id) = @$or; |
436
|
223
|
100
|
|
|
|
1145
|
if ($string =~ $osregex) { |
437
|
2
|
|
|
|
|
7
|
$os_id = $id; |
438
|
2
|
|
|
|
|
8
|
last OS; |
439
|
|
|
|
|
|
|
} |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
2
|
50
|
33
|
|
|
27
|
if (defined($os_id) and defined($self->{os}[$os_id])) { |
444
|
2
|
|
|
|
|
37
|
$os = HTTP::UserAgentString::OS->new($self->{os}[$os_id]); |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
|
447
|
2
|
|
|
|
|
28
|
return HTTP::UserAgentString::Browser->new($browser, $typeDesc, $version, $os); |
448
|
|
|
|
|
|
|
} |
449
|
357
|
|
|
|
|
414
|
$idx++; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
0
|
|
|
|
|
0
|
return undef(); |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
sub parse($$) { |
456
|
7
|
|
|
7
|
1
|
15794
|
my ($self, $string) = @_; |
457
|
|
|
|
|
|
|
|
458
|
7
|
|
|
|
|
16
|
my $obj; |
459
|
7
|
100
|
|
|
|
297
|
if (exists $self->{parse_cache}{$string}) { |
460
|
4
|
|
|
|
|
14
|
$obj = $self->{parse_cache}{$string}; |
461
|
|
|
|
|
|
|
} else { |
462
|
3
|
|
|
|
|
18
|
$obj = $self->_parse($string); |
463
|
|
|
|
|
|
|
# Cache it if we have enough space |
464
|
3
|
50
|
|
|
|
23
|
if ($self->{parse_cache_count} < $self->{parse_cache_size}) { |
465
|
3
|
|
|
|
|
7
|
$self->{parse_cache_count}++; |
466
|
3
|
|
|
|
|
15
|
$self->{parse_cache}{$string} = $obj; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
|
470
|
7
|
|
|
|
|
23
|
return $obj; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
1; |
474
|
|
|
|
|
|
|
|