line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTTP::BrowserDetect::isRobot; |
2
|
|
|
|
|
|
|
{ |
3
|
|
|
|
|
|
|
$HTTP::BrowserDetect::isRobot::VERSION = '0.06'; |
4
|
|
|
|
|
|
|
} |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# ABSTRACT: test if the user-agent is a robot or not |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
649
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
30
|
|
9
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
26
|
|
10
|
1
|
|
|
1
|
|
4
|
use base 'Exporter'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
115
|
|
11
|
1
|
|
|
1
|
|
4
|
use vars qw/@EXPORT_OK/; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1487
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
@EXPORT_OK = qw/is_robot is_site_robot is_program_robot/; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub is_robot { |
16
|
9
|
|
|
9
|
1
|
3384
|
my ($agent) = @_; |
17
|
|
|
|
|
|
|
|
18
|
9
|
100
|
|
|
|
20
|
return 1 if is_site_robot($agent); |
19
|
7
|
100
|
|
|
|
15
|
return 1 if is_program_robot($agent); |
20
|
|
|
|
|
|
|
|
21
|
2
|
|
|
|
|
7
|
return; |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub is_site_robot { |
25
|
11
|
|
|
11
|
1
|
15
|
my ($agent) = @_; |
26
|
|
|
|
|
|
|
|
27
|
11
|
100
|
|
|
|
196
|
return 1 |
28
|
|
|
|
|
|
|
if $agent =~ |
29
|
|
|
|
|
|
|
/Googlebot|Baiduspider|Yahoo! Slurp|Bingbot|MSNbot|altavista|lycos|infoseek|webcrawler|lecodechecker|Ask Jeeves|facebookexternalhit|adsbot-google|ia_archive|FatBot|Xenu Link Sleuth|BlitzBOT|btbot|CatchBot|Charlotte|Discobot|FAST-WebCrawler|FurlBot|Gaisbot|iaskspider|Mediapartners-Google|Seekbot|SEOChat|SeznamBot|Sitebot|sogou spider|Sosospider|TweetedTimes|YahooSeeker|YandexBot|Yeti|YodaoBot|YoudaoBot|ZyBorg|Twitterbot|AhrefsBot|TweetedTimes Bot|TweetmemeBot|bitlybot|ShowyouBot|UnwindFetchor|MetaURI API|PaperLiBot|LinkedInBot|AddThis\.com robot|FriendFeedBot|MnoGoSearch|sistrix|MJ12bot|EZooms|UnisterBot|SiteExplorer|Exabot|Infohelfer|AcoonBot|Pixray-Seeker|emefgebot|Snipebot|Dataprovider Site Explorer|iBusiness Shopcrawler|pmoz\.info|Toplistbot|findlinks|netEstate NE Crawler|Crawler for Netopian|msnbot|webalta|suchen\.de|depspid|gigabot|3GSE bot|IRLbot|cuil\.com|Gigameme\.bot|BotOnParade|Crawly|infometrics-bot|Kaloogabot|Speedy Spider|iCcrawler|WebDataCentreBot|LinkWalker|Tagoobot|searchme\.com|Jyxobot|Purebot|Yanga WorldSearch|MSRBOT|VEDENSBOT|Fastsearch|Twiceler|Linguee Bot|ScoutJet/i; |
30
|
7
|
50
|
|
|
|
25
|
return 1 if $agent =~ /^silk/i; |
31
|
|
|
|
|
|
|
|
32
|
7
|
|
|
|
|
18
|
return; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
sub is_program_robot { |
36
|
12
|
|
|
12
|
1
|
23
|
my ($agent) = @_; |
37
|
|
|
|
|
|
|
|
38
|
12
|
100
|
|
|
|
82
|
return 1 |
39
|
|
|
|
|
|
|
if $agent =~ |
40
|
|
|
|
|
|
|
/libwww-perl|PycURL|EventMachine HttpClient|Apache-HttpClient/; |
41
|
8
|
100
|
|
|
|
53
|
return 1 if $agent =~ m{Python-(\w+)/}i; |
42
|
4
|
100
|
|
|
|
21
|
return 1 if $agent =~ m{^Java/}; |
43
|
2
|
50
|
|
|
|
7
|
return 1 if $agent eq 'Ruby'; |
44
|
|
|
|
|
|
|
|
45
|
2
|
|
|
|
|
6
|
return; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
1; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
__END__ |