File Coverage

blib/lib/AnyEvent/Google/PageRank.pm
Criterion Covered Total %
statement 51 51 100.0
branch 17 22 77.2
condition 5 10 50.0
subroutine 10 10 100.0
pod 3 3 100.0
total 86 96 89.5


line stmt bran cond sub pod time code
1             package AnyEvent::Google::PageRank;
2              
3             =head1 NAME
4              
5             AnyEvent::Google::PageRank - Non-blocking wrapper for WWW::Google::PageRank
6              
7             =cut
8              
9 8     8   550790 use AnyEvent::HTTP;
  8         283659  
  8         558  
10 8     8   3700 use URI::Escape;
  8         11934  
  8         487  
11 8     8   61 use Carp;
  8         18  
  8         480  
12 8     8   58 use base 'Exporter';
  8         16  
  8         708  
13 8     8   49 use strict;
  8         16  
  8         825  
14             {
15             # we really do not need LWP, but WWW::Google::PageRank uses it
16             # let's lie that LWP::UserAgent already loaded
17             local $INC{'LWP/UserAgent.pm'} = 1;
18             require WWW::Google::PageRank;
19             }
20              
21             =head1 SYNOPSIS
22              
23             =over
24              
25             =item Object-oriented interface
26              
27             use AnyEvent::Google::PageRank;
28             use AnyEvent;
29            
30             my @urls = qw(http://perl.org http://cpan.org http://perlmonks.org);
31             my $rank = AnyEvent::Google::PageRank->new(
32             timeout => 10,
33             proxy => 'localhost:3128'
34             );
35              
36             my $cv = AnyEvent->condvar;
37             $cv->begin for @urls;
38              
39             foreach my $url (@urls) {
40             $rank->get($url, sub {
41             my ($rank, $headers) = @_;
42             print "$url - ", defined($rank) ? $rank : "fail: $headers->{Status} - $headers->{Reason}", "\n";
43             $cv->end;
44             });
45             }
46              
47             $cv->recv;
48              
49             =item Procedural interface
50              
51             use AnyEvent::Google::PageRank qw(rank_get);
52             use AnyEvent;
53              
54             my @urls = qw(http://perl.org http://cpan.org http://perlmonks.org);
55             my $cv = AnyEvent->condvar;
56             $cv->begin for @urls;
57              
58             foreach my $url (@urls) {
59             rank_get $url, timeout => 10, proxy => 'localhost:3128', sub {
60             my ($rank, $headers) = @_;
61             print "$url - ", defined($rank) ? $rank : "fail: $headers->{Status} - $headers->{Reason}", "\n";
62             $cv->end;
63             };
64             }
65              
66             $cv->recv;
67              
68             =back
69              
70             =cut
71              
72             =head1 CAUTION
73              
74             In 2016 Google officially closed public PageRank service. So, this module no longer works.
75              
76             =head1 DESCRIPTION
77              
78             AnyEvent::Google::PageRank helps to get google pagerank for specified url, like WWW::Google::PageRank
79             does. But in contrast to WWW::Google::PageRank you can perform many requests in parallel. This module
80             uses AnyEvent::HTTP as HTTP client.
81              
82             =head1 EXPORT
83              
84             =over
85              
86             =item rank_get() - on request
87              
88             =back
89              
90             =cut
91              
92             our $VERSION = '0.05';
93             our @EXPORT_OK = qw(rank_get);
94              
95             use constant {
96 8         6160 DEFAULT_AGENT => 'Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)',
97             DEFAULT_HOST => 'toolbarqueries.google.com',
98 8     8   71 };
  8         12  
99              
100             =head1 METHODS
101              
102             =head2 new(%opts)
103              
104             Creates new AnyEvent::Google::PageRank object. The following options available (all are optional):
105              
106             KEY DESCRIPTION DEFAULT
107             ------------------------------------------------------------------
108             agent User-Agent value in the headers Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)
109             proxy http proxy as address:port undef
110             timeout timeout for network operations AnyEvent::HTTP default timeout
111             host host for query toolbarqueries.google.com
112             ae_http AnyEvent::HTTP request options as hashref undef
113              
114             =cut
115              
116             sub new {
117 6     6 1 7940 my ($class, %opts) = @_;
118            
119 6         31 my $self = {};
120 6   50     94 $self->{agent} = delete($opts{agent}) || DEFAULT_AGENT;
121 6         17 $self->{timeout} = delete($opts{timeout});
122 6         44 $self->{proxy} = delete($opts{proxy});
123 6         42 $self->{host} = delete($opts{host});
124 6         24 $self->{ae_http} = delete($opts{ae_http});
125            
126 6 100       26 if (%opts) {
127 1         263 croak 'Unrecognized options specified: ', join(', ', keys %opts);
128             }
129            
130 5         138 bless $self, $class;
131             }
132              
133             =head2 get($url, $cb->($rank, $headers))
134              
135             Get rank for specified url and call specified callback on finish. Parameters for callback are:
136             rank and headers. On fail rank will be undef and reason could be found in $headers->{Reason},
137             code in $headers->{Status}. Special codes provided by this module are:
138              
139             695 - malformed url
140              
141             For other codes see L
142              
143             =cut
144              
145             sub get {
146 6     6 1 24 my ($self, $url, $cb) = @_;
147            
148 6 50       36 croak 'Not a code reference in $cb'
149             if ref($cb) ne 'CODE';
150            
151 6 100       111 return $cb->(undef, {Status => 695, Reason => 'malformed url'}) if $url !~ m[^https?://]i;
152            
153 5         97 my $ch = '6' . WWW::Google::PageRank::_compute_ch_new('info:' . $url);
154 5   50     4274 my $query = 'http://' . ($self->{host}||DEFAULT_HOST) . '/tbr?client=navclient-auto&ch=' . $ch .
155             '&ie=UTF-8&oe=UTF-8&features=Rank&q=info:' . uri_escape($url);
156            
157 5         272 my $opts = {};
158 5 100       20 if (ref($self) eq 'HASH') {
159             # call from rank_get
160 2         6 $opts = $self;
161 2 50 33     8 $opts->{proxy} = [split /:/, $opts->{proxy}] if defined $opts->{proxy} && index($opts->{proxy}, ':') != -1;
162 2 50       8 $opts->{headers}{'User-Agent'} = exists($opts->{agent}) ? $opts->{agent} : DEFAULT_AGENT;
163             }
164             else {
165             # object call
166 3 100       21 %$opts = %{$self->{ae_http}} if ref($self->{ae_http}) eq 'HASH';
  1         5  
167 3 100       26 $opts->{timeout} = $self->{timeout} if defined $self->{timeout};
168 3 50       10 $opts->{proxy} = [split /:/, $self->{proxy}] if defined $self->{proxy};
169 3 50       25 $opts->{headers}{'User-Agent'} = $self->{agent} if defined $self->{agent};
170             }
171            
172             http_get $query, %$opts, sub {
173 5     5   10002926 my ($data, $headers) = @_;
174            
175 5 100 66     104 if ($headers->{Status} =~ /^2/ && $data =~ /Rank_\d+:\d+:(\d+)/) {
176 3         25 $cb->($1, $headers);
177             }
178             else {
179 2         16 $cb->(undef, $headers);
180             }
181 5         77 };
182             }
183              
184             =head1 FUNCTIONS
185              
186             =head2 rank_get($url, key => val, ..., $cb->($rank, $headers))
187              
188             Get rank for specified url and call specified callback on finish. Key/value pairs
189             are options understanded by AnyEvent::HTTP::http_request() and new() method of this
190             module (except ae_http option). For $cb description see get() method.
191              
192             =cut
193              
194             sub rank_get {
195 3     3 1 10 my $cb = pop @_;
196 3         19 my ($url, %opts) = @_;
197 3         9 get(\%opts, $url, $cb);
198             }
199              
200             1;
201              
202             =head1 BUGS
203              
204             Not a bug: don't forget to set $AnyEvent::HTTP::MAX_PER_HOST to proper value.
205             See L for details.
206              
207             If you find any bug, please report.
208              
209             =head1 SEE ALSO
210              
211             L, L
212              
213             =head1 COPYRIGHT
214              
215             Copyright Oleg G .
216              
217             This library is free software; you can redistribute it and/or
218             modify it under the same terms as Perl itself.