File Coverage

blib/lib/AnyEvent/Google/PageRank.pm
Criterion Covered Total %
statement 51 51 100.0
branch 17 22 77.2
condition 6 10 60.0
subroutine 10 10 100.0
pod 3 3 100.0
total 87 96 90.6


line stmt bran cond sub pod time code
1             package AnyEvent::Google::PageRank;
2              
3             =head1 NAME
4              
5             AnyEvent::Google::PageRank - Non-blocking wrapper for WWW::Google::PageRank
6              
7             =cut
8              
9 8     8   222136 use AnyEvent::HTTP;
  8         367885  
  8         821  
10 8     8   8464 use URI::Escape;
  8         13387  
  8         554  
11 8     8   54 use Carp;
  8         12  
  8         487  
12 8     8   39 use base 'Exporter';
  8         15  
  8         679  
13 8     8   38 use strict;
  8         14  
  8         852  
14             {
15             # we really do not need LWP, but WWW::Google::PageRank uses it
16             # let's lie that LWP::UserAgent already loaded
17             local $INC{'LWP/UserAgent.pm'} = 1;
18             require WWW::Google::PageRank;
19             }
20              
21             =head1 SYNOPSIS
22              
23             =over
24              
25             =item Object-oriented interface
26              
27             use AnyEvent::Google::PageRank;
28             use AnyEvent;
29            
30             my @urls = qw(http://perl.org http://cpan.org http://perlmonks.org);
31             my $rank = AnyEvent::Google::PageRank->new(
32             timeout => 10,
33             proxy => 'localhost:3128'
34             );
35              
36             my $cv = AnyEvent->condvar;
37             $cv->begin for @urls;
38              
39             foreach my $url (@urls) {
40             $rank->get($url, sub {
41             my ($rank, $headers) = @_;
42             print "$url - ", defined($rank) ? $rank : "fail: $headers->{Status} - $headers->{Reason}", "\n";
43             $cv->end;
44             });
45             }
46              
47             $cv->recv;
48              
49             =item Procedural interface
50              
51             use AnyEvent::Google::PageRank qw(rank_get);
52             use AnyEvent;
53              
54             my @urls = qw(http://perl.org http://cpan.org http://perlmonks.org);
55             my $cv = AnyEvent->condvar;
56             $cv->begin for @urls;
57              
58             foreach my $url (@urls) {
59             rank_get $url, timeout => 10, proxy => 'localhost:3128', sub {
60             my ($rank, $headers) = @_;
61             print "$url - ", defined($rank) ? $rank : "fail: $headers->{Status} - $headers->{Reason}", "\n";
62             $cv->end;
63             };
64             }
65              
66             $cv->recv;
67              
68             =back
69              
70             =cut
71              
72             =head1 DESCRIPTION
73              
74             AnyEvent::Google::PageRank helps to get google pagerank for specified url, like WWW::Google::PageRank
75             does. But in contrast to WWW::Google::PageRank you can perform many requests in parallel. This module
76             uses AnyEvent::HTTP as HTTP client.
77              
78             =head1 EXPORT
79              
80             =over
81              
82             =item rank_get() - on request
83              
84             =back
85              
86             =cut
87              
88             our $VERSION = '0.04';
89             our @EXPORT_OK = qw(rank_get);
90              
91             use constant {
92 8         6156 DEFAULT_AGENT => 'Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)',
93             DEFAULT_HOST => 'toolbarqueries.google.com',
94 8     8   41 };
  8         9  
95              
96             =head1 METHODS
97              
98             =head2 new(%opts)
99              
100             Creates new AnyEvent::Google::PageRank object. The following options available (all are optional):
101              
102             KEY DESCRIPTION DEFAULT
103             ------------------------------------------------------------------
104             agent User-Agent value in the headers Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)
105             proxy http proxy as address:port undef
106             timeout timeout for network operations AnyEvent::HTTP default timeout
107             host host for query toolbarqueries.google.com
108             ae_http AnyEvent::HTTP request options as hashref undef
109              
110             =cut
111              
112             sub new {
113 6     6 1 8409 my ($class, %opts) = @_;
114            
115 6         36 my $self = {};
116 6   50     100 $self->{agent} = delete($opts{agent}) || DEFAULT_AGENT;
117 6         24 $self->{timeout} = delete($opts{timeout});
118 6         34 $self->{proxy} = delete($opts{proxy});
119 6         28 $self->{host} = delete($opts{host});
120 6         33 $self->{ae_http} = delete($opts{ae_http});
121            
122 6 100       33 if (%opts) {
123 1         285 croak 'Unrecognized options specified: ', join(', ', keys %opts);
124             }
125            
126 5         115 bless $self, $class;
127             }
128              
129             =head2 get($url, $cb->($rank, $headers))
130              
131             Get rank for specified url and call specified callback on finish. Parameters for callback are:
132             rank and headers. On fail rank will be undef and reason could be found in $headers->{Reason},
133             code in $headers->{Status}. Special codes provided by this module are:
134              
135             695 - malformed url
136              
137             For other codes see L
138              
139             =cut
140              
141             sub get {
142 7     7 1 29 my ($self, $url, $cb) = @_;
143            
144 7 50       82 croak 'Not a code reference in $cb'
145             if ref($cb) ne 'CODE';
146            
147 7 100       115 return $cb->(undef, {Status => 695, Reason => 'malformed url'}) if $url !~ m[^https?://]i;
148            
149 6         64 my $ch = '6' . WWW::Google::PageRank::_compute_ch_new('info:' . $url);
150 6   100     4577 my $query = 'http://' . ($self->{host}||DEFAULT_HOST) . '/tbr?client=navclient-auto&ch=' . $ch .
151             '&ie=UTF-8&oe=UTF-8&features=Rank&q=info:' . uri_escape($url);
152            
153 6         377 my $opts = {};
154 6 100       27 if (ref($self) eq 'HASH') {
155             # call from rank_get
156 3         6 $opts = $self;
157 3 50 33     17 $opts->{proxy} = [split /:/, $opts->{proxy}] if defined $opts->{proxy} && index($opts->{proxy}, ':') != -1;
158 3 50       28 $opts->{headers}{'User-Agent'} = exists($opts->{agent}) ? $opts->{agent} : DEFAULT_AGENT;
159             }
160             else {
161             # object call
162 3 100       18 %$opts = %{$self->{ae_http}} if ref($self->{ae_http}) eq 'HASH';
  1         6  
163 3 100       12 $opts->{timeout} = $self->{timeout} if defined $self->{timeout};
164 3 50       4148 $opts->{proxy} = [split /:/, $self->{proxy}] if defined $self->{proxy};
165 3 50       30 $opts->{headers}{'User-Agent'} = $self->{agent} if defined $self->{agent};
166             }
167            
168             http_get $query, %$opts, sub {
169 6     6   10121497 my ($data, $headers) = @_;
170            
171 6 100 66     193 if ($headers->{Status} =~ /^2/ && $data =~ /Rank_\d+:\d+:(\d+)/) {
172 4         29 $cb->($1, $headers);
173             }
174             else {
175 2         12 $cb->(undef, $headers);
176             }
177 6         100 };
178             }
179              
180             =head1 FUNCTIONS
181              
182             =head2 rank_get($url, key => val, ..., $cb->($rank, $headers))
183              
184             Get rank for specified url and call specified callback on finish. Key/value pairs
185             are options understanded by AnyEvent::HTTP::http_request() and new() method of this
186             module (except ae_http option). For $cb description see get() method.
187              
188             =cut
189              
190             sub rank_get {
191 4     4 1 5757 my $cb = pop @_;
192 4         24 my ($url, %opts) = @_;
193 4         18 get(\%opts, $url, $cb);
194             }
195              
196             1;
197              
198             =head1 BUGS
199              
200             Not a bug: don't forget to set $AnyEvent::HTTP::MAX_PER_HOST to proper value.
201             See L for details.
202              
203             If you find any bug, please report.
204              
205             =head1 SEE ALSO
206              
207             L, L
208              
209             =head1 COPYRIGHT
210              
211             Copyright Oleg G .
212              
213             This library is free software; you can redistribute it and/or
214             modify it under the same terms as Perl itself.