File Coverage

blib/lib/HTTP/Async/Polite.pm
Criterion Covered Total %
statement 74 74 100.0
branch 8 8 100.0
condition 4 4 100.0
subroutine 13 13 100.0
pod 4 4 100.0
total 103 103 100.0


line stmt bran cond sub pod time code
1 3     3   71610 use strict;
  3         5  
  3         71  
2 3     3   11 use warnings;
  3         1  
  3         86  
3              
4             package HTTP::Async::Polite;
5 3     3   9 use base 'HTTP::Async';
  3         3  
  3         873  
6              
7             our $VERSION = '0.33';
8              
9 3     3   10 use Carp;
  3         3  
  3         124  
10 3     3   10 use Data::Dumper;
  3         2  
  3         112  
11 3     3   9 use Time::HiRes qw( time sleep );
  3         3  
  3         12  
12 3     3   229 use URI;
  3         3  
  3         1150  
13              
14             =head1 NAME
15              
16             HTTP::Async::Polite - politely process multiple HTTP requests
17              
18             =head1 SYNOPSIS
19              
20             See L - the usage is unchanged.
21              
22             =head1 DESCRIPTION
23              
24             This L module allows you to have many requests going on at once.
25             This can be very rude if you are fetching several pages from the same domain.
26             This module add limits to the number of simultaneous requests to a given
27             domain and adds an interval between the requests.
28              
29             In all other ways it is identical in use to the original L.
30              
31             =head1 NEW METHODS
32              
33             =head2 send_interval
34              
35             Getter and setter for the C - the time in seconds to leave
36             between each request for a given domain. By default this is set to 5 seconds.
37              
38             =cut
39              
40             sub send_interval {
41 6     6 1 12 my $self = shift;
42             return scalar @_
43 6 100       34 ? $self->_set_opt( 'send_interval', @_ )
44             : $self->_get_opt('send_interval');
45             }
46              
47             =head1 OVERLOADED METHODS
48              
49             These methods are overloaded but otherwise work exactly as the original
50             methods did. The docs here just describe what they do differently.
51              
52             =head2 new
53              
54             Sets the C value to the default of 5 seconds.
55              
56             =cut
57              
58             sub new {
59 6     6 1 35 my $class = shift;
60              
61 6         31 my $self = $class->SUPER::new;
62              
63             # Set the interval between sends.
64 6         16 $self->{opts}{send_interval} = 5; # seconds
65 6         18 $class->_add_get_set_key('send_interval');
66              
67 6         11 $self->_init(@_);
68              
69 6         7 return $self;
70             }
71              
72             =head2 add_with_opts
73              
74             Adds the request to the correct queue depending on the domain.
75              
76             =cut
77              
78             sub add_with_opts {
79 7     7 1 6 my $self = shift;
80 7         5 my $req = shift;
81 7         6 my $opts = shift;
82 7         26 my $id = $self->_next_id;
83              
84             # Instead of putting this request and opts directly onto the to_send array
85             # instead get the domain and add it to the domain's queue. Store this
86             # domain with the opts so that it is easy to get at.
87 7         18 my $uri = URI->new( $req->uri );
88 7         378 my $host = $uri->host;
89 7         148 my $port = $uri->port;
90 7         112 my $domain = "$host:$port";
91 7         20 $opts->{_domain} = $domain;
92              
93             # Get the domain array - create it if needed.
94 7   100     42 my $domain_arrayref = $self->{domain_stats}{$domain}{to_send} ||= [];
95              
96 7         9 push @{$domain_arrayref}, [ $req, $id ];
  7         12  
97 7         23 $self->{id_opts}{$id} = $opts;
98              
99 7         19 $self->poke;
100              
101 7         20 return $id;
102             }
103              
104             =head2 to_send_count
105              
106             Returns the number of requests waiting to be sent. This is the number in the
107             actual queue plus the number in each domain specific queue.
108              
109             =cut
110              
111             sub to_send_count {
112 200     200 1 389 my $self = shift;
113 200         923 $self->poke;
114              
115 200         183 my $count = scalar @{ $$self{to_send} };
  200         383  
116              
117 390         577 $count += scalar @{ $self->{domain_stats}{$_}{to_send} }
118 200         217 for keys %{ $self->{domain_stats} };
  200         566  
119              
120 200         781 return $count;
121             }
122              
123             sub _process_to_send {
124 605     605   517 my $self = shift;
125              
126             # Go through the domain specific queues and add all requests that we can
127             # to the real queue.
128 605         418 foreach my $domain ( keys %{ $self->{domain_stats} } ) {
  605         1441  
129              
130 1177         1037 my $domain_stats = $self->{domain_stats}{$domain};
131 1177 100       718 next unless scalar @{ $domain_stats->{to_send} };
  1177         2020  
132              
133             # warn "TRYING TO ADD REQUEST FOR $domain";
134             # warn sleep 5;
135              
136             # Check that this request is good to go.
137 966 100       1471 next if $domain_stats->{count};
138 933 100 100     2910 next unless time > ( $domain_stats->{next_send} || 0 );
139              
140             # We can add this request.
141 7         14 $domain_stats->{count}++;
142 7         19 push @{ $self->{to_send} }, shift @{ $domain_stats->{to_send} };
  7         11  
  7         27  
143             }
144              
145             # Use the original to send the requests on the queue.
146 605         1615 return $self->SUPER::_process_to_send;
147             }
148              
149             sub _add_to_return_queue {
150 7     7   6 my $self = shift;
151 7         8 my $req_and_id = shift;
152              
153             # decrement the count for this domain so that another request can start.
154             # Also set the interval so that we don't scrape too fast.
155 7         10 my $id = $req_and_id->[1];
156 7         11 my $domain = $self->{id_opts}{$id}{_domain};
157 7         9 my $domain_stat = $self->{domain_stats}{$domain};
158 7         14 my $interval = $self->_get_opt( 'send_interval', $id );
159              
160 7         10 $domain_stat->{count}--;
161 7         20 $domain_stat->{next_send} = time + $interval;
162              
163 7         26 return $self->SUPER::_add_to_return_queue($req_and_id);
164             }
165              
166             =head1 SEE ALSO
167              
168             L - the module that this one is based on.
169              
170             =head1 AUTHOR
171              
172             Edmund von der Burg C<< >>.
173              
174             L
175              
176             =head1 LICENCE AND COPYRIGHT
177              
178             Copyright (c) 2006, Edmund von der Burg C<< >>.
179             All rights reserved.
180              
181             This module is free software; you can redistribute it and/or modify it under
182             the same terms as Perl itself.
183              
184             =head1 DISCLAIMER OF WARRANTY
185              
186             BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE
187             SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE
188             STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE
189             SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
190             INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
191             FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND
192             PERFORMANCE OF THE SOFTWARE IS WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE,
193             YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR, OR CORRECTION.
194              
195             IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY
196             COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE
197             SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE TO YOU FOR DAMAGES,
198             INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING
199             OUT OF THE USE OR INABILITY TO USE THE SOFTWARE (INCLUDING BUT NOT LIMITED TO
200             LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR
201             THIRD PARTIES OR A FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER
202             SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
203             POSSIBILITY OF SUCH DAMAGES.
204              
205             =cut
206              
207             1;
208