File Coverage

blib/lib/AnyEvent/HTTP.pm
Criterion Covered Total %
statement 94 365 25.7
branch 35 244 14.3
condition 6 97 6.1
subroutine 16 30 53.3
pod 8 12 66.6
total 159 748 21.2


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4              
5             =head1 SYNOPSIS
6              
7             use AnyEvent::HTTP;
8              
9             http_get "http://www.nethype.de/", sub { print $_[1] };
10              
11             # ... do something else here
12              
13             =head1 DESCRIPTION
14              
15             This module is an L user, you need to make sure that you use and
16             run a supported event loop.
17              
18             This module implements a simple, stateless and non-blocking HTTP
19             client. It supports GET, POST and other request methods, cookies and more,
20             all on a very low level. It can follow redirects, supports proxies, and
21             automatically limits the number of connections to the values specified in
22             the RFC.
23              
24             It should generally be a "good client" that is enough for most HTTP
25             tasks. Simple tasks should be simple, but complex tasks should still be
26             possible as the user retains control over request and response headers.
27              
28             The caller is responsible for authentication management, cookies (if
29             the simplistic implementation in this module doesn't suffice), referer
30             and other high-level protocol details for which this module offers only
31             limited support.
32              
33             =head2 METHODS
34              
35             =over 4
36              
37             =cut
38              
39             package AnyEvent::HTTP;
40              
41 3     3   3462 use common::sense;
  3         42  
  3         15  
42              
43 3     3   700 use Errno ();
  3         1450  
  3         69  
44              
45 3     3   1062 use AnyEvent 5.0 ();
  3         5708  
  3         64  
46 3     3   596 use AnyEvent::Util ();
  3         11711  
  3         52  
47 3     3   2184 use AnyEvent::Handle ();
  3         24175  
  3         87  
48              
49 3     3   22 use base Exporter::;
  3         7  
  3         24795  
50              
51             our $VERSION = 2.25;
52              
53             our @EXPORT = qw(http_get http_post http_head http_request);
54              
55             our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
56             our $MAX_RECURSE = 10;
57             our $PERSISTENT_TIMEOUT = 3;
58             our $TIMEOUT = 300;
59             our $MAX_PER_HOST = 4; # changing this is evil
60              
61             our $PROXY;
62             our $ACTIVE = 0;
63              
64             my %KA_CACHE; # indexed by uhost currently, points to [$handle...] array
65             my %CO_SLOT; # number of open connections, and wait queue, per host
66              
67             =item http_get $url, key => value..., $cb->($data, $headers)
68              
69             Executes an HTTP-GET request. See the http_request function for details on
70             additional parameters and the return value.
71              
72             =item http_head $url, key => value..., $cb->($data, $headers)
73              
74             Executes an HTTP-HEAD request. See the http_request function for details
75             on additional parameters and the return value.
76              
77             =item http_post $url, $body, key => value..., $cb->($data, $headers)
78              
79             Executes an HTTP-POST request with a request body of C<$body>. See the
80             http_request function for details on additional parameters and the return
81             value.
82              
83             =item http_request $method => $url, key => value..., $cb->($data, $headers)
84              
85             Executes a HTTP request of type C<$method> (e.g. C, C). The URL
86             must be an absolute http or https URL.
87              
88             When called in void context, nothing is returned. In other contexts,
89             C returns a "cancellation guard" - you have to keep the
90             object at least alive until the callback get called. If the object gets
91             destroyed before the callback is called, the request will be cancelled.
92              
93             The callback will be called with the response body data as first argument
94             (or C if an error occurred), and a hash-ref with response headers
95             (and trailers) as second argument.
96              
97             All the headers in that hash are lowercased. In addition to the response
98             headers, the "pseudo-headers" (uppercase to avoid clashing with possible
99             response headers) C, C and C contain the
100             three parts of the HTTP Status-Line of the same name. If an error occurs
101             during the body phase of a request, then the original C and
102             C values from the header are available as C and
103             C.
104              
105             The pseudo-header C contains the actual URL (which can differ from
106             the requested URL when following redirects - for example, you might get
107             an error that your URL scheme is not supported even though your URL is a
108             valid http URL because it redirected to an ftp URL, in which case you can
109             look at the URL pseudo header).
110              
111             The pseudo-header C only exists when the request was a result
112             of an internal redirect. In that case it is an array reference with
113             the C<($data, $headers)> from the redirect response. Note that this
114             response could in turn be the result of a redirect itself, and C<<
115             $headers->{Redirect}[1]{Redirect} >> will then contain the original
116             response, and so on.
117              
118             If the server sends a header multiple times, then their contents will be
119             joined together with a comma (C<,>), as per the HTTP spec.
120              
121             If an internal error occurs, such as not being able to resolve a hostname,
122             then C<$data> will be C, C<< $headers->{Status} >> will be
123             C<590>-C<599> and the C pseudo-header will contain an error
124             message. Currently the following status codes are used:
125              
126             =over 4
127              
128             =item 595 - errors during connection establishment, proxy handshake.
129              
130             =item 596 - errors during TLS negotiation, request sending and header processing.
131              
132             =item 597 - errors during body receiving or processing.
133              
134             =item 598 - user aborted request via C or C.
135              
136             =item 599 - other, usually nonretryable, errors (garbled URL etc.).
137              
138             =back
139              
140             A typical callback might look like this:
141              
142             sub {
143             my ($body, $hdr) = @_;
144              
145             if ($hdr->{Status} =~ /^2/) {
146             ... everything should be ok
147             } else {
148             print "error, $hdr->{Status} $hdr->{Reason}\n";
149             }
150             }
151              
152             Additional parameters are key-value pairs, and are fully optional. They
153             include:
154              
155             =over 4
156              
157             =item recurse => $count (default: $MAX_RECURSE)
158              
159             Whether to recurse requests or not, e.g. on redirects, authentication and
160             other retries and so on, and how often to do so.
161              
162             Only redirects to http and https URLs are supported. While most common
163             redirection forms are handled entirely within this module, some require
164             the use of the optional L module. If it is required but missing, then
165             the request will fail with an error.
166              
167             =item headers => hashref
168              
169             The request headers to use. Currently, C may provide its own
170             C, C, C and C headers and
171             will provide defaults at least for C, C and C
172             (this can be suppressed by using C for these headers in which case
173             they won't be sent at all).
174              
175             You really should provide your own C header value that is
176             appropriate for your program - I wouldn't be surprised if the default
177             AnyEvent string gets blocked by webservers sooner or later.
178              
179             Also, make sure that your headers names and values do not contain any
180             embedded newlines.
181              
182             =item timeout => $seconds
183              
184             The time-out to use for various stages - each connect attempt will reset
185             the timeout, as will read or write activity, i.e. this is not an overall
186             timeout.
187              
188             Default timeout is 5 minutes.
189              
190             =item proxy => [$host, $port[, $scheme]] or undef
191              
192             Use the given http proxy for all requests, or no proxy if C is
193             used.
194              
195             C<$scheme> must be either missing or must be C for HTTP.
196              
197             If not specified, then the default proxy is used (see
198             C).
199              
200             Currently, if your proxy requires authorization, you have to specify an
201             appropriate "Proxy-Authorization" header in every request.
202              
203             Note that this module will prefer an existing persistent connection,
204             even if that connection was made using another proxy. If you need to
205             ensure that a new connection is made in this case, you can either force
206             C to false or e.g. use the proxy address in your C.
207              
208             =item body => $string
209              
210             The request body, usually empty. Will be sent as-is (future versions of
211             this module might offer more options).
212              
213             =item cookie_jar => $hash_ref
214              
215             Passing this parameter enables (simplified) cookie-processing, loosely
216             based on the original netscape specification.
217              
218             The C<$hash_ref> must be an (initially empty) hash reference which
219             will get updated automatically. It is possible to save the cookie jar
220             to persistent storage with something like JSON or Storable - see the
221             C function if you wish to remove
222             expired or session-only cookies, and also for documentation on the format
223             of the cookie jar.
224              
225             Note that this cookie implementation is not meant to be complete. If
226             you want complete cookie management you have to do that on your
227             own. C is meant as a quick fix to get most cookie-using sites
228             working. Cookies are a privacy disaster, do not use them unless required
229             to.
230              
231             When cookie processing is enabled, the C and C
232             headers will be set and handled by this module, otherwise they will be
233             left untouched.
234              
235             =item tls_ctx => $scheme | $tls_ctx
236              
237             Specifies the AnyEvent::TLS context to be used for https connections. This
238             parameter follows the same rules as the C parameter to
239             L, but additionally, the two strings C or
240             C can be specified, which give you a predefined low-security (no
241             verification, highest compatibility) and high-security (CA and common-name
242             verification) TLS context.
243              
244             The default for this option is C, which could be interpreted as "give
245             me the page, no matter what".
246              
247             See also the C parameter.
248              
249             =item sessionid => $string
250              
251             The module might reuse connections to the same host internally (regardless
252             of other settings, such as C or C). Sometimes (e.g.
253             when using TLS or a specfic proxy), you do not want to reuse connections
254             from other sessions. This can be achieved by setting this parameter to
255             some unique ID (such as the address of an object storing your state data
256             or the TLS context, or the proxy IP) - only connections using the same
257             unique ID will be reused.
258              
259             =item on_prepare => $callback->($fh)
260              
261             In rare cases you need to "tune" the socket before it is used to
262             connect (for example, to bind it on a given IP address). This parameter
263             overrides the prepare callback passed to C
264             and behaves exactly the same way (e.g. it has to provide a
265             timeout). See the description for the C<$prepare_cb> argument of
266             C for details.
267              
268             =item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb)
269              
270             In even rarer cases you want total control over how AnyEvent::HTTP
271             establishes connections. Normally it uses L
272             to do this, but you can provide your own C function -
273             obviously, it has to follow the same calling conventions, except that it
274             may always return a connection guard object.
275              
276             The connections made by this hook will be treated as equivalent to
277             connections made the built-in way, specifically, they will be put into
278             and taken from the persistent connection cache. If your C<$tcp_connect>
279             function is incompatible with this kind of re-use, consider switching off
280             C connections and/or providing a C identifier.
281              
282             There are probably lots of weird uses for this function, starting from
283             tracing the hosts C actually tries to connect, to (inexact
284             but fast) host => IP address caching or even socks protocol support.
285              
286             =item on_header => $callback->($headers)
287              
288             When specified, this callback will be called with the header hash as soon
289             as headers have been successfully received from the remote server (not on
290             locally-generated errors).
291              
292             It has to return either true (in which case AnyEvent::HTTP will continue),
293             or false, in which case AnyEvent::HTTP will cancel the download (and call
294             the finish callback with an error code of C<598>).
295              
296             This callback is useful, among other things, to quickly reject unwanted
297             content, which, if it is supposed to be rare, can be faster than first
298             doing a C request.
299              
300             The downside is that cancelling the request makes it impossible to re-use
301             the connection. Also, the C callback will not receive any
302             trailer (headers sent after the response body).
303              
304             Example: cancel the request unless the content-type is "text/html".
305              
306             on_header => sub {
307             $_[0]{"content-type"} =~ /^text\/html\s*(?:;|$)/
308             },
309              
310             =item on_body => $callback->($partial_body, $headers)
311              
312             When specified, all body data will be passed to this callback instead of
313             to the completion callback. The completion callback will get the empty
314             string instead of the body data.
315              
316             It has to return either true (in which case AnyEvent::HTTP will continue),
317             or false, in which case AnyEvent::HTTP will cancel the download (and call
318             the completion callback with an error code of C<598>).
319              
320             The downside to cancelling the request is that it makes it impossible to
321             re-use the connection.
322              
323             This callback is useful when the data is too large to be held in memory
324             (so the callback writes it to a file) or when only some information should
325             be extracted, or when the body should be processed incrementally.
326              
327             It is usually preferred over doing your own body handling via
328             C, but in case of streaming APIs, where HTTP is
329             only used to create a connection, C is the better
330             alternative, as it allows you to install your own event handler, reducing
331             resource usage.
332              
333             =item want_body_handle => $enable
334              
335             When enabled (default is disabled), the behaviour of AnyEvent::HTTP
336             changes considerably: after parsing the headers, and instead of
337             downloading the body (if any), the completion callback will be
338             called. Instead of the C<$body> argument containing the body data, the
339             callback will receive the L object associated with the
340             connection. In error cases, C will be passed. When there is no body
341             (e.g. status C<304>), the empty string will be passed.
342              
343             The handle object might or might not be in TLS mode, might be connected
344             to a proxy, be a persistent connection, use chunked transfer encoding
345             etc., and configured in unspecified ways. The user is responsible for this
346             handle (it will not be used by this module anymore).
347              
348             This is useful with some push-type services, where, after the initial
349             headers, an interactive protocol is used (typical example would be the
350             push-style twitter API which starts a JSON/XML stream).
351              
352             If you think you need this, first have a look at C, to see if
353             that doesn't solve your problem in a better way.
354              
355             =item persistent => $boolean
356              
357             Try to create/reuse a persistent connection. When this flag is set
358             (default: true for idempotent requests, false for all others), then
359             C tries to re-use an existing (previously-created)
360             persistent connection to same host (i.e. identical URL scheme, hostname,
361             port and sessionid) and, failing that, tries to create a new one.
362              
363             Requests failing in certain ways will be automatically retried once, which
364             is dangerous for non-idempotent requests, which is why it defaults to off
365             for them. The reason for this is because the bozos who designed HTTP/1.1
366             made it impossible to distinguish between a fatal error and a normal
367             connection timeout, so you never know whether there was a problem with
368             your request or not.
369              
370             When reusing an existent connection, many parameters (such as TLS context)
371             will be ignored. See the C parameter for a workaround.
372              
373             =item keepalive => $boolean
374              
375             Only used when C is also true. This parameter decides whether
376             C tries to handshake a HTTP/1.0-style keep-alive connection
377             (as opposed to only a HTTP/1.1 persistent connection).
378              
379             The default is true, except when using a proxy, in which case it defaults
380             to false, as HTTP/1.0 proxies cannot support this in a meaningful way.
381              
382             =item handle_params => { key => value ... }
383              
384             The key-value pairs in this hash will be passed to any L
385             constructor that is called - not all requests will create a handle, and
386             sometimes more than one is created, so this parameter is only good for
387             setting hints.
388              
389             Example: set the maximum read size to 4096, to potentially conserve memory
390             at the cost of speed.
391              
392             handle_params => {
393             max_read_size => 4096,
394             },
395              
396             =back
397              
398             Example: do a simple HTTP GET request for http://www.nethype.de/ and print
399             the response body.
400              
401             http_request GET => "http://www.nethype.de/", sub {
402             my ($body, $hdr) = @_;
403             print "$body\n";
404             };
405              
406             Example: do a HTTP HEAD request on https://www.google.com/, use a
407             timeout of 30 seconds.
408              
409             http_request
410             HEAD => "https://www.google.com",
411             headers => { "user-agent" => "MySearchClient 1.0" },
412             timeout => 30,
413             sub {
414             my ($body, $hdr) = @_;
415             use Data::Dumper;
416             print Dumper $hdr;
417             }
418             ;
419              
420             Example: do another simple HTTP GET request, but immediately try to
421             cancel it.
422              
423             my $request = http_request GET => "http://www.nethype.de/", sub {
424             my ($body, $hdr) = @_;
425             print "$body\n";
426             };
427              
428             undef $request;
429              
430             =cut
431              
432             #############################################################################
433             # wait queue/slots
434              
435             sub _slot_schedule;
436             sub _slot_schedule($) {
437 16     16   32 my $host = shift;
438              
439 16         48 while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
440 24 100       5329 if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
  24         73  
441             # somebody wants that slot
442 8         18 ++$CO_SLOT{$host}[0];
443 8         9 ++$ACTIVE;
444              
445             $cb->(AnyEvent::Util::guard {
446 8     8   16 --$ACTIVE;
447 8         17 --$CO_SLOT{$host}[0];
448 8         23 _slot_schedule $host;
449 8         52 });
450             } else {
451             # nobody wants the slot, maybe we can forget about it
452 16 100       61 delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
453 16         74 last;
454             }
455             }
456             }
457              
458             # wait for a free slot on host, call callback
459             sub _get_slot($$) {
460 8     8   14 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
  8         31  
461              
462 8         19 _slot_schedule $_[0];
463             }
464              
465             #############################################################################
466             # cookie handling
467              
468             # expire cookies
469             sub cookie_jar_expire($;$) {
470 0     0 1 0 my ($jar, $session_end) = @_;
471              
472 0 0       0 %$jar = () if $jar->{version} != 2;
473              
474 0         0 my $anow = AE::now;
475              
476 0         0 while (my ($chost, $paths) = each %$jar) {
477 0 0       0 next unless ref $paths;
478              
479 0         0 while (my ($cpath, $cookies) = each %$paths) {
480 0         0 while (my ($cookie, $kv) = each %$cookies) {
481 0 0       0 if (exists $kv->{_expires}) {
    0          
482             delete $cookies->{$cookie}
483 0 0       0 if $anow > $kv->{_expires};
484             } elsif ($session_end) {
485 0         0 delete $cookies->{$cookie};
486             }
487             }
488              
489 0 0       0 delete $paths->{$cpath}
490             unless %$cookies;
491             }
492              
493 0 0       0 delete $jar->{$chost}
494             unless %$paths;
495             }
496             }
497            
498             # extract cookies from jar
499             sub cookie_jar_extract($$$$) {
500 0     0 0 0 my ($jar, $scheme, $host, $path) = @_;
501              
502 0 0       0 %$jar = () if $jar->{version} != 2;
503              
504 0 0       0 $host = AnyEvent::Util::idn_to_ascii $host
505             if $host =~ /[^\x00-\x7f]/;
506              
507 0         0 my @cookies;
508              
509 0         0 while (my ($chost, $paths) = each %$jar) {
510 0 0       0 next unless ref $paths;
511              
512             # exact match or suffix including . match
513 0 0 0     0 $chost eq $host or ".$chost" eq substr $host, -1 - length $chost
514             or next;
515              
516 0         0 while (my ($cpath, $cookies) = each %$paths) {
517 0 0       0 next unless $cpath eq substr $path, 0, length $cpath;
518              
519 0         0 while (my ($cookie, $kv) = each %$cookies) {
520 0 0 0     0 next if $scheme ne "https" && exists $kv->{secure};
521              
522 0 0 0     0 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
523 0         0 delete $cookies->{$cookie};
524 0         0 next;
525             }
526              
527 0         0 my $value = $kv->{value};
528              
529 0 0       0 if ($value =~ /[=;,[:space:]]/) {
530 0         0 $value =~ s/([\\"])/\\$1/g;
531 0         0 $value = "\"$value\"";
532             }
533              
534 0         0 push @cookies, "$cookie=$value";
535             }
536             }
537             }
538              
539             \@cookies
540 0         0 }
541            
542             # parse set_cookie header into jar
543             sub cookie_jar_set_cookie($$$$) {
544 0     0 0 0 my ($jar, $set_cookie, $host, $date) = @_;
545              
546 0 0       0 %$jar = () if $jar->{version} != 2;
547              
548 0         0 my $anow = int AE::now;
549 0         0 my $snow; # server-now
550              
551 0         0 for ($set_cookie) {
552             # parse NAME=VALUE
553 0         0 my @kv;
554              
555             # expires is not http-compliant in the original cookie-spec,
556             # we support the official date format and some extensions
557 0         0 while (
558             m{
559             \G\s*
560             (?:
561             expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
562             | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )?
563             )
564             }gcxsi
565             ) {
566 0         0 my $name = $2;
567 0         0 my $value = $4;
568              
569 0 0       0 if (defined $1) {
    0          
570             # expires
571 0         0 $name = "expires";
572 0         0 $value = $1;
573             } elsif (defined $3) {
574             # quoted
575 0         0 $value = $3;
576 0         0 $value =~ s/\\(.)/$1/gs;
577             }
578              
579 0 0       0 push @kv, @kv ? lc $name : $name, $value;
580              
581 0 0       0 last unless /\G\s*;/gc;
582             }
583              
584 0 0       0 last unless @kv;
585              
586 0         0 my $name = shift @kv;
587 0         0 my %kv = (value => shift @kv, @kv);
588              
589 0 0       0 if (exists $kv{"max-age"}) {
    0          
590 0         0 $kv{_expires} = $anow + delete $kv{"max-age"};
591             } elsif (exists $kv{expires}) {
592 0   0     0 $snow ||= parse_date ($date) || $anow;
      0        
593 0         0 $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
594             } else {
595 0         0 delete $kv{_expires};
596             }
597              
598 0         0 my $cdom;
599 0   0     0 my $cpath = (delete $kv{path}) || "/";
600              
601 0 0       0 if (exists $kv{domain}) {
602 0         0 $cdom = $kv{domain};
603              
604 0         0 $cdom =~ s/^\.?/./; # make sure it starts with a "."
605              
606 0 0       0 next if $cdom =~ /\.$/;
607              
608             # this is not rfc-like and not netscape-like. go figure.
609 0         0 my $ndots = $cdom =~ y/.//;
610 0 0       0 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
    0          
611              
612 0         0 $cdom = substr $cdom, 1; # remove initial .
613             } else {
614 0         0 $cdom = $host;
615             }
616              
617             # store it
618 0         0 $jar->{version} = 2;
619 0         0 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
620              
621 0 0       0 redo if /\G\s*,/gc;
622             }
623             }
624              
625             #############################################################################
626             # keepalive/persistent connection cache
627              
628             # fetch a connection from the keepalive cache
629             sub ka_fetch($) {
630 0     0 0 0 my $ka_key = shift;
631              
632 0         0 my $hdl = pop @{ $KA_CACHE{$ka_key} }; # currently we reuse the MOST RECENTLY USED connection
  0         0  
633             delete $KA_CACHE{$ka_key}
634 0 0       0 unless @{ $KA_CACHE{$ka_key} };
  0         0  
635              
636 0         0 $hdl
637             }
638              
639             sub ka_store($$) {
640 0     0 0 0 my ($ka_key, $hdl) = @_;
641              
642 0   0     0 my $kaa = $KA_CACHE{$ka_key} ||= [];
643              
644             my $destroy = sub {
645 0     0   0 my @ka = grep $_ != $hdl, @{ $KA_CACHE{$ka_key} };
  0         0  
646              
647 0         0 $hdl->destroy;
648              
649             @ka
650             ? $KA_CACHE{$ka_key} = \@ka
651 0 0       0 : delete $KA_CACHE{$ka_key};
652 0         0 };
653              
654             # on error etc., destroy
655 0         0 $hdl->on_error ($destroy);
656 0         0 $hdl->on_eof ($destroy);
657 0         0 $hdl->on_read ($destroy);
658 0         0 $hdl->timeout ($PERSISTENT_TIMEOUT);
659              
660 0         0 push @$kaa, $hdl;
661 0         0 shift @$kaa while @$kaa > $MAX_PER_HOST;
662             }
663              
664             #############################################################################
665             # utilities
666              
667             # continue to parse $_ for headers and place them into the arg
668             sub _parse_hdr() {
669 0     0   0 my %hdr;
670              
671             # things seen, not parsed:
672             # p3pP="NON CUR OTPi OUR NOR UNI"
673              
674 0         0 $hdr{lc $1} .= ",$2"
675             while /\G
676             ([^:\000-\037]*):
677             [\011\040]*
678             ((?: [^\012]+ | \012[\011\040] )*)
679             \012
680             /gxc;
681              
682 0 0       0 /\G$/
683             or return;
684              
685             # remove the "," prefix we added to all headers above
686             substr $_, 0, 1, ""
687 0         0 for values %hdr;
688              
689 0         0 \%hdr
690             }
691              
692             #############################################################################
693             # http_get
694              
695             our $qr_nlnl = qr{(?
696              
697             our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 };
698             our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" };
699              
700             # maybe it should just become a normal object :/
701              
702             sub _destroy_state(\%) {
703 8     8   18 my ($state) = @_;
704              
705 8 50       23 $state->{handle}->destroy if $state->{handle};
706 8         66 %$state = ();
707             }
708              
709             sub _error(\%$$) {
710 8     8   24 my ($state, $cb, $hdr) = @_;
711              
712 8         24 &_destroy_state ($state);
713              
714 8         63 $cb->(undef, $hdr);
715             ()
716 8         588 }
717              
718             our %IDEMPOTENT = (
719             DELETE => 1,
720             GET => 1,
721             HEAD => 1,
722             OPTIONS => 1,
723             PUT => 1,
724             TRACE => 1,
725              
726             ACL => 1,
727             "BASELINE-CONTROL" => 1,
728             BIND => 1,
729             CHECKIN => 1,
730             CHECKOUT => 1,
731             COPY => 1,
732             LABEL => 1,
733             LINK => 1,
734             MERGE => 1,
735             MKACTIVITY => 1,
736             MKCALENDAR => 1,
737             MKCOL => 1,
738             MKREDIRECTREF => 1,
739             MKWORKSPACE => 1,
740             MOVE => 1,
741             ORDERPATCH => 1,
742             PROPFIND => 1,
743             PROPPATCH => 1,
744             REBIND => 1,
745             REPORT => 1,
746             SEARCH => 1,
747             UNBIND => 1,
748             UNCHECKOUT => 1,
749             UNLINK => 1,
750             UNLOCK => 1,
751             UPDATE => 1,
752             UPDATEREDIRECTREF => 1,
753             "VERSION-CONTROL" => 1,
754             );
755              
756             sub http_request($$@) {
757 8     8 1 13 my $cb = pop;
758 8         30 my ($method, $url, %arg) = @_;
759              
760 8         13 my %hdr;
761              
762 8 50 33     56 $arg{tls_ctx} = $TLS_CTX_LOW if $arg{tls_ctx} eq "low" || !exists $arg{tls_ctx};
763 8 50       27 $arg{tls_ctx} = $TLS_CTX_HIGH if $arg{tls_ctx} eq "high";
764              
765 8         20 $method = uc $method;
766              
767 8 50       19 if (my $hdr = $arg{headers}) {
768 0         0 while (my ($k, $v) = each %$hdr) {
769 0         0 $hdr{lc $k} = $v;
770             }
771             }
772              
773             # pseudo headers for all subsequent responses
774 8         20 my @pseudo = (URL => $url);
775 8 50       20 push @pseudo, Redirect => delete $arg{Redirect} if exists $arg{Redirect};
776              
777 8 50       24 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
778              
779 8 50       17 return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" })
780             if $recurse < 0;
781              
782 8 50       16 my $proxy = exists $arg{proxy} ? $arg{proxy} : $PROXY;
783 8   33     25 my $timeout = $arg{timeout} || $TIMEOUT;
784              
785 8         65 my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment
786             $url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|;
787              
788 8         22 $uscheme = lc $uscheme;
789              
790 8 0       21 my $uport = $uscheme eq "http" ? 80
    50          
791             : $uscheme eq "https" ? 443
792             : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
793              
794 8 50       58 $uauthority =~ /^(?: .*\@ )? ([^\@]+?) (?: : (\d+) )?$/x
795             or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
796              
797 8         29 my $uhost = lc $1;
798 8 100       27 $uport = $2 if defined $2;
799              
800             $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
801 8 100       35 unless exists $hdr{host};
    50          
802              
803 8         32 $uhost =~ s/^\[(.*)\]$/$1/;
804 8 50       21 $upath .= $query if length $query;
805              
806 8         25 $upath =~ s%^/?%/%;
807              
808             # cookie processing
809 8 50       21 if (my $jar = $arg{cookie_jar}) {
810 0         0 my $cookies = cookie_jar_extract $jar, $uscheme, $uhost, $upath;
811              
812 0 0       0 $hdr{cookie} = join "; ", @$cookies
813             if @$cookies;
814             }
815              
816 8         15 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
817              
818 8 50       16 if ($proxy) {
819 0         0 ($rpath, $rhost, $rport, $rscheme) = ($url, @$proxy);
820              
821 0 0       0 $rscheme = "http" unless defined $rscheme;
822              
823             # don't support https requests over https-proxy transport,
824             # can't be done with tls as spec'ed, unless you double-encrypt.
825 0 0 0     0 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
826              
827 0         0 $rhost = lc $rhost;
828 0         0 $rscheme = lc $rscheme;
829             } else {
830 8         22 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
831             }
832              
833             # leave out fragment and query string, just a heuristic
834 8 50       28 $hdr{referer} = "$uscheme://$uauthority$upath" unless exists $hdr{referer};
835 8 50       20 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
836              
837             $hdr{"content-length"} = length $arg{body}
838 8 50 33     37 if length $arg{body} || $method ne "GET";
839              
840 8         15 my $idempotent = $IDEMPOTENT{$method};
841              
842             # default value for keepalive is true iff the request is for an idempotent method
843 8 50       18 my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent;
844 8 50       22 my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : !$proxy;
845 8         11 my $was_persistent; # true if this is actually a recycled connection
846              
847             # the key to use in the keepalive cache
848 8         21 my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}";
849              
850 8 50       67 $hdr{connection} = ($persistent ? $keepalive ? "keep-alive, " : "" : "close, ") . "Te"; #1.1
    50          
851 8 50       20 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
852              
853 8         22 my %state = (connect_guard => 1);
854              
855 8         13 my $ae_error = 595; # connecting
856              
857             # handle actual, non-tunneled, request
858             my $handle_actual_request = sub {
859 0     0   0 $ae_error = 596; # request phase
860              
861 0         0 my $hdl = $state{handle};
862              
863 0 0 0     0 $hdl->starttls ("connect") if $uscheme eq "https" && !exists $hdl->{tls};
864              
865             # send request
866             $hdl->push_write (
867             "$method $rpath HTTP/1.1\015\012"
868             . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
869             . "\015\012"
870             . $arg{body}
871 0         0 );
872              
873             # return if error occurred during push_write()
874 0 0       0 return unless %state;
875              
876             # reduce memory usage, save a kitten, also re-use it for the response headers.
877 0         0 %hdr = ();
878              
879             # status line and headers
880             $state{read_response} = sub {
881 0 0       0 return unless %state;
882              
883 0         0 for ("$_[1]") {
884 0         0 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
885              
886 0 0       0 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
887             or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid server response" };
888              
889             # 100 Continue handling
890             # should not happen as we don't send expect: 100-continue,
891             # but we handle it just in case.
892             # since we send the request body regardless, if we get an error
893             # we are out of-sync, which we currently do NOT handle correctly.
894             return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
895 0 0       0 if $2 eq 100;
896              
897 0         0 push @pseudo,
898             HTTPVersion => $1,
899             Status => $2,
900             Reason => $3,
901             ;
902              
903 0 0       0 my $hdr = _parse_hdr
904             or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Garbled response headers" };
905              
906 0         0 %hdr = (%$hdr, @pseudo);
907             }
908              
909             # redirect handling
910             # relative uri handling forced by microsoft and other shitheads.
911             # we give our best and fall back to URI if available.
912 0 0       0 if (exists $hdr{location}) {
913 0         0 my $loc = $hdr{location};
914              
915 0 0       0 if ($loc =~ m%^//%) { # //
    0          
    0          
916 0         0 $loc = "$uscheme:$loc";
917              
918             } elsif ($loc eq "") {
919 0         0 $loc = $url;
920              
921             } elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple"
922 0         0 $loc =~ s/^\.\/+//;
923              
924 0 0       0 if ($loc !~ m%^[.?#]%) {
    0          
925 0         0 my $prefix = "$uscheme://$uauthority";
926              
927 0 0       0 unless ($loc =~ s/^\///) {
928 0         0 $prefix .= $upath;
929 0         0 $prefix =~ s/\/[^\/]*$//;
930             }
931              
932 0         0 $loc = "$prefix/$loc";
933              
934 0         0 } elsif (eval { require URI }) { # uri
935 0         0 $loc = URI->new_abs ($loc, $url)->as_string;
936              
937             } else {
938 0         0 return _error %state, $cb, { @pseudo, Status => 599, Reason => "Cannot parse Location (URI module missing)" };
939             #$hdr{Status} = 599;
940             #$hdr{Reason} = "Unparsable Redirect (URI module missing)";
941             #$recurse = 0;
942             }
943             }
944              
945 0         0 $hdr{location} = $loc;
946             }
947              
948 0         0 my $redirect;
949              
950 0 0       0 if ($recurse) {
951 0         0 my $status = $hdr{Status};
952              
953             # industry standard is to redirect POST as GET for
954             # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
955             # also, the UA should ask the user for 301 and 307 and POST,
956             # industry standard seems to be to simply follow.
957             # we go with the industry standard. 308 is defined
958             # by rfc7538
959 0 0 0     0 if ($status == 301 or $status == 302 or $status == 303) {
    0 0        
      0        
960 0         0 $redirect = 1;
961             # HTTP/1.1 is unclear on how to mutate the method
962 0 0       0 unless ($method eq "HEAD") {
963 0         0 $method = "GET";
964 0         0 delete $arg{body};
965             }
966             } elsif ($status == 307 or $status == 308) {
967 0         0 $redirect = 1;
968             }
969             }
970              
971             my $finish = sub { # ($data, $err_status, $err_reason[, $persistent])
972 0 0       0 if ($state{handle}) {
973             # handle keepalive
974 0 0 0     0 if (
    0 0        
975             $persistent
976             && $_[3]
977             && ($hdr{HTTPVersion} < 1.1
978             ? $hdr{connection} =~ /\bkeep-?alive\b/i
979             : $hdr{connection} !~ /\bclose\b/i)
980             ) {
981 0         0 ka_store $ka_key, delete $state{handle};
982             } else {
983             # no keepalive, destroy the handle
984 0         0 $state{handle}->destroy;
985             }
986             }
987              
988 0         0 %state = ();
989              
990 0 0       0 if (defined $_[1]) {
991 0         0 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
  0         0  
992 0         0 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
  0         0  
993             }
994              
995             # set-cookie processing
996 0 0       0 if ($arg{cookie_jar}) {
997 0         0 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
998             }
999              
1000 0 0 0     0 if ($redirect && exists $hdr{location}) {
1001             # we ignore any errors, as it is very common to receive
1002             # Content-Length != 0 but no actual body
1003             # we also access %hdr, as $_[1] might be an erro
1004             $state{recurse} =
1005             http_request (
1006             $method => $hdr{location},
1007             %arg,
1008             recurse => $recurse - 1,
1009             Redirect => [$_[0], \%hdr],
1010             sub {
1011 0         0 %state = ();
1012 0         0 &$cb
1013             },
1014 0         0 );
1015             } else {
1016 0         0 $cb->($_[0], \%hdr);
1017             }
1018 0         0 };
1019              
1020 0         0 $ae_error = 597; # body phase
1021              
1022 0         0 my $chunked = $hdr{"transfer-encoding"} =~ /\bchunked\b/i; # not quite correct...
1023              
1024 0 0       0 my $len = $chunked ? undef : $hdr{"content-length"};
1025              
1026             # body handling, many different code paths
1027             # - no body expected
1028             # - want_body_handle
1029             # - te chunked
1030             # - 2x length known (with or without on_body)
1031             # - 2x length not known (with or without on_body)
1032 0 0 0     0 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
    0 0        
    0 0        
    0 0        
    0 0        
      0        
      0        
1033 0         0 $finish->(undef, 598 => "Request cancelled by on_header");
1034             } elsif (
1035             $hdr{Status} =~ /^(?:1..|204|205|304)$/
1036             or $method eq "HEAD"
1037             or (defined $len && $len == 0) # == 0, not !, because "0 " is true
1038             ) {
1039             # no body
1040 0         0 $finish->("", undef, undef, 1);
1041              
1042             } elsif (!$redirect && $arg{want_body_handle}) {
1043 0         0 $_[0]->on_eof (undef);
1044 0         0 $_[0]->on_error (undef);
1045 0         0 $_[0]->on_read (undef);
1046              
1047 0         0 $finish->(delete $state{handle});
1048              
1049             } elsif ($chunked) {
1050 0         0 my $cl = 0;
1051 0         0 my $body = "";
1052 0   0     0 my $on_body = (!$redirect && $arg{on_body}) || sub { $body .= shift; 1 };
1053              
1054             $state{read_chunk} = sub {
1055 0 0       0 $_[1] =~ /^([0-9a-fA-F]+)/
1056             or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
1057              
1058 0         0 my $len = hex $1;
1059              
1060 0 0       0 if ($len) {
1061 0         0 $cl += $len;
1062              
1063             $_[0]->push_read (chunk => $len, sub {
1064 0 0       0 $on_body->($_[1], \%hdr)
1065             or return $finish->(undef, 598 => "Request cancelled by on_body");
1066              
1067             $_[0]->push_read (line => sub {
1068 0 0       0 length $_[1]
1069             and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
1070 0         0 $_[0]->push_read (line => $state{read_chunk});
1071 0         0 });
1072 0         0 });
1073             } else {
1074 0   0     0 $hdr{"content-length"} ||= $cl;
1075              
1076             $_[0]->push_read (line => $qr_nlnl, sub {
1077 0 0       0 if (length $_[1]) {
1078 0         0 for ("$_[1]") {
1079 0         0 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
1080              
1081 0 0       0 my $hdr = _parse_hdr
1082             or return $finish->(undef, $ae_error => "Garbled response trailers");
1083              
1084 0         0 %hdr = (%hdr, %$hdr);
1085             }
1086             }
1087              
1088 0         0 $finish->($body, undef, undef, 1);
1089 0         0 });
1090             }
1091 0         0 };
1092              
1093 0         0 $_[0]->push_read (line => $state{read_chunk});
1094              
1095             } elsif (!$redirect && $arg{on_body}) {
1096 0 0       0 if (defined $len) {
1097             $_[0]->on_read (sub {
1098 0         0 $len -= length $_[0]{rbuf};
1099              
1100 0 0       0 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1101             or return $finish->(undef, 598 => "Request cancelled by on_body");
1102              
1103 0 0       0 $len > 0
1104             or $finish->("", undef, undef, 1);
1105 0         0 });
1106             } else {
1107             $_[0]->on_eof (sub {
1108 0         0 $finish->("");
1109 0         0 });
1110             $_[0]->on_read (sub {
1111 0 0       0 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1112             or $finish->(undef, 598 => "Request cancelled by on_body");
1113 0         0 });
1114             }
1115             } else {
1116 0         0 $_[0]->on_eof (undef);
1117              
1118 0 0       0 if (defined $len) {
1119             $_[0]->on_read (sub {
1120             $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
1121 0 0       0 if $len <= length $_[0]{rbuf};
1122 0         0 });
1123             } else {
1124             $_[0]->on_error (sub {
1125             ($! == Errno::EPIPE || !$!)
1126             ? $finish->(delete $_[0]{rbuf})
1127 0 0 0     0 : $finish->(undef, $ae_error => $_[2]);
1128 0         0 });
1129 0         0 $_[0]->on_read (sub { });
1130             }
1131             }
1132 0         0 };
1133              
1134             # if keepalive is enabled, then the server closing the connection
1135             # before a response can happen legally - we retry on idempotent methods.
1136 0 0 0     0 if ($was_persistent && $idempotent) {
1137 0         0 my $old_eof = $hdl->{on_eof};
1138             $hdl->{on_eof} = sub {
1139 0         0 _destroy_state %state;
1140              
1141 0         0 %state = ();
1142             $state{recurse} =
1143             http_request (
1144             $method => $url,
1145             %arg,
1146             recurse => $recurse - 1,
1147             persistent => 0,
1148             sub {
1149 0         0 %state = ();
1150 0         0 &$cb
1151             }
1152 0         0 );
1153 0         0 };
1154             $hdl->on_read (sub {
1155 0 0       0 return unless %state;
1156              
1157             # as soon as we receive something, a connection close
1158             # once more becomes a hard error
1159 0         0 $hdl->{on_eof} = $old_eof;
1160 0         0 $hdl->push_read (line => $qr_nlnl, $state{read_response});
1161 0         0 });
1162             } else {
1163 0         0 $hdl->push_read (line => $qr_nlnl, $state{read_response});
1164             }
1165 8         36 };
1166              
1167             my $prepare_handle = sub {
1168 0     0   0 my ($hdl) = $state{handle};
1169              
1170             $hdl->on_error (sub {
1171 0         0 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] };
1172 0         0 });
1173             $hdl->on_eof (sub {
1174 0         0 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" };
1175 0         0 });
1176 0         0 $hdl->timeout_reset;
1177 0         0 $hdl->timeout ($timeout);
1178 8         27 };
1179              
1180             # connected to proxy (or origin server)
1181             my $connect_cb = sub {
1182 8 50   8   78198 my $fh = shift
1183             or return _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "$!" };
1184              
1185 0 0       0 return unless delete $state{connect_guard};
1186              
1187             # get handle
1188             $state{handle} = new AnyEvent::Handle
1189 0         0 %{ $arg{handle_params} },
1190             fh => $fh,
1191             peername => $uhost,
1192             tls_ctx => $arg{tls_ctx},
1193 0         0 ;
1194              
1195 0         0 $prepare_handle->();
1196              
1197             #$state{handle}->starttls ("connect") if $rscheme eq "https";
1198              
1199             # now handle proxy-CONNECT method
1200 0 0 0     0 if ($proxy && $uscheme eq "https") {
1201             # oh dear, we have to wrap it into a connect request
1202              
1203             my $auth = exists $hdr{"proxy-authorization"}
1204 0 0       0 ? "proxy-authorization: " . (delete $hdr{"proxy-authorization"}) . "\015\012"
1205             : "";
1206              
1207             # maybe re-use $uauthority with patched port?
1208 0         0 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012$auth\015\012");
1209             $state{handle}->push_read (line => $qr_nlnl, sub {
1210 0 0       0 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
1211             or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" };
1212              
1213 0 0       0 if ($2 == 200) {
1214 0         0 $rpath = $upath;
1215 0         0 $handle_actual_request->();
1216             } else {
1217 0         0 _error %state, $cb, { @pseudo, Status => $2, Reason => $3 };
1218             }
1219 0         0 });
1220             } else {
1221 0 0       0 delete $hdr{"proxy-authorization"} unless $proxy;
1222              
1223 0         0 $handle_actual_request->();
1224             }
1225 8         26 };
1226              
1227             _get_slot $uhost, sub {
1228 8     8   16 $state{slot_guard} = shift;
1229              
1230 8 50       19 return unless $state{connect_guard};
1231              
1232             # try to use an existing keepalive connection, but only if we, ourselves, plan
1233             # on a keepalive request (in theory, this should be a separate config option).
1234 8 50 33     31 if ($persistent && $KA_CACHE{$ka_key}) {
1235 0         0 $was_persistent = 1;
1236              
1237 0         0 $state{handle} = ka_fetch $ka_key;
1238             # $state{handle}->destroyed
1239             # and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d#
1240 0         0 $prepare_handle->();
1241             # $state{handle}->destroyed
1242             # and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d#
1243 0         0 $rpath = $upath;
1244 0         0 $handle_actual_request->();
1245              
1246             } else {
1247             my $tcp_connect = $arg{tcp_connect}
1248 8   33     20 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
1249              
1250 8   50     82 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
1251             }
1252 8         36 };
1253              
1254 0     0   0 defined wantarray && AnyEvent::Util::guard { _destroy_state %state }
1255 8 50       48 }
1256              
1257             sub http_get($@) {
1258 8     8 1 1634 unshift @_, "GET";
1259 8         19 &http_request
1260             }
1261              
1262             sub http_head($@) {
1263 0     0 1 0 unshift @_, "HEAD";
1264 0         0 &http_request
1265             }
1266              
1267             sub http_post($$@) {
1268 0     0 1 0 my $url = shift;
1269 0         0 unshift @_, "POST", $url, "body";
1270 0         0 &http_request
1271             }
1272              
1273             =back
1274              
1275             =head2 DNS CACHING
1276              
1277             AnyEvent::HTTP uses the AnyEvent::Socket::tcp_connect function for
1278             the actual connection, which in turn uses AnyEvent::DNS to resolve
1279             hostnames. The latter is a simple stub resolver and does no caching
1280             on its own. If you want DNS caching, you currently have to provide
1281             your own default resolver (by storing a suitable resolver object in
1282             C<$AnyEvent::DNS::RESOLVER>) or your own C callback.
1283              
1284             =head2 GLOBAL FUNCTIONS AND VARIABLES
1285              
1286             =over 4
1287              
1288             =item AnyEvent::HTTP::set_proxy "proxy-url"
1289              
1290             Sets the default proxy server to use. The proxy-url must begin with a
1291             string of the form C, croaks otherwise.
1292              
1293             To clear an already-set proxy, use C.
1294              
1295             When AnyEvent::HTTP is loaded for the first time it will query the
1296             default proxy from the operating system, currently by looking at
1297             C<$ENV{http_proxy>}.
1298              
1299             =item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1300              
1301             Remove all cookies from the cookie jar that have been expired. If
1302             C<$session_end> is given and true, then additionally remove all session
1303             cookies.
1304              
1305             You should call this function (with a true C<$session_end>) before you
1306             save cookies to disk, and you should call this function after loading them
1307             again. If you have a long-running program you can additionally call this
1308             function from time to time.
1309              
1310             A cookie jar is initially an empty hash-reference that is managed by this
1311             module. Its format is subject to change, but currently it is as follows:
1312              
1313             The key C has to contain C<2>, otherwise the hash gets
1314             cleared. All other keys are hostnames or IP addresses pointing to
1315             hash-references. The key for these inner hash references is the
1316             server path for which this cookie is meant, and the values are again
1317             hash-references. Each key of those hash-references is a cookie name, and
1318             the value, you guessed it, is another hash-reference, this time with the
1319             key-value pairs from the cookie, except for C and C,
1320             which have been replaced by a C<_expires> key that contains the cookie
1321             expiry timestamp. Session cookies are indicated by not having an
1322             C<_expires> key.
1323              
1324             Here is an example of a cookie jar with a single cookie, so you have a
1325             chance of understanding the above paragraph:
1326              
1327             {
1328             version => 2,
1329             "10.0.0.1" => {
1330             "/" => {
1331             "mythweb_id" => {
1332             _expires => 1293917923,
1333             value => "ooRung9dThee3ooyXooM1Ohm",
1334             },
1335             },
1336             },
1337             }
1338              
1339             =item $date = AnyEvent::HTTP::format_date $timestamp
1340              
1341             Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
1342             Date (RFC 2616).
1343              
1344             =item $timestamp = AnyEvent::HTTP::parse_date $date
1345              
1346             Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
1347             bunch of minor variations of those, and returns the corresponding POSIX
1348             timestamp, or C if the date cannot be parsed.
1349              
1350             =item $AnyEvent::HTTP::MAX_RECURSE
1351              
1352             The default value for the C request parameter (default: C<10>).
1353              
1354             =item $AnyEvent::HTTP::TIMEOUT
1355              
1356             The default timeout for connection operations (default: C<300>).
1357              
1358             =item $AnyEvent::HTTP::USERAGENT
1359              
1360             The default value for the C header (the default is
1361             C).
1362              
1363             =item $AnyEvent::HTTP::MAX_PER_HOST
1364              
1365             The maximum number of concurrent connections to the same host (identified
1366             by the hostname). If the limit is exceeded, then additional requests
1367             are queued until previous connections are closed. Both persistent and
1368             non-persistent connections are counted in this limit.
1369              
1370             The default value for this is C<4>, and it is highly advisable to not
1371             increase it much.
1372              
1373             For comparison: the RFC's recommend 4 non-persistent or 2 persistent
1374             connections, older browsers used 2, newer ones (such as firefox 3)
1375             typically use 6, and Opera uses 8 because like, they have the fastest
1376             browser and give a shit for everybody else on the planet.
1377              
1378             =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
1379              
1380             The time after which idle persistent connections get closed by
1381             AnyEvent::HTTP (default: C<3>).
1382              
1383             =item $AnyEvent::HTTP::ACTIVE
1384              
1385             The number of active connections. This is not the number of currently
1386             running requests, but the number of currently open and non-idle TCP
1387             connections. This number can be useful for load-leveling.
1388              
1389             =back
1390              
1391             =cut
1392              
1393             our @month = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
1394             our @weekday = qw(Sun Mon Tue Wed Thu Fri Sat);
1395              
1396             sub format_date($) {
1397 0     0 1 0 my ($time) = @_;
1398              
1399             # RFC 822/1123 format
1400 0         0 my ($S, $M, $H, $mday, $mon, $year, $wday, $yday, undef) = gmtime $time;
1401              
1402 0         0 sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT",
1403             $weekday[$wday], $mday, $month[$mon], $year + 1900,
1404             $H, $M, $S;
1405             }
1406              
1407             sub parse_date($) {
1408 0     0 1 0 my ($date) = @_;
1409              
1410 0         0 my ($d, $m, $y, $H, $M, $S);
1411              
1412 0 0       0 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
    0          
    0          
1413             # RFC 822/1123, required by RFC 2616 (with " ")
1414             # cookie dates (with "-")
1415              
1416 0         0 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1417              
1418             } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1419             # RFC 850
1420 0 0       0 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1421              
1422             } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1423             # ISO C's asctime
1424 0         0 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1425             }
1426             # other formats fail in the loop below
1427              
1428 0         0 for (0..11) {
1429 0 0       0 if ($m eq $month[$_]) {
1430 0         0 require Time::Local;
1431 0         0 return eval { Time::Local::timegm ($S, $M, $H, $d, $_, $y) };
  0         0  
1432             }
1433             }
1434              
1435             undef
1436 0         0 }
1437              
1438             sub set_proxy($) {
1439 3 50   3 1 17 if (length $_[0]) {
1440 0 0       0 $_[0] =~ m%^(http):// ([^:/]+) (?: : (\d*) )?%ix
1441             or Carp::croak "$_[0]: invalid proxy URL";
1442 0   0     0 $PROXY = [$2, $3 || 3128, $1]
1443             } else {
1444 3         46 undef $PROXY;
1445             }
1446             }
1447              
1448             # initialise proxy from environment
1449             eval {
1450             set_proxy $ENV{http_proxy};
1451             };
1452              
1453             =head2 SHOWCASE
1454              
1455             This section contains some more elaborate "real-world" examples or code
1456             snippets.
1457              
1458             =head2 HTTP/1.1 FILE DOWNLOAD
1459              
1460             Downloading files with HTTP can be quite tricky, especially when something
1461             goes wrong and you want to resume.
1462              
1463             Here is a function that initiates and resumes a download. It uses the
1464             last modified time to check for file content changes, and works with many
1465             HTTP/1.0 servers as well, and usually falls back to a complete re-download
1466             on older servers.
1467              
1468             It calls the completion callback with either C, which means a
1469             nonretryable error occurred, C<0> when the download was partial and should
1470             be retried, and C<1> if it was successful.
1471              
1472             use AnyEvent::HTTP;
1473              
1474             sub download($$$) {
1475             my ($url, $file, $cb) = @_;
1476              
1477             open my $fh, "+<", $file
1478             or die "$file: $!";
1479              
1480             my %hdr;
1481             my $ofs = 0;
1482              
1483             if (stat $fh and -s _) {
1484             $ofs = -s _;
1485             warn "-s is ", $ofs;
1486             $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9];
1487             $hdr{"range"} = "bytes=$ofs-";
1488             }
1489              
1490             http_get $url,
1491             headers => \%hdr,
1492             on_header => sub {
1493             my ($hdr) = @_;
1494              
1495             if ($hdr->{Status} == 200 && $ofs) {
1496             # resume failed
1497             truncate $fh, $ofs = 0;
1498             }
1499              
1500             sysseek $fh, $ofs, 0;
1501              
1502             1
1503             },
1504             on_body => sub {
1505             my ($data, $hdr) = @_;
1506              
1507             if ($hdr->{Status} =~ /^2/) {
1508             length $data == syswrite $fh, $data
1509             or return; # abort on write errors
1510             }
1511              
1512             1
1513             },
1514             sub {
1515             my (undef, $hdr) = @_;
1516              
1517             my $status = $hdr->{Status};
1518              
1519             if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) {
1520             utime $time, $time, $fh;
1521             }
1522              
1523             if ($status == 200 || $status == 206 || $status == 416) {
1524             # download ok || resume ok || file already fully downloaded
1525             $cb->(1, $hdr);
1526              
1527             } elsif ($status == 412) {
1528             # file has changed while resuming, delete and retry
1529             unlink $file;
1530             $cb->(0, $hdr);
1531              
1532             } elsif ($status == 500 or $status == 503 or $status =~ /^59/) {
1533             # retry later
1534             $cb->(0, $hdr);
1535              
1536             } else {
1537             $cb->(undef, $hdr);
1538             }
1539             }
1540             ;
1541             }
1542              
1543             download "http://server/somelargefile", "/tmp/somelargefile", sub {
1544             if ($_[0]) {
1545             print "OK!\n";
1546             } elsif (defined $_[0]) {
1547             print "please retry later\n";
1548             } else {
1549             print "ERROR\n";
1550             }
1551             };
1552              
1553             =head3 SOCKS PROXIES
1554              
1555             Socks proxies are not directly supported by AnyEvent::HTTP. You can
1556             compile your perl to support socks, or use an external program such as
1557             F (dante) or F to make your program use a socks proxy
1558             transparently.
1559              
1560             Alternatively, for AnyEvent::HTTP only, you can use your own
1561             C function that does the proxy handshake - here is an example
1562             that works with socks4a proxies:
1563              
1564             use Errno;
1565             use AnyEvent::Util;
1566             use AnyEvent::Socket;
1567             use AnyEvent::Handle;
1568              
1569             # host, port and username of/for your socks4a proxy
1570             my $socks_host = "10.0.0.23";
1571             my $socks_port = 9050;
1572             my $socks_user = "";
1573              
1574             sub socks4a_connect {
1575             my ($host, $port, $connect_cb, $prepare_cb) = @_;
1576              
1577             my $hdl = new AnyEvent::Handle
1578             connect => [$socks_host, $socks_port],
1579             on_prepare => sub { $prepare_cb->($_[0]{fh}) },
1580             on_error => sub { $connect_cb->() },
1581             ;
1582              
1583             $hdl->push_write (pack "CCnNZ*Z*", 4, 1, $port, 1, $socks_user, $host);
1584              
1585             $hdl->push_read (chunk => 8, sub {
1586             my ($hdl, $chunk) = @_;
1587             my ($status, $port, $ipn) = unpack "xCna4", $chunk;
1588              
1589             if ($status == 0x5a) {
1590             $connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port");
1591             } else {
1592             $! = Errno::ENXIO; $connect_cb->();
1593             }
1594             });
1595              
1596             $hdl
1597             }
1598              
1599             Use C instead of C when doing Cs,
1600             possibly after switching off other proxy types:
1601              
1602             AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies
1603              
1604             http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub {
1605             my ($data, $headers) = @_;
1606             ...
1607             };
1608              
1609             =head1 SEE ALSO
1610              
1611             L.
1612              
1613             =head1 AUTHOR
1614              
1615             Marc Lehmann
1616             http://home.schmorp.de/
1617              
1618             With many thanks to Дмитрий Шалашов, who provided countless
1619             testcases and bugreports.
1620              
1621             =cut
1622              
1623             1
1624