File Coverage

blib/lib/AnyEvent/HTTP.pm
Criterion Covered Total %
statement 94 366 25.6
branch 35 250 14.0
condition 6 91 6.5
subroutine 16 30 53.3
pod 8 12 66.6
total 159 749 21.2


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4              
5             =head1 SYNOPSIS
6              
7             use AnyEvent::HTTP;
8              
9             http_get "http://www.nethype.de/", sub { print $_[1] };
10              
11             # ... do something else here
12              
13             =head1 DESCRIPTION
14              
15             This module is an L user, you need to make sure that you use and
16             run a supported event loop.
17              
18             This module implements a simple, stateless and non-blocking HTTP
19             client. It supports GET, POST and other request methods, cookies and more,
20             all on a very low level. It can follow redirects, supports proxies, and
21             automatically limits the number of connections to the values specified in
22             the RFC.
23              
24             It should generally be a "good client" that is enough for most HTTP
25             tasks. Simple tasks should be simple, but complex tasks should still be
26             possible as the user retains control over request and response headers.
27              
28             The caller is responsible for authentication management, cookies (if
29             the simplistic implementation in this module doesn't suffice), referer
30             and other high-level protocol details for which this module offers only
31             limited support.
32              
33             =head2 METHODS
34              
35             =over 4
36              
37             =cut
38              
39             package AnyEvent::HTTP;
40              
41 3     3   1806 use common::sense;
  3         9  
  3         11  
42              
43 3     3   467 use Errno ();
  3         835  
  3         49  
44              
45 3     3   844 use AnyEvent 5.0 ();
  3         3681  
  3         46  
46 3     3   449 use AnyEvent::Util ();
  3         7189  
  3         37  
47 3     3   1752 use AnyEvent::Handle ();
  3         15038  
  3         61  
48              
49 3     3   13 use base Exporter::;
  3         3  
  3         16923  
50              
51             our $VERSION = 2.23;
52              
53             our @EXPORT = qw(http_get http_post http_head http_request);
54              
55             our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
56             our $MAX_RECURSE = 10;
57             our $PERSISTENT_TIMEOUT = 3;
58             our $TIMEOUT = 300;
59             our $MAX_PER_HOST = 4; # changing this is evil
60              
61             our $PROXY;
62             our $ACTIVE = 0;
63              
64             my %KA_CACHE; # indexed by uhost currently, points to [$handle...] array
65             my %CO_SLOT; # number of open connections, and wait queue, per host
66              
67             =item http_get $url, key => value..., $cb->($data, $headers)
68              
69             Executes an HTTP-GET request. See the http_request function for details on
70             additional parameters and the return value.
71              
72             =item http_head $url, key => value..., $cb->($data, $headers)
73              
74             Executes an HTTP-HEAD request. See the http_request function for details
75             on additional parameters and the return value.
76              
77             =item http_post $url, $body, key => value..., $cb->($data, $headers)
78              
79             Executes an HTTP-POST request with a request body of C<$body>. See the
80             http_request function for details on additional parameters and the return
81             value.
82              
83             =item http_request $method => $url, key => value..., $cb->($data, $headers)
84              
85             Executes a HTTP request of type C<$method> (e.g. C, C). The URL
86             must be an absolute http or https URL.
87              
88             When called in void context, nothing is returned. In other contexts,
89             C returns a "cancellation guard" - you have to keep the
90             object at least alive until the callback get called. If the object gets
91             destroyed before the callback is called, the request will be cancelled.
92              
93             The callback will be called with the response body data as first argument
94             (or C if an error occurred), and a hash-ref with response headers
95             (and trailers) as second argument.
96              
97             All the headers in that hash are lowercased. In addition to the response
98             headers, the "pseudo-headers" (uppercase to avoid clashing with possible
99             response headers) C, C and C contain the
100             three parts of the HTTP Status-Line of the same name. If an error occurs
101             during the body phase of a request, then the original C and
102             C values from the header are available as C and
103             C.
104              
105             The pseudo-header C contains the actual URL (which can differ from
106             the requested URL when following redirects - for example, you might get
107             an error that your URL scheme is not supported even though your URL is a
108             valid http URL because it redirected to an ftp URL, in which case you can
109             look at the URL pseudo header).
110              
111             The pseudo-header C only exists when the request was a result
112             of an internal redirect. In that case it is an array reference with
113             the C<($data, $headers)> from the redirect response. Note that this
114             response could in turn be the result of a redirect itself, and C<<
115             $headers->{Redirect}[1]{Redirect} >> will then contain the original
116             response, and so on.
117              
118             If the server sends a header multiple times, then their contents will be
119             joined together with a comma (C<,>), as per the HTTP spec.
120              
121             If an internal error occurs, such as not being able to resolve a hostname,
122             then C<$data> will be C, C<< $headers->{Status} >> will be
123             C<590>-C<599> and the C pseudo-header will contain an error
124             message. Currently the following status codes are used:
125              
126             =over 4
127              
128             =item 595 - errors during connection establishment, proxy handshake.
129              
130             =item 596 - errors during TLS negotiation, request sending and header processing.
131              
132             =item 597 - errors during body receiving or processing.
133              
134             =item 598 - user aborted request via C or C.
135              
136             =item 599 - other, usually nonretryable, errors (garbled URL etc.).
137              
138             =back
139              
140             A typical callback might look like this:
141              
142             sub {
143             my ($body, $hdr) = @_;
144              
145             if ($hdr->{Status} =~ /^2/) {
146             ... everything should be ok
147             } else {
148             print "error, $hdr->{Status} $hdr->{Reason}\n";
149             }
150             }
151              
152             Additional parameters are key-value pairs, and are fully optional. They
153             include:
154              
155             =over 4
156              
157             =item recurse => $count (default: $MAX_RECURSE)
158              
159             Whether to recurse requests or not, e.g. on redirects, authentication and
160             other retries and so on, and how often to do so.
161              
162             Only redirects to http and https URLs are supported. While most common
163             redirection forms are handled entirely within this module, some require
164             the use of the optional L module. If it is required but missing, then
165             the request will fail with an error.
166              
167             =item headers => hashref
168              
169             The request headers to use. Currently, C may provide its own
170             C, C, C and C headers and
171             will provide defaults at least for C, C and C
172             (this can be suppressed by using C for these headers in which case
173             they won't be sent at all).
174              
175             You really should provide your own C header value that is
176             appropriate for your program - I wouldn't be surprised if the default
177             AnyEvent string gets blocked by webservers sooner or later.
178              
179             Also, make sure that your headers names and values do not contain any
180             embedded newlines.
181              
182             =item timeout => $seconds
183              
184             The time-out to use for various stages - each connect attempt will reset
185             the timeout, as will read or write activity, i.e. this is not an overall
186             timeout.
187              
188             Default timeout is 5 minutes.
189              
190             =item proxy => [$host, $port[, $scheme]] or undef
191              
192             Use the given http proxy for all requests, or no proxy if C is
193             used.
194              
195             C<$scheme> must be either missing or must be C for HTTP.
196              
197             If not specified, then the default proxy is used (see
198             C).
199              
200             Currently, if your proxy requires authorization, you have to specify an
201             appropriate "Proxy-Authorization" header in every request.
202              
203             =item body => $string
204              
205             The request body, usually empty. Will be sent as-is (future versions of
206             this module might offer more options).
207              
208             =item cookie_jar => $hash_ref
209              
210             Passing this parameter enables (simplified) cookie-processing, loosely
211             based on the original netscape specification.
212              
213             The C<$hash_ref> must be an (initially empty) hash reference which
214             will get updated automatically. It is possible to save the cookie jar
215             to persistent storage with something like JSON or Storable - see the
216             C function if you wish to remove
217             expired or session-only cookies, and also for documentation on the format
218             of the cookie jar.
219              
220             Note that this cookie implementation is not meant to be complete. If
221             you want complete cookie management you have to do that on your
222             own. C is meant as a quick fix to get most cookie-using sites
223             working. Cookies are a privacy disaster, do not use them unless required
224             to.
225              
226             When cookie processing is enabled, the C and C
227             headers will be set and handled by this module, otherwise they will be
228             left untouched.
229              
230             =item tls_ctx => $scheme | $tls_ctx
231              
232             Specifies the AnyEvent::TLS context to be used for https connections. This
233             parameter follows the same rules as the C parameter to
234             L, but additionally, the two strings C or
235             C can be specified, which give you a predefined low-security (no
236             verification, highest compatibility) and high-security (CA and common-name
237             verification) TLS context.
238              
239             The default for this option is C, which could be interpreted as "give
240             me the page, no matter what".
241              
242             See also the C parameter.
243              
244             =item session => $string
245              
246             The module might reuse connections to the same host internally. Sometimes
247             (e.g. when using TLS), you do not want to reuse connections from other
248             sessions. This can be achieved by setting this parameter to some unique
249             ID (such as the address of an object storing your state data, or the TLS
250             context) - only connections using the same unique ID will be reused.
251              
252             =item on_prepare => $callback->($fh)
253              
254             In rare cases you need to "tune" the socket before it is used to
255             connect (for example, to bind it on a given IP address). This parameter
256             overrides the prepare callback passed to C
257             and behaves exactly the same way (e.g. it has to provide a
258             timeout). See the description for the C<$prepare_cb> argument of
259             C for details.
260              
261             =item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb)
262              
263             In even rarer cases you want total control over how AnyEvent::HTTP
264             establishes connections. Normally it uses L
265             to do this, but you can provide your own C function -
266             obviously, it has to follow the same calling conventions, except that it
267             may always return a connection guard object.
268              
269             There are probably lots of weird uses for this function, starting from
270             tracing the hosts C actually tries to connect, to (inexact
271             but fast) host => IP address caching or even socks protocol support.
272              
273             =item on_header => $callback->($headers)
274              
275             When specified, this callback will be called with the header hash as soon
276             as headers have been successfully received from the remote server (not on
277             locally-generated errors).
278              
279             It has to return either true (in which case AnyEvent::HTTP will continue),
280             or false, in which case AnyEvent::HTTP will cancel the download (and call
281             the finish callback with an error code of C<598>).
282              
283             This callback is useful, among other things, to quickly reject unwanted
284             content, which, if it is supposed to be rare, can be faster than first
285             doing a C request.
286              
287             The downside is that cancelling the request makes it impossible to re-use
288             the connection. Also, the C callback will not receive any
289             trailer (headers sent after the response body).
290              
291             Example: cancel the request unless the content-type is "text/html".
292              
293             on_header => sub {
294             $_[0]{"content-type"} =~ /^text\/html\s*(?:;|$)/
295             },
296              
297             =item on_body => $callback->($partial_body, $headers)
298              
299             When specified, all body data will be passed to this callback instead of
300             to the completion callback. The completion callback will get the empty
301             string instead of the body data.
302              
303             It has to return either true (in which case AnyEvent::HTTP will continue),
304             or false, in which case AnyEvent::HTTP will cancel the download (and call
305             the completion callback with an error code of C<598>).
306              
307             The downside to cancelling the request is that it makes it impossible to
308             re-use the connection.
309              
310             This callback is useful when the data is too large to be held in memory
311             (so the callback writes it to a file) or when only some information should
312             be extracted, or when the body should be processed incrementally.
313              
314             It is usually preferred over doing your own body handling via
315             C, but in case of streaming APIs, where HTTP is
316             only used to create a connection, C is the better
317             alternative, as it allows you to install your own event handler, reducing
318             resource usage.
319              
320             =item want_body_handle => $enable
321              
322             When enabled (default is disabled), the behaviour of AnyEvent::HTTP
323             changes considerably: after parsing the headers, and instead of
324             downloading the body (if any), the completion callback will be
325             called. Instead of the C<$body> argument containing the body data, the
326             callback will receive the L object associated with the
327             connection. In error cases, C will be passed. When there is no body
328             (e.g. status C<304>), the empty string will be passed.
329              
330             The handle object might or might not be in TLS mode, might be connected
331             to a proxy, be a persistent connection, use chunked transfer encoding
332             etc., and configured in unspecified ways. The user is responsible for this
333             handle (it will not be used by this module anymore).
334              
335             This is useful with some push-type services, where, after the initial
336             headers, an interactive protocol is used (typical example would be the
337             push-style twitter API which starts a JSON/XML stream).
338              
339             If you think you need this, first have a look at C, to see if
340             that doesn't solve your problem in a better way.
341              
342             =item persistent => $boolean
343              
344             Try to create/reuse a persistent connection. When this flag is set
345             (default: true for idempotent requests, false for all others), then
346             C tries to re-use an existing (previously-created)
347             persistent connection to the host and, failing that, tries to create a new
348             one.
349              
350             Requests failing in certain ways will be automatically retried once, which
351             is dangerous for non-idempotent requests, which is why it defaults to off
352             for them. The reason for this is because the bozos who designed HTTP/1.1
353             made it impossible to distinguish between a fatal error and a normal
354             connection timeout, so you never know whether there was a problem with
355             your request or not.
356              
357             When reusing an existent connection, many parameters (such as TLS context)
358             will be ignored. See the C parameter for a workaround.
359              
360             =item keepalive => $boolean
361              
362             Only used when C is also true. This parameter decides whether
363             C tries to handshake a HTTP/1.0-style keep-alive connection
364             (as opposed to only a HTTP/1.1 persistent connection).
365              
366             The default is true, except when using a proxy, in which case it defaults
367             to false, as HTTP/1.0 proxies cannot support this in a meaningful way.
368              
369             =item handle_params => { key => value ... }
370              
371             The key-value pairs in this hash will be passed to any L
372             constructor that is called - not all requests will create a handle, and
373             sometimes more than one is created, so this parameter is only good for
374             setting hints.
375              
376             Example: set the maximum read size to 4096, to potentially conserve memory
377             at the cost of speed.
378              
379             handle_params => {
380             max_read_size => 4096,
381             },
382              
383             =back
384              
385             Example: do a simple HTTP GET request for http://www.nethype.de/ and print
386             the response body.
387              
388             http_request GET => "http://www.nethype.de/", sub {
389             my ($body, $hdr) = @_;
390             print "$body\n";
391             };
392              
393             Example: do a HTTP HEAD request on https://www.google.com/, use a
394             timeout of 30 seconds.
395              
396             http_request
397             HEAD => "https://www.google.com",
398             headers => { "user-agent" => "MySearchClient 1.0" },
399             timeout => 30,
400             sub {
401             my ($body, $hdr) = @_;
402             use Data::Dumper;
403             print Dumper $hdr;
404             }
405             ;
406              
407             Example: do another simple HTTP GET request, but immediately try to
408             cancel it.
409              
410             my $request = http_request GET => "http://www.nethype.de/", sub {
411             my ($body, $hdr) = @_;
412             print "$body\n";
413             };
414              
415             undef $request;
416              
417             =cut
418              
419             #############################################################################
420             # wait queue/slots
421              
422             sub _slot_schedule;
423             sub _slot_schedule($) {
424 16     16   18 my $host = shift;
425              
426 16         33 while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
427 24 100       3351 if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
  24         55  
428             # somebody wants that slot
429 8         9 ++$CO_SLOT{$host}[0];
430 8         6 ++$ACTIVE;
431              
432             $cb->(AnyEvent::Util::guard {
433 8     8   8 --$ACTIVE;
434 8         10 --$CO_SLOT{$host}[0];
435 8         15 _slot_schedule $host;
436 8         33 });
437             } else {
438             # nobody wants the slot, maybe we can forget about it
439 16 100       40 delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
440 16         56 last;
441             }
442             }
443             }
444              
445             # wait for a free slot on host, call callback
446             sub _get_slot($$) {
447 8     8   5 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
  8         21  
448              
449 8         12 _slot_schedule $_[0];
450             }
451              
452             #############################################################################
453             # cookie handling
454              
455             # expire cookies
456             sub cookie_jar_expire($;$) {
457 0     0 1 0 my ($jar, $session_end) = @_;
458              
459 0 0       0 %$jar = () if $jar->{version} != 1;
460              
461 0         0 my $anow = AE::now;
462              
463 0         0 while (my ($chost, $paths) = each %$jar) {
464 0 0       0 next unless ref $paths;
465              
466 0         0 while (my ($cpath, $cookies) = each %$paths) {
467 0         0 while (my ($cookie, $kv) = each %$cookies) {
468 0 0       0 if (exists $kv->{_expires}) {
    0          
469             delete $cookies->{$cookie}
470 0 0       0 if $anow > $kv->{_expires};
471             } elsif ($session_end) {
472 0         0 delete $cookies->{$cookie};
473             }
474             }
475              
476 0 0       0 delete $paths->{$cpath}
477             unless %$cookies;
478             }
479              
480 0 0       0 delete $jar->{$chost}
481             unless %$paths;
482             }
483             }
484            
485             # extract cookies from jar
486             sub cookie_jar_extract($$$$) {
487 0     0 0 0 my ($jar, $scheme, $host, $path) = @_;
488              
489 0 0       0 %$jar = () if $jar->{version} != 1;
490              
491 0         0 my @cookies;
492              
493 0         0 while (my ($chost, $paths) = each %$jar) {
494 0 0       0 next unless ref $paths;
495              
496 0 0       0 if ($chost =~ /^\./) {
    0          
497 0 0       0 next unless $chost eq substr $host, -length $chost;
498             } elsif ($chost =~ /\./) {
499 0 0       0 next unless $chost eq $host;
500             } else {
501 0         0 next;
502             }
503              
504 0         0 while (my ($cpath, $cookies) = each %$paths) {
505 0 0       0 next unless $cpath eq substr $path, 0, length $cpath;
506              
507 0         0 while (my ($cookie, $kv) = each %$cookies) {
508 0 0 0     0 next if $scheme ne "https" && exists $kv->{secure};
509              
510 0 0 0     0 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
511 0         0 delete $cookies->{$cookie};
512 0         0 next;
513             }
514              
515 0         0 my $value = $kv->{value};
516              
517 0 0       0 if ($value =~ /[=;,[:space:]]/) {
518 0         0 $value =~ s/([\\"])/\\$1/g;
519 0         0 $value = "\"$value\"";
520             }
521              
522 0         0 push @cookies, "$cookie=$value";
523             }
524             }
525             }
526              
527             \@cookies
528 0         0 }
529            
530             # parse set_cookie header into jar
531             sub cookie_jar_set_cookie($$$$) {
532 0     0 0 0 my ($jar, $set_cookie, $host, $date) = @_;
533              
534 0         0 my $anow = int AE::now;
535 0         0 my $snow; # server-now
536              
537 0         0 for ($set_cookie) {
538             # parse NAME=VALUE
539 0         0 my @kv;
540              
541             # expires is not http-compliant in the original cookie-spec,
542             # we support the official date format and some extensions
543 0         0 while (
544             m{
545             \G\s*
546             (?:
547             expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
548             | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )?
549             )
550             }gcxsi
551             ) {
552 0         0 my $name = $2;
553 0         0 my $value = $4;
554              
555 0 0       0 if (defined $1) {
    0          
556             # expires
557 0         0 $name = "expires";
558 0         0 $value = $1;
559             } elsif (defined $3) {
560             # quoted
561 0         0 $value = $3;
562 0         0 $value =~ s/\\(.)/$1/gs;
563             }
564              
565 0 0       0 push @kv, @kv ? lc $name : $name, $value;
566              
567 0 0       0 last unless /\G\s*;/gc;
568             }
569              
570 0 0       0 last unless @kv;
571              
572 0         0 my $name = shift @kv;
573 0         0 my %kv = (value => shift @kv, @kv);
574              
575 0 0       0 if (exists $kv{"max-age"}) {
    0          
576 0         0 $kv{_expires} = $anow + delete $kv{"max-age"};
577             } elsif (exists $kv{expires}) {
578 0   0     0 $snow ||= parse_date ($date) || $anow;
      0        
579 0         0 $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
580             } else {
581 0         0 delete $kv{_expires};
582             }
583              
584 0         0 my $cdom;
585 0   0     0 my $cpath = (delete $kv{path}) || "/";
586              
587 0 0       0 if (exists $kv{domain}) {
588 0         0 $cdom = delete $kv{domain};
589              
590 0         0 $cdom =~ s/^\.?/./; # make sure it starts with a "."
591              
592 0 0       0 next if $cdom =~ /\.$/;
593              
594             # this is not rfc-like and not netscape-like. go figure.
595 0         0 my $ndots = $cdom =~ y/.//;
596 0 0       0 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
    0          
597             } else {
598 0         0 $cdom = $host;
599             }
600              
601             # store it
602 0         0 $jar->{version} = 1;
603 0         0 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
604              
605 0 0       0 redo if /\G\s*,/gc;
606             }
607             }
608              
609             #############################################################################
610             # keepalive/persistent connection cache
611              
612             # fetch a connection from the keepalive cache
613             sub ka_fetch($) {
614 0     0 0 0 my $ka_key = shift;
615              
616 0         0 my $hdl = pop @{ $KA_CACHE{$ka_key} }; # currently we reuse the MOST RECENTLY USED connection
  0         0  
617             delete $KA_CACHE{$ka_key}
618 0 0       0 unless @{ $KA_CACHE{$ka_key} };
  0         0  
619              
620 0         0 $hdl
621             }
622              
623             sub ka_store($$) {
624 0     0 0 0 my ($ka_key, $hdl) = @_;
625              
626 0   0     0 my $kaa = $KA_CACHE{$ka_key} ||= [];
627              
628             my $destroy = sub {
629 0     0   0 my @ka = grep $_ != $hdl, @{ $KA_CACHE{$ka_key} };
  0         0  
630              
631 0         0 $hdl->destroy;
632              
633             @ka
634             ? $KA_CACHE{$ka_key} = \@ka
635 0 0       0 : delete $KA_CACHE{$ka_key};
636 0         0 };
637              
638             # on error etc., destroy
639 0         0 $hdl->on_error ($destroy);
640 0         0 $hdl->on_eof ($destroy);
641 0         0 $hdl->on_read ($destroy);
642 0         0 $hdl->timeout ($PERSISTENT_TIMEOUT);
643              
644 0         0 push @$kaa, $hdl;
645 0         0 shift @$kaa while @$kaa > $MAX_PER_HOST;
646             }
647              
648             #############################################################################
649             # utilities
650              
651             # continue to parse $_ for headers and place them into the arg
652             sub _parse_hdr() {
653 0     0   0 my %hdr;
654              
655             # things seen, not parsed:
656             # p3pP="NON CUR OTPi OUR NOR UNI"
657              
658 0         0 $hdr{lc $1} .= ",$2"
659             while /\G
660             ([^:\000-\037]*):
661             [\011\040]*
662             ((?: [^\012]+ | \012[\011\040] )*)
663             \012
664             /gxc;
665              
666 0 0       0 /\G$/
667             or return;
668              
669             # remove the "," prefix we added to all headers above
670             substr $_, 0, 1, ""
671 0         0 for values %hdr;
672              
673 0         0 \%hdr
674             }
675              
676             #############################################################################
677             # http_get
678              
679             our $qr_nlnl = qr{(?
680              
681             our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 };
682             our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" };
683              
684             # maybe it should just become a normal object :/
685              
686             sub _destroy_state(\%) {
687 8     8   9 my ($state) = @_;
688              
689 8 50       18 $state->{handle}->destroy if $state->{handle};
690 8         42 %$state = ();
691             }
692              
693             sub _error(\%$$) {
694 8     8   11 my ($state, $cb, $hdr) = @_;
695              
696 8         11 &_destroy_state ($state);
697              
698 8         44 $cb->(undef, $hdr);
699             ()
700 8         397 }
701              
702             our %IDEMPOTENT = (
703             DELETE => 1,
704             GET => 1,
705             HEAD => 1,
706             OPTIONS => 1,
707             PUT => 1,
708             TRACE => 1,
709              
710             ACL => 1,
711             "BASELINE-CONTROL" => 1,
712             BIND => 1,
713             CHECKIN => 1,
714             CHECKOUT => 1,
715             COPY => 1,
716             LABEL => 1,
717             LINK => 1,
718             MERGE => 1,
719             MKACTIVITY => 1,
720             MKCALENDAR => 1,
721             MKCOL => 1,
722             MKREDIRECTREF => 1,
723             MKWORKSPACE => 1,
724             MOVE => 1,
725             ORDERPATCH => 1,
726             PROPFIND => 1,
727             PROPPATCH => 1,
728             REBIND => 1,
729             REPORT => 1,
730             SEARCH => 1,
731             UNBIND => 1,
732             UNCHECKOUT => 1,
733             UNLINK => 1,
734             UNLOCK => 1,
735             UPDATE => 1,
736             UPDATEREDIRECTREF => 1,
737             "VERSION-CONTROL" => 1,
738             );
739              
740             sub http_request($$@) {
741 8     8 1 9 my $cb = pop;
742 8         15 my ($method, $url, %arg) = @_;
743              
744 8         7 my %hdr;
745              
746 8 50 33     43 $arg{tls_ctx} = $TLS_CTX_LOW if $arg{tls_ctx} eq "low" || !exists $arg{tls_ctx};
747 8 50       20 $arg{tls_ctx} = $TLS_CTX_HIGH if $arg{tls_ctx} eq "high";
748              
749 8         10 $method = uc $method;
750              
751 8 50       17 if (my $hdr = $arg{headers}) {
752 0         0 while (my ($k, $v) = each %$hdr) {
753 0         0 $hdr{lc $k} = $v;
754             }
755             }
756              
757             # pseudo headers for all subsequent responses
758 8         11 my @pseudo = (URL => $url);
759 8 50       13 push @pseudo, Redirect => delete $arg{Redirect} if exists $arg{Redirect};
760              
761 8 50       12 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
762              
763 8 50       13 return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" })
764             if $recurse < 0;
765              
766 8 50       10 my $proxy = exists $arg{proxy} ? $arg{proxy} : $PROXY;
767 8   33     13 my $timeout = $arg{timeout} || $TIMEOUT;
768              
769 8         42 my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment
770             $url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|;
771              
772 8         11 $uscheme = lc $uscheme;
773              
774 8 0       11 my $uport = $uscheme eq "http" ? 80
    50          
775             : $uscheme eq "https" ? 443
776             : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
777              
778 8 50       42 $uauthority =~ /^(?: .*\@ )? ([^\@]+?) (?: : (\d+) )?$/x
779             or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
780              
781 8         14 my $uhost = lc $1;
782 8 100       33 $uport = $2 if defined $2;
783              
784             $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
785 8 100       26 unless exists $hdr{host};
    50          
786              
787 8         20 $uhost =~ s/^\[(.*)\]$/$1/;
788 8 50       13 $upath .= $query if length $query;
789              
790 8         15 $upath =~ s%^/?%/%;
791              
792             # cookie processing
793 8 50       13 if (my $jar = $arg{cookie_jar}) {
794 0         0 my $cookies = cookie_jar_extract $jar, $uscheme, $uhost, $upath;
795              
796 0 0       0 $hdr{cookie} = join "; ", @$cookies
797             if @$cookies;
798             }
799              
800 8         6 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
801              
802 8 50       9 if ($proxy) {
803 0         0 ($rpath, $rhost, $rport, $rscheme) = ($url, @$proxy);
804              
805 0 0       0 $rscheme = "http" unless defined $rscheme;
806              
807             # don't support https requests over https-proxy transport,
808             # can't be done with tls as spec'ed, unless you double-encrypt.
809 0 0 0     0 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
810              
811 0         0 $rhost = lc $rhost;
812 0         0 $rscheme = lc $rscheme;
813             } else {
814 8         12 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
815             }
816              
817             # leave out fragment and query string, just a heuristic
818 8 50       18 $hdr{referer} = "$uscheme://$uauthority$upath" unless exists $hdr{referer};
819 8 50       14 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
820              
821             $hdr{"content-length"} = length $arg{body}
822 8 50 33     29 if length $arg{body} || $method ne "GET";
823              
824 8         7 my $idempotent = $IDEMPOTENT{$method};
825              
826             # default value for keepalive is true iff the request is for an idempotent method
827 8 50       13 my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent;
828 8 50       12 my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : !$proxy;
829 8         4 my $was_persistent; # true if this is actually a recycled connection
830              
831             # the key to use in the keepalive cache
832 8         15 my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}";
833              
834 8 50       16 $hdr{connection} = ($persistent ? $keepalive ? "keep-alive, " : "" : "close, ") . "Te"; #1.1
    50          
835 8 50       13 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
836              
837 8         12 my %state = (connect_guard => 1);
838              
839 8         6 my $ae_error = 595; # connecting
840              
841             # handle actual, non-tunneled, request
842             my $handle_actual_request = sub {
843 0     0   0 $ae_error = 596; # request phase
844              
845 0         0 my $hdl = $state{handle};
846              
847 0 0 0     0 $hdl->starttls ("connect") if $uscheme eq "https" && !exists $hdl->{tls};
848              
849             # send request
850             $hdl->push_write (
851             "$method $rpath HTTP/1.1\015\012"
852             . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
853             . "\015\012"
854             . $arg{body}
855 0         0 );
856              
857             # return if error occurred during push_write()
858 0 0       0 return unless %state;
859              
860             # reduce memory usage, save a kitten, also re-use it for the response headers.
861 0         0 %hdr = ();
862              
863             # status line and headers
864             $state{read_response} = sub {
865 0 0       0 return unless %state;
866              
867 0         0 for ("$_[1]") {
868 0         0 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
869              
870 0 0       0 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
871             or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid server response" };
872              
873             # 100 Continue handling
874             # should not happen as we don't send expect: 100-continue,
875             # but we handle it just in case.
876             # since we send the request body regardless, if we get an error
877             # we are out of-sync, which we currently do NOT handle correctly.
878             return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
879 0 0       0 if $2 eq 100;
880              
881 0         0 push @pseudo,
882             HTTPVersion => $1,
883             Status => $2,
884             Reason => $3,
885             ;
886              
887 0 0       0 my $hdr = _parse_hdr
888             or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Garbled response headers" };
889              
890 0         0 %hdr = (%$hdr, @pseudo);
891             }
892              
893             # redirect handling
894             # relative uri handling forced by microsoft and other shitheads.
895             # we give our best and fall back to URI if available.
896 0 0       0 if (exists $hdr{location}) {
897 0         0 my $loc = $hdr{location};
898              
899 0 0       0 if ($loc =~ m%^//%) { # //
    0          
    0          
900 0         0 $loc = "$uscheme:$loc";
901              
902             } elsif ($loc eq "") {
903 0         0 $loc = $url;
904              
905             } elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple"
906 0         0 $loc =~ s/^\.\/+//;
907              
908 0 0       0 if ($loc !~ m%^[.?#]%) {
    0          
909 0         0 my $prefix = "$uscheme://$uhost:$uport";
910              
911 0 0       0 unless ($loc =~ s/^\///) {
912 0         0 $prefix .= $upath;
913 0         0 $prefix =~ s/\/[^\/]*$//;
914             }
915              
916 0         0 $loc = "$prefix/$loc";
917              
918 0         0 } elsif (eval { require URI }) { # uri
919 0         0 $loc = URI->new_abs ($loc, $url)->as_string;
920              
921             } else {
922 0         0 return _error %state, $cb, { @pseudo, Status => 599, Reason => "Cannot parse Location (URI module missing)" };
923             #$hdr{Status} = 599;
924             #$hdr{Reason} = "Unparsable Redirect (URI module missing)";
925             #$recurse = 0;
926             }
927             }
928              
929 0         0 $hdr{location} = $loc;
930             }
931              
932 0         0 my $redirect;
933              
934 0 0       0 if ($recurse) {
935 0         0 my $status = $hdr{Status};
936              
937             # industry standard is to redirect POST as GET for
938             # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
939             # also, the UA should ask the user for 301 and 307 and POST,
940             # industry standard seems to be to simply follow.
941             # we go with the industry standard. 308 is defined
942             # by rfc7538
943 0 0 0     0 if ($status == 301 or $status == 302 or $status == 303) {
    0 0        
      0        
944 0         0 $redirect = 1;
945             # HTTP/1.1 is unclear on how to mutate the method
946 0 0       0 unless ($method eq "HEAD") {
947 0         0 $method = "GET";
948 0         0 delete $arg{body};
949             }
950             } elsif ($status == 307 or $status == 308) {
951 0         0 $redirect = 1;
952             }
953             }
954              
955             my $finish = sub { # ($data, $err_status, $err_reason[, $persistent])
956 0 0       0 if ($state{handle}) {
957             # handle keepalive
958 0 0 0     0 if (
    0 0        
959             $persistent
960             && $_[3]
961             && ($hdr{HTTPVersion} < 1.1
962             ? $hdr{connection} =~ /\bkeep-?alive\b/i
963             : $hdr{connection} !~ /\bclose\b/i)
964             ) {
965 0         0 ka_store $ka_key, delete $state{handle};
966             } else {
967             # no keepalive, destroy the handle
968 0         0 $state{handle}->destroy;
969             }
970             }
971              
972 0         0 %state = ();
973              
974 0 0       0 if (defined $_[1]) {
975 0         0 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
  0         0  
976 0         0 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
  0         0  
977             }
978              
979             # set-cookie processing
980 0 0       0 if ($arg{cookie_jar}) {
981 0         0 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
982             }
983              
984 0 0 0     0 if ($redirect && exists $hdr{location}) {
985             # we ignore any errors, as it is very common to receive
986             # Content-Length != 0 but no actual body
987             # we also access %hdr, as $_[1] might be an erro
988             $state{recurse} =
989             http_request (
990             $method => $hdr{location},
991             %arg,
992             recurse => $recurse - 1,
993             Redirect => [$_[0], \%hdr],
994             sub {
995 0         0 %state = ();
996 0         0 &$cb
997             },
998 0         0 );
999             } else {
1000 0         0 $cb->($_[0], \%hdr);
1001             }
1002 0         0 };
1003              
1004 0         0 $ae_error = 597; # body phase
1005              
1006 0         0 my $chunked = $hdr{"transfer-encoding"} =~ /\bchunked\b/i; # not quite correct...
1007              
1008 0 0       0 my $len = $chunked ? undef : $hdr{"content-length"};
1009              
1010             # body handling, many different code paths
1011             # - no body expected
1012             # - want_body_handle
1013             # - te chunked
1014             # - 2x length known (with or without on_body)
1015             # - 2x length not known (with or without on_body)
1016 0 0 0     0 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
    0 0        
    0 0        
    0 0        
    0 0        
      0        
1017 0         0 $finish->(undef, 598 => "Request cancelled by on_header");
1018             } elsif (
1019             $hdr{Status} =~ /^(?:1..|204|205|304)$/
1020             or $method eq "HEAD"
1021             or (defined $len && $len == 0) # == 0, not !, because "0 " is true
1022             ) {
1023             # no body
1024 0         0 $finish->("", undef, undef, 1);
1025              
1026             } elsif (!$redirect && $arg{want_body_handle}) {
1027 0         0 $_[0]->on_eof (undef);
1028 0         0 $_[0]->on_error (undef);
1029 0         0 $_[0]->on_read (undef);
1030              
1031 0         0 $finish->(delete $state{handle});
1032              
1033             } elsif ($chunked) {
1034 0         0 my $cl = 0;
1035 0         0 my $body = "";
1036 0   0     0 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
1037              
1038             $state{read_chunk} = sub {
1039 0 0       0 $_[1] =~ /^([0-9a-fA-F]+)/
1040             or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
1041              
1042 0         0 my $len = hex $1;
1043              
1044 0 0       0 if ($len) {
1045 0         0 $cl += $len;
1046              
1047             $_[0]->push_read (chunk => $len, sub {
1048 0 0       0 $on_body->($_[1], \%hdr)
1049             or return $finish->(undef, 598 => "Request cancelled by on_body");
1050              
1051             $_[0]->push_read (line => sub {
1052 0 0       0 length $_[1]
1053             and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
1054 0         0 $_[0]->push_read (line => $state{read_chunk});
1055 0         0 });
1056 0         0 });
1057             } else {
1058 0   0     0 $hdr{"content-length"} ||= $cl;
1059              
1060             $_[0]->push_read (line => $qr_nlnl, sub {
1061 0 0       0 if (length $_[1]) {
1062 0         0 for ("$_[1]") {
1063 0         0 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
1064              
1065 0 0       0 my $hdr = _parse_hdr
1066             or return $finish->(undef, $ae_error => "Garbled response trailers");
1067              
1068 0         0 %hdr = (%hdr, %$hdr);
1069             }
1070             }
1071              
1072 0         0 $finish->($body, undef, undef, 1);
1073 0         0 });
1074             }
1075 0         0 };
1076              
1077 0         0 $_[0]->push_read (line => $state{read_chunk});
1078              
1079             } elsif ($arg{on_body}) {
1080 0 0       0 if (defined $len) {
1081             $_[0]->on_read (sub {
1082 0         0 $len -= length $_[0]{rbuf};
1083              
1084 0 0       0 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1085             or return $finish->(undef, 598 => "Request cancelled by on_body");
1086              
1087 0 0       0 $len > 0
1088             or $finish->("", undef, undef, 1);
1089 0         0 });
1090             } else {
1091             $_[0]->on_eof (sub {
1092 0         0 $finish->("");
1093 0         0 });
1094             $_[0]->on_read (sub {
1095 0 0       0 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1096             or $finish->(undef, 598 => "Request cancelled by on_body");
1097 0         0 });
1098             }
1099             } else {
1100 0         0 $_[0]->on_eof (undef);
1101              
1102 0 0       0 if (defined $len) {
1103             $_[0]->on_read (sub {
1104             $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
1105 0 0       0 if $len <= length $_[0]{rbuf};
1106 0         0 });
1107             } else {
1108             $_[0]->on_error (sub {
1109             ($! == Errno::EPIPE || !$!)
1110             ? $finish->(delete $_[0]{rbuf})
1111 0 0 0     0 : $finish->(undef, $ae_error => $_[2]);
1112 0         0 });
1113 0         0 $_[0]->on_read (sub { });
1114             }
1115             }
1116 0         0 };
1117              
1118             # if keepalive is enabled, then the server closing the connection
1119             # before a response can happen legally - we retry on idempotent methods.
1120 0 0 0     0 if ($was_persistent && $idempotent) {
1121 0         0 my $old_eof = $hdl->{on_eof};
1122             $hdl->{on_eof} = sub {
1123 0         0 _destroy_state %state;
1124              
1125 0         0 %state = ();
1126             $state{recurse} =
1127             http_request (
1128             $method => $url,
1129             %arg,
1130             recurse => $recurse - 1,
1131             persistent => 0,
1132             sub {
1133 0         0 %state = ();
1134 0         0 &$cb
1135             }
1136 0         0 );
1137 0         0 };
1138             $hdl->on_read (sub {
1139 0 0       0 return unless %state;
1140              
1141             # as soon as we receive something, a connection close
1142             # once more becomes a hard error
1143 0         0 $hdl->{on_eof} = $old_eof;
1144 0         0 $hdl->push_read (line => $qr_nlnl, $state{read_response});
1145 0         0 });
1146             } else {
1147 0         0 $hdl->push_read (line => $qr_nlnl, $state{read_response});
1148             }
1149 8         28 };
1150              
1151             my $prepare_handle = sub {
1152 0     0   0 my ($hdl) = $state{handle};
1153              
1154             $hdl->on_error (sub {
1155 0         0 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] };
1156 0         0 });
1157             $hdl->on_eof (sub {
1158 0         0 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" };
1159 0         0 });
1160 0         0 $hdl->timeout_reset;
1161 0         0 $hdl->timeout ($timeout);
1162 8         17 };
1163              
1164             # connected to proxy (or origin server)
1165             my $connect_cb = sub {
1166 8 50   8   12934 my $fh = shift
1167             or return _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "$!" };
1168              
1169 0 0       0 return unless delete $state{connect_guard};
1170              
1171             # get handle
1172             $state{handle} = new AnyEvent::Handle
1173 0         0 %{ $arg{handle_params} },
1174             fh => $fh,
1175             peername => $uhost,
1176             tls_ctx => $arg{tls_ctx},
1177 0         0 ;
1178              
1179 0         0 $prepare_handle->();
1180              
1181             #$state{handle}->starttls ("connect") if $rscheme eq "https";
1182              
1183             # now handle proxy-CONNECT method
1184 0 0 0     0 if ($proxy && $uscheme eq "https") {
1185             # oh dear, we have to wrap it into a connect request
1186              
1187             my $auth = exists $hdr{"proxy-authorization"}
1188 0 0       0 ? "proxy-authorization: " . (delete $hdr{"proxy-authorization"}) . "\015\012"
1189             : "";
1190              
1191             # maybe re-use $uauthority with patched port?
1192 0         0 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012$auth\015\012");
1193             $state{handle}->push_read (line => $qr_nlnl, sub {
1194 0 0       0 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
1195             or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" };
1196              
1197 0 0       0 if ($2 == 200) {
1198 0         0 $rpath = $upath;
1199 0         0 $handle_actual_request->();
1200             } else {
1201 0         0 _error %state, $cb, { @pseudo, Status => $2, Reason => $3 };
1202             }
1203 0         0 });
1204             } else {
1205 0 0       0 delete $hdr{"proxy-authorization"} unless $proxy;
1206              
1207 0         0 $handle_actual_request->();
1208             }
1209 8         18 };
1210              
1211             _get_slot $uhost, sub {
1212 8     8   10 $state{slot_guard} = shift;
1213              
1214 8 50       17 return unless $state{connect_guard};
1215              
1216             # try to use an existing keepalive connection, but only if we, ourselves, plan
1217             # on a keepalive request (in theory, this should be a separate config option).
1218 8 50 33     20 if ($persistent && $KA_CACHE{$ka_key}) {
1219 0         0 $was_persistent = 1;
1220              
1221 0         0 $state{handle} = ka_fetch $ka_key;
1222             $state{handle}->destroyed
1223 0 0       0 and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d#
1224 0         0 $prepare_handle->();
1225             $state{handle}->destroyed
1226 0 0       0 and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d#
1227 0         0 $handle_actual_request->();
1228              
1229             } else {
1230             my $tcp_connect = $arg{tcp_connect}
1231 8   33     14 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
1232              
1233 8   50     55 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
1234             }
1235 8         22 };
1236              
1237 0     0   0 defined wantarray && AnyEvent::Util::guard { _destroy_state %state }
1238 8 50       33 }
1239              
1240             sub http_get($@) {
1241 8     8 1 1004 unshift @_, "GET";
1242 8         11 &http_request
1243             }
1244              
1245             sub http_head($@) {
1246 0     0 1 0 unshift @_, "HEAD";
1247 0         0 &http_request
1248             }
1249              
1250             sub http_post($$@) {
1251 0     0 1 0 my $url = shift;
1252 0         0 unshift @_, "POST", $url, "body";
1253 0         0 &http_request
1254             }
1255              
1256             =back
1257              
1258             =head2 DNS CACHING
1259              
1260             AnyEvent::HTTP uses the AnyEvent::Socket::tcp_connect function for
1261             the actual connection, which in turn uses AnyEvent::DNS to resolve
1262             hostnames. The latter is a simple stub resolver and does no caching
1263             on its own. If you want DNS caching, you currently have to provide
1264             your own default resolver (by storing a suitable resolver object in
1265             C<$AnyEvent::DNS::RESOLVER>) or your own C callback.
1266              
1267             =head2 GLOBAL FUNCTIONS AND VARIABLES
1268              
1269             =over 4
1270              
1271             =item AnyEvent::HTTP::set_proxy "proxy-url"
1272              
1273             Sets the default proxy server to use. The proxy-url must begin with a
1274             string of the form C, croaks otherwise.
1275              
1276             To clear an already-set proxy, use C.
1277              
1278             When AnyEvent::HTTP is loaded for the first time it will query the
1279             default proxy from the operating system, currently by looking at
1280             C<$ENV{http_proxy>}.
1281              
1282             =item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1283              
1284             Remove all cookies from the cookie jar that have been expired. If
1285             C<$session_end> is given and true, then additionally remove all session
1286             cookies.
1287              
1288             You should call this function (with a true C<$session_end>) before you
1289             save cookies to disk, and you should call this function after loading them
1290             again. If you have a long-running program you can additionally call this
1291             function from time to time.
1292              
1293             A cookie jar is initially an empty hash-reference that is managed by this
1294             module. Its format is subject to change, but currently it is as follows:
1295              
1296             The key C has to contain C<1>, otherwise the hash gets
1297             emptied. All other keys are hostnames or IP addresses pointing to
1298             hash-references. The key for these inner hash references is the
1299             server path for which this cookie is meant, and the values are again
1300             hash-references. Each key of those hash-references is a cookie name, and
1301             the value, you guessed it, is another hash-reference, this time with the
1302             key-value pairs from the cookie, except for C and C,
1303             which have been replaced by a C<_expires> key that contains the cookie
1304             expiry timestamp. Session cookies are indicated by not having an
1305             C<_expires> key.
1306              
1307             Here is an example of a cookie jar with a single cookie, so you have a
1308             chance of understanding the above paragraph:
1309              
1310             {
1311             version => 1,
1312             "10.0.0.1" => {
1313             "/" => {
1314             "mythweb_id" => {
1315             _expires => 1293917923,
1316             value => "ooRung9dThee3ooyXooM1Ohm",
1317             },
1318             },
1319             },
1320             }
1321              
1322             =item $date = AnyEvent::HTTP::format_date $timestamp
1323              
1324             Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
1325             Date (RFC 2616).
1326              
1327             =item $timestamp = AnyEvent::HTTP::parse_date $date
1328              
1329             Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
1330             bunch of minor variations of those, and returns the corresponding POSIX
1331             timestamp, or C if the date cannot be parsed.
1332              
1333             =item $AnyEvent::HTTP::MAX_RECURSE
1334              
1335             The default value for the C request parameter (default: C<10>).
1336              
1337             =item $AnyEvent::HTTP::TIMEOUT
1338              
1339             The default timeout for connection operations (default: C<300>).
1340              
1341             =item $AnyEvent::HTTP::USERAGENT
1342              
1343             The default value for the C header (the default is
1344             C).
1345              
1346             =item $AnyEvent::HTTP::MAX_PER_HOST
1347              
1348             The maximum number of concurrent connections to the same host (identified
1349             by the hostname). If the limit is exceeded, then additional requests
1350             are queued until previous connections are closed. Both persistent and
1351             non-persistent connections are counted in this limit.
1352              
1353             The default value for this is C<4>, and it is highly advisable to not
1354             increase it much.
1355              
1356             For comparison: the RFC's recommend 4 non-persistent or 2 persistent
1357             connections, older browsers used 2, newer ones (such as firefox 3)
1358             typically use 6, and Opera uses 8 because like, they have the fastest
1359             browser and give a shit for everybody else on the planet.
1360              
1361             =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
1362              
1363             The time after which idle persistent connections get closed by
1364             AnyEvent::HTTP (default: C<3>).
1365              
1366             =item $AnyEvent::HTTP::ACTIVE
1367              
1368             The number of active connections. This is not the number of currently
1369             running requests, but the number of currently open and non-idle TCP
1370             connections. This number can be useful for load-leveling.
1371              
1372             =back
1373              
1374             =cut
1375              
1376             our @month = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
1377             our @weekday = qw(Sun Mon Tue Wed Thu Fri Sat);
1378              
1379             sub format_date($) {
1380 0     0 1 0 my ($time) = @_;
1381              
1382             # RFC 822/1123 format
1383 0         0 my ($S, $M, $H, $mday, $mon, $year, $wday, $yday, undef) = gmtime $time;
1384              
1385 0         0 sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT",
1386             $weekday[$wday], $mday, $month[$mon], $year + 1900,
1387             $H, $M, $S;
1388             }
1389              
1390             sub parse_date($) {
1391 0     0 1 0 my ($date) = @_;
1392              
1393 0         0 my ($d, $m, $y, $H, $M, $S);
1394              
1395 0 0       0 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
    0          
    0          
1396             # RFC 822/1123, required by RFC 2616 (with " ")
1397             # cookie dates (with "-")
1398              
1399 0         0 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1400              
1401             } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1402             # RFC 850
1403 0 0       0 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1404              
1405             } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1406             # ISO C's asctime
1407 0         0 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1408             }
1409             # other formats fail in the loop below
1410              
1411 0         0 for (0..11) {
1412 0 0       0 if ($m eq $month[$_]) {
1413 0         0 require Time::Local;
1414 0         0 return eval { Time::Local::timegm ($S, $M, $H, $d, $_, $y) };
  0         0  
1415             }
1416             }
1417              
1418             undef
1419 0         0 }
1420              
1421             sub set_proxy($) {
1422 3 50   3 1 10 if (length $_[0]) {
1423 0 0       0 $_[0] =~ m%^(http):// ([^:/]+) (?: : (\d*) )?%ix
1424             or Carp::croak "$_[0]: invalid proxy URL";
1425 0   0     0 $PROXY = [$2, $3 || 3128, $1]
1426             } else {
1427 3         5 undef $PROXY;
1428             }
1429             }
1430              
1431             # initialise proxy from environment
1432             eval {
1433             set_proxy $ENV{http_proxy};
1434             };
1435              
1436             =head2 SHOWCASE
1437              
1438             This section contains some more elaborate "real-world" examples or code
1439             snippets.
1440              
1441             =head2 HTTP/1.1 FILE DOWNLOAD
1442              
1443             Downloading files with HTTP can be quite tricky, especially when something
1444             goes wrong and you want to resume.
1445              
1446             Here is a function that initiates and resumes a download. It uses the
1447             last modified time to check for file content changes, and works with many
1448             HTTP/1.0 servers as well, and usually falls back to a complete re-download
1449             on older servers.
1450              
1451             It calls the completion callback with either C, which means a
1452             nonretryable error occurred, C<0> when the download was partial and should
1453             be retried, and C<1> if it was successful.
1454              
1455             use AnyEvent::HTTP;
1456              
1457             sub download($$$) {
1458             my ($url, $file, $cb) = @_;
1459              
1460             open my $fh, "+<", $file
1461             or die "$file: $!";
1462              
1463             my %hdr;
1464             my $ofs = 0;
1465              
1466             if (stat $fh and -s _) {
1467             $ofs = -s _;
1468             warn "-s is ", $ofs;
1469             $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9];
1470             $hdr{"range"} = "bytes=$ofs-";
1471             }
1472              
1473             http_get $url,
1474             headers => \%hdr,
1475             on_header => sub {
1476             my ($hdr) = @_;
1477              
1478             if ($hdr->{Status} == 200 && $ofs) {
1479             # resume failed
1480             truncate $fh, $ofs = 0;
1481             }
1482              
1483             sysseek $fh, $ofs, 0;
1484              
1485             1
1486             },
1487             on_body => sub {
1488             my ($data, $hdr) = @_;
1489              
1490             if ($hdr->{Status} =~ /^2/) {
1491             length $data == syswrite $fh, $data
1492             or return; # abort on write errors
1493             }
1494              
1495             1
1496             },
1497             sub {
1498             my (undef, $hdr) = @_;
1499              
1500             my $status = $hdr->{Status};
1501              
1502             if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) {
1503             utime $time, $time, $fh;
1504             }
1505              
1506             if ($status == 200 || $status == 206 || $status == 416) {
1507             # download ok || resume ok || file already fully downloaded
1508             $cb->(1, $hdr);
1509              
1510             } elsif ($status == 412) {
1511             # file has changed while resuming, delete and retry
1512             unlink $file;
1513             $cb->(0, $hdr);
1514              
1515             } elsif ($status == 500 or $status == 503 or $status =~ /^59/) {
1516             # retry later
1517             $cb->(0, $hdr);
1518              
1519             } else {
1520             $cb->(undef, $hdr);
1521             }
1522             }
1523             ;
1524             }
1525              
1526             download "http://server/somelargefile", "/tmp/somelargefile", sub {
1527             if ($_[0]) {
1528             print "OK!\n";
1529             } elsif (defined $_[0]) {
1530             print "please retry later\n";
1531             } else {
1532             print "ERROR\n";
1533             }
1534             };
1535              
1536             =head3 SOCKS PROXIES
1537              
1538             Socks proxies are not directly supported by AnyEvent::HTTP. You can
1539             compile your perl to support socks, or use an external program such as
1540             F (dante) or F to make your program use a socks proxy
1541             transparently.
1542              
1543             Alternatively, for AnyEvent::HTTP only, you can use your own
1544             C function that does the proxy handshake - here is an example
1545             that works with socks4a proxies:
1546              
1547             use Errno;
1548             use AnyEvent::Util;
1549             use AnyEvent::Socket;
1550             use AnyEvent::Handle;
1551              
1552             # host, port and username of/for your socks4a proxy
1553             my $socks_host = "10.0.0.23";
1554             my $socks_port = 9050;
1555             my $socks_user = "";
1556              
1557             sub socks4a_connect {
1558             my ($host, $port, $connect_cb, $prepare_cb) = @_;
1559              
1560             my $hdl = new AnyEvent::Handle
1561             connect => [$socks_host, $socks_port],
1562             on_prepare => sub { $prepare_cb->($_[0]{fh}) },
1563             on_error => sub { $connect_cb->() },
1564             ;
1565              
1566             $hdl->push_write (pack "CCnNZ*Z*", 4, 1, $port, 1, $socks_user, $host);
1567              
1568             $hdl->push_read (chunk => 8, sub {
1569             my ($hdl, $chunk) = @_;
1570             my ($status, $port, $ipn) = unpack "xCna4", $chunk;
1571              
1572             if ($status == 0x5a) {
1573             $connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port");
1574             } else {
1575             $! = Errno::ENXIO; $connect_cb->();
1576             }
1577             });
1578              
1579             $hdl
1580             }
1581              
1582             Use C instead of C when doing Cs,
1583             possibly after switching off other proxy types:
1584              
1585             AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies
1586              
1587             http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub {
1588             my ($data, $headers) = @_;
1589             ...
1590             };
1591              
1592             =head1 SEE ALSO
1593              
1594             L.
1595              
1596             =head1 AUTHOR
1597              
1598             Marc Lehmann
1599             http://home.schmorp.de/
1600              
1601             With many thanks to Дмитрий Шалашов, who provided countless
1602             testcases and bugreports.
1603              
1604             =cut
1605              
1606             1
1607