File Coverage

blib/lib/AnyEvent/HTTP.pm
Criterion Covered Total %
statement 94 364 25.8
branch 35 244 14.3
condition 6 97 6.1
subroutine 16 30 53.3
pod 8 12 66.6
total 159 747 21.2


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4              
5             =head1 SYNOPSIS
6              
7             use AnyEvent::HTTP;
8              
9             http_get "http://www.nethype.de/", sub { print $_[1] };
10              
11             # ... do something else here
12              
13             =head1 DESCRIPTION
14              
15             This module is an L user, you need to make sure that you use and
16             run a supported event loop.
17              
18             This module implements a simple, stateless and non-blocking HTTP
19             client. It supports GET, POST and other request methods, cookies and more,
20             all on a very low level. It can follow redirects, supports proxies, and
21             automatically limits the number of connections to the values specified in
22             the RFC.
23              
24             It should generally be a "good client" that is enough for most HTTP
25             tasks. Simple tasks should be simple, but complex tasks should still be
26             possible as the user retains control over request and response headers.
27              
28             The caller is responsible for authentication management, cookies (if
29             the simplistic implementation in this module doesn't suffice), referer
30             and other high-level protocol details for which this module offers only
31             limited support.
32              
33             =head2 METHODS
34              
35             =over 4
36              
37             =cut
38              
39             package AnyEvent::HTTP;
40              
41 3     3   3374 use common::sense;
  3         41  
  3         15  
42              
43 3     3   689 use Errno ();
  3         1566  
  3         67  
44              
45 3     3   1069 use AnyEvent 5.0 ();
  3         5604  
  3         66  
46 3     3   566 use AnyEvent::Util ();
  3         12348  
  3         52  
47 3     3   2205 use AnyEvent::Handle ();
  3         23644  
  3         87  
48              
49 3     3   25 use base Exporter::;
  3         6  
  3         24556  
50              
51             our $VERSION = 2.24;
52              
53             our @EXPORT = qw(http_get http_post http_head http_request);
54              
55             our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
56             our $MAX_RECURSE = 10;
57             our $PERSISTENT_TIMEOUT = 3;
58             our $TIMEOUT = 300;
59             our $MAX_PER_HOST = 4; # changing this is evil
60              
61             our $PROXY;
62             our $ACTIVE = 0;
63              
64             my %KA_CACHE; # indexed by uhost currently, points to [$handle...] array
65             my %CO_SLOT; # number of open connections, and wait queue, per host
66              
67             =item http_get $url, key => value..., $cb->($data, $headers)
68              
69             Executes an HTTP-GET request. See the http_request function for details on
70             additional parameters and the return value.
71              
72             =item http_head $url, key => value..., $cb->($data, $headers)
73              
74             Executes an HTTP-HEAD request. See the http_request function for details
75             on additional parameters and the return value.
76              
77             =item http_post $url, $body, key => value..., $cb->($data, $headers)
78              
79             Executes an HTTP-POST request with a request body of C<$body>. See the
80             http_request function for details on additional parameters and the return
81             value.
82              
83             =item http_request $method => $url, key => value..., $cb->($data, $headers)
84              
85             Executes a HTTP request of type C<$method> (e.g. C, C). The URL
86             must be an absolute http or https URL.
87              
88             When called in void context, nothing is returned. In other contexts,
89             C returns a "cancellation guard" - you have to keep the
90             object at least alive until the callback get called. If the object gets
91             destroyed before the callback is called, the request will be cancelled.
92              
93             The callback will be called with the response body data as first argument
94             (or C if an error occurred), and a hash-ref with response headers
95             (and trailers) as second argument.
96              
97             All the headers in that hash are lowercased. In addition to the response
98             headers, the "pseudo-headers" (uppercase to avoid clashing with possible
99             response headers) C, C and C contain the
100             three parts of the HTTP Status-Line of the same name. If an error occurs
101             during the body phase of a request, then the original C and
102             C values from the header are available as C and
103             C.
104              
105             The pseudo-header C contains the actual URL (which can differ from
106             the requested URL when following redirects - for example, you might get
107             an error that your URL scheme is not supported even though your URL is a
108             valid http URL because it redirected to an ftp URL, in which case you can
109             look at the URL pseudo header).
110              
111             The pseudo-header C only exists when the request was a result
112             of an internal redirect. In that case it is an array reference with
113             the C<($data, $headers)> from the redirect response. Note that this
114             response could in turn be the result of a redirect itself, and C<<
115             $headers->{Redirect}[1]{Redirect} >> will then contain the original
116             response, and so on.
117              
118             If the server sends a header multiple times, then their contents will be
119             joined together with a comma (C<,>), as per the HTTP spec.
120              
121             If an internal error occurs, such as not being able to resolve a hostname,
122             then C<$data> will be C, C<< $headers->{Status} >> will be
123             C<590>-C<599> and the C pseudo-header will contain an error
124             message. Currently the following status codes are used:
125              
126             =over 4
127              
128             =item 595 - errors during connection establishment, proxy handshake.
129              
130             =item 596 - errors during TLS negotiation, request sending and header processing.
131              
132             =item 597 - errors during body receiving or processing.
133              
134             =item 598 - user aborted request via C or C.
135              
136             =item 599 - other, usually nonretryable, errors (garbled URL etc.).
137              
138             =back
139              
140             A typical callback might look like this:
141              
142             sub {
143             my ($body, $hdr) = @_;
144              
145             if ($hdr->{Status} =~ /^2/) {
146             ... everything should be ok
147             } else {
148             print "error, $hdr->{Status} $hdr->{Reason}\n";
149             }
150             }
151              
152             Additional parameters are key-value pairs, and are fully optional. They
153             include:
154              
155             =over 4
156              
157             =item recurse => $count (default: $MAX_RECURSE)
158              
159             Whether to recurse requests or not, e.g. on redirects, authentication and
160             other retries and so on, and how often to do so.
161              
162             Only redirects to http and https URLs are supported. While most common
163             redirection forms are handled entirely within this module, some require
164             the use of the optional L module. If it is required but missing, then
165             the request will fail with an error.
166              
167             =item headers => hashref
168              
169             The request headers to use. Currently, C may provide its own
170             C, C, C and C headers and
171             will provide defaults at least for C, C and C
172             (this can be suppressed by using C for these headers in which case
173             they won't be sent at all).
174              
175             You really should provide your own C header value that is
176             appropriate for your program - I wouldn't be surprised if the default
177             AnyEvent string gets blocked by webservers sooner or later.
178              
179             Also, make sure that your headers names and values do not contain any
180             embedded newlines.
181              
182             =item timeout => $seconds
183              
184             The time-out to use for various stages - each connect attempt will reset
185             the timeout, as will read or write activity, i.e. this is not an overall
186             timeout.
187              
188             Default timeout is 5 minutes.
189              
190             =item proxy => [$host, $port[, $scheme]] or undef
191              
192             Use the given http proxy for all requests, or no proxy if C is
193             used.
194              
195             C<$scheme> must be either missing or must be C for HTTP.
196              
197             If not specified, then the default proxy is used (see
198             C).
199              
200             Currently, if your proxy requires authorization, you have to specify an
201             appropriate "Proxy-Authorization" header in every request.
202              
203             =item body => $string
204              
205             The request body, usually empty. Will be sent as-is (future versions of
206             this module might offer more options).
207              
208             =item cookie_jar => $hash_ref
209              
210             Passing this parameter enables (simplified) cookie-processing, loosely
211             based on the original netscape specification.
212              
213             The C<$hash_ref> must be an (initially empty) hash reference which
214             will get updated automatically. It is possible to save the cookie jar
215             to persistent storage with something like JSON or Storable - see the
216             C function if you wish to remove
217             expired or session-only cookies, and also for documentation on the format
218             of the cookie jar.
219              
220             Note that this cookie implementation is not meant to be complete. If
221             you want complete cookie management you have to do that on your
222             own. C is meant as a quick fix to get most cookie-using sites
223             working. Cookies are a privacy disaster, do not use them unless required
224             to.
225              
226             When cookie processing is enabled, the C and C
227             headers will be set and handled by this module, otherwise they will be
228             left untouched.
229              
230             =item tls_ctx => $scheme | $tls_ctx
231              
232             Specifies the AnyEvent::TLS context to be used for https connections. This
233             parameter follows the same rules as the C parameter to
234             L, but additionally, the two strings C or
235             C can be specified, which give you a predefined low-security (no
236             verification, highest compatibility) and high-security (CA and common-name
237             verification) TLS context.
238              
239             The default for this option is C, which could be interpreted as "give
240             me the page, no matter what".
241              
242             See also the C parameter.
243              
244             =item session => $string
245              
246             The module might reuse connections to the same host internally. Sometimes
247             (e.g. when using TLS), you do not want to reuse connections from other
248             sessions. This can be achieved by setting this parameter to some unique
249             ID (such as the address of an object storing your state data, or the TLS
250             context) - only connections using the same unique ID will be reused.
251              
252             =item on_prepare => $callback->($fh)
253              
254             In rare cases you need to "tune" the socket before it is used to
255             connect (for example, to bind it on a given IP address). This parameter
256             overrides the prepare callback passed to C
257             and behaves exactly the same way (e.g. it has to provide a
258             timeout). See the description for the C<$prepare_cb> argument of
259             C for details.
260              
261             =item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb)
262              
263             In even rarer cases you want total control over how AnyEvent::HTTP
264             establishes connections. Normally it uses L
265             to do this, but you can provide your own C function -
266             obviously, it has to follow the same calling conventions, except that it
267             may always return a connection guard object.
268              
269             There are probably lots of weird uses for this function, starting from
270             tracing the hosts C actually tries to connect, to (inexact
271             but fast) host => IP address caching or even socks protocol support.
272              
273             =item on_header => $callback->($headers)
274              
275             When specified, this callback will be called with the header hash as soon
276             as headers have been successfully received from the remote server (not on
277             locally-generated errors).
278              
279             It has to return either true (in which case AnyEvent::HTTP will continue),
280             or false, in which case AnyEvent::HTTP will cancel the download (and call
281             the finish callback with an error code of C<598>).
282              
283             This callback is useful, among other things, to quickly reject unwanted
284             content, which, if it is supposed to be rare, can be faster than first
285             doing a C request.
286              
287             The downside is that cancelling the request makes it impossible to re-use
288             the connection. Also, the C callback will not receive any
289             trailer (headers sent after the response body).
290              
291             Example: cancel the request unless the content-type is "text/html".
292              
293             on_header => sub {
294             $_[0]{"content-type"} =~ /^text\/html\s*(?:;|$)/
295             },
296              
297             =item on_body => $callback->($partial_body, $headers)
298              
299             When specified, all body data will be passed to this callback instead of
300             to the completion callback. The completion callback will get the empty
301             string instead of the body data.
302              
303             It has to return either true (in which case AnyEvent::HTTP will continue),
304             or false, in which case AnyEvent::HTTP will cancel the download (and call
305             the completion callback with an error code of C<598>).
306              
307             The downside to cancelling the request is that it makes it impossible to
308             re-use the connection.
309              
310             This callback is useful when the data is too large to be held in memory
311             (so the callback writes it to a file) or when only some information should
312             be extracted, or when the body should be processed incrementally.
313              
314             It is usually preferred over doing your own body handling via
315             C, but in case of streaming APIs, where HTTP is
316             only used to create a connection, C is the better
317             alternative, as it allows you to install your own event handler, reducing
318             resource usage.
319              
320             =item want_body_handle => $enable
321              
322             When enabled (default is disabled), the behaviour of AnyEvent::HTTP
323             changes considerably: after parsing the headers, and instead of
324             downloading the body (if any), the completion callback will be
325             called. Instead of the C<$body> argument containing the body data, the
326             callback will receive the L object associated with the
327             connection. In error cases, C will be passed. When there is no body
328             (e.g. status C<304>), the empty string will be passed.
329              
330             The handle object might or might not be in TLS mode, might be connected
331             to a proxy, be a persistent connection, use chunked transfer encoding
332             etc., and configured in unspecified ways. The user is responsible for this
333             handle (it will not be used by this module anymore).
334              
335             This is useful with some push-type services, where, after the initial
336             headers, an interactive protocol is used (typical example would be the
337             push-style twitter API which starts a JSON/XML stream).
338              
339             If you think you need this, first have a look at C, to see if
340             that doesn't solve your problem in a better way.
341              
342             =item persistent => $boolean
343              
344             Try to create/reuse a persistent connection. When this flag is set
345             (default: true for idempotent requests, false for all others), then
346             C tries to re-use an existing (previously-created)
347             persistent connection to the host and, failing that, tries to create a new
348             one.
349              
350             Requests failing in certain ways will be automatically retried once, which
351             is dangerous for non-idempotent requests, which is why it defaults to off
352             for them. The reason for this is because the bozos who designed HTTP/1.1
353             made it impossible to distinguish between a fatal error and a normal
354             connection timeout, so you never know whether there was a problem with
355             your request or not.
356              
357             When reusing an existent connection, many parameters (such as TLS context)
358             will be ignored. See the C parameter for a workaround.
359              
360             =item keepalive => $boolean
361              
362             Only used when C is also true. This parameter decides whether
363             C tries to handshake a HTTP/1.0-style keep-alive connection
364             (as opposed to only a HTTP/1.1 persistent connection).
365              
366             The default is true, except when using a proxy, in which case it defaults
367             to false, as HTTP/1.0 proxies cannot support this in a meaningful way.
368              
369             =item handle_params => { key => value ... }
370              
371             The key-value pairs in this hash will be passed to any L
372             constructor that is called - not all requests will create a handle, and
373             sometimes more than one is created, so this parameter is only good for
374             setting hints.
375              
376             Example: set the maximum read size to 4096, to potentially conserve memory
377             at the cost of speed.
378              
379             handle_params => {
380             max_read_size => 4096,
381             },
382              
383             =back
384              
385             Example: do a simple HTTP GET request for http://www.nethype.de/ and print
386             the response body.
387              
388             http_request GET => "http://www.nethype.de/", sub {
389             my ($body, $hdr) = @_;
390             print "$body\n";
391             };
392              
393             Example: do a HTTP HEAD request on https://www.google.com/, use a
394             timeout of 30 seconds.
395              
396             http_request
397             HEAD => "https://www.google.com",
398             headers => { "user-agent" => "MySearchClient 1.0" },
399             timeout => 30,
400             sub {
401             my ($body, $hdr) = @_;
402             use Data::Dumper;
403             print Dumper $hdr;
404             }
405             ;
406              
407             Example: do another simple HTTP GET request, but immediately try to
408             cancel it.
409              
410             my $request = http_request GET => "http://www.nethype.de/", sub {
411             my ($body, $hdr) = @_;
412             print "$body\n";
413             };
414              
415             undef $request;
416              
417             =cut
418              
419             #############################################################################
420             # wait queue/slots
421              
422             sub _slot_schedule;
423             sub _slot_schedule($) {
424 16     16   30 my $host = shift;
425              
426 16         54 while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
427 24 100       5640 if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
  24         83  
428             # somebody wants that slot
429 8         19 ++$CO_SLOT{$host}[0];
430 8         15 ++$ACTIVE;
431              
432             $cb->(AnyEvent::Util::guard {
433 8     8   64 --$ACTIVE;
434 8         20 --$CO_SLOT{$host}[0];
435 8         24 _slot_schedule $host;
436 8         60 });
437             } else {
438             # nobody wants the slot, maybe we can forget about it
439 16 100       66 delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
440 16         67 last;
441             }
442             }
443             }
444              
445             # wait for a free slot on host, call callback
446             sub _get_slot($$) {
447 8     8   14 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
  8         30  
448              
449 8         23 _slot_schedule $_[0];
450             }
451              
452             #############################################################################
453             # cookie handling
454              
455             # expire cookies
456             sub cookie_jar_expire($;$) {
457 0     0 1 0 my ($jar, $session_end) = @_;
458              
459 0 0       0 %$jar = () if $jar->{version} != 2;
460              
461 0         0 my $anow = AE::now;
462              
463 0         0 while (my ($chost, $paths) = each %$jar) {
464 0 0       0 next unless ref $paths;
465              
466 0         0 while (my ($cpath, $cookies) = each %$paths) {
467 0         0 while (my ($cookie, $kv) = each %$cookies) {
468 0 0       0 if (exists $kv->{_expires}) {
    0          
469             delete $cookies->{$cookie}
470 0 0       0 if $anow > $kv->{_expires};
471             } elsif ($session_end) {
472 0         0 delete $cookies->{$cookie};
473             }
474             }
475              
476 0 0       0 delete $paths->{$cpath}
477             unless %$cookies;
478             }
479              
480 0 0       0 delete $jar->{$chost}
481             unless %$paths;
482             }
483             }
484            
485             # extract cookies from jar
486             sub cookie_jar_extract($$$$) {
487 0     0 0 0 my ($jar, $scheme, $host, $path) = @_;
488              
489 0 0       0 %$jar = () if $jar->{version} != 2;
490              
491 0 0       0 $host = AnyEvent::Util::idn_to_ascii $host
492             if $host =~ /[^\x00-\x7f]/;
493              
494 0         0 my @cookies;
495              
496 0         0 while (my ($chost, $paths) = each %$jar) {
497 0 0       0 next unless ref $paths;
498              
499             # exact match or suffix including . match
500 0 0 0     0 $chost eq $host or ".$chost" eq substr $host, -1 - length $chost
501             or next;
502              
503 0         0 while (my ($cpath, $cookies) = each %$paths) {
504 0 0       0 next unless $cpath eq substr $path, 0, length $cpath;
505              
506 0         0 while (my ($cookie, $kv) = each %$cookies) {
507 0 0 0     0 next if $scheme ne "https" && exists $kv->{secure};
508              
509 0 0 0     0 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
510 0         0 delete $cookies->{$cookie};
511 0         0 next;
512             }
513              
514 0         0 my $value = $kv->{value};
515              
516 0 0       0 if ($value =~ /[=;,[:space:]]/) {
517 0         0 $value =~ s/([\\"])/\\$1/g;
518 0         0 $value = "\"$value\"";
519             }
520              
521 0         0 push @cookies, "$cookie=$value";
522             }
523             }
524             }
525              
526             \@cookies
527 0         0 }
528            
529             # parse set_cookie header into jar
530             sub cookie_jar_set_cookie($$$$) {
531 0     0 0 0 my ($jar, $set_cookie, $host, $date) = @_;
532              
533 0 0       0 %$jar = () if $jar->{version} != 2;
534              
535 0         0 my $anow = int AE::now;
536 0         0 my $snow; # server-now
537              
538 0         0 for ($set_cookie) {
539             # parse NAME=VALUE
540 0         0 my @kv;
541              
542             # expires is not http-compliant in the original cookie-spec,
543             # we support the official date format and some extensions
544 0         0 while (
545             m{
546             \G\s*
547             (?:
548             expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
549             | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )?
550             )
551             }gcxsi
552             ) {
553 0         0 my $name = $2;
554 0         0 my $value = $4;
555              
556 0 0       0 if (defined $1) {
    0          
557             # expires
558 0         0 $name = "expires";
559 0         0 $value = $1;
560             } elsif (defined $3) {
561             # quoted
562 0         0 $value = $3;
563 0         0 $value =~ s/\\(.)/$1/gs;
564             }
565              
566 0 0       0 push @kv, @kv ? lc $name : $name, $value;
567              
568 0 0       0 last unless /\G\s*;/gc;
569             }
570              
571 0 0       0 last unless @kv;
572              
573 0         0 my $name = shift @kv;
574 0         0 my %kv = (value => shift @kv, @kv);
575              
576 0 0       0 if (exists $kv{"max-age"}) {
    0          
577 0         0 $kv{_expires} = $anow + delete $kv{"max-age"};
578             } elsif (exists $kv{expires}) {
579 0   0     0 $snow ||= parse_date ($date) || $anow;
      0        
580 0         0 $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
581             } else {
582 0         0 delete $kv{_expires};
583             }
584              
585 0         0 my $cdom;
586 0   0     0 my $cpath = (delete $kv{path}) || "/";
587              
588 0 0       0 if (exists $kv{domain}) {
589 0         0 $cdom = $kv{domain};
590              
591 0         0 $cdom =~ s/^\.?/./; # make sure it starts with a "."
592              
593 0 0       0 next if $cdom =~ /\.$/;
594              
595             # this is not rfc-like and not netscape-like. go figure.
596 0         0 my $ndots = $cdom =~ y/.//;
597 0 0       0 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
    0          
598              
599 0         0 $cdom = substr $cdom, 1; # remove initial .
600             } else {
601 0         0 $cdom = $host;
602             }
603              
604             # store it
605 0         0 $jar->{version} = 2;
606 0         0 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
607              
608 0 0       0 redo if /\G\s*,/gc;
609             }
610             }
611              
612             #############################################################################
613             # keepalive/persistent connection cache
614              
615             # fetch a connection from the keepalive cache
616             sub ka_fetch($) {
617 0     0 0 0 my $ka_key = shift;
618              
619 0         0 my $hdl = pop @{ $KA_CACHE{$ka_key} }; # currently we reuse the MOST RECENTLY USED connection
  0         0  
620             delete $KA_CACHE{$ka_key}
621 0 0       0 unless @{ $KA_CACHE{$ka_key} };
  0         0  
622              
623 0         0 $hdl
624             }
625              
626             sub ka_store($$) {
627 0     0 0 0 my ($ka_key, $hdl) = @_;
628              
629 0   0     0 my $kaa = $KA_CACHE{$ka_key} ||= [];
630              
631             my $destroy = sub {
632 0     0   0 my @ka = grep $_ != $hdl, @{ $KA_CACHE{$ka_key} };
  0         0  
633              
634 0         0 $hdl->destroy;
635              
636             @ka
637             ? $KA_CACHE{$ka_key} = \@ka
638 0 0       0 : delete $KA_CACHE{$ka_key};
639 0         0 };
640              
641             # on error etc., destroy
642 0         0 $hdl->on_error ($destroy);
643 0         0 $hdl->on_eof ($destroy);
644 0         0 $hdl->on_read ($destroy);
645 0         0 $hdl->timeout ($PERSISTENT_TIMEOUT);
646              
647 0         0 push @$kaa, $hdl;
648 0         0 shift @$kaa while @$kaa > $MAX_PER_HOST;
649             }
650              
651             #############################################################################
652             # utilities
653              
654             # continue to parse $_ for headers and place them into the arg
655             sub _parse_hdr() {
656 0     0   0 my %hdr;
657              
658             # things seen, not parsed:
659             # p3pP="NON CUR OTPi OUR NOR UNI"
660              
661 0         0 $hdr{lc $1} .= ",$2"
662             while /\G
663             ([^:\000-\037]*):
664             [\011\040]*
665             ((?: [^\012]+ | \012[\011\040] )*)
666             \012
667             /gxc;
668              
669 0 0       0 /\G$/
670             or return;
671              
672             # remove the "," prefix we added to all headers above
673             substr $_, 0, 1, ""
674 0         0 for values %hdr;
675              
676 0         0 \%hdr
677             }
678              
679             #############################################################################
680             # http_get
681              
682             our $qr_nlnl = qr{(?
683              
684             our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 };
685             our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" };
686              
687             # maybe it should just become a normal object :/
688              
689             sub _destroy_state(\%) {
690 8     8   14 my ($state) = @_;
691              
692 8 50       25 $state->{handle}->destroy if $state->{handle};
693 8         74 %$state = ();
694             }
695              
696             sub _error(\%$$) {
697 8     8   25 my ($state, $cb, $hdr) = @_;
698              
699 8         26 &_destroy_state ($state);
700              
701 8         34 $cb->(undef, $hdr);
702             ()
703 8         594 }
704              
705             our %IDEMPOTENT = (
706             DELETE => 1,
707             GET => 1,
708             HEAD => 1,
709             OPTIONS => 1,
710             PUT => 1,
711             TRACE => 1,
712              
713             ACL => 1,
714             "BASELINE-CONTROL" => 1,
715             BIND => 1,
716             CHECKIN => 1,
717             CHECKOUT => 1,
718             COPY => 1,
719             LABEL => 1,
720             LINK => 1,
721             MERGE => 1,
722             MKACTIVITY => 1,
723             MKCALENDAR => 1,
724             MKCOL => 1,
725             MKREDIRECTREF => 1,
726             MKWORKSPACE => 1,
727             MOVE => 1,
728             ORDERPATCH => 1,
729             PROPFIND => 1,
730             PROPPATCH => 1,
731             REBIND => 1,
732             REPORT => 1,
733             SEARCH => 1,
734             UNBIND => 1,
735             UNCHECKOUT => 1,
736             UNLINK => 1,
737             UNLOCK => 1,
738             UPDATE => 1,
739             UPDATEREDIRECTREF => 1,
740             "VERSION-CONTROL" => 1,
741             );
742              
743             sub http_request($$@) {
744 8     8 1 17 my $cb = pop;
745 8         27 my ($method, $url, %arg) = @_;
746              
747 8         16 my %hdr;
748              
749 8 50 33     54 $arg{tls_ctx} = $TLS_CTX_LOW if $arg{tls_ctx} eq "low" || !exists $arg{tls_ctx};
750 8 50       28 $arg{tls_ctx} = $TLS_CTX_HIGH if $arg{tls_ctx} eq "high";
751              
752 8         19 $method = uc $method;
753              
754 8 50       24 if (my $hdr = $arg{headers}) {
755 0         0 while (my ($k, $v) = each %$hdr) {
756 0         0 $hdr{lc $k} = $v;
757             }
758             }
759              
760             # pseudo headers for all subsequent responses
761 8         21 my @pseudo = (URL => $url);
762 8 50       18 push @pseudo, Redirect => delete $arg{Redirect} if exists $arg{Redirect};
763              
764 8 50       21 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
765              
766 8 50       18 return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" })
767             if $recurse < 0;
768              
769 8 50       15 my $proxy = exists $arg{proxy} ? $arg{proxy} : $PROXY;
770 8   33     27 my $timeout = $arg{timeout} || $TIMEOUT;
771              
772 8         65 my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment
773             $url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|;
774              
775 8         23 $uscheme = lc $uscheme;
776              
777 8 0       20 my $uport = $uscheme eq "http" ? 80
    50          
778             : $uscheme eq "https" ? 443
779             : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
780              
781 8 50       58 $uauthority =~ /^(?: .*\@ )? ([^\@]+?) (?: : (\d+) )?$/x
782             or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
783              
784 8         23 my $uhost = lc $1;
785 8 100       28 $uport = $2 if defined $2;
786              
787             $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
788 8 100       39 unless exists $hdr{host};
    50          
789              
790 8         34 $uhost =~ s/^\[(.*)\]$/$1/;
791 8 50       20 $upath .= $query if length $query;
792              
793 8         26 $upath =~ s%^/?%/%;
794              
795             # cookie processing
796 8 50       22 if (my $jar = $arg{cookie_jar}) {
797 0         0 my $cookies = cookie_jar_extract $jar, $uscheme, $uhost, $upath;
798              
799 0 0       0 $hdr{cookie} = join "; ", @$cookies
800             if @$cookies;
801             }
802              
803 8         15 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
804              
805 8 50       17 if ($proxy) {
806 0         0 ($rpath, $rhost, $rport, $rscheme) = ($url, @$proxy);
807              
808 0 0       0 $rscheme = "http" unless defined $rscheme;
809              
810             # don't support https requests over https-proxy transport,
811             # can't be done with tls as spec'ed, unless you double-encrypt.
812 0 0 0     0 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
813              
814 0         0 $rhost = lc $rhost;
815 0         0 $rscheme = lc $rscheme;
816             } else {
817 8         22 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
818             }
819              
820             # leave out fragment and query string, just a heuristic
821 8 50       29 $hdr{referer} = "$uscheme://$uauthority$upath" unless exists $hdr{referer};
822 8 50       19 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
823              
824             $hdr{"content-length"} = length $arg{body}
825 8 50 33     37 if length $arg{body} || $method ne "GET";
826              
827 8         18 my $idempotent = $IDEMPOTENT{$method};
828              
829             # default value for keepalive is true iff the request is for an idempotent method
830 8 50       19 my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent;
831 8 50       24 my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : !$proxy;
832 8         10 my $was_persistent; # true if this is actually a recycled connection
833              
834             # the key to use in the keepalive cache
835 8         23 my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}";
836              
837 8 50       27 $hdr{connection} = ($persistent ? $keepalive ? "keep-alive, " : "" : "close, ") . "Te"; #1.1
    50          
838 8 50       20 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
839              
840 8         25 my %state = (connect_guard => 1);
841              
842 8         11 my $ae_error = 595; # connecting
843              
844             # handle actual, non-tunneled, request
845             my $handle_actual_request = sub {
846 0     0   0 $ae_error = 596; # request phase
847              
848 0         0 my $hdl = $state{handle};
849              
850 0 0 0     0 $hdl->starttls ("connect") if $uscheme eq "https" && !exists $hdl->{tls};
851              
852             # send request
853             $hdl->push_write (
854             "$method $rpath HTTP/1.1\015\012"
855             . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
856             . "\015\012"
857             . $arg{body}
858 0         0 );
859              
860             # return if error occurred during push_write()
861 0 0       0 return unless %state;
862              
863             # reduce memory usage, save a kitten, also re-use it for the response headers.
864 0         0 %hdr = ();
865              
866             # status line and headers
867             $state{read_response} = sub {
868 0 0       0 return unless %state;
869              
870 0         0 for ("$_[1]") {
871 0         0 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
872              
873 0 0       0 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
874             or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid server response" };
875              
876             # 100 Continue handling
877             # should not happen as we don't send expect: 100-continue,
878             # but we handle it just in case.
879             # since we send the request body regardless, if we get an error
880             # we are out of-sync, which we currently do NOT handle correctly.
881             return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
882 0 0       0 if $2 eq 100;
883              
884 0         0 push @pseudo,
885             HTTPVersion => $1,
886             Status => $2,
887             Reason => $3,
888             ;
889              
890 0 0       0 my $hdr = _parse_hdr
891             or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Garbled response headers" };
892              
893 0         0 %hdr = (%$hdr, @pseudo);
894             }
895              
896             # redirect handling
897             # relative uri handling forced by microsoft and other shitheads.
898             # we give our best and fall back to URI if available.
899 0 0       0 if (exists $hdr{location}) {
900 0         0 my $loc = $hdr{location};
901              
902 0 0       0 if ($loc =~ m%^//%) { # //
    0          
    0          
903 0         0 $loc = "$uscheme:$loc";
904              
905             } elsif ($loc eq "") {
906 0         0 $loc = $url;
907              
908             } elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple"
909 0         0 $loc =~ s/^\.\/+//;
910              
911 0 0       0 if ($loc !~ m%^[.?#]%) {
    0          
912 0         0 my $prefix = "$uscheme://$uauthority";
913              
914 0 0       0 unless ($loc =~ s/^\///) {
915 0         0 $prefix .= $upath;
916 0         0 $prefix =~ s/\/[^\/]*$//;
917             }
918              
919 0         0 $loc = "$prefix/$loc";
920              
921 0         0 } elsif (eval { require URI }) { # uri
922 0         0 $loc = URI->new_abs ($loc, $url)->as_string;
923              
924             } else {
925 0         0 return _error %state, $cb, { @pseudo, Status => 599, Reason => "Cannot parse Location (URI module missing)" };
926             #$hdr{Status} = 599;
927             #$hdr{Reason} = "Unparsable Redirect (URI module missing)";
928             #$recurse = 0;
929             }
930             }
931              
932 0         0 $hdr{location} = $loc;
933             }
934              
935 0         0 my $redirect;
936              
937 0 0       0 if ($recurse) {
938 0         0 my $status = $hdr{Status};
939              
940             # industry standard is to redirect POST as GET for
941             # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
942             # also, the UA should ask the user for 301 and 307 and POST,
943             # industry standard seems to be to simply follow.
944             # we go with the industry standard. 308 is defined
945             # by rfc7538
946 0 0 0     0 if ($status == 301 or $status == 302 or $status == 303) {
    0 0        
      0        
947 0         0 $redirect = 1;
948             # HTTP/1.1 is unclear on how to mutate the method
949 0 0       0 unless ($method eq "HEAD") {
950 0         0 $method = "GET";
951 0         0 delete $arg{body};
952             }
953             } elsif ($status == 307 or $status == 308) {
954 0         0 $redirect = 1;
955             }
956             }
957              
958             my $finish = sub { # ($data, $err_status, $err_reason[, $persistent])
959 0 0       0 if ($state{handle}) {
960             # handle keepalive
961 0 0 0     0 if (
    0 0        
962             $persistent
963             && $_[3]
964             && ($hdr{HTTPVersion} < 1.1
965             ? $hdr{connection} =~ /\bkeep-?alive\b/i
966             : $hdr{connection} !~ /\bclose\b/i)
967             ) {
968 0         0 ka_store $ka_key, delete $state{handle};
969             } else {
970             # no keepalive, destroy the handle
971 0         0 $state{handle}->destroy;
972             }
973             }
974              
975 0         0 %state = ();
976              
977 0 0       0 if (defined $_[1]) {
978 0         0 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
  0         0  
979 0         0 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
  0         0  
980             }
981              
982             # set-cookie processing
983 0 0       0 if ($arg{cookie_jar}) {
984 0         0 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
985             }
986              
987 0 0 0     0 if ($redirect && exists $hdr{location}) {
988             # we ignore any errors, as it is very common to receive
989             # Content-Length != 0 but no actual body
990             # we also access %hdr, as $_[1] might be an erro
991             $state{recurse} =
992             http_request (
993             $method => $hdr{location},
994             %arg,
995             recurse => $recurse - 1,
996             Redirect => [$_[0], \%hdr],
997             sub {
998 0         0 %state = ();
999 0         0 &$cb
1000             },
1001 0         0 );
1002             } else {
1003 0         0 $cb->($_[0], \%hdr);
1004             }
1005 0         0 };
1006              
1007 0         0 $ae_error = 597; # body phase
1008              
1009 0         0 my $chunked = $hdr{"transfer-encoding"} =~ /\bchunked\b/i; # not quite correct...
1010              
1011 0 0       0 my $len = $chunked ? undef : $hdr{"content-length"};
1012              
1013             # body handling, many different code paths
1014             # - no body expected
1015             # - want_body_handle
1016             # - te chunked
1017             # - 2x length known (with or without on_body)
1018             # - 2x length not known (with or without on_body)
1019 0 0 0     0 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
    0 0        
    0 0        
    0 0        
    0 0        
      0        
      0        
1020 0         0 $finish->(undef, 598 => "Request cancelled by on_header");
1021             } elsif (
1022             $hdr{Status} =~ /^(?:1..|204|205|304)$/
1023             or $method eq "HEAD"
1024             or (defined $len && $len == 0) # == 0, not !, because "0 " is true
1025             ) {
1026             # no body
1027 0         0 $finish->("", undef, undef, 1);
1028              
1029             } elsif (!$redirect && $arg{want_body_handle}) {
1030 0         0 $_[0]->on_eof (undef);
1031 0         0 $_[0]->on_error (undef);
1032 0         0 $_[0]->on_read (undef);
1033              
1034 0         0 $finish->(delete $state{handle});
1035              
1036             } elsif ($chunked) {
1037 0         0 my $cl = 0;
1038 0         0 my $body = "";
1039 0   0     0 my $on_body = (!$redirect && $arg{on_body}) || sub { $body .= shift; 1 };
1040              
1041             $state{read_chunk} = sub {
1042 0 0       0 $_[1] =~ /^([0-9a-fA-F]+)/
1043             or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
1044              
1045 0         0 my $len = hex $1;
1046              
1047 0 0       0 if ($len) {
1048 0         0 $cl += $len;
1049              
1050             $_[0]->push_read (chunk => $len, sub {
1051 0 0       0 $on_body->($_[1], \%hdr)
1052             or return $finish->(undef, 598 => "Request cancelled by on_body");
1053              
1054             $_[0]->push_read (line => sub {
1055 0 0       0 length $_[1]
1056             and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
1057 0         0 $_[0]->push_read (line => $state{read_chunk});
1058 0         0 });
1059 0         0 });
1060             } else {
1061 0   0     0 $hdr{"content-length"} ||= $cl;
1062              
1063             $_[0]->push_read (line => $qr_nlnl, sub {
1064 0 0       0 if (length $_[1]) {
1065 0         0 for ("$_[1]") {
1066 0         0 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
1067              
1068 0 0       0 my $hdr = _parse_hdr
1069             or return $finish->(undef, $ae_error => "Garbled response trailers");
1070              
1071 0         0 %hdr = (%hdr, %$hdr);
1072             }
1073             }
1074              
1075 0         0 $finish->($body, undef, undef, 1);
1076 0         0 });
1077             }
1078 0         0 };
1079              
1080 0         0 $_[0]->push_read (line => $state{read_chunk});
1081              
1082             } elsif (!$redirect && $arg{on_body}) {
1083 0 0       0 if (defined $len) {
1084             $_[0]->on_read (sub {
1085 0         0 $len -= length $_[0]{rbuf};
1086              
1087 0 0       0 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1088             or return $finish->(undef, 598 => "Request cancelled by on_body");
1089              
1090 0 0       0 $len > 0
1091             or $finish->("", undef, undef, 1);
1092 0         0 });
1093             } else {
1094             $_[0]->on_eof (sub {
1095 0         0 $finish->("");
1096 0         0 });
1097             $_[0]->on_read (sub {
1098 0 0       0 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1099             or $finish->(undef, 598 => "Request cancelled by on_body");
1100 0         0 });
1101             }
1102             } else {
1103 0         0 $_[0]->on_eof (undef);
1104              
1105 0 0       0 if (defined $len) {
1106             $_[0]->on_read (sub {
1107             $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
1108 0 0       0 if $len <= length $_[0]{rbuf};
1109 0         0 });
1110             } else {
1111             $_[0]->on_error (sub {
1112             ($! == Errno::EPIPE || !$!)
1113             ? $finish->(delete $_[0]{rbuf})
1114 0 0 0     0 : $finish->(undef, $ae_error => $_[2]);
1115 0         0 });
1116 0         0 $_[0]->on_read (sub { });
1117             }
1118             }
1119 0         0 };
1120              
1121             # if keepalive is enabled, then the server closing the connection
1122             # before a response can happen legally - we retry on idempotent methods.
1123 0 0 0     0 if ($was_persistent && $idempotent) {
1124 0         0 my $old_eof = $hdl->{on_eof};
1125             $hdl->{on_eof} = sub {
1126 0         0 _destroy_state %state;
1127              
1128 0         0 %state = ();
1129             $state{recurse} =
1130             http_request (
1131             $method => $url,
1132             %arg,
1133             recurse => $recurse - 1,
1134             persistent => 0,
1135             sub {
1136 0         0 %state = ();
1137 0         0 &$cb
1138             }
1139 0         0 );
1140 0         0 };
1141             $hdl->on_read (sub {
1142 0 0       0 return unless %state;
1143              
1144             # as soon as we receive something, a connection close
1145             # once more becomes a hard error
1146 0         0 $hdl->{on_eof} = $old_eof;
1147 0         0 $hdl->push_read (line => $qr_nlnl, $state{read_response});
1148 0         0 });
1149             } else {
1150 0         0 $hdl->push_read (line => $qr_nlnl, $state{read_response});
1151             }
1152 8         41 };
1153              
1154             my $prepare_handle = sub {
1155 0     0   0 my ($hdl) = $state{handle};
1156              
1157             $hdl->on_error (sub {
1158 0         0 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] };
1159 0         0 });
1160             $hdl->on_eof (sub {
1161 0         0 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" };
1162 0         0 });
1163 0         0 $hdl->timeout_reset;
1164 0         0 $hdl->timeout ($timeout);
1165 8         27 };
1166              
1167             # connected to proxy (or origin server)
1168             my $connect_cb = sub {
1169 8 50   8   20233 my $fh = shift
1170             or return _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "$!" };
1171              
1172 0 0       0 return unless delete $state{connect_guard};
1173              
1174             # get handle
1175             $state{handle} = new AnyEvent::Handle
1176 0         0 %{ $arg{handle_params} },
1177             fh => $fh,
1178             peername => $uhost,
1179             tls_ctx => $arg{tls_ctx},
1180 0         0 ;
1181              
1182 0         0 $prepare_handle->();
1183              
1184             #$state{handle}->starttls ("connect") if $rscheme eq "https";
1185              
1186             # now handle proxy-CONNECT method
1187 0 0 0     0 if ($proxy && $uscheme eq "https") {
1188             # oh dear, we have to wrap it into a connect request
1189              
1190             my $auth = exists $hdr{"proxy-authorization"}
1191 0 0       0 ? "proxy-authorization: " . (delete $hdr{"proxy-authorization"}) . "\015\012"
1192             : "";
1193              
1194             # maybe re-use $uauthority with patched port?
1195 0         0 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012$auth\015\012");
1196             $state{handle}->push_read (line => $qr_nlnl, sub {
1197 0 0       0 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
1198             or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" };
1199              
1200 0 0       0 if ($2 == 200) {
1201 0         0 $rpath = $upath;
1202 0         0 $handle_actual_request->();
1203             } else {
1204 0         0 _error %state, $cb, { @pseudo, Status => $2, Reason => $3 };
1205             }
1206 0         0 });
1207             } else {
1208 0 0       0 delete $hdr{"proxy-authorization"} unless $proxy;
1209              
1210 0         0 $handle_actual_request->();
1211             }
1212 8         26 };
1213              
1214             _get_slot $uhost, sub {
1215 8     8   16 $state{slot_guard} = shift;
1216              
1217 8 50       22 return unless $state{connect_guard};
1218              
1219             # try to use an existing keepalive connection, but only if we, ourselves, plan
1220             # on a keepalive request (in theory, this should be a separate config option).
1221 8 50 33     33 if ($persistent && $KA_CACHE{$ka_key}) {
1222 0         0 $was_persistent = 1;
1223              
1224 0         0 $state{handle} = ka_fetch $ka_key;
1225             # $state{handle}->destroyed
1226             # and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d#
1227 0         0 $prepare_handle->();
1228             # $state{handle}->destroyed
1229             # and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d#
1230 0         0 $handle_actual_request->();
1231              
1232             } else {
1233             my $tcp_connect = $arg{tcp_connect}
1234 8   33     22 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
1235              
1236 8   50     67 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
1237             }
1238 8         40 };
1239              
1240 0     0   0 defined wantarray && AnyEvent::Util::guard { _destroy_state %state }
1241 8 50       52 }
1242              
1243             sub http_get($@) {
1244 8     8 1 1744 unshift @_, "GET";
1245 8         21 &http_request
1246             }
1247              
1248             sub http_head($@) {
1249 0     0 1 0 unshift @_, "HEAD";
1250 0         0 &http_request
1251             }
1252              
1253             sub http_post($$@) {
1254 0     0 1 0 my $url = shift;
1255 0         0 unshift @_, "POST", $url, "body";
1256 0         0 &http_request
1257             }
1258              
1259             =back
1260              
1261             =head2 DNS CACHING
1262              
1263             AnyEvent::HTTP uses the AnyEvent::Socket::tcp_connect function for
1264             the actual connection, which in turn uses AnyEvent::DNS to resolve
1265             hostnames. The latter is a simple stub resolver and does no caching
1266             on its own. If you want DNS caching, you currently have to provide
1267             your own default resolver (by storing a suitable resolver object in
1268             C<$AnyEvent::DNS::RESOLVER>) or your own C callback.
1269              
1270             =head2 GLOBAL FUNCTIONS AND VARIABLES
1271              
1272             =over 4
1273              
1274             =item AnyEvent::HTTP::set_proxy "proxy-url"
1275              
1276             Sets the default proxy server to use. The proxy-url must begin with a
1277             string of the form C, croaks otherwise.
1278              
1279             To clear an already-set proxy, use C.
1280              
1281             When AnyEvent::HTTP is loaded for the first time it will query the
1282             default proxy from the operating system, currently by looking at
1283             C<$ENV{http_proxy>}.
1284              
1285             =item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1286              
1287             Remove all cookies from the cookie jar that have been expired. If
1288             C<$session_end> is given and true, then additionally remove all session
1289             cookies.
1290              
1291             You should call this function (with a true C<$session_end>) before you
1292             save cookies to disk, and you should call this function after loading them
1293             again. If you have a long-running program you can additionally call this
1294             function from time to time.
1295              
1296             A cookie jar is initially an empty hash-reference that is managed by this
1297             module. Its format is subject to change, but currently it is as follows:
1298              
1299             The key C has to contain C<1>, otherwise the hash gets
1300             emptied. All other keys are hostnames or IP addresses pointing to
1301             hash-references. The key for these inner hash references is the
1302             server path for which this cookie is meant, and the values are again
1303             hash-references. Each key of those hash-references is a cookie name, and
1304             the value, you guessed it, is another hash-reference, this time with the
1305             key-value pairs from the cookie, except for C and C,
1306             which have been replaced by a C<_expires> key that contains the cookie
1307             expiry timestamp. Session cookies are indicated by not having an
1308             C<_expires> key.
1309              
1310             Here is an example of a cookie jar with a single cookie, so you have a
1311             chance of understanding the above paragraph:
1312              
1313             {
1314             version => 1,
1315             "10.0.0.1" => {
1316             "/" => {
1317             "mythweb_id" => {
1318             _expires => 1293917923,
1319             value => "ooRung9dThee3ooyXooM1Ohm",
1320             },
1321             },
1322             },
1323             }
1324              
1325             =item $date = AnyEvent::HTTP::format_date $timestamp
1326              
1327             Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
1328             Date (RFC 2616).
1329              
1330             =item $timestamp = AnyEvent::HTTP::parse_date $date
1331              
1332             Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
1333             bunch of minor variations of those, and returns the corresponding POSIX
1334             timestamp, or C if the date cannot be parsed.
1335              
1336             =item $AnyEvent::HTTP::MAX_RECURSE
1337              
1338             The default value for the C request parameter (default: C<10>).
1339              
1340             =item $AnyEvent::HTTP::TIMEOUT
1341              
1342             The default timeout for connection operations (default: C<300>).
1343              
1344             =item $AnyEvent::HTTP::USERAGENT
1345              
1346             The default value for the C header (the default is
1347             C).
1348              
1349             =item $AnyEvent::HTTP::MAX_PER_HOST
1350              
1351             The maximum number of concurrent connections to the same host (identified
1352             by the hostname). If the limit is exceeded, then additional requests
1353             are queued until previous connections are closed. Both persistent and
1354             non-persistent connections are counted in this limit.
1355              
1356             The default value for this is C<4>, and it is highly advisable to not
1357             increase it much.
1358              
1359             For comparison: the RFC's recommend 4 non-persistent or 2 persistent
1360             connections, older browsers used 2, newer ones (such as firefox 3)
1361             typically use 6, and Opera uses 8 because like, they have the fastest
1362             browser and give a shit for everybody else on the planet.
1363              
1364             =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
1365              
1366             The time after which idle persistent connections get closed by
1367             AnyEvent::HTTP (default: C<3>).
1368              
1369             =item $AnyEvent::HTTP::ACTIVE
1370              
1371             The number of active connections. This is not the number of currently
1372             running requests, but the number of currently open and non-idle TCP
1373             connections. This number can be useful for load-leveling.
1374              
1375             =back
1376              
1377             =cut
1378              
1379             our @month = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
1380             our @weekday = qw(Sun Mon Tue Wed Thu Fri Sat);
1381              
1382             sub format_date($) {
1383 0     0 1 0 my ($time) = @_;
1384              
1385             # RFC 822/1123 format
1386 0         0 my ($S, $M, $H, $mday, $mon, $year, $wday, $yday, undef) = gmtime $time;
1387              
1388 0         0 sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT",
1389             $weekday[$wday], $mday, $month[$mon], $year + 1900,
1390             $H, $M, $S;
1391             }
1392              
1393             sub parse_date($) {
1394 0     0 1 0 my ($date) = @_;
1395              
1396 0         0 my ($d, $m, $y, $H, $M, $S);
1397              
1398 0 0       0 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
    0          
    0          
1399             # RFC 822/1123, required by RFC 2616 (with " ")
1400             # cookie dates (with "-")
1401              
1402 0         0 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1403              
1404             } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1405             # RFC 850
1406 0 0       0 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1407              
1408             } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1409             # ISO C's asctime
1410 0         0 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1411             }
1412             # other formats fail in the loop below
1413              
1414 0         0 for (0..11) {
1415 0 0       0 if ($m eq $month[$_]) {
1416 0         0 require Time::Local;
1417 0         0 return eval { Time::Local::timegm ($S, $M, $H, $d, $_, $y) };
  0         0  
1418             }
1419             }
1420              
1421             undef
1422 0         0 }
1423              
1424             sub set_proxy($) {
1425 3 50   3 1 19 if (length $_[0]) {
1426 0 0       0 $_[0] =~ m%^(http):// ([^:/]+) (?: : (\d*) )?%ix
1427             or Carp::croak "$_[0]: invalid proxy URL";
1428 0   0     0 $PROXY = [$2, $3 || 3128, $1]
1429             } else {
1430 3         48 undef $PROXY;
1431             }
1432             }
1433              
1434             # initialise proxy from environment
1435             eval {
1436             set_proxy $ENV{http_proxy};
1437             };
1438              
1439             =head2 SHOWCASE
1440              
1441             This section contains some more elaborate "real-world" examples or code
1442             snippets.
1443              
1444             =head2 HTTP/1.1 FILE DOWNLOAD
1445              
1446             Downloading files with HTTP can be quite tricky, especially when something
1447             goes wrong and you want to resume.
1448              
1449             Here is a function that initiates and resumes a download. It uses the
1450             last modified time to check for file content changes, and works with many
1451             HTTP/1.0 servers as well, and usually falls back to a complete re-download
1452             on older servers.
1453              
1454             It calls the completion callback with either C, which means a
1455             nonretryable error occurred, C<0> when the download was partial and should
1456             be retried, and C<1> if it was successful.
1457              
1458             use AnyEvent::HTTP;
1459              
1460             sub download($$$) {
1461             my ($url, $file, $cb) = @_;
1462              
1463             open my $fh, "+<", $file
1464             or die "$file: $!";
1465              
1466             my %hdr;
1467             my $ofs = 0;
1468              
1469             if (stat $fh and -s _) {
1470             $ofs = -s _;
1471             warn "-s is ", $ofs;
1472             $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9];
1473             $hdr{"range"} = "bytes=$ofs-";
1474             }
1475              
1476             http_get $url,
1477             headers => \%hdr,
1478             on_header => sub {
1479             my ($hdr) = @_;
1480              
1481             if ($hdr->{Status} == 200 && $ofs) {
1482             # resume failed
1483             truncate $fh, $ofs = 0;
1484             }
1485              
1486             sysseek $fh, $ofs, 0;
1487              
1488             1
1489             },
1490             on_body => sub {
1491             my ($data, $hdr) = @_;
1492              
1493             if ($hdr->{Status} =~ /^2/) {
1494             length $data == syswrite $fh, $data
1495             or return; # abort on write errors
1496             }
1497              
1498             1
1499             },
1500             sub {
1501             my (undef, $hdr) = @_;
1502              
1503             my $status = $hdr->{Status};
1504              
1505             if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) {
1506             utime $time, $time, $fh;
1507             }
1508              
1509             if ($status == 200 || $status == 206 || $status == 416) {
1510             # download ok || resume ok || file already fully downloaded
1511             $cb->(1, $hdr);
1512              
1513             } elsif ($status == 412) {
1514             # file has changed while resuming, delete and retry
1515             unlink $file;
1516             $cb->(0, $hdr);
1517              
1518             } elsif ($status == 500 or $status == 503 or $status =~ /^59/) {
1519             # retry later
1520             $cb->(0, $hdr);
1521              
1522             } else {
1523             $cb->(undef, $hdr);
1524             }
1525             }
1526             ;
1527             }
1528              
1529             download "http://server/somelargefile", "/tmp/somelargefile", sub {
1530             if ($_[0]) {
1531             print "OK!\n";
1532             } elsif (defined $_[0]) {
1533             print "please retry later\n";
1534             } else {
1535             print "ERROR\n";
1536             }
1537             };
1538              
1539             =head3 SOCKS PROXIES
1540              
1541             Socks proxies are not directly supported by AnyEvent::HTTP. You can
1542             compile your perl to support socks, or use an external program such as
1543             F (dante) or F to make your program use a socks proxy
1544             transparently.
1545              
1546             Alternatively, for AnyEvent::HTTP only, you can use your own
1547             C function that does the proxy handshake - here is an example
1548             that works with socks4a proxies:
1549              
1550             use Errno;
1551             use AnyEvent::Util;
1552             use AnyEvent::Socket;
1553             use AnyEvent::Handle;
1554              
1555             # host, port and username of/for your socks4a proxy
1556             my $socks_host = "10.0.0.23";
1557             my $socks_port = 9050;
1558             my $socks_user = "";
1559              
1560             sub socks4a_connect {
1561             my ($host, $port, $connect_cb, $prepare_cb) = @_;
1562              
1563             my $hdl = new AnyEvent::Handle
1564             connect => [$socks_host, $socks_port],
1565             on_prepare => sub { $prepare_cb->($_[0]{fh}) },
1566             on_error => sub { $connect_cb->() },
1567             ;
1568              
1569             $hdl->push_write (pack "CCnNZ*Z*", 4, 1, $port, 1, $socks_user, $host);
1570              
1571             $hdl->push_read (chunk => 8, sub {
1572             my ($hdl, $chunk) = @_;
1573             my ($status, $port, $ipn) = unpack "xCna4", $chunk;
1574              
1575             if ($status == 0x5a) {
1576             $connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port");
1577             } else {
1578             $! = Errno::ENXIO; $connect_cb->();
1579             }
1580             });
1581              
1582             $hdl
1583             }
1584              
1585             Use C instead of C when doing Cs,
1586             possibly after switching off other proxy types:
1587              
1588             AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies
1589              
1590             http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub {
1591             my ($data, $headers) = @_;
1592             ...
1593             };
1594              
1595             =head1 SEE ALSO
1596              
1597             L.
1598              
1599             =head1 AUTHOR
1600              
1601             Marc Lehmann
1602             http://home.schmorp.de/
1603              
1604             With many thanks to Дмитрий Шалашов, who provided countless
1605             testcases and bugreports.
1606              
1607             =cut
1608              
1609             1
1610