File Coverage

blib/lib/HTTP/CookieJar.pm
Criterion Covered Total %
statement 159 161 98.7
branch 91 110 82.7
condition 28 41 68.2
subroutine 20 20 100.0
pod 7 7 100.0
total 305 339 89.9


line stmt bran cond sub pod time code
1 8     8   467278 use 5.008001;
  8         103  
2 8     8   36 use strict;
  8         13  
  8         153  
3 8     8   35 use warnings;
  8         18  
  8         385  
4              
5             package HTTP::CookieJar;
6             # ABSTRACT: A minimalist HTTP user agent cookie jar
7             our $VERSION = '0.013'; # TRIAL
8              
9 8     8   45 use Carp ();
  8         10  
  8         111  
10 8     8   3035 use HTTP::Date ();
  8         27725  
  8         15986  
11              
12             my $HAS_MPS = eval { require Mozilla::PublicSuffix; 1 };
13              
14             #pod =construct new
15             #pod
16             #pod my $jar = HTTP::CookieJar->new;
17             #pod
18             #pod Return a new, empty cookie jar
19             #pod
20             #pod =cut
21              
22             sub new {
23 31     31 1 96475 my ($class) = @_;
24 31         113 bless { store => {} }, $class;
25             }
26              
27             #pod =method add
28             #pod
29             #pod $jar->add(
30             #pod "http://www.example.com/", "lang=en-US; Path=/; Domain=example.com"
31             #pod );
32             #pod
33             #pod Given a request URL and a C header string, attempts to adds the
34             #pod cookie to the jar. If the cookie is expired, instead it deletes any matching
35             #pod cookie from the jar. A C attribute will be converted to an absolute
36             #pod C attribute.
37             #pod
38             #pod It will throw an exception if the request URL is missing or invalid. Returns true if
39             #pod successful cookie processing or undef/empty-list on failure.
40             #pod
41             #pod =cut
42              
43             sub add {
44 44     44 1 259 my ( $self, $request, $cookie ) = @_;
45 44 50 33     189 return unless defined $cookie and length $cookie;
46 44         67 my ( $scheme, $host, $port, $request_path ) = eval { _split_url($request) };
  44         84  
47 44 50       92 Carp::croak($@) if $@;
48              
49 44 100       84 return unless my $parse = _parse_cookie($cookie);
50 43         72 my $name = $parse->{name};
51              
52             # check and normalize domain
53 43 100       77 if ( exists $parse->{domain} ) {
54 16 100       47 _normalize_domain( $host, $parse ) or return;
55             }
56             else {
57 27         43 $parse->{domain} = $host;
58 27         50 $parse->{hostonly} = 1;
59             }
60 39         56 my $domain = $parse->{domain};
61              
62             # normalize path
63 39 100 66     135 if ( !exists $parse->{path} || substr( $parse->{path}, 0, 1 ) ne "/" ) {
64 22         44 $parse->{path} = _default_path($request_path);
65             }
66 39         59 my $path = $parse->{path};
67             # set timestamps and normalize expires
68 39         91 my $now = $parse->{creation_time} = $parse->{last_access_time} = time;
69 39 100       97 if ( exists $parse->{'max-age'} ) {
70             # "If delta-seconds is less than or equal to zero (0), let expiry-time
71             # be the earliest representable date and time."
72             $parse->{expires} = $parse->{'max-age'} <= 0
73             ? 0
74 6 100       21 : $now + $parse->{'max-age'};
75 6         9 delete $parse->{'max-age'};
76             }
77             # update creation time from old cookie, if exists
78 39 100       132 if ( my $old = $self->{store}{$domain}{$path}{$name} ) {
79 4         7 $parse->{creation_time} = $old->{creation_time};
80             }
81             # if cookie has expired, purge any old matching cookie, too
82 39 100 100     103 if ( defined $parse->{expires} && $parse->{expires} < $now ) {
83 4         9 delete $self->{store}{$domain}{$path}{$name};
84             }
85             else {
86 35         64 $self->{store}{$domain}{$path}{$name} = $parse;
87             }
88 39         114 return 1;
89             }
90              
91             #pod =method clear
92             #pod
93             #pod $jar->clear
94             #pod
95             #pod Empties the cookie jar.
96             #pod
97             #pod =cut
98              
99             sub clear {
100 5     5 1 9783 my ($self) = @_;
101 5         20 $self->{store} = {};
102 5         11 return 1;
103             }
104              
105             #pod =method cookies_for
106             #pod
107             #pod my @cookies = $jar->cookies_for("http://www.example.com/foo/bar");
108             #pod
109             #pod Given a request URL, returns a list of hash references representing cookies
110             #pod that should be sent. The hash references are copies -- changing values
111             #pod will not change the cookies in the jar.
112             #pod
113             #pod Cookies set C will only be returned if the request scheme is C.
114             #pod Expired cookies will not be returned.
115             #pod
116             #pod Keys of a cookie hash reference might include:
117             #pod
118             #pod =for :list
119             #pod * name -- the name of the cookie
120             #pod * value -- the value of the cookie
121             #pod * domain -- the domain name to which the cookie applies
122             #pod * path -- the path to which the cookie applies
123             #pod * expires -- if present, when the cookie expires in epoch seconds
124             #pod * secure -- if present, the cookie was set C
125             #pod * httponly -- if present, the cookie was set C
126             #pod * hostonly -- if present, the cookie may only be used with the domain as a host
127             #pod * creation_time -- epoch time when the cookie was first stored
128             #pod * last_access_time -- epoch time when the cookie was last accessed (i.e. "now")
129             #pod
130             #pod Keep in mind that C means it should only be used in requests and not
131             #pod made available via Javascript, etc. This is pretty meaningless for Perl user
132             #pod agents.
133             #pod
134             #pod Generally, user agents should use the C method instead.
135             #pod
136             #pod It will throw an exception if the request URL is missing or invalid.
137             #pod
138             #pod =cut
139              
140             sub cookies_for {
141 11     11 1 2892 my ( $self, $request ) = @_;
142 11         27 my @found = $self->_cookies_for($request);
143 11         29 return map { {%$_} } @found;
  17         92  
144             }
145              
146             # _cookies_for returns originals, not copies, which helps in testing
147             sub _cookies_for {
148 12     12   51 my ( $self, $request ) = @_;
149 12         22 my ( $scheme, $host, $port, $request_path ) = eval { _split_url($request) };
  12         25  
150 12 50       27 Carp::croak($@) if $@;
151              
152 12         18 my @found;
153 12         22 my $now = time;
154 12         33 for my $cookie ( $self->_all_cookies ) {
155 23 100 100     87 next if $cookie->{hostonly} && $host ne $cookie->{domain};
156 22 100 100     46 next if $cookie->{secure} && $scheme ne 'https';
157 21 50 66     44 next if defined( $cookie->{expires} ) && $cookie->{expires} < $now;
158 21 50       37 next unless _domain_match( $host, $cookie->{domain} );
159 21 100       39 next unless _path_match( $request_path, $cookie->{path} );
160 20         44 $cookie->{last_access_time} = time;
161 20         55 push @found, $cookie;
162             }
163             @found = sort {
164 12         34 length( $b->{path} ) <=> length( $a->{path} )
165             || $a->{creation_time} <=> $b->{creation_time}
166 15 50       89 } @found;
167 12         29 return @found;
168             }
169              
170             #pod =method cookie_header
171             #pod
172             #pod my $header = $jar->cookie_header("http://www.example.com/foo/bar");
173             #pod
174             #pod Given a request URL, returns a correctly-formatted string with all relevant
175             #pod cookies for the request. This string is ready to be used in a C header
176             #pod in an HTTP request. E.g.:
177             #pod
178             #pod SID=31d4d96e407aad42; lang=en-US
179             #pod
180             #pod It follows the same exclusion rules as C.
181             #pod
182             #pod If the request is invalid or no cookies apply, it will return an empty string.
183             #pod
184             #pod =cut
185              
186             sub cookie_header {
187 7     7 1 29 my ( $self, $req ) = @_;
188 7         15 return join( "; ", map { "$_->{name}=$_->{value}" } $self->cookies_for($req) );
  5         27  
189             }
190              
191             #pod =method dump_cookies
192             #pod
193             #pod my @list = $jar->dump_cookies;
194             #pod my @list = $jar->dump_cookies( { persistent => 1 } );
195             #pod
196             #pod Returns a list of raw cookies in string form. The strings resemble what
197             #pod would be received from C headers, but with additional internal
198             #pod fields. The list is only intended for use with C to allow
199             #pod cookie jar persistence.
200             #pod
201             #pod If a hash reference with a true C key is given as an argument,
202             #pod cookies without an C time (i.e. "session cookies") will be omitted.
203             #pod
204             #pod Here is a trivial example of saving a cookie jar file with L:
205             #pod
206             #pod path("jar.txt")->spew( join "\n", $jar->dump_cookies );
207             #pod
208             #pod =cut
209              
210             sub dump_cookies {
211 7     7 1 25 my ( $self, $args ) = @_;
212 7         11 my @list;
213 7         15 for my $c ( $self->_all_cookies ) {
214 8         19 my @parts = "$c->{name}=$c->{value}";
215 8 100       16 if ( defined $c->{expires} ) {
216 4         11 push @parts, 'Expires=' . HTTP::Date::time2str( $c->{expires} );
217             }
218             else {
219 4 100       10 next if $args->{persistent};
220             }
221 7         68 for my $attr (qw/Domain Path Creation_Time Last_Access_Time/) {
222 28 50       78 push @parts, "$attr=$c->{lc $attr}" if defined $c->{ lc $attr };
223             }
224 7         12 for my $attr (qw/Secure HttpOnly HostOnly/) {
225 21 100       50 push @parts, $attr if $c->{ lc $attr };
226             }
227 7         26 push @list, join( "; ", @parts );
228             }
229 7         25 return @list;
230             }
231              
232             #pod =method load_cookies
233             #pod
234             #pod $jar->load_cookies( @cookies );
235             #pod
236             #pod Given a list of cookie strings from C, it adds them to
237             #pod the cookie jar. Cookies added in this way will supersede any existing
238             #pod cookies with similar domain, path and name.
239             #pod
240             #pod It returns the jar object for convenience when loading a new object:
241             #pod
242             #pod my $jar = HTTP::CookieJar->new->load_cookies( @cookies );
243             #pod
244             #pod Here is a trivial example of loading a cookie jar file with L:
245             #pod
246             #pod my $jar = HTTP::CookieJar->new->load_cookies(
247             #pod path("jar.txt")->lines
248             #pod );
249             #pod
250             #pod =cut
251              
252             sub load_cookies {
253 4     4 1 12 my ( $self, @cookies ) = @_;
254 4         7 for my $cookie (@cookies) {
255 5         22 my $p = _parse_cookie( $cookie, 1 );
256 5 100 66     19 next unless exists $p->{domain} && exists $p->{path};
257 4         8 $p->{$_} = time for grep { !defined $p->{$_} } qw/creation_time last_access_time/;
  8         19  
258 4         14 $self->{store}{ $p->{domain} }{ $p->{path} }{ $p->{name} } = $p;
259             }
260 4         15 return $self;
261             }
262              
263             #--------------------------------------------------------------------------#
264             # private methods
265             #--------------------------------------------------------------------------#
266              
267             # return flattened list of all cookies
268             sub _all_cookies {
269 19     19   35 return map { values %$_ } map { values %$_ } values %{ $_[0]->{store} };
  32         68  
  24         58  
  19         86  
270             }
271              
272             #--------------------------------------------------------------------------#
273             # Helper subroutines
274             #--------------------------------------------------------------------------#
275              
276             my $pub_re = qr/(?:domain|path|expires|max-age|httponly|secure)/;
277             my $pvt_re = qr/(?:$pub_re|creation_time|last_access_time|hostonly)/;
278              
279             sub _parse_cookie {
280 62     62   19650 my ( $cookie, $private ) = @_;
281 62 50       118 $cookie = '' unless defined $cookie;
282 62         182 my ( $kvp, @attrs ) = split /;/, $cookie;
283 62 100       110 $kvp = '' unless defined $kvp;
284             my ( $name, $value ) =
285 62         136 map { s/^\s*//; s/\s*$//; $_ } split( /=/, $kvp, 2 ); ## no critic
  118         284  
  118         408  
  118         250  
286              
287 62 100 100     225 return unless defined $name and length $name;
288 58 50       102 $value = '' unless defined $value;
289 58         149 my $parse = { name => $name, value => $value };
290 58         100 for my $s (@attrs) {
291 97 100 66     365 next unless defined $s && $s =~ /\S/;
292 94         204 my ( $k, $v ) = map { s/^\s*//; s/\s*$//; $_ } split( /=/, $s, 2 ); ## no critic
  166         335  
  166         436  
  166         311  
293 94         155 $k = lc $k;
294 94 100       774 next unless $private ? ( $k =~ m/^$pvt_re$/ ) : ( $k =~ m/^$pub_re$/ );
    50          
295 94 100       242 $v = 1 if $k =~ m/^(?:httponly|secure|hostonly)$/; # boolean flag if present
296 94 100 100     176 $v = HTTP::Date::str2time($v) || 0 if $k eq 'expires'; # convert to epoch
297 94 100       864 next unless length $v;
298 93 100       151 $v =~ s{^\.}{} if $k eq 'domain'; # strip leading dot
299 93 100       146 $v =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if $k eq 'path'; # unescape
  0         0  
300 93         217 $parse->{$k} = $v;
301             }
302 58         143 return $parse;
303             }
304              
305             sub _domain_match {
306 46     46   82 my ( $string, $dom_string ) = @_;
307 46 100       97 return 1 if $dom_string eq $string;
308 27 100       75 return unless $string =~ /[a-z]/i; # non-numeric
309 24 100       183 if ( $string =~ s{\Q$dom_string\E$}{} ) {
310 13         48 return substr( $string, -1, 1 ) eq '.'; # "foo."
311             }
312 11         35 return;
313             }
314              
315             sub _normalize_domain {
316 16     16   29 my ( $host, $parse ) = @_;
317              
318 16 50       38 if ($HAS_MPS) {
319 16         22 my $host_pub_suff = eval { Mozilla::PublicSuffix::public_suffix($host) };
  16         46  
320 16 100       633 $host_pub_suff = '' unless defined $host_pub_suff;
321 16 100       41 if ( _domain_match( $host_pub_suff, $parse->{domain} ) ) {
322 4 100       10 if ( $parse->{domain} eq $host ) {
323 2         7 return $parse->{hostonly} = 1;
324             }
325             else {
326 2         9 return;
327             }
328             }
329             }
330              
331 12 100 66     57 if ( $parse->{domain} !~ m{\.} && $parse->{domain} eq $host ) {
332 3         10 return $parse->{hostonly} = 1;
333             }
334              
335 9         23 return _domain_match( $host, $parse->{domain} );
336             }
337              
338             sub _default_path {
339 22     22   42 my ($path) = @_;
340 22 50 33     102 return "/" if !length $path || substr( $path, 0, 1 ) ne "/";
341 22         80 my ($default) = $path =~ m{^(.*)/}; # greedy to last /
342 22 100       65 return length($default) ? $default : "/";
343             }
344              
345             sub _path_match {
346 21     21   35 my ( $req_path, $cookie_path ) = @_;
347 21 100       35 return 1 if $req_path eq $cookie_path;
348 18 100       202 if ( $req_path =~ m{^\Q$cookie_path\E(.*)} ) {
349 17         38 my $rest = $1;
350 17 100       42 return 1 if substr( $cookie_path, -1, 1 ) eq '/';
351 11 50       45 return 1 if substr( $rest, 0, 1 ) eq '/';
352             }
353 1         4 return;
354             }
355              
356             sub _split_url {
357 56     56   82 my $url = shift;
358 56 50 33     187 die(qq/No URL provided\n/) unless defined $url and length $url;
359              
360             # URI regex adapted from the URI module
361             # XXX path_query here really chops at ? or # to get just the path and not the query
362 56 50       342 my ( $scheme, $authority, $path_query ) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#?]*)>
363             or die(qq/Cannot parse URL: '$url'\n/);
364              
365 56         117 $scheme = lc $scheme;
366 56 50       144 $path_query = "/$path_query" unless $path_query =~ m<\A/>;
367 56         93 $path_query =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
  0         0  
368              
369 56 50       139 my $host = ( length($authority) ) ? lc $authority : 'localhost';
370 56         82 $host =~ s/\A[^@]*@//; # userinfo
371 56         90 my $port = do {
372 56 50 33     222 $host =~ s/:([0-9]*)\z// && length $1
    100          
    50          
373             ? $1
374             : ( $scheme eq 'http' ? 80 : $scheme eq 'https' ? 443 : undef );
375             };
376              
377 56         173 return ( $scheme, $host, $port, $path_query );
378             }
379              
380             1;
381              
382              
383             # vim: ts=4 sts=4 sw=4 et:
384              
385             __END__