line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTTP::Cache::Transparent; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
139525
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
125
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
our $VERSION = '1.2'; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
HTTP::Cache::Transparent - Cache the result of http get-requests persistently. |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 SYNOPSIS |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
use LWP::Simple; |
14
|
|
|
|
|
|
|
use HTTP::Cache::Transparent; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
HTTP::Cache::Transparent::init( { |
17
|
|
|
|
|
|
|
BasePath => '/tmp/cache', |
18
|
|
|
|
|
|
|
} ); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my $data = get( 'http://www.sn.no' ); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 DESCRIPTION |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
An implementation of http get that keeps a local cache of fetched |
25
|
|
|
|
|
|
|
pages to avoid fetching the same data from the server if it hasn't |
26
|
|
|
|
|
|
|
been updated. The cache is stored on disk and is thus persistent |
27
|
|
|
|
|
|
|
between invocations. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
Uses the http-headers If-Modified-Since and ETag to let the server |
30
|
|
|
|
|
|
|
decide if the version in the cache is up-to-date or not. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
The cache is implemented by modifying the LWP::UserAgent class to |
33
|
|
|
|
|
|
|
seamlessly cache the result of all requests that can be cached. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 INITIALIZING THE CACHE |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
HTTP::Cache::Transparent provides an init-method that sets the |
38
|
|
|
|
|
|
|
parameters for the cache and overloads a method in LWP::UserAgent |
39
|
|
|
|
|
|
|
to activate the cache.After init has been called, the normal |
40
|
|
|
|
|
|
|
LWP-methods (LWP::Simple as well as the more full-fledged |
41
|
|
|
|
|
|
|
LWP::Request methods) should be used as usual. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=over 4 |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=cut |
46
|
|
|
|
|
|
|
|
47
|
2
|
|
|
2
|
|
11
|
use Carp; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
165
|
|
48
|
2
|
|
|
2
|
|
772
|
use LWP::UserAgent; |
|
2
|
|
|
|
|
51115
|
|
|
2
|
|
|
|
|
94
|
|
49
|
2
|
|
|
2
|
|
15
|
use HTTP::Status qw/RC_NOT_MODIFIED RC_OK RC_PARTIAL_CONTENT status_message/; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
344
|
|
50
|
|
|
|
|
|
|
|
51
|
2
|
|
|
2
|
|
13
|
use Digest::MD5 qw/md5_hex/; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
115
|
|
52
|
2
|
|
|
2
|
|
1237
|
use IO::File; |
|
2
|
|
|
|
|
7510
|
|
|
2
|
|
|
|
|
182
|
|
53
|
2
|
|
|
2
|
|
870
|
use File::Copy; |
|
2
|
|
|
|
|
3416
|
|
|
2
|
|
|
|
|
91
|
|
54
|
2
|
|
|
2
|
|
10
|
use File::Path; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
88
|
|
55
|
2
|
|
|
2
|
|
8
|
use Cwd; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
523
|
|
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# These are the response-headers that we should store in the |
58
|
|
|
|
|
|
|
# cache-entry and recreate when we return a cached response. |
59
|
|
|
|
|
|
|
my @cache_headers = qw/Content-Type Content-Encoding |
60
|
|
|
|
|
|
|
Content-Length Content-Range |
61
|
|
|
|
|
|
|
Last-Modified/; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
my $basepath; |
64
|
|
|
|
|
|
|
my $maxage; |
65
|
|
|
|
|
|
|
my $verbose; |
66
|
|
|
|
|
|
|
my $noupdate; |
67
|
|
|
|
|
|
|
my $approvecontent; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
my $org_simple_request; |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=item init |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Initialize the HTTP cache. Takes a single parameter which is a |
74
|
|
|
|
|
|
|
hashref containing named arguments to the object. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
HTTP::Cache::Transparent::init( { |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Directory to store the cache in. |
79
|
|
|
|
|
|
|
BasePath => "/tmp/cache", |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# How many hours should items be kept in the cache |
82
|
|
|
|
|
|
|
# after they were last requested? |
83
|
|
|
|
|
|
|
# Default is 8*24. |
84
|
|
|
|
|
|
|
MaxAge => 8*24, |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# Print progress-messages to STDERR. |
87
|
|
|
|
|
|
|
# Default is 0. |
88
|
|
|
|
|
|
|
Verbose => 1, |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# If a request is made for a url that has been requested |
91
|
|
|
|
|
|
|
# from the server less than NoUpdate seconds ago, the |
92
|
|
|
|
|
|
|
# response will be generated from the cache without |
93
|
|
|
|
|
|
|
# contacting the server. |
94
|
|
|
|
|
|
|
# Default is 0. |
95
|
|
|
|
|
|
|
NoUpdate => 15*60, |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# When a url has been downloaded and the response indicates that |
98
|
|
|
|
|
|
|
# has been modified compared to the content in the cache, |
99
|
|
|
|
|
|
|
# the ApproveContent callback is called with the HTTP::Response. |
100
|
|
|
|
|
|
|
# The callback shall return true if the response shall be used and |
101
|
|
|
|
|
|
|
# stored in the cache or false if the response shall be discarded |
102
|
|
|
|
|
|
|
# and the response in the cache used instead. |
103
|
|
|
|
|
|
|
# This mechanism can be used to work around servers that return errors |
104
|
|
|
|
|
|
|
# intermittently. The default is to accept all responses. |
105
|
|
|
|
|
|
|
ApproveContent => sub { return $_[0]->is_success }, |
106
|
|
|
|
|
|
|
} ); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
The directory where the cache is stored must be writable. It must also only |
109
|
|
|
|
|
|
|
contain files created by HTTP::Cache::Transparent. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=cut |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
my $initialized = 0; |
114
|
|
|
|
|
|
|
sub init { |
115
|
1
|
|
|
1
|
1
|
1308
|
my( $arg ) = @_; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
defined( $arg->{BasePath} ) |
118
|
1
|
50
|
|
|
|
4
|
or croak( "You must specify a BasePath" ); |
119
|
|
|
|
|
|
|
|
120
|
1
|
|
|
|
|
2
|
$basepath = $arg->{BasePath}; |
121
|
|
|
|
|
|
|
|
122
|
1
|
50
|
|
|
|
14
|
if( not -d $basepath ) { |
123
|
0
|
|
|
|
|
0
|
eval { mkpath($basepath) }; |
|
0
|
|
|
|
|
0
|
|
124
|
0
|
0
|
|
|
|
0
|
if ($@) { |
125
|
0
|
|
|
|
|
0
|
print STDERR "$basepath is not a directory and cannot be created: $@\n"; |
126
|
0
|
|
|
|
|
0
|
exit 1; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# Append a trailing slash if it is missing. |
131
|
1
|
|
|
|
|
9
|
$basepath =~ s%([^/])$%$1/%; |
132
|
|
|
|
|
|
|
|
133
|
1
|
|
50
|
|
|
6
|
$maxage = $arg->{MaxAge} || 8*24; |
134
|
1
|
|
50
|
|
|
6
|
$verbose = $arg->{Verbose} || 0; |
135
|
1
|
|
50
|
|
|
3
|
$noupdate = $arg->{NoUpdate} || 0; |
136
|
1
|
|
50
|
0
|
|
11
|
$approvecontent = $arg->{ApproveContent} || sub { return 1; }; |
|
0
|
|
|
|
|
0
|
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Make sure that LWP::Simple does not use its simplified |
139
|
|
|
|
|
|
|
# get-method that bypasses LWP::UserAgent. |
140
|
1
|
|
|
|
|
2
|
$LWP::Simple::FULL_LWP++; |
141
|
|
|
|
|
|
|
|
142
|
1
|
50
|
|
|
|
7
|
unless ($initialized++) { |
143
|
1
|
|
|
|
|
2
|
$org_simple_request = \&LWP::UserAgent::simple_request; |
144
|
|
|
|
|
|
|
|
145
|
2
|
|
|
2
|
|
20
|
no warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
2423
|
|
146
|
1
|
|
|
|
|
6
|
*LWP::UserAgent::simple_request = \&_simple_request_cache |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=item Initializing from use-line |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
An alternative way of initializing HTTP::Cache::Transparent is to supply |
153
|
|
|
|
|
|
|
parameters in the use-line. This allows you to write |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
use HTTP::Cache::Transparent ( BasePath => '/tmp/cache' ); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
which is exactly equivalent to |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
use HTTP::Cache::Transparent; |
160
|
|
|
|
|
|
|
HTTP::Cache::Transparent::init( BasePath => '/tmp/cache' ); |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
The advantage to using this method is that you can do |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
perl -MHTTP::Cache::Transparent=BasePath,/tmp/cache myscript.pl |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
or even set the environment variable PERL5OPT |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
PERL5OPT=-MHTTP::Cache::Transparent=BasePath,/tmp/cache |
169
|
|
|
|
|
|
|
myscript.pl |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
and have all the http-requests performed by myscript.pl go through the |
172
|
|
|
|
|
|
|
cache without changing myscript.pl |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=back |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=cut |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub import { |
179
|
2
|
|
|
2
|
|
19
|
my( $module, %args ) = @_; |
180
|
2
|
50
|
|
|
|
1587
|
return if (scalar(keys(%args)) == 0); |
181
|
|
|
|
|
|
|
|
182
|
0
|
|
|
|
|
0
|
HTTP::Cache::Transparent::init( \%args ); |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
END { |
186
|
2
|
|
|
2
|
|
1770
|
_remove_old_entries(); |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
sub _simple_request_cache { |
190
|
3
|
|
|
3
|
|
10012396
|
my($self, $r, $content_cb, $read_size_hint) = @_; |
191
|
|
|
|
|
|
|
|
192
|
3
|
|
|
|
|
6
|
my $res; |
193
|
|
|
|
|
|
|
|
194
|
3
|
50
|
33
|
|
|
12
|
if( $r->method eq "GET" and |
|
|
|
33
|
|
|
|
|
195
|
|
|
|
|
|
|
not defined( $r->header( 'If-Modified-Since' ) ) and |
196
|
|
|
|
|
|
|
not defined( $content_cb ) ) { |
197
|
3
|
50
|
|
|
|
206
|
print STDERR "Fetching " . $r->uri |
198
|
|
|
|
|
|
|
if( $verbose ); |
199
|
|
|
|
|
|
|
|
200
|
3
|
|
|
|
|
10
|
my $url = $r->uri->as_string; |
201
|
3
|
|
|
|
|
84
|
my $key = $url; |
202
|
3
|
50
|
|
|
|
10
|
$key .= "\n" . $r->header('Range') |
203
|
|
|
|
|
|
|
if defined $r->header('Range'); |
204
|
|
|
|
|
|
|
|
205
|
3
|
|
|
|
|
86
|
my $filename = $basepath . _urlhash( $url ); |
206
|
|
|
|
|
|
|
|
207
|
3
|
|
|
|
|
4
|
my $fh; |
208
|
|
|
|
|
|
|
my $meta; |
209
|
|
|
|
|
|
|
|
210
|
3
|
100
|
|
|
|
86
|
if( -s $filename ) { |
211
|
2
|
50
|
|
|
|
15
|
$fh = new IO::File "< $filename" |
212
|
|
|
|
|
|
|
or die "Failed to read from $filename"; |
213
|
|
|
|
|
|
|
|
214
|
2
|
|
|
|
|
133
|
$meta = _read_meta( $fh ); |
215
|
|
|
|
|
|
|
|
216
|
2
|
50
|
|
|
|
6
|
if( $meta->{Url} eq $url ) { |
217
|
|
|
|
|
|
|
$meta->{'Range'} = "" |
218
|
2
|
50
|
|
|
|
7
|
unless defined( $meta->{'Range'} ); |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# Check that the Range is the same for this request as |
221
|
|
|
|
|
|
|
# for the one in the cache. |
222
|
2
|
50
|
33
|
|
|
5
|
if( (not defined( $r->header( 'Range' ) ) ) or |
223
|
|
|
|
|
|
|
$r->header( 'Range' ) eq $meta->{'Range'} ) { |
224
|
|
|
|
|
|
|
$r->header( 'If-Modified-Since', $meta->{'Last-Modified'} ) |
225
|
2
|
50
|
|
|
|
59
|
if exists( $meta->{'Last-Modified'} ); |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
$r->header( 'If-None-Match', $meta->{ETag} ) |
228
|
2
|
50
|
|
|
|
67
|
if( exists( $meta->{ETag} ) ); |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
else { |
232
|
0
|
|
|
|
|
0
|
warn "Cache collision: $url and $meta->{Url} have the same md5sum"; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
3
|
100
|
100
|
|
|
71
|
if( defined( $meta->{'X-HCT-LastUpdated'} ) and |
237
|
|
|
|
|
|
|
$noupdate > (time - $meta->{'X-HCT-LastUpdated'} ) ) { |
238
|
1
|
50
|
|
|
|
2
|
print STDERR " from cache without checking with server.\n" |
239
|
|
|
|
|
|
|
if $verbose; |
240
|
|
|
|
|
|
|
|
241
|
1
|
|
|
|
|
6
|
$res = HTTP::Response->new( $meta->{Code} ); |
242
|
1
|
|
|
|
|
29
|
$res->request($r); |
243
|
1
|
|
|
|
|
7
|
_get_from_cachefile( $filename, $fh, $res, $meta ); |
244
|
1
|
50
|
|
|
|
32
|
$fh->close() |
245
|
|
|
|
|
|
|
if defined $fh;; |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# Set X-No-Server-Contact header as content delivered without contact with external server |
248
|
1
|
|
|
|
|
5
|
$res->header( "X-No-Server-Contact", 1 ); |
249
|
|
|
|
|
|
|
|
250
|
1
|
|
|
|
|
35
|
return $res; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
2
|
|
|
|
|
8
|
$res = &$org_simple_request( $self, $r ); |
254
|
|
|
|
|
|
|
|
255
|
2
|
100
|
33
|
|
|
560359
|
if( $res->code == RC_NOT_MODIFIED ) { |
|
|
50
|
|
|
|
|
|
256
|
1
|
50
|
|
|
|
18
|
print STDERR " from cache.\n" |
257
|
|
|
|
|
|
|
if( $verbose ); |
258
|
|
|
|
|
|
|
|
259
|
1
|
|
|
|
|
12
|
_get_from_cachefile( $filename, $fh, $res, $meta ); |
260
|
|
|
|
|
|
|
|
261
|
1
|
50
|
|
|
|
62
|
$fh->close() |
262
|
|
|
|
|
|
|
if defined $fh;; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# We need to rewrite the cache-entry to update X-HCT-LastUpdated |
265
|
1
|
|
|
|
|
9
|
_write_cache_entry( $filename, $url, $r, $res ); |
266
|
1
|
|
|
|
|
188
|
return $res; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
elsif( defined( $meta->{'X-HCT-LastUpdated'} ) |
269
|
0
|
|
|
|
|
0
|
and not &{$approvecontent}( $res ) ) { |
270
|
0
|
0
|
|
|
|
0
|
print STDERR " from cache since the response was not approved.\n" |
271
|
|
|
|
|
|
|
if( $verbose ); |
272
|
|
|
|
|
|
|
|
273
|
0
|
|
|
|
|
0
|
_get_from_cachefile( $filename, $fh, $res, $meta ); |
274
|
|
|
|
|
|
|
|
275
|
0
|
0
|
|
|
|
0
|
$fh->close() |
276
|
|
|
|
|
|
|
if defined $fh;; |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
# Do NOT update the cache! |
279
|
|
|
|
|
|
|
|
280
|
0
|
|
|
|
|
0
|
return $res; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
else { |
283
|
1
|
50
|
|
|
|
16
|
$fh->close() |
284
|
|
|
|
|
|
|
if defined $fh;; |
285
|
|
|
|
|
|
|
|
286
|
1
|
|
|
|
|
7
|
my $content = $res->content; |
287
|
1
|
50
|
|
|
|
11
|
$content = "" if not defined $content; |
288
|
|
|
|
|
|
|
|
289
|
1
|
50
|
33
|
|
|
5
|
if( defined( $meta->{MD5} ) and |
290
|
|
|
|
|
|
|
md5_hex( $content ) eq $meta->{MD5} ) { |
291
|
0
|
|
|
|
|
0
|
$res->header( "X-Content-Unchanged", 1 ); |
292
|
0
|
0
|
|
|
|
0
|
print STDERR " unchanged" |
293
|
|
|
|
|
|
|
if( $verbose ); |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
1
|
50
|
|
|
|
2
|
print STDERR " from server.\n" |
297
|
|
|
|
|
|
|
if( $verbose ); |
298
|
|
|
|
|
|
|
|
299
|
1
|
50
|
33
|
|
|
28
|
_write_cache_entry( $filename, $url, $r, $res ) |
300
|
|
|
|
|
|
|
if( $res->code == RC_OK or |
301
|
|
|
|
|
|
|
$res->code == RC_PARTIAL_CONTENT ); |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
else { |
305
|
|
|
|
|
|
|
# We won't try to cache this request. |
306
|
0
|
|
|
|
|
0
|
$res = &$org_simple_request( $self, $r, |
307
|
|
|
|
|
|
|
$content_cb, $read_size_hint ); |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
1
|
|
|
|
|
97
|
return $res; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
sub _get_from_cachefile { |
314
|
2
|
|
|
2
|
|
5
|
my( $filename, $fh, $res, $meta ) = @_; |
315
|
|
|
|
|
|
|
|
316
|
2
|
|
|
|
|
3
|
my $content; |
317
|
|
|
|
|
|
|
my $buf; |
318
|
2
|
|
|
|
|
13
|
while ( $fh->read( $buf, 1024 ) > 0 ) { |
319
|
4
|
|
|
|
|
50
|
$content .= $buf; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
2
|
|
|
|
|
16
|
$fh->close(); |
323
|
|
|
|
|
|
|
|
324
|
2
|
50
|
|
|
|
28
|
$content = "" if not defined $content; |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# Set last-accessed for cache-entry. |
327
|
2
|
|
|
|
|
3
|
my $mtime = time; |
328
|
2
|
|
|
|
|
82
|
utime( $mtime, $mtime, $filename ); |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# modify response |
331
|
2
|
50
|
|
|
|
10
|
if( $HTTP::Message::VERSION >= 1.44 ) { |
332
|
2
|
|
|
|
|
14
|
$res->content_ref( \$content ); |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
else { |
335
|
0
|
|
|
|
|
0
|
$res->content( $content ); |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# For HTTP::Cache::Transparent earlier than 0.4, |
339
|
|
|
|
|
|
|
# there is no Code in the cache. |
340
|
2
|
50
|
|
|
|
38
|
if( defined( $meta->{Code} ) ) { |
341
|
2
|
|
|
|
|
7
|
$res->code( $meta->{Code} ); |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
else { |
344
|
0
|
|
|
|
|
0
|
$res->code( RC_OK ); |
345
|
|
|
|
|
|
|
} |
346
|
2
|
|
50
|
|
|
18
|
$res->message(status_message($res->code) || "Unknown code"); |
347
|
|
|
|
|
|
|
|
348
|
2
|
|
|
|
|
39
|
foreach my $h (@cache_headers) { |
349
|
|
|
|
|
|
|
$res->header( $h, $meta->{$h} ) |
350
|
10
|
100
|
|
|
|
167
|
if defined( $meta->{ $h } ); |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
|
353
|
2
|
|
|
|
|
81
|
$res->header( "X-Cached", 1 ); |
354
|
2
|
|
|
|
|
95
|
$res->header( "X-Content-Unchanged", 1 ); |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# Read metadata and position filehandle at start of data. |
358
|
|
|
|
|
|
|
sub _read_meta { |
359
|
2
|
|
|
2
|
|
4
|
my( $fh ) = @_; |
360
|
2
|
|
|
|
|
2
|
my %meta; |
361
|
|
|
|
|
|
|
|
362
|
2
|
|
|
|
|
2
|
my( $key, $value ); |
363
|
2
|
|
|
|
|
3
|
do { |
364
|
18
|
|
|
|
|
43
|
my $line = <$fh>; |
365
|
18
|
|
|
|
|
36
|
( $key, $value ) = ($line =~ /(\S+)\s+(.*)[\n\r]*/); |
366
|
|
|
|
|
|
|
|
367
|
18
|
100
|
|
|
|
51
|
$meta{$key} = $value |
368
|
|
|
|
|
|
|
if( defined $value ); |
369
|
|
|
|
|
|
|
} while( defined( $value ) ); |
370
|
|
|
|
|
|
|
|
371
|
2
|
|
|
|
|
5
|
return \%meta; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
# Write metadata and position filehandle where data should be written. |
375
|
|
|
|
|
|
|
sub _write_meta { |
376
|
2
|
|
|
2
|
|
4
|
my( $fh, $meta ) = @_; |
377
|
|
|
|
|
|
|
|
378
|
2
|
|
|
|
|
4
|
foreach my $key (sort keys( %{$meta} ) ) { |
|
2
|
|
|
|
|
14
|
|
379
|
16
|
|
|
|
|
43
|
print $fh "$key $meta->{$key}\n"; |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
2
|
|
|
|
|
6
|
print $fh "\n"; |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
sub _write_cache_entry { |
386
|
2
|
|
|
2
|
|
16
|
my( $filename, $url, $req, $res ) = @_; |
387
|
|
|
|
|
|
|
|
388
|
2
|
|
|
|
|
10
|
my $out_filename = "$filename.tmp$$"; |
389
|
2
|
50
|
|
|
|
19
|
my $fh = new IO::File "> $out_filename" |
390
|
|
|
|
|
|
|
or die "Failed to write to $out_filename"; |
391
|
|
|
|
|
|
|
|
392
|
2
|
|
|
|
|
307
|
my $meta; |
393
|
2
|
|
|
|
|
7
|
$meta->{Url} = $url; |
394
|
2
|
50
|
|
|
|
7
|
$meta->{ETag} = $res->header('ETag') |
395
|
|
|
|
|
|
|
if defined( $res->header('ETag') ); |
396
|
|
|
|
|
|
|
|
397
|
2
|
|
|
|
|
138
|
my $content = $res->content; |
398
|
2
|
50
|
|
|
|
25
|
$content = "" if not defined $content; |
399
|
|
|
|
|
|
|
|
400
|
2
|
|
|
|
|
22
|
$meta->{MD5} = md5_hex( $content ); |
401
|
2
|
50
|
|
|
|
9
|
$meta->{Range} = $req->header('Range') |
402
|
|
|
|
|
|
|
if defined( $req->header('Range') ); |
403
|
2
|
|
|
|
|
63
|
$meta->{Code} = $res->code; |
404
|
2
|
|
|
|
|
17
|
$meta->{'X-HCT-LastUpdated'} = time; |
405
|
|
|
|
|
|
|
|
406
|
2
|
|
|
|
|
6
|
foreach my $h (@cache_headers) { |
407
|
10
|
100
|
|
|
|
322
|
$meta->{$h} = $res->header( $h ) |
408
|
|
|
|
|
|
|
if defined $res->header( $h ); |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
|
411
|
2
|
|
|
|
|
99
|
_write_meta( $fh, $meta ); |
412
|
|
|
|
|
|
|
|
413
|
2
|
|
|
|
|
4
|
print $fh $content; |
414
|
2
|
|
|
|
|
11
|
$fh->close; |
415
|
|
|
|
|
|
|
|
416
|
2
|
50
|
|
|
|
119
|
move( $out_filename, $filename ) || unlink $out_filename; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
sub _urlhash { |
420
|
3
|
|
|
3
|
|
5
|
my( $url ) = @_; |
421
|
|
|
|
|
|
|
|
422
|
3
|
|
|
|
|
17
|
return md5_hex( $url ); |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
sub _remove_old_entries { |
426
|
2
|
100
|
66
|
2
|
|
37
|
if( defined( $basepath ) and -d( $basepath ) ) { |
427
|
1
|
|
|
|
|
9
|
my $oldcwd = getcwd(); |
428
|
1
|
|
|
|
|
20
|
chdir( $basepath ); |
429
|
|
|
|
|
|
|
|
430
|
1
|
|
|
|
|
103
|
my @files = glob("*"); |
431
|
1
|
|
|
|
|
5
|
foreach my $file (@files) { |
432
|
1
|
50
|
|
|
|
9
|
if( $file !~ m%^[0-9a-f]{32}$% ) { |
433
|
0
|
|
|
|
|
0
|
print STDERR "HTTP::Cache::Transparent: Unknown file found in cache directory: $basepath$file\n"; |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
else { |
436
|
1
|
|
|
|
|
10
|
my $age = (-M $file); |
437
|
|
|
|
|
|
|
# The file may have disappeared if another process has cleaned |
438
|
|
|
|
|
|
|
# the cache. |
439
|
1
|
50
|
33
|
|
|
13
|
if( defined($age) && ( $age*24 > $maxage ) ) { |
440
|
0
|
0
|
|
|
|
0
|
print STDERR "Deleting $file.\n" |
441
|
|
|
|
|
|
|
if( $verbose ); |
442
|
0
|
|
|
|
|
0
|
unlink( $file ); |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
|
447
|
1
|
|
|
|
|
24
|
chdir( $oldcwd ); |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head1 INSPECTING CACHE BEHAVIOR |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
The HTTP::Cache::Transparent inserts three special headers in the |
454
|
|
|
|
|
|
|
HTTP::Response object. These can be accessed via the |
455
|
|
|
|
|
|
|
HTTP::Response::header()-method. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=over 4 |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=item X-Cached |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
This header is inserted and set to 1 if the response is delivered from |
462
|
|
|
|
|
|
|
the cache instead of from the server. |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=item X-Content-Unchanged |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
This header is inserted and set to 1 if the content returned is the same |
467
|
|
|
|
|
|
|
as the content returned the last time this url was fetched. This header |
468
|
|
|
|
|
|
|
is always inserted and set to 1 when the response is delivered from |
469
|
|
|
|
|
|
|
the cache. |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=item X-No-Server-Contact |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
This header is inserted and set to 1 if the content returned has been |
474
|
|
|
|
|
|
|
delivered without any contact with the external server, i.e. no |
475
|
|
|
|
|
|
|
conditional or unconditional HTTP GET request has been sent, the content |
476
|
|
|
|
|
|
|
has been delivered directly from cache. This may be useful when seeking |
477
|
|
|
|
|
|
|
to control loading of the external server. |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=back |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=head1 LIMITATIONS |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
This module has a number of limitations that you should be aware of |
484
|
|
|
|
|
|
|
before using it. |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
=over 4 |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=item - |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
There is no upper limit to how much diskspace the cache requires. The |
491
|
|
|
|
|
|
|
only limiting mechanism is that data for urls that haven't been requested |
492
|
|
|
|
|
|
|
in the last MaxAge hours will be removed from the cache the next time |
493
|
|
|
|
|
|
|
the program exits. |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=item - |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
Currently, only get-requests that store the result in memory (i.e. do |
498
|
|
|
|
|
|
|
not use the option to have the result stored directly in a file or |
499
|
|
|
|
|
|
|
delivered via a callback) is cached. I intend to remove this limitation |
500
|
|
|
|
|
|
|
in a future version. |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=item - |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
The support for Ranges is a bit primitive. It creates a new object in |
505
|
|
|
|
|
|
|
the cache for each unique combination of url and range. This will work ok |
506
|
|
|
|
|
|
|
as long as you always request the same range(s) for a url. |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=item - |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
The cache doesn't properly check and store all headers in the HTTP |
511
|
|
|
|
|
|
|
request and response. Therefore, if you request the same url repeatedly |
512
|
|
|
|
|
|
|
with different sets of headers (cookies, accept-encoding etc), and these |
513
|
|
|
|
|
|
|
headers affect the response from the server, the cache may return the |
514
|
|
|
|
|
|
|
wrong response. |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=item - |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
HTTP::Cache::Transparent has not been tested with threads, and will |
519
|
|
|
|
|
|
|
most likely not work if you use them. |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=back |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=head1 CACHE FORMAT |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
The cache is stored on disk as one file per cached object. The filename |
526
|
|
|
|
|
|
|
is equal to the md5sum of the url and the Range-header if it exists. |
527
|
|
|
|
|
|
|
The file contains a set of |
528
|
|
|
|
|
|
|
key/value-pairs with metadata (one entry per line) followed by a blank |
529
|
|
|
|
|
|
|
line and then the actual data returned by the server. |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
The last modified date of the cache file is set to the time when the |
532
|
|
|
|
|
|
|
cache object was last requested by a user. |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head1 AUTHOR |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Mattias Holmlund, E$firstname -at- $lastname -dot- seE |
537
|
|
|
|
|
|
|
L |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=head1 GIT REPOSITORY |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
A git repository containing the source for this module can be found |
542
|
|
|
|
|
|
|
via http://git.holmlund.se/ |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
Copyright (C) 2004-2007 by Mattias Holmlund |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
549
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.8.4 or, |
550
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=cut |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
1; |