| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package LWP::UserAgent::Cached; |
|
2
|
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
62406
|
use strict; |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
79
|
|
|
4
|
2
|
|
|
2
|
|
12
|
use Carp; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
178
|
|
|
5
|
2
|
|
|
2
|
|
22
|
use Digest::MD5; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
64
|
|
|
6
|
2
|
|
|
2
|
|
2258
|
use HTTP::Response; |
|
|
2
|
|
|
|
|
43278
|
|
|
|
2
|
|
|
|
|
69
|
|
|
7
|
2
|
|
|
2
|
|
16
|
use base 'LWP::UserAgent'; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
5465
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.06'; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# new(%opts)
#
# Constructor. Accepts everything the parent constructor accepts plus the
# cache-specific options cache_dir, nocache_if, recache_if, on_uncached and
# cachename_spec. The cache-specific options are removed from %opts before
# the parent constructor sees them and are stored on the object afterwards.
# parse_head => 0 is always passed last, so it overrides any value supplied
# by the caller.
#
# Returns the new object.
sub new {
    my ($class, %opts) = @_;

    my @own_names  = qw(cache_dir nocache_if recache_if on_uncached cachename_spec);
    my @own_values = delete @opts{@own_names};

    my $self = $class->SUPER::new(%opts, parse_head => 0);

    # Options that were not supplied end up stored as undef.
    @{$self}{@own_names} = @own_values;

    return $self;
}
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Install one combined getter/setter per cache option into this package.
# Called without arguments an accessor returns the current value; called
# with an argument it stores the new value and returns the PREVIOUS one.
foreach my $accessor (qw(cache_dir nocache_if recache_if on_uncached cachename_spec)) {
    no strict 'refs';

    *{$accessor} = sub {
        my ($self, @args) = @_;

        # Getter form.
        return $self->{$accessor} unless @args;

        # Setter form: remember the old value so it can be handed back.
        my $previous = $self->{$accessor};
        $self->{$accessor} = $args[0];
        return $previous;
    };
}
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# parse_head() or parse_head($bool)
#
# Overrides the parent's parse_head() so that head parsing can never be
# switched on: a true argument raises an exception (reported from the
# caller's perspective), because head parsing may cause encoding troubles
# while saving the cache.
#
# Any other call is forwarded to the parent with the argument list exactly
# as received, so a call without arguments reaches the parent without
# arguments instead of being turned into parse_head(undef).
#
# Returns whatever the parent's parse_head() returns.
sub parse_head {
    my $self = shift;

    if (@_ && $_[0]) {
        croak "parse_head() is disabled, because it may cause encoding troubles while saving cache";
    }

    return $self->SUPER::parse_head(@_);
}
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# simple_request($request, ...)
#
# Overrides the parent's simple_request() with a file cache lookup.
#
# With no cache_dir configured the call is passed straight to the parent.
# Otherwise the cache file name is computed from the request and:
#   * if a cache file holding a response for the same URL exists (and
#     recache_if, when set, does not ask for a refresh) the cached response
#     is returned and no HTTP request is made;
#   * otherwise on_uncached (when set) is called, the real request is made
#     through the parent and, unless nocache_if (when set) returns true,
#     the response is written to the cache.
#
# last_cached / last_used_cache bookkeeping is reset at the start of every
# call that does not follow a redirect response, and appended to otherwise.
#
# Returns the response object.
sub simple_request {
    my $self = shift;

    return $self->SUPER::simple_request(@_)
        unless defined $self->{cache_dir};

    my $request = $_[0];

    # Best effort only: errors raised here are deliberately ignored.
    # NOTE(review): presumably done so the cache name is computed from the
    # request as it will actually be sent - confirm against the parent class.
    eval { $self->prepare_request($request) };

    my $fpath = $self->_get_cache_name($request);

    unless ($self->{was_redirect}) {
        @{ $self->{last_cached} }     = ();
        @{ $self->{last_used_cache} } = ();
    }

    my ($response, $no_collision_suffix);
    if (-e $fpath) {
        ($response, $fpath, $no_collision_suffix)
            = $self->_lookup_cached_response($fpath, $request);

        # The user may force a refresh of an otherwise valid cache entry;
        # the fresh response then overwrites the same file.
        if (   $response
            && defined($self->{recache_if})
            && $self->{recache_if}->($response, $fpath, $request))
        {
            $response = undef;
        }
    }

    if ($response) {
        push @{ $self->{last_used_cache} }, $fpath;
    }
    else {
        if (defined $self->{on_uncached}) {
            $self->{on_uncached}->($request);
        }

        $response = $self->SUPER::simple_request(@_);

        if (!defined($self->{nocache_if}) || !$self->{nocache_if}->($response)) {
            if (defined $no_collision_suffix) {
                $fpath .= $no_collision_suffix;
            }

            # Only a completely written file is recorded as cached.
            if ($self->_store_cached_response($fpath, $request, $response)) {
                push @{ $self->{last_cached} },     $fpath;
                push @{ $self->{last_used_cache} }, $fpath;
            }
        }
    }

    # Remembered so that the next call can tell it continues this request.
    $self->{was_redirect} = $response->is_redirect && _in($request->method, $self->requests_redirectable);
    return $response;
}

# _lookup_cached_response($fpath, $request)
#
# Searches $fpath and, when that file belongs to another URL (name
# collision), its "$fpath-NNN" siblings for a response cached for $request.
#
# Returns a three element list ($response, $path, $suffix):
#   * on a hit: the response, the file it was read from, undef;
#   * on a miss: undef, the unchanged $fpath, and the suffix ("-001",
#     "-002", ...) to append to $fpath to get an unused collision file name.
sub _lookup_cached_response {
    my ($self, $fpath, $request) = @_;

    my $response = $self->_parse_cached_response($fpath, $request);
    return ($response, $fpath, undef) if $response;

    # collision
    # bsd_glob() does not split its pattern on whitespace, so cache
    # directories containing spaces are handled correctly.
    require File::Glob;
    my @cache_list = File::Glob::bsd_glob("$fpath-*");
    return (undef, $fpath, '-001') unless @cache_list;

    my $max_suffix = 0;
    foreach my $cache_file (@cache_list) {
        $response = $self->_parse_cached_response($cache_file, $request);
        return ($response, $cache_file, undef) if $response;

        # Track the numerically highest suffix; relying on the last three
        # characters of the last name breaks once a suffix exceeds 999.
        if ($cache_file =~ /-(\d+)\z/ && $1 > $max_suffix) {
            $max_suffix = $1;
        }
    }

    return (undef, $fpath, sprintf('-%03d', $max_suffix + 1));
}

# _store_cached_response($fpath, $request, $response)
#
# Writes a cache file: the request URL on the first line followed by
# $response->as_string. A warning is issued when the file cannot be opened
# or written; a partially written file is removed so that it can not be
# mistaken for a valid cache entry later.
#
# Returns 1 on success and an empty list / undef on failure.
sub _store_cached_response {
    my ($self, $fpath, $request, $response) = @_;

    my $fh;
    unless (open $fh, '>:raw', $fpath) {
        carp "open('$fpath', 'w'): $!";
        return;
    }

    my $error;
    unless ((print {$fh} $request->url, "\n") && (print {$fh} $response->as_string)) {
        $error = "$!";
    }

    # Buffered write errors may only surface on close.
    unless (close $fh) {
        $error = "$!" unless defined $error;
    }

    if (defined $error) {
        carp "write('$fpath'): $error";
        unlink $fpath;
        return;
    }

    return 1;
}
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# last_cached()
#
# Returns the list stored under the object's last_cached key, or an empty
# list when that key does not exist yet.
sub last_cached {
    my ($self) = @_;

    return () unless exists $self->{last_cached};
    return @{ $self->{last_cached} };
}
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# last_used_cache()
#
# Returns the list stored under the object's last_used_cache key, or an
# empty list when that key does not exist yet.
sub last_used_cache {
    my ($self) = @_;

    return () unless exists $self->{last_used_cache};
    return @{ $self->{last_used_cache} };
}
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# uncache()
#
# Deletes every file reported by last_cached(). A file that is already
# gone is not an error; a file that still exists after a failed unlink is
# reported with a warning, matching how open failures are reported
# elsewhere in this module.
#
# Returns the number of files actually removed.
sub uncache {
    my $self = shift;

    my $removed = 0;
    foreach my $cache_file ($self->last_cached) {
        if (unlink $cache_file) {
            $removed++;
            next;
        }

        # Save the error text before the -e test can overwrite $!.
        my $error = "$!";
        carp "unlink('$cache_file'): $error" if -e $cache_file;
    }

    return $removed;
}
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# _get_cache_name($request)
#
# Builds the path of the cache file for $request: cache_dir, a slash and
# the MD5 hex digest of the request's as_string() output.
#
# When cachename_spec is a non-empty hash the digest is taken from a clone
# of the request adjusted as follows (the request itself is not modified):
#   * a header named in the spec gets the value from the spec, or is
#     removed when that value is undef;
#   * when the spec has a '_headers' key (must be an array reference,
#     otherwise the sub croaks) every other header not listed there is
#     removed;
#   * when the spec has a '_body' key the content is replaced by its value.
# Only headers already present on the request are considered.
sub _get_cache_name {
    my ($self, $request) = @_;

    my $spec = $self->{cachename_spec};
    unless (defined($spec) && %$spec) {
        return $self->{cache_dir} . '/' . Digest::MD5::md5_hex($request->as_string);
    }

    my $keyed_request  = $request->clone();
    my $whitelist_only = 0;
    if (exists $spec->{_headers}) {
        croak 'cachename_spec->{_headers} should be array ref'
            unless ref $spec->{_headers} eq 'ARRAY';
        $whitelist_only = 1;
    }

    foreach my $field ($keyed_request->headers->header_field_names) {
        if (exists $spec->{$field}) {
            # Explicit spec entries win over the '_headers' whitelist.
            if (defined $spec->{$field}) {
                $keyed_request->headers->header($field, $spec->{$field});
            }
            else {
                $keyed_request->headers->remove_header($field);
            }
            next;
        }

        if ($whitelist_only && !_in($field, $spec->{_headers})) {
            $keyed_request->headers->remove_header($field);
        }
    }

    $keyed_request->content($spec->{_body}) if exists $spec->{_body};

    return $self->{cache_dir} . '/' . Digest::MD5::md5_hex($keyed_request->as_string);
}
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# _parse_cached_response($cache_file, $request)
#
# Reads a cache file (first line: request URL, remainder: the text of the
# response) and turns it back into a response object for $request.
#
# Returns the response object, or nothing when
#   * the file can not be opened (a warning is issued),
#   * the file is empty, or
#   * the URL stored in the file differs from $request->url, i.e. the file
#     belongs to another request whose cache name collided.
#
# On success the response gets $request attached and, when the object has
# a cookie jar, the jar's extract_cookies() is called with the response.
sub _parse_cached_response {
    my ($self, $cache_file, $request) = @_;

    my $fh;
    unless (open $fh, '<:raw', $cache_file) {
        carp "open('$cache_file', 'r'): $!";
        return;
    }

    # Read exactly one line regardless of what the caller did to $/.
    my $url = do { local $/ = "\n"; <$fh> };
    unless (defined $url) {
        # Empty file: there is nothing that could match.
        close $fh;
        return;
    }

    $url =~ s/\s+$//;
    if ($url ne $request->url) {
        close $fh;
        return;
    }

    # Slurp the rest; the override of $/ ends with the do block so it does
    # not leak into the calls below.
    my $response_str = do { local $/ = undef; <$fh> };
    close $fh;

    my $response = HTTP::Response->parse($response_str);
    $response->request($request);

    if ($self->cookie_jar) {
        $self->cookie_jar->extract_cookies($response);
    }

    return $response;
}
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
# _in($what, $where)
#
# Tells whether the string $what is equal (eq) to one of the elements of
# the array referenced by $where.
#
# Returns 1 when found, 0 otherwise (including when $where is undefined).
#
# Declared without a prototype: all call sites in this file come before
# this definition, so a prototype would never be applied to them.
sub _in {
    my ($what, $where) = @_;

    return 0 unless defined $where;

    foreach my $item (@$where) {
        return 1 if $what eq $item;
    }

    return 0;
}
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
1; |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=pod |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=head1 NAME |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
LWP::UserAgent::Cached - LWP::UserAgent with simple caching mechanism |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
use LWP::UserAgent::Cached; |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
my $ua = LWP::UserAgent::Cached->new(cache_dir => '/tmp/lwp-cache'); |
|
236
|
|
|
|
|
|
|
my $resp = $ua->get('http://google.com/'); # makes http request |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
... |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
$resp = $ua->get('http://google.com/'); # no http request - will get it from the cache |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
When you process content from some website, you will get page one by one and extract some data from this |
|
245
|
|
|
|
|
|
|
page with a regexp, DOM parser or something else. Sometimes we make errors in our data extractors and realize this |
|
246
|
|
|
|
|
|
|
only when all 1_000_000 pages were processed. We should fix our extraction logic and start all process from the |
|
247
|
|
|
|
|
|
|
beginning. Please STOP! How about cache? Yes, you can cache all responses and second, third and other attempts will |
|
248
|
|
|
|
|
|
|
be very fast. |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
LWP::UserAgent::Cached is yet another LWP::UserAgent subclass with cache support. It stores |
|
251
|
|
|
|
|
|
|
cache in the files on local filesystem and if response already available in the cache returns it instead of making HTTP request. |
|
252
|
|
|
|
|
|
|
This module was written because other available alternatives didn't meet my needs: |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=over |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=item L |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
caches responses on local filesystem and gets it from the cache only if online document was not modified |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=item L |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
same as above but stores cache in memory |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=item L |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
can record responses in the cache or get responses from the cache, but not both for one useragent |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=item L |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
seems it may cache responses and get responses from the cache, but has too much dependencies and unclear |
|
271
|
|
|
|
|
|
|
`delay' parameter |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=back |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=head1 METHODS |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
All LWP::UserAgent methods and several new. |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=head2 new(...) |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
Creates new LWP::UserAgent::Cached object. Since LWP::UserAgent::Cached is LWP::UserAgent subclass it has all same |
|
282
|
|
|
|
|
|
|
parameters, but in addition it has some new optional parameters: |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
L |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
L |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
L |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
L |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
L |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
LWP::UserAgent::Cached creation example: |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
my $ua = LWP::UserAgent::Cached->new(cache_dir => 'cache/lwp', nocache_if => sub { |
|
297
|
|
|
|
|
|
|
my $response = shift; |
|
298
|
|
|
|
|
|
|
return $response->code >= 500; # do not cache any bad response |
|
299
|
|
|
|
|
|
|
}, recache_if => sub { |
|
300
|
|
|
|
|
|
|
my ($response, $path, $request) = @_; |
|
301
|
|
|
|
|
|
|
return $response->code == 404 && -M $path > 1; # recache any 404 response older than 1 day |
|
302
|
|
|
|
|
|
|
}, on_uncached => sub { |
|
303
|
|
|
|
|
|
|
my $request = shift; |
|
304
|
|
|
|
|
|
|
sleep 5 if $request->uri =~ '/category/\d+'; # delay before http requests inside "/category" |
|
305
|
|
|
|
|
|
|
}, cachename_spec => { |
|
306
|
|
|
|
|
|
|
'User-Agent' => undef, # omit agent while calculating cache name |
|
307
|
|
|
|
|
|
|
}); |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=head2 cache_dir() or cache_dir($dir) |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
Gets or sets path to the directory where cache will be stored. |
|
312
|
|
|
|
|
|
|
If not set, the useragent will behave as LWP::UserAgent without cache support. |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=head2 nocache_if() or nocache_if($sub) |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Gets or sets reference to subroutine which will be called after receiving each non-cached response. First parameter |
|
317
|
|
|
|
|
|
|
of this subroutine will be HTTP::Response object. This subroutine should return true if this response should |
|
318
|
|
|
|
|
|
|
not be cached and false otherwise. If not set all responses will be cached. |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 recache_if() or recache_if($sub) |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
Gets or sets reference to subroutine which will be called for each response available in the cache. First parameter |
|
323
|
|
|
|
|
|
|
of this subroutine will be HTTP::Response object, second - path to file with cache, third - HTTP::Request object. |
|
324
|
|
|
|
|
|
|
This subroutine should return true if response needs to be recached (new HTTP request will be made) and false otherwise. |
|
325
|
|
|
|
|
|
|
This $sub will be called only if response already available in the cache. Here you can also modify request for your needs. |
|
326
|
|
|
|
|
|
|
This will not change name of the file with cache. |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=head2 on_uncached() or on_uncached($sub) |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
Gets or sets reference to subroutine which will be called for each non-cached http request, before actually request. |
|
331
|
|
|
|
|
|
|
First parameter of this subroutine will be HTTP::Request object. Here you can also modify request for your needs. |
|
332
|
|
|
|
|
|
|
This will not change name of the file with cache. |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=head2 cachename_spec() or cachename_spec($spec) |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Gets or sets hash reference to cache naming specification. In fact cache naming for each request based on request content. |
|
337
|
|
|
|
|
|
|
Internally it is md5_hex($request->as_string). But what if some of the request headers in your program change dynamically, e.g. |
|
338
|
|
|
|
|
|
|
User-Agent or Cookie? In such case caching will not work properly for you. We need some way to omit this headers when calculating |
|
339
|
|
|
|
|
|
|
cache name. This option is what you need. Specification hash should contain header name and header value which will be used |
|
340
|
|
|
|
|
|
|
(instead of values in request) while calculating cache name. |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
For example we already have cache where 'User-Agent' value in the headers was 'Mozilla/5.0', but in the current version of the program |
|
343
|
|
|
|
|
|
|
it will be changed for each request. So we force specified that for cache name calculation 'User-Agent' should be 'Mozilla/5.0'. Cached |
|
344
|
|
|
|
|
|
|
request had not 'Accept' header, but in the current version it has. So we force specified do not include this header for cache name |
|
345
|
|
|
|
|
|
|
calculation. |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
cachename_spec => { |
|
348
|
|
|
|
|
|
|
'User-Agent' => 'Mozilla/5.0', |
|
349
|
|
|
|
|
|
|
'Accept' => undef |
|
350
|
|
|
|
|
|
|
} |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Specification hash may contain two special keys: '_body' and '_headers'. With '_body' key you can specify body content in the request |
|
353
|
|
|
|
|
|
|
for cache name calculation. For example to not include body content in cache name calculation set '_body' to undef or empty string. |
|
354
|
|
|
|
|
|
|
With '_headers' key you can specify which headers should be included in $request for cache name calculation. For example you can say to |
|
355
|
|
|
|
|
|
|
include only 'Host' and 'Referer'. '_headers' value should be array reference: |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
cachename_spec => { |
|
358
|
|
|
|
|
|
|
_body => undef, # omit body |
|
359
|
|
|
|
|
|
|
_headers => ['Host'], # include only host with value from request |
|
360
|
|
|
|
|
|
|
# It will be smth like: |
|
361
|
|
|
|
|
|
|
# md5_hex("METHOD url\r\nHost: host\r\n\r\n") |
|
362
|
|
|
|
|
|
|
# method and url will be included in any case |
|
363
|
|
|
|
|
|
|
} |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
Another example. Omit body, include only 'Host' and 'User-Agent' headers, use 'Host' value from request and specified 'User-Agent' value, |
|
366
|
|
|
|
|
|
|
in addition include referrer with specified value ('Referer' not specified in '_headers', but values from main specification hash has |
|
367
|
|
|
|
|
|
|
higher priority): |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
cachename_spec => { |
|
370
|
|
|
|
|
|
|
_body => '', |
|
371
|
|
|
|
|
|
|
_headers => ['Host', 'User-Agent'], |
|
372
|
|
|
|
|
|
|
'User-Agent' => 'Mozilla/5.0', |
|
373
|
|
|
|
|
|
|
'Referer' => 'http://www.com' |
|
374
|
|
|
|
|
|
|
} |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
One more example. Calculate cache name based only on method and url: |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
cachename_spec => { |
|
379
|
|
|
|
|
|
|
_body =>'', |
|
380
|
|
|
|
|
|
|
_headers => [] |
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head2 last_cached() |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Returns list with paths to files with cache stored by last noncached response. List may contain more than one |
|
386
|
|
|
|
|
|
|
element if there was redirect. |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head2 last_used_cache() |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
Returns list with paths to files with cache used in last response. This includes files just stored (last_cached) |
|
391
|
|
|
|
|
|
|
and files that may already exist (cached earlier). List may contain more than one element if there was redirect. |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head2 uncache() |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Removes last response from the cache. Use case example: |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
my $page = $ua->get($url)->decoded_content; |
|
398
|
|
|
|
|
|
|
if ($page =~ /Access for this ip was blocked/) { |
|
399
|
|
|
|
|
|
|
$ua->uncache(); |
|
400
|
|
|
|
|
|
|
} |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=head1 Proxy and cache name |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
Here you can see how changing of proxy for useragent will affect cache name |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=head2 HTTP proxy |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
HTTP proxy support works out of the box and causes no problems. Changing of proxy server will not affect cache name |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=head2 HTTPS proxy |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
Proper HTTPS proxy support added in LWP since 6.06 and causes no problems. Changing of proxy server will not affect cache name |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 CONNECT proxy |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
CONNECT proxy support may be added using L. The problem is that this module uses |
|
417
|
|
|
|
|
|
|
LWP's request() for creation of CONNECT tunnel, so this response will be cached. But in fact it shouldn't. To workaround this |
|
418
|
|
|
|
|
|
|
you need to install C hook |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
$ua->nocache_if(sub { |
|
421
|
|
|
|
|
|
|
my $resp = shift; |
|
422
|
|
|
|
|
|
|
# do not cache creation of tunnel |
|
423
|
|
|
|
|
|
|
$resp->request->method eq 'CONNECT'; |
|
424
|
|
|
|
|
|
|
}); |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
After that it works without problems. Changing of proxy server will not affect cache name |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=head2 SOCKS proxy |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
SOCKS proxy support may be added using L and causes no problems. |
|
431
|
|
|
|
|
|
|
Changing of proxy server will not affect cache name |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
L |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
Copyright Oleg G . |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or |
|
442
|
|
|
|
|
|
|
modify it under the same terms as Perl itself. |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=cut |