line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Weather::GHCN::CacheURI.pm - class for fetching from a URI, with file caching |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
## no critic (Documentation::RequirePodAtEnd) |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Weather::GHCN::CacheURI - URI page fetch with file-based caching |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 VERSION |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
version v0.0.011 |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 SYNOPSIS |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use Weather::GHCN::CacheURI; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# put files cached by fetch() in $cachedir and refresh if not changed this year |
18
|
|
|
|
|
|
|
my $cache_uri = Weather::GHCN::CacheURI->new($cachedir, 'yearly'); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
$cache_uri->clean_cache; # empty the cache |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# this will cause fetch to do a network access |
23
|
|
|
|
|
|
|
my ($from_cache, $content) = $cache_uri->fetch($uri); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# depending on the refresh option, this will either fetch the content |
26
|
|
|
|
|
|
|
# from the cache, or get a fresher copy from the network |
27
|
|
|
|
|
|
|
my ($from_cache, $content) = $cache_uri->fetch($uri); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# fetch calls these to access the cached file according to the |
30
|
|
|
|
|
|
|
# refresh rule and the state of the cached file and the web page |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
my $content = $cache_uri->loca($uri); |
33
|
|
|
|
|
|
|
$cache_uri->store($uri, $content); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 DESCRIPTION |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
This cache module enables callers to fetch web pages and store the |
38
|
|
|
|
|
|
|
content on the filesystem so that it can be retrieved subsequently |
39
|
|
|
|
|
|
|
without a network access. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Unlike caching performed by Fetch::URI or LWP, no Etags or |
42
|
|
|
|
|
|
|
Last-Modified-Date or other data is included with the content data. |
43
|
|
|
|
|
|
|
This metadata can be an obstacle to platform portability. |
44
|
|
|
|
|
|
|
Essentially, just utf-8 page content that is stored. That should be |
45
|
|
|
|
|
|
|
neutral enough that the cache file can be used on another platform. |
46
|
|
|
|
|
|
|
This is a benefit to unit testing, because tests can be constructed |
47
|
|
|
|
|
|
|
to fetch pages, and the cached pages can be packaged with the tests. |
48
|
|
|
|
|
|
|
This allows the tests to run faster, and without network access. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
The approach is simple, and geared towards accessing and caching |
51
|
|
|
|
|
|
|
the content of the NOAA GHCN weather repository. The files in that |
52
|
|
|
|
|
|
|
repository are simple ASCII files with uncomplicated names. The |
53
|
|
|
|
|
|
|
caching algorithm simply strips off the URI path and stores the file |
54
|
|
|
|
|
|
|
using the filename found in the repository; e.g. 'ghcnd-stations.txt' or |
55
|
|
|
|
|
|
|
'CA006105887.dly'. All files are kept in the cache directory, since |
56
|
|
|
|
|
|
|
all filenames are expected to be unique. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=cut |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
## no critic [ValuesAndExpressions::ProhibitVersionStrings] |
61
|
|
|
|
|
|
|
## no critic [TestingAndDebugging::RequireUseWarnings] |
62
|
|
|
|
|
|
|
|
63
|
6
|
|
|
6
|
|
112321
|
use v5.18; # minimum for Object::Pad |
|
6
|
|
|
|
|
37
|
|
64
|
6
|
|
|
6
|
|
1216
|
use Object::Pad 0.66 qw( :experimental(init_expr) ); |
|
6
|
|
|
|
|
23483
|
|
|
6
|
|
|
|
|
32
|
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
package Weather::GHCN::CacheURI; |
67
|
|
|
|
|
|
|
class Weather::GHCN::CacheURI; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
our $VERSION = 'v0.0.011'; |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
|
72
|
6
|
|
|
6
|
|
2356
|
use Carp qw(carp croak); |
|
6
|
|
|
|
|
1668
|
|
|
6
|
|
|
|
|
361
|
|
73
|
6
|
|
|
6
|
|
933
|
use Const::Fast; |
|
6
|
|
|
|
|
5283
|
|
|
6
|
|
|
|
|
34
|
|
74
|
6
|
|
|
6
|
|
491
|
use Fcntl qw( :DEFAULT ); |
|
6
|
|
|
|
|
13
|
|
|
6
|
|
|
|
|
2285
|
|
75
|
6
|
|
|
6
|
|
2972
|
use File::stat; |
|
6
|
|
|
|
|
47965
|
|
|
6
|
|
|
|
|
33
|
|
76
|
6
|
|
|
6
|
|
2308
|
use Path::Tiny; |
|
6
|
|
|
|
|
27751
|
|
|
6
|
|
|
|
|
321
|
|
77
|
6
|
|
|
6
|
|
1148
|
use Try::Tiny; |
|
6
|
|
|
|
|
4330
|
|
|
6
|
|
|
|
|
367
|
|
78
|
6
|
|
|
6
|
|
3417
|
use Time::Piece 1.32; |
|
6
|
|
|
|
|
73570
|
|
|
6
|
|
|
|
|
40
|
|
79
|
6
|
|
|
6
|
|
2933
|
use LWP::Simple; |
|
6
|
|
|
|
|
352046
|
|
|
6
|
|
|
|
|
60
|
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
const my $TRUE => 1; # perl's usual TRUE |
82
|
|
|
|
|
|
|
const my $FALSE => not $TRUE; # a dual-var consisting of '' and 0 |
83
|
|
|
|
|
|
|
const my $EMPTY => q(); |
84
|
|
|
|
|
|
|
const my $FSLASH => q(/); |
85
|
|
|
|
|
|
|
const my $ONE_DAY => 24*60*60; # number of seconds in a day |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
const my $REFRESH_RE => qr{ \A ( yearly | never | always | \d+ ) \Z }xms; |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# First return value from _fetch methods indicating whether the fetch |
90
|
|
|
|
|
|
|
# was from the cache or the web page URI |
91
|
|
|
|
|
|
|
const my $FROM_CACHE => $TRUE; |
92
|
|
|
|
|
|
|
const my $FROM_URI => $FALSE; |
93
|
|
|
|
|
|
|
|
94
|
0
|
|
|
0
|
1
|
0
|
field $_cachedir :reader :param {}; |
|
0
|
|
|
|
|
0
|
|
95
|
0
|
|
|
0
|
1
|
0
|
field $_refresh :reader :param {}; |
|
0
|
|
|
|
|
0
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
BUILD ($cachedir, $refresh) { |
98
|
|
|
|
|
|
|
$_cachedir //= $cachedir; |
99
|
|
|
|
|
|
|
$_refresh //= lc $refresh; |
100
|
|
|
|
|
|
|
croak '*E* invalid refresh option' unless $_refresh =~ $REFRESH_RE; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 FIELDS |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head2 cachedir |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Returns the cache location defined when the object was instantiated. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head2 refresh |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Returns the refresh option that was defined when the object was |
112
|
|
|
|
|
|
|
instantiated. See new(), |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=head1 METHODS |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head2 new ($cachedir, $refresh) |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
New instances of this class must be provided a location for the cache |
121
|
|
|
|
|
|
|
files upon creation ($cachedir). This directory must exist or the |
122
|
|
|
|
|
|
|
new() will fail. Similarly, $refresh must be a valid value, one of: |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=over 4 |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item refresh 'yearly' |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
The origin HTTP server is contacted and the page refreshed if the |
129
|
|
|
|
|
|
|
cached file has not been changed within the current year. The |
130
|
|
|
|
|
|
|
rationale for this, and for this being the default, is that the GHCN |
131
|
|
|
|
|
|
|
data for the current year will always be incomplete, and that will |
132
|
|
|
|
|
|
|
skew any statistical analysis and so should normally be truncated. |
133
|
|
|
|
|
|
|
If the user needs the data for the current year, they should use a |
134
|
|
|
|
|
|
|
refresh value of 'always' or a number. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item refresh 'never' |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
The origin HTTP is never contacted, regardless of the page being in |
139
|
|
|
|
|
|
|
cache or not. If the page is missing from cache, the fetch method will |
140
|
|
|
|
|
|
|
return undef. If the page is in cache, that page will be returned, no |
141
|
|
|
|
|
|
|
matter how old it is. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=item refresh 'always' |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
If a page is in the cache, the origin HTTP server is always checked for |
146
|
|
|
|
|
|
|
a fresher copy |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=item refresh |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
The origin HTTP server is not contacted if the page is in cache |
151
|
|
|
|
|
|
|
and the cached page was inserted within the last days. |
152
|
|
|
|
|
|
|
Otherwise the server is checked for a fresher page. |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=back |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head2 clean_cache |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Removes all the files in the cache, but leaves the cache directory. |
159
|
|
|
|
|
|
|
Returns a list of errors for any files that couldn't be removed. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |
162
|
|
|
|
|
|
|
|
163
|
1
|
|
|
1
|
1
|
1588
|
method clean_cache () { |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
2
|
|
164
|
1
|
|
|
|
|
7
|
my $re = qr{ \A ( ghcnd-\w+[.]txt | \w+[.]dly ) \Z }xms; |
165
|
1
|
|
|
|
|
4
|
my @files = path($_cachedir)->children( $re ); |
166
|
1
|
|
|
|
|
244
|
my @errors; |
167
|
1
|
|
|
|
|
3
|
foreach my $f (@files) { |
168
|
|
|
|
|
|
|
try { |
169
|
3
|
|
|
3
|
|
125
|
$f->remove; |
170
|
|
|
|
|
|
|
} catch { |
171
|
0
|
|
|
0
|
|
0
|
push @errors, "*E* unable to remove $f: $_"; |
172
|
3
|
|
|
|
|
150
|
}; |
173
|
|
|
|
|
|
|
} |
174
|
1
|
|
|
|
|
65
|
return @errors; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head2 clean_data_cache |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
Removes all the daily weather data files (*.dly) from the cache, but |
180
|
|
|
|
|
|
|
leaves the cache directory. Returns a list of errors for any files |
181
|
|
|
|
|
|
|
that couldn't be removed. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=cut |
184
|
|
|
|
|
|
|
|
185
|
1
|
|
|
1
|
1
|
11463
|
method clean_data_cache () { |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
3
|
|
186
|
|
|
|
|
|
|
# delete the daily weather data files in the cache |
187
|
1
|
|
|
|
|
8
|
my $re = qr{ \A \w+[.]dly \Z }xms; |
188
|
1
|
|
|
|
|
6
|
my @files = path($_cachedir)->children( $re ); |
189
|
1
|
|
|
|
|
217
|
my @errors; |
190
|
1
|
|
|
|
|
4
|
foreach my $f (@files) { |
191
|
|
|
|
|
|
|
try { |
192
|
1
|
|
|
1
|
|
68
|
$f->remove; |
193
|
|
|
|
|
|
|
} catch { |
194
|
0
|
|
|
0
|
|
0
|
push @errors, "*E* unable to remove $f: $_"; |
195
|
1
|
|
|
|
|
12
|
}; |
196
|
|
|
|
|
|
|
} |
197
|
1
|
|
|
|
|
102
|
return @errors; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head2 clean_station_cache |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Removes the station list and station inventory files (ghcnd-*.txt) |
203
|
|
|
|
|
|
|
from the cache, but leaves the cache directory. Returns a list of |
204
|
|
|
|
|
|
|
errors for any files that couldn't be removed. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=cut |
207
|
|
|
|
|
|
|
|
208
|
1
|
|
|
1
|
1
|
1310
|
method clean_station_cache () { |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2
|
|
209
|
|
|
|
|
|
|
# delete the station list and inventory files in the cache |
210
|
1
|
|
|
|
|
6
|
my $re = qr{ \A ghcnd-\w+[.]txt \Z }xms; |
211
|
1
|
|
|
|
|
5
|
my @files = path($_cachedir)->children( $re ); |
212
|
1
|
|
|
|
|
200
|
my @errors; |
213
|
1
|
|
|
|
|
5
|
foreach my $f (@files) { |
214
|
|
|
|
|
|
|
try { |
215
|
2
|
|
|
2
|
|
89
|
$f->remove; |
216
|
|
|
|
|
|
|
} catch { |
217
|
0
|
|
|
0
|
|
0
|
push @errors, "*E* unable to remove $f: $_"; |
218
|
2
|
|
|
|
|
93
|
}; |
219
|
|
|
|
|
|
|
} |
220
|
1
|
|
|
|
|
70
|
return @errors; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=head2 fetch ($uri, $refresh="yearly") |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
Fetch the web page given by the URI $uri, returning its content |
226
|
|
|
|
|
|
|
and caching it. If a cached entry for it exists, and is current |
227
|
|
|
|
|
|
|
according to the refresh option, then the cached entry is returned. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=cut |
230
|
|
|
|
|
|
|
|
231
|
55
|
|
|
55
|
1
|
4426
|
method fetch ($uri) { |
|
55
|
|
|
|
|
147
|
|
|
55
|
|
|
|
|
196
|
|
|
55
|
|
|
|
|
111
|
|
232
|
|
|
|
|
|
|
|
233
|
55
|
|
|
|
|
142
|
my $from_cache; |
234
|
|
|
|
|
|
|
my $content; |
235
|
|
|
|
|
|
|
|
236
|
55
|
50
|
|
|
|
223
|
carp '*W* no cache directory specified therefore no caching of HTTP queries available' |
237
|
|
|
|
|
|
|
if not $_cachedir; |
238
|
|
|
|
|
|
|
|
239
|
55
|
50
|
33
|
|
|
2945
|
carp '*W* cache location specified but doesn\'t exist, therefore no caching of HTTP queries available' |
240
|
|
|
|
|
|
|
if $_cachedir and not -d $_cachedir; |
241
|
|
|
|
|
|
|
|
242
|
55
|
50
|
33
|
|
|
996
|
if (not $_cachedir or not -d $_cachedir) { |
243
|
0
|
|
|
|
|
0
|
($from_cache, $content) = $self->_fetch_without_cache($uri); |
244
|
0
|
|
|
|
|
0
|
return ($from_cache, $content); |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
55
|
50
|
|
|
|
435
|
if ($_refresh eq 'always') { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
248
|
0
|
|
|
|
|
0
|
($from_cache, $content) = $self->_fetch_refresh_always($uri); |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
elsif ($_refresh eq 'never') { |
251
|
54
|
|
|
|
|
294
|
($from_cache, $content) = $self->_fetch_refresh_never($uri); |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
elsif ($_refresh eq 'yearly') { |
254
|
1
|
|
|
|
|
8
|
my $cutoff_mtime = localtime->truncate( to => 'year' ); |
255
|
1
|
|
|
|
|
361
|
($from_cache, $content) = $self->_fetch_refresh_n_days($uri, $cutoff_mtime); |
256
|
|
|
|
|
|
|
} else { |
257
|
0
|
0
|
|
|
|
0
|
croak unless $_refresh =~ m{ \A \d+ \Z }xms; |
258
|
0
|
|
|
|
|
0
|
my $cutoff_mtime = localtime->truncate( to => 'day') - ( $_refresh * $ONE_DAY ); |
259
|
0
|
|
|
|
|
0
|
($from_cache, $content) = $self->_fetch_refresh_n_days($uri, $cutoff_mtime); |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
55
|
|
|
|
|
458
|
return ($from_cache, $content); |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=head2 load ($uri) |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Load a previously fetched and stored $uri from the file cache and |
268
|
|
|
|
|
|
|
returns the content. Uses Path::Tiny->slurp_utf8, which will lock |
269
|
|
|
|
|
|
|
the file during the operation and which uses a binmode of |
270
|
|
|
|
|
|
|
:unix:encoding(UTF-8) for platform portability of the files. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=cut |
273
|
|
|
|
|
|
|
|
274
|
56
|
|
|
56
|
1
|
3378
|
method load ($uri) { |
|
56
|
|
|
|
|
171
|
|
|
56
|
|
|
|
|
137
|
|
|
56
|
|
|
|
|
100
|
|
275
|
56
|
|
|
|
|
292
|
my $file = $self->_path_to_key($uri); |
276
|
|
|
|
|
|
|
|
277
|
56
|
100
|
66
|
|
|
1267
|
if ( defined $file && -f $file ) { |
278
|
55
|
|
|
|
|
344
|
return _read_file($file); |
279
|
|
|
|
|
|
|
} else { |
280
|
1
|
|
|
|
|
6
|
return; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=head2 store ($uri, $content) |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
Stores content obtained from a URI using fetch() into a file in the |
287
|
|
|
|
|
|
|
cache. The filename is derived from the tail end of the URI. |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
Uses Path::Tiny->spew_utf8, which writes data to the file atomically. |
290
|
|
|
|
|
|
|
The file is written to a temporary file in the cache directory, then |
291
|
|
|
|
|
|
|
renamed over the original. |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
A binmode of :unix:encoding(UTF-8) (i.e. PerlIO::utf8_strict) is |
294
|
|
|
|
|
|
|
used, unless Unicode::UTF8 0.58+ is installed. In that case, the content |
295
|
|
|
|
|
|
|
will be encoded by Unicode::UTF8 and written using spew_raw. |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
The idea is to store data in a platform-neutral fashion, so cached |
298
|
|
|
|
|
|
|
files can be used for unit testing on multiple platforms. |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=cut |
301
|
|
|
|
|
|
|
|
302
|
1
|
|
|
1
|
1
|
11122
|
method store ($uri, $content) { |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2
|
|
303
|
|
|
|
|
|
|
|
304
|
1
|
50
|
|
|
|
25
|
croak '*E* cache directory doesn\'t exist: ' . $_cachedir |
305
|
|
|
|
|
|
|
unless -d $_cachedir; |
306
|
|
|
|
|
|
|
|
307
|
1
|
|
|
|
|
7
|
my $store_file = $self->_path_to_key($uri); |
308
|
1
|
50
|
|
|
|
4
|
return if not defined $store_file; |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# path($dir)->make_path( $dir, mode => $_dir_create_mode ) |
311
|
|
|
|
|
|
|
# if not -d $dir; |
312
|
|
|
|
|
|
|
|
313
|
1
|
|
|
|
|
5
|
_write_file( $store_file, $content ); |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
# method purge_cache($mtime) { |
317
|
|
|
|
|
|
|
# delete daily data files older than $mtime |
318
|
|
|
|
|
|
|
# } |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 remove ($uri) |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
Remove the cache file associated with this URI. |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |
325
|
|
|
|
|
|
|
|
326
|
1
|
|
|
1
|
1
|
1113
|
method remove ($uri) { |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
3
|
|
327
|
1
|
50
|
|
|
|
4
|
my $file = $self->_path_to_key($uri) |
328
|
|
|
|
|
|
|
or return; |
329
|
1
|
|
|
|
|
84
|
unlink $file; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
335
|
|
|
|
|
|
|
# Private methods |
336
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
337
|
|
|
|
|
|
|
|
338
|
54
|
|
|
54
|
|
184
|
method _fetch_refresh_never ($uri) { |
|
54
|
|
|
|
|
111
|
|
|
54
|
|
|
|
|
173
|
|
|
54
|
|
|
|
|
119
|
|
339
|
|
|
|
|
|
|
# use the cache only |
340
|
54
|
|
|
|
|
261
|
my $key = $self->_uri_to_key($uri); |
341
|
54
|
|
|
|
|
312
|
my $content = $self->load($key); |
342
|
54
|
|
|
|
|
1809041
|
return ($FROM_CACHE, $content); |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
0
|
|
|
0
|
|
0
|
method _fetch_refresh_always ($uri) { |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
346
|
|
|
|
|
|
|
# check for a fresher copy on the server |
347
|
0
|
|
|
|
|
0
|
my $key = $self->_uri_to_key($uri); |
348
|
0
|
|
|
|
|
0
|
my $file = $self->_path_to_key($key); |
349
|
|
|
|
|
|
|
|
350
|
0
|
|
|
|
|
0
|
my $st = stat $file; |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# if we have a cached file, check to see if the page is newer |
353
|
0
|
0
|
|
|
|
0
|
if ($st) { |
354
|
0
|
0
|
|
|
|
0
|
my ($ctype, $doclen, $mtime, $exp, $svr) = head($uri) |
355
|
|
|
|
|
|
|
or croak '*E* unable to fetch header for: ' . $uri; |
356
|
|
|
|
|
|
|
|
357
|
0
|
0
|
|
|
|
0
|
if ($mtime > $st->mtime) { |
358
|
|
|
|
|
|
|
# page changed since it was cached |
359
|
0
|
|
|
|
|
0
|
my $content = get($uri); |
360
|
0
|
0
|
|
|
|
0
|
$self->store($key, $content) if $content; |
361
|
0
|
|
|
|
|
0
|
return ($FROM_URI, $content); |
362
|
|
|
|
|
|
|
} else { |
363
|
|
|
|
|
|
|
# page is unchanged, so use the cached file |
364
|
0
|
|
|
|
|
0
|
my $content = $self->load($key); |
365
|
0
|
|
|
|
|
0
|
return ($FROM_CACHE, $content); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# there's no cached file, so get the page from the URI and cache it |
370
|
0
|
|
|
|
|
0
|
my $content = get($uri); |
371
|
0
|
0
|
|
|
|
0
|
$self->store($key, $content) if $content; |
372
|
0
|
|
|
|
|
0
|
return ($FROM_URI, $content); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
1
|
|
|
1
|
|
4
|
method _fetch_refresh_n_days ($uri, $cutoff_mtime) { |
|
1
|
|
|
|
|
11
|
|
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
2
|
|
376
|
|
|
|
|
|
|
# check whether the cache or page is older than N days |
377
|
|
|
|
|
|
|
# if the cache file is younger than N days ago, use it |
378
|
|
|
|
|
|
|
# otherwise get the latest page from the server |
379
|
|
|
|
|
|
|
# check the server if the file is older than this year |
380
|
|
|
|
|
|
|
|
381
|
1
|
|
|
|
|
6
|
my $key = $self->_uri_to_key($uri); |
382
|
1
|
|
|
|
|
5
|
my $file = $self->_path_to_key($key); |
383
|
|
|
|
|
|
|
|
384
|
1
|
|
|
|
|
7
|
my $st = stat $file; |
385
|
|
|
|
|
|
|
|
386
|
1
|
50
|
33
|
|
|
315
|
if ($st and $st->mtime >= $cutoff_mtime) { |
387
|
|
|
|
|
|
|
# the cached file we have is at or new than the cutoff, so we'll use it |
388
|
1
|
|
|
|
|
131
|
my $content = $self->load($key); |
389
|
1
|
|
|
|
|
100498
|
return ($FROM_CACHE, $content); |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
# get the mtime for the URI |
393
|
0
|
0
|
|
|
|
0
|
my ($ctype, $doclen, $mtime, $exp, $svr) = head($uri) |
394
|
|
|
|
|
|
|
or croak '*E* unable to fetch header for: ' . $uri; |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# our cached file is older than the cutoff, but if it's up to date |
397
|
|
|
|
|
|
|
# with the web page then we can use it |
398
|
0
|
0
|
0
|
|
|
0
|
if ($st and $st->mtime >= $mtime) { |
399
|
|
|
|
|
|
|
# web page hasn't changed since it was cached. so we'll use it |
400
|
0
|
|
|
|
|
0
|
my $content = $self->load($key); |
401
|
0
|
|
|
|
|
0
|
return ($FROM_CACHE, $content); |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
# there's no cached file, or the cached file is out of date, so |
405
|
|
|
|
|
|
|
# we get the page from the URI and cache it |
406
|
0
|
|
|
|
|
0
|
my $content = get($uri); |
407
|
0
|
0
|
|
|
|
0
|
$self->store($key, $content) if $content; |
408
|
0
|
|
|
|
|
0
|
return ($FROM_URI, $content); |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
|
411
|
0
|
|
|
0
|
|
0
|
method _fetch_without_cache ($uri) { |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
412
|
|
|
|
|
|
|
# check for a fresher copy on the server |
413
|
0
|
|
|
|
|
0
|
my $key = $self->_uri_to_key($uri); |
414
|
|
|
|
|
|
|
|
415
|
0
|
|
|
|
|
0
|
my $content = get($uri); |
416
|
0
|
|
|
|
|
0
|
return ($FROM_URI, $content); |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
120
|
|
|
120
|
|
6171
|
method _uri_to_key ($uri) { |
|
120
|
|
|
|
|
187
|
|
|
120
|
|
|
|
|
238
|
|
|
120
|
|
|
|
|
199
|
|
420
|
120
|
|
|
|
|
897
|
my @parts = split m{ $FSLASH }xms, $uri; |
421
|
120
|
|
|
|
|
311
|
my $key = $parts[-1]; # use the last part as the key |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# this transformation is for testing using CPAN pages and is not |
424
|
|
|
|
|
|
|
# necessary for the NOAA GHCN pages we actually deal with |
425
|
120
|
|
|
|
|
344
|
$key =~ s{ [:] }{}xmsg; |
426
|
|
|
|
|
|
|
|
427
|
120
|
|
|
|
|
415
|
return $key; |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
62
|
|
|
62
|
|
2802
|
method _path_to_key ($uri) { |
|
62
|
|
|
|
|
143
|
|
|
62
|
|
|
|
|
114
|
|
|
62
|
|
|
|
|
115
|
|
431
|
62
|
50
|
|
|
|
236
|
return if not defined $uri; |
432
|
|
|
|
|
|
|
|
433
|
62
|
|
|
|
|
208
|
my $key = $self->_uri_to_key( $uri ); |
434
|
|
|
|
|
|
|
|
435
|
62
|
|
|
|
|
464
|
my $filepath = path($_cachedir)->child($key)->stringify; |
436
|
|
|
|
|
|
|
|
437
|
62
|
|
|
|
|
7139
|
return $filepath; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
###################################################################### |
441
|
|
|
|
|
|
|
# Private subroutines |
442
|
|
|
|
|
|
|
###################################################################### |
443
|
|
|
|
|
|
|
|
444
|
55
|
|
|
55
|
|
119
|
sub _read_file ( $file ) { |
|
55
|
|
|
|
|
165
|
|
|
55
|
|
|
|
|
111
|
|
445
|
55
|
|
|
|
|
215
|
return path($file)->slurp_utf8; |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
|
448
|
1
|
|
|
1
|
|
2
|
sub _write_file ( $file, $data ) { |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2
|
|
449
|
1
|
|
|
|
|
4
|
return path($file)->spew_utf8( $data ); |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
1; |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
__END__ |