| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package CPAN::CachingProxy; |
|
4
|
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
1445
|
use strict; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
42
|
|
|
6
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
76
|
|
|
7
|
1
|
|
|
1
|
|
10119
|
use URI; |
|
|
1
|
|
|
|
|
18449
|
|
|
|
1
|
|
|
|
|
39
|
|
|
8
|
1
|
|
|
1
|
|
694
|
use Cache::File; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Data::Dumper; |
|
10
|
|
|
|
|
|
|
use LWP::UserAgent; |
|
11
|
|
|
|
|
|
|
use Fcntl qw(:flock); |
|
12
|
|
|
|
|
|
|
use Digest::SHA1 qw(sha1_hex); |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our $VERSION = "1.6500"; |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# wget -O MIRRORED.BY http://www.cpan.org/MIRRORED.BY |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# new {{{ |
|
19
|
|
|
|
|
|
|
sub new { |
|
20
|
|
|
|
|
|
|
my $class = shift; |
|
21
|
|
|
|
|
|
|
my $this = bless {@_}, $class; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
unless( $this->{cgi} ) { |
|
24
|
|
|
|
|
|
|
require CGI or die $@; |
|
25
|
|
|
|
|
|
|
$this->{cgi} = new CGI; |
|
26
|
|
|
|
|
|
|
} |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
unless( $this->{cache_object} ) { |
|
29
|
|
|
|
|
|
|
$this->{cache_root} = "/tmp/ccp/" unless exists $this->{cache_root}; |
|
30
|
|
|
|
|
|
|
$this->{cache_root} = "/tmp/ccp/" unless exists $this->{cache_root}; |
|
31
|
|
|
|
|
|
|
$this->{default_expire} = "2 day" unless exists $this->{default_expire}; |
|
32
|
|
|
|
|
|
|
$this->{index_expire} = "3 hour" unless exists $this->{index_expire}; |
|
33
|
|
|
|
|
|
|
$this->{error_expire} = "15 minute" unless exists $this->{error_expire}; |
|
34
|
|
|
|
|
|
|
$this->{url_lockfile_dir} = "/tmp" unless exists $this->{url_lockfile_dir}; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
$this->{index_regexp} = qr/(?:03modlist\.data|02packages\.details\.txt|01mailrc\.txt)/ unless exists $this->{index_regexp}; |
|
37
|
|
|
|
|
|
|
$this->{cache_object} = Cache::File->new(cache_root=>$this->{cache_root}, default_expires => $this->{default_expire} ); |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$this->{key_space} = "CK" unless $this->{key_space}; |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
unless( $this->{ua} ) { |
|
43
|
|
|
|
|
|
|
my $ua = $this->{ua} = new LWP::UserAgent; |
|
44
|
|
|
|
|
|
|
$ua->agent($this->{agent} ? $this->{agent} : "CCP/$VERSION (Paul's CPAN caching proxy / perlmonks-id=16186)"); |
|
45
|
|
|
|
|
|
|
if( exists $this->{activity_timeout} ) { |
|
46
|
|
|
|
|
|
|
if( defined (my $at = $this->{activity_timeout}) ) { |
|
47
|
|
|
|
|
|
|
$ua->timeout($at); |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
} else { |
|
51
|
|
|
|
|
|
|
$ua->timeout(12); |
|
52
|
|
|
|
|
|
|
} |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
$this->{ua}->timeout( $this->{activity_timeout} ) if defined $this->{activity_timeout}; |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
croak "there are no default mirrors, they must be set" unless $this->{mirrors}; |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
return $this; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
# }}} |
|
62
|
|
|
|
|
|
|
# run {{{ |
|
63
|
|
|
|
|
|
|
sub run { |
|
64
|
|
|
|
|
|
|
my $this = shift; |
|
65
|
|
|
|
|
|
|
my $cgi = $this->{cgi}; |
|
66
|
|
|
|
|
|
|
my $mirror = $this->{mirrors}[ rand @{$this->{mirrors}} ]; |
|
67
|
|
|
|
|
|
|
my $pinfo = $cgi->path_info || return print $cgi->redirect( $cgi->url . "/" ); |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
$pinfo =~ s/^\///; |
|
70
|
|
|
|
|
|
|
$mirror=~ s/\/$//; |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
my $CK = "$this->{key_space}:$pinfo"; |
|
73
|
|
|
|
|
|
|
my $URL = "$mirror/$pinfo"; |
|
74
|
|
|
|
|
|
|
# $URL =~ s/\/{2,}/\//g; |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
if( $pinfo =~ s{^___/}{} ) { |
|
77
|
|
|
|
|
|
|
# NOTE: undocumented special case. If the path begins with ___, it |
|
78
|
|
|
|
|
|
|
# probably came from a 404 handler. in which case, the real pinfo was |
|
79
|
|
|
|
|
|
|
# probably an absolute url. replace the entire path portion of our |
|
80
|
|
|
|
|
|
|
# mirror url with the non ___'d part of the pinfo. |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
my $nurl = URI->new($mirror); |
|
83
|
|
|
|
|
|
|
$nurl->path($pinfo); |
|
84
|
|
|
|
|
|
|
# arguably we should use URI for all our path manips, but this section is new and the old stuff works fine |
|
85
|
|
|
|
|
|
|
$URL = "$nurl"; |
|
86
|
|
|
|
|
|
|
} |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my $lockfile_fh; |
|
89
|
|
|
|
|
|
|
my $uld = $this->{url_lockfile_dir}; |
|
90
|
|
|
|
|
|
|
if( $uld and -d $uld ) { |
|
91
|
|
|
|
|
|
|
for(glob("$uld/.CP_FILE*")) { |
|
92
|
|
|
|
|
|
|
open my $tlf, "<", $_ or next; |
|
93
|
|
|
|
|
|
|
next unless flock $tlf, (LOCK_NB|LOCK_EX); |
|
94
|
|
|
|
|
|
|
warn "[DEBUG] unlinking old URL-lockfile $_\n" if $this->{debug}; |
|
95
|
|
|
|
|
|
|
unlink $_; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# NOTE: sha1 is not for security, as I think timing attacks on this |
|
99
|
|
|
|
|
|
|
# have little value really, aside from DoS, and then the local |
|
100
|
|
|
|
|
|
|
# attackers probably have better things to do. These are simply here |
|
101
|
|
|
|
|
|
|
# because my /tmp is tmpfs, which has surprisingly low file name length |
|
102
|
|
|
|
|
|
|
# restrictions. |
|
103
|
|
|
|
|
|
|
my $converted = join("/", $uld, ".CP_FILE_" . sha1_hex($URL)); |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
warn "[DEBUG] locking $URL using $converted lockfile\n" if $this->{debug}; |
|
106
|
|
|
|
|
|
|
open $lockfile_fh, ">", $converted or die "error opening lockfile for $URL: $!"; |
|
107
|
|
|
|
|
|
|
flock $lockfile_fh, LOCK_EX or die "failed to lock lockfile for $URL: $!"; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
else { |
|
111
|
|
|
|
|
|
|
die "as of version 1.6, url_locking_dir is a required option."; |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
my $cache = $this->{cache_object}; |
|
115
|
|
|
|
|
|
|
if( $cache->exists($CK) and $cache->exists("$CK.hdr") ) { our $VAR1; |
|
116
|
|
|
|
|
|
|
my $res = eval $cache->get( "$CK.hdr" ); die "problem finding cache entry\n" if $@; |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
unless( $this->{ignore_last_modified} ) { |
|
119
|
|
|
|
|
|
|
if( my $lm = $res->header('last_modified') ) { |
|
120
|
|
|
|
|
|
|
my $_lm = eval { $this->{ua}->head($URL)->header('last_modified') }; |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# $lm = "hehe, random failure time" if (int rand(7)) == 0; |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
if( $_lm and $lm ne $_lm ) { |
|
125
|
|
|
|
|
|
|
warn "[DEBUG] last_modified differs ($lm vs $_lm), forcing cache miss\n" if $this->{debug}; |
|
126
|
|
|
|
|
|
|
goto FORCE_CACHE_MISS; |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
my $start = $this->my_copy_hdr($res, "cache hit"); |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# XXX: is it the right thing to do to close the lockfile here? |
|
134
|
|
|
|
|
|
|
# Probably. At this point, we should have the whole file, and we sure |
|
135
|
|
|
|
|
|
|
# don't mind serving similtaneous requests, right? |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
close $lockfile_fh; |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
### |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
my $fh = $cache->handle( $CK, "<" ) or die "problem finding cache entry\n"; |
|
142
|
|
|
|
|
|
|
my $buf; |
|
143
|
|
|
|
|
|
|
BUF: while( read $fh, $buf, 4096 ) { |
|
144
|
|
|
|
|
|
|
if( $start > 0 ) { |
|
145
|
|
|
|
|
|
|
if( $start > length $buf ) { |
|
146
|
|
|
|
|
|
|
$start -= length $buf; |
|
147
|
|
|
|
|
|
|
next BUF; |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
} else { |
|
150
|
|
|
|
|
|
|
substr $buf, 0, $start, ""; |
|
151
|
|
|
|
|
|
|
$start = 0; |
|
152
|
|
|
|
|
|
|
} |
|
153
|
|
|
|
|
|
|
} |
|
154
|
|
|
|
|
|
|
print $buf; |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
close $fh; |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
} else { |
|
159
|
|
|
|
|
|
|
FORCE_CACHE_MISS: |
|
160
|
|
|
|
|
|
|
my $expire = $this->{default_expire}; |
|
161
|
|
|
|
|
|
|
$expire = $this->{index_expire} if $pinfo =~ $this->{index_regexp}; |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
$cache->set($CK, 1, $expire ); # doesn't seem like we should have to do this, but apparently we do |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
warn "[DEBUG] getting $URL\n" if $this->{debug}; |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
my $fh = $cache->handle( $CK, ">", $expire ); |
|
168
|
|
|
|
|
|
|
my $request = HTTP::Request->new(GET => $URL); |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
my $announced_header; |
|
171
|
|
|
|
|
|
|
my $response = $this->{ua}->request($request, sub { |
|
172
|
|
|
|
|
|
|
my $chunk = shift; |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
unless( $announced_header ) { |
|
175
|
|
|
|
|
|
|
my $res = shift; |
|
176
|
|
|
|
|
|
|
$announced_header = 1; |
|
177
|
|
|
|
|
|
|
$this->my_copy_hdr($res, "cache miss"); |
|
178
|
|
|
|
|
|
|
} |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
print $fh $chunk; |
|
181
|
|
|
|
|
|
|
print $chunk; |
|
182
|
|
|
|
|
|
|
}); |
|
183
|
|
|
|
|
|
|
close $fh; |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
unless( $response->is_success ) { |
|
186
|
|
|
|
|
|
|
my $my_fail = "FAIL: " . $response->status_line . "\n"; |
|
187
|
|
|
|
|
|
|
$cache->set($CK => $my_fail, $expire); |
|
188
|
|
|
|
|
|
|
$response->header(content_length=>length $my_fail); # fix content length so we don't lie to clients |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
$this->my_copy_hdr($response, "cache miss [fail]"); |
|
191
|
|
|
|
|
|
|
print $my_fail; |
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
warn "[DEBUG] setting $CK\n" if $this->{debug}; |
|
195
|
|
|
|
|
|
|
$cache->set("$CK.hdr", Dumper($response), $expire); |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# if there was an error (which we don't know until ex post facto), go back and fix the expiry |
|
198
|
|
|
|
|
|
|
if( defined $this->{error_expire} and not $response->is_success ) { |
|
199
|
|
|
|
|
|
|
$cache->set_expiry( $CK => $this->{error_expire} ); |
|
200
|
|
|
|
|
|
|
$cache->set_expiry( "$CK.hdr" => $this->{error_expire} ); |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
} |
|
204
|
|
|
|
|
|
|
# }}} |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# {{{ sub my_copy_hdr |
|
207
|
|
|
|
|
|
|
sub my_copy_hdr { |
|
208
|
|
|
|
|
|
|
my ($this, $res, $hit) = @_; |
|
209
|
|
|
|
|
|
|
my $cgi = $this->{cgi}; |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
my $status = $res->status_line; |
|
212
|
|
|
|
|
|
|
warn "[DEBUG] cache status: $hit; status: $status\n" if $this->{debug}; |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
my %more_headers = (qw(accept_ranges bytes)); |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
for(qw(content_length), $this->{ignore_last_modified} ? ():(qw(last_modified))) { |
|
217
|
|
|
|
|
|
|
my $v = $res->header($_); |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
if( $v ) { |
|
220
|
|
|
|
|
|
|
my $k = lc $_; |
|
221
|
|
|
|
|
|
|
$k =~ s/-/_/g; |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
$more_headers{$k} = $v; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
my $start = 0; |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
if( my $r = $cgi->http("Range") ) { |
|
230
|
|
|
|
|
|
|
if( ($start) = $r =~ m/^bytes=(\d+)-$/ ) { |
|
231
|
|
|
|
|
|
|
my $len = $more_headers{content_length}; |
|
232
|
|
|
|
|
|
|
my $new = $len - $start; |
|
233
|
|
|
|
|
|
|
my $end = $len - 1; # this is the byte number, not a number of bytes or something |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
$more_headers{content_range} = "bytes $start-$end/$len"; |
|
236
|
|
|
|
|
|
|
$more_headers{content_length} = $new; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
print $cgi->header(-status=>$status, -charset=>"", -type=>$res->header( 'content-type' ), %more_headers); |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
return $start; |
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# }}} |