line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package CPAN::CachingProxy; |
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
1445
|
use strict; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
42
|
|
6
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
76
|
|
7
|
1
|
|
|
1
|
|
10119
|
use URI; |
|
1
|
|
|
|
|
18449
|
|
|
1
|
|
|
|
|
39
|
|
8
|
1
|
|
|
1
|
|
694
|
use Cache::File; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Data::Dumper; |
10
|
|
|
|
|
|
|
use LWP::UserAgent; |
11
|
|
|
|
|
|
|
use Fcntl qw(:flock); |
12
|
|
|
|
|
|
|
use Digest::SHA1 qw(sha1_hex); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our $VERSION = "1.6500"; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# wget -O MIRRORED.BY http://www.cpan.org/MIRRORED.BY |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# new {{{ |
19
|
|
|
|
|
|
|
sub new { |
20
|
|
|
|
|
|
|
my $class = shift; |
21
|
|
|
|
|
|
|
my $this = bless {@_}, $class; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
unless( $this->{cgi} ) { |
24
|
|
|
|
|
|
|
require CGI or die $@; |
25
|
|
|
|
|
|
|
$this->{cgi} = new CGI; |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
unless( $this->{cache_object} ) { |
29
|
|
|
|
|
|
|
$this->{cache_root} = "/tmp/ccp/" unless exists $this->{cache_root}; |
30
|
|
|
|
|
|
|
$this->{cache_root} = "/tmp/ccp/" unless exists $this->{cache_root}; |
31
|
|
|
|
|
|
|
$this->{default_expire} = "2 day" unless exists $this->{default_expire}; |
32
|
|
|
|
|
|
|
$this->{index_expire} = "3 hour" unless exists $this->{index_expire}; |
33
|
|
|
|
|
|
|
$this->{error_expire} = "15 minute" unless exists $this->{error_expire}; |
34
|
|
|
|
|
|
|
$this->{url_lockfile_dir} = "/tmp" unless exists $this->{url_lockfile_dir}; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
$this->{index_regexp} = qr/(?:03modlist\.data|02packages\.details\.txt|01mailrc\.txt)/ unless exists $this->{index_regexp}; |
37
|
|
|
|
|
|
|
$this->{cache_object} = Cache::File->new(cache_root=>$this->{cache_root}, default_expires => $this->{default_expire} ); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$this->{key_space} = "CK" unless $this->{key_space}; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
unless( $this->{ua} ) { |
43
|
|
|
|
|
|
|
my $ua = $this->{ua} = new LWP::UserAgent; |
44
|
|
|
|
|
|
|
$ua->agent($this->{agent} ? $this->{agent} : "CCP/$VERSION (Paul's CPAN caching proxy / perlmonks-id=16186)"); |
45
|
|
|
|
|
|
|
if( exists $this->{activity_timeout} ) { |
46
|
|
|
|
|
|
|
if( defined (my $at = $this->{activity_timeout}) ) { |
47
|
|
|
|
|
|
|
$ua->timeout($at); |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
} else { |
51
|
|
|
|
|
|
|
$ua->timeout(12); |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
$this->{ua}->timeout( $this->{activity_timeout} ) if defined $this->{activity_timeout}; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
croak "there are no default mirrors, they must be set" unless $this->{mirrors}; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
return $this; |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
# }}} |
62
|
|
|
|
|
|
|
# run {{{ |
63
|
|
|
|
|
|
|
sub run { |
64
|
|
|
|
|
|
|
my $this = shift; |
65
|
|
|
|
|
|
|
my $cgi = $this->{cgi}; |
66
|
|
|
|
|
|
|
my $mirror = $this->{mirrors}[ rand @{$this->{mirrors}} ]; |
67
|
|
|
|
|
|
|
my $pinfo = $cgi->path_info || return print $cgi->redirect( $cgi->url . "/" ); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
$pinfo =~ s/^\///; |
70
|
|
|
|
|
|
|
$mirror=~ s/\/$//; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
my $CK = "$this->{key_space}:$pinfo"; |
73
|
|
|
|
|
|
|
my $URL = "$mirror/$pinfo"; |
74
|
|
|
|
|
|
|
# $URL =~ s/\/{2,}/\//g; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
if( $pinfo =~ s{^___/}{} ) { |
77
|
|
|
|
|
|
|
# NOTE: undocumented special case. If the path begins with ___, it |
78
|
|
|
|
|
|
|
# probably came from a 404 handler. in which case, the real pinfo was |
79
|
|
|
|
|
|
|
# probably an absolute url. replace the entire path portion of our |
80
|
|
|
|
|
|
|
# mirror url with the non ___'d part of the pinfo. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
my $nurl = URI->new($mirror); |
83
|
|
|
|
|
|
|
$nurl->path($pinfo); |
84
|
|
|
|
|
|
|
# arguably we should use URI for all our path manips, but this section is new and the old stuff works fine |
85
|
|
|
|
|
|
|
$URL = "$nurl"; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my $lockfile_fh; |
89
|
|
|
|
|
|
|
my $uld = $this->{url_lockfile_dir}; |
90
|
|
|
|
|
|
|
if( $uld and -d $uld ) { |
91
|
|
|
|
|
|
|
for(glob("$uld/.CP_FILE*")) { |
92
|
|
|
|
|
|
|
open my $tlf, "<", $_ or next; |
93
|
|
|
|
|
|
|
next unless flock $tlf, (LOCK_NB|LOCK_EX); |
94
|
|
|
|
|
|
|
warn "[DEBUG] unlinking old URL-lockfile $_\n" if $this->{debug}; |
95
|
|
|
|
|
|
|
unlink $_; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# NOTE: sha1 is not for security, as I think timing attacks on this |
99
|
|
|
|
|
|
|
# have little value really, aside from DoS, and then the local |
100
|
|
|
|
|
|
|
# attackers probably have better things to do. These are simply here |
101
|
|
|
|
|
|
|
# because my /tmp is tmpfs, which has surprisingly low file name length |
102
|
|
|
|
|
|
|
# restrictions. |
103
|
|
|
|
|
|
|
my $converted = join("/", $uld, ".CP_FILE_" . sha1_hex($URL)); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
warn "[DEBUG] locking $URL using $converted lockfile\n" if $this->{debug}; |
106
|
|
|
|
|
|
|
open $lockfile_fh, ">", $converted or die "error opening lockfile for $URL: $!"; |
107
|
|
|
|
|
|
|
flock $lockfile_fh, LOCK_EX or die "failed to lock lockfile for $URL: $!"; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
else { |
111
|
|
|
|
|
|
|
die "as of version 1.6, url_locking_dir is a required option."; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
my $cache = $this->{cache_object}; |
115
|
|
|
|
|
|
|
if( $cache->exists($CK) and $cache->exists("$CK.hdr") ) { our $VAR1; |
116
|
|
|
|
|
|
|
my $res = eval $cache->get( "$CK.hdr" ); die "problem finding cache entry\n" if $@; |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
unless( $this->{ignore_last_modified} ) { |
119
|
|
|
|
|
|
|
if( my $lm = $res->header('last_modified') ) { |
120
|
|
|
|
|
|
|
my $_lm = eval { $this->{ua}->head($URL)->header('last_modified') }; |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# $lm = "hehe, random failure time" if (int rand(7)) == 0; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
if( $_lm and $lm ne $_lm ) { |
125
|
|
|
|
|
|
|
warn "[DEBUG] last_modified differs ($lm vs $_lm), forcing cache miss\n" if $this->{debug}; |
126
|
|
|
|
|
|
|
goto FORCE_CACHE_MISS; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
my $start = $this->my_copy_hdr($res, "cache hit"); |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# XXX: is it the right thing to do to close the lockfile here? |
134
|
|
|
|
|
|
|
# Probably. At this point, we should have the whole file, and we sure |
135
|
|
|
|
|
|
|
# don't mind serving similtaneous requests, right? |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
close $lockfile_fh; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
### |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
my $fh = $cache->handle( $CK, "<" ) or die "problem finding cache entry\n"; |
142
|
|
|
|
|
|
|
my $buf; |
143
|
|
|
|
|
|
|
BUF: while( read $fh, $buf, 4096 ) { |
144
|
|
|
|
|
|
|
if( $start > 0 ) { |
145
|
|
|
|
|
|
|
if( $start > length $buf ) { |
146
|
|
|
|
|
|
|
$start -= length $buf; |
147
|
|
|
|
|
|
|
next BUF; |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
} else { |
150
|
|
|
|
|
|
|
substr $buf, 0, $start, ""; |
151
|
|
|
|
|
|
|
$start = 0; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
print $buf; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
close $fh; |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
} else { |
159
|
|
|
|
|
|
|
FORCE_CACHE_MISS: |
160
|
|
|
|
|
|
|
my $expire = $this->{default_expire}; |
161
|
|
|
|
|
|
|
$expire = $this->{index_expire} if $pinfo =~ $this->{index_regexp}; |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
$cache->set($CK, 1, $expire ); # doesn't seem like we should have to do this, but apparently we do |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
warn "[DEBUG] getting $URL\n" if $this->{debug}; |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
my $fh = $cache->handle( $CK, ">", $expire ); |
168
|
|
|
|
|
|
|
my $request = HTTP::Request->new(GET => $URL); |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
my $announced_header; |
171
|
|
|
|
|
|
|
my $response = $this->{ua}->request($request, sub { |
172
|
|
|
|
|
|
|
my $chunk = shift; |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
unless( $announced_header ) { |
175
|
|
|
|
|
|
|
my $res = shift; |
176
|
|
|
|
|
|
|
$announced_header = 1; |
177
|
|
|
|
|
|
|
$this->my_copy_hdr($res, "cache miss"); |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
print $fh $chunk; |
181
|
|
|
|
|
|
|
print $chunk; |
182
|
|
|
|
|
|
|
}); |
183
|
|
|
|
|
|
|
close $fh; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
unless( $response->is_success ) { |
186
|
|
|
|
|
|
|
my $my_fail = "FAIL: " . $response->status_line . "\n"; |
187
|
|
|
|
|
|
|
$cache->set($CK => $my_fail, $expire); |
188
|
|
|
|
|
|
|
$response->header(content_length=>length $my_fail); # fix content length so we don't lie to clients |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
$this->my_copy_hdr($response, "cache miss [fail]"); |
191
|
|
|
|
|
|
|
print $my_fail; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
warn "[DEBUG] setting $CK\n" if $this->{debug}; |
195
|
|
|
|
|
|
|
$cache->set("$CK.hdr", Dumper($response), $expire); |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# if there was an error (which we don't know until ex post facto), go back and fix the expiry |
198
|
|
|
|
|
|
|
if( defined $this->{error_expire} and not $response->is_success ) { |
199
|
|
|
|
|
|
|
$cache->set_expiry( $CK => $this->{error_expire} ); |
200
|
|
|
|
|
|
|
$cache->set_expiry( "$CK.hdr" => $this->{error_expire} ); |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
# }}} |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# {{{ sub my_copy_hdr |
207
|
|
|
|
|
|
|
sub my_copy_hdr { |
208
|
|
|
|
|
|
|
my ($this, $res, $hit) = @_; |
209
|
|
|
|
|
|
|
my $cgi = $this->{cgi}; |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
my $status = $res->status_line; |
212
|
|
|
|
|
|
|
warn "[DEBUG] cache status: $hit; status: $status\n" if $this->{debug}; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
my %more_headers = (qw(accept_ranges bytes)); |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
for(qw(content_length), $this->{ignore_last_modified} ? ():(qw(last_modified))) { |
217
|
|
|
|
|
|
|
my $v = $res->header($_); |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
if( $v ) { |
220
|
|
|
|
|
|
|
my $k = lc $_; |
221
|
|
|
|
|
|
|
$k =~ s/-/_/g; |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
$more_headers{$k} = $v; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
my $start = 0; |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
if( my $r = $cgi->http("Range") ) { |
230
|
|
|
|
|
|
|
if( ($start) = $r =~ m/^bytes=(\d+)-$/ ) { |
231
|
|
|
|
|
|
|
my $len = $more_headers{content_length}; |
232
|
|
|
|
|
|
|
my $new = $len - $start; |
233
|
|
|
|
|
|
|
my $end = $len - 1; # this is the byte number, not a number of bytes or something |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
$more_headers{content_range} = "bytes $start-$end/$len"; |
236
|
|
|
|
|
|
|
$more_headers{content_length} = $new; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
print $cgi->header(-status=>$status, -charset=>"", -type=>$res->header( 'content-type' ), %more_headers); |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
return $start; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# }}} |