line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
1
|
|
|
1
|
|
1093
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
30
|
|
3
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
4
|
|
|
|
|
|
|
package Net::IMP::HTTP::SaveResponse; |
5
|
1
|
|
|
1
|
|
5
|
use base 'Net::IMP::HTTP::Request'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
132
|
|
6
|
1
|
|
|
1
|
|
5
|
use fields qw(root file); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
9
|
|
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
41
|
use Net::IMP; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
87
|
|
9
|
1
|
|
|
1
|
|
4
|
use Net::IMP::Debug; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
7
|
|
10
|
1
|
|
|
1
|
|
95
|
use File::Path 'make_path'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
45
|
|
11
|
1
|
|
|
1
|
|
1162
|
use File::Temp 'tempfile'; |
|
1
|
|
|
|
|
20376
|
|
|
1
|
|
|
|
|
81
|
|
12
|
1
|
|
|
1
|
|
10
|
use Digest::MD5; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
13
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
70
|
|
14
|
1
|
|
|
1
|
|
5
|
use Scalar::Util 'looks_like_number'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2496
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my $DEFAULT_LIMIT = 10_000_000; |
17
|
|
|
|
|
|
|
|
18
|
0
|
|
|
0
|
0
|
|
sub RTYPES { (IMP_PREPASS) } |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
sub new_factory { |
21
|
0
|
|
|
0
|
1
|
|
my ($class,%args) = @_; |
22
|
0
|
0
|
|
|
|
|
my $dir = $args{root} or croak("no root directory given"); |
23
|
0
|
0
|
0
|
|
|
|
-d $dir && -r _ && -x _ or croak("cannot use base dir $dir: $!"); |
|
|
|
0
|
|
|
|
|
24
|
0
|
0
|
|
|
|
|
$args{limit} = $DEFAULT_LIMIT if ! defined $args{limit}; |
25
|
0
|
|
|
|
|
|
return $class->SUPER::new_factory(%args); |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub new_analyzer { |
29
|
0
|
|
|
0
|
1
|
|
my ($factory,%args) = @_; |
30
|
0
|
|
|
|
|
|
my $self = $factory->SUPER::new_analyzer(%args); |
31
|
|
|
|
|
|
|
# we don't modify |
32
|
0
|
|
|
|
|
|
$self->run_callback( |
33
|
|
|
|
|
|
|
[ IMP_PREPASS,0,IMP_MAXOFFSET ], |
34
|
|
|
|
|
|
|
[ IMP_PREPASS,1,IMP_MAXOFFSET ] |
35
|
|
|
|
|
|
|
); |
36
|
0
|
|
|
|
|
|
return $self; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub DESTROY { |
41
|
0
|
|
|
0
|
|
|
my $self = shift; |
42
|
0
|
0
|
0
|
|
|
|
$self->{file} && $self->{file}{tname} && unlink($self->{file}{tname}); |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub request_hdr { |
46
|
0
|
|
|
0
|
1
|
|
my ($self,$hdr) = @_; |
47
|
|
|
|
|
|
|
|
48
|
0
|
|
|
|
|
|
my ($method,$proto,$host,$path) = $hdr =~m{\A([A-Z]+) +(?:(\w+)://([^/]+))?(\S+)}; |
49
|
0
|
0
|
|
|
|
|
$host = $1 if $hdr =~m{\nHost: *(\S+)}i; |
50
|
0
|
0
|
|
|
|
|
$host or goto IGNORE; |
51
|
0
|
|
0
|
|
|
|
$proto ||= 'http'; |
52
|
0
|
|
|
|
|
|
$host = lc($host); |
53
|
0
|
0
|
|
|
|
|
my $port = |
54
|
0
|
0
|
|
|
|
|
$host=~s{^(?:\[(\w._\-:)+\]|(\w._\-))(?::(\d+))?$}{ $1 || $2 }e ? |
55
|
|
|
|
|
|
|
$3:80; |
56
|
|
|
|
|
|
|
|
57
|
0
|
0
|
|
|
|
|
if ( my $rx = $self->{factory_args}{only_url} ) { |
58
|
0
|
0
|
0
|
|
|
|
goto IGNORE if "$proto://$host:$port$path" !~ $rx |
59
|
|
|
|
|
|
|
and "$proto://$host$path" !~ $rx |
60
|
|
|
|
|
|
|
} |
61
|
0
|
0
|
|
|
|
|
if ( my $rx = $self->{factory_args}{exclude_url} ) { |
62
|
0
|
0
|
0
|
|
|
|
goto IGNORE if "$proto://$host:$port$path" =~ $rx |
63
|
|
|
|
|
|
|
or "$proto://$host$path" =~ $rx |
64
|
|
|
|
|
|
|
} |
65
|
0
|
0
|
|
|
|
|
if ( my $srh = $self->{factory_args}{method} ) { |
66
|
0
|
0
|
|
|
|
|
goto IGNORE if ! _check_srh($srh,$method); |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
|
70
|
0
|
|
|
|
|
|
my $dir = $self->{factory_args}{root}."/$host:$port"; |
71
|
0
|
0
|
|
|
|
|
if ( ! -d $dir ) { |
72
|
0
|
|
|
|
|
|
my $err; |
73
|
0
|
|
|
|
|
|
make_path($dir, { error => \$err }); |
74
|
|
|
|
|
|
|
} |
75
|
0
|
0
|
|
|
|
|
my ($fh,$fname) = tempfile( "tmpXXXXXXX", DIR => $dir ) |
76
|
|
|
|
|
|
|
or goto IGNORE; |
77
|
|
|
|
|
|
|
|
78
|
0
|
|
|
|
|
|
$hdr =~s{^(Content-encoding:|Transfer-encoding:|Content-length:)}{X-Original-$1}mig; |
79
|
0
|
|
|
|
|
|
print $fh $hdr; |
80
|
|
|
|
|
|
|
|
81
|
0
|
0
|
|
|
|
|
my $qstring = $path =~s{\?(.+)}{} ? $1 : undef; |
82
|
0
|
|
|
|
|
|
$self->{file} = { |
83
|
|
|
|
|
|
|
tfh => $fh, |
84
|
|
|
|
|
|
|
tname => $fname, |
85
|
|
|
|
|
|
|
dir => $dir, |
86
|
|
|
|
|
|
|
method => $method, |
87
|
|
|
|
|
|
|
md5path => Digest::MD5->new->add($path)->hexdigest, |
88
|
|
|
|
|
|
|
md5data => undef, |
89
|
|
|
|
|
|
|
size => [ length($hdr),0,0,0 ], |
90
|
|
|
|
|
|
|
rphdr => '', |
91
|
|
|
|
|
|
|
rpbody => '', |
92
|
|
|
|
|
|
|
eof => 0, |
93
|
|
|
|
|
|
|
}; |
94
|
0
|
0
|
0
|
|
|
|
( $self->{file}{md5data} = Digest::MD5->new )->add("\000$qstring\001") |
95
|
|
|
|
|
|
|
if defined $qstring and ! $self->{factory_args}{ignore_parameters}; |
96
|
0
|
|
|
|
|
|
return; # continue in request body |
97
|
|
|
|
|
|
|
|
98
|
0
|
|
|
|
|
|
IGNORE: |
99
|
|
|
|
|
|
|
# pass thru w/o saving |
100
|
|
|
|
|
|
|
debug("no save $host:$port/$path"); |
101
|
0
|
|
|
|
|
|
$self->run_callback( |
102
|
|
|
|
|
|
|
# pass thru everything |
103
|
|
|
|
|
|
|
[ IMP_PASS,0,IMP_MAXOFFSET ], |
104
|
|
|
|
|
|
|
[ IMP_PASS,1,IMP_MAXOFFSET ], |
105
|
|
|
|
|
|
|
); |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub request_body { |
109
|
0
|
|
|
0
|
1
|
|
my ($self,$data) = @_; |
110
|
0
|
0
|
|
|
|
|
my $f = $self->{file} or return; |
111
|
0
|
|
|
|
|
|
print { $f->{tfh} } $data; |
|
0
|
|
|
|
|
|
|
112
|
0
|
|
|
|
|
|
my $md = $f->{md5data}; |
113
|
0
|
0
|
|
|
|
|
if ( $data ne '' ) { |
114
|
0
|
|
|
|
|
|
$f->{size}[1] += length($data); |
115
|
0
|
0
|
|
|
|
|
if ( my $l = $self->{factory_args}{limit} ) { |
116
|
0
|
0
|
|
|
|
|
return _stop_saving($self) if $f->{size}[1] > $l; |
117
|
|
|
|
|
|
|
} |
118
|
0
|
0
|
|
|
|
|
if ( ! $md ) { |
119
|
0
|
0
|
|
|
|
|
return if $self->{factory_args}{ignore_parameters}; |
120
|
0
|
|
|
|
|
|
$md = $f->{md5data} = Digest::MD5->new; |
121
|
|
|
|
|
|
|
} |
122
|
0
|
|
|
|
|
|
$md->add($data); |
123
|
0
|
|
|
|
|
|
return; |
124
|
|
|
|
|
|
|
} |
125
|
0
|
0
|
|
|
|
|
if ( defined( my $rp = $f->{rphdr} )) { |
126
|
0
|
|
|
|
|
|
print { $f->{tfh} } $rp; |
|
0
|
|
|
|
|
|
|
127
|
0
|
|
|
|
|
|
$f->{rphdr} = undef; |
128
|
0
|
0
|
|
|
|
|
if ( defined( $rp = $f->{rpbody} )) { |
129
|
0
|
|
|
|
|
|
print { $f->{tfh} } $rp; |
|
0
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
$f->{rpbody} = undef; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
} |
133
|
0
|
|
|
|
|
|
_check_eof($self,1); |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub response_hdr { |
137
|
0
|
|
|
0
|
1
|
|
my ($self,$hdr) = @_; |
138
|
0
|
0
|
|
|
|
|
my $f = $self->{file} or return; |
139
|
0
|
0
|
|
|
|
|
return _stop_saving($self) if $hdr =~m{\AHTTP/1\.[01] (100|304|5\d\d)}; |
140
|
0
|
0
|
|
|
|
|
if ( my $srh = $self->{factory_args}{content_type} ) { |
141
|
0
|
|
|
|
|
|
my ($ct) = $hdr =~m{^Content-type:\s*([^\s;]+)}mi; |
142
|
0
|
|
0
|
|
|
|
$ct ||= 'application/octet-stream'; |
143
|
0
|
0
|
|
|
|
|
return _stop_saving($self) if ! _check_srh( $srh, lc($ct)); |
144
|
|
|
|
|
|
|
} |
145
|
0
|
|
|
|
|
|
$hdr =~s{^(Content-encoding:|Transfer-encoding:|Content-length:)}{X-Original-$1}mig; |
146
|
0
|
|
|
|
|
|
$f->{size}[2] = length($hdr); |
147
|
0
|
0
|
|
|
|
|
if ( defined $f->{rphdr} ) { |
148
|
|
|
|
|
|
|
# defer, request body not fully read |
149
|
0
|
|
|
|
|
|
$f->{rphdr} = $hdr; |
150
|
|
|
|
|
|
|
} else { |
151
|
0
|
|
|
|
|
|
print {$f->{tfh}} $hdr; |
|
0
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub response_body { |
156
|
0
|
|
|
0
|
1
|
|
my ($self,$data) = @_; |
157
|
0
|
0
|
|
|
|
|
my $f = $self->{file} or return; |
158
|
0
|
|
|
|
|
|
$f->{size}[3] += length($data); |
159
|
0
|
0
|
|
|
|
|
if ( my $l = $self->{factory_args}{limit} ) { |
160
|
0
|
0
|
|
|
|
|
return _stop_saving($self) if $f->{size}[3] > $l; |
161
|
|
|
|
|
|
|
} |
162
|
0
|
0
|
|
|
|
|
if ( $data eq '' ) { |
|
|
0
|
|
|
|
|
|
163
|
0
|
|
|
|
|
|
_check_eof($self,2) |
164
|
|
|
|
|
|
|
} elsif ( defined $f->{rpbody} ) { |
165
|
0
|
|
|
|
|
|
$f->{rpbody} .= $data; |
166
|
|
|
|
|
|
|
} else { |
167
|
0
|
|
|
|
|
|
print {$f->{tfh}} $data; |
|
0
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub _check_eof { |
172
|
0
|
|
|
0
|
|
|
my ($self,$bit) = @_; |
173
|
0
|
0
|
|
|
|
|
my $f = $self->{file} or return; |
174
|
0
|
0
|
|
|
|
|
( $f->{eof} |= $bit ) == 3 or return; |
175
|
0
|
|
|
|
|
|
$self->{file} = undef; |
176
|
0
|
|
|
|
|
|
print {$f->{tfh}} pack("NNNN",@{ $f->{size} }); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
177
|
0
|
|
|
|
|
|
close($f->{tfh}); |
178
|
0
|
0
|
|
|
|
|
my $fname = "$f->{dir}/".join( "-", |
179
|
|
|
|
|
|
|
lc($f->{method}), |
180
|
|
|
|
|
|
|
$f->{md5path}, |
181
|
|
|
|
|
|
|
$f->{md5data} ? ($f->{md5data}->hexdigest):() |
182
|
|
|
|
|
|
|
); |
183
|
0
|
|
|
|
|
|
rename($f->{tname}, $fname); |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# will not be tracked |
187
|
|
|
|
|
|
|
sub any_data { |
188
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
189
|
0
|
0
|
|
|
|
|
my $f = $self->{file} or return; |
190
|
0
|
|
|
|
|
|
unlink($f->{tname}); |
191
|
0
|
|
|
|
|
|
$self->{file} = undef; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
### config stuff ###### |
195
|
|
|
|
|
|
|
sub validate_cfg { |
196
|
0
|
|
|
0
|
1
|
|
my ($class,%cfg) = @_; |
197
|
0
|
|
|
|
|
|
my $dir = delete $cfg{root}; |
198
|
0
|
|
|
|
|
|
my @err; |
199
|
0
|
0
|
0
|
|
|
|
push @err, "no or non-existing root dir given" |
200
|
|
|
|
|
|
|
if ! defined $dir or ! -d $dir; |
201
|
0
|
0
|
|
|
|
|
if ( my $limit = delete $cfg{limit} ) { |
202
|
0
|
0
|
|
|
|
|
push @err, "limit should be number" if ! looks_like_number($limit) |
203
|
|
|
|
|
|
|
} |
204
|
0
|
|
|
|
|
|
for my $k (qw(content_type method)) { |
205
|
0
|
|
0
|
|
|
|
my $v = delete $cfg{$k} // next; |
206
|
0
|
0
|
0
|
|
|
|
push @err,"$k should be string, hash or regexp" if |
207
|
|
|
|
|
|
|
ref($v) and not ref($v) ~~ [ 'Regexp','HASH' ]; |
208
|
|
|
|
|
|
|
} |
209
|
0
|
|
|
|
|
|
for my $k (qw(exclude_url only_url)) { |
210
|
0
|
|
0
|
|
|
|
my $v = delete $cfg{$k} // next; |
211
|
0
|
0
|
|
|
|
|
push @err,"$k should be regexp" if ref($v) ne 'Regexp'; |
212
|
|
|
|
|
|
|
} |
213
|
0
|
|
|
|
|
|
delete $cfg{ignore_parameters}; |
214
|
|
|
|
|
|
|
|
215
|
0
|
|
|
|
|
|
push @err, $class->SUPER::validate_cfg(%cfg); |
216
|
0
|
|
|
|
|
|
return @err; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
sub str2cfg { |
220
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
221
|
0
|
|
|
|
|
|
my %cfg = $self->SUPER::str2cfg(@_); |
222
|
0
|
|
|
|
|
|
for my $k (qw(content_type method)) { |
223
|
0
|
|
0
|
|
|
|
my $v = $cfg{$k} // next; |
224
|
0
|
0
|
|
|
|
|
if ( $v =~m{^/(.*)/$}s ) { |
|
|
0
|
|
|
|
|
|
225
|
0
|
0
|
|
|
|
|
$cfg{$k} = eval { qr/$1/ } or croak("invalid regexp '$v': $@"); |
|
0
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
} elsif (( my @v = split( /,/,$v )) > 1 ) { |
227
|
0
|
|
|
|
|
|
$cfg{$k} = map { lc($_) => 1 } @v |
|
0
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
} else { |
229
|
0
|
|
|
|
|
|
$cfg{$k} = lc($v) |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
} |
232
|
0
|
|
|
|
|
|
for my $k (qw(exclude_url only_url)) { |
233
|
0
|
|
0
|
|
|
|
my $v = $cfg{$k} // next; |
234
|
0
|
0
|
|
|
|
|
$v =~m{^/(.*)/$}s or croak("$k should be /regex/"); |
235
|
0
|
0
|
|
|
|
|
$cfg{$k} = eval { qr/$1/ } or croak("invalid regexp '$v': $@"); |
|
0
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
} |
237
|
0
|
|
|
|
|
|
return %cfg; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
sub _check_srh { |
241
|
0
|
|
|
0
|
|
|
my ($srh,$v) = @_; |
242
|
0
|
0
|
|
|
|
|
return $v =~ $srh if ref($srh) eq 'Regexp'; |
243
|
0
|
0
|
|
|
|
|
return $srh->{$_} if ref($srh) eq 'HASH'; |
244
|
0
|
|
|
|
|
|
return $srh eq $v; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub _stop_saving { |
248
|
0
|
|
|
0
|
|
|
my $self = shift; |
249
|
0
|
0
|
|
|
|
|
my $f = $self->{file} or return; |
250
|
0
|
|
|
|
|
|
unlink($f->{tname}); |
251
|
0
|
|
|
|
|
|
$self->{file} = undef; |
252
|
0
|
|
|
|
|
|
$self->run_callback( |
253
|
|
|
|
|
|
|
[ IMP_PASS,0,IMP_MAXOFFSET ], |
254
|
|
|
|
|
|
|
[ IMP_PASS,1,IMP_MAXOFFSET ], |
255
|
|
|
|
|
|
|
); |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
1; |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
__END__ |