| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  |  | 
| 2 | 1 |  |  | 1 |  | 941 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 39 |  | 
| 3 | 1 |  |  | 1 |  | 3 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 40 |  | 
| 4 |  |  |  |  |  |  | package Net::IMP::HTTP::Example::SaveResponse; | 
| 5 | 1 |  |  | 1 |  | 4 | use base 'Net::IMP::HTTP::Request'; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 140 |  | 
| 6 | 1 |  |  | 1 |  | 5 | use fields qw(root file); | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 1 |  |  | 1 |  | 114 | use Net::IMP; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 92 |  | 
| 9 | 1 |  |  | 1 |  | 5 | use Net::IMP::Debug; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 5 |  | 
| 10 | 1 |  |  | 1 |  | 96 | use File::Path 'make_path'; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 55 |  | 
| 11 | 1 |  |  | 1 |  | 739 | use File::Temp 'tempfile'; | 
|  | 1 |  |  |  |  | 15261 |  | 
|  | 1 |  |  |  |  | 68 |  | 
| 12 | 1 |  |  | 1 |  | 6 | use Digest::MD5; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 27 |  | 
| 13 | 1 |  |  | 1 |  | 4 | use Carp; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 46 |  | 
| 14 | 1 |  |  | 1 |  | 4 | use Scalar::Util 'looks_like_number'; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 46 |  | 
| 15 | 1 |  |  | 1 |  | 5 | no if $] >= 5.017011, warnings => 'experimental::smartmatch'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | my $DEFAULT_LIMIT = 10_000_000; | 
| 18 |  |  |  |  |  |  |  | 
| 19 | 0 |  |  | 0 | 0 |  | sub RTYPES { (IMP_PREPASS) } | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | sub new_factory { | 
| 22 | 0 |  |  | 0 | 1 |  | my ($class,%args) = @_; | 
| 23 | 0 | 0 |  |  |  |  | my $dir = $args{root} or croak("no root directory given"); | 
| 24 | 0 | 0 | 0 |  |  |  | -d $dir && -r _ && -x _ or croak("cannot use base dir $dir: $!"); | 
|  |  |  | 0 |  |  |  |  | 
| 25 | 0 | 0 |  |  |  |  | $args{limit} = $DEFAULT_LIMIT if ! defined $args{limit}; | 
| 26 | 0 |  |  |  |  |  | return $class->SUPER::new_factory(%args); | 
| 27 |  |  |  |  |  |  | } | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | sub new_analyzer { | 
| 30 | 0 |  |  | 0 | 1 |  | my ($factory,%args) = @_; | 
| 31 | 0 |  |  |  |  |  | my $self = $factory->SUPER::new_analyzer(%args); | 
| 32 |  |  |  |  |  |  | # we don't modify | 
| 33 | 0 |  |  |  |  |  | $self->run_callback( | 
| 34 |  |  |  |  |  |  | [ IMP_PREPASS,0,IMP_MAXOFFSET ], | 
| 35 |  |  |  |  |  |  | [ IMP_PREPASS,1,IMP_MAXOFFSET ] | 
| 36 |  |  |  |  |  |  | ); | 
| 37 | 0 |  |  |  |  |  | return $self; | 
| 38 |  |  |  |  |  |  | } | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | sub DESTROY { | 
| 42 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 43 | 0 | 0 | 0 |  |  |  | $self->{file} && $self->{file}{tname} && unlink($self->{file}{tname}); | 
| 44 |  |  |  |  |  |  | } | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | sub request_hdr { | 
| 47 | 0 |  |  | 0 | 1 |  | my ($self,$hdr) = @_; | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 0 |  |  |  |  |  | my ($method,$proto,$host,$path) = $hdr =~m{\A([A-Z]+) +(?:(\w+)://([^/]+))?(\S+)}; | 
| 50 | 0 | 0 |  |  |  |  | $host = $1 if $hdr =~m{\nHost: *(\S+)}i; | 
| 51 | 0 | 0 |  |  |  |  | $host or goto IGNORE; | 
| 52 | 0 |  | 0 |  |  |  | $proto ||= 'http'; | 
| 53 | 0 |  |  |  |  |  | $host = lc($host); | 
| 54 | 0 | 0 |  |  |  |  | my $port = | 
| 55 | 0 | 0 |  |  |  |  | $host=~s{^(?:\[(\w._\-:)+\]|(\w._\-))(?::(\d+))?$}{ $1 || $2 }e ? | 
| 56 |  |  |  |  |  |  | $3:80; | 
| 57 |  |  |  |  |  |  |  | 
| 58 | 0 | 0 |  |  |  |  | if ( my $rx = $self->{factory_args}{only_url} ) { | 
| 59 | 0 | 0 | 0 |  |  |  | goto IGNORE if "$proto://$host:$port$path" !~ $rx | 
| 60 |  |  |  |  |  |  | and "$proto://$host$path" !~ $rx | 
| 61 |  |  |  |  |  |  | } | 
| 62 | 0 | 0 |  |  |  |  | if ( my $rx = $self->{factory_args}{exclude_url} ) { | 
| 63 | 0 | 0 | 0 |  |  |  | goto IGNORE if "$proto://$host:$port$path" =~ $rx | 
| 64 |  |  |  |  |  |  | or "$proto://$host$path" =~ $rx | 
| 65 |  |  |  |  |  |  | } | 
| 66 | 0 | 0 |  |  |  |  | if ( my $srh = $self->{factory_args}{method} ) { | 
| 67 | 0 | 0 |  |  |  |  | goto IGNORE if ! _check_srh($srh,$method); | 
| 68 |  |  |  |  |  |  | } | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  |  | 
| 71 | 0 |  |  |  |  |  | my $dir = $self->{factory_args}{root}."/$host:$port"; | 
| 72 | 0 | 0 |  |  |  |  | if ( ! -d $dir ) { | 
| 73 | 0 |  |  |  |  |  | my $err; | 
| 74 | 0 |  |  |  |  |  | make_path($dir, { error => \$err  }); | 
| 75 |  |  |  |  |  |  | } | 
| 76 | 0 | 0 |  |  |  |  | my ($fh,$fname) = tempfile( "tmpXXXXXXX", DIR => $dir ) | 
| 77 |  |  |  |  |  |  | or goto IGNORE; | 
| 78 |  |  |  |  |  |  |  | 
| 79 | 0 |  |  |  |  |  | $hdr =~s{^(Content-encoding:|Transfer-encoding:|Content-length:)}{X-Original-$1}mig; | 
| 80 | 0 |  |  |  |  |  | print $fh $hdr; | 
| 81 |  |  |  |  |  |  |  | 
| 82 | 0 | 0 |  |  |  |  | my $qstring = $path =~s{\?(.+)}{} ? $1 : undef; | 
| 83 | 0 |  |  |  |  |  | $self->{file} = { | 
| 84 |  |  |  |  |  |  | tfh => $fh, | 
| 85 |  |  |  |  |  |  | tname => $fname, | 
| 86 |  |  |  |  |  |  | dir => $dir, | 
| 87 |  |  |  |  |  |  | method => $method, | 
| 88 |  |  |  |  |  |  | md5path => Digest::MD5->new->add($path)->hexdigest, | 
| 89 |  |  |  |  |  |  | md5data => undef, | 
| 90 |  |  |  |  |  |  | size => [ length($hdr),0,0,0 ], | 
| 91 |  |  |  |  |  |  | rphdr => '', | 
| 92 |  |  |  |  |  |  | rpbody => '', | 
| 93 |  |  |  |  |  |  | eof => 0, | 
| 94 |  |  |  |  |  |  | }; | 
| 95 | 0 | 0 | 0 |  |  |  | ( $self->{file}{md5data} = Digest::MD5->new )->add("\000$qstring\001") | 
| 96 |  |  |  |  |  |  | if defined $qstring and ! $self->{factory_args}{ignore_parameters}; | 
| 97 | 0 |  |  |  |  |  | return; # continue in request body | 
| 98 |  |  |  |  |  |  |  | 
| 99 | 0 |  |  |  |  |  | IGNORE: | 
| 100 |  |  |  |  |  |  | # pass thru w/o saving | 
| 101 |  |  |  |  |  |  | debug("no save $host:$port/$path"); | 
| 102 | 0 |  |  |  |  |  | $self->run_callback( | 
| 103 |  |  |  |  |  |  | # pass thru everything | 
| 104 |  |  |  |  |  |  | [ IMP_PASS,0,IMP_MAXOFFSET ], | 
| 105 |  |  |  |  |  |  | [ IMP_PASS,1,IMP_MAXOFFSET ], | 
| 106 |  |  |  |  |  |  | ); | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | sub request_body { | 
| 110 | 0 |  |  | 0 | 1 |  | my ($self,$data) = @_; | 
| 111 | 0 | 0 |  |  |  |  | my $f = $self->{file} or return; | 
| 112 | 0 |  |  |  |  |  | print { $f->{tfh} } $data; | 
|  | 0 |  |  |  |  |  |  | 
| 113 | 0 |  |  |  |  |  | my $md = $f->{md5data}; | 
| 114 | 0 | 0 |  |  |  |  | if ( $data ne '' ) { | 
| 115 | 0 |  |  |  |  |  | $f->{size}[1] += length($data); | 
| 116 | 0 | 0 |  |  |  |  | if ( my $l = $self->{factory_args}{limit} ) { | 
| 117 | 0 | 0 |  |  |  |  | return _stop_saving($self) if $f->{size}[1] > $l; | 
| 118 |  |  |  |  |  |  | } | 
| 119 | 0 | 0 |  |  |  |  | if ( ! $md ) { | 
| 120 | 0 | 0 |  |  |  |  | return if $self->{factory_args}{ignore_parameters}; | 
| 121 | 0 |  |  |  |  |  | $md = $f->{md5data} = Digest::MD5->new; | 
| 122 |  |  |  |  |  |  | } | 
| 123 | 0 |  |  |  |  |  | $md->add($data); | 
| 124 | 0 |  |  |  |  |  | return; | 
| 125 |  |  |  |  |  |  | } | 
| 126 | 0 | 0 |  |  |  |  | if ( defined( my $rp = $f->{rphdr} )) { | 
| 127 | 0 |  |  |  |  |  | print { $f->{tfh} } $rp; | 
|  | 0 |  |  |  |  |  |  | 
| 128 | 0 |  |  |  |  |  | $f->{rphdr} = undef; | 
| 129 | 0 | 0 |  |  |  |  | if ( defined( $rp = $f->{rpbody} )) { | 
| 130 | 0 |  |  |  |  |  | print { $f->{tfh} } $rp; | 
|  | 0 |  |  |  |  |  |  | 
| 131 | 0 |  |  |  |  |  | $f->{rpbody} = undef; | 
| 132 |  |  |  |  |  |  | } | 
| 133 |  |  |  |  |  |  | } | 
| 134 | 0 |  |  |  |  |  | _check_eof($self,1); | 
| 135 |  |  |  |  |  |  | } | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | sub response_hdr { | 
| 138 | 0 |  |  | 0 | 1 |  | my ($self,$hdr) = @_; | 
| 139 | 0 | 0 |  |  |  |  | my $f = $self->{file} or return; | 
| 140 | 0 | 0 |  |  |  |  | return _stop_saving($self) if $hdr =~m{\AHTTP/1\.[01] (100|304|5\d\d)}; | 
| 141 | 0 | 0 |  |  |  |  | if ( my $srh = $self->{factory_args}{content_type} ) { | 
| 142 | 0 |  |  |  |  |  | my ($ct) = $hdr =~m{^Content-type:\s*([^\s;]+)}mi; | 
| 143 | 0 |  | 0 |  |  |  | $ct ||= 'application/octet-stream'; | 
| 144 | 0 | 0 |  |  |  |  | return _stop_saving($self) if ! _check_srh( $srh, lc($ct)); | 
| 145 |  |  |  |  |  |  | } | 
| 146 | 0 |  |  |  |  |  | $hdr =~s{^(Content-encoding:|Transfer-encoding:|Content-length:)}{X-Original-$1}mig; | 
| 147 | 0 |  |  |  |  |  | $f->{size}[2] = length($hdr); | 
| 148 | 0 | 0 |  |  |  |  | if ( defined $f->{rphdr} ) { | 
| 149 |  |  |  |  |  |  | # defer, request body not fully read | 
| 150 | 0 |  |  |  |  |  | $f->{rphdr} = $hdr; | 
| 151 |  |  |  |  |  |  | } else { | 
| 152 | 0 |  |  |  |  |  | print {$f->{tfh}} $hdr; | 
|  | 0 |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | } | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | sub response_body { | 
| 157 | 0 |  |  | 0 | 1 |  | my ($self,$data) = @_; | 
| 158 | 0 | 0 |  |  |  |  | my $f = $self->{file} or return; | 
| 159 | 0 |  |  |  |  |  | $f->{size}[3] += length($data); | 
| 160 | 0 | 0 |  |  |  |  | if ( my $l = $self->{factory_args}{limit} ) { | 
| 161 | 0 | 0 |  |  |  |  | return _stop_saving($self) if $f->{size}[3] > $l; | 
| 162 |  |  |  |  |  |  | } | 
| 163 | 0 | 0 |  |  |  |  | if ( $data eq '' ) { | 
|  |  | 0 |  |  |  |  |  | 
| 164 | 0 |  |  |  |  |  | _check_eof($self,2) | 
| 165 |  |  |  |  |  |  | } elsif ( defined $f->{rpbody} ) { | 
| 166 | 0 |  |  |  |  |  | $f->{rpbody} .= $data; | 
| 167 |  |  |  |  |  |  | } else { | 
| 168 | 0 |  |  |  |  |  | print {$f->{tfh}} $data; | 
|  | 0 |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | } | 
| 170 |  |  |  |  |  |  | } | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | sub _check_eof { | 
| 173 | 0 |  |  | 0 |  |  | my ($self,$bit) = @_; | 
| 174 | 0 | 0 |  |  |  |  | my $f = $self->{file} or return; | 
| 175 | 0 | 0 |  |  |  |  | ( $f->{eof} |= $bit ) == 3 or return; | 
| 176 | 0 |  |  |  |  |  | $self->{file} = undef; | 
| 177 | 0 |  |  |  |  |  | print {$f->{tfh}} pack("NNNN",@{ $f->{size} }); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 178 | 0 |  |  |  |  |  | close($f->{tfh}); | 
| 179 | 0 | 0 |  |  |  |  | my $fname = "$f->{dir}/".join( "-", | 
| 180 |  |  |  |  |  |  | lc($f->{method}), | 
| 181 |  |  |  |  |  |  | $f->{md5path}, | 
| 182 |  |  |  |  |  |  | $f->{md5data} ? ($f->{md5data}->hexdigest):() | 
| 183 |  |  |  |  |  |  | ); | 
| 184 | 0 |  |  |  |  |  | rename($f->{tname}, $fname); | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  |  | 
| 187 |  |  |  |  |  |  | # will not be tracked | 
| 188 |  |  |  |  |  |  | sub any_data { | 
| 189 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 190 | 0 | 0 |  |  |  |  | my $f = $self->{file} or return; | 
| 191 | 0 |  |  |  |  |  | unlink($f->{tname}); | 
| 192 | 0 |  |  |  |  |  | $self->{file} = undef; | 
| 193 |  |  |  |  |  |  | } | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  | ### config stuff ###### | 
| 196 |  |  |  |  |  |  | sub validate_cfg { | 
| 197 | 0 |  |  | 0 | 1 |  | my ($class,%cfg) = @_; | 
| 198 | 0 |  |  |  |  |  | my $dir = delete $cfg{root}; | 
| 199 | 0 |  |  |  |  |  | my @err; | 
| 200 | 0 | 0 | 0 |  |  |  | push @err, "no or non-existing root dir given" | 
| 201 |  |  |  |  |  |  | if ! defined $dir or ! -d $dir; | 
| 202 | 0 | 0 |  |  |  |  | if ( my $limit = delete $cfg{limit} ) { | 
| 203 | 0 | 0 |  |  |  |  | push @err, "limit should be number" if ! looks_like_number($limit) | 
| 204 |  |  |  |  |  |  | } | 
| 205 | 0 |  |  |  |  |  | for my $k (qw(content_type method)) { | 
| 206 | 0 |  | 0 |  |  |  | my $v = delete $cfg{$k} // next; | 
| 207 | 0 | 0 | 0 |  |  |  | push @err,"$k should be string, hash or regexp" if | 
| 208 |  |  |  |  |  |  | ref($v) and not ref($v) ~~ [ 'Regexp','HASH' ]; | 
| 209 |  |  |  |  |  |  | } | 
| 210 | 0 |  |  |  |  |  | for my $k (qw(exclude_url only_url)) { | 
| 211 | 0 |  | 0 |  |  |  | my $v = delete $cfg{$k} // next; | 
| 212 | 0 | 0 |  |  |  |  | push @err,"$k should be regexp" if ref($v) ne 'Regexp'; | 
| 213 |  |  |  |  |  |  | } | 
| 214 | 0 |  |  |  |  |  | delete $cfg{ignore_parameters}; | 
| 215 |  |  |  |  |  |  |  | 
| 216 | 0 |  |  |  |  |  | push @err, $class->SUPER::validate_cfg(%cfg); | 
| 217 | 0 |  |  |  |  |  | return @err; | 
| 218 |  |  |  |  |  |  | } | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  | sub str2cfg { | 
| 221 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 222 | 0 |  |  |  |  |  | my %cfg = $self->SUPER::str2cfg(@_); | 
| 223 | 0 |  |  |  |  |  | for my $k (qw(content_type method)) { | 
| 224 | 0 |  | 0 |  |  |  | my $v = $cfg{$k} // next; | 
| 225 | 0 | 0 |  |  |  |  | if ( $v =~m{^/(.*)/$}s ) { | 
|  |  | 0 |  |  |  |  |  | 
| 226 | 0 | 0 |  |  |  |  | $cfg{$k} = eval { qr/$1/ } or croak("invalid regexp '$v': $@"); | 
|  | 0 |  |  |  |  |  |  | 
| 227 |  |  |  |  |  |  | } elsif (( my @v = split( /,/,$v )) > 1 ) { | 
| 228 | 0 |  |  |  |  |  | $cfg{$k} = map { lc($_) => 1 } @v | 
|  | 0 |  |  |  |  |  |  | 
| 229 |  |  |  |  |  |  | } else { | 
| 230 | 0 |  |  |  |  |  | $cfg{$k} = lc($v) | 
| 231 |  |  |  |  |  |  | } | 
| 232 |  |  |  |  |  |  | } | 
| 233 | 0 |  |  |  |  |  | for my $k (qw(exclude_url only_url)) { | 
| 234 | 0 |  | 0 |  |  |  | my $v = $cfg{$k} // next; | 
| 235 | 0 | 0 |  |  |  |  | $v =~m{^/(.*)/$}s or croak("$k should be /regex/"); | 
| 236 | 0 | 0 |  |  |  |  | $cfg{$k} = eval { qr/$1/ } or croak("invalid regexp '$v': $@"); | 
|  | 0 |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  | } | 
| 238 | 0 |  |  |  |  |  | return %cfg; | 
| 239 |  |  |  |  |  |  | } | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | sub _check_srh { | 
| 242 | 0 |  |  | 0 |  |  | my ($srh,$v) = @_; | 
| 243 | 0 | 0 |  |  |  |  | return $v =~ $srh if ref($srh) eq 'Regexp'; | 
| 244 | 0 | 0 |  |  |  |  | return $srh->{$_} if ref($srh) eq 'HASH'; | 
| 245 | 0 |  |  |  |  |  | return $srh eq $v; | 
| 246 |  |  |  |  |  |  | } | 
| 247 |  |  |  |  |  |  |  | 
| 248 |  |  |  |  |  |  | sub _stop_saving { | 
| 249 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 250 | 0 | 0 |  |  |  |  | my $f = $self->{file} or return; | 
| 251 | 0 |  |  |  |  |  | unlink($f->{tname}); | 
| 252 | 0 |  |  |  |  |  | $self->{file} = undef; | 
| 253 | 0 |  |  |  |  |  | $self->run_callback( | 
| 254 |  |  |  |  |  |  | [ IMP_PASS,0,IMP_MAXOFFSET ], | 
| 255 |  |  |  |  |  |  | [ IMP_PASS,1,IMP_MAXOFFSET ], | 
| 256 |  |  |  |  |  |  | ); | 
| 257 |  |  |  |  |  |  | } | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | 1; | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | __END__ |