File Coverage

blib/lib/HTTP/Proxy/BodyFilter/save.pm
Criterion Covered Total %
statement 90 92 97.8
branch 46 50 92.0
condition 15 16 93.7
subroutine 13 13 100.0
pod 5 5 100.0
total 169 176 96.0


line stmt bran cond sub pod time code
1             package HTTP::Proxy::BodyFilter::save;
2             $HTTP::Proxy::BodyFilter::save::VERSION = '0.302';
3 3     3   15980 use strict;
  3         5  
  3         90  
4 3     3   422 use HTTP::Proxy;
  3         6  
  3         138  
5 3     3   619 use HTTP::Proxy::BodyFilter;
  3         4  
  3         72  
6 3     3   13 use vars qw( @ISA );
  3         5  
  3         141  
7             @ISA = qw( HTTP::Proxy::BodyFilter );
8 3     3   12 use Fcntl;
  3         3  
  3         746  
9 3     3   14 use File::Spec;
  3         4  
  3         55  
10 3     3   11 use File::Path;
  3         5  
  3         137  
11 3     3   12 use Carp;
  3         3  
  3         2788  
12              
13             sub init {
14 24     24 1 24 my $self = shift;
15              
16             # options
17 24         339 my %args = (
18             template => File::Spec->catfile( '%h', '%P' ),
19             no_host => 0,
20             no_dirs => 0,
21             cut_dirs => 0,
22             prefix => '',
23             filename => undef,
24             multiple => 1,
25             keep_old => 0, # no_clobber in wget parlance
26             timestamp => 0,
27             status => [ 200 ],
28             @_
29             );
30             # keep_old and timestamp can't be selected together
31 24 100 100     274 croak "Can't timestamp and keep older files at the same time"
32             if $args{keep_old} && $args{timestamp};
33 23 100       166 croak "status must be an array reference"
34             unless ref($args{status}) eq 'ARRAY';
35 24         237 croak "status must contain only HTTP codes"
36 22 100       22 if grep { !/^[12345]\d\d$/ } @{ $args{status} };
  22         41  
37 21 100 100     170 croak "filename must be a code reference"
38             if defined $args{filename} && !UNIVERSAL::isa( $args{filename}, 'CODE' );
39              
40 20         40 $self->{"_hpbf_save_filename_code"} = $args{filename};
41             $self->{"_hpbf_save_$_"} = $args{$_}
42 20         231 for qw( template no_host no_dirs cut_dirs prefix
43             multiple keep_old timestamp status );
44             }
45              
46             sub begin {
47 23     23 1 11343 my ( $self, $message ) = @_;
48              
49             # internal data initialisation
50 23         26 delete @{$self}{qw( _hpbf_save_filename _hpbf_save_fh )};
  23         41  
51              
52 23 100       107 my $uri = $message->isa( 'HTTP::Request' )
53             ? $message->uri : $message->request->uri;
54              
55             # save only the accepted status codes
56 23 100       185 if( $message->isa( 'HTTP::Response' ) ) {
57 4         11 my $code = $message->code;
58 4 100       25 return unless grep { $code eq $_ } @{ $self->{_hpbf_save_status} };
  5         18  
  4         8  
59             }
60            
61 22         26 my $file = '';
62 22 100       42 if( defined $self->{_hpbf_save_filename_code} ) {
63             # use the user-provided callback
64 4         7 $file = $self->{_hpbf_save_filename_code}->($message);
65 4 100 100     24 unless ( defined $file and $file ne '' ) {
66 2         5 $self->proxy->log( HTTP::Proxy::FILTERS, "HTBF::save",
67             "Filter will not save $uri" );
68 2         5 return;
69             }
70             }
71             else {
72             # set the template variables from the URI
73 18         55 my @segs = $uri->path_segments; # starts with an empty string
74 18         547 shift @segs;
75 18 100       55 splice(@segs, 0, $self->{_hpbf_save_cut_dirs} >= @segs
76             ? @segs - 1 : $self->{_hpbf_save_cut_dirs} );
77 18 100 100     87 my %vars = (
    100 100        
78             '%' => '%',
79             h => $self->{_hpbf_save_no_host} ? '' : $uri->host,
80             f => $segs[-1] || 'index.html', # same default as wget
81             p => $self->{_hpbf_save_no_dirs} ? $segs[-1] || 'index.html'
82             : File::Spec->catfile(@segs),
83             q => $uri->query,
84             );
85 18         527 pop @segs;
86 18 100       70 $vars{d}
    100          
87             = $self->{_hpbf_save_no_dirs} ? ''
88             : @segs ? File::Spec->catfile(@segs)
89             : '';
90 18 100       45 $vars{P} = $vars{p} . ( $vars{q} ? "?$vars{q}" : '' );
91            
92             # create the filename
93 18   66     112 $file = File::Spec->catfile( $self->{_hpbf_save_prefix} || (),
94             $self->{_hpbf_save_template} );
95 18         135 $file =~ s/%(.)/$vars{$1}/g;
96             }
97 20         172 $file = File::Spec->rel2abs( $file );
98              
99             # create the directory
100 20         227 my $dir = File::Spec->catpath( (File::Spec->splitpath($file))[ 0, 1 ], '' );
101 20 100       352 if( ! -e $dir ) {
102 5         7 eval { mkpath( $dir ) };
  5         846  
103 5 50       13 if ($@) {
104 0         0 $self->proxy->log( HTTP::Proxy::ERROR, "HTBF::save",
105             "Unable to create directory $dir" );
106 0         0 return;
107             }
108 5         17 $self->proxy->log( HTTP::Proxy::FILTERS, "HTBF::save",
109             "Created directory $dir" );
110             }
111              
112             # keep old file?
113 20 100       296 if ( -e $file ) {
114 3 50       14 if ( $self->{_hpbf_save_timestamp} ) {
    100          
115             # FIXME timestamp
116             }
117             elsif ( $self->{_hpbf_save_keep_old} ) {
118 1         5 $self->proxy->log( HTTP::Proxy::FILTERS, "HPBF::save",
119             "Skip saving $uri" );
120 1         3 delete $self->{_hpbf_save_fh}; # it's a closed filehandle
121 1         2 return;
122             }
123             }
124              
125             # open and lock the file
126 19         31 my ( $ext, $n, $i ) = ( "", 0 );
127 19         20 my $flags = O_WRONLY | O_EXCL | O_CREAT;
128 19         939 while( ! sysopen( $self->{_hpbf_save_fh}, "$file$ext", $flags ) ) {
129 2 50       7 $self->proxy->log( HTTP::Proxy::ERROR, "HPBF::save",
130             "Too many errors opening $file$ext" ), return
131             if $i++ - $n == 10; # should be ok now
132 2 100       5 if( $self->{_hpbf_save_multiple} ) {
133 1         27 $ext = "." . ++$n while -e $file.$ext;
134 1         45 next;
135             }
136             else {
137 1         22 $flags = O_WRONLY | O_CREAT;
138             }
139             }
140              
141             # we have an open filehandle
142 19         62 $self->{_hpbf_save_filename} = $file.$ext;
143 19         38 binmode( $self->{_hpbf_save_fh} ); # for Win32 and friends
144 19         58 $self->proxy->log( HTTP::Proxy::FILTERS, "HPBF::save",
145             "Saving $uri to $file$ext" );
146             }
147              
148             sub filter {
149 8     8 1 4415 my ( $self, $dataref ) = @_;
150 8 100       21 return unless exists $self->{_hpbf_save_fh};
151              
152             # save the data to the file
153 4         27 my $res = $self->{_hpbf_save_fh}->syswrite( $$dataref );
154 4 50       100 $self->proxy->log( HTTP::Proxy::ERROR, "HPBF::save", "syswrite() error: $!")
155             if ! defined $res; # FIXME error handling
156             }
157              
158             sub end {
159 4     4 1 11 my ($self) = @_;
160              
161             # close file
162 4 100       10 if( $self->{_hpbf_save_fh} ) {
163 2         9 $self->{_hpbf_save_fh}->close; # FIXME error handling
164 2         22 delete $self->{_hpbf_save_fh};
165             }
166             }
167              
168 2     2 1 8 sub will_modify { 0 }
169              
170             1;
171              
172             __END__