File Coverage

blib/lib/HTTP/Proxy/BodyFilter/save.pm
Criterion Covered Total %
statement 90 92 97.8
branch 46 50 92.0
condition 15 16 93.7
subroutine 13 13 100.0
pod 5 5 100.0
total 169 176 96.0


line stmt bran cond sub pod time code
1             package HTTP::Proxy::BodyFilter::save;
2             $HTTP::Proxy::BodyFilter::save::VERSION = '0.303';
3 3     3   13555 use strict;
  3         4  
  3         75  
4 3     3   503 use HTTP::Proxy;
  3         3  
  3         81  
5 3     3   297 use HTTP::Proxy::BodyFilter;
  3         3  
  3         46  
6 3     3   9 use vars qw( @ISA );
  3         4  
  3         93  
7             @ISA = qw( HTTP::Proxy::BodyFilter );
8 3     3   13 use Fcntl;
  3         2  
  3         616  
9 3     3   12 use File::Spec;
  3         3  
  3         50  
10 3     3   10 use File::Path;
  3         3  
  3         109  
11 3     3   11 use Carp;
  3         3  
  3         2399  
12              
13             sub init {
14 24     24 1 22 my $self = shift;
15              
16             # options
17 24         302 my %args = (
18             template => File::Spec->catfile( '%h', '%P' ),
19             no_host => 0,
20             no_dirs => 0,
21             cut_dirs => 0,
22             prefix => '',
23             filename => undef,
24             multiple => 1,
25             keep_old => 0, # no_clobber in wget parlance
26             timestamp => 0,
27             status => [ 200 ],
28             @_
29             );
30             # keep_old and timestamp can't be selected together
31 24 100 100     238 croak "Can't timestamp and keep older files at the same time"
32             if $args{keep_old} && $args{timestamp};
33 23 100       163 croak "status must be an array reference"
34             unless ref($args{status}) eq 'ARRAY';
35 24         221 croak "status must contain only HTTP codes"
36 22 100       17 if grep { !/^[12345]\d\d$/ } @{ $args{status} };
  22         37  
37 21 100 100     147 croak "filename must be a code reference"
38             if defined $args{filename} && !UNIVERSAL::isa( $args{filename}, 'CODE' );
39              
40 20         34 $self->{"_hpbf_save_filename_code"} = $args{filename};
41             $self->{"_hpbf_save_$_"} = $args{$_}
42 20         240 for qw( template no_host no_dirs cut_dirs prefix
43             multiple keep_old timestamp status );
44             }
45              
46             sub begin {
47 23     23 1 9684 my ( $self, $message ) = @_;
48              
49             # internal data initialisation
50 23         18 delete @{$self}{qw( _hpbf_save_filename _hpbf_save_fh )};
  23         35  
51              
52 23 100       106 my $uri = $message->isa( 'HTTP::Request' )
53             ? $message->uri : $message->request->uri;
54              
55             # save only the accepted status codes
56 23 100       186 if( $message->isa( 'HTTP::Response' ) ) {
57 4         9 my $code = $message->code;
58 4 100       20 return unless grep { $code eq $_ } @{ $self->{_hpbf_save_status} };
  5         21  
  4         7  
59             }
60            
61 22         18 my $file = '';
62 22 100       38 if( defined $self->{_hpbf_save_filename_code} ) {
63             # use the user-provided callback
64 4         9 $file = $self->{_hpbf_save_filename_code}->($message);
65 4 100 100     23 unless ( defined $file and $file ne '' ) {
66 2         7 $self->proxy->log( HTTP::Proxy::FILTERS, "HTBF::save",
67             "Filter will not save $uri" );
68 2         3 return;
69             }
70             }
71             else {
72             # set the template variables from the URI
73 18         58 my @segs = $uri->path_segments; # starts with an empty string
74 18         498 shift @segs;
75 18 100       50 splice(@segs, 0, $self->{_hpbf_save_cut_dirs} >= @segs
76             ? @segs - 1 : $self->{_hpbf_save_cut_dirs} );
77 18 100 100     80 my %vars = (
    100 100        
78             '%' => '%',
79             h => $self->{_hpbf_save_no_host} ? '' : $uri->host,
80             f => $segs[-1] || 'index.html', # same default as wget
81             p => $self->{_hpbf_save_no_dirs} ? $segs[-1] || 'index.html'
82             : File::Spec->catfile(@segs),
83             q => $uri->query,
84             );
85 18         512 pop @segs;
86 18 100       63 $vars{d}
    100          
87             = $self->{_hpbf_save_no_dirs} ? ''
88             : @segs ? File::Spec->catfile(@segs)
89             : '';
90 18 100       43 $vars{P} = $vars{p} . ( $vars{q} ? "?$vars{q}" : '' );
91            
92             # create the filename
93 18   66     105 $file = File::Spec->catfile( $self->{_hpbf_save_prefix} || (),
94             $self->{_hpbf_save_template} );
95 18         125 $file =~ s/%(.)/$vars{$1}/g;
96             }
97 20         157 $file = File::Spec->rel2abs( $file );
98              
99             # create the directory
100 20         214 my $dir = File::Spec->catpath( (File::Spec->splitpath($file))[ 0, 1 ], '' );
101 20 100       318 if( ! -e $dir ) {
102 5         5 eval { mkpath( $dir ) };
  5         1011  
103 5 50       16 if ($@) {
104 0         0 $self->proxy->log( HTTP::Proxy::ERROR, "HTBF::save",
105             "Unable to create directory $dir" );
106 0         0 return;
107             }
108 5         14 $self->proxy->log( HTTP::Proxy::FILTERS, "HTBF::save",
109             "Created directory $dir" );
110             }
111              
112             # keep old file?
113 20 100       288 if ( -e $file ) {
114 3 50       11 if ( $self->{_hpbf_save_timestamp} ) {
    100          
115             # FIXME timestamp
116             }
117             elsif ( $self->{_hpbf_save_keep_old} ) {
118 1         4 $self->proxy->log( HTTP::Proxy::FILTERS, "HPBF::save",
119             "Skip saving $uri" );
120 1         2 delete $self->{_hpbf_save_fh}; # it's a closed filehandle
121 1         3 return;
122             }
123             }
124              
125             # open and lock the file
126 19         26 my ( $ext, $n, $i ) = ( "", 0 );
127 19         19 my $flags = O_WRONLY | O_EXCL | O_CREAT;
128 19         432716 while( ! sysopen( $self->{_hpbf_save_fh}, "$file$ext", $flags ) ) {
129 2 50       8 $self->proxy->log( HTTP::Proxy::ERROR, "HPBF::save",
130             "Too many errors opening $file$ext" ), return
131             if $i++ - $n == 10; # should be ok now
132 2 100       5 if( $self->{_hpbf_save_multiple} ) {
133 1         25 $ext = "." . ++$n while -e $file.$ext;
134 1         40 next;
135             }
136             else {
137 1         17 $flags = O_WRONLY | O_CREAT;
138             }
139             }
140              
141             # we have an open filehandle
142 19         58 $self->{_hpbf_save_filename} = $file.$ext;
143 19         41 binmode( $self->{_hpbf_save_fh} ); # for Win32 and friends
144 19         61 $self->proxy->log( HTTP::Proxy::FILTERS, "HPBF::save",
145             "Saving $uri to $file$ext" );
146             }
147              
148             sub filter {
149 8     8 1 3621 my ( $self, $dataref ) = @_;
150 8 100       18 return unless exists $self->{_hpbf_save_fh};
151              
152             # save the data to the file
153 4         14 my $res = $self->{_hpbf_save_fh}->syswrite( $$dataref );
154 4 50       92 $self->proxy->log( HTTP::Proxy::ERROR, "HPBF::save", "syswrite() error: $!")
155             if ! defined $res; # FIXME error handling
156             }
157              
158             sub end {
159 4     4 1 9 my ($self) = @_;
160              
161             # close file
162 4 100       9 if( $self->{_hpbf_save_fh} ) {
163 2         8 $self->{_hpbf_save_fh}->close; # FIXME error handling
164 2         19 delete $self->{_hpbf_save_fh};
165             }
166             }
167              
168 2     2 1 6 sub will_modify { 0 }
169              
170             1;
171              
172             __END__