File Coverage

blib/lib/CGI/Compress/Gzip.pm
Criterion Covered Total %
statement 89 120 74.1
branch 44 60 73.3
condition 12 24 50.0
subroutine 11 14 78.5
pod 5 5 100.0
total 161 223 72.2


line stmt bran cond sub pod time code
1             package CGI::Compress::Gzip;
2              
3 1     1   184833 use 5.006;
  1         4  
  1         41  
4 1     1   6 use warnings;
  1         2  
  1         31  
5 1     1   6 use strict;
  1         6  
  1         40  
6 1     1   4 use English qw(-no_match_vars);
  1         2  
  1         8  
7 1     1   1016 use CGI::Compress::Gzip::FileHandle;
  1         3  
  1         10  
8 1     1   52 use base 'CGI';
  1         2  
  1         2115  
9              
10             our $VERSION = '1.03';
11              
12             # Package globals - testing and debugging flags
13              
14             # These should only be used for extreme circumstances (e.g. testing)
15             our $global_use_compression = 1; # user-settable
16             our $global_can_compress = undef; # 1 = yes, 0 = no, undef = don't know yet
17              
18             # If true, add an outgoing HTTP header explaining why we are not
19             # compressing if gzip turns itself off.
20             our $global_give_reason = 0;
21              
22             #=encoding utf8
23              
24             =head1 NAME
25              
26             CGI::Compress::Gzip - CGI with automatically compressed output
27              
28             =head1 LICENSE
29              
30             Copyright 2006-2007 Clotho Advanced Media, Inc.,
31              
32             Copyright 2007-2008 Chris Dolan
33              
34             This library is free software; you can redistribute it and/or modify it
35             under the same terms as Perl itself.
36              
37             =head1 SYNOPSIS
38              
39             use CGI::Compress::Gzip;
40            
41             my $cgi = new CGI::Compress::Gzip;
42             print $cgi->header();
43             print " ...";
44              
45             See the CAVEATS section below!
46              
47             =head1 DESCRIPTION
48              
49             CGI::Compress::Gzip extends the CGI module to auto-detect whether the
50             client browser wants compressed output and, if so and if the script
51             chooses HTML output, apply gzip compression on any content header for
52             STDOUT. This module is intended to be a drop-in replacement for
53             CGI.pm.
54              
55             Apache mod_perl users may wish to consider the Apache::Compress or
56             Apache::GzipChain modules, which allow more transparent output
57             compression than this module can provide. However, as of this writing
58             those modules are more aggressive about compressing, regardless of
59             Content-Type.
60              
61             =head2 Headers
62              
63             At the time that a header is requested, CGI::Compress::Gzip checks the
64             HTTP_ACCEPT_ENCODING environment variable (passed by Apache). If this
65             variable includes the flag "gzip" and the outgoing mime-type is
66             "text/*", then gzipped output is preferred. [the default mime-type
67             selection of text/* can be changed by subclassing -- see below] The
68             header is altered to add the "Content-Encoding: gzip" flag which
69             indicates that compression is turned on.
70              
71             Naturally, it is crucial that the CGI application output nothing
72             before the header is printed. If this is violated, things will go
73             badly.
74              
75             =head2 Compression
76              
77             When the header is created, this module sets up a new filehandle to
78             accept data. STDOUT is redirected through that filehandle. The new
79             filehandle passes data verbatim until it detects the end of the CGI
80             header. At that time, it switches over to Gzip output for the
81             remainder of the CGI run.
82              
83             Note that the Zlib library on which this code is ultimately based
84             requires a fileno for the output filehandle. Where the output
85             filehandle is faked (i.e. in mod_perl), we instead use in-memory
86             compression. This is more wasteful of RAM, but it is the only
87             solution I've found (and it is one shared by the Apache::* compression
88             modules).
89              
90             Debugging note: if you set B<$CGI::Compress::Gzip::global_give_reason>
91             to a true value, then this module will add an HTTP header entry called
92             B with an explanation of why it chose not to gzip
93             the output stream.
94              
95             =head2 Buffering
96              
97             The Zlib library introduces latencies. In some cases, this module may
98             delay output until the CGI object is garbage collected, presumably at
99             the end of the program. This buffering can be detrimental to
100             long-lived programs which are supposed to have incremental output,
101             causing browser timeouts. To compensate, compression is automatically
102             disabled when autoflush (i.e. the $| variable) is set to true. Future
103             versions may try to enable autoflushing on the Zlib filehandles, if
104             possible [Help wanted].
105              
106             =head1 CLASS METHODS
107              
108             =over 4
109              
110             =item $pkg->new([CGI ARGS])
111              
112             Create a new object. This resets the environment before creating a
113             CGI.pm object. This should not be called more than once per script
114             run! All arguments are passed to the parent class.
115              
116             =cut
117              
118             sub new
119             {
120 1     1 1 5551 my ($pkg, @args) = @_;
121              
122 1         4 select STDOUT;
123 1         12 my $self = $pkg->SUPER::new(@args);
124 1         5083 $self->{'.CGIgz'} = {
125             ext_fh => undef,
126             zlib_fh => undef,
127             header_done => 0,
128             use_compression => undef,
129             };
130 1         5 return $self;
131             }
132              
133             =item $pkg->useCompression($boolean)
134              
135             =item $self->useCompression($boolean)
136              
137             This can be used as a class method or an instance method. The former
138             is included for backward compatibility, and is NOT recommended. As a
139             class method, this changes the default value. As an instance method
140             it affects only the specified instance.
141              
142             Turn compression on/off for the target. If turned on, compression
143             will be used only if the prerequisite compression libraries are
144             available and if the client browser requests compression.
145              
146             Defaults to on.
147              
148             =cut
149              
150             sub useCompression
151             {
152 5     5 1 2523 my ($pkg_or_self, $value) = @_;
153              
154 5 100       12 if (ref $pkg_or_self)
155             {
156 2 100       8 $pkg_or_self->{'.CGIgz'}->{use_compression} = $value ? 1 : 0;
157             }
158             else
159             {
160 3 100       9 $global_use_compression = $value ? 1 : 0;
161             }
162 5         17 return $pkg_or_self;
163             }
164              
165             =back
166              
167             =head1 INSTANCE METHODS
168              
169             =over 4
170              
171             =item $self->useFileHandle($filehandle)
172              
173             Manually set the output filehandle. Because of limitations of Zlib,
174             this MUST be a real filehandle (with valid results from fileno()) and
175             not a pseudo filehandle like IO::String.
176              
177             If this is not set, STDOUT is used.
178              
179             =cut
180              
181             sub useFileHandle
182             {
183 0     0 1 0 my ($self, $fh) = @_;
184              
185 0         0 $self->{'.CGIgz'}->{ext_fh} = $fh;
186 0         0 return $self;
187             }
188              
189             =item $self->isCompressibleType($content_type)
190              
191             Given a MIME type (with possible charset attached), return a boolean
192             indicating if this media type is a good candidate for compression.
193             This implementation is simply:
194              
195             return $type =~ /^text\//;
196              
197             Subclasses may wish to override this method to apply different
198             criteria.
199              
200             =cut
201              
202             sub isCompressibleType
203             {
204 19     19 1 40 my ($self, $type) = @_;
205              
206 19   100     155 return ($type || q{}) =~ m/ \A text\/ /xms;
207             }
208              
209             =item $self->header([HEADER ARGS])
210              
211             Overrides the C method in L.
212              
213             Return a CGI header with the compression flags set properly. Returns
214             an empty string is a header has already been printed.
215              
216             This method engages the Gzip output by fiddling with the default
217             output filehandle. All subsequent output via usual Perl print() will
218             be automatically gzipped except for this header (which must go out as
219             plain text).
220              
221             Any arguments will be passed on to CGI::header. This method should
222             NOT be called if you don't want your header or STDOUT to be fiddled
223             with.
224              
225             =cut
226              
227             sub header
228             {
229 0     0 1 0 my ($self, @args) = @_;
230              
231 0         0 my ($compress, $reason) = $self->_can_compress(\@args);
232 0 0 0     0 if (!$compress && $global_give_reason && $reason)
      0        
233             {
234 0         0 push @args, '-X_non_gzip_reason', $reason;
235             }
236              
237 0         0 my $header = $self->SUPER::header(@args);
238 0 0       0 if (!defined $header) # workaround for problem found on 5.6.0 on Linux
239             {
240 0         0 $header = q{};
241             }
242              
243 0 0 0     0 if (!$self->{'.CGIgz'}->{header_done}++ && $compress)
244             {
245 0         0 $self->_start_compression($header);
246             }
247 0         0 return $header;
248             }
249              
250             # Enable the compression filehandle if:
251             # - The mime-type is appropriate (text/* is the default)
252             # - The programmer wants compression, indicated by the useCompression()
253             # method
254             # - Client wants compression, indicated by the Accepted-Encoding HTTP field
255             # - The IO::Zlib compression library is available
256             # Returns: (boolean, reason) -- reason is a string if boolean is false
257             # Side effects:
258             # - may alter $header to add gzip flag if boolean is true
259             # - may set $global_can_compress if not yet set
260              
261             sub _can_compress ## no critic(Subroutines::ProhibitExcessComplexity)
262             {
263 23     23   16236 my ($self, $header) = @_;
264             # $header is an array ref
265              
266 23         38 my $settings = $self->{'.CGIgz'};
267              
268             # Check programmer preference
269 23 100       95 if (defined $settings->{use_compression} ?
    100          
270             !$settings->{use_compression} : !$global_use_compression)
271             {
272 2         8 return (0, 'programmer request');
273             }
274              
275             # save it in case we change it
276 21         86 $settings->{flush} = $OUTPUT_AUTOFLUSH;
277              
278             # Check buffering (disable if autoflushing)
279 21 100       45 if ($settings->{flush})
280             {
281 1         4 return (0, 'programmer wants unbuffered output');
282             }
283              
284             # Check that browser supports gzip
285 20         42 my $acc = $ENV{HTTP_ACCEPT_ENCODING};
286 20 100 66     103 if (!$acc || $acc !~ m/ \bgzip\b /ixms)
287             {
288 2         7 return (0, 'user agent does not want gzip');
289             }
290              
291             # Parse the header data and look for indicators of compressibility:
292             # * appropriate content type
293             # * already set for compression
294             # * HTTP status not 200
295              
296 18         24 my @newheader;
297             my $content_type;
298              
299             # This search reproduces the header parsing done by CGI.pm
300 18 100 100     25 if (@{$header} && $header->[0] =~ m/ \A [a-z] /xms) ## no critic (ProhibitEnumeratedClasses)
  18         103  
301             {
302              
303             # Using unkeyed version of arguments - convert to the keyed version
304              
305             # arg order comes from the header() function in CGI.pm
306 1         5 my @flags = qw(
307             Content_Type Status Cookie Target Expires
308             NPH Charset Attachment P3P
309             );
310 1         3 for my $i (0 .. $#{$header})
  1         4  
311             {
312 1 50       3 if ($i < @flags)
313             {
314 1         6 push @newheader, q{-} . $flags[$i], $header->[$i];
315             }
316             else
317             {
318             # Extra args
319 0         0 push @newheader, $header->[$i];
320             }
321             }
322             }
323             else
324             {
325 17         25 @newheader = @{$header};
  17         58  
326             }
327              
328             # gets set if we find an existing encoding directive
329 18         21 my $encoding_index;
330              
331 18         53 for (my $i = 0; $i < @newheader; $i++)
332             {
333 17 50       39 next if (!defined $newheader[$i]);
334              
335 17 100       114 if ($newheader[$i] =~ m/ \A -?(?:Content[-_]Type|Type)(.*) \z /ixms)
    100          
    100          
336             {
337 6         12 $content_type = $1;
338 6 100       24 if ($content_type !~ s/ \A :\s* //xms)
339             {
340 4         12 $content_type = $newheader[++$i];
341             }
342             }
343             elsif ($newheader[$i] =~ m/ \A -?Status(.*) \z /ixms)
344             {
345 6         13 my $content = $1;
346 6 100       26 if ($content !~ s/ \A :\s* //xms)
347             {
348 2         5 $content = $newheader[++$i];
349             }
350 6         18 my ($status) = $content =~ m/ \A (\d+) /xms;
351 6 100 100     35 if (!defined $status || $status ne '200')
352             {
353 3         14 return (0, 'HTTP status not 200');
354             }
355             }
356             elsif ($newheader[$i] =~ m/ \A -?Content[-_]Encoding(.*) \z /ixms)
357             {
358 3         5 my $content = $1;
359 3 100       12 if ($content !~ s/ \A :\s* //xms)
360             {
361 2         5 $content = $newheader[++$i];
362             }
363 3         4 $encoding_index = $i;
364            
365 3 100       13 if ($content =~ m/ \bgzip\b /ixms)
366             {
367             # Already gzip compressed
368 1         5 return (0, 'someone already requested gzip');
369             }
370             }
371             }
372              
373 14 100       28 if (defined $encoding_index)
374             {
375             # prepend gzip encoding to the existing encoding list
376 2         16 $newheader[$encoding_index] =~ s/ \A ((?:-?Content[-_]Encoding:\s*)?) /$1gzip, /ioxms;
377             }
378             else
379             {
380 12         25 push @newheader, '-Content_Encoding', 'gzip';
381             }
382              
383 14   100     51 $content_type ||= 'text/html';
384 14 100       33 if (!$self->isCompressibleType($content_type))
385             {
386             # Not compressible media
387 1         5 return (0, 'incompatible content-type ' . $content_type);
388             }
389              
390             # Check that IO::Zlib is available
391 13 100       30 if (!defined $global_can_compress)
392             {
393 1         6 local $SIG{__WARN__} = 'DEFAULT';
394 1         5 local $SIG{__DIE__} = 'DEFAULT';
395 1         2 eval { require IO::Zlib; }; ## no critic (RequireCheckingReturnValueOfEval)
  1         9  
396 1 50       8 $global_can_compress = $EVAL_ERROR ? 0 : 1;
397             }
398 13 50       26 if (!$global_can_compress)
399             {
400 0         0 return (0, 'IO::Zlib not found');
401             }
402              
403             # Commit any changes made above
404 13         16 @{$header} = @newheader;
  13         38  
405              
406 13         49 return (1, undef);
407             }
408              
409             sub _start_compression
410             {
411 0     0   0 my ($self, $header) = @_;
412              
413 0         0 my $settings = $self->{'.CGIgz'};
414 0   0     0 $settings->{ext_fh} ||= \*STDOUT;
415 0         0 binmode $settings->{ext_fh};
416              
417 0         0 my $filehandle = CGI::Compress::Gzip::FileHandle->new($settings->{ext_fh}, 'wb');
418 0 0       0 if (!$filehandle)
419             {
420 0         0 warn 'Failed to open Zlib output, reverting to uncompressed output';
421 0         0 return;
422             }
423              
424             # All output from here on goes to our new filehandle
425              
426             ## Autoflush makes no sense since compression is disabled if autoflush is on
427             #if ($filehandle->can('autoflush'))
428             #{
429             # $filehandle->autoflush();
430             #}
431              
432 0         0 select $filehandle;
433              
434 0         0 $settings->{zlib_fh} = $filehandle; # needed for destructor
435              
436 0         0 my $tied = tied ${$filehandle};
  0         0  
437 0         0 $tied->{pending_header} = $header;
438              
439 0         0 return $self;
440             }
441              
442             =item $self->DESTROY()
443              
444             Override the L destructor so we can close the Gzip output stream, if
445             there is one open.
446              
447             =cut
448              
449             sub DESTROY
450             {
451 1     1   848 my ($self) = @_;
452              
453 1 50       8 if ($self->{'.CGIgz'}->{zlib_fh})
454             {
455 0 0       0 $self->{'.CGIgz'}->{zlib_fh}->close()
456             or die 'Failed to close the Zlib filehandle';
457             }
458 1 50       6 if ($self->{'.CGIgz'}->{ext_fh})
459             {
460 0         0 select $self->{'.CGIgz'}->{ext_fh};
461             }
462              
463 1         16 return $self->SUPER::DESTROY();
464             }
465              
466             1;
467             __END__