File Coverage

blib/lib/CGI/SHTML.pm
Criterion Covered Total %
statement 19 177 10.7
branch 0 98 0.0
condition 1 43 2.3
subroutine 7 20 35.0
pod 5 5 100.0
total 32 343 9.3


line stmt bran cond sub pod time code
1             $VERSION = "1.29";
2             package CGI::SHTML;
3             our $VERSION = "1.29";
4              
5             # -*- Perl -*- Wed May 19 13:09:58 CDT 2004
6             #############################################################################
7             # Written by Tim Skirvin
8             # Copyright 2001-2004, Tim Skirvin and UIUC Board of Trustees.
9             # Redistribution terms are below.
10             #############################################################################
11              
12             =head1 NAME
13              
14             CGI::SHTML - a CGI module for parsing SSI
15              
16             =head1 SYNOPSIS
17              
18             use CGI::SHTML;
19             my $cgi = new CGI::SHTML;
20              
21             # Print a full page worth of info
22             print $cgi->header();
23             print $cgi->start_html('internal', -title=>"SAMPLE PAGE");
24             # Insert content here
25             print $cgi->end_html('internal', -author=>"Webmaster",
26             -address=>'webserver@ks.uiuc.edu');
27              
28             # Just parse some SSI text
29             my @text = '';
30             print CGI::SHTML->parse_shtml(@text);
31              
32             # Use a different configuration file
33             BEGIN { $CGI::SHTML::CONFIG = "/home/tskirvin/shtml.pm"; }
34             use CGI::SHTML;
35              
36             Further functionality is documented with the CGI module.
37              
38             =head1 DESCRIPTION
39              
40             In order to parse SSI, you generally have to configure your scripts to be
41             re-parsed through Apache itself. This module eliminates that need by
42             parsing SSI headers itself, as best it can.
43              
44             Some information on SSI is available at
45             B.
46              
47             =head2 VARIABLES
48              
49             =over 2
50              
51             =item $CGI::SHTML::CONFIG
52              
53             Defines a file that has further configuration for your web site. This is
54             useful to allow the module to be installed system-wide without actually
55             requiring changes to be internal to the file. Note that you'll need to
56             reset this value *before* loading CGI::SHTML if you want it to actually
57             make any difference; it's loaded when you load the module.
58              
59             =back
60              
61             =cut
62              
63 1     1   806 use strict;
  1         2  
  1         33  
64 1     1   1039 use Time::Local;
  1         2028  
  1         68  
65 1     1   11197 use CGI;
  1         22439  
  1         7  
66 1     1   54 use warnings;
  1         1  
  1         35  
67 1     1   6 use vars qw( @ISA $EMPTY $ROOTDIR %REPLACE %CONFIG %HEADER %FOOTER $CONFIG );
  1         1  
  1         94  
68 1     1   4 use vars qw( $IF $NOPRINT );
  1         2  
  1         6323  
69              
70             ### User Defined Variables ####################################################
71             $CONFIG ||= "/home/webserver/conf/shtml.pm";
72             $ROOTDIR = $ENV{'DOCUMENT_ROOT'} || "/Common/WebRoot";
73             $EMPTY = ""; # Edit this for debugging
74             %REPLACE = ( );
75             %CONFIG = ( 'timefmt' => "%D",);
76             %HEADER = (
77             'internal' => '/include/header-info.shtml',
78             'generic' => '/include/header-generic.shtml',
79             );
80             %FOOTER = (
81             'internal' => '/include/footer-info.shtml',
82             'generic' => '/include/footer-generic.shtml',
83             );
84             ###############################################################################
85              
86             # Set some environment variables that are important for SSI
87             $ENV{'DATE_GMT'} = gmtime(time);
88             $ENV{'DATE_LOCAL'} = localtime(time);
89             $ENV{'DOCUMENT_URI'} = join('', "http://",
90             $ENV{'SERVER_NAME'} || "localhost",
91             $ENV{'SCRIPT_NAME'} || $0 ) ;
92             $ENV{'LAST_MODIFIED'} = CGI::SHTML->_flastmod( $ENV{'SCRIPT_FILENAME'} || $0 );
93             delete $ENV{'PATH'};
94              
95             @ISA = "CGI";
96              
97             if ( -r $CONFIG ) { do $CONFIG }
98              
99             =head2 SUBROUTINES
100              
101             =over 2
102              
103             =item new ()
104              
105             Invokes CGI's new() command, but blesses with the local class. Also
106             performs the various local functions that are necessary.
107              
108             =cut
109              
110             sub new {
111 0     0 1 0 my $item = CGI::new(@_);
112 0         0 $$item{'NOPRINT'} = [];
113 0         0 $$item{'IFDONE'} = [];
114 0         0 $$item{'IF'} = 0;
115 0         0 bless $item, shift; $item;
  0         0  
116             }
117              
118             =item parse_shtml ( LINE [, LINE [, LINE ]] )
119              
120             Parses C as if it were an SHTML file. Returns the parsed set of
121             lines, either in an array context or as a single string suitable for
122             printing. All of the work is actually done by C.
123              
124             =cut
125              
126             sub parse_shtml {
127 0     0 1 0 my ($self, @lines) = @_;
128 0         0 map { chomp } @lines; my $line = join("\n", @lines);
  0         0  
  0         0  
129 0         0 my @parts = split m/()/s, $line;
130              
131 0         0 my @return;
132 0         0 while (@parts) {
133 0         0 my @ssi = ();
134 0   0     0 my $text = shift @parts || "";
135 0 0       0 unless ($self->_noprint) {
136 0 0 0     0 push @return, $text if defined $text && $text ne '';
137             }
138 0 0 0     0 if (scalar @parts && $parts[0] =~ /^\s*$/m) {
139 0         0 @ssi = ($1, $2); shift @parts;
  0         0  
140             }
141 0 0       0 my $ssival = $ssi[0] ? $self->ssi(@ssi) : undef;
142 0 0       0 unless ($self->_noprint) {
143 0 0 0     0 push @return, $ssival if defined $ssival && $ssival ne '';
144             }
145             }
146              
147 0         0 my $final = join("\n", @return);
148 0         0 $final;
149             }
150              
151 0     0   0 sub _ifdone { shift->_arrayset('IFDONE', @_) }
152 0     0   0 sub _noprint { shift->_arrayset('NOPRINT', @_) }
153              
154             sub _arrayset {
155 0     0   0 my ($self, $key, $val) = @_;
156 0         0 my $array = $$self{$key};
157 0         0 my $if = $$self{'IF'} - 1;
158 0 0       0 if (defined $val) { $$array[$if] = $val }
  0         0  
159 0 0       0 $$array[$if] || 0;
160             }
161              
162             =item ssi ( COMMAND, ARGS )
163              
164             Does the work of parsing an SSI statement. C is one of the
165             standard SSI "tags" - 'echo', 'include', 'fsize', 'flastmod', 'exec',
166             'set', 'config', 'odbc', 'email', 'if', 'goto', 'label', and 'break'.
167             C is a string containing the rest of the SSI command - it is parsed
168             by this function.
169              
170             Note: not all commands are implemented. In fact, all that is implemented
171             is 'echo', 'include', 'fsize', 'flastmod', 'exec', 'if/elif/else/endif',
172             and 'set'. These are all the ones that I've actually had to use to this
173             point.
174              
175             =cut
176              
177             sub ssi {
178 0     0 1 0 my ($self, $command, $args) = @_;
179 0         0 my %hash = ();
180              
181 0         0 while ($args) { # Parse $args
182 0         0 $args =~ s/^(\w+)=(\"[^\"]*\"|'.*'|\S+)\s*//;
183 0 0       0 last unless defined($1);
184 0         0 my $item = lc $1; my $val = $2;
  0         0  
185 0         0 $val =~ s/^\"|\"$//g;
186 0 0       0 $hash{$item} = $val if defined($val);
187             }
188              
189 0         0 my $orig = $self->_noprint;
190 0         0 my $if = $$self{'IF'};
191 0 0 0     0 if (lc $command eq 'if' or lc $command eq 'elif') {
    0          
    0          
192 0 0       0 if (lc $command eq 'if') { $$self{'IF'}++; $if = $$self{'IF'}; }
  0         0  
  0         0  
193 0 0       0 if ($self->_ifdone) { $self->_noprint(1); return "" }
  0         0  
  0         0  
194 0         0 my $val = _ssieval(\%hash);
195 0 0       0 if ($val) { $self->_noprint(0); $self->_ifdone(1); }
  0         0  
  0         0  
196 0         0 else { $self->_noprint(1); }
197            
198 0         0 my $noprint = $self->_noprint;
199 0         0 return "";
200              
201             } elsif (lc $command eq 'else') {
202 0 0       0 if ($self->_ifdone) { $self->_noprint(1); }
  0         0  
203 0         0 else { $self->_noprint(0); $self->_ifdone(1); }
  0         0  
204 0         0 my $noprint = $self->_noprint;
205 0         0 return "";
206              
207             } elsif (lc $command eq 'endif') {
208 0         0 my $noprint = $self->_noprint(0);
209 0         0 my $ifdone = $self->_ifdone(0);
210 0         0 $$self{'IF'}--;
211 0         0 return "";
212             }
213              
214 0 0       0 if (lc $command eq 'include') {
    0          
    0          
    0          
    0          
    0          
215 0 0       0 if ( defined $hash{'virtual'} ) { $self->_file(_vfile( $hash{'virtual'} )) }
  0 0       0  
216 0         0 elsif ( defined $hash{'file'} ) { $self->_file( $hash{'file'} ) }
217 0         0 else { return "No filename offered" };
218             } elsif (lc $command eq 'set') {
219 0   0     0 my $var = $hash{'var'} || return "No variable to set";
220 0   0     0 my $value = $hash{'value'} || "";
221 0         0 $value =~ s/\{(.*)\}/$1/g;
222 0 0       0 $value =~ s/^\$(\S+)/$ENV{$1} || $EMPTY/egx;
  0         0  
223 0         0 $ENV{$var} = $value;
224             # Should do something with "config"
225 0         0 return "";
226             } elsif (lc $command eq 'echo') {
227 0         0 $hash{'var'} =~ s/\{(.*)\}/$1/g;
228 0   0     0 return $ENV{$hash{'var'}} || $EMPTY;
229             } elsif (lc $command eq 'exec') {
230 0 0       0 if ( defined $hash{'cmd'} ) { $self->_execute( $hash{'cmd'} ) || "" }
  0 0       0  
    0          
231 0         0 elsif ( defined $hash{'cgi'} ) { $self->_execute( _vfile($hash{'cgi'}) ) }
232 0         0 else { return "No filename offered" };
233             } elsif (lc $command eq 'fsize') {
234 0 0       0 if ( defined $hash{'virtual'}) { $self->_fsize(_vfile($hash{'virtual'}))}
  0 0       0  
235 0         0 elsif ( defined $hash{'file'}) { $self->_fsize( $hash{'file'} ) }
236 0         0 else { return "No filename offered" };
237             } elsif (lc $command eq 'flastmod') {
238 0 0       0 if (defined $hash{'virtual'}) { $self->_flastmod(_vfile($hash{'virtual'}))}
  0 0       0  
239 0         0 elsif ( defined $hash{'file'}) { $self->_flastmod( $hash{'file'} ) }
240 0         0 else { return "No filename offered" };
241 0         0 } else { return "" }
242             }
243              
244             =item start_html ( TYPE, OPTIONS )
245              
246             Invokes C, and includes the appropriate header file.
247             C is passed directly into C, after being parsed
248             for the 'title' field (which is specially set). C is used to decide
249             which header file is being used; the possibilities are in
250             C<$CGI::SHTML::HEADER>.
251              
252             =cut
253              
254             sub start_html {
255 0     0 1 0 my ($self, $type, %hash) = @_;
256 0   0     0 $type = lc $type; $type ||= 'default';
  0         0  
257            
258 0         0 foreach my $key (keys %hash) {
259 0 0       0 if (lc $key eq '-title') { $ENV{'TITLE'} = $hash{$key} }
  0         0  
260             }
261            
262 0         0 my $command = "";
263              
264 0         0 return join("\n", CGI->start_html(\%hash), $self->parse_shtml($command) );
265             }
266              
267             =item end_html ( TYPE, OPTIONS )
268              
269             Loads the appropriate footer file out of C<$CGI::SHTML::FOOTER>, and invokes
270             C.
271              
272             =cut
273              
274             sub end_html {
275 0     0 1 0 my ($self, $type, %hash) = @_;
276 0   0     0 $type = lc $type; $type ||= 'default';
  0         0  
277            
278 0         0 my $command = "";
279              
280 0         0 join("\n", $self->parse_shtml($command), CGI->end_html(\%hash));
281             }
282              
283             =back
284              
285             =cut
286              
287             ###############################################################################
288             ### Internal Functions ########################################################
289             ###############################################################################
290              
291             ### _vfile ( FILENAME )
292             # Gets the virtual filename out of FILENAME, based on ROOTDIR. Also
293             # performs the substitutions in C.
294              
295             sub _vfile {
296 0   0 0   0 my $filename = shift || return undef;
297              
298             # If it starts with a '$' sign, then get the value out first
299 0 0 0     0 if ($filename =~ /^\$\{?(\S+)\}?$/) { $filename = $ENV{$1} || ""; }
  0         0  
300              
301 0   0     0 my $hostname = $ENV{'HTTP_HOST'} || $ENV{'HOSTNAME'};
302 0         0 foreach my $replace (keys %REPLACE) {
303 0 0       0 next if ($hostname =~ /^www/); # Hack
304 0         0 $filename =~ s%$replace%$REPLACE{$replace}%g;
305             }
306 0         0 my $newname;
307 0 0       0 if ($filename =~ m%^~(\w+)/(.*)$%) { $newname = "/home/$1/public_html/$2"; }
  0 0       0  
308             elsif ( $filename =~ m%^[^/]% ) {
309 0         0 my ($directory, $program) = $0 =~ m%^(.*)/(.*)$%;
310 0         0 $newname = "$directory/$filename"
311             }
312 0         0 else { $newname = "$ROOTDIR/$filename" }
313 0         0 $newname =~ s%/+%/%g; # Remove doubled-up /'s
314 0         0 $newname;
315             }
316              
317             ## _file( FILE )
318             # Open a file and parse it with parse_shtml().
319             sub _file {
320 0     0   0 my ($self, $file) = @_;
321 0 0 0     0 open( FILE, "<$file" ) or warn "Couldn't open $file: $!\n" && return "";
322 0         0 my @list = ;
323 0         0 close (FILE);
324 0         0 map { chomp } @list;
  0         0  
325 0         0 return $self->parse_shtml(@list);
326             }
327              
328             ## _execute( CMD )
329             # Run a command and get the information about it out. This isn't as
330             # secure as we'd like it to be...
331             sub _execute {
332 0     0   0 my ($self, $cmd) = @_;
333 0         0 foreach (qw( IFS CDPATH ENV BASH_ENV PATH ) ) { $ENV{$_} = ""; }
  0         0  
334 0         0 my ($command) = $cmd =~ /^(.*)$/; # Not particularly secure
335 0 0       0 open ( COMMAND, "$command |" ) or warn "Couldn't open $command\n";
336 0         0 my @list = ;
337 0         0 close (COMMAND);
338 0         0 map { chomp } @list;
  0         0  
339 0 0       0 return "" unless scalar(@list) > 0; # Didn't return anything
340             # Take out the "Content-type:" part, if it's a CGI - note, THIS IS A HACK
341 0 0 0     0 if ( scalar(@list) > 1 && $list[0] =~ /^Content-type: (.*)$/i) {
342 0         0 shift @list; shift @list;
  0         0  
343             }
344 0 0       0 wantarray ? @list : join("\n", @list);
345             }
346              
347             ## _flastmod( FILE )
348             ## _fsize( FILE )
349             # Last modification and file size of the given FILE, respectively.
350 1   50 1   44 sub _flastmod { localtime( (stat($_[1]))[9] || 0 ); }
351             sub _fsize {
352 0   0 0     my $size = ((stat($_[1]))[7]) || 0;
353 0 0         if ($size >= 1048576) {
    0          
354 0           sprintf("%4.1fMB", $size / 1048576);
355             } elsif ($size >= 1024) {
356 0           sprintf("%4.1fKB", $size / 1024);
357             } else {
358 0           sprintf("%4d bytes", $size);
359             }
360             }
361              
362             ## _ssieval( HASHREF )
363             # Evaluates the expression with 'var' or 'expr'. Meant for use with
364             # if/elif clauses. This actually more-or-less works! It's also very
365             # dangerous, though, since it uses 'eval'. Then again, given that we're
366             # already giving the user the capacity to invoke random pieces of code,
367             # it's not realy that much of a stretch...
368             sub _ssieval {
369 0     0     my $hash = shift;
370 0 0         if (my $var = $$hash{'var'}) { return $var ? 1 : 0 }
  0 0          
371 0 0         if (my $eval = $$hash{'expr'}) {
372 0           $eval =~ s/\s*\$(?:\{(\S+?)\}|(\S+?))\s*
373 0   0       / join('', "'", $ENV{$1 || $2} || "", "'" ) /egx;
374 0           my $val = eval($eval);
375 0 0         return $val ? 1 : 0; # Need to do more here.
376             }
377             0
378 0           }
379              
380             1;
381              
382             ###############################################################################
383             ### Further Documentation #####################################################
384             ###############################################################################
385              
386             =head1 NOTES
387              
388             This module was generated for a single research group at UIUC. Its goal
389             was simple: parse the SSI header and footers that were being used for the
390             rest of the web site, so that they wouldn't have to be re-implemented
391             later. Ideally, we would liked to just have Apache take care of this, but
392             it wasn't an option at the time (and as far as I know it still isn't one.)
393              
394             I mention the above because it's worth understanding the problem before
395             you think about its limitations. This script will not offer particularly
396             high performance for reasonably-sized sites that use a lot of CGI; I doubt
397             it would work at all well with mod_perl, for instance. But it has done
398             the job just fine for our research group, however; and if you want to copy
399             our general website layout, you're going to need something like this to
400             help you out.
401              
402             Also of note is that this has been designed for use so that if headers and
403             footers are not being included, you can generally fall back to the default
404             CGI.pm fairly easily enough.
405              
406             Also of note are the security issues. There are lots of ways for the user
407             to run arbitrary code with this module; however, there were already plenty
408             of ways for them to do it if you're giving them unfettered SSI access.
409             This isn't a change. So make sure that the user that your webserver runs
410             as isn't a particularly priveleged user, and *never* run code through this
411             that came from the outside! You would be a fool to do otherwise.
412              
413             =head1 SEE ALSO
414              
415             C
416              
417             =head1 TODO
418              
419             There are still a few functions that should be better implemented (format
420             strings for flastmod(), for instance). It might be nice to make this more
421             object-oriented as well; as it stands this wouldn't stand a chance with
422             mod_perl.
423              
424             =head1 AUTHOR
425              
426             Tim Skirvin
427              
428             =head1 HOMEPAGE
429              
430             B
431              
432             =head1 LICENSE
433              
434             This code is distributed under the University of Illinois Open Source
435             License. See
436             B for
437             details.
438              
439             =head1 COPYRIGHT
440              
441             Copyright 2000-2004 by the University of Illinois Board of Trustees and
442             Tim Skirvin .
443              
444             =cut
445              
446             ###############################################################################
447             ### Version History ###########################################################
448             ###############################################################################
449             # v1.0 Thu Apr 13 13:30:30 CDT 2000
450             ### Documented it, and put this module into its proper home.
451             # v1.1 Thu Apr 20 09:25:28 CDT 2000
452             ### Updated for new page layout, included better counter capabilities, and
453             ### put in the possiblity of hooks for when we need to update this for all
454             ### of the web pages.
455             # v1.11 Thu Apr 20 13:48:28 CDT 2000
456             ### Further updates, added NOCOUNTER flag for error messages
457             # v1.12 Tue Apr 25 13:28:15 CDT 2000
458             ### More updates of the header/footer files
459             # v1.2 Tue Jun 13 09:42:11 CDT 2000
460             ### Now just parses the header/footer files from the main directory, and
461             ### includes a "parse_shtml" function set. Hopefully at some point I'll
462             ### finish off parse_shtml to do all SSI functions.
463             # v1.21 Wed Jun 28 10:56:26 CDT 2000
464             ### Fixed the CGI handlings to trim out the Content-type header.
465             # v1.22 Wed Oct 31 09:46:16 CST 2001
466             ### Fixed _vfile() to do local directory checks properly.
467             ### Changed execute() behaviour to not worry about tainting - probably a
468             ### bad idea, but necessary for now.
469             # v1.23 Mon Dec 10 11:58:25 CST 2001
470             ### Created $EMPTY. Updated 'set' to use variables in its code.
471             # v1.24 Tue Apr 2 13:05:12 CST 2002
472             ### Changed parse_shtml() to remove a warning
473             # v1.25 Tue Mar 11 10:47:36 CST 2003
474             ### Updated to be a more generic name - CGI::SHTML. This will make things
475             ### a lot easier to distribute. Have to make a real package now. Eliminated
476             ### the COUNTER stuff, because it's not in use and was silly anyway. Put
477             ### in 'default' values in the headers/footers
478             # v1.26 Thu Apr 22 15:00:51 CDT 2004
479             ### Making fsize(), flastmod(), etc into internal functions.
480             # v1.26.01 Thu Apr 22 23:32:57 CDT 2004
481             ### Forgot to turn off some debugging information.
482             # v1.27 Thu May 06 10:52:32 CDT 2004
483             ### Added if/elif/else/endif functionality. This was challenging.
484             ### Documentation chanes came with it.
485             # v1.28 Mon May 17 15:15:22 CDT 2004
486             ### Put back old environment variables after an execute.
487             # v1.28 Wed May 19 11:37:06 CDT 2004
488             ### Parsing information is accurate again with parse_shtml - doesn't lose
489             ### newlines. Setting blank versions of those environment variables.