File Coverage

blib/lib/CGI/Struct.pm
Criterion Covered Total %
statement 69 71 97.1
branch 43 48 89.5
condition 39 42 92.8
subroutine 5 5 100.0
pod 1 1 100.0
total 157 167 94.0


line stmt bran cond sub pod time code
1             package CGI::Struct;
2              
3 15     15   412297 use strict;
  15         37  
  15         690  
4 15     15   83 use warnings;
  15         32  
  15         2875  
5              
6             =head1 NAME
7              
8             CGI::Struct - Build structures from CGI data
9              
10             =head1 VERSION
11              
12             Version 1.21
13              
14             =cut
15              
16             our $VERSION = '1.21';
17              
18              
19             =head1 SYNOPSIS
20              
21             This module allows transforming CGI GET/POST data into intricate data
22             structures. It is reminiscent of PHP's building arrays from form data,
23             but with a perl twist.
24              
25             use CGI;
26             use CGI::Struct;
27             my $cgi = CGI->new;
28             my %params = $cgi->Vars;
29             my $struct = build_cgi_struct \%params;
30              
31             =head1 DESCRIPTION
32              
33             CGI::Struct lets you transform CGI data keys that I<look like> perl data
34             structures into I<actual> perl data structures.
35              
36             CGI::Struct makes no attempt to actually I<read in> the variables from
37             the request. You should be using L<CGI> or some equivalent for that.
38             CGI::Struct expects to be handed a reference to a hash containing all the
39             keys/values you care about. The common way is to use something like
40             C<CGI-E<gt>Vars> or (as the author does)
41             C<Plack::Request-E<gt>parameters-E<gt>mixed>.
42              
43             Whatever you use should give you a hash mapping the request variable
44             names (keys) to the values sent in by the users (values). Any of the
45             major CGIish modules will have such a method; consult the documentation
46             for yours if you don't know it offhand.
47              
48             Of course, this isn't necessarily tied strictly to CGI; you I<could> use
49             it to build data structures from any other source with similar syntax.
50             All CGI::Struct does is take one hash (reference) and turn it into
51             another hash (reference). However, it's aimed at CGI uses, so it may or
52             may not work for something else.
53              
54              
55             =head1 EXAMPLES
56              
57             =head2 Basic Usage
58              
59             <form action="request.cgi">
60             Name: <input type="text" name="uinfo{name}">
61             Address: <input type="text" name="uinfo{addr}">
62             Email: <input type="text" name="uinfo{email}">
63             </form>
64              
65             When filled out and submitted the data will come in to request.cgi, which
66             will use something like C<CGI-E<gt>Vars> to parse it out into a hash
67              
68             use CGI;
69             my $cgi = CGI->new;
70             my %params = $cgi->Vars;
71              
72             You'll wind up with something like
73              
74             %params = (
75             'uinfo{name}' => 'Bob',
76             'uinfo{addr}' => '123 Main Street',
77             'uinfo{email}' => 'bob@bob.bob',
78             )
79              
80             Now we use CGI::Struct to parse that out
81              
82             use CGI::Struct;
83             my $struct = build_cgi_struct \%params;
84              
85             and we wind up with a structure that looks more like
86              
87             $struct = {
88             'uinfo' => {
89             name => 'Bob',
90             addr => '123 Main Street',
91             email => 'bob@bob.bob',
92             }
93             }
94              
95             which is much simpler to use in your code.
96              
97             =head2 Arrays
98              
99             CGI::Struct also has the ability to build out arrays.
100              
101             First cousin: <input type="text" name="cousins[0]">
102             Second cousin: <input type="text" name="cousins[1]">
103             Third cousin: <input type="text" name="cousins[2]">
104              
105             Run it through CGI to get the parameters, run through
106             L</build_cgi_struct>, and we get
107              
108             $struct = {
109             'cousins' => [
110             'Jill',
111             'Joe',
112             'Judy'
113             ]
114             }
115              
116             Of course, most CGIish modules will roll that up into an array if you
117             just call it 'cousins' and have multiple inputs. But this lets you
118             specify the indices. For instance, you may want to base the array from 1
119             instead of 0:
120              
121             First cousin: <input type="text" name="cousins[1]">
122             Second cousin: <input type="text" name="cousins[2]">
123             Third cousin: <input type="text" name="cousins[3]">
124              
125             $struct = {
126             'cousins' => [
127             undef,
128             'Jill',
129             'Joe',
130             'Judy'
131             ]
132             }
133              
134             See also the L</Auto-arrays> section.
135              
136             =head3 NULL delimited multiple values
137              
138             When using L<CGI>'s C<-E<gt>Vars> and similar, multiple passed values
139             will wind up as a C<\0>-delimited string, rather than an array ref. By
140             default, CGI::Struct will split it out into an array ref. This behavior
141             can by disabled by using the C<nullsplit> config param; see the
142             L<function doc below|/build_cgi_struct>.
143              
144             =head2 Deeper and deeper
145              
146             Specifying arrays explicitly is also useful when building arbitrarily
147             deep structures, since the array doesn't have to be at the end
148              
149             <select name="users{bob}{cousins}[5]{firstname}">
150              
151             After a quick trip through L</build_cgi_struct>, that'll turn into
152             C<$struct-E<gt>{users}{bob}{cousins}[5]{firstname}> just like you'd expect.
153              
154             =head2 Dotted hashes
155              
156             Also supported is dot notation for hash keys. This saves you a few
157             keystrokes, and can look neater. Hashes may be specified with either
158             the C<.> or with C<{}>. Arrays can only be written with C<[]>.
159              
160             The above C<select> could be written using dots for some or all of the
161             hash keys instead, looking a little Javascript-ish
162              
163             <select name="users.bob.cousins[5].firstname">
164             <select name="users.bob{cousins}[5].firstname">
165             <select name="users{bob}.cousins[5]{firstname}">
166              
167             of course, you wouldn't really want to mix-and-match in one field in
168             practice; it just looks silly.
169              
170             Sometimes, though, you may want to have dots in field names, and you
171             wouldn't want this parsing to happen then. It can be disabled for a run
172             of L</build_cgi_struct> by passing a config param in; see the L<function
173             doc below|/build_cgi_struct>.
174              
175             =head2 Auto-arrays
176              
177             CGI::Struct also builds 'auto-arrays', which is to say it turns
178             parameters ending with an empty C<[]> into arrays and pushes things onto
179             them.
180              
181             <select multiple="multiple" name="users[]">
182              
183             turns into
184              
185             $struct->{users} = ['lots', 'of', 'choices'];
186              
187             This may seem unnecessary, given the ability of most CGI modules to
188             already build the array just by having multiple C<users> params given.
189             Also, since L</build_cgi_struct> only sees the data after your CGI module
190             has already parsed it out, it will only ever see a single key in its
191             input hash for any name anyway, since hashes can't have multiple keys
192             with the same name anyway.
193              
194             However, there are a few uses for it. PHP does this, so it makes for an
195             easier transition. Also, it forces an array, so if you only chose one
196             entry in the list, L</build_cgi_struct> would still make that element in
197             the structure a (single-element) array
198              
199             $struct->{users} = ['one choice'];
200              
201             which makes your code a bit simpler, since you don't have to expect both
202             a scalar and an array in that place (though of course you should make
203             sure it's what you expect for robustness).
204              
205              
206             =head1 FUNCTIONS
207              
208             =cut
209              
210              
211             # Delimiters/groupers
212             my $delims = "[{.";
213              
214             # Tuple types for each delim
215             my %dtypes = ( '[' => 'array', '{' => 'hash', '.' => 'hash' );
216              
217             # Correponding ending groups
218             my %dcorr = ( '[' => ']', '{' => '}', '.' => undef );
219              
220             # Yeah, export it
221             require Exporter;
222             our @ISA = qw(Exporter);
223             our @EXPORT = qw(build_cgi_struct);
224              
225 15     15   17738 use Storable qw(dclone);
  15         57043  
  15         16857  
226              
227              
228              
229              
230              
231             =head2 build_cgi_struct
232              
233             $struct = build_cgi_struct \%params;
234              
235             $struct = build_cgi_struct \%params, \@errs;
236              
237             $struct = build_cgi_struct \%params, \@errs, \%conf;
238              
239             C<build_cgi_struct()> is the only function provided by this module. It
240             takes as an argument a reference to a hash of parameter name keys and
241             parameter value values. It returns a reference to a hash with the fully
242             built up structure. Any keys that can't be figured out are not present
243             in the returned hash.
244              
245             An optional array reference can be passed as the second argument, in
246             which case the array will be filled in with any warnings or errors found
247             in trying to build the structure. This should be taken as a debugging
248             tool for the developer's eyes to parse, not a source of friendly-looking
249             warnings to hand to non-technical users or as strongly formatted strings
250             for automated error mining.
251              
252             A hash reference may be supplied as a third argument for passing config
253             parameters. The currently supported parameters are:
254              
255             =over
256              
257             =item nodot
258              
259             This allows you to disable processing of C<.> as a hash element
260             separator. There may be cases where you want a C<.> as part of a field
261             name, so this lets you still use C<{}> and C<[]> structure in those
262             cases.
263              
264             The default is B<false> (i.e., I<do> use C<.> as separator). Pass a true
265             value (like C<1>) to B<not> do so.
266              
267             =item nullsplit
268              
269             C<CGI-E<gt>Vars> and compatible functions tend to, in hash form, wind up
270             with a NULL-delimited list rather than an array ref when passed multiple
271             values with the same key. CGI::Struct will check string values for
272             embedded C<\0>'s and, if found, C<split> the string on them and create an
273             arrayref.
274              
275             The C<nullsplit> config param lets you disable this if you want strings
276             with embedded C<\0> to pass through unmolested. Pass a false value (like
277             C<0>) to disable the splitting.
278              
279             =item dclone
280              
281             By default, CGI::Struct uses L<Storable>'s C<dclone> to do deep copies of
282             incoming data structures. This ensures that whatever changes you might
283             make to C<$struct> later on don't change stuff in C<%params> too. By
284             setting dclone to a B<false> value (like C<0>) you can disable this, and
285             make it so deeper refs in the data structures point to the same items.
286              
287             You probably don't want to do this, unless some data is so huge you don't
288             want to keep 2 copies around, or you really I<do> want to edit the
289             original C<%params> for some reason.
290              
291             =back
292              
293             =cut
294              
295             sub build_cgi_struct
296             {
297 18     18 1 20297 my ($iv, $errs, $conf) = @_;
298              
299 18         35 my (%ret, @errs);
300              
301             # Allow disabling '.'
302 18         46 my $delims = $delims;
303 18 100 100     707 $delims =~ s/\.// if($conf && $conf->{nodot});
304              
305             # nullsplit defaults on
306 18 100       118 $conf->{nullsplit} = 1 unless exists $conf->{nullsplit};
307              
308             # So does deep cloning
309 18 100       755 $conf->{dclone} = 1 unless exists $conf->{dclone};
310 18 50   3   100 my $dclone = sub { @_ > 1 ? @_ : $_[0] };
  3         9  
311 18 100       88 $dclone = \&dclone if $conf->{dclone};
312              
313             # Loop over keys, one at a time.
314 18         140 DKEYS: for my $k (keys %$iv)
315             {
316             # Shortcut; if it doesn't contain any special chars, just assign
317             # to the output and go back around.
318 105 100       1476 unless( $k =~ /[$delims]/)
319             {
320 13 100       238 my $nval = ref $iv->{$k} ? $dclone->($iv->{$k}) : $iv->{$k};
321 13 100 100     111 $nval = [split /\0/, $nval]
      100        
322             if($conf->{nullsplit} && ref($nval) eq ''
323             && $nval =~ /\0/);
324 13         25 $ret{$k} = $nval;
325 13         32 next;
326             }
327              
328             # Bomb if it starts with a special
329 92 100       416 if($k =~ /^[$delims]/)
330             {
331 1         3 push @errs, "Bad key; unexpected initial char in $k";
332 1         19 next;
333             }
334              
335             # Break it up into the pieces. Use the capture in split's
336             # pattern so we get the bits it matched, so we can differentiate
337             # between hashes and arrays.
338 91         605 my @kps = split /([$delims])/, $k;
339              
340             # The first of that is our top-level key. Use that to initialize
341             # our pointer to walk down the structure.
342             # $p remains a reference to a reference all the way down the
343             # walk. That's necessary; if we just make it a single reference,
344             # then it couldn't be used to replace a level as necessary (e.g.,
345             # from undef to [] or {} when we initialize).
346 91         127 my $p;
347             {
348 91         99 my $topname = shift @kps;
  91         133  
349              
350             # Make sure the key exists, then ref at it.
351 91   100     1412 $ret{$topname} ||= undef;
352              
353             # A reference to a reference
354 91         155 $p = \$ret{$topname};
355             }
356              
357             # Flag for autoarr'ing the value
358 91         128 my $autoarr = 0;
359              
360             # Now walk over the rest of the pieces and create the structure
361             # all the way down
362 91         618 my $i = 0;
363 91         1240 while($i <= $#kps)
364             {
365             # First bit should be a special
366 182 50 33     1914 if(length($kps[$i]) != 1 || $kps[$i] !~ /^[$delims]$/)
367             {
368             # This should only be possible via internal error. If
369             # deliminters aren't properly matched anywhere along the
370             # way, we _could_ end up with a case where the
371             # even-numbered items here aren't valid openers, but if
372             # that's the case then some error will have already
373             # triggered about the mismatch.
374 0         0 die "Internal error: Bad type $kps[$i] found at $i for $k";
375             }
376              
377             # OK, pull out that delimiter, and the name of the piece
378 182         784 my $sdel = $kps[$i++];
379 182         834 my $sname = $kps[$i++];
380              
381             # The name should end with the corresponding ender...
382 182 100 100     804 if($dcorr{$sdel} && $dcorr{$sdel} ne substr($sname, -1))
383             {
384 5         18 push @errs, "Didn't find ender for ${sdel} in $sname for $k";
385 5         31 next DKEYS;
386             }
387             # ... and remove it, leaving just the name
388 177 100       401 chop $sname if $dcorr{$sdel};
389              
390             # Better be >0 chars...
391 177 100 100     669 unless(defined($sname) && length $sname)
392             {
393             # Special case: if this is the last bit, and it's an
394             # array, then we do the auto-array stuff.
395 7 100 100     33 if($i > $#kps && $dtypes{$sdel} eq "array")
396             {
397 3         4 $autoarr = 1;
398 3         4 last;
399             }
400              
401             # Otherwise a 0-length label is an error.
402 4         10 push @errs, "Zero-length name element found in $k";
403 4         15 next DKEYS;
404             }
405              
406             # If it's an array, better be a number
407 170 100 100     651 if($dtypes{$sdel} eq "array" && $sname !~ /^\d+$/)
408             {
409 2         8 push @errs, "Array subscript should be a number, "
410             . "not $sname in $k";
411 2         7 next DKEYS;
412             }
413              
414              
415             # Now we know the type, so fill in that level of the
416             # structure
417 168         217 my $stype = $dtypes{$sdel};
418              
419             # Initialize if necessary.
420 168 100       330 if($stype eq "array")
    50          
421 70   100     196 { ($$p) ||= [] }
422             elsif($stype eq "hash")
423 98   100     527 { ($$p) ||= {} }
424             else
425 0         0 { die "Internal error: unknown type $stype in $k" }
426              
427             # Check type
428 168 100       429 unless(ref($$p) eq uc($stype))
429             {
430 1         6 push @errs, "Type mismatch: already have " . ref($$p)
431             . ", expecting $stype for $sname in $k";
432             # Give up on this key totally; who knows what to do
433 1         4 next DKEYS;
434             }
435              
436             # Set. Move our pointer down a step, and loop back around to
437             # the next component in this path
438 167 100       360 if($stype eq "array")
    50          
439 69         270 { $p = \($$p)->[$sname] }
440             elsif($stype eq "hash")
441 98         405 { $p = \($$p)->{$sname} }
442              
443             # And back around
444             }
445              
446              
447             # OK, we're now all the way to the bottom, and $p is a reference
448             # to that last step in the structure. Fill in the value ($p
449             # becomes a reference to a reference to that value).
450             # Special case: for autoarrays, we make sure the value ends up
451             # being a single-element array rather than a scalar, if it isn't
452             # already an array.
453 79 100       342 my $nval = ref $iv->{$k} ? $dclone->($iv->{$k}) : $iv->{$k};
454 79 100 100     564 $nval = [split /\0/, $nval]
      100        
455             if($conf->{nullsplit} && ref($nval) eq '' && $nval =~ /\0/);
456 79 100 66     252 if($autoarr && $nval && ref($nval) ne 'ARRAY')
      100        
457 1         4 { $$p = [$nval]; }
458             else
459 78         245 { $$p = $nval; }
460              
461             # And around to the next key
462             }
463              
464              
465             # If they asked for error details, give it to 'em
466 18 50       223 push @$errs, @errs if $errs;
467              
468             # Done!
469 18         85 return \%ret;
470             }
471              
472             =head1 SEE ALSO
473              
474             L<CGI>, L<CGI::Simple>, L<CGI::Minimal>, L<Plack>, and many other choices
475             for handling transforming a browser's request info a data structure
476             suitable for parsing.
477              
478             L<CGI::State> is somewhat similar to CGI::Struct, but is extremely
479             tightly coupled to L<CGI> and doesn't have as much flexibility in the
480             structures it can build.
481              
482             L<CGI::Expand> also does similar things, but is more closely tied to
483             L<CGI> or a near-equivalent. It tries to DWIM hashes and arrays using
484             only a single separator.
485              
486             The structure building done here is a perlish equivalent to the structure
487             building PHP does with passed-in parameters.
488              
489             =head1 AUTHOR
490              
491             Matthew Fuller, C<< <fullermd@over-yonder.net> >>
492              
493             =head1 BUGS
494              
495             Please report any bugs or feature requests to C<bug-cgi-struct at
496             rt.cpan.org>, or through the web interface at
497             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CGI-Struct>. I will be
498             notified, and then you'll automatically be notified of progress on your
499             bug as I make changes.
500              
501             =head1 SUPPORTED VERSIONS
502              
503             CGI::Struct should work on perl 5.6 and later. It includes a
504             comprehensive test suite, so passing that should be an indicator that it
505             works. If that's not the case, I want to hear about it so the testing
506             can be improved!
507              
508             =head1 SUPPORT
509              
510             You can find documentation for this module with the perldoc command.
511              
512             perldoc CGI::Struct
513              
514              
515             You can also look for information at:
516              
517             =over 4
518              
519             =item * RT: CPAN's request tracker
520              
521             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CGI-Struct>
522              
523             =item * AnnoCPAN: Annotated CPAN documentation
524              
525             L<http://annocpan.org/dist/CGI-Struct>
526              
527             =item * CPAN Ratings
528              
529             L<http://cpanratings.perl.org/d/CGI-Struct>
530              
531             =item * Search CPAN
532              
533             L<http://search.cpan.org/dist/CGI-Struct/>
534              
535             =back
536              
537              
538             =head1 LICENSE AND COPYRIGHT
539              
540             Copyright 2010-2012 Matthew Fuller.
541              
542             This software is licensed under the 2-clause BSD license. See the
543             LICENSE file in the distribution for details.
544              
545             =cut
546              
547             1; # End of CGI::Struct