File Coverage

blib/lib/Hash/Type.pm
Criterion Covered Total %
statement 100 102 98.0
branch 41 54 75.9
condition 4 6 66.6
subroutine 16 16 100.0
pod 4 4 100.0
total 165 182 90.6


line stmt bran cond sub pod time code
1             package Hash::Type;
2              
3 3     3   83280 use strict;
  3         6  
  3         174  
4 3     3   16 use warnings;
  3         3  
  3         83  
5 3     3   16 use Carp;
  3         15  
  3         5759  
6              
7             our $VERSION = "1.09";
8              
9             =head1 NAME
10              
11             Hash::Type - pseudo-hashes as arrays tied to a "type" (list of fields)
12              
13             =head1 SYNOPSIS
14              
15             use Hash::Type;
16              
17             # create a Hash::Type
18             my $personType = new Hash::Type(qw(firstname lastname city));
19              
20             # create and populate some hashes tied to $personType
21             tie %wolfgang, $personType, "wolfgang amadeus", "mozart", "salzburg";
22             $ludwig = new $personType ("ludwig", "van beethoven", "vienna");
23             $jsb = new $personType;
24             $jsb->{city} = "leipzig";
25             @{$jsb}{qw(firstname lastname)} = ("johann sebastian", "bach");
26              
27             # add fields dynamically
28             $personType->add("birth", "death") or die "fields not added";
29             $wolfgang{birth} = 1750;
30              
31             # More complete example : read a flat file with headers on first line
32             my ($headerline, @datalines) = map {chomp; $_} ;
33             my $ht = new Hash::Type(split /\t/, $headerline);
34             foreach my $line (@datalines) {
35             my $data = new $ht(split /\t/, $line);
36             work_with($data->{someField}, $data->{someOtherField});
37             }
38              
39             # an alternative to Time::gmtime and Time::localtime
40             my $timeType = new Hash::Type qw(sec min hour mday mon year wday yday);
41             my $localtime = new $timeType (localtime);
42             my $gmtime = new $timeType (gmtime);
43             print $localtime->{hour} - $gmtime->{hour}, " hours difference to GMT";
44              
45             # comparison functions
46             my $byAge = $personType->cmp("birth : -num, lastname, firstname");
47             my $byNameLength = $personType->cmp(lastname => {length($b) <=> length($a)},
48             lastname => 'alpha',
49             firstname => 'alpha');
50             showPerson($_) foreach (sort $byAge @people);
51             showPerson($_) foreach (sort $byNameLength @people);
52              
53             # special comparisons : dates
54             my $US_DateCmp = $myHashType->cmp("someDateField : m/d/y");
55             my $FR_InverseDateCmp = $myHashType->cmp("someDateField : -d.m.y");
56              
57             =head1 DESCRIPTION
58              
59             A Hash::Type is a collection of field names.
60             Internally, an index is associated with each name.
61             Such collections are created dynamically and can be extended.
62             They are used to build tied hashes, either through C
63             or through object-oriented method calls; such tied hashes :
64              
65             =over
66              
67             =item *
68              
69             are 'restricted' (will only accept operations on names previously
70             declared in their Hash::Type)
71              
72             =item *
73              
74             are implemented internally as arrays (so they use less memory)
75              
76             =item *
77              
78             can be sorted efficiently through comparison functions generated
79             and compiled by the class
80              
81             =back
82              
83             The 'pseudo-hashes' in core Perl were very similar, but they
84             are deprecated starting from Perl 5.8.0. More on comparison
85             with other packages in section L
86              
87             =head1 METHODS
88              
89             =over
90              
91             =item C<$myType = new Hash::Type(@names)>
92              
93             Creates a new object which holds a collection of names and associated indices
94             (technically, this is a hash reference blessed in package Hash::Type).
95             This object can then be used to generate tied hashes.
96             The list of C<@names> is optional ; names can be added later through
97             method C.
98              
99              
100             =item C<$h = new $myType(@vals)>
101              
102             Creates a new tied hash associated to package Hash::Type and
103             containing a reference to $myType (technically, this is an array
104             reference, tied to package Hash::Type).
105              
106             The other way to create a tied hash is through the C syntax :
107              
108             tie %h, $myType, @vals;
109              
110             Access to C<$h{name}> is equivalent to writing
111              
112             tied(%h)->[$myType->{name}]
113              
114             C<$h{'Hash::Type'}> is a special, predefined name that gives back the object
115             to which this hash is tied (you may need it for example to generate a
116             comparison function, see below).
117              
118             The operation C is forbidden.
119             To delete a value, you have to go to the underlying array :
120              
121             delete tied(%h)->[$myType->{name}];
122              
123             =cut
124              
125              
126             sub new {
127 25     25 1 6508 my $class = shift;
128              
129 25 100       66 if (ref($class)) { # $class is an object, create a new tied hash from it
130 21         30 my %h;
131 21         78 tie %h, $class , @_;
132 21         83 return \%h;
133             }
134             else { # create a new Hash::Type object
135 4         9 my $self = {};
136 4         14 CORE::bless $self, $class;
137 4         21 $self->add(@_); # add indices for fields given in @_
138 4         12 return $self;
139             }
140             }
141              
142              
143             # tied hash implementation
144              
145 22     22   980 sub TIEHASH { CORE::bless [@_] }
146 8 100   8   1917 sub STORE { my $ix = $_[0]->[0]{$_[1]} or
147             croak "can't STORE, key '$_[1]' was never added to this Hash::Type";
148 6         23 $_[0]->[$ix] = $_[2]; }
149              
150             # FETCH : must be an lvalue because may be used in $h{field} =~ s/.../../;
151             # And since lvalues cannot use "return" (cf. L), we
152             # must write intricate ternary ifs -- not nice to read !
153              
154             sub FETCH : lvalue {
155 252     252   4260 my $ix = $_[0]->[0]{$_[1]};
156 252 50       1159 $_[1] eq 'Hash::Type' ? $_[0]->[0]
    100          
157             : $ix ? $_[0]->[$ix]
158             : undef;
159             }
160              
161 2     2   712 sub FIRSTKEY { my $a = scalar keys %{$_[0]->[0]}; each %{$_[0]->[0]} }
  2         8  
  2         3  
  2         12  
162 8     8   11 sub NEXTKEY { each %{$_[0]->[0]} }
  8         24  
163 8     8   1134 sub EXISTS { exists $_[0]->[0]{$_[1]} }
164 1     1   595 sub DELETE { croak "DELETE forbidden on hash tied to " . __PACKAGE__; }
165 1     1   553 sub CLEAR { delete @{$_[0]}[1 .. $#{$_[0]}] }
  1         6  
  1         3  
166              
167              
168              
169             =item C<$myType-Eadd(@newNames)>
170              
171             Adds @newNames in $myType and gives them new indices.
172             Does nothing for names that were already present.
173             Returns the number of names actually added.
174              
175             You can also dynamically remove names by writing
176             C{name}> ; however, this merely
177             masks access to {name} for all hashes tied to $myType,
178             so the values are still present in the underlying arrays and
179             you will not gain any memory by doing this.
180              
181             After deleting C<{name}>, you can again call
182             C<$myType-Eadd('name')>, but this will allocate a new index,
183             and not recover the previous one allocated to that key.
184              
185              
186             =cut
187              
188             sub add {
189 5     5 1 6360 my $self = shift;
190              
191             # find the highed current index (cannot just take scalar(keys %$self)
192             # because some keys might have been deleted in the meantime
193 5         10 my $max = 0;
194 5 100       34 foreach (values %$self) { $max = $_ if $_ > $max; }
  3         12  
195              
196 5         10 my $ix = $max;
197 5 100       14 foreach (@_) { $self->{$_} = ++$ix unless exists $self->{$_}; }
  24         83  
198 5         17 return $ix - $max;
199             }
200              
201              
202              
203             =item C<$myType-Enames>
204              
205             Returns the list of defined names, in index order
206             (which might be different from (keys %$myType)).
207              
208             =cut
209              
210 1     1 1 558 sub names { sort {$_[0]->{$a} <=> $_[0]->{$b} } keys %{$_[0]} }
  8         18  
  1         7  
211              
212              
213             =item C<$cmp = $myType-Ecmp("f1 : cmp1, f2 : cmp2 , ...")>
214              
215             Returns a reference to an anonymous sub which successively compares
216             the given field names, applying the given operators,
217             and returns a positive, negative or zero value.
218             This sub can then be fed to C. 'f1', 'f2', etc are field names,
219             'cmp1', 'cmp2' are comparison operators written as :
220              
221             [+|-] [alpha|num|cmp|<=>|d.m.y|d/m/y|y-m-d|...]
222              
223             The sign is '+' for ascending order, '-' for descending; default is '+'.
224             Operator 'alpha' is synonym to 'cmp' and 'num' is synonym to '<=>';
225             operators 'd.m.y', 'd/m/y', etc. are for dates in various
226             formats; default is 'alpha'.
227              
228             If all you want is alphabetic ascending order,
229             just write the field names :
230              
231             $cmp = $personType->cmp('lastname', 'firstname');
232              
233             B : C will not accept something like
234              
235             sort $personType->cmp('lastname', 'firstname') @people;
236              
237             so you I store it in a variable first :
238              
239             my $cmp = $personType->cmp('lastname', 'firstname');
240             sort $cmp @people;
241              
242             For date comparisons, values are parsed into day/month/year, according
243             to the shape specified (for example 'd.m.y') will take '.' as
244             a separator. Day, month or year need not be several digits,
245             so '1.1.1' will be interpreted as '01.01.2001'. Years of 2 or 1 digits
246             are mapped to 2000 or 1900, with pivot at 33 (so 32 becomes 2032 and
247             33 becomes 1933).
248              
249             =item C<$cmp = $myType-Ecmp(f1 =E cmp1, f2 =E cmp2, ...)>
250              
251             This second syntax, with pairs of field names and operators,
252             is a bit more verbose but gives you more flexibility,
253             as you can write your own
254             comparison functions using C<$a> and C<$b> :
255              
256             my $byNameLength = $personType->cmp(lastname => {length($b) <=> length($a)},
257             lastname => 'alpha',
258             firstname => 'alpha');
259              
260             B : the resulting closure is bound to
261             special variables C<$a> and <$b>. Since those
262             are different in each package, you cannot
263             pass the comparison function to another
264             package : the call to C has to be done here.
265              
266              
267              
268             =back
269              
270             =cut
271              
272             sub cmp {
273 11     11 1 6305 my $self = shift;
274              
275 11 50       36 croak "cmp : no cmp args" if not @_;
276              
277 11 100       89 if (@_ == 1) { # first syntax, all in one string
278 10         39 my @fields = split /,/, shift;
279 10         20 foreach (@fields) {
280 18 50       124 m[^\s*(\S.*?)\s*(?::([^:]+))?$] or croak "bad cmp op : $_";
281 18         94 push @_, $1, $2; # feed back to @_ as arguments to second syntax
282             }
283             }
284              
285             # $a and $b are different in each package, so must refer to the caller's
286 11         27 my $caller = caller;
287 11         39 my ($a, $b) = ("\$${caller}::a", "\$${caller}::b");
288              
289 11         17 my @cmp; # holds code for each comparison to perform
290             my @callerSub; # references to comparison subs given by caller
291             # (must copy them from @_ into a lexical in order to
292             # build a proper closure)
293 0         0 my $regex; # used only for date comparisons, see below
294              
295 11         42 for (my $i = 0; $i < @_; $i += 2) {
296 21 50       64 my $ix = $self->{$_[$i]} or croak "can't do cmp on absent field : $_[$i]";
297              
298 21 100       52 if (ref $_[$i+1] eq 'CODE') { # ref. to cmp function supplied by caller
299 1         3 push @callerSub, $_[$i+1];
300 1         9 push @cmp, "do {local ($a, $b) = (tied(%$a)->[$ix], tied(%$b)->[$ix]);".
301             "&{\$callerSub[$#callerSub]}}";
302             }
303             else { # builtin comparison operator
304 20         37 my ($sign, $op) = ("", "cmp");
305 20         28 my $str;
306 20 100       50 if (defined $_[$i+1]) {
307 15         88 ($sign, $op) = ($_[$i+1] =~ /^\s*([-+]?)\s*(.+)/);
308             }
309              
310 20         59 for ($op) {
311 20 100       75 /^(alpha|cmp)\s*$/ and do {$str = "%s cmp %s"; last};
  7         9  
  7         10  
312 13 100       107 /^(num|<=>)\s*$/ and do {$str = "%s <=> %s"; last};
  9         16  
  9         11  
313 4 100       14 /^d(\W+)m(\W+)y\s*$/ and do {$regex=qr{(\d+)\Q$1\E(\d+)\Q$2\E(\d+)};
  2         32  
314 2         4 $str = "_dateCmp(\$regex, 0, 1, 2, %s, %s)";
315 2         6 last};
316 2 100       9 /^m(\W+)d(\W+)y\s*$/ and do {$regex=qr{(\d+)\Q$1\E(\d+)\Q$2\E(\d+)};
  1         19  
317 1         1 $str = "_dateCmp(\$regex, 1, 0, 2, %s, %s)";
318 1         2 last};
319 1 50       7 /^y(\W+)m(\W+)d\s*$/ and do {$regex=qr{(\d+)\Q$1\E(\d+)\Q$2\E(\d+)};
  1         19  
320 1         2 $str = "_dateCmp(\$regex, 2, 1, 0, %s, %s)";
321 1         3 last};
322 0         0 croak "bad operator for Hash::Type::cmp : $_[$i+1]";
323             }
324 20         107 $str = sprintf("$sign($str)", "tied(%$a)->[$ix]", "tied(%$b)->[$ix]");
325 20         68 push @cmp, $str;
326             }
327             }
328              
329 11         14 local $@;
330 11 50       1560 my $sub = eval "sub {" . join(" || ", @cmp) . "}"
331             or croak $@;
332 11         55 return $sub;
333             }
334              
335              
336             sub _dateCmp {
337 55     55   154 my ($regex, $d, $m, $y, $date1, $date2) = @_;
338              
339 55 0 33     103 return 0 if not $date1 and not $date2;
340 55 50       85 return 1 if not $date1; # null date treated as bigger than any other
341 55 50       88 return -1 if not $date2;
342              
343 55         73 for my $date ($date1, $date2) {
344 110         134 $date =~ s[<.*?>][]g; # remove any markup
345 110         203 $date =~ tr/{}[]()//d; # remove any {}[]() chars
346             };
347              
348 55 50       375 my @d1 = ($date1 =~ $regex) or croak "invalid date '$date1' for regex $regex";
349 55 50       385 my @d2 = ($date2 =~ $regex) or croak "invalid date '$date2' for regex $regex";
350              
351 55 50       190 $d1[$y] += ($d1[$y] < 33) ? 2000 : 1900 if $d1[$y] < 100;
    100          
352 55 100       141 $d2[$y] += ($d2[$y] < 33) ? 2000 : 1900 if $d2[$y] < 100;
    100          
353              
354 55   100     2281 return ($d1[$y]<=>$d2[$y]) || ($d1[$m]<=>$d2[$m]) || ($d1[$d]<=>$d2[$d]);
355             }
356              
357              
358             =head1 CAVEATS
359              
360             The implementation of 'each', 'keys', 'values' on tied hashes
361             calls corresponding operations on the Hash::Type object ;
362             therefore, nested 'each' on several tied hashes won't work.
363              
364             =head1 SEE ALSO
365              
366             The 'pseudo-hashes' documented in L are very similar,
367             but are deprecated starting from Perl 5.8.0.
368             Each pseudo-hash holds its own copy of key names in position 0
369             of the underlying array, whereas hashes tied to C
370             hold a reference to a shared collection of keys.
371              
372             Typed references together with the C pragma
373             provide support for compile-time translation of key names
374             to array indices; see L. This will be faster, but will
375             not help if field names are only known at runtime (like
376             in the flat file parsing example of the synopsis).
377              
378             For other ways to restrict the keys of a hash to a fixed set, see
379             L, L, L.
380              
381             The L module in CPAN uses similar techniques for
382             dynamically building sorting criterias according to field
383             positions; but it is intended for numbered fields, not
384             for named fields, and has no support for caller-supplied
385             comparison operators. The design is also a bit different :
386             C does everything at once (splitting, comparing
387             and sorting), whereas C only compares, and
388             leaves it to the caller to do the rest.
389              
390             C was primarily designed as a core element
391             for implementing rows of data in L.
392              
393             =head1 AUTHOR
394              
395             Laurent Dami, Elaurent.dami AT etat geneve chE
396              
397             =head1 COPYRIGHT AND LICENSE
398              
399             Copyright 2005 by Laurent Dami.
400              
401             This library is free software; you can redistribute it and/or modify
402             it under the same terms as Perl itself.
403              
404             =cut
405              
406             1;
407