File Coverage

blib/lib/Data/Hash/Transform.pm
Criterion Covered Total %
statement 44 44 100.0
branch 8 10 80.0
condition 1 2 50.0
subroutine 9 9 100.0
pod 0 5 0.0
total 62 70 88.5


line stmt bran cond sub pod time code
1              
2             package Data::Hash::Transform;
3             $Data::Hash::Transform::VERSION = '0.05';
4 2     2   21858 use 5.006;
  2         6  
5 2     2   11 use strict;
  2         3  
  2         49  
6 2     2   9 use warnings;
  2         3  
  2         138  
7              
8             require Exporter;
9             our @ISA = qw(Exporter);
10             our @EXPORT_OK = qw(hash_f hash_l hash_m hash_a hash_em);
11              
12 2     2   9 use Carp qw(croak);
  2         5  
  2         1070  
13              
14             =head1 NAME
15              
16             Data::Hash::Transform - Turns array of hashes to hash of hashes in predefined ways
17              
18             =head1 SYNOPSIS
19              
20             use Data::Hash::Transform qw(hash_f hash_l hash_m hash_a hash_em);
21              
22             my $loh = [ { k => 1, n => 'one' }, { k => 2, n => 'two' }, { k => 1, n => 'ein' } ];
23             $hoh1 = hash_f($loh, 'k'); # keep first
24             $hoh2 = hash_l($loh, 'k'); # keep last
25             $hoh3 = hash_m($loh, 'k'); # keep a list (if needed)
26             $hoh4 = hash_a($loh, 'k'); # always keep a list
27              
28             $hoh = hash_em($loh, 'k', $meth); # $meth is one of 'f', 'l', 'm', or 'a'
29              
30             =head1 DESCRIPTION
31              
32             This module provides four algorithms to turn an array of hashes
33             to a hash of hashes. The transformation is based on using
34             the value at a certain key of inner hashes as the key
35             in the outer hash.
36              
37             So:
38              
39             [ { k => 1, n => 'one' }, { k => 2, n => 'two' } ]
40              
41             turns to
42              
43             { 1 => { k => 1, n => 'one' }, 2 => { k => 2, n => 'two' } }
44              
45             when C<'k'> is the key of keys. (From this example, it was
46             made obvious that here we mean array and hash refs when talking about
47             arrays and hashes.)
48              
49             The difference among the algorithms shows up when the same key occurs
50             twice or more. For example, how does the following array map
51             to a hash? (C<'k'> is still the key of keys here.)
52              
53             [ { k => 1, n => 'one' }, { k => 2, n => 'two' }, { k => 1, n => 'ein' } ]
54              
55             The following alternatives (among others) are possible:
56              
57             =over 4
58              
59             =item *
60              
61             keep the first
62              
63             { 1 => { k => 1, n => 'one' }, 2 => { k => 2, n => 'two' } }
64              
65             =item *
66              
67             keep the last
68              
69             { 2 => { k => 2, n => 'two' }, 1 => { k => 1, n => 'ein' } }
70              
71             =item *
72              
73             keep a list in the case of collisions
74              
75             { 1 => [ { k => 1, n => 'one' }, { k => 1, n => 'ein' } ],
76             2 => { k => 2, n => 'two' } }
77              
78             =item *
79              
80             always keep a list (for the case of collisions)
81              
82             { 1 => [ { k => 1, n => 'one' }, { k => 1, n => 'ein' } ],
83             2 => [ { k => 2, n => 'two' } ] }
84              
85             =back
86              
87             That is exactly what we implement here.
88              
89             =head2 EXPORT
90              
91             None by default. C<hash_f>, C<hash_l>, C<hash_m>, C<hash_a>,
92             C<hash_em> can be exported on demand.
93              
94             =cut
95              
96              
97             # keep last (remember (l)ast)
               # Takes ($ary, $kk): an array ref of hash refs and the name of the key
               # whose value becomes the outer hash key. Each element is stored under
               # $_->{$kk}, so a later element overwrites an earlier one with the same
               # key. Returns a reference to the resulting hash.
98             sub hash_l {
99 2     2 0 4 my ($ary, $kk) = @_;
100 2         3 my %hash;
101 2         14 $hash{$_->{$kk}} = $_ for @$ary;
102 2         16 return \%hash;
103             }
104              
105             # note. The implementation takes for granted that
106             # the inner hashes have $kk as keys. If they don't,
107             # C<undef> will turn to C<''> and things can get
108             # messed up.
109              
110             # keep first (remember (f)irst)
                # Takes ($ary, $kk): an array ref of hash refs and the name of the key
                # whose value becomes the outer hash key. The first element seen for a
                # key is kept; later elements with the same key are skipped by the
                # exists check. Returns a reference to the resulting hash.
111             sub hash_f {
112 2     2 0 18 my ($ary, $kk) = @_;
113 2         2 my %hash;
114 2         6 for (@$ary) {
115 6         10 my $k = $_->{$kk};
116 6 100       21 $hash{$k} = $_ unless exists $hash{$k};
117             }
118 2         21 return \%hash;
119             }
120              
121             # keep an array in case of collisions (remember (m)ulti)
                # Takes ($ary, $kk): an array ref of hash refs and the name of the key
                # whose value becomes the outer hash key. A key seen once maps directly
                # to its element. On a collision the stored value is wrapped in an
                # array ref (unless it already is one) and the new element is pushed
                # onto it. Returns a reference to the resulting hash.
122             sub hash_m {
123 2     2 0 3 my ($ary, $kk) = @_;
124 2         3 my %hash;
125 2         6 for (@$ary) {
126 6         12 my $k = $_->{$kk};
127 6 100       14 if (exists $hash{$k}) {
128 2 50       8 $hash{$k} = [ $hash{$k} ] if ref $hash{$k} ne 'ARRAY';
129 2         4 push @{$hash{$k}}, $_;
  2         7  
130             } else {
131 4         9 $hash{$k} = $_;
132             }
133             }
134 2         21 return \%hash;
135             }
136              
137             # always keep an array (remember (a)rray)
                # Takes ($ary, $kk): an array ref of hash refs and the name of the key
                # whose value becomes the outer hash key. Every key maps to an array
                # ref holding all elements that share that key, in input order, even
                # when there is only one. Returns a reference to the resulting hash.
138             sub hash_a {
139 2     2 0 4 my ($ary, $kk) = @_;
140 2         3 my %hash;
141 2         6 for (@$ary) {
142 6         10 my $k = $_->{$kk};
143 6 100       15 if (exists $hash{$k}) {
144 2         3 push @{$hash{$k}}, $_;
  2         7  
145             } else {
146 4         11 $hash{$k} = [ $_ ];
147             }
148             }
149 2         23 return \%hash;
150             }
151              
152             # all of them together
                # Takes ($ary, $kk, $m) and dispatches to hash_l, hash_f, hash_m or
                # hash_a according to $m ('l', 'f', 'm' or 'a'). A false $m (e.g. undef)
                # selects 'f'. Croaks when $m does not name a known method. Returns
                # whatever the selected function returns.
153             sub hash_em {
154 4     4 0 10 my ($ary, $kk, $m) = @_;
155 4         19 my %methods = ( l => \&hash_l, f => \&hash_f, m => \&hash_m, a => \&hash_a );
156 4 50 50     18 my $method = $methods{$m || 'f'}
157             or croak "hash_em method '$m' unknown: should be one of 'l', 'f', 'm', or 'a'";
158 4         11 return &$method($ary, $kk);
159             }
160              
161             =pod
162              
163             =head1 HASH_M VERSUS HASH_A
164              
165             The difference between using C<hash_m> and C<hash_a> is
166             primarily oriented to the code that is going to consume
167             the transformed hash. In the case of C<hash_m>, it must
168             be ready to handle two cases: a single element which appears
169             as a hash ref and multiple elements which appear as an
170             array ref of hash refs. In the case of C<hash_a>,
171             the treatment is more homogeneous and you will always
172             get an array ref of hash refs.
173              
174             Typical code consuming the return of C<hash_m> is illustrated
175             by the code below.
176              
177             my $h = hash_m($loh, 'k');
178             while (my ($k, $v) = each %$h) {
179             if (ref $v eq 'ARRAY') {
180             do something with $_ for @$v;
181             } else {
182             do something with $v
183             }
184             }
185              
186             or the shorter:
187              
188             my $h = hash_m($loh, 'k');
189             while (my ($k, $v) = each %$h) {
190             my @vs = (ref $v eq 'ARRAY') ? @$v : ($v);
191             do something with $_ for @vs;
192             }
193              
194             With C<hash_a>, it would look like:
195              
196             my $h = hash_a($loh, 'k');
197             while (my ($k, $v) = each %$h) {
198             do something with $_ for @$v;
199             }
200              
201             It is a trade-off: the client code can be simple (C<hash_a>)
202             or the overhead of data structures can be reduced (C<hash_m>).
203              
204             =head1 TO DO
205              
206             If you are familiar with L, you probably have
207             recognized some of the transformations it does with hashes against arrays.
208             Mainly, the ones represented by C and C
209             (when C is used).
210              
211             Other transformations based on typical behavior of
212             L are possible. For example,
213              
214             =over 4
215              
216             =item *
217              
218             discard the key element
219              
220             [ { k => 1, n => 'one' }, { k => 2, n => 'two' } ]
221              
222             to
223              
224             { 1 => { n => 'one' }, 2 => { n => 'two' } }
225              
226             and even (for C<'n'> defined to be the contents key)
227              
228             { 1 => 'one', 2 => 'two' }
229              
230             =item *
231              
232             mark the key element
233              
234             [ { k => 1, n => 'one' }, { k => 2, n => 'two' }, { k => 1, n => 'ein' } ]
235              
236             to
237              
238             { 1 => { -k => 1, n => 'one' }, 2 => { -k => 2, n => 'two' } }
239              
240             =back
241              
242             Maybe someday this gets implemented too.
243              
244             =head1 ISSUES
245              
246             The functions C<hash_f>, C<hash_l>, C<hash_m> and C<hash_a> have been designed to be fast
247             and that's why their code is redundant. One could write a
248             function with all bells and whistles which does all the
249             work of them together, by using options and querying them
250             at runtime. I think the code would be slightly harder to maintain
251             and performance may suffer. But this is just guessing.
252             Soon I will write such an implementation and a benchmark
253             to make sure it is worth using this code as it is.
254              
255             =for comment
256             =head1 SEE ALSO
257              
258             =head1 BUGS
259              
260             Please report bugs via CPAN RT L.
261              
262             =head1 AUTHOR
263              
264             Adriano R. Ferreira, E<lt>ferreira@cpan.orgE<gt>
265              
266             =head1 COPYRIGHT AND LICENSE
267              
268             Copyright (C) 2005 by Adriano R. Ferreira
269              
270             This library is free software; you can redistribute it and/or modify
271             it under the same terms as Perl itself.
272              
273              
274             =cut
275              
276             1;
277