File Coverage

blib/lib/Data/Hash/Transform.pm

Criterion	Covered	Total	%
statement	44	44	100.0
branch	8	10	80.0
condition	1	2	50.0
subroutine	9	9	100.0
pod	0	5	0.0
total	62	70	88.5

line	stmt	bran	cond	sub	pod	time	code
1
2							package Data::Hash::Transform;
3							$Data::Hash::Transform::VERSION = '0.05';
4	2			2		21858	use 5.006;
	2					6
5	2			2		11	use strict;
	2					3
	2					49
6	2			2		9	use warnings;
	2					3
	2					138
7
8							require Exporter;
9							our @ISA = qw(Exporter);
10							our @EXPORT_OK = qw(hash_f hash_l hash_m hash_a hash_em);
11
12	2			2		9	use Carp qw(croak);
	2					5
	2					1070
13
14							=head1 NAME
15
16							Data::Hash::Transform - Turns array of hashes to hash of hashes in predefined ways
17
18							=head1 SYNOPSIS
19
20							use Data::Hash::Transform qw(hash_f hash_l hash_m hash_a hash_em);
21
22							my $loh = [ { k => 1, n => 'one' }, { k => 2, n => 'two' }, { k => 1, n => 'ein' } ];
23							$hoh1 = hash_f($loh, 'k'); # keep first
24							$hoh2 = hash_l($loh, 'k'); # keep last
25							$hoh3 = hash_m($loh, 'k'); # keep a list (if needed)
26							$hoh4 = hash_a($loh, 'k'); # always keep a list
27
28							$hoh = hash_em($loh, 'k', $meth); # $meth is one of 'f', 'l', 'm', or 'a'
29
30							=head1 DESCRIPTION
31
32							This module provides four algorithms to turn an array of hashes
33							to a hash of hashes. The transformation is based on using
34							the value at a certain key of inner hashes as the key
35							in the outer hash.
36
37							So:
38
39							[ { k => 1, n => 'one' }, { k => 2, n => 'two' } ]
40
41							turns to
42
43							{ 1 => { k => 1, n => 'one' }, 2 => { k => 2, n => 'two' } }
44
45							when C<'k'> is the key of keys. (From this example, it was
46							made obvious that here we mean array and hash refs when talking about
47							arrays and hashes.)
48
49							The algorithms differ when the same key occurs
50							twice or more. For example, how does the following array map
51							to a hash? (C<'k'> is still the key of keys here.)
52
53							[ { k => 1, n => 'one' }, { k => 2, n => 'two' }, { k => 1, n => 'ein' } ]
54
55							The following alternatives (among others) are possible:
56
57							=over 4
58
59							=item *
60
61							keep the first
62
63							{ 1 => { k => 1, n => 'one' }, 2 => { k => 2, n => 'two' } }
64
65							=item *
66
67							keep the last
68
69							{ 2 => { k => 2, n => 'two' }, 1 => { k => 1, n => 'ein' } }
70
71							=item *
72
73							keep a list in the case of collisions
74
75							{ 1 => [ { k => 1, n => 'one' }, { k => 1, n => 'ein' } ],
76							2 => { k => 2, n => 'two' } }
77
78							=item *
79
80							always keep a list (for the case of collisions)
81
82							{ 1 => [ { k => 1, n => 'one' }, { k => 1, n => 'ein' } ],
83							2 => [ { k => 2, n => 'two' } ] }
84
85							=back
86
87							That is exactly what we implement here.
88
89							=head2 EXPORT
90
91							None by default. C<hash_f>, C<hash_l>, C<hash_m>, C<hash_a>, and
92							C<hash_em> can be exported on demand.
93
94							=cut
95
96
97							# keep last (remember (l)ast)
98							sub hash_l {
99	2			2	0	4	my ($ary, $kk) = @_;
100	2					3	my %hash;
101	2					14	$hash{$_->{$kk}} = $_ for @$ary;
102	2					16	return \%hash;
103							}
104
105							# Note: the implementation takes for granted that
106							# the inner hashes have $kk as keys. If they don't,
107							# C<undef> will turn to C<''> and things can get
108							# messed up.
109
110							# keep first (remember (f)irst)
111							sub hash_f {
112	2			2	0	18	my ($ary, $kk) = @_;
113	2					2	my %hash;
114	2					6	for (@$ary) {
115	6					10	my $k = $_->{$kk};
116	6	100				21	$hash{$k} = $_ unless exists $hash{$k};
117							}
118	2					21	return \%hash;
119							}
120
121							# keep an array in case of collisions (remember (m)ulti)
122							sub hash_m {
123	2			2	0	3	my ($ary, $kk) = @_;
124	2					3	my %hash;
125	2					6	for (@$ary) {
126	6					12	my $k = $_->{$kk};
127	6	100				14	if (exists $hash{$k}) {
128	2	50				8	$hash{$k} = [ $hash{$k} ] if ref $hash{$k} ne 'ARRAY';
129	2					4	push @{$hash{$k}}, $_;
	2					7
130							} else {
131	4					9	$hash{$k} = $_;
132							}
133							}
134	2					21	return \%hash;
135							}
136
137							# always keep an array (remember (a)rray)
138							sub hash_a {
139	2			2	0	4	my ($ary, $kk) = @_;
140	2					3	my %hash;
141	2					6	for (@$ary) {
142	6					10	my $k = $_->{$kk};
143	6	100				15	if (exists $hash{$k}) {
144	2					3	push @{$hash{$k}}, $_;
	2					7
145							} else {
146	4					11	$hash{$k} = [ $_ ];
147							}
148							}
149	2					23	return \%hash;
150							}
151
152							# all of them together
153							sub hash_em {
154	4			4	0	10	my ($ary, $kk, $m) = @_;
155	4					19	my %methods = ( l => \&hash_l, f => \&hash_f, m => \&hash_m, a => \&hash_a );
156	4	50	50			18	my $method = $methods{$m \|\| 'f'}
157							or croak "hash_em method '$m' unknown: should be one of 'l', 'f', 'm', or 'a'";
158	4					11	return &$method($ary, $kk);
159							}
160
161							=pod
162
163							=head1 HASH_M VERSUS HASH_A
164
165							The difference between using C<hash_m> and C<hash_a>
166							mainly matters to the code that is going to consume
167							the transformed hash. In the case of C<hash_m>, it must
168							be ready to handle two cases: a single element which appears
169							as a hash ref and multiple elements which appear as an
170							array ref of hash refs. In the case of C<hash_a>,
171							the treatment is more homogeneous and you will always
172							get an array ref of hash refs.
173
174							Typical code consuming the return value of C<hash_m> is illustrated
175							by the code below.
176
177							my $h = hash_m($loh, 'k');
178							while (my ($k, $v) = each %$h) {
179							if (ref $v eq 'ARRAY') {
180							do something with $_ for @$v;
181							} else {
182							do something with $v
183							}
184							}
185
186							or the shorter:
187
188							my $h = hash_m($loh, 'k');
189							while (my ($k, $v) = each %$h) {
190							my @vs = (ref $v eq 'ARRAY') ? @$v : ($v);
191							do something with $_ for @vs;
192							}
193
194							With C<hash_a>, it would look like:
195
196							my $h = hash_a($loh, 'k');
197							while (my ($k, $v) = each %$h) {
198							do something with $_ for @$v;
199							}
200
201							It is a trade-off: the client code can be simple (C<hash_a>)
202							or the overhead of data structures can be reduced (C<hash_m>).
203
204							=head1 TO DO
205
206							If you are familiar with L<XML::Simple>, you have probably
207							recognized some of the transformations it does with hashes against arrays.
208							Mainly, the ones represented by C<hash_m> and C<hash_a>
209							(when C<ForceArray> is used).
210
211							Other transformations based on typical behavior of
212							L<XML::Simple> are possible. For example,
213
214							=over 4
215
216							=item *
217
218							discard the key element
219
220							[ { k => 1, n => 'one' }, { k => 2, n => 'two' } ]
221
222							to
223
224							{ 1 => { n => 'one' }, 2 => { n => 'two' } }
225
226							and even (for C<'n'> defined to be the contents key)
227
228							{ 1 => 'one', 2 => 'two' }
229
230							=item *
231
232							mark the key element
233
234							[ { k => 1, n => 'one' }, { k => 2, n => 'two' }, { k => 1, n => 'ein' } ]
235
236							to
237
238							{ 1 => { -k => 1, n => 'one' }, 2 => { -k => 2, n => 'two' } }
239
240							=back
241
242							Maybe someday this gets implemented too.
243
244							=head1 ISSUES
245
246							The functions C<hash_f>, C<hash_l>, C<hash_m>, and C<hash_a> have been designed to be fast
247							and that's why their code is redundant. One could write a
248							function with all bells and whistles which does all the
249							work of them together, by using options and querying them
250							at runtime. I think the code would be slightly harder to maintain
251							and performance may suffer. But this is just guessing.
252							Soon I will write such an implementation and a benchmark
253							to make sure it is worth using this code as it is.
254
255							=for comment
256							=head1 SEE ALSO
257
258							=head1 BUGS
259
260							Please report bugs via CPAN RT L<https://rt.cpan.org/Public/Dist/Display.html?Name=Data-Hash-Transform>.
261
262							=head1 AUTHOR
263
264							Adriano R. Ferreira, E<lt>ferreira@cpan.orgE<gt>
265
266							=head1 COPYRIGHT AND LICENSE
267
268							Copyright (C) 2005 by Adriano R. Ferreira
269
270							This library is free software; you can redistribute it and/or modify
271							it under the same terms as Perl itself.
272
273
274							=cut
275
276							1;
277