File Coverage

blib/lib/Data/DPath/Flatten.pm

Criterion	Covered	Total	%
statement	26	33	78.7
branch	13	22	59.0
condition			n/a
subroutine	5	5	100.0
pod	1	1	100.0
total	45	61	73.7

line	stmt	bran	sub	pod	time	code
1						=head1 NAME
2
3						Data::DPath::Flatten - Convert complex data structure into key/value pairs
4
5						=head1 SYNOPSIS
6
7						use Data::DPath::Flatten qw/flatten/;
8
9						# Data can be arrays or hashes.
10						my $hash = flatten( \@record );
11						my $hash = flatten( \%record );
12
13						# Aliases add more human readable field names.
14						my $hash = flatten( \@record );
15						my $hash = flatten( \%record );
16
17						=head1 DESCRIPTION
18
19						B transforms an arbitrary data structure into a hash of
20						key/value pairs.
21
22						Why? To store raw data in an SQL database. L returns
23						arbitrary data structures. For example, Excel files return an array but XML
24						files a hash. B gives me a unique key for each field in
25						the file, regardless of the Perl data structure.
26
27						Use B where you need key/value pairs from arbitrary data.
28						The module traverses nested data structures of any depth and converts into a
29						single dimension.
30
31						=cut
32
33						package Data::DPath::Flatten;
34
35	2		2		76032	use 5.14.0;
	2				20
36	2		2		11	use Carp;
	2				4
	2				166
37	2		2		12	use Exporter qw/import/;
	2				4
	2				1023
38
39
40						our @EXPORT = (qw/flatten/);
41						our $VERSION = '1.00';
42
43
44						=head1 FUNCTIONS
45
46						=head3 flatten( $data )
47
48						B takes an arbitrary data structure and converts into a one level array
49						reference. Essentially, it flattens out nested data structures.
50
51						B returns a hash reference. The keys are L paths into the
52						original record. The value is the raw data value from the file.
53
54						The parameter C<$data> is required. It is a reference to the input data
55						structure.
56
57						# Recursively traverse arrays and hashes.
58						my $hash = flatten( \@fields );
59						my $hash = flatten( \%record );
60
61						# Scalars work, but it's kind of pointless. These come out the same.
62						my $hash = flatten( $single );
63						my $hash = flatten( \$single );
64
65						When B encounters a HASH or ARRAY reference, it recursively traverses
66						the nested structure. Circular references are traversed only once, to avoid
67						infinite loops.
68
69						SCALAR references are dereferenced and the value stored.
70
71						All other references and objects are stored as references.
72
73						=cut
74
75						sub flatten {
76	4		4	1	2651	my $data = shift;
77
78						# Flatten the original data into a one level hash. Make sure I get a new
79						# reference for every call.
80	4				7	my $new = {};
81	4				10	_step( $data, $new, '', {} );
82
83						# Return the flattened hash reference.
84	4				10	return $new;
85						}
86
87
88						#-------------------------------------------------------------------------------
89						# Internal subroutines.
90
91						# Recursively traverse the data structure, building the path string as it goes.
92						# The initial path is an empty string. This code adds the leading "/".
93						#
94						# The $seen parameter stops circular references from causing infinite loops. We
95						# traverse any reference only once.
96						#
97						# I looked into using existing data traversal modules such as Data::Rmap,
98						# Data::Traverse, Data::Visitor, or Data::Walk. Simple recurrsion was so much
99						# easier. I would have to use all kinds of gloabl variables and conditionals
100						# just to build the correct paths. This works and handles circular references.
101						sub _step {
102	50		50		72	my ($from, $to, $path, $seen) = @_;
103
104						# Process this node of the structure.
105	50	50			106	if (!defined( $from )) {
		100
		50
		100
		50
106						# No op!
107						} elsif (ref( $from ) eq '') {
108	24	50			30	if ($path eq '') { $to->{'/' } = $from; }
	0				0
109	24				48	else { $to->{$path} = $from; }
110						} elsif (ref( $from) eq 'SCALAR') {
111	0	0			0	if ($path eq '') { $to->{'/' } = $$from; }
	0				0
112	0				0	else { $to->{$path} = $$from; }
113						} elsif (ref( $from) eq 'HASH') {
114	16	100			28	unless (exists $seen->{$from}) {
115	15				26	$seen->{$from}++;
116	15				37	while (my ($key, $value) = each %$from) {
117	24	50			37	$key = "\"$key\"" if m/\.\.\|\*\|::ancestor(-or-self)?\|\/\/\|\[\|\]/;
118	24				48	_step( $value, $to, "$path/$key", $seen );
119						}
120						}
121						} elsif (ref( $from ) eq 'ARRAY') {
122	10	100			22	unless (exists $seen->{$from}) {
123	9				14	$seen->{$from}++;
124	9				24	while (my ($index, $value) = each @$from) {
125	22				46	_step( $value, $to, "$path/*[$index]", $seen );
126						}
127						}
128						} else {
129	0	0			0	if ($path eq '') { $to->{'/' } = $from; }
	0				0
130	0				0	else { $to->{$path} = $from; }
131						}
132
133	50				114	return;
134						}
135
136
137						=head1 SEE ALSO
138
139						L
140
141						=head1 REPOSITORY
142
143						L
144
145						=head1 AUTHOR
146
147						Robert Wohlfarth
148
149						=head1 COPYRIGHT AND LICENSE
150
151						Copyright (c) 2022 Robert Wohlfarth
152
153						This module is free software; you can redistribute it and/or modify it
154						under the same terms as Perl 5.10.0. For details, see the full text of the
155						license in the file LICENSE.
156
157						This program is distributed in the hope that it will be useful, but
158						without any warranty; without even the implied
159
160						=cut
161
162						# Required by Perl to load the module.
163						1;