package Data::SplitSerializer;

our $VERSION = '0.91'; # VERSION
# ABSTRACT: Modules that "split serialize" data structures

#############################################################################
# Modules

use sanity;
use Moo;
use Types::Standard qw(Bool Str HashRef InstanceOf HasMethods);

use Module::Runtime qw( use_module );
use Hash::Merge;
use Try::Tiny;
use Scalar::Util qw( blessed );

use namespace::clean;
no warnings 'uninitialized';

#############################################################################
# Custom Hash::Merge behaviors

my $default_behavior = 'LEFT_PRECEDENT_STRICT_ARRAY_INDEX';

Hash::Merge::specify_behavior(
   {
      # NOTE: Undef is still considered 'SCALAR'.
      SCALAR => {
         SCALAR => sub { $_[1] },
         ARRAY  => sub {
            return $_[1] unless defined $_[0];
            die sprintf('mismatched type (%s vs. %s) found during merge: $scalar = %s', 'SCALAR', 'ARRAY', $_[0]);
         },
         HASH   => sub {
            return $_[1] unless defined $_[0];
            die sprintf('mismatched type (%s vs. %s) found during merge: $scalar = %s', 'SCALAR', 'HASH', $_[0]);
         },
      },
      ARRAY => {
         SCALAR => sub {
            return $_[0] unless defined $_[1];
            die sprintf('mismatched type (%s vs. %s) found during merge: $scalar = %s', 'ARRAY', 'SCALAR', $_[1]);
         },
         ARRAY  => sub {
            # Handle arrays by index, not by combining
            my ($l, $r) = @_;
            $l->[$_] = $r->[$_] for (
               grep { defined $r->[$_] }
               (0 .. $#{$_[1]})
            );
            return $l;
         },
         HASH   => sub { die sprintf('mismatched type (%s vs. %s) found during merge', 'ARRAY', 'HASH'); },
      },
      HASH => {
         SCALAR => sub {
            return $_[0] unless defined $_[1];
            die sprintf('mismatched type (%s vs. %s) found during merge: $scalar = %s', 'HASH', 'SCALAR', $_[1]);
         },
         ARRAY  => sub { die sprintf('mismatched type (%s vs. %s) found during merge', 'HASH', 'ARRAY'); },
         HASH   => sub { Hash::Merge::_merge_hashes( $_[0], $_[1] ) },
      },
   },
   $default_behavior,
);

#############################################################################
# Attributes

has _merge_obj => (
   is      => 'rw',
   isa     => InstanceOf['Hash::Merge'],
   default => sub { Hash::Merge->new($default_behavior); },
   # handles maps local method names => Hash::Merge methods
   handles => {
      merge                  => 'merge',
      specify_merge_behavior => 'specify_behavior',
      set_merge_behavior     => 'set_behavior',
   },
);

has path_style => (
   is      => 'ro',
   isa     => Str,
   default => sub { 'DZIL' },
   coerce  => sub {
      my $style = $_[0];
      # a '=' prefix means "use this class name as-is"
      return $style if $style =~ s/^=//;
      return 'Parse::Path::'.$style;
   },
);

has path_options => (
   is      => 'ro',
   isa     => HashRef,
   default => sub { {
      auto_normalize => 1,
      auto_cleanup   => 1,
   } },
);

has remove_undefs => (
   is      => 'ro',
   isa     => Bool,
   default => sub { 1 },
);

#############################################################################
# Pre/post-BUILD

sub BUILD {
   my $self = $_[0];

   # Load the path class
   use_module $self->path_style;

   return $self;
}

#############################################################################
# Methods

### FLATTENING ###

sub serialize {
   my ($self, $ref) = @_;

   my $type = ref $ref;
   die 'Reference must be an unblessed HASH or ARRAY!'
      unless (defined $ref && !blessed $ref && $type =~ /HASH|ARRAY/);

   return $self->serialize_refpath('', $ref);
}

sub serialize_refpath {
   my ($self, $path, $ref) = @_;
   $path //= '';

   my $prh = { $path => $ref };  # single row answer

   return $prh if blessed $ref;               # down that path leads madness...
   my $type = ref $ref || return $prh;        # that covers SCALARs...
   return $prh unless $type =~ /HASH|ARRAY/;  # ...and all other endpoints

   # Blessed is the path
   unless (blessed $path) {
      $path = $self->path_style->new(
         %{ $self->path_options },
         stash_obj => $self,
         path      => $path,
      );
   }

   die sprintf("Too deep down the rabbit hole, stopped at '%s'", $path)
      if ($path->step_count > 255);

   my $hash = {};
   my @keys = $type eq 'HASH' ? (keys %$ref) : (0 .. $#$ref);
   foreach my $key (@keys) {
      my $val = $type eq 'HASH' ? $ref->{$key} : $ref->[$key];

      # Add on to $path
      my $newpath = $path->clone;
      $newpath->push( $newpath->key2hash($key, $type) );

      # Recurse back to give us a full set of $path => $val pairs
      my $newhash = $self->serialize_refpath($newpath, $val);

      # Merge (shallowly)
      $hash->{$_} = $newhash->{$_} for (grep { defined $newhash->{$_} or !$self->remove_undefs } keys %$newhash);
   }

   return $hash;
}

### EXPANSION ###

sub deserialize {
   my ($self, $hash) = @_;

   my $root;  # not sure if it's a hash or array yet
   foreach my $path (sort keys %$hash) {
      my $branch = $self->deserialize_pathval($path, $hash->{$path}) || return;  # error already set

      # New root?
      unless (defined $root) {
         $root = $branch;
         next;
      }

      # Our merge behavior might die on us (or Hash::Merge itself)
      my $err;
      try   { $root = $self->merge($root, $branch); }
      catch { $err = $_; };

      # Add path to error
      die sprintf("In path '%s', %s", $path, $err) if ($err);
   }

   return $root;
}

sub deserialize_pathval {
   my ($self, $path, $val) = @_;

   my ($root, $leaf, $hash_steps);
   $path = $self->path_style->new(
      %{ $self->path_options },
      path => $path,
   );

   for my $i (0 .. $path->step_count - 1) {
      my $hash_step = $path->_path->[$i];
      my $next_step = ($i == $path->step_count - 1) ? undef : $path->_path->[$i+1];

      # Construct $root if we need to
      $root = $leaf = ( $hash_step->{type} eq 'HASH' ? {} : [] ) unless ($i);

      # Add in the key, construct the next ref, and move the leaf forward
      my $type_str = substr($hash_step->{type}, 0, 1);
      $type_str   .= substr($next_step->{type}, 0, 1) if $next_step;

      my $key = $hash_step->{key};

      # (RIP for/when)
      if    ($type_str eq 'HH') { $leaf = $leaf->{$key} = {}; }
      elsif ($type_str eq 'HA') { $leaf = $leaf->{$key} = []; }
      elsif ($type_str eq 'AH') { $leaf = $leaf->[$key] = {}; }
      elsif ($type_str eq 'AA') { $leaf = $leaf->[$key] = []; }
      elsif ($type_str eq 'H')  { $leaf->{$key} = $val; }
      elsif ($type_str eq 'A')  { $leaf->[$key] = $val; }
   }

   return $root;
}

42;

__END__

=pod

=encoding utf-8

=head1 NAME

Data::SplitSerializer - Modules that "split serialize" data structures

=head1 SYNOPSIS

   use Data::SplitSerializer;

   my $dss = Data::SplitSerializer->new( path_style => 'DZIL' );
   my $serialized = {
      'gophers[0].holes'      => 3,
      'gophers[0].food.type'  => 'grubs',
      'gophers[0].food.count' => 7,

      'gophers[1].holes'      => 1,
      'gophers[1].food.type'  => 'fruit',
      'gophers[1].food.count' => 5,
   };
   my $deserialized = $dss->deserialize($serialized);

   my $more_gophers = [];
   $more_gophers->[2] = {
      holes => 2,
      food  => {
         type  => 'earthworms',
         count => 15,
      },
   };

   $deserialized->{gophers} = $dss->merge( $deserialized->{gophers}, $more_gophers );

=head1 DESCRIPTION

Split serialization is a unique form of serialization that only serializes part of the data structure (as a path on the left side) and
leaves the rest of the data, typically a scalar, untouched (as a value on the right side).  Consider the gopher example above:

   my $deserialized = {
      gophers => [
         {
            holes => 3,
            food  => {
               type  => 'grubs',
               count => 7,
            },
         },
         {
            holes => 1,
            food  => {
               type  => 'fruit',
               count => 5,
            },
         },
         {
            holes => 2,
            food  => {
               type  => 'earthworms',
               count => 15,
            },
         }
      ],
   };

A full serializer, like L<Data::Serializer> or L<Data::Dumper>, would turn the entire object into a string, much like the real code
above, or into JSON, XML, BerkeleyDB, etc.  But the end values would be lost in the stream.  If you were given an object like this,
how would you store the data in an easy-to-access form for a caching module like L<CHI>?  It requires keyE<sol>value pairs.  The same
goes for L<KiokuDB> or various other storageE<sol>ORM modules.

Data::SplitSerializer uses split serialization to turn the same data into a hash of pathE<sol>value pairs like this:

   my $serialized = {
      'gophers[0].holes'      => 3,
      'gophers[0].food.type'  => 'grubs',
      'gophers[0].food.count' => 7,

      'gophers[1].holes'      => 1,
      'gophers[1].food.type'  => 'fruit',
      'gophers[1].food.count' => 5,

      'gophers[2].holes'      => 2,
      'gophers[2].food.type'  => 'earthworms',
      'gophers[2].food.count' => 15,
   };

Now you can stash the data into whatever storage engine you want... or just use it as a simple hash.

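For instance, a minimal round-trip sketch (the C<$cache> object and its C<set> method below are hypothetical stand-ins for whatever
store you use):

   my $dss  = Data::SplitSerializer->new;
   my $flat = $dss->serialize($deserialized);   # hashref of path/value pairs

   # stash each pair individually in a key/value store
   # $cache->set($_, $flat->{$_}) for keys %$flat;

   # ...and rebuild the nested structure later
   my $tree = $dss->deserialize($flat);
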
=for Pod::Coverage BUILD

=head1 CONSTRUCTOR

   # Defaults shown
   my $dss = Data::SplitSerializer->new(
      path_style   => 'DZIL',
      path_options => {
         auto_normalize => 1,
         auto_cleanup   => 1,
      },
      remove_undefs => 1,
   );

Creates a new serializer object.  Accepts the following arguments:

=head2 path_style

   path_style => 'File::Unix'
   path_style => '=MyApp::Parse::Path::Foobar'

Class used to create new L<path objects|Parse::Path> for path parsing.  With a C<<< = >>> prefix, it will use that as the full
class.  Otherwise, the class will be interpreted as C<<< Parse::Path::$class >>>.

Default is L<DZIL|Parse::Path::DZIL>.

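For example, a sketch of both forms (C<MyApp::Parse::Path::Foobar> is a hypothetical class, and the C<File::Unix> shorthand assumes
L<Parse::Path::File::Unix> is installed):

   # shorthand, loads Parse::Path::File::Unix
   my $unix_dss   = Data::SplitSerializer->new( path_style => 'File::Unix' );

   # literal class name, used as-is
   my $custom_dss = Data::SplitSerializer->new( path_style => '=MyApp::Parse::Path::Foobar' );
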
=head2 path_options

   path_options => {
      auto_normalize => 1,
      auto_cleanup   => 1,
   }

Hash of options to pass to new path objects.  Typically, the default set of options is recommended, to ensure more commutative
paths.

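For example, a sketch that turns both options off (not generally recommended, per the note above):

   my $dss = Data::SplitSerializer->new(
      path_options => {
         auto_normalize => 0,
         auto_cleanup   => 0,
      },
   );
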
=head2 remove_undefs

   remove_undefs => 0

Boolean to indicate whether undefined values should be removed during serialization.  See L</Undefined values> for more information.

Default is on.

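A sketch of turning it off:

   # keep undef values (e.g. holes in sparse arrays) in the serialized output
   my $dss = Data::SplitSerializer->new( remove_undefs => 0 );
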
=head1 METHODS

=head2 serialize

   my $serialized = $dss->serialize($deserialized);

SerializesE<sol>flattens a ref.  Returns a serialized hashref of pathE<sol>value pairs.

=head2 serialize_refpath

   my $serialized = $dss->serialize_refpath($path_prefix, $deserialized);

   # serialize is basically this with some extra sanity checks
   my $serialized = $dss->serialize_refpath('', $deserialized);

The real workhorse for C<<< serialize >>>.  Recursively dives down the different pieces of the deserialized tree and eventually comes
back with the serialized hashref.  The path prefix is prepended to all of the paths returned in the serialized hashref.

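For example, with a prefix (the C<gophers[5]> path below is purely illustrative):

   my $serialized = $dss->serialize_refpath(
      'gophers[5]',
      { holes => 2, food => { type => 'earthworms' } },
   );

   # roughly:
   # {
   #    'gophers[5].holes'     => 2,
   #    'gophers[5].food.type' => 'earthworms',
   # }
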
=head2 deserialize

   my $deserialized = $dss->deserialize($serialized);

DeserializesE<sol>expands a hash of pathE<sol>data pairs.  Returns the expanded object, which is usually a hashref, but might be an arrayref.
For example:

   # Starts with an array
   my $serialized = {
      '[0].thingy' => 1,
      '[1].thingy' => 2,
   };
   my $deserialized = $dss->deserialize($serialized);

   # Returns:
   $deserialized = [
      { thingy => 1 },
      { thingy => 2 },
   ];

=head2 deserialize_pathval

   my $deserialized = $dss->deserialize_pathval($path, $value);

DeserializesE<sol>expands a single pathE<sol>data pair.  Returns the expanded object.

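For example (output shown approximately):

   my $deserialized = $dss->deserialize_pathval( 'gophers[0].food.type' => 'grubs' );

   # roughly:
   # { gophers => [ { food => { type => 'grubs' } } ] }
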
=head2 merge

   my $newhash = $dss->merge($hash1, $hash2);

Merges two hashes.  This is a direct handle to C<<< merge >>> from an (internal) L<Hash::Merge> object, and is used by L</deserialize> to
combine individual expanded objects.

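A small sketch, using the module's default merge behavior (see L</set_merge_behavior>): defined values from the second argument
overwrite the first, and arrays are handled by index rather than concatenated.

   my $merged = $dss->merge(
      { gophers => [ { holes => 3 } ] },
      { gophers => [ { holes => 4, food => { type => 'grubs' } } ] },
   );

   # roughly:
   # { gophers => [ { holes => 4, food => { type => 'grubs' } } ] }
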
=head2 set_merge_behavior

Handle to C<<< set_behavior >>> from the (internal) L<Hash::Merge> object.  B<Advanced usage only!>

Data::SplitSerializer uses a special custom type called C<<< LEFT_PRECEDENT_STRICT_ARRAY_INDEX >>>, which properly handles array
indexes and dies on any non-array-or-hash refs.

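A sketch of swapping in one of L<Hash::Merge>'s stock behaviors (the name comes from Hash::Merge's documentation; doing so loses
the strict array-index handling described above):

   $dss->set_merge_behavior('RIGHT_PRECEDENT');
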
=head2 specify_merge_behavior

Handle to C<<< specify_behavior >>> from the (internal) L<Hash::Merge> object.  B<Advanced usage only!>

=head1 CAVEATS

=head2 Undefined values

Flattening will remove pathE<sol>values if the value is undefined.  This is to clean up unused array values that appeared as holes in a
sparse array.  For example:

   # From one of the basic tests
   my $round_trip = $dss->serialize( $dss->deserialize_pathval(
      'a[0][1][1][1][1][2].too' => 'long'
   ) );

   # Without undef removal, this returns:
   $round_trip = {
      'a[0][0]' => undef,
      'a[0][1][0]' => undef,
      'a[0][1][1][0]' => undef,
      'a[0][1][1][1][0]' => undef,
      'a[0][1][1][1][1][0]' => undef,
      'a[0][1][1][1][1][1]' => undef,
      'a[0][1][1][1][1][2].too' => 'long',
   };

You can disable this with the L</remove_undefs> switch.

=head2 Refs in split serialization

Split serialization works by looking for HASH or ARRAY refs and diving further into them, adding path prefixes as it goes down.  If
it encounters some other ref (like a SCALAR), it will stop and consider that to be the value for that path.  In terms of ref parsing,
this means two things:

=over

=item 1.

Only HASH and ARRAYs can be examined deeper.

=item 2.

If you have a HASH or ARRAY as a "value", serialization cannot tell the difference and it will be included in the path.

=back

The former isn't that big of a problem, since deeper dives with other kinds of refs are either not possible or dangerous (like CODE).

The latter could be a problem if you started with a hashref with a pathE<sol>data pair, expanded it, and tried to flatten it again.  This
can be solved by protecting the hash with a REF.  Consider this example:

   my $round_trip = $dss->serialize( $dss->deserialize_pathval(
      'a[0]' => { your => 'hash' }
   ) );

   # Returns:
   $round_trip = {
      'a[0].your' => 'hash',
   };

   # Now protect the hash
   my $round_trip = $dss->serialize( $dss->deserialize_pathval(
      'a[0]' => \{ your => 'hash' }
   ) );

   # Returns:
   $round_trip = {
      'a[0]' => \{ your => 'hash' }
   };

=head2 Sparse arrays and memory usage

Since arrays within paths are based on indexes, there's a potential security issue with large indexes causing abnormal memory usage.
In Perl, these two arrays would have drastically different memory footprints:

   my @small;
   $small[0] = 1;

   my @large;
   $large[999999] = 1;

This can be mitigated by making sure the path style you use limits the total number of digits for array indexes.  L<Parse::Path> handles
this on all of its paths, but it's something to be aware of if you create your own path classes.

=head1 TODO

This module might split off into individual split serializers, but so far, this is the only one "out in the wild".

=head1 SEE ALSO

L<Parse::Path>

=head1 ACKNOWLEDGEMENTS

Kent Fredric for getting me started on the basic idea.

=head1 AVAILABILITY

The project homepage is L<https://github.com/SineSwiper/Data-SplitSerializer/wiki>.

The latest version of this module is available from the Comprehensive Perl
Archive Network (CPAN).  Visit L<http://www.perl.com/CPAN/> to find a CPAN
site near you, or see L<https://metacpan.org/module/Data::SplitSerializer/>.

=for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan

=head1 SUPPORT

=head2 Internet Relay Chat

You can get live help by using IRC (Internet Relay Chat).  If you don't know what IRC is,
please read this excellent guide: L<http://en.wikipedia.org/wiki/Internet_Relay_Chat>.  Please
be courteous and patient when talking to us, as we might be busy or sleeping!  You can join
those networks/channels and get help:

=over 4

=item *

irc.perl.org

You can connect to the server at 'irc.perl.org' and talk to this person for help: SineSwiper.

=back

=head2 Bugs / Feature Requests

Please report any bugs or feature requests via L<https://github.com/SineSwiper/Data-SplitSerializer/issues>.

=head1 AUTHOR

Brendan Byrd <BBYRD@CPAN.org>

=head1 CONTRIBUTOR

Brendan Byrd <bbyrd@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2013 by Brendan Byrd.

This is free software, licensed under:

  The Artistic License 2.0 (GPL Compatible)

=cut