File Coverage

blib/lib/Data/Clean.pm
Criterion Covered Total %
statement 149 167 89.2
branch 29 44 65.9
condition 11 19 57.8
subroutine 21 26 80.7
pod 3 15 20.0
total 213 271 78.6


line stmt bran cond sub pod time code
1             package Data::Clean;
2              
3             our $DATE = '2019-11-18'; # DATE
4             our $VERSION = '0.506'; # VERSION
5              
6 1     1   500 use 5.010001;
  1         8  
7 1     1   4 use strict;
  1         1  
  1         27  
8 1     1   5 use warnings;
  1         2  
  1         20  
9 1     1   1436 use Log::ger;
  1         54  
  1         4  
10              
11             sub new {
12 11     11 1 450234 my ($class, %opts) = @_;
13 11         27 my $self = bless {_opts=>\%opts}, $class;
14 11         33 log_trace("Cleanser options: %s", \%opts);
15              
16 11         34 my $cd = $self->_generate_cleanser_code;
17 9         12 for my $mod (keys %{ $cd->{modules} }) {
  9         23  
18 18         57 (my $mod_pm = "$mod.pm") =~ s!::!/!g;
19 18         659 require $mod_pm;
20             }
21 9         706 $self->{_cd} = $cd;
22 9         3404 $self->{_code} = eval $cd->{src};
23             {
24 9 50       16 last unless $cd->{clone_func} =~ /(.+)::(.+)/;
  9         45  
25 9         37 (my $mod_pm = "$1.pm") =~ s!::!/!g;
26 9         43 require $mod_pm;
27             }
28 9 50       19 die "Can't generate code: $@" if $@;
29              
30 9         28 $self;
31             }
32              
33             sub command_call_method {
34 1     1 0 3 my ($self, $cd, $args) = @_;
35 1         2 my $mn = $args->[0];
36 1 50       22 die "Invalid method name syntax" unless $mn =~ /\A\w+\z/;
37 0         0 return "{{var}} = {{var}}->$mn; \$ref = ref({{var}})";
38             }
39              
40             sub command_call_func {
41 2     2 0 4 my ($self, $cd, $args) = @_;
42 2         4 my $fn = $args->[0];
43 2 100       28 die "Invalid func name syntax" unless $fn =~ /\A\w+(::\w+)*\z/;
44 1         4 return "{{var}} = $fn({{var}}); \$ref = ref({{var}})";
45             }
46              
47             sub command_one_or_zero {
48 0     0 0 0 my ($self, $cd, $args) = @_;
49 0         0 return "{{var}} = {{var}} ? 1:0; \$ref = ''";
50             }
51              
52             sub command_deref_scalar_one_or_zero {
53 0     0 0 0 my ($self, $cd, $args) = @_;
54 0         0 return "{{var}} = \${ {{var}} } ? 1:0; \$ref = ''";
55             }
56              
57             sub command_deref_scalar {
58 0     0 0 0 my ($self, $cd, $args) = @_;
59 0         0 return '{{var}} = ${ {{var}} }; $ref = ref({{var}})';
60             }
61              
62             sub command_stringify {
63 0     0 0 0 my ($self, $cd, $args) = @_;
64 0         0 return '{{var}} = "{{var}}"; $ref = ""';
65             }
66              
67             sub command_replace_with_ref {
68 2     2 0 6 my ($self, $cd, $args) = @_;
69 2         3 return '{{var}} = $ref; $ref = ""';
70             }
71              
72             sub command_replace_with_str {
73 3     3 0 474 require String::PerlQuote;
74              
75 3         546 my ($self, $cd, $args) = @_;
76 3         12 return "{{var}} = ".String::PerlQuote::double_quote($args->[0]).'; $ref=""';
77             }
78              
79             sub command_unbless {
80 1     1 0 3 my ($self, $cd, $args) = @_;
81              
82 1         3 return join(
83             "",
84             'my $reftype = Scalar::Util::reftype({{var}}); ',
85             '{{var}} = $reftype eq "HASH" ? {%{ {{var}} }} :',
86             ' $reftype eq "ARRAY" ? [@{ {{var}} }] :',
87             ' $reftype eq "SCALAR" ? \(my $copy = ${ {{var}} }) :',
88             ' $reftype eq "CODE" ? sub { goto &{ {{var}} } } :',
89             '(die "Cannot unbless object with type $ref")',
90             );
91             }
92              
93             sub command_clone {
94 2     2 0 5 my ($self, $cd, $args) = @_;
95              
96 2   50     6 my $limit = $args->[0] // 1;
97 2         9 return join(
98             "",
99             "if (++\$ctr_circ <= $limit) { ",
100             "{{var}} = $cd->{clone_func}({{var}}); redo ",
101             "} else { ",
102             "{{var}} = 'CIRCULAR'; \$ref = '' }",
103             );
104             }
105              
106             sub command_unbless_ffc_inlined {
107 1     1 0 3 my ($self, $cd, $args) = @_;
108              
109             # code taken from Function::Fallback::CoreOrPP 0.07
110 1   50     6 $cd->{subs}{unbless} //= <<'EOC';
111             my $ref = shift;
112              
113             my $r = ref($ref);
114             # not a reference
115             return $ref unless $r;
116              
117             # return if not a blessed ref
118             my ($r2, $r3) = "$ref" =~ /(.+)=(.+?)\(/
119             or return $ref;
120              
121             if ($r3 eq 'HASH') {
122             return { %$ref };
123             } elsif ($r3 eq 'ARRAY') {
124             return [ @$ref ];
125             } elsif ($r3 eq 'SCALAR') {
126             return \( my $copy = ${$ref} );
127             } else {
128             die "Can't handle $ref";
129             }
130             EOC
131              
132 1         2 "{{var}} = \$sub_unbless->({{var}}); \$ref = ref({{var}})";
133             }
134              
135             # test
136             sub command_die {
137 0     0 0 0 my ($self, $cd, $args) = @_;
138 0         0 return "die";
139             }
140              
141             sub _generate_cleanser_code {
142 11     11   17 my $self = shift;
143 11         20 my $opts = $self->{_opts};
144              
145             # compilation data, a structure that will be passed around between routines
146             # during the generation of cleanser code.
147             my $cd = {
148             modules => {}, # key = module name, val = version
149 11         36 clone_func => $self->{_opts}{'!clone_func'},
150             code => '',
151             subs => {},
152             };
153              
154 11   50     55 $cd->{modules}{'Scalar::Util'} //= 0;
155              
156 11 50       22 if (!$cd->{clone_func}) {
157 11         18 $cd->{clone_func} = 'Clone::PP::clone';
158             }
159             {
160 11 50       15 last unless $cd->{clone_func} =~ /(.+)::(.+)/;
  11         66  
161 11   50     45 $cd->{modules}{$1} //= 0;
162             }
163              
164 11         16 my (@code, @stmts_ary, @stmts_hash, @stmts_main);
165              
166 11         40 my $n = 0;
167             my $add_stmt = sub {
168 30     30   39 my $which = shift;
169 30 100 100     73 if ($which eq 'if' || $which eq 'new_if') {
170 28         42 my ($cond0, $act0) = @_;
171 28         72 for ([\@stmts_ary, '$e', 'ary'],
172             [\@stmts_hash, '$h->{$k}', 'hash'],
173             [\@stmts_main, '$_', 'main']) {
174 84         102 my $act = $act0 ; $act =~ s/\Q{{var}}\E/$_->[1]/g;
  84         237  
175 84         106 my $cond = $cond0; $cond =~ s/\Q{{var}}\E/$_->[1]/g;
  84         122  
176             #unless (@{ $_->[0] }) { push @{ $_->[0] }, ' say "D:'.$_->[2].' val=", '.$_->[1].', ", ref=$ref"; # DEBUG'."\n" }
177 84 100 100     94 push @{ $_->[0] }, " ".($n && $which ne 'new_if' ? "els":"")."if ($cond) { $act }\n";
  84         338  
178             }
179 28         69 $n++;
180             } else {
181 2         3 my ($stmt0) = @_;
182 2         6 for ([\@stmts_ary, '$e', 'ary'],
183             [\@stmts_hash, '$h->{$k}', 'hash'],
184             [\@stmts_main, '$_', 'main']) {
185 6         9 my $stmt = $stmt0; $stmt =~ s/\Q{{var}}\E/$_->[1]/g;
  6         16  
186 6         9 push @{ $_->[0] }, " $stmt;\n";
  6         14  
187             }
188             }
189 11         42 };
190             my $add_if = sub {
191 16     16   34 $add_stmt->('if', @_);
192 11         26 };
193             my $add_new_if = sub {
194 12     12   22 $add_stmt->('new_if', @_);
195 11         31 };
196             my $add_if_ref = sub {
197 10     10   15 my ($ref, $act0) = @_;
198 10         22 $add_if->("\$ref eq '$ref'", $act0);
199 11         29 };
200             my $add_new_if_ref = sub {
201 7     7   12 my ($ref, $act0) = @_;
202 7         16 $add_new_if->("\$ref eq '$ref'", $act0);
203 11         31 };
204              
205             # catch circular references
206 11         15 my $circ = $opts->{-circular};
207 11 100       22 if ($circ) {
208 3         6 my $meth = "command_$circ->[0]";
209 3 50       13 die "Can't handle command $circ->[0] for option '-circular'" unless $self->can($meth);
210 3         8 my @args = @$circ; shift @args;
  3         5  
211 3         8 my $act = $self->$meth($cd, \@args);
212             #$add_stmt->('stmt', 'say "ref=$ref, " . {{var}}'); # DEBUG
213 3         17 $add_new_if->('$ref && $refs{ {{var}} }++', $act);
214             }
215              
216             # catch object of specified classes (e.g. DateTime, etc)
217 11         37 for my $on (grep {/\A\w*(::\w+)*\z/} sort keys %$opts) {
  14         63  
218 3         6 my $o = $opts->{$on};
219 3 50       7 next unless $o;
220 3         7 my $meth = "command_$o->[0]";
221 3 50       17 die "Can't handle command $o->[0] for option '$on'" unless $self->can($meth);
222 3         6 my @args = @$o; shift @args;
  3         4  
223 3         7 my $act = $self->$meth($cd, \@args);
224 3         16 $add_if_ref->($on, $act);
225             }
226              
227             # catch general object not caught by previous
228 11         29 for my $p ([-obj => 'Scalar::Util::blessed({{var}})']) {
229 11         18 my $o = $opts->{$p->[0]};
230 11 100       20 next unless $o;
231 6         14 my $meth = "command_$o->[0]";
232 6 50       25 die "Can't handle command $o->[0] for option '$p->[0]'" unless $self->can($meth);
233 6         14 my @args = @$o; shift @args;
  6         6  
234 6         20 $add_if->($p->[1], $self->$meth($cd, \@args));
235             }
236              
237             # recurse array and hash
238 9 100       20 if ($opts->{'!recurse_obj'}) {
239 2         4 $add_stmt->('stmt', 'my $reftype=Scalar::Util::reftype({{var}})//""');
240 2         5 $add_new_if->('$reftype eq "ARRAY"', '$process_array->({{var}})');
241 2         3 $add_if->('$reftype eq "HASH"' , '$process_hash->({{var}})');
242             } else {
243 7         14 $add_new_if_ref->("ARRAY", '$process_array->({{var}})');
244 7         11 $add_if_ref->("HASH" , '$process_hash->({{var}})');
245             }
246              
247             # lastly, catch any reference left
248 9         16 for my $p ([-ref => '$ref']) {
249 9         14 my $o = $opts->{$p->[0]};
250 9 50       19 next unless $o;
251 0         0 my $meth = "command_$o->[0]";
252 0 0       0 die "Can't handle command $o->[0] for option '$p->[0]'" unless $self->can($meth);
253 0         0 my @args = @$o; shift @args;
  0         0  
254 0         0 $add_if->($p->[1], $self->$meth($cd, \@args));
255             }
256              
257 9         15 push @code, 'sub {'."\n";
258              
259 9         12 for (sort keys %{$cd->{subs}}) {
  9         24  
260 1         13 push @code, "state \$sub_$_ = sub { ".$cd->{subs}{$_}." };\n";
261             }
262              
263 9         14 push @code, 'my $data = shift;'."\n";
264 9 100       19 push @code, 'state %refs;'."\n" if $circ;
265 9 100       14 push @code, 'state $ctr_circ;'."\n" if $circ;
266 9         14 push @code, 'state $process_array;'."\n";
267 9         9 push @code, 'state $process_hash;'."\n";
268 9         46 push @code, (
269             'if (!$process_array) { $process_array = sub { my $a = shift; for my $e (@$a) { ',
270             'my $ref=ref($e);'."\n",
271             join("", @stmts_ary).'} } }'."\n"
272             );
273 9         27 push @code, (
274             'if (!$process_hash) { $process_hash = sub { my $h = shift; for my $k (keys %$h) { ',
275             'my $ref=ref($h->{$k});'."\n",
276             join("", @stmts_hash).'} } }'."\n"
277             );
278 9 100       18 push @code, '%refs = (); $ctr_circ=0;'."\n" if $circ;
279 9         21 push @code, (
280             'for ($data) { ',
281             'my $ref=ref($_);'."\n",
282             join("", @stmts_main).'}'."\n"
283             );
284 9         14 push @code, '$data'."\n";
285 9         10 push @code, '}'."\n";
286              
287 9         45 my $code = join("", @code).";";
288              
289 9 50 33     25 if ($ENV{LOG_CLEANSER_CODE} && log_is_trace()) {
290 0         0 require String::LineNumber;
291             log_trace("Cleanser code:\n%s",
292 0 0 0     0 $ENV{LINENUM} // 1 ?
293             String::LineNumber::linenum($code) : $code);
294             }
295              
296 9         15 $cd->{src} = $code;
297              
298 9         110 $cd;
299             }
300              
301             sub clean_in_place {
302 11     11 1 1143 my ($self, $data) = @_;
303              
304 11         207 $self->{_code}->($data);
305             }
306              
307             sub clone_and_clean {
308 1     1   2096 no strict 'refs';
  1         2  
  1         86  
309              
310 4     4 1 23 my ($self, $data) = @_;
311 4         5 my $clone = &{$self->{_cd}{clone_func}}($data);
  4         19  
312 4         395 $self->clean_in_place($clone);
313             }
314              
315             1;
316             # ABSTRACT: Clean data structure
317              
318             __END__
319              
320             =pod
321              
322             =encoding UTF-8
323              
324             =head1 NAME
325              
326             Data::Clean - Clean data structure
327              
328             =head1 VERSION
329              
330             This document describes version 0.506 of Data::Clean (from Perl distribution Data-Clean), released on 2019-11-18.
331              
332             =head1 SYNOPSIS
333              
334             use Data::Clean;
335              
336             my $cleanser = Data::Clean->new(
337             # specify how to deal with specific classes
338             'DateTime' => [call_method => 'epoch'], # replace object with its epoch
339             'Time::Moment' => [call_method => 'epoch'], # replace object with its epoch
340             'Regexp' => ['stringify'], # replace $obj with "$obj"
341              
342             # specify how to deal with all scalar refs
343             SCALAR => ['deref_scalar'], # replace \1 with 1
344              
345             # specify how to deal with circular reference
346             -circular => ['clone'],
347              
348             # specify how to deal with all other kinds of objects
349             -obj => ['unbless'],
350             );
351              
352             # to get cleansed data
353             my $cleansed_data = $cleanser->clone_and_clean($data);
354              
355             # to replace original data with cleansed one
356             $cleanser->clean_in_place($data);
357              
358             =head1 DESCRIPTION
359              
360             This class can be used to process a data structure by replacing some forms of
361             data items with other forms. One of the main uses is to clean "unsafe" data,
362             e.g. clean a data structure so it can be encoded to JSON (see
363             L<Data::Clean::ForJSON>, which is a thin wrapper over this class).
364              
365             As can be seen from the example, you specify a list of transformations to be
366             done, and then this class will generate an appropriate Perl code to do the
367             cleansing. This class is faster than the other ways of processing, e.g.
368             L<Data::Rmap> (see L<Bencher::Scenarios::DataCleansing> for some benchmarks).
369              
370             =for Pod::Coverage ^(command_.+)$
371              
372             =head1 METHODS
373              
374             =head2 new(%opts) => $obj
375              
376             Create a new instance.
377              
378             Options specify what to do with certain category of data. Option keys are either
379             reference types (like C<HASH>, C<ARRAY>, C<SCALAR>) or class names (like
380             C<Foo::Bar>), or C<-obj> (to match all kinds of objects, a.k.a. blessed
381             references), C<-circular> (to match circular references), C<-ref> (to refer to
382             any kind of references, used to process references not handled by other
383             options). Option values are arrayrefs, the first element of the array is command
384             name, to specify what to do with the reference/class. The rest are command
385             arguments.
386              
387             Note that arrayrefs and hashrefs are always walked into, so it's not trapped by
388             C<-ref>.
389              
390             Default for C<%opts>: C<< -ref => 'stringify' >>.
391              
392             Option keys that start with C<!> are special:
393              
394             =over
395              
396             =item * !recurse_obj (bool)
397              
398             Can be set to true to to recurse into objects if they are hash- or array-based.
399             By default objects are not recursed into. Note that if you enable this option,
400             object options (like C<Foo::Bar> or C<-obj>) won't work for hash- and
401             array-based objects because they will be recursed instead.
402              
403             =item * !clone_func (str)
404              
405             Set fully qualified name of clone function to use. The default is to use
406             C<Clone::PP::clone>.
407              
408             The clone module (all but the last part of the C<!clone_func> value) will
409             automatically be loaded using C<require()>.
410              
411             =back
412              
413             Available commands:
414              
415             =over 4
416              
417             =item * ['stringify']
418              
419             This will stringify a reference like C<{}> to something like C<HASH(0x135f998)>.
420              
421             =item * ['replace_with_ref']
422              
423             This will replace a reference like C<{}> with C<HASH>.
424              
425             =item * ['replace_with_str', STR]
426              
427             This will replace a reference like C<{}> with I<STR>.
428              
429             =item * ['call_method' => STR]
430              
431             This will call a method named I<STR> and use its return as the replacement. For
432             example: C<< DateTime->from_epoch(epoch=>1000) >> when processed with C<<
433             [call_method => 'epoch'] >> will become 1000.
434              
435             =item * ['call_func', STR]
436              
437             This will call a function named I<STR> with value as argument and use its return
438             as the replacement.
439              
440             =item * ['one_or_zero']
441              
442             This will perform C<< $val ? 1:0 >>.
443              
444             =item * ['deref_scalar_one_or_zero']
445              
446             This will perform C<< ${$val} ? 1:0 >>.
447              
448             =item * ['deref_scalar']
449              
450             This will replace a scalar reference like \1 with 1.
451              
452             =item * ['unbless']
453              
454             This will perform unblessing using L<Function::Fallback::CoreOrPP::unbless()>.
455             Should be done only for objects (C<-obj>).
456              
457             =item * ['code', STR]
458              
459             This will replace with I<STR> treated as Perl code.
460              
461             =item * ['clone', INT]
462              
463             This command is useful if you have circular references and want to expand/copy
464             them. For example:
465              
466             my $def_opts = { opt1 => 'default', opt2 => 0 };
467             my $users = { alice => $def_opts, bob => $def_opts, charlie => $def_opts };
468              
469             C<$users> contains three references to the same data structure. With the default
470             behaviour of C<< -circular => [replace_with_str => 'CIRCULAR'] >> the cleaned
471             data structure will be:
472              
473             { alice => { opt1 => 'default', opt2 => 0 },
474             bob => 'CIRCULAR',
475             charlie => 'CIRCULAR' }
476              
477             But with C<< -circular => ['clone'] >> option, the data structure will be
478             cleaned to become (the C<$def_opts> is cloned):
479              
480             { alice => { opt1 => 'default', opt2 => 0 },
481             bob => { opt1 => 'default', opt2 => 0 },
482             charlie => { opt1 => 'default', opt2 => 0 }, }
483              
484             The command argument specifies the number of references to clone as a limit (the
485             default is 50), since a cyclical structure can lead to infinite cloning. Above
486             this limit, the circular references will be replaced with a string
487             C<"CIRCULAR">. For example:
488              
489             my $a = [1]; push @$a, $a;
490              
491             With C<< -circular => ['clone', 2] >> the data will be cleaned as:
492              
493             [1, [1, [1, "CIRCULAR"]]]
494              
495             With C<< -circular => ['clone', 3] >> the data will be cleaned as:
496              
497             [1, [1, [1, [1, "CIRCULAR"]]]]
498              
499             =back
500              
501             =head2 $obj->clean_in_place($data) => $cleaned
502              
503             Clean $data. Modify data in-place.
504              
505             =head2 $obj->clone_and_clean($data) => $cleaned
506              
507             Clean $data. Clone $data first.
508              
509             =head1 ENVIRONMENT
510              
511             =over
512              
513             =item * LOG_CLEANSER_CODE => BOOL (default: 0)
514              
515             Can be enabled if you want to see the generated cleanser code. It is logged at
516             level C<trace> using L<Log::ger>.
517              
518             =item * LINENUM => BOOL (default: 1)
519              
520             When logging cleanser code, whether to give line numbers.
521              
522             =back
523              
524             =head1 HOMEPAGE
525              
526             Please visit the project's homepage at L<https://metacpan.org/release/Data-Clean>.
527              
528             =head1 SOURCE
529              
530             Source repository is at L<https://github.com/perlancar/perl-Data-Clean>.
531              
532             =head1 BUGS
533              
534             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Data-Clean>
535              
536             When submitting a bug or request, please include a test-file or a
537             patch to an existing test-file that illustrates the bug or desired
538             feature.
539              
540             =head1 SEE ALSO
541              
542             Related modules: L<Data::Rmap>, L<Hash::Sanitize>, L<Data::Walk>.
543              
544             =head1 AUTHOR
545              
546             perlancar <perlancar@cpan.org>
547              
548             =head1 COPYRIGHT AND LICENSE
549              
550             This software is copyright (c) 2019, 2018, 2017, 2016 by perlancar@cpan.org.
551              
552             This is free software; you can redistribute it and/or modify it under
553             the same terms as the Perl 5 programming language system itself.
554              
555             =cut