File Coverage

blib/lib/App/CSVUtils/csv_lookup_fields.pm
Criterion Covered Total %
statement 11 11 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 15 15 100.0


line stmt bran cond sub pod time code
1             package App::CSVUtils::csv_lookup_fields;
2              
3 1     1   8808 use 5.010001;
  1         4  
4 1     1   5 use strict;
  1         2  
  1         26  
5 1     1   7 use warnings;
  1         3  
  1         83  
6              
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2023-07-25'; # DATE
9             our $DIST = 'App-CSVUtils'; # DIST
10             our $VERSION = '1.030'; # VERSION
11              
12 1         1073 use App::CSVUtils qw(
13             gen_csv_util
14 1     1   6 );
  1         2  
15              
16             gen_csv_util(
17             name => 'csv_lookup_fields',
18             summary => 'Fill fields of a CSV file from another',
19             description => <<'_',
20              
21             Example input:
22              
23             # report.csv
24             client_id,followup_staff,followup_note,client_email,client_phone
25             101,Jerry,not renewing,
26             299,Jerry,still thinking over,
27             734,Elaine,renewing,
28              
29             # clients.csv
30             id,name,email,phone
31             101,Andy,andy@example.com,555-2983
32             102,Bob,bob@acme.example.com,555-2523
33             299,Cindy,cindy@example.com,555-7892
34             400,Derek,derek@example.com,555-9018
35             701,Edward,edward@example.com,555-5833
36             734,Felipe,felipe@example.com,555-9067
37              
38             To fill up the `client_email` and `client_phone` fields of `report.csv` from
39             `clients.csv`, we can use:
40              
41             % csv-lookup-fields report.csv clients.csv --lookup-fields client_id:id --fill-fields client_email:email,client_phone:phone
42              
43             The result will be:
44              
45             client_id,followup_staff,followup_note,client_email,client_phone
46             101,Jerry,not renewing,andy@example.com,555-2983
47             299,Jerry,still thinking over,cindy@example.com,555-7892
48             734,Elaine,renewing,felipe@example.com,555-9067
49              
50             _
51             add_args => {
52             ignore_case => {
53             schema => 'bool*',
54             cmdline_aliases => {ci=>{}, i=>{}},
55             },
56             fill_fields => {
57             schema => ['str*'],
58             req => 1,
59             },
60             lookup_fields => {
61             schema => ['str*'],
62             req => 1,
63             },
64             count => {
65             summary => 'Do not output rows, just report the number of rows filled',
66             schema => 'bool*',
67             cmdline_aliases => {c=>{}},
68             },
69             },
70              
71             reads_multiple_csv => 1,
72              
73             tags => ['category:templating'],
74              
75             on_begin => sub {
76             my $r = shift;
77              
78             # check arguments
79             @{ $r->{util_args}{input_filenames} } == 2
80             or die [400, "Please specify exactly 2 files: target and source"];
81              
82             my @lookup_fields; # elem = [fieldname-in-target, fieldname-in-source]
83             {
84             my @ff = ref($r->{util_args}{lookup_fields}) eq 'ARRAY' ?
85             @{$r->{util_args}{lookup_fields}} : split(/,/, $r->{util_args}{lookup_fields});
86             for my $field_idx (0..$#ff) {
87             my @ff2 = split /:/, $ff[$field_idx], 2;
88             if (@ff2 < 2) {
89             $ff2[1] = $ff2[0];
90             }
91             $lookup_fields[$field_idx] = \@ff2;
92             }
93             }
94              
95             my %fill_fields; # key=fieldname-in-target, val=fieldname-in-source
96             {
97             my @ff = ref($r->{util_args}{fill_fields}) eq 'ARRAY' ?
98             @{$r->{util_args}{fill_fields}} : split(/,/, $r->{util_args}{fill_fields});
99             for my $field_idx (0..$#ff) {
100             my @ff2 = split /:/, $ff[$field_idx], 2;
101             if (@ff2 < 2) {
102             $ff2[1] = $ff2[0];
103             }
104             $fill_fields{ $ff2[0] } = $ff2[1];
105             }
106             }
107              
108             # these are the keys that we add to the stash
109             $r->{lookup_fields} = \@lookup_fields;
110             $r->{fill_fields} = \%fill_fields;
111             $r->{source_fields_idx} = [];
112             $r->{source_fields} = [];
113             $r->{source_data_rows} = [];
114             $r->{target_fields_idx} = [];
115             $r->{target_fields} = [];
116             $r->{target_data_rows} = [];
117             },
118              
119             on_input_header_row => sub {
120             my $r = shift;
121              
122             if ($r->{input_filenum} == 1) {
123             $r->{target_fields} = $r->{input_fields};
124             $r->{target_fields_idx} = $r->{input_fields_idx};
125             $r->{output_fields} = $r->{input_fields};
126             } else {
127             $r->{source_fields} = $r->{input_fields};
128             $r->{source_fields_idx} = $r->{input_fields_idx};
129             }
130             },
131              
132             on_input_data_row => sub {
133             my $r = shift;
134              
135             if ($r->{input_filenum} == 1) {
136             push @{ $r->{target_data_rows} }, $r->{input_row};
137             } else {
138             push @{ $r->{source_data_rows} }, $r->{input_row};
139             }
140             },
141              
142             after_close_input_files => sub {
143             my $r = shift;
144              
145             my $ci = $r->{util_args}{ignore_case};
146              
147             # build lookup table
148             my %lookup_table; # key = joined lookup fields, val = source row idx
149             for my $row_idx (0..$#{$r->{source_data_rows}}) {
150             my $row = $r->{source_data_rows}[$row_idx];
151             my $key = join "|", map {
152             my $field = $r->{lookup_fields}[$_][1];
153             my $field_idx = $r->{source_fields_idx}{$field};
154             my $val = defined $field_idx ? $row->[$field_idx] : "";
155             $val = lc $val if $ci;
156             $val;
157             } 0..$#{ $r->{lookup_fields} };
158             $lookup_table{$key} //= $row_idx;
159             }
160             #use DD; dd { lookup_fields=>$r->{lookup_fields}, fill_fields=>$r->{fill_fields}, lookup_table=>\%lookup_table };
161              
162             # fill target csv
163             my $num_filled = 0;
164              
165             for my $row (@{ $r->{target_data_rows} }) {
166             my $key = join "|", map {
167             my $field = $r->{lookup_fields}[$_][0];
168             my $field_idx = $r->{target_fields_idx}{$field};
169             my $val = defined $field_idx ? $row->[$field_idx] : "";
170             $val = lc $val if $ci;
171             $val;
172             } 0..$#{ $r->{lookup_fields} };
173              
174             #say "D:looking up '$key' ...";
175             if (defined(my $row_idx = $lookup_table{$key})) {
176             #say " D:found";
177             my $row_filled;
178             my $source_row = $r->{source_data_rows}[$row_idx];
179             for my $field (keys %{$r->{fill_fields}}) {
180             my $target_field_idx = $r->{target_fields_idx}{$field};
181             next unless defined $target_field_idx;
182             my $source_field_idx = $r->{source_fields_idx}{ $r->{fill_fields}{$field} };
183             next unless defined $source_field_idx;
184             $row->[$target_field_idx] =
185             $source_row->[$source_field_idx];
186             $row_filled++;
187             }
188             $num_filled++ if $row_filled;
189             }
190             unless ($r->{util_args}{count}) {
191             $r->{code_print_row}->($row);
192             }
193             } # for target data row
194              
195             if ($r->{util_args}{count}) {
196             $r->{result} = [200, "OK", $num_filled];
197             }
198             },
199             );
200              
201             1;
202             # ABSTRACT: Fill fields of a CSV file from another
203              
204             __END__
205              
206             =pod
207              
208             =encoding UTF-8
209              
210             =head1 NAME
211              
212             App::CSVUtils::csv_lookup_fields - Fill fields of a CSV file from another
213              
214             =head1 VERSION
215              
216             This document describes version 1.030 of App::CSVUtils::csv_lookup_fields (from Perl distribution App-CSVUtils), released on 2023-07-25.
217              
218             =head1 FUNCTIONS
219              
220              
221             =head2 csv_lookup_fields
222              
223             Usage:
224              
225             csv_lookup_fields(%args) -> [$status_code, $reason, $payload, \%result_meta]
226              
227             Fill fields of a CSV file from another.
228              
229             Example input:
230              
231             # report.csv
232             client_id,followup_staff,followup_note,client_email,client_phone
233             101,Jerry,not renewing,
234             299,Jerry,still thinking over,
235             734,Elaine,renewing,
236            
237             # clients.csv
238             id,name,email,phone
239             101,Andy,andy@example.com,555-2983
240             102,Bob,bob@acme.example.com,555-2523
241             299,Cindy,cindy@example.com,555-7892
242             400,Derek,derek@example.com,555-9018
243             701,Edward,edward@example.com,555-5833
244             734,Felipe,felipe@example.com,555-9067
245              
246             To fill up the C<client_email> and C<client_phone> fields of C<report.csv> from
247             C<clients.csv>, we can use:
248              
249             % csv-lookup-fields report.csv clients.csv --lookup-fields client_id:id --fill-fields client_email:email,client_phone:phone
250              
251             The result will be:
252              
253             client_id,followup_staff,followup_note,client_email,client_phone
254             101,Jerry,not renewing,andy@example.com,555-2983
255             299,Jerry,still thinking over,cindy@example.com,555-7892
256             734,Elaine,renewing,felipe@example.com,555-9067
257              
258             This function is not exported.
259              
260             Arguments ('*' denotes required arguments):
261              
262             =over 4
263              
264             =item * B<count> => I<bool>
265              
266             Do not output rows, just report the number of rows filled.
267              
268             =item * B<fill_fields>* => I<str>
269              
270             (No description)
271              
272             =item * B<ignore_case> => I<bool>
273              
274             (No description)
275              
276             =item * B<inplace> => I<true>
277              
278             Output to the same file as input.
279              
280             Normally, you output to a different file than input. If you try to output to the
281             same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
282             prevents you from doing it. However, with this C<--inplace> option, you can
283             output to the same file. Like perl's C<-i> option, this will first output to a
284             temporary file in the same directory as the input file then rename to the final
285             file at the end. You cannot specify output file (C<-o>) when using this option,
286             but you can specify backup extension with C<-b> option.
287              
288             Some caveats:
289              
290             =over
291              
292             =item * if input file is a symbolic link, it will be replaced with a regular file;
293              
294             =item * renaming (implemented using C<rename()>) can fail if input filename is too long;
295              
296             =item * value specified in C<-b> is currently not checked for acceptable characters;
297              
298             =item * things can also fail if permissions are restrictive;
299              
300             =back
301              
302             =item * B<inplace_backup_ext> => I<str> (default: "")
303              
304             Extension to add for backup of input file.
305              
306             In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
307             rename the input file using this extension as a backup. The old existing backup
308             will be overwritten, if any.
309              
310             =item * B<input_escape_char> => I<str>
311              
312             Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
313              
314             Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
315              
316             =item * B<input_filenames> => I<array[filename]> (default: ["-"])
317              
318             Input CSV files.
319              
320             Use C<-> to read from stdin.
321              
322             Encoding of input file is assumed to be UTF-8.
323              
324             =item * B<input_header> => I<bool> (default: 1)
325              
326             Specify whether input CSV has a header row.
327              
328             By default, the first row of the input CSV will be assumed to contain field
329             names (and the second row contains the first data row). When you declare that
330             input CSV does not have header row (C<--no-input-header>), the first row of the
331             CSV is assumed to contain the first data row. Fields will be named C<field1>,
332             C<field2>, and so on.
333              
334             =item * B<input_quote_char> => I<str>
335              
336             Specify field quote character in input CSV, will be passed to Text::CSV_XS.
337              
338             Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
339              
340             =item * B<input_sep_char> => I<str>
341              
342             Specify field separator character in input CSV, will be passed to Text::CSV_XS.
343              
344             Defaults to C<,> (comma). Overrides C<--input-tsv> option.
345              
346             =item * B<input_tsv> => I<true>
347              
348             Inform that input file is in TSV (tab-separated) format instead of CSV.
349              
350             Overriden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
351             options. If one of those options is specified, then C<--input-tsv> will be
352             ignored.
353              
354             =item * B<lookup_fields>* => I<str>
355              
356             (No description)
357              
358             =item * B<output_always_quote> => I<bool> (default: 0)
359              
360             Whether to always quote values.
361              
362             When set to false (the default), values are quoted only when necessary:
363              
364             field1,field2,"field three contains comma (,)",field4
365              
366             When set to true, then all values will be quoted:
367              
368             "field1","field2","field three contains comma (,)","field4"
369              
370             =item * B<output_escape_char> => I<str>
371              
372             Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
373              
374             This is like C<--input-escape-char> option but for output instead of input.
375              
376             Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
377              
378             =item * B<output_filename> => I<filename>
379              
380             Output filename.
381              
382             Use C<-> to output to stdout (the default if you don't specify this option).
383              
384             Encoding of output file is assumed to be UTF-8.
385              
386             =item * B<output_header> => I<bool>
387              
388             Whether output CSV should have a header row.
389              
390             By default, a header row will be output I<if> input CSV has header row. Under
391             C<--output-header>, a header row will be output even if input CSV does not have
392             header row (value will be something like "col0,col1,..."). Under
393             C<--no-output-header>, header row will I<not> be printed even if input CSV has
394             header row. So this option can be used to unconditionally add or remove header
395             row.
396              
397             =item * B<output_quote_char> => I<str>
398              
399             Specify field quote character in output CSV, will be passed to Text::CSV_XS.
400              
401             This is like C<--input-quote-char> option but for output instead of input.
402              
403             Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
404              
405             =item * B<output_quote_empty> => I<bool> (default: 0)
406              
407             Whether to quote empty values.
408              
409             When set to false (the default), empty values are not quoted:
410              
411             field1,field2,,field4
412              
413             When set to true, then empty values will be quoted:
414              
415             field1,field2,"",field4
416              
417             =item * B<output_sep_char> => I<str>
418              
419             Specify field separator character in output CSV, will be passed to Text::CSV_XS.
420              
421             This is like C<--input-sep-char> option but for output instead of input.
422              
423             Defaults to C<,> (comma). Overrides C<--output-tsv> option.
424              
425             =item * B<output_tsv> => I<bool>
426              
427             Inform that output file is TSV (tab-separated) format instead of CSV.
428              
429             This is like C<--input-tsv> option but for output instead of input.
430              
431             Overriden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
432             options. If one of those options is specified, then C<--output-tsv> will be
433             ignored.
434              
435             =item * B<overwrite> => I<bool>
436              
437             Whether to override existing output file.
438              
439              
440             =back
441              
442             Returns an enveloped result (an array).
443              
444             First element ($status_code) is an integer containing HTTP-like status code
445             (200 means OK, 4xx caller error, 5xx function error). Second element
446             ($reason) is a string containing error message, or something like "OK" if status is
447             200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
448             element (%result_meta) is called result metadata and is optional, a hash
449             that contains extra information, much like how HTTP response headers provide additional metadata.
450              
451             Return value: (any)
452              
453             =head1 HOMEPAGE
454              
455             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
456              
457             =head1 SOURCE
458              
459             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
460              
461             =head1 AUTHOR
462              
463             perlancar <perlancar@cpan.org>
464              
465             =head1 CONTRIBUTING
466              
467              
468             To contribute, you can send patches by email/via RT, or send pull requests on
469             GitHub.
470              
471             Most of the time, you don't need to build the distribution yourself. You can
472             simply modify the code, then test via:
473              
474             % prove -l
475              
476             If you want to build the distribution (e.g. to try to install it locally on your
477             system), you can install L<Dist::Zilla>,
478             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
479             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
480             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
481             that are considered a bug and can be reported to me.
482              
483             =head1 COPYRIGHT AND LICENSE
484              
485             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
486              
487             This is free software; you can redistribute it and/or modify it under
488             the same terms as the Perl 5 programming language system itself.
489              
490             =head1 BUGS
491              
492             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
493              
494             When submitting a bug or request, please include a test-file or a
495             patch to an existing test-file that illustrates the bug or desired
496             feature.
497              
498             =cut