File Coverage

blib/lib/App/CSVUtils/csv_lookup_fields.pm

Criterion	Covered	Total	%
statement	11	11	100.0
branch			n/a
condition			n/a
subroutine	4	4	100.0
pod			n/a
total	15	15	100.0

line	stmt	sub	time	code
1				package App::CSVUtils::csv_lookup_fields;
2
3	1	1	8808	use 5.010001;
	1		4
4	1	1	5	use strict;
	1		2
	1		26
5	1	1	7	use warnings;
	1		3
	1		83
6
7				our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8				our $DATE = '2023-07-25'; # DATE
9				our $DIST = 'App-CSVUtils'; # DIST
10				our $VERSION = '1.030'; # VERSION
11
12	1		1073	use App::CSVUtils qw(
13				gen_csv_util
14	1	1	6	);
	1		2
15
16				gen_csv_util(
17				name => 'csv_lookup_fields',
18				summary => 'Fill fields of a CSV file from another',
19				description => <<'_',
20
21				Example input:
22
23				# report.csv
24				client_id,followup_staff,followup_note,client_email,client_phone
25				101,Jerry,not renewing,
26				299,Jerry,still thinking over,
27				734,Elaine,renewing,
28
29				# clients.csv
30				id,name,email,phone
31				101,Andy,andy@example.com,555-2983
32				102,Bob,bob@acme.example.com,555-2523
33				299,Cindy,cindy@example.com,555-7892
34				400,Derek,derek@example.com,555-9018
35				701,Edward,edward@example.com,555-5833
36				734,Felipe,felipe@example.com,555-9067
37
38				To fill up the `client_email` and `client_phone` fields of `report.csv` from
39				`clients.csv`, we can use:
40
41				% csv-lookup-fields report.csv clients.csv --lookup-fields client_id:id --fill-fields client_email:email,client_phone:phone
42
43				The result will be:
44
45				client_id,followup_staff,followup_note,client_email,client_phone
46				101,Jerry,not renewing,andy@example.com,555-2983
47				299,Jerry,still thinking over,cindy@example.com,555-7892
48				734,Elaine,renewing,felipe@example.com,555-9067
49
50				_
51				add_args => {
52				ignore_case => {
53				schema => 'bool*',
54				cmdline_aliases => {ci=>{}, i=>{}},
55				},
56				fill_fields => {
57				schema => ['str*'],
58				req => 1,
59				},
60				lookup_fields => {
61				schema => ['str*'],
62				req => 1,
63				},
64				count => {
65				summary => 'Do not output rows, just report the number of rows filled',
66				schema => 'bool*',
67				cmdline_aliases => {c=>{}},
68				},
69				},
70
71				reads_multiple_csv => 1,
72
73				tags => ['category:templating'],
74
75				on_begin => sub {
76				my $r = shift;
77
78				# check arguments
79				@{ $r->{util_args}{input_filenames} } == 2
80				or die [400, "Please specify exactly 2 files: target and source"];
81
82				my @lookup_fields; # elem = [fieldname-in-target, fieldname-in-source]
83				{
84				my @ff = ref($r->{util_args}{lookup_fields}) eq 'ARRAY' ?
85				@{$r->{util_args}{lookup_fields}} : split(/,/, $r->{util_args}{lookup_fields});
86				for my $field_idx (0..$#ff) {
87				my @ff2 = split /:/, $ff[$field_idx], 2;
88				if (@ff2 < 2) {
89				$ff2[1] = $ff2[0];
90				}
91				$lookup_fields[$field_idx] = \@ff2;
92				}
93				}
94
95				my %fill_fields; # key=fieldname-in-target, val=fieldname-in-source
96				{
97				my @ff = ref($r->{util_args}{fill_fields}) eq 'ARRAY' ?
98				@{$r->{util_args}{fill_fields}} : split(/,/, $r->{util_args}{fill_fields});
99				for my $field_idx (0..$#ff) {
100				my @ff2 = split /:/, $ff[$field_idx], 2;
101				if (@ff2 < 2) {
102				$ff2[1] = $ff2[0];
103				}
104				$fill_fields{ $ff2[0] } = $ff2[1];
105				}
106				}
107
108				# these are the keys that we add to the stash
109				$r->{lookup_fields} = \@lookup_fields;
110				$r->{fill_fields} = \%fill_fields;
111				$r->{source_fields_idx} = [];
112				$r->{source_fields} = [];
113				$r->{source_data_rows} = [];
114				$r->{target_fields_idx} = [];
115				$r->{target_fields} = [];
116				$r->{target_data_rows} = [];
117				},
118
119				on_input_header_row => sub {
120				my $r = shift;
121
122				if ($r->{input_filenum} == 1) {
123				$r->{target_fields} = $r->{input_fields};
124				$r->{target_fields_idx} = $r->{input_fields_idx};
125				$r->{output_fields} = $r->{input_fields};
126				} else {
127				$r->{source_fields} = $r->{input_fields};
128				$r->{source_fields_idx} = $r->{input_fields_idx};
129				}
130				},
131
132				on_input_data_row => sub {
133				my $r = shift;
134
135				if ($r->{input_filenum} == 1) {
136				push @{ $r->{target_data_rows} }, $r->{input_row};
137				} else {
138				push @{ $r->{source_data_rows} }, $r->{input_row};
139				}
140				},
141
142				after_close_input_files => sub {
143				my $r = shift;
144
145				my $ci = $r->{util_args}{ignore_case};
146
147				# build lookup table
148				my %lookup_table; # key = joined lookup fields, val = source row idx
149				for my $row_idx (0..$#{$r->{source_data_rows}}) {
150				my $row = $r->{source_data_rows}[$row_idx];
151				my $key = join "\|", map {
152				my $field = $r->{lookup_fields}[$_][1];
153				my $field_idx = $r->{source_fields_idx}{$field};
154				my $val = defined $field_idx ? $row->[$field_idx] : "";
155				$val = lc $val if $ci;
156				$val;
157				} 0..$#{ $r->{lookup_fields} };
158				$lookup_table{$key} //= $row_idx;
159				}
160				#use DD; dd { lookup_fields=>$r->{lookup_fields}, fill_fields=>$r->{fill_fields}, lookup_table=>\%lookup_table };
161
162				# fill target csv
163				my $num_filled = 0;
164
165				for my $row (@{ $r->{target_data_rows} }) {
166				my $key = join "\|", map {
167				my $field = $r->{lookup_fields}[$_][0];
168				my $field_idx = $r->{target_fields_idx}{$field};
169				my $val = defined $field_idx ? $row->[$field_idx] : "";
170				$val = lc $val if $ci;
171				$val;
172				} 0..$#{ $r->{lookup_fields} };
173
174				#say "D:looking up '$key' ...";
175				if (defined(my $row_idx = $lookup_table{$key})) {
176				#say " D:found";
177				my $row_filled;
178				my $source_row = $r->{source_data_rows}[$row_idx];
179				for my $field (keys %{$r->{fill_fields}}) {
180				my $target_field_idx = $r->{target_fields_idx}{$field};
181				next unless defined $target_field_idx;
182				my $source_field_idx = $r->{source_fields_idx}{ $r->{fill_fields}{$field} };
183				next unless defined $source_field_idx;
184				$row->[$target_field_idx] =
185				$source_row->[$source_field_idx];
186				$row_filled++;
187				}
188				$num_filled++ if $row_filled;
189				}
190				unless ($r->{util_args}{count}) {
191				$r->{code_print_row}->($row);
192				}
193				} # for target data row
194
195				if ($r->{util_args}{count}) {
196				$r->{result} = [200, "OK", $num_filled];
197				}
198				},
199				);
200
201				1;
202				# ABSTRACT: Fill fields of a CSV file from another
203
204				__END__
205
206				=pod
207
208				=encoding UTF-8
209
210				=head1 NAME
211
212				App::CSVUtils::csv_lookup_fields - Fill fields of a CSV file from another
213
214				=head1 VERSION
215
216				This document describes version 1.030 of App::CSVUtils::csv_lookup_fields (from Perl distribution App-CSVUtils), released on 2023-07-25.
217
218				=head1 FUNCTIONS
219
220
221				=head2 csv_lookup_fields
222
223				Usage:
224
225				csv_lookup_fields(%args) -> [$status_code, $reason, $payload, \%result_meta]
226
227				Fill fields of a CSV file from another.
228
229				Example input:
230
231				# report.csv
232				client_id,followup_staff,followup_note,client_email,client_phone
233				101,Jerry,not renewing,
234				299,Jerry,still thinking over,
235				734,Elaine,renewing,
236
237				# clients.csv
238				id,name,email,phone
239				101,Andy,andy@example.com,555-2983
240				102,Bob,bob@acme.example.com,555-2523
241				299,Cindy,cindy@example.com,555-7892
242				400,Derek,derek@example.com,555-9018
243				701,Edward,edward@example.com,555-5833
244				734,Felipe,felipe@example.com,555-9067
245
246				To fill up the C<client_email> and C<client_phone> fields of C<report.csv> from
247				C<clients.csv>, we can use:
248
249				% csv-lookup-fields report.csv clients.csv --lookup-fields client_id:id --fill-fields client_email:email,client_phone:phone
250
251				The result will be:
252
253				client_id,followup_staff,followup_note,client_email,client_phone
254				101,Jerry,not renewing,andy@example.com,555-2983
255				299,Jerry,still thinking over,cindy@example.com,555-7892
256				734,Elaine,renewing,felipe@example.com,555-9067
257
258				This function is not exported.
259
260				Arguments ('*' denotes required arguments):
261
262				=over 4
263
264				=item * B<count> => I<bool>
265
266				Do not output rows, just report the number of rows filled.
267
268				=item * B<fill_fields>* => I<str>
269
270				(No description)
271
272				=item * B<ignore_case> => I<bool>
273
274				(No description)
275
276				=item * B<inplace> => I<true>
277
278				Output to the same file as input.
279
280				Normally, you output to a different file than input. If you try to output to the
281				same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
282				prevents you from doing it. However, with this C<--inplace> option, you can
283				output to the same file. Like perl's C<-i> option, this will first output to a
284				temporary file in the same directory as the input file then rename to the final
285				file at the end. You cannot specify output file (C<-o>) when using this option,
286				but you can specify backup extension with C<-b> option.
287
288				Some caveats:
289
290				=over
291
292				=item * if input file is a symbolic link, it will be replaced with a regular file;
293
294				=item * renaming (implemented using C<rename()>) can fail if input filename is too long;
295
296				=item * value specified in C<-b> is currently not checked for acceptable characters;
297
298				=item * things can also fail if permissions are restrictive;
299
300				=back
301
302				=item * B<inplace_backup_ext> => I<str> (default: "")
303
304				Extension to add for backup of input file.
305
306				In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
307				rename the input file using this extension as a backup. The old existing backup
308				will be overwritten, if any.
309
310				=item * B<input_escape_char> => I<str>
311
312				Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
313
314				Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
315
316				=item * B<input_filenames> => I<array[filename]> (default: ["-"])
317
318				Input CSV files.
319
320				Use C<-> to read from stdin.
321
322				Encoding of input file is assumed to be UTF-8.
323
324				=item * B<input_header> => I<bool> (default: 1)
325
326				Specify whether input CSV has a header row.
327
328				By default, the first row of the input CSV will be assumed to contain field
329				names (and the second row contains the first data row). When you declare that
330				input CSV does not have header row (C<--no-input-header>), the first row of the
331				CSV is assumed to contain the first data row. Fields will be named C<field1>,
332				C<field2>, and so on.
333
334				=item * B<input_quote_char> => I<str>
335
336				Specify field quote character in input CSV, will be passed to Text::CSV_XS.
337
338				Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
339
340				=item * B<input_sep_char> => I<str>
341
342				Specify field separator character in input CSV, will be passed to Text::CSV_XS.
343
344				Defaults to C<,> (comma). Overrides C<--input-tsv> option.
345
346				=item * B<input_tsv> => I<true>
347
348				Inform that input file is in TSV (tab-separated) format instead of CSV.
349
350				Overriden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
351				options. If one of those options is specified, then C<--input-tsv> will be
352				ignored.
353
354				=item * B<lookup_fields>* => I<str>
355
356				(No description)
357
358				=item * B<output_always_quote> => I<bool> (default: 0)
359
360				Whether to always quote values.
361
362				When set to false (the default), values are quoted only when necessary:
363
364				field1,field2,"field three contains comma (,)",field4
365
366				When set to true, then all values will be quoted:
367
368				"field1","field2","field three contains comma (,)","field4"
369
370				=item * B<output_escape_char> => I<str>
371
372				Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
373
374				This is like C<--input-escape-char> option but for output instead of input.
375
376				Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
377
378				=item * B<output_filename> => I<filename>
379
380				Output filename.
381
382				Use C<-> to output to stdout (the default if you don't specify this option).
383
384				Encoding of output file is assumed to be UTF-8.
385
386				=item * B<output_header> => I<bool>
387
388				Whether output CSV should have a header row.
389
390				By default, a header row will be output I<if> input CSV has header row. Under
391				C<--output-header>, a header row will be output even if input CSV does not have
392				header row (value will be something like "col0,col1,..."). Under
393				C<--no-output-header>, header row will I<not> be printed even if input CSV has
394				header row. So this option can be used to unconditionally add or remove header
395				row.
396
397				=item * B<output_quote_char> => I<str>
398
399				Specify field quote character in output CSV, will be passed to Text::CSV_XS.
400
401				This is like C<--input-quote-char> option but for output instead of input.
402
403				Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
404
405				=item * B<output_quote_empty> => I<bool> (default: 0)
406
407				Whether to quote empty values.
408
409				When set to false (the default), empty values are not quoted:
410
411				field1,field2,,field4
412
413				When set to true, then empty values will be quoted:
414
415				field1,field2,"",field4
416
417				=item * B<output_sep_char> => I<str>
418
419				Specify field separator character in output CSV, will be passed to Text::CSV_XS.
420
421				This is like C<--input-sep-char> option but for output instead of input.
422
423				Defaults to C<,> (comma). Overrides C<--output-tsv> option.
424
425				=item * B<output_tsv> => I<bool>
426
427				Inform that output file is TSV (tab-separated) format instead of CSV.
428
429				This is like C<--input-tsv> option but for output instead of input.
430
431				Overriden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
432				options. If one of those options is specified, then C<--output-tsv> will be
433				ignored.
434
435				=item * B<overwrite> => I<bool>
436
437				Whether to override existing output file.
438
439
440				=back
441
442				Returns an enveloped result (an array).
443
444				First element ($status_code) is an integer containing HTTP-like status code
445				(200 means OK, 4xx caller error, 5xx function error). Second element
446				($reason) is a string containing error message, or something like "OK" if status is
447				200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
448				element (%result_meta) is called result metadata and is optional, a hash
449				that contains extra information, much like how HTTP response headers provide additional metadata.
450
451				Return value: (any)
452
453				=head1 HOMEPAGE
454
455				Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
456
457				=head1 SOURCE
458
459				Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
460
461				=head1 AUTHOR
462
463				perlancar <perlancar@cpan.org>
464
465				=head1 CONTRIBUTING
466
467
468				To contribute, you can send patches by email/via RT, or send pull requests on
469				GitHub.
470
471				Most of the time, you don't need to build the distribution yourself. You can
472				simply modify the code, then test via:
473
474				% prove -l
475
476				If you want to build the distribution (e.g. to try to install it locally on your
477				system), you can install L<Dist::Zilla>,
478				L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
479				L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
480				Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
481				that are considered a bug and can be reported to me.
482
483				=head1 COPYRIGHT AND LICENSE
484
485				This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
486
487				This is free software; you can redistribute it and/or modify it under
488				the same terms as the Perl 5 programming language system itself.
489
490				=head1 BUGS
491
492				Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
493
494				When submitting a bug or request, please include a test-file or a
495				patch to an existing test-file that illustrates the bug or desired
496				feature.
497
498				=cut