File Coverage

blib/lib/App/CSVUtils/csv_replace_newline.pm
Criterion Covered Total %
statement 11 11 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 15 15 100.0


line stmt bran cond sub pod time code
1             package App::CSVUtils::csv_replace_newline;
2              
3 1     1   4639 use 5.010001;
  1         4  
4 1     1   5 use strict;
  1         2  
  1         21  
5 1     1   4 use warnings;
  1         2  
  1         69  
6              
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2023-07-25'; # DATE
9             our $DIST = 'App-CSVUtils'; # DIST
10             our $VERSION = '1.030'; # VERSION
11              
12 1         476 use App::CSVUtils qw(
13             gen_csv_util
14 1     1   6 );
  1         2  
15              
16             gen_csv_util( # declare the csv_replace_newline utility from the spec below
17             name => 'csv_replace_newline',
18             summary => 'Replace newlines in CSV values',
19             description => <<'_',
20              
21             Some CSV parsers or applications cannot handle multiline CSV values. This
22             utility can be used to convert the newline to something else. There are a few
23             choices: replace newline with space (`--with-space`, the default), remove
24             newline (`--with-nothing`), replace with encoded representation
25             (`--with-backslash-n`), or with characters of your choice (`--with 'blah'`).
26              
27             _
28             add_args => {
29             with => { # replacement string substituted for each run of CR/LF characters
30             schema => 'str*',
31             default => ' ',
32             cmdline_aliases => { # flag aliases that just preset the 'with' argument
33             with_space => { is_flag=>1, code=>sub { $_[0]{with} = ' ' } },
34             with_nothing => { is_flag=>1, code=>sub { $_[0]{with} = '' } },
35             with_backslash_n => { is_flag=>1, code=>sub { $_[0]{with} = "\\n" } }, # literal backslash + "n", not a newline
36             },
37             },
38             },
39             tags => ['category:munging', 'modifies-rows'],
40              
41             examples => [
42             {
43             summary => 'Replace newline in a CSV file to space',
44             argv => ['file.csv'],
45             test => 0,
46             'x.doc.show_result' => 0,
47             },
48             ],
49             # Header hook: fill output_fields with the input field names, newlines replaced.
50             on_input_header_row => sub {
51             my $r = shift;
52              
53             $r->{output_fields} = [];
54             my $with = $r->{util_args}{with};
55             for my $j (0 .. $#{ $r->{input_fields} }) {
56             my $val = $r->{input_fields}[$j]; # copy, so input_fields itself is not modified
57             $val =~ s/[\015\012]+/$with/g; # a run of CR/LF characters becomes a single $with
58             push @{ $r->{output_fields} }, $val;
59             }
60             },
61             # Data-row hook: build a cleaned copy of the row and print it via code_print_row.
62             on_input_data_row => sub {
63             my $r = shift;
64              
65             my $row = [];
66             my $with = $r->{util_args}{with};
67             for my $j (0 .. $#{ $r->{input_fields} }) { # one output cell per input field, whatever the row length
68             my $val = $r->{input_row}[$j];
69             $val =~ s/[\015\012]+/$with/g if defined $val; # a row shorter than the header yields undef cells; skip them to avoid an uninitialized-value warning
70             push @$row, $val; # undef is pushed as-is, so the output is unchanged
71             }
72              
73             $r->{code_print_row}->($row);
74             },
75             );
76              
77             1;
78             # ABSTRACT: Replace newlines in CSV values
79              
80             __END__
81              
82             =pod
83              
84             =encoding UTF-8
85              
86             =head1 NAME
87              
88             App::CSVUtils::csv_replace_newline - Replace newlines in CSV values
89              
90             =head1 VERSION
91              
92             This document describes version 1.030 of App::CSVUtils::csv_replace_newline (from Perl distribution App-CSVUtils), released on 2023-07-25.
93              
94             =head1 FUNCTIONS
95              
96              
97             =head2 csv_replace_newline
98              
99             Usage:
100              
101             csv_replace_newline(%args) -> [$status_code, $reason, $payload, \%result_meta]
102              
103             Replace newlines in CSV values.
104              
105             Examples:
106              
107             =over
108              
109             =item * Replace newlines in a CSV file with spaces:
110              
111             csv_replace_newline(input_filename => "file.csv");
112              
113             =back
114              
115             Some CSV parsers or applications cannot handle multiline CSV values. This
116             utility can be used to convert the newline to something else. There are a few
117             choices: replace newline with space (C<--with-space>, the default), remove
118             newline (C<--with-nothing>), replace with encoded representation
119             (C<--with-backslash-n>), or with characters of your choice (C<--with 'blah'>).
120              
121             This function is not exported.
122              
123             Arguments ('*' denotes required arguments):
124              
125             =over 4
126              
127             =item * B<inplace> => I<true>
128              
129             Output to the same file as input.
130              
131             Normally, you output to a different file than input. If you try to output to the
132             same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
133             prevents you from doing it. However, with this C<--inplace> option, you can
134             output to the same file. Like perl's C<-i> option, this will first output to a
135             temporary file in the same directory as the input file then rename to the final
136             file at the end. You cannot specify output file (C<-o>) when using this option,
137             but you can specify backup extension with C<-b> option.
138              
139             Some caveats:
140              
141             =over
142              
143             =item * if input file is a symbolic link, it will be replaced with a regular file;
144              
145             =item * renaming (implemented using C<rename()>) can fail if input filename is too long;
146              
147             =item * value specified in C<-b> is currently not checked for acceptable characters;
148              
149             =item * things can also fail if permissions are restrictive;
150              
151             =back
152              
153             =item * B<inplace_backup_ext> => I<str> (default: "")
154              
155             Extension to add for backup of input file.
156              
157             In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
158             rename the input file using this extension as a backup. The old existing backup
159             will be overwritten, if any.
160              
161             =item * B<input_escape_char> => I<str>
162              
163             Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
164              
165             Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
166              
167             =item * B<input_filename> => I<filename> (default: "-")
168              
169             Input CSV file.
170              
171             Use C<-> to read from stdin.
172              
173             Encoding of input file is assumed to be UTF-8.
174              
175             =item * B<input_header> => I<bool> (default: 1)
176              
177             Specify whether input CSV has a header row.
178              
179             By default, the first row of the input CSV will be assumed to contain field
180             names (and the second row contains the first data row). When you declare that
181             input CSV does not have header row (C<--no-input-header>), the first row of the
182             CSV is assumed to contain the first data row. Fields will be named C<field1>,
183             C<field2>, and so on.
184              
185             =item * B<input_quote_char> => I<str>
186              
187             Specify field quote character in input CSV, will be passed to Text::CSV_XS.
188              
189             Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
190              
191             =item * B<input_sep_char> => I<str>
192              
193             Specify field separator character in input CSV, will be passed to Text::CSV_XS.
194              
195             Defaults to C<,> (comma). Overrides C<--input-tsv> option.
196              
197             =item * B<input_tsv> => I<true>
198              
199             Inform that input file is in TSV (tab-separated) format instead of CSV.
200              
201             Overridden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
202             options. If one of those options is specified, then C<--input-tsv> will be
203             ignored.
204              
205             =item * B<output_always_quote> => I<bool> (default: 0)
206              
207             Whether to always quote values.
208              
209             When set to false (the default), values are quoted only when necessary:
210              
211             field1,field2,"field three contains comma (,)",field4
212              
213             When set to true, then all values will be quoted:
214              
215             "field1","field2","field three contains comma (,)","field4"
216              
217             =item * B<output_escape_char> => I<str>
218              
219             Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
220              
221             This is like C<--input-escape-char> option but for output instead of input.
222              
223             Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
224              
225             =item * B<output_filename> => I<filename>
226              
227             Output filename.
228              
229             Use C<-> to output to stdout (the default if you don't specify this option).
230              
231             Encoding of output file is assumed to be UTF-8.
232              
233             =item * B<output_header> => I<bool>
234              
235             Whether output CSV should have a header row.
236              
237             By default, a header row will be output I<if> input CSV has header row. Under
238             C<--output-header>, a header row will be output even if input CSV does not have
239             header row (value will be something like "col0,col1,..."). Under
240             C<--no-output-header>, header row will I<not> be printed even if input CSV has
241             header row. So this option can be used to unconditionally add or remove header
242             row.
243              
244             =item * B<output_quote_char> => I<str>
245              
246             Specify field quote character in output CSV, will be passed to Text::CSV_XS.
247              
248             This is like C<--input-quote-char> option but for output instead of input.
249              
250             Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
251              
252             =item * B<output_quote_empty> => I<bool> (default: 0)
253              
254             Whether to quote empty values.
255              
256             When set to false (the default), empty values are not quoted:
257              
258             field1,field2,,field4
259              
260             When set to true, then empty values will be quoted:
261              
262             field1,field2,"",field4
263              
264             =item * B<output_sep_char> => I<str>
265              
266             Specify field separator character in output CSV, will be passed to Text::CSV_XS.
267              
268             This is like C<--input-sep-char> option but for output instead of input.
269              
270             Defaults to C<,> (comma). Overrides C<--output-tsv> option.
271              
272             =item * B<output_tsv> => I<bool>
273              
274             Inform that output file is TSV (tab-separated) format instead of CSV.
275              
276             This is like C<--input-tsv> option but for output instead of input.
277              
278             Overridden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
279             options. If one of those options is specified, then C<--output-tsv> will be
280             ignored.
281              
282             =item * B<overwrite> => I<bool>
283              
284             Whether to overwrite existing output file.
285              
286             =item * B<with> => I<str> (default: " ")
287              
288             String to substitute for each run of newline characters (CR and/or LF) in a value.
289              
290              
291             =back
292              
293             Returns an enveloped result (an array).
294              
295             First element ($status_code) is an integer containing HTTP-like status code
296             (200 means OK, 4xx caller error, 5xx function error). Second element
297             ($reason) is a string containing error message, or something like "OK" if status is
298             200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
299             element (%result_meta) is called result metadata and is optional, a hash
300             that contains extra information, much like how HTTP response headers provide additional metadata.
301              
302             Return value: (any)
303              
304             =head1 HOMEPAGE
305              
306             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
307              
308             =head1 SOURCE
309              
310             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
311              
312             =head1 AUTHOR
313              
314             perlancar <perlancar@cpan.org>
315              
316             =head1 CONTRIBUTING
317              
318              
319             To contribute, you can send patches by email/via RT, or send pull requests on
320             GitHub.
321              
322             Most of the time, you don't need to build the distribution yourself. You can
323             simply modify the code, then test via:
324              
325             % prove -l
326              
327             If you want to build the distribution (e.g. to try to install it locally on your
328             system), you can install L<Dist::Zilla>,
329             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
330             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
331             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
332             that are considered a bug and can be reported to me.
333              
334             =head1 COPYRIGHT AND LICENSE
335              
336             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
337              
338             This is free software; you can redistribute it and/or modify it under
339             the same terms as the Perl 5 programming language system itself.
340              
341             =head1 BUGS
342              
343             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
344              
345             When submitting a bug or request, please include a test-file or a
346             patch to an existing test-file that illustrates the bug or desired
347             feature.
348              
349             =cut