File Coverage

blib/lib/App/CSVUtils/csv_uniq.pm
Criterion Covered Total %
statement 11 11 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 15 15 100.0


line stmt bran cond sub pod time code
1             package App::CSVUtils::csv_uniq;
2              
3 1     1   4716 use 5.010001;
  1         4  
4 1     1   6 use strict;
  1         3  
  1         22  
5 1     1   5 use warnings;
  1         2  
  1         69  
6              
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2023-07-25'; # DATE
9             our $DIST = 'App-CSVUtils'; # DIST
10             our $VERSION = '1.030'; # VERSION
11              
12 1         442 use App::CSVUtils qw(
13             gen_csv_util
14 1     1   6 );
  1         2  
15              
16             gen_csv_util(
17             name => 'csv_uniq',
18             summary => 'Report or omit duplicated values in CSV',
19             add_args => {
20             %App::CSVUtils::argspec_fields_1plus,
21             ignore_case => {
22             summary => 'Ignore case when comparing',
23             schema => 'true*',
24             cmdline_aliases => {i=>{}},
25             },
26             unique => {
27             summary => 'Instead of reporting duplicate values, report unique values instead',
28             schema => 'true*',
29             },
30             },
31             examples => [
32             {
33             summary => 'Check that field "foo" in CSV is unique, compare case-insensitively, report duplicates',
34             argv => ['file.csv', '-i', 'foo'],
35             test => 0,
36             'x.doc.show_result' => 0,
37             },
38             {
39             summary => 'Check that combination of fields "foo", "bar", "baz" in CSV is unique, report duplicates',
40             argv => ['file.csv', 'foo', 'bar', 'baz'],
41             test => 0,
42             'x.doc.show_result' => 0,
43             },
44             ],
45              
46             writes_csv => 0,
47              
48             tags => ['category:filtering'],
49              
50             on_input_header_row => sub {
51             my $r = shift;
52              
53             # we add these keys (seen, fields_idx) to the stash
54             $r->{seen} = {};
55             $r->{fields_idx} = [];
56              
57             # check arguments
58             for my $field (@{ $r->{util_args}{fields} }) {
59             push @{ $r->{fields_idx} }, App::CSVUtils::_find_field($r->{input_fields}, $field);
60             }
61             },
62              
63             on_input_data_row => sub {
64             my $r = shift;
65              
66             my @vals;
67             for my $field_idx (@{ $r->{fields_idx} }) {
68             my $fieldval = $r->{input_row}[ $field_idx ] // '';
69             push @vals, $r->{util_args}{ignore_case} ? lc($fieldval) : $fieldval;
70             }
71             my $val = join("|", @vals);
72             $r->{seen}{$val}++;
73             unless ($r->{util_args}{unique}) {
74             print "csv-uniq: Duplicate value '$val'\n" if $r->{seen}{$val} == 2;
75             }
76             },
77              
78             on_end => sub {
79             my $r = shift;
80              
81             if ($r->{util_args}{unique}) {
82             for my $val (sort keys %{ $r->{seen} }) {
83             print "csv-uniq: Unique value '$val'\n" if $r->{seen}{$val} == 1;
84             }
85             }
86             },
87             );
88              
89             1;
90             # ABSTRACT: Report or omit duplicated values in CSV
91              
92             __END__
93              
94             =pod
95              
96             =encoding UTF-8
97              
98             =head1 NAME
99              
100             App::CSVUtils::csv_uniq - Report or omit duplicated values in CSV
101              
102             =head1 VERSION
103              
104             This document describes version 1.030 of App::CSVUtils::csv_uniq (from Perl distribution App-CSVUtils), released on 2023-07-25.
105              
106             =head1 FUNCTIONS
107              
108              
109             =head2 csv_uniq
110              
111             Usage:
112              
113             csv_uniq(%args) -> [$status_code, $reason, $payload, \%result_meta]
114              
115             Report or omit duplicated values in CSV.
116              
117             Examples:
118              
119             =over
120              
121             =item * Check that field "foo" in CSV is unique, compare case-insensitively, report duplicates:
122              
123             csv_uniq(input_filename => "file.csv", fields => ["foo"], ignore_case => 1);
124              
125             =item * Check that combination of fields "foo", "bar", "baz" in CSV is unique, report duplicates:
126              
127             csv_uniq(input_filename => "file.csv", fields => ["foo", "bar", "baz"]);
128              
129             =back
130              
131             (No description)
132              
133             This function is not exported.
134              
135             Arguments ('*' denotes required arguments):
136              
137             =over 4
138              
139             =item * B<fields>* => I<array[str]>
140              
141             Field names.
142              
143             =item * B<ignore_case> => I<true>
144              
145             Ignore case when comparing.
146              
147             =item * B<input_escape_char> => I<str>
148              
149             Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
150              
151             Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
152              
153             =item * B<input_filename> => I<filename> (default: "-")
154              
155             Input CSV file.
156              
157             Use C<-> to read from stdin.
158              
159             Encoding of input file is assumed to be UTF-8.
160              
161             =item * B<input_header> => I<bool> (default: 1)
162              
163             Specify whether input CSV has a header row.
164              
165             By default, the first row of the input CSV will be assumed to contain field
166             names (and the second row contains the first data row). When you declare that
167             input CSV does not have header row (C<--no-input-header>), the first row of the
168             CSV is assumed to contain the first data row. Fields will be named C<field1>,
169             C<field2>, and so on.
170              
171             =item * B<input_quote_char> => I<str>
172              
173             Specify field quote character in input CSV, will be passed to Text::CSV_XS.
174              
175             Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
176              
177             =item * B<input_sep_char> => I<str>
178              
179             Specify field separator character in input CSV, will be passed to Text::CSV_XS.
180              
181             Defaults to C<,> (comma). Overrides C<--input-tsv> option.
182              
183             =item * B<input_tsv> => I<true>
184              
185             Inform that input file is in TSV (tab-separated) format instead of CSV.
186              
187             Overridden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
188             options. If one of those options is specified, then C<--input-tsv> will be
189             ignored.
190              
191             =item * B<unique> => I<true>
192              
193             Instead of reporting duplicate values, report unique values.
194              
195              
196             =back
197              
198             Returns an enveloped result (an array).
199              
200             First element ($status_code) is an integer containing HTTP-like status code
201             (200 means OK, 4xx caller error, 5xx function error). Second element
202             ($reason) is a string containing error message, or something like "OK" if status is
203             200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
204             element (%result_meta) is called result metadata and is optional, a hash
205             that contains extra information, much like how HTTP response headers provide additional metadata.
206              
207             Return value: (any)
208              
209             =head1 HOMEPAGE
210              
211             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
212              
213             =head1 SOURCE
214              
215             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
216              
217             =head1 AUTHOR
218              
219             perlancar <perlancar@cpan.org>
220              
221             =head1 CONTRIBUTING
222              
223              
224             To contribute, you can send patches by email/via RT, or send pull requests on
225             GitHub.
226              
227             Most of the time, you don't need to build the distribution yourself. You can
228             simply modify the code, then test via:
229              
230             % prove -l
231              
232             If you want to build the distribution (e.g. to try to install it locally on your
233             system), you can install L<Dist::Zilla>,
234             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
235             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
236             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
237             that are considered a bug and can be reported to me.
238              
239             =head1 COPYRIGHT AND LICENSE
240              
241             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
242              
243             This is free software; you can redistribute it and/or modify it under
244             the same terms as the Perl 5 programming language system itself.
245              
246             =head1 BUGS
247              
248             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
249              
250             When submitting a bug or request, please include a test-file or a
251             patch to an existing test-file that illustrates the bug or desired
252             feature.
253              
254             =cut