| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package App::CSVUtils::csv_uniq; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 4439 | use 5.010001; | 
|  | 1 |  |  |  |  | 4 |  | 
| 4 | 1 |  |  | 1 |  | 5 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 22 |  | 
| 5 | 1 |  |  | 1 |  | 5 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 67 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY | 
| 8 |  |  |  |  |  |  | our $DATE = '2023-04-01'; # DATE | 
| 9 |  |  |  |  |  |  | our $DIST = 'App-CSVUtils'; # DIST | 
| 10 |  |  |  |  |  |  | our $VERSION = '1.024'; # VERSION | 
| 11 |  |  |  |  |  |  |  | 
| 12 | 1 |  |  |  |  | 454 | use App::CSVUtils qw( | 
| 13 |  |  |  |  |  |  | gen_csv_util | 
| 14 | 1 |  |  | 1 |  | 6 | ); | 
|  | 1 |  |  |  |  | 2 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | gen_csv_util( | 
| 17 |  |  |  |  |  |  | name => 'csv_uniq', | 
| 18 |  |  |  |  |  |  | summary => 'Report or omit duplicated values in CSV', | 
| 19 |  |  |  |  |  |  | add_args => { | 
| 20 |  |  |  |  |  |  | %App::CSVUtils::argspec_fields_1plus, | 
| 21 |  |  |  |  |  |  | ignore_case => { | 
| 22 |  |  |  |  |  |  | summary => 'Ignore case when comparing', | 
| 23 |  |  |  |  |  |  | schema => 'true*', | 
| 24 |  |  |  |  |  |  | cmdline_aliases => {i=>{}}, | 
| 25 |  |  |  |  |  |  | }, | 
| 26 |  |  |  |  |  |  | unique => { | 
| 27 |  |  |  |  |  |  | summary => 'Instead of reporting duplicate values, report unique values instead', | 
| 28 |  |  |  |  |  |  | schema => 'true*', | 
| 29 |  |  |  |  |  |  | }, | 
| 30 |  |  |  |  |  |  | }, | 
| 31 |  |  |  |  |  |  | examples => [ | 
| 32 |  |  |  |  |  |  | { | 
| 33 |  |  |  |  |  |  | summary => 'Check that field "foo" in CSV is unique, compare case-insensitively, report duplicates', | 
| 34 |  |  |  |  |  |  | argv => ['file.csv', '-i', 'foo'], | 
| 35 |  |  |  |  |  |  | test => 0, | 
| 36 |  |  |  |  |  |  | 'x.doc.show_result' => 0, | 
| 37 |  |  |  |  |  |  | }, | 
| 38 |  |  |  |  |  |  | { | 
| 39 |  |  |  |  |  |  | summary => 'Check that combination of fields "foo", "bar", "baz" in CSV is unique, report duplicates', | 
| 40 |  |  |  |  |  |  | argv => ['file.csv', 'foo', 'bar', 'baz'], | 
| 41 |  |  |  |  |  |  | test => 0, | 
| 42 |  |  |  |  |  |  | 'x.doc.show_result' => 0, | 
| 43 |  |  |  |  |  |  | }, | 
| 44 |  |  |  |  |  |  | ], | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | writes_csv => 0, | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | tags => ['category:filtering'], | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | on_input_header_row => sub { | 
| 51 |  |  |  |  |  |  | my $r = shift; | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | # we add this key to the stash | 
| 54 |  |  |  |  |  |  | $r->{seen} = {}; | 
| 55 |  |  |  |  |  |  | $r->{fields_idx} = []; | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | # check arguments | 
| 58 |  |  |  |  |  |  | for my $field (@{ $r->{util_args}{fields} }) { | 
| 59 |  |  |  |  |  |  | push @{ $r->{fields_idx} }, App::CSVUtils::_find_field($r->{input_fields}, $field); | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  | }, | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | on_input_data_row => sub { | 
| 64 |  |  |  |  |  |  | my $r = shift; | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | my @vals; | 
| 67 |  |  |  |  |  |  | for my $field_idx (@{ $r->{fields_idx} }) { | 
| 68 |  |  |  |  |  |  | my $fieldval = $r->{input_row}[ $field_idx ] // ''; | 
| 69 |  |  |  |  |  |  | push @vals, $r->{util_args}{ignore_case} ? lc($fieldval) : $fieldval; | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  | my $val = join("|", @vals); | 
| 72 |  |  |  |  |  |  | $r->{seen}{$val}++; | 
| 73 |  |  |  |  |  |  | unless ($r->{util_args}{unique}) { | 
| 74 |  |  |  |  |  |  | print "csv-uniq: Duplicate value '$val'\n" if $r->{seen}{$val} == 2; | 
| 75 |  |  |  |  |  |  | } | 
| 76 |  |  |  |  |  |  | }, | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | on_end => sub { | 
| 79 |  |  |  |  |  |  | my $r = shift; | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  | if ($r->{util_args}{unique}) { | 
| 82 |  |  |  |  |  |  | for my $val (sort keys %{ $r->{seen} }) { | 
| 83 |  |  |  |  |  |  | print "csv-uniq: Unique value '$val'\n" if $r->{seen}{$val} == 1; | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  | } | 
| 86 |  |  |  |  |  |  | }, | 
| 87 |  |  |  |  |  |  | ); | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | 1; | 
| 90 |  |  |  |  |  |  | # ABSTRACT: Report or omit duplicated values in CSV | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | __END__ | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | =pod | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | =encoding UTF-8 | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | =head1 NAME | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | App::CSVUtils::csv_uniq - Report or omit duplicated values in CSV | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | =head1 VERSION | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | This document describes version 1.024 of App::CSVUtils::csv_uniq (from Perl distribution App-CSVUtils), released on 2023-04-01. | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | =head1 FUNCTIONS | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | =head2 csv_uniq | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | Usage: | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | csv_uniq(%args) -> [$status_code, $reason, $payload, \%result_meta] | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | Report or omit duplicated values in CSV. | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | Examples: | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | =over | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | =item * Check that field "foo" in CSV is unique, compare case-insensitively, report duplicates: | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | csv_uniq(input_filename => "file.csv", fields => ["foo"], ignore_case => 1); | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | =item * Check that combination of fields "foo", "bar", "baz" in CSV is unique, report duplicates: | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | csv_uniq(input_filename => "file.csv", fields => ["foo", "bar", "baz"]); | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | =back | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | (No description) | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | This function is not exported. | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | Arguments ('*' denotes required arguments): | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | =over 4 | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | =item * B<fields>* => I<array[str]> | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | Field names. | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | =item * B<ignore_case> => I<true> | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | Ignore case when comparing. | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | =item * B<input_escape_char> => I<str> | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS. | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | Defaults to C<\> (backslash). Overrides C<--input-tsv> option. | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | =item * B<input_filename> => I<filename> (default: "-") | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | Input CSV file. | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | Use C<-> to read from stdin. | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | Encoding of input file is assumed to be UTF-8. | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | =item * B<input_header> => I<bool> (default: 1) | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | Specify whether input CSV has a header row. | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | By default, the first row of the input CSV will be assumed to contain field | 
| 166 |  |  |  |  |  |  | names (and the second row contains the first data row). When you declare that | 
| 167 |  |  |  |  |  |  | input CSV does not have header row (C<--no-input-header>), the first row of the | 
| 168 |  |  |  |  |  |  | CSV is assumed to contain the first data row. Fields will be named C<field1>, | 
| 169 |  |  |  |  |  |  | C<field2>, and so on. | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | =item * B<input_quote_char> => I<str> | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  | Specify field quote character in input CSV, will be passed to Text::CSV_XS. | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | Defaults to C<"> (double quote). Overrides C<--input-tsv> option. | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | =item * B<input_sep_char> => I<str> | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | Specify field separator character in input CSV, will be passed to Text::CSV_XS. | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | Defaults to C<,> (comma). Overrides C<--input-tsv> option. | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | =item * B<input_tsv> => I<true> | 
| 184 |  |  |  |  |  |  |  | 
| 185 |  |  |  |  |  |  | Inform that input file is in TSV (tab-separated) format instead of CSV. | 
| 186 |  |  |  |  |  |  |  | 
| 187 |  |  |  |  |  |  | Overridden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char> | 
| 188 |  |  |  |  |  |  | options. If one of those options is specified, then C<--input-tsv> will be | 
| 189 |  |  |  |  |  |  | ignored. | 
| 190 |  |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  | =item * B<unique> => I<true> | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | Instead of reporting duplicate values, report unique values instead. | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | =back | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | Returns an enveloped result (an array). | 
| 199 |  |  |  |  |  |  |  | 
| 200 |  |  |  |  |  |  | First element ($status_code) is an integer containing HTTP-like status code | 
| 201 |  |  |  |  |  |  | (200 means OK, 4xx caller error, 5xx function error). Second element | 
| 202 |  |  |  |  |  |  | ($reason) is a string containing error message, or something like "OK" if status is | 
| 203 |  |  |  |  |  |  | 200. Third element ($payload) is the actual result, but is usually not present when the enveloped result is an error response ($status_code is not 2xx). Fourth | 
| 204 |  |  |  |  |  |  | element (%result_meta) is called result metadata and is optional, a hash | 
| 205 |  |  |  |  |  |  | that contains extra information, much like how HTTP response headers provide additional metadata. | 
| 206 |  |  |  |  |  |  |  | 
| 207 |  |  |  |  |  |  | Return value:  (any) | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | =head1 HOMEPAGE | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>. | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | =head1 SOURCE | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>. | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  | =head1 AUTHOR | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | perlancar <perlancar@cpan.org> | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  | =head1 CONTRIBUTING | 
| 222 |  |  |  |  |  |  |  | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | To contribute, you can send patches by email/via RT, or send pull requests on | 
| 225 |  |  |  |  |  |  | GitHub. | 
| 226 |  |  |  |  |  |  |  | 
| 227 |  |  |  |  |  |  | Most of the time, you don't need to build the distribution yourself. You can | 
| 228 |  |  |  |  |  |  | simply modify the code, then test via: | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | % prove -l | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  | If you want to build the distribution (e.g. to try to install it locally on your | 
| 233 |  |  |  |  |  |  | system), you can install L<Dist::Zilla>, | 
| 234 |  |  |  |  |  |  | L<Dist::Zilla::PluginBundle::Author::PERLANCAR>, | 
| 235 |  |  |  |  |  |  | L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other | 
| 236 |  |  |  |  |  |  | Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond | 
| 237 |  |  |  |  |  |  | that are considered a bug and can be reported to me. | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>. | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  | This is free software; you can redistribute it and/or modify it under | 
| 244 |  |  |  |  |  |  | the same terms as the Perl 5 programming language system itself. | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | =head1 BUGS | 
| 247 |  |  |  |  |  |  |  | 
| 248 |  |  |  |  |  |  | Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>. | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | When submitting a bug or request, please include a test-file or a | 
| 251 |  |  |  |  |  |  | patch to an existing test-file that illustrates the bug or desired | 
| 252 |  |  |  |  |  |  | feature. | 
| 253 |  |  |  |  |  |  |  | 
| 254 |  |  |  |  |  |  | =cut |