| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package App::UniqFiles; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 148801 | use 5.010001; | 
|  | 1 |  |  |  |  | 13 |  | 
| 4 | 1 |  |  | 1 |  | 6 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 18 |  | 
| 5 | 1 |  |  | 1 |  | 5 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 23 |  | 
| 6 | 1 |  |  | 1 |  | 2273 | use Log::ger; | 
|  | 1 |  |  |  |  | 57 |  | 
|  | 1 |  |  |  |  | 5 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 1 |  |  | 1 |  | 257 | use Cwd qw(abs_path); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 49 |  | 
| 9 | 1 |  |  | 1 |  | 6 | use Exporter qw(import); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 24 |  | 
| 10 | 1 |  |  | 1 |  | 626 | use Perinci::Sub::Util qw(gen_modified_sub); | 
|  | 1 |  |  |  |  | 2626 |  | 
|  | 1 |  |  |  |  | 174 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY | 
| 13 |  |  |  |  |  |  | our $DATE = '2023-02-06'; # DATE | 
| 14 |  |  |  |  |  |  | our $DIST = 'App-UniqFiles'; # DIST | 
| 15 |  |  |  |  |  |  | our $VERSION = '0.141'; # VERSION | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | our @EXPORT_OK = qw(uniq_files); | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | our %SPEC; | 
| 20 |  |  |  |  |  |  |  | 
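|  |  |  |  |  |  |  | # _glob: recursively collect regular files (skipping symlinks) under the given directories | 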
| 21 |  |  |  |  |  |  | sub _glob { | 
| 22 | 10 |  |  | 10 |  | 64 | require File::Find; | 
| 23 |  |  |  |  |  |  |  | 
| 24 | 10 |  |  |  |  | 21 | my $dir; | 
| 25 |  |  |  |  |  |  | my @res; | 
| 26 |  |  |  |  |  |  | File::Find::finddepth( | 
| 27 |  |  |  |  |  |  | sub { | 
| 28 | 30 | 50 |  | 30 |  | 313 | return if -l $_; | 
| 29 | 30 | 100 |  |  |  | 265 | return unless -f _; | 
| 30 | 1 |  |  | 1 |  | 8 | no warnings 'once'; # $File::Find::dir | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 2754 |  | 
| 31 | 20 |  |  |  |  | 260 | push @res, "$File::Find::dir/$_"; | 
| 32 |  |  |  |  |  |  | }, | 
| 33 | 10 |  |  |  |  | 1160 | @_, | 
| 34 |  |  |  |  |  |  | ); | 
| 35 | 10 |  |  |  |  | 83 | @res; | 
| 36 |  |  |  |  |  |  | } | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | our %argspec_authoritative_dirs = ( | 
| 39 |  |  |  |  |  |  | authoritative_dirs => { | 
| 40 |  |  |  |  |  |  | summary => 'Denote director(y|ies) where authoritative/"Original" copies are found', | 
| 41 |  |  |  |  |  |  | 'x.name.is_plural' => 1, | 
| 42 |  |  |  |  |  |  | 'x.name.singular' => 'authoritative_dir', | 
| 43 |  |  |  |  |  |  | schema => ['array*', of=>'str*'], # XXX dirname | 
| 44 |  |  |  |  |  |  | cmdline_aliases => {O=>{}}, | 
| 45 |  |  |  |  |  |  | }, | 
| 46 |  |  |  |  |  |  | ); | 
| 47 |  |  |  |  |  |  | our %argspecs_filter = ( | 
| 48 |  |  |  |  |  |  | include_file_patterns => { | 
| 49 |  |  |  |  |  |  | summary => 'Filename (including path) regex patterns to include', | 
| 50 |  |  |  |  |  |  | 'x.name.is_plural' => 1, | 
| 51 |  |  |  |  |  |  | 'x.name.singular' => 'include_file_pattern', | 
| 52 |  |  |  |  |  |  | schema => ['array*', of=>'str*'], # XXX re | 
| 53 |  |  |  |  |  |  | cmdline_aliases => {I=>{}}, | 
| 54 |  |  |  |  |  |  | }, | 
| 55 |  |  |  |  |  |  | exclude_file_patterns => { | 
| 56 |  |  |  |  |  |  | summary => 'Filename (including path) regex patterns to exclude', | 
| 57 |  |  |  |  |  |  | 'x.name.is_plural' => 1, | 
| 58 |  |  |  |  |  |  | 'x.name.singular' => 'exclude_file_pattern', | 
| 59 |  |  |  |  |  |  | schema => ['array*', of=>'str*'], # XXX re | 
| 60 |  |  |  |  |  |  | cmdline_aliases => {X=>{}}, | 
| 61 |  |  |  |  |  |  | }, | 
| 62 |  |  |  |  |  |  | exclude_empty_files => { | 
| 63 |  |  |  |  |  |  | schema => 'bool*', | 
| 64 |  |  |  |  |  |  | cmdline_aliases => {Z=>{}}, | 
| 65 |  |  |  |  |  |  | }, | 
| 66 |  |  |  |  |  |  | min_size => { | 
| 67 |  |  |  |  |  |  | summary => 'Minimum file size to consider', | 
| 68 |  |  |  |  |  |  | schema => 'filesize*', | 
| 69 |  |  |  |  |  |  | }, | 
| 70 |  |  |  |  |  |  | max_size => { | 
| 71 |  |  |  |  |  |  | summary => 'Maximum file size to consider', | 
| 72 |  |  |  |  |  |  | schema => 'filesize*', | 
| 73 |  |  |  |  |  |  | }, | 
| 74 |  |  |  |  |  |  | ); | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | $SPEC{uniq_files} = { | 
| 77 |  |  |  |  |  |  | v => 1.1, | 
| 78 |  |  |  |  |  |  | summary => 'Report duplicate or unique file contents', | 
| 79 |  |  |  |  |  |  | description => <<'_', | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  | Given a list of filenames, will check each file size and content for duplicate | 
| 82 |  |  |  |  |  |  | content. Interface is a bit like the `uniq` Unix command-line program. | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | _ | 
| 85 |  |  |  |  |  |  | args    => { | 
| 86 |  |  |  |  |  |  | files => { | 
| 87 |  |  |  |  |  |  | schema => ['array*' => {of=>'str*'}], | 
| 88 |  |  |  |  |  |  | req    => 1, | 
| 89 |  |  |  |  |  |  | pos    => 0, | 
| 90 |  |  |  |  |  |  | slurpy => 1, | 
| 91 |  |  |  |  |  |  | }, | 
| 92 |  |  |  |  |  |  | recurse => { | 
| 93 |  |  |  |  |  |  | schema => 'bool*', | 
| 94 |  |  |  |  |  |  | cmdline_aliases => {R=>{}}, | 
| 95 |  |  |  |  |  |  | description => <<'_', | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | If set to true, will recurse into subdirectories. | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | _ | 
| 100 |  |  |  |  |  |  | }, | 
| 101 |  |  |  |  |  |  | group_by_digest => { | 
| 102 |  |  |  |  |  |  | summary => 'Sort files by their digest (or size, if not computing digest), separating each different digest', | 
| 103 |  |  |  |  |  |  | schema => 'bool*', | 
| 104 |  |  |  |  |  |  | }, | 
| 105 |  |  |  |  |  |  | show_digest => { | 
| 106 |  |  |  |  |  |  | summary => 'Show the digest value (or the size, if not computing digest) for each file', | 
| 107 |  |  |  |  |  |  | description => <<'_', | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | Note that this routine does not compute digest for files which have unique | 
| 110 |  |  |  |  |  |  | sizes, so they will show up as empty. | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | _ | 
| 113 |  |  |  |  |  |  | schema => 'true*', | 
| 114 |  |  |  |  |  |  | }, | 
| 115 |  |  |  |  |  |  | show_size => { | 
| 116 |  |  |  |  |  |  | summary => 'Show the size for each file', | 
| 117 |  |  |  |  |  |  | schema => 'true*', | 
| 118 |  |  |  |  |  |  | }, | 
| 119 |  |  |  |  |  |  | # TODO add option follow_symlinks? | 
| 120 |  |  |  |  |  |  | report_unique => { | 
| 121 |  |  |  |  |  |  | schema => [bool => {default=>1}], | 
| 122 |  |  |  |  |  |  | summary => 'Whether to return unique items', | 
| 123 |  |  |  |  |  |  | cmdline_aliases => { | 
| 124 |  |  |  |  |  |  | a => { | 
| 125 |  |  |  |  |  |  | summary => 'Alias for --report-unique --report-duplicate=1 (report all files)', | 
| 126 |  |  |  |  |  |  | code => sub { | 
| 127 |  |  |  |  |  |  | my $args = shift; | 
| 128 |  |  |  |  |  |  | $args->{report_unique}    = 1; | 
| 129 |  |  |  |  |  |  | $args->{report_duplicate} = 1; | 
| 130 |  |  |  |  |  |  | }, | 
| 131 |  |  |  |  |  |  | }, | 
| 132 |  |  |  |  |  |  | u => { | 
| 133 |  |  |  |  |  |  | summary => 'Alias for --report-unique --report-duplicate=0', | 
| 134 |  |  |  |  |  |  | code => sub { | 
| 135 |  |  |  |  |  |  | my $args = shift; | 
| 136 |  |  |  |  |  |  | $args->{report_unique}    = 1; | 
| 137 |  |  |  |  |  |  | $args->{report_duplicate} = 0; | 
| 138 |  |  |  |  |  |  | }, | 
| 139 |  |  |  |  |  |  | }, | 
| 140 |  |  |  |  |  |  | d => { | 
| 141 |  |  |  |  |  |  | summary => | 
| 142 |  |  |  |  |  |  | 'Alias for --noreport-unique --report-duplicate=1', | 
| 143 |  |  |  |  |  |  | code => sub { | 
| 144 |  |  |  |  |  |  | my $args = shift; | 
| 145 |  |  |  |  |  |  | $args->{report_unique}    = 0; | 
| 146 |  |  |  |  |  |  | $args->{report_duplicate} = 1; | 
| 147 |  |  |  |  |  |  | }, | 
| 148 |  |  |  |  |  |  | }, | 
| 149 |  |  |  |  |  |  | D => { | 
| 150 |  |  |  |  |  |  | summary => | 
| 151 |  |  |  |  |  |  | 'Alias for --noreport-unique --report-duplicate=3', | 
| 152 |  |  |  |  |  |  | code => sub { | 
| 153 |  |  |  |  |  |  | my $args = shift; | 
| 154 |  |  |  |  |  |  | $args->{report_unique}    = 0; | 
| 155 |  |  |  |  |  |  | $args->{report_duplicate} = 3; | 
| 156 |  |  |  |  |  |  | }, | 
| 157 |  |  |  |  |  |  | }, | 
| 158 |  |  |  |  |  |  | }, | 
| 159 |  |  |  |  |  |  | }, | 
| 160 |  |  |  |  |  |  | report_duplicate => { | 
| 161 |  |  |  |  |  |  | schema => [int => {in=>[0,1,2,3], default=>2}], | 
| 162 |  |  |  |  |  |  | summary => 'Whether to return duplicate items', | 
| 163 |  |  |  |  |  |  | description => <<'_', | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | Can be set to either 0, 1, 2, or 3. | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | If set to 0, duplicate items will not be returned. | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | If set to 1 (the default for `dupe-files`), will return all the duplicate | 
| 170 |  |  |  |  |  |  | files. For example: `file1` contains text 'a', `file2` 'b', `file3` 'a'. Then | 
| 171 |  |  |  |  |  |  | `file1` and `file3` will be returned. | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  | If set to 2 (the default for `uniq-files`), will only return the first of | 
| 174 |  |  |  |  |  |  | duplicate items. Continuing from previous example, only `file1` will be returned | 
| 175 |  |  |  |  |  |  | because `file2` is unique and `file3` contains 'a' (already represented by | 
| 176 |  |  |  |  |  |  | `file1`). If one or more `--authoritative-dir` (`-O`) options are specified, | 
| 177 |  |  |  |  |  |  | files under these directories will be preferred. | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | If set to 3, will return all but the first of duplicate items. Continuing from | 
| 180 |  |  |  |  |  |  | previous example: `file3` will be returned. This is useful if you want to keep | 
| 181 |  |  |  |  |  |  | only one copy of the duplicate content. You can use the output of this routine | 
| 182 |  |  |  |  |  |  | to `mv` or `rm`. Similar to the previous case, if one or more | 
| 183 |  |  |  |  |  |  | `--authoritative-dir` (`-O`) options are specified, then files under these | 
| 184 |  |  |  |  |  |  | directories will not be listed if possible. | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | _ | 
| 187 |  |  |  |  |  |  | cmdline_aliases => { | 
| 188 |  |  |  |  |  |  | }, | 
| 189 |  |  |  |  |  |  | }, | 
| 190 |  |  |  |  |  |  | algorithm => { | 
| 191 |  |  |  |  |  |  | schema => ['str*'], | 
| 192 |  |  |  |  |  |  | summary => "What algorithm is used to compute the digest of the content", | 
| 193 |  |  |  |  |  |  | description => <<'_', | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  | The default is to use `md5`. Some algorithms supported include `crc32`, `sha1`, | 
| 196 |  |  |  |  |  |  | `sha256`, as well as `Digest` to use Perl <pm:Digest> which supports a lot of | 
| 197 |  |  |  |  |  |  | other algorithms, e.g. `SHA-1`, `BLAKE2b`. | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | If set to '', 'none', or 'size', then the digest will be set to the file size. This | 
| 200 |  |  |  |  |  |  | means uniqueness will be determined solely from file size. This can be quicker, | 
| 201 |  |  |  |  |  |  | but it will generate false positives: two files of the same size will be deemed | 
| 202 |  |  |  |  |  |  | duplicates even though their content may differ. | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | If set to 'name' then only name comparison will be performed. This of course can | 
| 205 |  |  |  |  |  |  | potentially generate lots of false positives, but in some cases you might want | 
| 206 |  |  |  |  |  |  | to compare filenames for uniqueness. | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | _ | 
| 209 |  |  |  |  |  |  | }, | 
| 210 |  |  |  |  |  |  | digest_args => { | 
| 211 |  |  |  |  |  |  | schema => ['array*', | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | # comment out temporarily, Perinci::Sub::GetArgs::Argv | 
| 214 |  |  |  |  |  |  | # clashes with coerce rules; we should fix | 
| 215 |  |  |  |  |  |  | # Perinci::Sub::GetArgs::Argv to observe coercion rules | 
| 216 |  |  |  |  |  |  | # first | 
| 217 |  |  |  |  |  |  | #of=>'str*', | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | 'x.perl.coerce_rules'=>['From_str::comma_sep']], | 
| 220 |  |  |  |  |  |  | description => <<'_', | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | Some Digest algorithms require arguments, you can pass them here. | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | _ | 
| 225 |  |  |  |  |  |  | cmdline_aliases => {A=>{}}, | 
| 226 |  |  |  |  |  |  | }, | 
| 227 |  |  |  |  |  |  | show_count => { | 
| 228 |  |  |  |  |  |  | schema => [bool => {default=>0}], | 
| 229 |  |  |  |  |  |  | summary => "Whether to return each file content's ". | 
| 230 |  |  |  |  |  |  | "number of occurrences", | 
| 231 |  |  |  |  |  |  | description => <<'_', | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | 1 means the file content is only encountered once (unique), 2 means there is one | 
| 234 |  |  |  |  |  |  | duplicate, and so on. | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | _ | 
| 237 |  |  |  |  |  |  | cmdline_aliases => {count=>{}, c=>{}}, | 
| 238 |  |  |  |  |  |  | }, | 
| 239 |  |  |  |  |  |  | detail => { | 
| 240 |  |  |  |  |  |  | summary => 'Show details (a.k.a. --show-digest, --show-size, --show-count)', | 
| 241 |  |  |  |  |  |  | schema => 'true*', | 
| 242 |  |  |  |  |  |  | cmdline_aliases => {l=>{}}, | 
| 243 |  |  |  |  |  |  | }, | 
| 244 |  |  |  |  |  |  | %argspec_authoritative_dirs, | 
| 245 |  |  |  |  |  |  | %argspecs_filter, | 
| 246 |  |  |  |  |  |  | }, | 
| 247 |  |  |  |  |  |  | examples => [ | 
| 248 |  |  |  |  |  |  | { | 
| 249 |  |  |  |  |  |  | summary   => 'List all files which do not have duplicate contents', | 
| 250 |  |  |  |  |  |  | src       => 'uniq-files *', | 
| 251 |  |  |  |  |  |  | src_plang => 'bash', | 
| 252 |  |  |  |  |  |  | test      => 0, | 
| 253 |  |  |  |  |  |  | 'x.doc.show_result' => 0, | 
| 254 |  |  |  |  |  |  | }, | 
| 255 |  |  |  |  |  |  | { | 
| 256 |  |  |  |  |  |  | summary   => 'List all files (recursively, and in detail) which have duplicate contents (all duplicate copies), exclude some files', | 
| 257 |  |  |  |  |  |  | src       => q(uniq-files -R -l -d -X '\.git/' --min-size 10k .), | 
| 258 |  |  |  |  |  |  | src_plang => 'bash', | 
| 259 |  |  |  |  |  |  | test      => 0, | 
| 260 |  |  |  |  |  |  | 'x.doc.show_result' => 0, | 
| 261 |  |  |  |  |  |  | }, | 
| 262 |  |  |  |  |  |  | { | 
| 263 |  |  |  |  |  |  | summary   => 'Move all duplicate files (except one copy) in this directory (and subdirectories) to .dupes/', | 
| 264 |  |  |  |  |  |  | src       => 'uniq-files -D -R * | while read f; do mv "$f" .dupes/; done', | 
| 265 |  |  |  |  |  |  | src_plang => 'bash', | 
| 266 |  |  |  |  |  |  | test      => 0, | 
| 267 |  |  |  |  |  |  | 'x.doc.show_result' => 0, | 
| 268 |  |  |  |  |  |  | }, | 
| 269 |  |  |  |  |  |  | { | 
| 270 |  |  |  |  |  |  | summary   => 'List number of occurrences of contents for duplicate files', | 
| 271 |  |  |  |  |  |  | src       => 'uniq-files -c *', | 
| 272 |  |  |  |  |  |  | src_plang => 'bash', | 
| 273 |  |  |  |  |  |  | test      => 0, | 
| 274 |  |  |  |  |  |  | 'x.doc.show_result' => 0, | 
| 275 |  |  |  |  |  |  | }, | 
| 276 |  |  |  |  |  |  | { | 
| 277 |  |  |  |  |  |  | summary   => 'List number of occurrences of contents for all files', | 
| 278 |  |  |  |  |  |  | src       => 'uniq-files -a -c *', | 
| 279 |  |  |  |  |  |  | src_plang => 'bash', | 
| 280 |  |  |  |  |  |  | test      => 0, | 
| 281 |  |  |  |  |  |  | 'x.doc.show_result' => 0, | 
| 282 |  |  |  |  |  |  | }, | 
| 283 |  |  |  |  |  |  | { | 
| 284 |  |  |  |  |  |  | summary   => 'List all files, along with their number of content occurrences and content digest. '. | 
| 285 |  |  |  |  |  |  | 'Use the BLAKE2b digest algorithm. And group the files according to their digest.', | 
| 286 |  |  |  |  |  |  | src       => 'uniq-files -a -c --show-digest -A BLAKE2,blake2b *', | 
| 287 |  |  |  |  |  |  | src_plang => 'bash', | 
| 288 |  |  |  |  |  |  | test      => 0, | 
| 289 |  |  |  |  |  |  | 'x.doc.show_result' => 0, | 
| 290 |  |  |  |  |  |  | }, | 
| 291 |  |  |  |  |  |  | ], | 
| 292 |  |  |  |  |  |  | }; | 
| 293 |  |  |  |  |  |  | sub uniq_files { | 
| 294 | 11 |  |  | 11 | 1 | 37974 | my %args = @_; | 
| 295 |  |  |  |  |  |  |  | 
| 296 | 11 |  |  |  |  | 25 | my $files = $args{files}; | 
| 297 | 11 | 50 | 33 |  |  | 62 | return [400, "Please specify files"] if !$files || !@$files; | 
| 298 | 11 |  |  |  |  | 15 | my $recurse          = $args{recurse}; | 
| 299 | 11 |  | 100 |  |  | 32 | my $report_unique    = $args{report_unique}    // 1; | 
| 300 | 11 |  | 100 |  |  | 81 | my $report_duplicate = $args{report_duplicate} // 2; | 
| 301 | 11 |  | 100 |  |  | 41 | my $show_count       = $args{show_count}       // 0; | 
| 302 | 11 |  | 100 |  |  | 25 | my $show_digest      = $args{show_digest}      // 0; | 
| 303 | 11 |  | 100 |  |  | 32 | my $show_size        = $args{show_size}        // 0; | 
| 304 | 11 |  |  |  |  | 15 | my $digest_args      = $args{digest_args}; | 
| 305 | 11 | 50 | 66 |  |  | 40 | my $algorithm        = $args{algorithm}        // ($digest_args ? 'Digest' : 'md5'); | 
| 306 | 11 |  |  |  |  | 17 | my $group_by_digest  = $args{group_by_digest}; | 
| 307 |  |  |  |  |  |  |  | 
| 308 | 11 | 50 |  |  |  | 23 | if ($args{detail}) { | 
| 309 | 0 |  |  |  |  | 0 | $show_digest = 1; | 
| 310 | 0 |  |  |  |  | 0 | $show_size = 1; | 
| 311 | 0 |  |  |  |  | 0 | $show_count = 1; | 
| 312 |  |  |  |  |  |  | } | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | my @authoritative_dirs = $args{authoritative_dirs} && @{$args{authoritative_dirs}} ? | 
| 315 | 11 | 100 | 66 |  |  | 29 | @{ $args{authoritative_dirs} } : (); | 
|  | 2 |  |  |  |  | 5 |  | 
| 316 | 11 |  |  |  |  | 24 | for my $dir (@authoritative_dirs) { | 
| 317 | 2 | 50 |  |  |  | 33 | (-d $dir) or return [400, "Authoritative dir '$dir' does not exist or not a directory"]; | 
| 318 | 2 | 50 |  |  |  | 62 | my $abs_dir = abs_path $dir or return [400, "Cannot get absolute path for authoritative dir '$dir'"]; | 
| 319 | 2 |  |  |  |  | 8 | $dir = $abs_dir; | 
| 320 |  |  |  |  |  |  | } | 
| 321 |  |  |  |  |  |  | #log_trace "authoritative_dirs=%s", \@authoritative_dirs if @authoritative_dirs; | 
| 322 |  |  |  |  |  |  |  | 
| 323 | 11 |  |  |  |  | 15 | my @include_re; | 
| 324 | 11 |  | 50 |  |  | 15 | for my $re0 (@{ $args{include_file_patterns} // [] }) { | 
|  | 11 |  |  |  |  | 45 |  | 
| 325 | 0 |  |  |  |  | 0 | require Regexp::Util; | 
| 326 | 0 |  |  |  |  | 0 | my $re; | 
| 327 | 0 | 0 |  |  |  | 0 | if (ref $re0 eq 'Regexp') { | 
| 328 | 0 |  |  |  |  | 0 | $re = $re0; | 
| 329 |  |  |  |  |  |  | } else { | 
| 330 | 0 |  |  |  |  | 0 | eval { $re = Regexp::Util::deserialize_regexp("qr($re0)") }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 331 | 0 | 0 |  |  |  | 0 | return [400, "Invalid/unsafe regex pattern in include_file_patterns '$re0': $@"] if $@; | 
| 332 | 0 | 0 |  |  |  | 0 | return [400, "Unsafe regex pattern (contains embedded code) in include_file_patterns '$re0'"] if Regexp::Util::regexp_seen_evals($re); | 
| 333 |  |  |  |  |  |  | } | 
| 334 | 0 |  |  |  |  | 0 | push @include_re, $re; | 
| 335 |  |  |  |  |  |  | } | 
| 336 | 11 |  |  |  |  | 19 | my @exclude_re; | 
| 337 | 11 |  | 50 |  |  | 16 | for my $re0 (@{ $args{exclude_file_patterns} // [] }) { | 
|  | 11 |  |  |  |  | 39 |  | 
| 338 | 0 |  |  |  |  | 0 | require Regexp::Util; | 
| 339 | 0 |  |  |  |  | 0 | my $re; | 
| 340 | 0 | 0 |  |  |  | 0 | if (ref $re0 eq 'Regexp') { | 
| 341 | 0 |  |  |  |  | 0 | $re = $re0; | 
| 342 |  |  |  |  |  |  | } else { | 
| 343 | 0 |  |  |  |  | 0 | eval { $re = Regexp::Util::deserialize_regexp("qr($re0)") }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 344 | 0 | 0 |  |  |  | 0 | return [400, "Invalid/unsafe regex pattern in exclude_file_patterns '$re0': $@"] if $@; | 
| 345 | 0 | 0 |  |  |  | 0 | return [400, "Unsafe regex pattern (contains embedded code) in exclude_file_patterns '$re0'"] if Regexp::Util::regexp_seen_evals($re); | 
| 346 |  |  |  |  |  |  | } | 
| 347 | 0 |  |  |  |  | 0 | push @exclude_re, $re; | 
| 348 |  |  |  |  |  |  | } | 
| 349 |  |  |  |  |  |  |  | 
| 350 | 11 | 100 |  |  |  | 24 | if ($recurse) { | 
| 351 |  |  |  |  |  |  | $files = [ map { | 
| 352 | 5 | 50 |  |  |  | 12 | if (-l $_) { | 
|  | 35 | 100 |  |  |  | 330 |  | 
| 353 | 0 |  |  |  |  | 0 | (); | 
| 354 |  |  |  |  |  |  | } elsif (-d _) { | 
| 355 | 10 |  |  |  |  | 34 | (_glob($_)); | 
| 356 |  |  |  |  |  |  | } else { | 
| 357 | 25 |  |  |  |  | 94 | ($_); | 
| 358 |  |  |  |  |  |  | } | 
| 359 |  |  |  |  |  |  | } @$files ]; | 
| 360 |  |  |  |  |  |  | } | 
| 361 |  |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  | FILTER: { | 
| 363 | 11 |  |  |  |  | 19 | my $ffiles; | 
|  | 11 |  |  |  |  | 15 |  | 
| 364 |  |  |  |  |  |  | FILE: | 
| 365 | 11 |  |  |  |  | 25 | for my $f (@$files) { | 
| 366 | 87 | 50 |  |  |  | 778 | if (-l $f) { | 
| 367 | 0 |  |  |  |  | 0 | log_warn "File '$f' is a symlink, ignored"; | 
| 368 | 0 |  |  |  |  | 0 | next FILE; | 
| 369 |  |  |  |  |  |  | } | 
| 370 | 87 | 100 |  |  |  | 218 | if (-d _) { | 
| 371 | 12 |  |  |  |  | 54 | log_warn "File '$f' is a directory, ignored"; | 
| 372 | 12 |  |  |  |  | 46 | next FILE; | 
| 373 |  |  |  |  |  |  | } | 
| 374 | 75 | 50 |  |  |  | 132 | unless (-f _) { | 
| 375 | 0 |  |  |  |  | 0 | log_warn "File '$f' is not a regular file, ignored"; | 
| 376 | 0 |  |  |  |  | 0 | next FILE; | 
| 377 |  |  |  |  |  |  | } | 
| 378 |  |  |  |  |  |  |  | 
| 379 | 75 | 50 |  |  |  | 151 | if (@include_re) { | 
| 380 | 0 |  |  |  |  | 0 | my $included; | 
| 381 | 0 |  |  |  |  | 0 | for my $re (@include_re) { | 
| 382 | 0 | 0 |  |  |  | 0 | if ($f =~ $re) { $included++; last } | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 383 |  |  |  |  |  |  | } | 
| 384 | 0 | 0 |  |  |  | 0 | unless ($included) { | 
| 385 | 0 |  |  |  |  | 0 | log_info "File '$f' is not in --include-file-patterns, skipped"; | 
| 386 | 0 |  |  |  |  | 0 | next FILE; | 
| 387 |  |  |  |  |  |  | } | 
| 388 |  |  |  |  |  |  | } | 
| 389 | 75 | 50 |  |  |  | 150 | if (@exclude_re) { | 
| 390 | 0 |  |  |  |  | 0 | for my $re (@exclude_re) { | 
| 391 | 0 | 0 |  |  |  | 0 | if ($f =~ $re) { | 
| 392 | 0 |  |  |  |  | 0 | log_info "File '$f' is in --exclude-file-patterns, skipped"; | 
| 393 | 0 |  |  |  |  | 0 | next FILE; | 
| 394 |  |  |  |  |  |  | } | 
| 395 |  |  |  |  |  |  | } | 
| 396 |  |  |  |  |  |  | } | 
| 397 |  |  |  |  |  |  |  | 
| 398 | 75 |  |  |  |  | 583 | my $size = -s $f; | 
| 399 | 75 | 50 | 33 |  |  | 208 | if ($args{exclude_empty_files} && !$size) { | 
| 400 | 0 |  |  |  |  | 0 | log_info "File '$f' is empty, skipped by option -Z"; | 
| 401 | 0 |  |  |  |  | 0 | next FILE; | 
| 402 |  |  |  |  |  |  | } | 
| 403 | 75 | 50 | 33 |  |  | 156 | if ($args{min_size} && $size < $args{min_size}) { | 
| 404 | 0 |  |  |  |  | 0 | log_info "File '$f' (size=$size) is smaller than min_size ($args{min_size}), skipped"; | 
| 405 | 0 |  |  |  |  | 0 | next FILE; | 
| 406 |  |  |  |  |  |  | } | 
| 407 | 75 | 50 | 33 |  |  | 130 | if ($args{max_size} && $size > $args{max_size}) { | 
| 408 | 0 |  |  |  |  | 0 | log_info "File '$f' (size=$size) is larger than max_size ($args{max_size}), skipped"; | 
| 409 | 0 |  |  |  |  | 0 | next FILE; | 
| 410 |  |  |  |  |  |  | } | 
| 411 |  |  |  |  |  |  |  | 
| 412 | 75 |  |  |  |  | 203 | push @$ffiles, $f; | 
| 413 |  |  |  |  |  |  | } | 
| 414 | 11 |  |  |  |  | 25 | $files = $ffiles; | 
| 415 |  |  |  |  |  |  | } # FILTER | 
| 416 |  |  |  |  |  |  |  | 
| 417 | 11 |  |  |  |  | 17 | my %name_files; # key = filename (computed), value = [path, ...] | 
| 418 |  |  |  |  |  |  | GROUP_FILE_NAMES: { | 
| 419 | 11 |  |  |  |  | 15 | for my $f (@$files) { | 
|  | 11 |  |  |  |  | 20 |  | 
| 420 |  |  |  |  |  |  | #my $path = abs_path($f); | 
| 421 | 75 |  |  |  |  | 173 | (my $basename = $f) =~ s!.+/!!; | 
| 422 | 75 |  | 50 |  |  | 280 | $name_files{$basename} //= []; | 
| 423 | 75 |  |  |  |  | 154 | push @{ $name_files{$basename} }, $f | 
| 424 | 75 | 50 |  |  |  | 95 | unless grep { $_ eq $f } @{ $name_files{$basename} }; | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 75 |  |  |  |  | 189 |  | 
| 425 |  |  |  |  |  |  | } | 
| 426 |  |  |  |  |  |  | #use DD; dd \%name_files; | 
| 427 |  |  |  |  |  |  | } | 
| 428 |  |  |  |  |  |  |  | 
| 429 | 11 |  |  |  |  | 30 | my %size_counts; # key = size, value = number of files having that size | 
| 430 |  |  |  |  |  |  | my %size_files; # key = size, value = [file, ...] | 
| 431 | 11 |  |  |  |  | 0 | my %file_sizes; # key = filename, value = file size, for caching stat() | 
| 432 |  |  |  |  |  |  | GET_FILE_SIZES: { | 
| 433 | 11 |  |  |  |  | 14 | for my $f (@$files) { | 
|  | 11 |  |  |  |  | 17 |  | 
| 434 | 75 |  |  |  |  | 730 | my @st = stat $f; | 
| 435 | 75 | 50 |  |  |  | 192 | unless (@st) { | 
| 436 | 0 |  |  |  |  | 0 | log_error("Can't stat file `$f`: $!, skipped"); | 
| 437 | 0 |  |  |  |  | 0 | next; | 
| 438 |  |  |  |  |  |  | } | 
| 439 | 75 |  |  |  |  | 162 | $size_counts{$st[7]}++; | 
| 440 | 75 |  | 100 |  |  | 220 | $size_files{$st[7]} //= []; | 
| 441 | 75 |  |  |  |  | 94 | push @{$size_files{$st[7]}}, $f; | 
|  | 75 |  |  |  |  | 157 |  | 
| 442 | 75 |  |  |  |  | 214 | $file_sizes{$f} = $st[7]; | 
| 443 |  |  |  |  |  |  | } | 
| 444 |  |  |  |  |  |  | } | 
| 445 |  |  |  |  |  |  |  | 
| 446 | 11 |  | 66 |  |  | 70 | my $calc_digest = !($algorithm eq '' || $algorithm eq 'none' || $algorithm eq 'size' || $algorithm eq 'name'); | 
| 447 |  |  |  |  |  |  |  | 
| 448 |  |  |  |  |  |  | # calculate digest for all files having non-unique sizes | 
| 449 | 11 |  |  |  |  | 25 | my %digest_counts; # key = digest, value = num of files having that digest | 
| 450 |  |  |  |  |  |  | my %digest_files; # key = digest, value = [file, ...] | 
| 451 | 11 |  |  |  |  | 0 | my %file_digests; # key = filename, value = file digest | 
| 452 |  |  |  |  |  |  | CALC_FILE_DIGESTS: { | 
| 453 | 11 | 100 |  |  |  | 14 | last unless $calc_digest; | 
|  | 11 |  |  |  |  | 22 |  | 
| 454 | 10 |  |  |  |  | 522 | require File::Digest; | 
| 455 |  |  |  |  |  |  |  | 
| 456 | 10 |  |  |  |  | 2197 | for my $f (@$files) { | 
| 457 | 66 | 50 |  |  |  | 148 | next unless defined $file_sizes{$f}; # just checking. all files should have sizes. | 
| 458 | 66 | 100 |  |  |  | 147 | next if $size_counts{ $file_sizes{$f} } == 1; # skip unique file sizes. | 
| 459 | 60 |  |  |  |  | 146 | my $res = File::Digest::digest_file( | 
| 460 |  |  |  |  |  |  | file=>$f, algorithm=>$algorithm, digest_args=>$digest_args); | 
| 461 | 60 | 50 |  |  |  | 9846 | return [500, "Can't calculate digest for file '$f': $res->[0] - $res->[1]"] | 
| 462 |  |  |  |  |  |  | unless $res->[0] == 200; | 
| 463 | 60 |  |  |  |  | 107 | my $digest = $res->[2]; | 
| 464 | 60 |  |  |  |  | 123 | $digest_counts{$digest}++; | 
| 465 | 60 |  | 100 |  |  | 207 | $digest_files{$digest} //= []; | 
| 466 | 60 |  |  |  |  | 89 | push @{$digest_files{$digest}}, $f; | 
|  | 60 |  |  |  |  | 145 |  | 
| 467 | 60 |  |  |  |  | 160 | $file_digests{$f} = $digest; | 
| 468 |  |  |  |  |  |  | } | 
| 469 |  |  |  |  |  |  | } | 
| 470 |  |  |  |  |  |  |  | 
| 471 | 11 |  |  |  |  | 21 | my %file_counts; # key = file name, value = num of files having file content | 
| 472 | 11 |  |  |  |  | 18 | for my $f (@$files) { | 
| 473 | 75 | 50 |  |  |  | 197 | next unless defined $file_sizes{$f}; # just checking | 
| 474 | 75 | 100 |  |  |  | 123 | if (!defined($file_digests{$f})) { | 
| 475 | 15 |  |  |  |  | 35 | $file_counts{$f} = $size_counts{ $file_sizes{$f} }; | 
| 476 |  |  |  |  |  |  | } else { | 
| 477 | 60 |  |  |  |  | 110 | $file_counts{$f} = $digest_counts{ $file_digests{$f} }; | 
| 478 |  |  |  |  |  |  | } | 
| 479 |  |  |  |  |  |  | } | 
| 480 |  |  |  |  |  |  |  | 
| 481 |  |  |  |  |  |  | SORT_DUPLICATE_FILES: { | 
| 482 | 11 | 100 |  |  |  | 15 | last unless @authoritative_dirs; | 
|  | 11 |  |  |  |  | 26 |  | 
| 483 | 2 | 0 |  |  |  | 8 | my $hash = $calc_digest ? \%digest_files : $algorithm eq 'name' ? \%name_files : \%size_files; | 
|  |  | 50 |  |  |  |  |  | 
| 484 | 2 |  |  |  |  | 8 | for my $key (keys %$hash) { | 
| 485 | 10 |  |  |  |  | 31 | my @files = @{ $hash->{$key} }; | 
|  | 10 |  |  |  |  | 22 |  | 
| 486 | 10 |  |  |  |  | 14 | my @abs_files; | 
| 487 | 10 | 100 |  |  |  | 22 | next unless @files > 1; | 
| 488 | 4 |  |  |  |  | 8 | for my $file (@files) { | 
| 489 | 12 | 50 |  |  |  | 194 | my $abs_file = abs_path $file or do { | 
| 490 | 0 |  |  |  |  | 0 | log_error "Cannot find absolute path for duplicate file '$file', skipping duplicate set %s", \@files; | 
| 491 |  |  |  |  |  |  | }; | 
| 492 | 12 |  |  |  |  | 37 | push @abs_files, $abs_file; | 
| 493 |  |  |  |  |  |  | } | 
| 494 |  |  |  |  |  |  |  | 
| 495 |  |  |  |  |  |  | #log_trace "Duplicate files before sorting: %s", \@files; | 
| 496 | 12 |  |  |  |  | 29 | @files = map { $files[$_] } sort { | 
| 497 | 4 |  |  |  |  | 19 | my $file_a = $abs_files[$a]; | 
|  | 10 |  |  |  |  | 20 |  | 
| 498 | 10 |  |  |  |  | 13 | my $file_a_in_authoritative_dirs = 0; | 
| 499 | 10 |  |  |  |  | 15 | my $subdir_len_file_a; | 
| 500 | 10 |  |  |  |  | 13 | for my $d (@authoritative_dirs) { | 
| 501 | 10 | 50 |  |  |  | 69 | if ($file_a =~ m!\A\Q$d\E(?:/|\z)(.*)!) { $file_a_in_authoritative_dirs++; $subdir_len_file_a = length($1); last } | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 502 |  |  |  |  |  |  | } | 
| 503 | 10 |  |  |  |  | 16 | my $file_b = $abs_files[$b]; | 
| 504 | 10 |  |  |  |  | 15 | my $file_b_in_authoritative_dirs = 0; | 
| 505 | 10 |  |  |  |  | 14 | my $subdir_len_file_b; | 
| 506 | 10 |  |  |  |  | 11 | for my $d (@authoritative_dirs) { | 
| 507 | 10 | 100 |  |  |  | 53 | if ($file_b =~ m!\A\Q$d\E(?:/|\z)(.*)!) { $file_b_in_authoritative_dirs++; $subdir_len_file_b = length($1); last } | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 7 |  | 
|  | 2 |  |  |  |  | 4 |  | 
| 508 |  |  |  |  |  |  | } | 
| 509 |  |  |  |  |  |  | #log_trace "  file_a=<$file_a>, in authoritative_dirs? $file_a_in_authoritative_dirs"; | 
| 510 |  |  |  |  |  |  | #log_trace "  file_b=<$file_b>, in authoritative_dirs? $file_b_in_authoritative_dirs"; | 
| 511 |  |  |  |  |  |  | # files located nearer the root of an authoritative dir are preferred | 
| 512 |  |  |  |  |  |  | # over deeper files. this is done by comparing subdir_len | 
| 513 | 10 | 50 |  |  |  | 39 | ($file_a_in_authoritative_dirs ? $subdir_len_file_a : 9999) <=> ($file_b_in_authoritative_dirs ? $subdir_len_file_b : 9999) || | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 514 |  |  |  |  |  |  | $file_a cmp $file_b; | 
| 515 |  |  |  |  |  |  | } 0..$#files; | 
| 516 |  |  |  |  |  |  | #log_trace "Duplicate files after sorting: %s", \@files; | 
| 517 |  |  |  |  |  |  |  | 
| 518 | 4 |  |  |  |  | 14 | $hash->{$key} = \@files; | 
| 519 |  |  |  |  |  |  | } | 
| 520 |  |  |  |  |  |  | } | 
| 521 |  |  |  |  |  |  |  | 
| 522 |  |  |  |  |  |  | #$log->trace("report_duplicate=$report_duplicate"); | 
| 523 | 11 |  |  |  |  | 19 | my @files; | 
| 524 | 11 |  |  |  |  | 64 | for my $f (sort keys %file_counts) { | 
| 525 | 75 | 100 |  |  |  | 155 | if ($file_counts{$f} == 1) { | 
| 526 |  |  |  |  |  |  | #log_trace "unique file '$f'"; | 
| 527 | 24 | 100 |  |  |  | 48 | push @files, $f if $report_unique; | 
| 528 |  |  |  |  |  |  | } else { | 
| 529 |  |  |  |  |  |  | #log_trace "duplicate file '$f'"; | 
| 530 |  |  |  |  |  |  | my $is_first_copy = $calc_digest ? | 
| 531 |  |  |  |  |  |  | $f eq $digest_files{ $file_digests{$f} }[0] : | 
| 532 | 51 | 100 |  |  |  | 107 | $f eq $size_files{ $file_sizes{$f} }[0]; | 
| 533 |  |  |  |  |  |  | #log_trace "is first copy? <$is_first_copy>"; | 
| 534 | 51 | 100 |  |  |  | 114 | if ($report_duplicate == 0) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 535 |  |  |  |  |  |  | # do not report dupe files | 
| 536 |  |  |  |  |  |  | } elsif ($report_duplicate == 1) { | 
| 537 | 15 |  |  |  |  | 32 | push @files, $f; | 
| 538 |  |  |  |  |  |  | } elsif ($report_duplicate == 2) { | 
| 539 | 21 | 100 |  |  |  | 43 | push @files, $f if $is_first_copy; | 
| 540 |  |  |  |  |  |  | } elsif ($report_duplicate == 3) { | 
| 541 | 9 | 100 |  |  |  | 25 | push @files, $f unless $is_first_copy; | 
| 542 |  |  |  |  |  |  | } else { | 
| 543 | 0 |  |  |  |  | 0 | die "Invalid value for --report-duplicate ". | 
| 544 |  |  |  |  |  |  | "'$report_duplicate', please choose 0/1/2/3"; | 
| 545 |  |  |  |  |  |  | } | 
| 546 |  |  |  |  |  |  | } | 
| 547 |  |  |  |  |  |  | } | 
| 548 |  |  |  |  |  |  |  | 
| 549 |  |  |  |  |  |  | GROUP_FILES_BY_DIGEST: { | 
| 550 | 11 | 100 |  |  |  | 17 | last unless $group_by_digest; | 
|  | 11 |  |  |  |  | 24 |  | 
| 551 |  |  |  |  |  |  | @files = sort { | 
| 552 | 1 |  |  |  |  | 9 | $file_sizes{$a} <=> $file_sizes{$b} || | 
| 553 | 20 | 50 | 50 |  |  | 55 | ($file_digests{$a} // '') cmp ($file_digests{$b} // '') | 
|  |  |  | 50 |  |  |  |  | 
| 554 |  |  |  |  |  |  | } @files; | 
| 555 |  |  |  |  |  |  | } | 
| 556 |  |  |  |  |  |  |  | 
| 557 | 11 |  |  |  |  | 32 | my @rows; | 
| 558 |  |  |  |  |  |  | my %resmeta; | 
| 559 | 11 |  |  |  |  | 0 | my $last_digest; | 
| 560 | 11 |  |  |  |  | 20 | for my $f (@files) { | 
| 561 | 41 |  | 66 |  |  | 88 | my $digest = $file_digests{$f} // $file_sizes{$f}; | 
| 562 |  |  |  |  |  |  |  | 
| 563 |  |  |  |  |  |  | # add separator row | 
| 564 | 41 | 100 | 100 |  |  | 98 | if ($group_by_digest && defined $last_digest && $digest ne $last_digest) { | 
|  |  |  | 100 |  |  |  |  | 
| 565 | 4 | 50 | 33 |  |  | 16 | push @rows, ($show_count || $show_digest || $show_size) ? {} : ''; | 
| 566 |  |  |  |  |  |  | } | 
| 567 |  |  |  |  |  |  |  | 
| 568 | 41 |  |  |  |  | 48 | my $row; | 
| 569 | 41 | 100 | 100 |  |  | 131 | if ($show_count || $show_digest || $show_size) { | 
|  |  |  | 100 |  |  |  |  | 
| 570 | 19 |  |  |  |  | 37 | $row = {file=>$f}; | 
| 571 | 19 | 100 |  |  |  | 42 | $row->{count} = $file_counts{$f} if $show_count; | 
| 572 | 19 | 100 |  |  |  | 35 | $row->{digest} = $file_digests{$f} if $show_digest; | 
| 573 | 19 | 100 |  |  |  | 32 | $row->{size} = $file_sizes{$f} if $show_size; | 
| 574 |  |  |  |  |  |  | } else { | 
| 575 | 22 |  |  |  |  | 30 | $row = $f; | 
| 576 |  |  |  |  |  |  | } | 
| 577 | 41 |  |  |  |  | 65 | push @rows, $row; | 
| 578 | 41 |  |  |  |  | 67 | $last_digest = $digest; | 
| 579 |  |  |  |  |  |  | } | 
| 580 |  |  |  |  |  |  |  | 
| 581 | 11 |  |  |  |  | 36 | $resmeta{'table.fields'} = [qw/file size digest count/]; | 
| 582 |  |  |  |  |  |  |  | 
| 583 | 11 |  |  |  |  | 161 | [200, "OK", \@rows, \%resmeta]; | 
| 584 |  |  |  |  |  |  | } | 
| 585 |  |  |  |  |  |  |  | 
| 586 |  |  |  |  |  |  | gen_modified_sub( | 
| 587 |  |  |  |  |  |  | base_name => 'uniq_files', | 
| 588 |  |  |  |  |  |  | output_name => 'dupe_files', | 
| 589 |  |  |  |  |  |  | description => <<'_', | 
| 590 |  |  |  |  |  |  |  | 
| 591 |  |  |  |  |  |  | This is a thin wrapper to <prog:uniq-files>. It defaults `report_unique` to 0 | 
| 592 |  |  |  |  |  |  | and `report_duplicate` to 1. | 
| 593 |  |  |  |  |  |  |  | 
| 594 |  |  |  |  |  |  | _ | 
| 595 |  |  |  |  |  |  | modify_args => { | 
| 596 |  |  |  |  |  |  | report_unique => sub { | 
| 597 |  |  |  |  |  |  | $_[0]{schema} = [bool => {default=>0}]; | 
| 598 |  |  |  |  |  |  | }, | 
| 599 |  |  |  |  |  |  | report_duplicate => sub { | 
| 600 |  |  |  |  |  |  | $_[0]{schema} = [int => {in=>[0,1,2,3], default=>1}]; | 
| 601 |  |  |  |  |  |  | }, | 
| 602 |  |  |  |  |  |  | }, | 
| 603 |  |  |  |  |  |  | modify_meta => sub { | 
| 604 |  |  |  |  |  |  | $_[0]{examples} = [ | 
| 605 |  |  |  |  |  |  | { | 
| 606 |  |  |  |  |  |  | summary   => 'List all files (recursively, and in detail) which have duplicate contents (all duplicate copies)', | 
| 607 |  |  |  |  |  |  | src       => 'dupe-files -lR *', | 
| 608 |  |  |  |  |  |  | src_plang => 'bash', | 
| 609 |  |  |  |  |  |  | test      => 0, | 
| 610 |  |  |  |  |  |  | 'x.doc.show_result' => 0, | 
| 611 |  |  |  |  |  |  | }, | 
| 612 |  |  |  |  |  |  | ]; | 
| 613 |  |  |  |  |  |  | }, | 
| 614 |  |  |  |  |  |  | output_code => sub { | 
| 615 | 0 |  |  | 0 |  |  | my %args = @_; | 
| 616 | 0 |  | 0 |  |  |  | $args{report_unique} //= 0; | 
| 617 | 0 |  | 0 |  |  |  | $args{report_duplicate} //= 1; | 
| 618 | 0 |  |  |  |  |  | uniq_files(%args); | 
| 619 |  |  |  |  |  |  | }, | 
| 620 |  |  |  |  |  |  | ); | 
| 621 |  |  |  |  |  |  |  | 
| 622 |  |  |  |  |  |  | 1; | 
| 623 |  |  |  |  |  |  | # ABSTRACT: Report duplicate or unique file contents | 
| 624 |  |  |  |  |  |  |  | 
| 625 |  |  |  |  |  |  | __END__ | 
| 626 |  |  |  |  |  |  |  | 
| 627 |  |  |  |  |  |  | =pod | 
| 628 |  |  |  |  |  |  |  | 
| 629 |  |  |  |  |  |  | =encoding UTF-8 | 
| 630 |  |  |  |  |  |  |  | 
| 631 |  |  |  |  |  |  | =head1 NAME | 
| 632 |  |  |  |  |  |  |  | 
| 633 |  |  |  |  |  |  | App::UniqFiles - Report duplicate or unique file contents | 
| 634 |  |  |  |  |  |  |  | 
| 635 |  |  |  |  |  |  | =head1 VERSION | 
| 636 |  |  |  |  |  |  |  | 
| 637 |  |  |  |  |  |  | This document describes version 0.141 of App::UniqFiles (from Perl distribution App-UniqFiles), released on 2023-02-06. | 
| 638 |  |  |  |  |  |  |  | 
| 639 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 640 |  |  |  |  |  |  |  | 
| 641 |  |  |  |  |  |  | # See uniq-files script | 
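|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | A minimal usage sketch from Perl (filenames are hypothetical; uniq_files is exported on request): | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | use App::UniqFiles qw(uniq_files); | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # enveloped result: [status, message, payload, meta] | 
|  |  |  |  |  |  |  | my $res = uniq_files(files => ["foo.txt", "bar.txt", "baz.txt"]); | 
|  |  |  |  |  |  |  | if ($res->[0] == 200) { | 
|  |  |  |  |  |  |  |     print "$_\n" for @{ $res->[2] };  # unique files + first copy of each duplicate set | 
|  |  |  |  |  |  |  | } | 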
| 642 |  |  |  |  |  |  |  | 
| 643 |  |  |  |  |  |  | =head1 NOTES | 
| 644 |  |  |  |  |  |  |  | 
| 645 |  |  |  |  |  |  | =head1 FUNCTIONS | 
| 646 |  |  |  |  |  |  |  | 
| 647 |  |  |  |  |  |  |  | 
| 648 |  |  |  |  |  |  | =head2 dupe_files | 
| 649 |  |  |  |  |  |  |  | 
| 650 |  |  |  |  |  |  | Usage: | 
| 651 |  |  |  |  |  |  |  | 
| 652 |  |  |  |  |  |  | dupe_files(%args) -> [$status_code, $reason, $payload, \%result_meta] | 
| 653 |  |  |  |  |  |  |  | 
| 654 |  |  |  |  |  |  | Report duplicate or unique file contents. | 
| 655 |  |  |  |  |  |  |  | 
| 656 |  |  |  |  |  |  | This is a thin wrapper to L<uniq-files>. It defaults C<report_unique> to 0 | 
| 657 |  |  |  |  |  |  | and C<report_duplicate> to 1. | 
| 658 |  |  |  |  |  |  |  | 
| 659 |  |  |  |  |  |  | This function is not exported. | 
| 660 |  |  |  |  |  |  |  | 
| 661 |  |  |  |  |  |  | Arguments ('*' denotes required arguments): | 
| 662 |  |  |  |  |  |  |  | 
| 663 |  |  |  |  |  |  | =over 4 | 
| 664 |  |  |  |  |  |  |  | 
| 665 |  |  |  |  |  |  | =item * B<algorithm> => I<str> | 
| 666 |  |  |  |  |  |  |  | 
| 667 |  |  |  |  |  |  | What algorithm is used to compute the digest of the content. | 
| 668 |  |  |  |  |  |  |  | 
| 669 |  |  |  |  |  |  | The default is to use C<md5>. Some algorithms supported include C<crc32>, C<sha1>, | 
| 670 |  |  |  |  |  |  | C<sha256>, as well as C<Digest> to use Perl L<Digest> which supports a lot of | 
| 671 |  |  |  |  |  |  | other algorithms, e.g. C<SHA-1>, C<BLAKE2b>. | 
| 672 |  |  |  |  |  |  |  | 
| 673 |  |  |  |  |  |  | If set to '', 'none', or 'size', then the digest will be set to the file size. This | 
| 674 |  |  |  |  |  |  | means uniqueness will be determined solely from file size. This can be quicker, | 
| 675 |  |  |  |  |  |  | but it will generate false positives: two files of the same size will be deemed | 
| 676 |  |  |  |  |  |  | duplicates even though their content may differ. | 
| 677 |  |  |  |  |  |  |  | 
| 678 |  |  |  |  |  |  | If set to 'name' then only name comparison will be performed. This of course can | 
| 679 |  |  |  |  |  |  | potentially generate lots of false positives, but in some cases you might want | 
| 680 |  |  |  |  |  |  | to compare filenames for uniqueness. | 
| 681 |  |  |  |  |  |  |  | 
| 682 |  |  |  |  |  |  | =item * B<authoritative_dirs> => I<array[str]> | 
| 683 |  |  |  |  |  |  |  | 
| 684 |  |  |  |  |  |  | Denote director(yE<verbar>ies) where authoritativeE<sol>"Original" copies are found. | 
| 685 |  |  |  |  |  |  |  | 
| 686 |  |  |  |  |  |  | =item * B<detail> => I<true> | 
| 687 |  |  |  |  |  |  |  | 
| 688 |  |  |  |  |  |  | Show details (a.k.a. --show-digest, --show-size, --show-count). | 
| 689 |  |  |  |  |  |  |  | 
| 690 |  |  |  |  |  |  | =item * B<digest_args> => I<array> | 
| 691 |  |  |  |  |  |  |  | 
| 692 |  |  |  |  |  |  | Some Digest algorithms require arguments, you can pass them here. | 
| 693 |  |  |  |  |  |  |  | 
| 694 |  |  |  |  |  |  | =item * B<exclude_empty_files> => I<bool> | 
| 695 |  |  |  |  |  |  |  | 
| 696 |  |  |  |  |  |  | (No description) | 
| 697 |  |  |  |  |  |  |  | 
| 698 |  |  |  |  |  |  | =item * B<exclude_file_patterns> => I<array[str]> | 
| 699 |  |  |  |  |  |  |  | 
| 700 |  |  |  |  |  |  | Filename (including path) regex patterns to exclude. | 
| 701 |  |  |  |  |  |  |  | 
| 702 |  |  |  |  |  |  | =item * B<files>* => I<array[str]> | 
| 703 |  |  |  |  |  |  |  | 
| 704 |  |  |  |  |  |  | (No description) | 
| 705 |  |  |  |  |  |  |  | 
| 706 |  |  |  |  |  |  | =item * B<group_by_digest> => I<bool> | 
| 707 |  |  |  |  |  |  |  | 
| 708 |  |  |  |  |  |  | Sort files by their digest (or size, if not computing digest), separating each different digest. | 
| 709 |  |  |  |  |  |  |  | 
| 710 |  |  |  |  |  |  | =item * B<include_file_patterns> => I<array[str]> | 
| 711 |  |  |  |  |  |  |  | 
| 712 |  |  |  |  |  |  | Filename (including path) regex patterns to include. | 
| 713 |  |  |  |  |  |  |  | 
| 714 |  |  |  |  |  |  | =item * B<max_size> => I<filesize> | 
| 715 |  |  |  |  |  |  |  | 
| 716 |  |  |  |  |  |  | Maximum file size to consider. | 
| 717 |  |  |  |  |  |  |  | 
| 718 |  |  |  |  |  |  | =item * B<min_size> => I<filesize> | 
| 719 |  |  |  |  |  |  |  | 
| 720 |  |  |  |  |  |  | Minimum file size to consider. | 
| 721 |  |  |  |  |  |  |  | 
| 722 |  |  |  |  |  |  | =item * B<recurse> => I<bool> | 
| 723 |  |  |  |  |  |  |  | 
| 724 |  |  |  |  |  |  | If set to true, will recurse into subdirectories. | 
| 725 |  |  |  |  |  |  |  | 
| 726 |  |  |  |  |  |  | =item * B<report_duplicate> => I<int> (default: 1) | 
| 727 |  |  |  |  |  |  |  | 
| 728 |  |  |  |  |  |  | Whether to return duplicate items. | 
| 729 |  |  |  |  |  |  |  | 
| 730 |  |  |  |  |  |  | Can be set to either 0, 1, 2, or 3. | 
| 731 |  |  |  |  |  |  |  | 
| 732 |  |  |  |  |  |  | If set to 0, duplicate items will not be returned. | 
| 733 |  |  |  |  |  |  |  | 
| 734 |  |  |  |  |  |  | If set to 1 (the default for C<dupe-files>), will return all the duplicate | 
| 735 |  |  |  |  |  |  | files. For example: C<file1> contains text 'a', C<file2> 'b', C<file3> 'a'. Then | 
| 736 |  |  |  |  |  |  | C<file1> and C<file3> will be returned. | 
| 737 |  |  |  |  |  |  |  | 
| 738 |  |  |  |  |  |  | If set to 2 (the default for C<uniq-files>), will only return the first of | 
| 739 |  |  |  |  |  |  | duplicate items. Continuing from previous example, only C<file1> will be returned | 
| 740 |  |  |  |  |  |  | because C<file2> is unique and C<file3> contains 'a' (already represented by | 
| 741 |  |  |  |  |  |  | C<file1>). If one or more C<--authoritative-dir> (C<-O>) options are specified, | 
| 742 |  |  |  |  |  |  | files under these directories will be preferred. | 
| 743 |  |  |  |  |  |  |  | 
| 744 |  |  |  |  |  |  | If set to 3, will return all but the first of duplicate items. Continuing from | 
| 745 |  |  |  |  |  |  | previous example: C<file3> will be returned. This is useful if you want to keep | 
| 746 |  |  |  |  |  |  | only one copy of the duplicate content. You can use the output of this routine | 
| 747 |  |  |  |  |  |  | to C<mv> or C<rm>. Similar to the previous case, if one or more | 
| 748 |  |  |  |  |  |  | C<--authoritative-dir> (C<-O>) options are specified, then files under these | 
| 749 |  |  |  |  |  |  | directories will not be listed if possible. | 
| 750 |  |  |  |  |  |  |  | 
| 751 |  |  |  |  |  |  | =item * B<report_unique> => I<bool> (default: 0) | 
| 752 |  |  |  |  |  |  |  | 
| 753 |  |  |  |  |  |  | Whether to return unique items. | 
| 754 |  |  |  |  |  |  |  | 
| 755 |  |  |  |  |  |  | =item * B<show_count> => I<bool> (default: 0) | 
| 756 |  |  |  |  |  |  |  | 
| 757 |  |  |  |  |  |  | Whether to return each file content's number of occurrences. | 
| 758 |  |  |  |  |  |  |  | 
| 759 |  |  |  |  |  |  | 1 means the file content is only encountered once (unique), 2 means there is one | 
| 760 |  |  |  |  |  |  | duplicate, and so on. | 
| 761 |  |  |  |  |  |  |  | 
| 762 |  |  |  |  |  |  | =item * B<show_digest> => I<true> | 
| 763 |  |  |  |  |  |  |  | 
| 764 |  |  |  |  |  |  | Show the digest value (or the size, if not computing digest) for each file. | 
| 765 |  |  |  |  |  |  |  | 
| 766 |  |  |  |  |  |  | Note that this routine does not compute digest for files which have unique | 
| 767 |  |  |  |  |  |  | sizes, so they will show up as empty. | 
| 768 |  |  |  |  |  |  |  | 
| 769 |  |  |  |  |  |  | =item * B<show_size> => I<true> | 
| 770 |  |  |  |  |  |  |  | 
| 771 |  |  |  |  |  |  | Show the size for each file. | 
| 772 |  |  |  |  |  |  |  | 
| 773 |  |  |  |  |  |  |  | 
| 774 |  |  |  |  |  |  | =back | 
| 775 |  |  |  |  |  |  |  | 
| 776 |  |  |  |  |  |  | Returns an enveloped result (an array). | 
| 777 |  |  |  |  |  |  |  | 
| 778 |  |  |  |  |  |  | First element ($status_code) is an integer containing HTTP-like status code | 
| 779 |  |  |  |  |  |  | (200 means OK, 4xx caller error, 5xx function error). Second element | 
| 780 |  |  |  |  |  |  | ($reason) is a string containing error message, or something like "OK" if status is | 
| 781 |  |  |  |  |  |  | 200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth | 
| 782 |  |  |  |  |  |  | element (%result_meta) is called result metadata and is optional, a hash | 
| 783 |  |  |  |  |  |  | that contains extra information, much like how HTTP response headers provide additional metadata. | 
| 784 |  |  |  |  |  |  |  | 
| 785 |  |  |  |  |  |  | Return value:  (any) | 
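|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | For illustration, a sketch of calling dupe_files directly (directory name is hypothetical; the defaults are report_unique=0 and report_duplicate=1): | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | use App::UniqFiles;  # dupe_files is not exported | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | my $res = App::UniqFiles::dupe_files(files => ["."], recurse => 1); | 
|  |  |  |  |  |  |  | die "dupe_files failed: $res->[1]" unless $res->[0] == 200; | 
|  |  |  |  |  |  |  | # with report_duplicate=1, every copy of duplicated content is listed | 
|  |  |  |  |  |  |  | print "$_\n" for @{ $res->[2] }; | 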
| 786 |  |  |  |  |  |  |  | 
| 787 |  |  |  |  |  |  |  | 
| 788 |  |  |  |  |  |  |  | 
| 789 |  |  |  |  |  |  | =head2 uniq_files | 
| 790 |  |  |  |  |  |  |  | 
| 791 |  |  |  |  |  |  | Usage: | 
| 792 |  |  |  |  |  |  |  | 
| 793 |  |  |  |  |  |  | uniq_files(%args) -> [$status_code, $reason, $payload, \%result_meta] | 
| 794 |  |  |  |  |  |  |  | 
| 795 |  |  |  |  |  |  | Report duplicate or unique file contents. | 
| 796 |  |  |  |  |  |  |  | 
| 797 |  |  |  |  |  |  | Given a list of filenames, will check each file size and content for duplicate | 
| 798 |  |  |  |  |  |  | content. Interface is a bit like the C<uniq> Unix command-line program. | 
| 799 |  |  |  |  |  |  |  | 
| 800 |  |  |  |  |  |  | This function is not exported by default, but exportable. | 
| 801 |  |  |  |  |  |  |  | 
| 802 |  |  |  |  |  |  | Arguments ('*' denotes required arguments): | 
| 803 |  |  |  |  |  |  |  | 
| 804 |  |  |  |  |  |  | =over 4 | 
| 805 |  |  |  |  |  |  |  | 
| 806 |  |  |  |  |  |  | =item * B<algorithm> => I<str> | 
| 807 |  |  |  |  |  |  |  | 
| 808 |  |  |  |  |  |  | What algorithm is used to compute the digest of the content. | 
| 809 |  |  |  |  |  |  |  | 
| 810 |  |  |  |  |  |  | The default is to use C<md5>. Some algorithms supported include C<crc32>, C<sha1>, | 
| 811 |  |  |  |  |  |  | C<sha256>, as well as C<Digest> to use Perl L<Digest> which supports a lot of | 
| 812 |  |  |  |  |  |  | other algorithms, e.g. C<SHA-1>, C<BLAKE2b>. | 
| 813 |  |  |  |  |  |  |  | 
| 814 |  |  |  |  |  |  | If set to '', 'none', or 'size', then the digest will be set to the file size. This | 
| 815 |  |  |  |  |  |  | means uniqueness will be determined solely from file size. This can be quicker, | 
| 816 |  |  |  |  |  |  | but it will generate false positives: two files of the same size will be deemed | 
| 817 |  |  |  |  |  |  | duplicates even though their content may differ. | 
| 818 |  |  |  |  |  |  |  | 
| 819 |  |  |  |  |  |  | If set to 'name' then only name comparison will be performed. This of course can | 
| 820 |  |  |  |  |  |  | potentially generate lots of false positives, but in some cases you might want | 
| 821 |  |  |  |  |  |  | to compare filenames for uniqueness. | 
| 822 |  |  |  |  |  |  |  | 
| 823 |  |  |  |  |  |  | =item * B<authoritative_dirs> => I<array[str]> | 
| 824 |  |  |  |  |  |  |  | 
| 825 |  |  |  |  |  |  | Denote director(yE<verbar>ies) where authoritativeE<sol>"Original" copies are found. | 
| 826 |  |  |  |  |  |  |  | 
| 827 |  |  |  |  |  |  | =item * B<detail> => I<true> | 
| 828 |  |  |  |  |  |  |  | 
| 829 |  |  |  |  |  |  | Show details (a.k.a. --show-digest, --show-size, --show-count). | 
| 830 |  |  |  |  |  |  |  | 
| 831 |  |  |  |  |  |  | =item * B<digest_args> => I<array> | 
| 832 |  |  |  |  |  |  |  | 
| 833 |  |  |  |  |  |  | Some Digest algorithms require arguments, you can pass them here. | 
| 834 |  |  |  |  |  |  |  | 
| 835 |  |  |  |  |  |  | =item * B<exclude_empty_files> => I<bool> | 
| 836 |  |  |  |  |  |  |  | 
| 837 |  |  |  |  |  |  | (No description) | 
| 838 |  |  |  |  |  |  |  | 
| 839 |  |  |  |  |  |  | =item * B<exclude_file_patterns> => I<array[str]> | 
| 840 |  |  |  |  |  |  |  | 
| 841 |  |  |  |  |  |  | Filename (including path) regex patterns to exclude. | 
| 842 |  |  |  |  |  |  |  | 
| 843 |  |  |  |  |  |  | =item * B<files>* => I<array[str]> | 
| 844 |  |  |  |  |  |  |  | 
| 845 |  |  |  |  |  |  | (No description) | 
| 846 |  |  |  |  |  |  |  | 
| 847 |  |  |  |  |  |  | =item * B<group_by_digest> => I<bool> | 
| 848 |  |  |  |  |  |  |  | 
| 849 |  |  |  |  |  |  | Sort files by their digest (or size, if not computing digest), separating each different digest. | 
| 850 |  |  |  |  |  |  |  | 
| 851 |  |  |  |  |  |  | =item * B<include_file_patterns> => I<array[str]> | 
| 852 |  |  |  |  |  |  |  | 
| 853 |  |  |  |  |  |  | Filename (including path) regex patterns to include. | 
| 854 |  |  |  |  |  |  |  | 
| 855 |  |  |  |  |  |  | =item * B<max_size> => I<filesize> | 
| 856 |  |  |  |  |  |  |  | 
| 857 |  |  |  |  |  |  | Maximum file size to consider. | 
| 858 |  |  |  |  |  |  |  | 
| 859 |  |  |  |  |  |  | =item * B<min_size> => I<filesize> | 
| 860 |  |  |  |  |  |  |  | 
| 861 |  |  |  |  |  |  | Minimum file size to consider. | 
| 862 |  |  |  |  |  |  |  | 
| 863 |  |  |  |  |  |  | =item * B<recurse> => I<bool> | 
| 864 |  |  |  |  |  |  |  | 
| 865 |  |  |  |  |  |  | If set to true, will recurse into subdirectories. | 
| 866 |  |  |  |  |  |  |  | 
| 867 |  |  |  |  |  |  | =item * B<report_duplicate> => I<int> (default: 2) | 
| 868 |  |  |  |  |  |  |  | 
| 869 |  |  |  |  |  |  | Whether to return duplicate items. | 
| 870 |  |  |  |  |  |  |  | 
| 871 |  |  |  |  |  |  | Can be set to either 0, 1, 2, or 3. | 
| 872 |  |  |  |  |  |  |  | 
| 873 |  |  |  |  |  |  | If set to 0, duplicate items will not be returned. | 
| 874 |  |  |  |  |  |  |  | 
| 875 |  |  |  |  |  |  | If set to 1 (the default for C<dupe-files>), will return all the duplicate | 
| 876 |  |  |  |  |  |  | files. For example: C<file1> contains text 'a', C<file2> 'b', C<file3> 'a'. Then | 
| 877 |  |  |  |  |  |  | C<file1> and C<file3> will be returned. | 
| 878 |  |  |  |  |  |  |  | 
| 879 |  |  |  |  |  |  | If set to 2 (the default for C<uniq-files>), will only return the first of | 
| 880 |  |  |  |  |  |  | duplicate items. Continuing from previous example, only C<file1> will be returned | 
| 881 |  |  |  |  |  |  | because C<file2> is unique and C<file3> contains 'a' (already represented by | 
| 882 |  |  |  |  |  |  | C<file1>). If one or more C<--authoritative-dir> (C<-O>) options are specified, | 
| 883 |  |  |  |  |  |  | files under these directories will be preferred. | 
| 884 |  |  |  |  |  |  |  | 
| 885 |  |  |  |  |  |  | If set to 3, will return all but the first of duplicate items. Continuing from | 
| 886 |  |  |  |  |  |  | previous example: C<file3> will be returned. This is useful if you want to keep | 
| 887 |  |  |  |  |  |  | only one copy of the duplicate content. You can use the output of this routine | 
| 888 |  |  |  |  |  |  | to C<mv> or C<rm>. Similar to the previous case, if one or more | 
| 889 |  |  |  |  |  |  | C<--authoritative-dir> (C<-O>) options are specified, then files under these | 
| 890 |  |  |  |  |  |  | directories will not be listed if possible. | 
| 891 |  |  |  |  |  |  |  | 
| 892 |  |  |  |  |  |  | =item * B<report_unique> => I<bool> (default: 1) | 
| 893 |  |  |  |  |  |  |  | 
| 894 |  |  |  |  |  |  | Whether to return unique items. | 
| 895 |  |  |  |  |  |  |  | 
| 896 |  |  |  |  |  |  | =item * B<show_count> => I<bool> (default: 0) | 
| 897 |  |  |  |  |  |  |  | 
| 898 |  |  |  |  |  |  | Whether to return each file content's number of occurrences. | 
| 899 |  |  |  |  |  |  |  | 
| 900 |  |  |  |  |  |  | 1 means the file content is only encountered once (unique), 2 means there is one | 
| 901 |  |  |  |  |  |  | duplicate, and so on. | 
| 902 |  |  |  |  |  |  |  | 
| 903 |  |  |  |  |  |  | =item * B<show_digest> => I<true> | 
| 904 |  |  |  |  |  |  |  | 
| 905 |  |  |  |  |  |  | Show the digest value (or the size, if not computing digest) for each file. | 
| 906 |  |  |  |  |  |  |  | 
| 907 |  |  |  |  |  |  | Note that this routine does not compute digest for files which have unique | 
| 908 |  |  |  |  |  |  | sizes, so they will show up as empty. | 
| 909 |  |  |  |  |  |  |  | 
| 910 |  |  |  |  |  |  | =item * B<show_size> => I<true> | 
| 911 |  |  |  |  |  |  |  | 
| 912 |  |  |  |  |  |  | Show the size for each file. | 
| 913 |  |  |  |  |  |  |  | 
| 914 |  |  |  |  |  |  |  | 
| 915 |  |  |  |  |  |  | =back | 
| 916 |  |  |  |  |  |  |  | 
| 917 |  |  |  |  |  |  | Returns an enveloped result (an array). | 
| 918 |  |  |  |  |  |  |  | 
| 919 |  |  |  |  |  |  | First element ($status_code) is an integer containing HTTP-like status code | 
| 920 |  |  |  |  |  |  | (200 means OK, 4xx caller error, 5xx function error). Second element | 
| 921 |  |  |  |  |  |  | ($reason) is a string containing error message, or something like "OK" if status is | 
| 922 |  |  |  |  |  |  | 200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth | 
| 923 |  |  |  |  |  |  | element (%result_meta) is called result metadata and is optional, a hash | 
| 924 |  |  |  |  |  |  | that contains extra information, much like how HTTP response headers provide additional metadata. | 
| 925 |  |  |  |  |  |  |  | 
| 926 |  |  |  |  |  |  | Return value:  (any) | 
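|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | A sketch of reading the richer per-file rows returned when detail is enabled (paths are hypothetical): | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | use App::UniqFiles qw(uniq_files); | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | my $res = uniq_files(files => ["a.txt", "b.txt"], detail => 1, report_duplicate => 1); | 
|  |  |  |  |  |  |  | if ($res->[0] == 200) { | 
|  |  |  |  |  |  |  |     # with detail, each row is a hashref with file/size/digest/count keys | 
|  |  |  |  |  |  |  |     for my $row (@{ $res->[2] }) { | 
|  |  |  |  |  |  |  |         printf "%s size=%s count=%s digest=%s\n", | 
|  |  |  |  |  |  |  |             $row->{file}, $row->{size}, $row->{count}, $row->{digest} // ''; | 
|  |  |  |  |  |  |  |     } | 
|  |  |  |  |  |  |  | } | 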
| 927 |  |  |  |  |  |  |  | 
| 928 |  |  |  |  |  |  | =head1 HOMEPAGE | 
| 929 |  |  |  |  |  |  |  | 
| 930 |  |  |  |  |  |  | Please visit the project's homepage at L<https://metacpan.org/release/App-UniqFiles>. | 
| 931 |  |  |  |  |  |  |  | 
| 932 |  |  |  |  |  |  | =head1 SOURCE | 
| 933 |  |  |  |  |  |  |  | 
| 934 |  |  |  |  |  |  | Source repository is at L<https://github.com/perlancar/perl-App-UniqFiles>. | 
| 935 |  |  |  |  |  |  |  | 
| 936 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 937 |  |  |  |  |  |  |  | 
| 938 |  |  |  |  |  |  | L<find-duplicate-filenames> from L<App::FindUtils> | 
| 939 |  |  |  |  |  |  |  | 
| 940 |  |  |  |  |  |  | L<move-duplicate-files-to> from L<App::DuplicateFilesUtils>, which is basically | 
| 941 |  |  |  |  |  |  | a shortcut for C<< uniq-files -D -R . | while read f; do mv "$f" SOMEDIR/; done | 
| 942 |  |  |  |  |  |  | >>. | 
| 943 |  |  |  |  |  |  |  | 
| 944 |  |  |  |  |  |  | =head1 AUTHOR | 
| 945 |  |  |  |  |  |  |  | 
| 946 |  |  |  |  |  |  | perlancar <perlancar@cpan.org> | 
| 947 |  |  |  |  |  |  |  | 
| 948 |  |  |  |  |  |  | =head1 CONTRIBUTOR | 
| 949 |  |  |  |  |  |  |  | 
| 950 |  |  |  |  |  |  | =for stopwords Steven Haryanto | 
| 951 |  |  |  |  |  |  |  | 
| 952 |  |  |  |  |  |  | Steven Haryanto <stevenharyanto@gmail.com> | 
| 953 |  |  |  |  |  |  |  | 
| 954 |  |  |  |  |  |  | =head1 CONTRIBUTING | 
| 955 |  |  |  |  |  |  |  | 
| 956 |  |  |  |  |  |  |  | 
| 957 |  |  |  |  |  |  | To contribute, you can send patches by email/via RT, or send pull requests on | 
| 958 |  |  |  |  |  |  | GitHub. | 
| 959 |  |  |  |  |  |  |  | 
| 960 |  |  |  |  |  |  | Most of the time, you don't need to build the distribution yourself. You can | 
| 961 |  |  |  |  |  |  | simply modify the code, then test via: | 
| 962 |  |  |  |  |  |  |  | 
| 963 |  |  |  |  |  |  | % prove -l | 
| 964 |  |  |  |  |  |  |  | 
| 965 |  |  |  |  |  |  | If you want to build the distribution (e.g. to try to install it locally on your | 
| 966 |  |  |  |  |  |  | system), you can install L<Dist::Zilla>, | 
| 967 |  |  |  |  |  |  | L<Dist::Zilla::PluginBundle::Author::PERLANCAR>, | 
| 968 |  |  |  |  |  |  | L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other | 
| 969 |  |  |  |  |  |  | Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond | 
| 970 |  |  |  |  |  |  | that are considered a bug and can be reported to me. | 
| 971 |  |  |  |  |  |  |  | 
| 972 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 973 |  |  |  |  |  |  |  | 
| 974 |  |  |  |  |  |  | This software is copyright (c) 2023, 2022, 2020, 2019, 2017, 2015, 2014, 2012, 2011 by perlancar <perlancar@cpan.org>. | 
| 975 |  |  |  |  |  |  |  | 
| 976 |  |  |  |  |  |  | This is free software; you can redistribute it and/or modify it under | 
| 977 |  |  |  |  |  |  | the same terms as the Perl 5 programming language system itself. | 
| 978 |  |  |  |  |  |  |  | 
| 979 |  |  |  |  |  |  | =head1 BUGS | 
| 980 |  |  |  |  |  |  |  | 
| 981 |  |  |  |  |  |  | Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-UniqFiles> | 
| 982 |  |  |  |  |  |  |  | 
| 983 |  |  |  |  |  |  | When submitting a bug or request, please include a test-file or a | 
| 984 |  |  |  |  |  |  | patch to an existing test-file that illustrates the bug or desired | 
| 985 |  |  |  |  |  |  | feature. | 
| 986 |  |  |  |  |  |  |  | 
| 987 |  |  |  |  |  |  | =cut |