File Coverage

lib/File/Information.pm
Criterion Covered Total %
statement 121 328 36.8
branch 26 174 14.9
condition 15 106 14.1
subroutine 21 30 70.0
pod 10 10 100.0
total 193 648 29.7


line stmt bran cond sub pod time code
1             # Copyright (c) 2024-2025 Philipp Schafft <lion@cpan.org>
2              
3             # licensed under Artistic License 2.0 (see LICENSE file)
4              
5             # ABSTRACT: generic module for extracting information from filesystems
6              
7              
8             package File::Information;
9              
10 3     3   965457 use v5.16;
  3         9  
11 3     3   10 use strict;
  3         3  
  3         88  
12 3     3   15 use warnings;
  3         4  
  3         158  
13              
14 3     3   9 use Carp;
  3         4  
  3         209  
15 3     3   10 use Fcntl qw(S_ISBLK);
  3         4  
  3         123  
16 3     3   11 use File::Spec;
  3         2  
  3         76  
17              
18 3     3   3046 use File::Information::Link;
  3         14  
  3         234  
19 3     3   30 use File::Information::Inode;
  3         5  
  3         137  
20 3     3   1937 use File::Information::Filesystem;
  3         65  
  3         133  
21 3     3   2949 use File::Information::Tagpool;
  3         10  
  3         210  
22              
23 3     3   28 use parent 'Data::Identifier::Interface::Known';
  3         24  
  3         26  
24              
25             our $VERSION = v0.16;
26              
27             my $HAVE_FILE_VALUEFILE = eval {require File::ValueFile::Simple::Reader; 1;};
28             my $HAVE_UNIX_MKNOD = eval {require Unix::Mknod; 1;};
29              
30              
31             my %_new_subobjects = (
32             extractor => 'Data::URIID',
33             db => 'Data::TagDB',
34             );
35              
36             sub new {
37 2     2 1 1422 my ($pkg, %opts) = @_;
38 2         5 my $self = bless {};
39              
40 2         10 foreach my $key (keys %_new_subobjects) {
41 4 50       16 if (defined($opts{$key})) {
42 0 0       0 croak 'Bad package for option '.$key unless eval {$opts{$key}->isa($_new_subobjects{$key})};
  0         0  
43 0         0 $self->{$key} = $opts{$key};
44             }
45             }
46              
47 2         57 $self->{$_} = $opts{$_} foreach qw(tagpool_rc tagpool_path device_path digest_sizelimit mountinfo_path boring_sizelimit boring_extension store);
48              
49 2   50     19 $self->{store} //= [];
50 2 50       11 $self->{store} = [$self->{store}] unless ref($self->{store}) eq 'ARRAY';
51              
52 2         5 foreach my $store (@{$self->{store}}) {
  2         6  
53 0 0       0 unless (eval {$store->isa('File::FStore')}) {
  0         0  
54              
55 0         0 require File::FStore;
56 0         0 $store = File::FStore->new(path => $store);
57             }
58              
59 0         0 $store->attach(db => $self->{db}, extractor => $self->{extractor}, fii => $self, weak => 1);
60             }
61              
62 2   50     15 $self->{digest_sizelimit} //= 512*1024*1024; # 512MB
63              
64 2 50       9 if ($self->{digest_sizelimit} eq 'infinite') {
65 0         0 $self->{digest_sizelimit} = -1;
66             } else {
67 2         6 $self->{digest_sizelimit} = int($self->{digest_sizelimit}); # ensure it's an int. This will also set to 0 in case of error.
68 2 50       8 $self->{digest_sizelimit} = 0 if $self->{digest_sizelimit} < 0;
69             }
70              
71 2   50     13 $self->{boring_sizelimit} //= 128;
72 2         5 $self->{boring_sizelimit} = int($self->{boring_sizelimit});
73              
74 2   50     20 $self->{boring_extension} //= [qw(o bakX old orig part rej swp tmp dpkg-dist dpkg-old ucf-dist ucf-new ucf-old rpmnew rpmorig rpmsave)];
75 2 50       19 $self->{boring_extension} = [split/(?:\s*,\s*|\s+)/, $self->{boring_extension}] unless ref $self->{boring_extension};
76              
77 2         3 $self->{boring_extension} = {map {fc($_) => 1} @{$self->{boring_extension}}};
  32         91  
  2         7  
78              
79 2 50       14 if (defined(my $boring_extension_add = $opts{boring_extension_add})) {
80 0 0       0 $boring_extension_add = [split/(?:\s*,\s*|\s+)/, $boring_extension_add] unless ref $boring_extension_add;
81 0         0 $self->{boring_extension}{fc($_)} = 1 foreach @{$boring_extension_add};
  0         0  
82             }
83              
84 2 50       9 if (defined $opts{digest_unsafe}) {
85 0         0 my $unsafe = $opts{digest_unsafe};
86              
87 0 0       0 $unsafe = [$unsafe] unless ref($unsafe) eq 'ARRAY';
88              
89 0         0 $_->{unsafe} = 1 foreach $self->digest_info(@{$unsafe});
  0         0  
90             }
91              
92 2         8 $self->_tagpool_locate;
93              
94 2         9 return $self;
95             }
96              
97              
98             sub for_link {
99 2     2 1 1078 my ($self, %opts);
100              
101 2 50       9 if (scalar(@_) == 2) {
102 2         8 ($self, $opts{path}) = @_;
103             } else {
104 0         0 ($self, %opts) = @_;
105             }
106              
107 2   50     14 $opts{symlinks} //= 'nofollow';
108              
109 2         4 return File::Information::Link->_new(instance => $self, (map {$_ => $opts{$_}} qw(path symlinks)));
  4         28  
110             }
111              
112              
113             #@returns File::Information::Inode
114             sub for_handle {
115 0     0 1 0 my ($self, %opts);
116              
117 0 0       0 if (scalar(@_) == 2) {
118 0         0 ($self, $opts{handle}) = @_;
119             } else {
120 0         0 ($self, %opts) = @_;
121             }
122              
123 0         0 return File::Information::Inode->_new(instance => $self, (map {$_ => $opts{$_}} qw(handle)));
  0         0  
124             }
125              
126              
127              
128             #@returns File::Information::Base
129             sub for_identifier {
130 0     0 1 0 my ($self, %opts);
131              
132 0 0       0 if (scalar(@_) == 2) {
    0          
133 0         0 ($self, $opts{identifier}) = @_;
134             } elsif (scalar(@_) == 3) {
135 0         0 ($self, $opts{type}, $opts{identifier}) = @_;
136             } else {
137 0         0 ($self, %opts) = @_;
138             }
139              
140 0 0       0 croak 'No identifier given' unless defined $opts{identifier};
141              
142 0 0 0     0 if (!defined($opts{type}) && ref($opts{identifier})) {
143 0         0 my $id = $opts{identifier};
144              
145 0 0       0 if ($id->isa('Data::URIID::Result')) {
    0          
146 0         0 $opts{type} = $id->id_type;
147 0 0       0 unless (defined $opts{type}) {
148             # Special case: we might know a digest or something, but not an identifier.
149 0         0 require File::Information::Remote;
150 0         0 return File::Information::Remote->_new(instance => $self, data_uriid_result => $id);
151             }
152             } elsif ($id->isa('Data::Identifier')) {
153 0         0 $opts{type} = $id->type;
154             }
155             }
156              
157 0 0       0 croak 'No type given' unless defined $opts{type};
158              
159 0 0       0 if (ref $opts{type}) {
    0          
160 0         0 $opts{type} = $opts{type}->ise;
161             } elsif ($opts{type} eq 'uuid') {
162 0         0 $opts{type} = '8be115d2-dc2f-4a98-91e1-a6e3075cbc31';
163             }
164              
165 0 0       0 if (ref(my $id = $opts{identifier})) {
166 0 0 0     0 if ($id->isa('Data::URIID::Result') || $id->isa('Data::Identifier')) {
    0          
167 0         0 $opts{identifier} = $id->id;
168             } elsif ($id->isa('Data::URIID::Base')) {
169 0         0 $opts{identifier} = $id->ise;
170             }
171             }
172              
173 0 0       0 if ($opts{type} eq '8be115d2-dc2f-4a98-91e1-a6e3075cbc31') {
174 0 0       0 if ($opts{identifier} !~ /^[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}$/) {
175 0         0 croak 'Invalid format for UUID';
176             }
177              
178 0         0 $self->_load_filesystems;
179              
180 0         0 foreach my $fs (values %{$self->{filesystems}}) {
  0         0  
181 0   0     0 my $ise = $fs->get('uuid', as => 'uuid', default => undef) // next;
182 0 0       0 return $fs if $ise eq $opts{identifier};
183             }
184              
185             {
186 0         0 require File::ValueFile::Simple::Reader;
  0         0  
187              
188 0         0 my $uuid = $opts{identifier};
189              
190 0         0 foreach my $pool ($self->tagpool) {
191 0         0 my $ise = $pool->get('uuid', as => 'uuid');
192 0         0 my $hash;
193              
194 0 0       0 return $pool if $ise eq $uuid;
195              
196 0         0 $hash = eval {
197 0         0 my $reader = File::ValueFile::Simple::Reader->new($pool->_catfile('data', 'info.'.$uuid));
198 0         0 $reader->read_as_hash;
199             };
200              
201 0 0       0 next unless defined $hash;
202 0 0       0 next unless defined $hash->{'pool-name-suffix'};
203 0         0 return $self->for_link(path => $pool->_catfile('data', $hash->{'pool-name-suffix'}), %opts);
204             }
205             }
206             }
207              
208 0 0       0 if (defined(my $extractor = eval {$self->extractor})) {
  0         0  
209 0 0       0 if (defined(my $result = eval {$extractor->lookup($opts{type}, $opts{identifier})})) {
  0         0  
210 0         0 require File::Information::Remote;
211 0         0 return File::Information::Remote->_new(instance => $self, data_uriid_result => $result);
212             }
213             }
214              
215 0         0 croak 'Not found';
216             }
217              
218              
219             sub tagpool {
220 0     0 1 0 my ($self) = @_;
221 0         0 return values %{$self->_tagpool}
  0         0  
222             }
223              
224              
225             #@returns Data::URIID
226             sub extractor {
227 0     0 1 0 my ($self) = @_;
228              
229 0   0     0 return $self->{extractor} // croak 'No extractor available';
230             }
231              
232              
233             #@returns Data::TagDB
234             sub db {
235 0     0 1 0 my ($self) = @_;
236              
237 0   0     0 return $self->{db} // croak 'No database available';
238             }
239              
240              
241             sub store {
242 2     2 1 8 my ($self, %opts) = @_;
243 2         5 my $as = delete $opts{as};
244 2 50 50     8 croak 'Invalid as parameter' unless ($as // '') eq 'File::FStore';
245 2 50       7 croak 'Stray options passed' if scalar keys %opts;
246 2         3 return @{$self->{store}};
  2         9  
247             }
248              
249              
250             sub lifecycles {
251 1     1 1 420 return qw(initial last current final);
252             }
253              
254              
255             sub digest_info {
256 1     1 1 519 my ($self, @algos) = @_;
257 1         3 my @ret;
258              
259 1 0 33     6 if (scalar(@algos) == 1 && $algos[0] =~ /^v0m / && wantarray) {
      33        
260 0         0 while ($algos[-1] =~ s#^v0m (\S+) bytes [0-9]+-[0-9]*/(?:[0-9]+|\*) [0-9a-f\.]+ ##) {
261 0         0 unshift(@algos, $1);
262             }
263             }
264              
265 1 50       5 unless ($self->{hash_info}) {
266 1         6 my %hashes = map {$_ => {
267             name => $_,
268             bits => int(($_ =~ /-([0-9]+)$/)[0]),
269             aliases => [],
270 20   100     136 %{$File::Information::Base::_digest_info_extra{$_}//{}},
  20         153  
271             }} (
272             values(%File::Information::Base::_digest_name_converter),
273             qw(md-4-128 ripemd-1-160 tiger-1-192 tiger-2-192),
274             );
275 1         11 $self->{hash_info} = \%hashes;
276              
277 1         7 $hashes{$_}{unsafe} = 1 foreach qw(md-4-128 md-5-128 sha-1-160);
278 1         8 push(@{$hashes{$File::Information::Base::_digest_name_converter{$_}}{aliases}}, $_) foreach keys %File::Information::Base::_digest_name_converter;
  16         37  
279             }
280              
281 1 50       5 @algos = keys %{$self->{hash_info}} unless scalar @algos;
  1         6  
282              
283 1 50 33     6 croak 'Request for more than one hash in scalar context' if !wantarray && scalar(@algos) != 1;
284              
285             @ret = map{
286             $self->{hash_info}{$_} ||
287 14 50 0     36 $self->{hash_info}{$File::Information::Base::_digest_name_converter{fc($_)} // ''} ||
      33        
288             croak 'Unknown digest: '.$_
289 1         2 } map { s#^v0 (\S+) bytes [0-9]+-[0-9]*/(?:[0-9]+|\*) [0-9a-f\.]+$#$1#r } @algos;
  14         24  
290              
291 1 50       4 if (wantarray) {
292 1         8 return @ret;
293             } else {
294 0         0 return $ret[0];
295             }
296             }
297              
298             # ----------------
299              
300             sub _home {
301 2     2   5 my ($self) = @_;
302 2         3 my $home;
303              
304 2 50       6 return $self->{home} if defined $self->{home};
305              
306 2 50       10 if ($^O eq 'MSWin32') {
307 0 0 0     0 return $self->{home} = $home if defined($home = $ENV{USERPROFILE}) && length($home);
308 0 0 0     0 if (defined($ENV{HOMEDRIVE}) && defined($ENV{HOMEPATH})) {
309 0         0 $home = $ENV{HOMEDRIVE}.$ENV{HOMEPATH};
310 0 0       0 return $self->{home} = $home if length($home);
311             }
312 0         0 return $self->{home} = 'C:\\';
313             } else {
314 2 50 33     25 return $self->{home} = $home if defined($home = $ENV{HOME}) && length($home);
315 0 0 0     0 return $self->{home} = $home if defined($home = eval { [getpwuid($>)]->[7] }) && length($home);
  0         0  
316 0         0 return $self->{home} = File::Spec->rootdir;
317             }
318              
319 0         0 croak 'BUG';
320             }
321              
322             sub _path {
323 2     2   8 my ($self, $xdg, $type, @el) = @_;
324 2         4 my $base;
325              
326 2 50       6 if (defined $xdg) {
327 0   0     0 $base = $ENV{$xdg} // $self->{$xdg};
328 0 0 0     0 if (!defined($base) || !length($base)) {
329 0 0       0 if ($xdg eq 'XDG_CACHE_HOME') {
    0          
    0          
    0          
330 0         0 $base = File::Spec->catdir($self->_home, qw(.cache));
331             } elsif ($xdg eq 'XDG_DATA_HOME') {
332 0         0 $base = File::Spec->catdir($self->_home, qw(.local share));
333             } elsif ($xdg eq 'XDG_CONFIG_HOME') {
334 0         0 $base = File::Spec->catdir($self->_home, qw(.config));
335             } elsif ($xdg eq 'XDG_STATE_HOME') {
336 0         0 $base = File::Spec->catdir($self->_home, qw(.local state));
337             } else {
338 0         0 croak 'Unknown XDG path: '.$xdg;
339             }
340              
341 0         0 $self->{$xdg} = $base;
342             }
343             } else {
344 2         7 $base = $self->_home;
345             }
346              
347 2 50       7 if ($type eq 'file') {
348 2         61 return File::Spec->catfile($base, @el);
349             } else {
350 0         0 return File::Spec->catdir($base, @el);
351             }
352             }
353              
354             sub _tagpool_locate {
355 2     2   6 my ($self) = @_;
356 2         3 my %candidates;
357              
358 2 50       7 return unless $HAVE_FILE_VALUEFILE;
359              
360 2 50       7 unless (defined $self->{tagpool_rc}) {
361             # Set defaults:
362 2         7 $self->{tagpool_rc} = ['/etc/tagpoolrc', $self->_path(undef, file => '.tagpoolrc')]; # Values taken from tagpool as is. Should be updated.
363             }
364              
365 2 50       8 unless (defined $self->{tagpool_path}) {
366             # Set defaults:
367 2         5 $self->{tagpool_path} = []; # none at this point.
368             }
369              
370 2 50       7 $self->{tagpool_rc} = [$self->{tagpool_rc}] unless ref $self->{tagpool_rc};
371 2 50       6 $self->{tagpool_path} = [$self->{tagpool_path}] unless ref $self->{tagpool_path};
372              
373 2         3 %candidates = map {$_ => undef} grep {defined} @{$self->{tagpool_path}};
  0         0  
  0         0  
  2         7  
374              
375 2         3 foreach my $tagpool_rc_path (@{$self->{tagpool_rc}}) {
  2         7  
376 4         9 my $hash = eval {File::ValueFile::Simple::Reader->new($tagpool_rc_path)->read_as_hash};
  4         36  
377 4 50       1271 if (defined $hash) {
378 0         0 foreach my $key (qw(pool-path pool)) {
379 0 0       0 if (defined $hash->{$key}) {
380 0         0 $candidates{$hash->{$key}} = undef;
381             }
382             }
383             }
384             }
385              
386             # eliminate all but the ones that look like actual pools:
387 2         8 foreach my $path (keys %candidates) {
388 0 0       0 unless (-d $path) {
389 0         0 delete $candidates{$path};
390 0         0 next;
391             }
392              
393 0         0 foreach my $subdir (qw(data temp)) {
394 0 0       0 unless (-d File::Spec->catdir($path, $subdir)) {
395 0         0 delete $candidates{$path};
396 0         0 next;
397             }
398             }
399              
400 0         0 foreach my $subfile (qw(config)) {
401 0 0       0 unless (-f File::Spec->catfile($path, $subfile)) {
402 0         0 delete $candidates{$path};
403 0         0 next;
404             }
405             }
406             }
407              
408 2         8 $self->{tagpool_path} = [keys %candidates];
409             }
410              
411             sub _tagpool_path {
412 2     2   4 my ($self) = @_;
413 2         11 return $self->{tagpool_path};
414             }
415              
416             sub _tagpool_sysfile_cache {
417 8     8   16 my ($self) = @_;
418 8   100     34 return $self->{_tagpool_sysfile_cache} //= {};
419             }
420              
421             sub _tagpool {
422 0     0     my ($self) = @_;
423             my $pools = $self->{tagpool} //= {
424 0   0       map {$_ => File::Information::Tagpool->_new(instance => $self, path => $_)} @{$self->_tagpool_path}
  0            
  0            
425             };
426              
427 0           return $pools;
428             }
429              
430             sub _load_filesystems {
431 0     0     my ($self) = @_;
432 0 0         unless (defined $self->{filesystems}) {
433 0           my %dirs;
434             my %found;
435 0           my %filesystems;
436              
437 0   0       $self->{device_path} //= File::Information::Filesystem->_default_device_search_paths;
438              
439 0 0         $self->{device_path} = [$self->{device_path}] unless ref($self->{device_path}) eq 'ARRAY';
440              
441 0           %dirs = map {$_ => undef} @{$self->{device_path}};
  0            
  0            
442              
443 0           foreach my $dir_path (keys %dirs) {
444 0 0         if (opendir(my $dir, $dir_path)) {
445 0           while (my $entry = readdir($dir)) {
446 0           my $devpath = File::Spec->catfile($dir_path, $entry);
447 0           my @stat = stat($devpath);
448              
449 0 0         next unless scalar @stat;
450 0 0         next unless S_ISBLK($stat[2]);
451              
452 0   0       $found{$stat[6]} //= {};
453 0           $found{$stat[6]}{stat} = \@stat;
454 0   0       $found{$stat[6]}{paths} //= {};
455 0   0       $found{$stat[6]}{paths}{$dir_path} //= [];
456 0           push(@{$found{$stat[6]}{paths}{$dir_path}}, $entry);
  0            
457             }
458             }
459             }
460              
461 0 0         if ($^O eq 'MSWin32') {
462 0           foreach my $dos_device ('A'..'Z') {
463 0           my $dos_path = $dos_device.':\\';
464 0           my @stat = stat($dos_path);
465              
466 0 0         next unless scalar @stat;
467              
468 0   0       $found{$stat[0]} //= {};
469 0           $found{$stat[0]}{dirstat} = \@stat;
470 0           $found{$stat[0]}{dos_device} = $dos_device;
471 0           $found{$stat[0]}{dos_path} = $dos_path;
472 0   0       $found{$stat[0]}{paths} //= {};
473             }
474             }
475              
476 0 0 0       if ($HAVE_UNIX_MKNOD && $^O eq 'linux') {
477 0   0       $self->{mountinfo_path} //= '/proc/self/mountinfo';
478 0 0         if (open(my $mountinfo, '<', $self->{mountinfo_path})) {
479 0           while (defined(my $line = <$mountinfo>)) {
480 0           my ($mount_id, $parent_id, $major, $minor, $root, $mountpoint, $mount_options, $options, $fs_type, $source, $super_options) =
481             $line =~ m#^(\S+)\s+(\S+)\s+([0-9]+):([0-9]+)\s+(/\S*)\s+(/\S*)\s+(\S+)\s+((?:\S+:\S+\s+)*)-\s+(\S+)\s+(/\S+|none|\S+)\s+(\S+)$#;
482 0           my $dev;
483             my $entry;
484              
485 0 0         next unless defined $mount_id;
486              
487 0           s/\\([0-9]{3})/chr(oct($1))/ge foreach $mount_id, $parent_id, $major, $minor, $root, $mountpoint, $mount_options, $options, $fs_type, $source, $super_options;
  0            
488              
489 0           $dev = Unix::Mknod::makedev($major, $minor);
490              
491 0   0       $entry = $found{$dev} //= {};
492 0   0       $entry->{paths} //= {};
493              
494 0 0 0       if (!defined($entry->{stat}) && $source =~ m#^/#) {
495 0           my @stat = eval {stat($source)};
  0            
496 0 0         $entry->{stat} = \@stat if scalar @stat;
497             }
498              
499 0 0 0       if (!defined($entry->{dirstat}) && $mountpoint =~ m#^/#) {
500 0           my @stat = eval {stat($mountpoint)};
  0            
501 0 0         $entry->{dirstat} = \@stat if scalar @stat;
502             }
503              
504 0 0 0       $entry->{mountpoint} //= $mountpoint if $mountpoint =~ m#^/#;
505 0   0       $entry->{fs_type} //= $fs_type;
506 0   0       $entry->{linux_mount_options} //= $mount_options;
507 0   0       $entry->{linux_superblock_options} //= $super_options;
508              
509 0 0         if ($source =~ m#^/#) {
510 0           my ($volume, $directories, $file) = File::Spec->splitpath($source);
511 0           my $dir = File::Spec->catdir($volume, $directories);
512 0   0       $entry->{paths}{$dir} //= [];
513 0           push(@{$entry->{paths}{$dir}}, $file);
  0            
514             }
515             }
516             }
517             }
518              
519 0           foreach my $key (keys %found) {
520 0           $filesystems{$key} = File::Information::Filesystem->_new(instance => $self, %{$found{$key}});
  0            
521             }
522 0           $self->{dev_found} = \%found;
523 0           $self->{filesystems} = \%filesystems;
524             }
525             }
526              
527             sub _filesystem_for {
528 0     0     my ($self, $dev) = @_;
529 0           $self->_load_filesystems;
530 0           return $self->{filesystems}{$dev};
531             }
532              
533             # --- Overrides for Data::Identifier::Interface::Known ---
534              
535              
536             sub _known_provider {
537 0     0     my ($pkg, $class, %opts) = @_;
538 0 0         croak 'Unsupported options passed' if scalar(keys %opts);
539              
540 0 0         if ($class eq 'lifecycles_name') {
    0          
    0          
541 0           return ([$pkg->lifecycles], not_identifiers => 1);
542             } elsif ($class eq 'digest_name') {
543 0           return ([map {$_->{name}} $pkg->digest_info], not_identifiers => 1);
  0            
544             } elsif ($class eq 'digest') {
545 0           state $type = Data::Identifier->new(uuid => '8db88212-69df-40f3-a5cf-105dcd853d44')->register;
546 0           require Data::Identifier;
547 0           require Data::Identifier::Wellknown;
548 0           Data::Identifier::Wellknown->import('digest-algorithm');
549              
550 0           return ([map {Data::Identifier->new($type => $_->{name})} $pkg->digest_info], rawtype => 'Data::Identifier');
  0            
551             }
552              
553 0 0         return ([]) if $class eq ':all';
554 0           croak 'Unsupported class';
555             }
556              
557             1;
558              
559             __END__
560              
561             =pod
562              
563             =encoding UTF-8
564              
565             =head1 NAME
566              
567             File::Information - generic module for extracting information from filesystems
568              
569             =head1 VERSION
570              
571             version v0.16
572              
573             =head1 SYNOPSIS
574              
575             use File::Information;
576              
577             my File::Information $instance = File::Information->new(%config);
578              
579             my File::Information::Base $obj = $instance->for_link($path)
580             my File::Information::Base $obj = $instance->for_handle($handle);
581              
582             my $title = $obj->get('title');
583             my $digest = $obj->digest('sha-3-512');
584              
585             my $result = $obj->verify;
586             my $passed = $result->has_passed;
587              
588             B<Note:> This package inherits from L<Data::Identifier::Interface::Known>.
589              
590             This module provides support to read/write properties of inodes (files) and links
591             in a portable and compact way.
592              
593             This module will collect data from a number of sources such as the file system
594             (ANSI, POSIX, and operating specific interfaces),
595             C<.comments/>, tagpool, tag databases, and other.
596              
597             The provided example program C<file-information-dump> dumps all information this module
598             can read for a given file. It is also meant as an example on how to interact with the API.
599              
600             In addition this module also provides a way to verify a file for corruption.
601             See L<File::Information::Base/verify> for that.
602              
603             A noteable difference of this module to other similar modules is the use of lifecycles.
604             See L</lifecycles> for more information on that.
605              
606             =head1 METHODS
607              
608             =head2 new
609              
610             my File::Information $instance = File::Information->new(%config);
611              
612             Creates a new instance that can be used to perform lookups later on.
613              
614             The following options (all optional) are supported:
615              
616             =over
617              
618             =item C<extractor>
619              
620             An instance of L<Data::URIID> used to create related objects.
621              
622             =item C<db>
623              
624             An instance of L<Data::TagDB> used to interact with a database.
625              
626             =item C<store>
627              
628             One or more instances of L<File::FStore> or paths to such stores.
629             A scalar value or an arrayref if multiple.
630              
631             =item C<boring_extension>
632              
633             A list of file name extensions considered boring.
634             This is a list (arrayref) of extensions (with no leading dot).
635             Duplicates are eliminated.
636             Defaults to a list suitable for most cases.
637             To disable set to C<[]> and keep C<boring_extension_add> unset.
638              
639             =item C<boring_extension_add>
640              
641             Same as C<boring_extension>,
642             however does only add entries and does not remove the default entries or those provided by C<boring_extension>.
643              
644             =item C<boring_sizelimit>
645              
646             The limit (in bytes) for boring files.
647             A file is considered boring if it is smaller than this limit (or any other boring rule applies).
648             To disable, set this value to C<-1>.
649             The default is adequate for most usecases. It is no bigger than 512KB (likely much lower).
650              
651             =item C<tagpool_rc>
652              
653             A filename (or list of filenames) of tagpool rc files. Pool locations will be read from those files.
654             Default is to try standard locations. To disable this it is possible to set the option to C<[]>.
655              
656             =item C<tagpool_path>
657              
658             A path (or a list of paths) of tagpool directories. This is where a pool is located.
659             Default is to try standard locations. To disable this it is possible to set the option to C<[]>.
660             However to disable tagpool support fully C<tagpool_rc> also needs to be set to C<[]>.
661              
662             Only valid pools are accepted. Invalid pools are rejected without warning.
663              
664             =item C<device_path>
665              
666             The path (or list of paths) to look for device inodes. This is used as part of filesystem detection.
667             Default is to try a list of standard locations. To disable this it is possible to set the option to C<[]>.
668              
669             This module does B<not> perform recursive searches. Therefore on systems that include paths like C</dev/disk>
670             those also need to be included for this module to work correctly. It is therefore recommended not to alter this
671             setting.
672              
673             =item C<digest_sizelimit>
674              
675             The size limit (in bytes) for how large of a datablock (such as a file) the module will perform hashing.
676             This can be set to C<0> to disable hashing. When set to C<'infinite'> the limit is disabled.
677             The default is suitable for modern machines and will be not less than 16MiB.
678              
679             =item C<digest_unsafe>
680              
681             An digest or a list of digests to be defined unsafe. See L</digest_info> for details.
682             Dies if a digest in the list is unknown (this is for security reasons).
683             This option only allows to mark additinal digests unsafe. It does not allow to mark already marked ones safe again.
684              
685             =item C<mountinfo_path>
686              
687             The path to the mountinfo file. This is a special file on Linux that contains information on mounted filesystems.
688             Defaults to C</proc/self/mountinfo>.
689             This option has no effect on systems other than Linux.
690              
691             =back
692              
693             =head2 for_link
694              
695             my File::Information::Link $link = $instance->for_link($path);
696             # or:
697             my File::Information::Link $link = $instance->for_link(path => $path [, %opts ]);
698              
699             Creates a new link instance.
700              
701             The following options are supported:
702              
703             =over
704              
705             =item C<path>
706              
707             Required if not using the one-argument form. Gives the path (filename) of the link.
708              
709             =item C<symlinks>
710              
711             Whether (C<follow>) or not (C<nofollow>; default) symlinks.
712             The special value C<opportunistic-nofollow> can be used to not follow symlinks on system that support nofollow but not C<die> on systems that doesn't.
713              
714             =back
715              
716             =head2 for_handle
717              
718             my File::Information::Inode $inode = $instance->for_handle($handle);
719             # or:
720             my File::Information::Inode $inode = $instance->for_handle(handle => $handle [, %opts ]);
721              
722             Creates a new inode instance.
723              
724             The following options are supported:
725              
726             =over
727              
728             =item C<handle>
729              
730             Required if not using the one-argument form. Gives an open handle to the inode.
731              
732             =back
733              
734             =head2 for_identifier
735              
736             my File::Information::Base $obj = $instance->for_identifier($identifier);
737             # or:
738             my File::Information::Base $obj = $instance->for_identifier(uuid => $uuid);
739             # or:
740             my File::Information::Base $obj = $instance->for_identifier(type => 'uuid', id => $uuid [, %opts ]);
741              
742             B<Note:>
743             This is an experimental method. It may be renamed, removed, or changed in any way with future releases.
744              
745             This method returns an object based on it's identifier. This method might return different kinds of objects
746             such as links, inodes, filesystems, or tagpools.
747              
748             The identifier can be passed as an instance of e.g. L<Data::Identifier> or as a plain UUID.
749             Other types may or may not be supported.
750              
751             =head2 tagpool
752              
753             my @tagpool = $instance->tagpool;
754              
755             Returns the list of found tagpools if any (See L<File::Information::Tagpool>).
756              
757             B<Note:>
758             There is no order to the returned values. The order may change between any two calls.
759              
760             =head2 extractor
761              
762             my Data::URIID $extractor = $instance->extractor;
763              
764             Returns the extractor given via the configuration. Will die if no extractor is available.
765              
766             =head2 db
767              
768             my Data::TagDB $db = $instance->db;
769              
770             Returns the database given via the configuration. Will die if no database is available.
771              
772             =head2 store
773              
774             my @store = $instance->store(as => 'File::FStore');
775              
776             (since v0.09)
777              
778             Returns the list of file stores if any (see L<File::FStore>).
779              
780             B<Note:>
781             There is no order to the returned values. The order may change between any two calls.
782              
783             B<Note:>
784             Currently the C<as> option must be set to C<File::FStore>. No other values nor options are supported.
785              
786             =head2 lifecycles
787              
788             my @lifecycles = $instance->lifecycles;
789              
790             Returns the list of known lifecycles.
791             The order of the list is not defined. However the method will return them in a way suitable for display to an user.
792              
793             Currently defined are the following lifecycles:
794              
795             =over
796              
797             =item C<initial>
798              
799             The initial state. This is the state the object is in when it becomes known.
800             The exact meaning depend on the used data source.
801              
802             =item C<last>
803              
804             The state the object was in when last interacted with a non-read-only manner.
805             The exact meaning depend on the used data source.
806              
807             =item C<current>
808              
809             The current state of the object.
810              
811             =item C<final>
812              
813             The state the object will be in when it is I<final>.
814             Most commonly this is used to compare to when checking if a object is corrupted.
815              
816             =back
817              
818             =head2 digest_info
819              
820             my $info = $instance->digest_info('sha-3-512');
821             # or:
822             my @info = $instance->digest_info;
823             # or:
824             my @info = $instance->digest_info('sha-2-512', 'sha-3-512');
825              
826             Returns information on one or more digests. If no digest is given returns infos for all known ones.
827              
828             The digest can be given in the universal tag format (preferred), one of it's aliases (dissuaded),
829             or a complete digest-and-value string in universal tag format (only version C<v0> or
830             C<v0m> if only one digest is given and the method is called in list context).
831              
832             The return value is a hashref or an array of hashrefs which contain the following keys:
833              
834             =over
835              
836             =item C<name>
837              
838             The name of the digest in universal tag format (the format used in this module).
839              
840             =item C<bits>
841              
842             The number of bits the digest will return.
843              
844             =item C<aliases>
845              
846             An arrayref to a list of aliases for this digest.
847              
848             =item C<unsafe>
849              
850             A boolean indicating if the digest is considered unsafe by this module.
851             B<Security:> Note that a digest not defined unsafe by this module may still be unsafe to use.
852             This can for example happen if the digest became unsafe after the release of the version of this module.
853              
854             =item C<rfc9530>
855              
856             The name of the algorithm as per RFC 9530 if any.
857              
858             =item C<openpgp>
859              
860             The OpenPGP algorithm identifier.
861              
862             =item C<sni>
863              
864             The SIRTX numerical identifier.
865              
866             =back
867              
868             =head2 known
869              
870             my @list = $instance->known($class [, %opts ] );
871              
872             This module implements L<Data::Identifier::Interface::Known/known>. See there for details.
873              
874             B<Note:>
875             This interface does not guarantee any specific order.
876              
877             The following classes are supported:
878              
879             =over
880              
881             =item C<lifecycles_name>
882              
883             Returns the same values as L</lifecycles>.
884              
885             =item C<digest_name>
886              
887             Returns the names known by L</digest_info>.
888              
889             =item C<digest>
890              
891             (experimental since v0.16)
892              
893             Returns the known digests as per L</digest_info> as identifiers.
894              
895             This currently requires to load L<Data::Identifier::Wellknown> with the corresponding class.
896              
897             =back
898              
899             =head1 AUTHOR
900              
901             Philipp Schafft <lion@cpan.org>
902              
903             =head1 COPYRIGHT AND LICENSE
904              
905             This software is Copyright (c) 2024-2025 by Philipp Schafft <lion@cpan.org>.
906              
907             This is free software, licensed under:
908              
909             The Artistic License 2.0 (GPL Compatible)
910              
911             =cut