File Coverage

lib/File/Information/Tagpool.pm
Criterion Covered Total %
statement 26 202 12.8
branch 0 72 0.0
condition 0 53 0.0
subroutine 9 20 45.0
pod 4 4 100.0
total 39 351 11.1


line stmt bran cond sub pod time code
1             # Copyright (c) 2024-2025 Philipp Schafft <lion@cpan.org>
2              
3             # licensed under Artistic License 2.0 (see LICENSE file)
4              
5             # ABSTRACT: generic module for extracting information from filesystems
6              
7              
8             package File::Information::Tagpool;
9              
10 3     3   70 use v5.10;
  3         13  
11 3     3   14 use strict;
  3         6  
  3         71  
12 3     3   12 use warnings;
  3         6  
  3         142  
13              
14 3     3   14 use parent 'File::Information::Base';
  3         5  
  3         29  
15              
16 3     3   287 use Carp;
  3         6  
  3         239  
17 3     3   18 use File::Spec;
  3         12  
  3         83  
18 3     3   1906 use Sys::Hostname ();
  3         4210  
  3         112  
19 3     3   46 use Scalar::Util qw(weaken);
  3         6  
  3         239  
20              
21 3     3   1440 use File::Information::Lock;
  3         8  
  3         8443  
22              
23             our $VERSION = v0.16;
24              
25             my $HAVE_FILE_VALUEFILE = eval {require File::ValueFile::Simple::Reader; require File::ValueFile::Simple::Writer; 1;};
26              
27             my %_properties = (
28             tagpool_pool_path => {loader => \&_load_tagpool, rawtype => 'filename'},
29             );
30              
31             if ($HAVE_FILE_VALUEFILE) {
32             $_properties{tagpool_pool_uuid} = {loader => \&_load_tagpool, rawtype => 'uuid'};
33             }
34              
35             sub _new {
36 0     0     my ($pkg, %opts) = @_;
37 0           my $self = $pkg->SUPER::_new(%opts, properties => \%_properties);
38              
39 0 0         croak 'No path is given' unless defined $self->{path};
40              
41 0           return $self;
42             }
43              
44              
45             #@returns File::Information::Lock
46             sub lock {
47 0     0 1   my ($self) = @_;
48 0   0       my $locks = $self->{locks} //= {};
49              
50 0 0         unless (scalar keys %{$locks}) {
  0            
51 0           my $lockfile = $self->_catfile('lock');
52 0           my $lockname = $self->_tempfile('lock');
53 0 0         open(my $out, '>', $lockname) or die $!;
54 0           print $out ".\n";
55 0           close($out);
56              
57 0           for (my $i = 0; $i < 3; $i++) {
58 0 0         if (link($lockname, $lockfile)) {
59             # Success.
60 0           $self->{lockfile} = $lockfile;
61 0           $self->{lockname} = $lockname;
62             {
63 0           my $lock = File::Information::Lock->new(parent => $self, on_unlock => \&_unlock);
  0            
64 0           $locks->{$lock} = $lock;
65 0           weaken($locks->{$lock}); # it holds a reference to us, so our's will be weak.
66 0           return $lock;
67             }
68             }
69 0           sleep(1);
70             }
71              
72 0           unlink($lockname);
73 0           croak 'Can not lock pool';
74             }
75              
76             {
77 0           my $lock = File::Information::Lock->new(parent => $self, on_unlock => \&_unlock);
  0            
78 0           $locks->{$lock} = $lock;
79 0           weaken($locks->{$lock}); # it holds a reference to us, so our's will be weak.
80 0           return $lock;
81             }
82             }
83              
84              
85             sub locked {
86 0     0 1   my ($self, $func) = @_;
87 0           my $lock = $self->lock;
88 0           return $func->();
89             }
90              
91              
92             sub load_sysfile_cache {
93 0     0 1   my ($self) = @_;
94 0   0       my $locks = $self->{locks} //= {};
95              
96 0 0         unless (scalar keys %{$locks}) {
  0            
97 0           croak 'The pool must be locked to read the sysfile cache';
98             }
99              
100 0 0         unless (defined $self->{sysfile_cache}) {
101 0   0       my $local_cache = $self->instance->_tagpool_sysfile_cache->{$self->{path}} //= {};
102 0           my $data_path = $self->_catdir('data');
103 0           my %cache;
104              
105 0 0         opendir(my $dir, $data_path) or croak $!;
106              
107 0           while (my $entry = readdir($dir)) {
108 0           my @c_stat;
109              
110 0 0         $entry =~ /^file\./ or next; # skip everything that is not a file.* to begin with.
111              
112 0           @c_stat = stat($self->_catfile('data', $entry));
113 0 0         next unless scalar @c_stat;
114              
115 0           $cache{$c_stat[1].'@'.$c_stat[0]} = $entry;
116             }
117              
118 0           %{$local_cache} = (%cache, complete => 1);
  0            
119              
120 0           return $self->{sysfile_cache} = \%cache;
121             }
122              
123 0           return $self->{sysfile_cache};
124             }
125              
126              
127             sub file_add {
128 0     0 1   my ($self, $files, %opts) = @_;
129 0           my $instance = $self->instance;
130 0   0       my $local_cache = $instance->_tagpool_sysfile_cache->{$self->{path}} //= {};
131 0           my $lock;
132             my $sysfile_cache;
133 0           my %to_add;
134              
135             # First setup %to_add:
136 0 0         $files = [$files] unless ref($files) eq 'ARRAY';
137 0           foreach my $file (@{$files}) {
  0            
138 0           my $link;
139             my $inode;
140 0           my $path;
141 0           my $key;
142              
143 0 0         croak 'File is undefined' unless $file;
144              
145 0 0         if (ref($file)) {
146 0 0         if ($file->isa('File::Information::Link')) {
    0          
147 0           $link = $file;
148             } elsif ($file->isa('File::Information::Inode')) {
149 0           $inode = $file;
150             } else {
151 0           $inode = $instance->for_handle($file);
152             }
153             } else {
154 0           $link = $instance->for_link($file);
155             }
156              
157 0 0 0       $inode = $link->inode if !defined($inode) && defined($link);
158              
159 0 0 0       $path //= $link->{path} if defined $link;
160 0 0 0       $path //= $inode->{path} if defined $inode;
161              
162 0 0         croak 'Cannot find any inode for file' unless defined $inode;
163              
164 0           $key = $inode->get('stat_cachehash');
165              
166 0           $to_add{$key} = {
167             inode => $inode,
168             link => $link,
169             path => $path,
170             type => $inode->get('tagpool_inode_type', as => 'ise'),
171             uuid => $inode->get('uuid', as => 'uuid', default => undef),
172             };
173             }
174              
175             # Lock the pool and figure out what to add.
176 0           $lock = $self->lock;
177 0           $sysfile_cache = $self->load_sysfile_cache;
178              
179             # Check if we have any valid files.
180 0           foreach my $key (keys %to_add) {
181 0           my $file = $to_add{$key};
182 0           my $invalid;
183              
184 0   0       $invalid ||= !defined($file->{path}) && length($file->{path});
      0        
185 0   0       $invalid ||= $file->{type} ne 'e6d6bb07-1a6a-46f6-8c18-5aa6ea24d7cb';
186              
187 0 0         if (exists $sysfile_cache->{$key}) {
188 0 0         if ($opts{skip_already}) {
189 0           delete $to_add{$key};
190 0           next;
191             }
192 0   0       $invalid ||= 1;
193             }
194              
195 0 0 0       if ($invalid && $opts{skip_invalid}) {
196 0           delete $to_add{$key};
197 0           next;
198             }
199              
200 0 0         unless (defined $file->{uuid}) {
201 0           $file->{uuid} = Data::Identifier->random(type => 'uuid')->uuid;
202             }
203              
204 0   0       $invalid ||= !defined($file->{uuid});
205              
206 0   0       $invalid ||= -e $self->_catfile('data', 'info.'.$file->{uuid});
207              
208 0 0         if ($invalid) {
209 0           croak 'Cannot add file '.$key.': Not permissible for adding';
210             }
211             }
212              
213             # Now we only have files in %to_add which we can actually add.
214             # We also have a lock.
215             # So add them!
216              
217 0           foreach my $key (keys %to_add) {
218 0           my $file = $to_add{$key};
219 0           my $uuid = $file->{uuid};
220             my $inode = $file->{inode},
221 0           my $pool_name_suffix = 'file.'.$uuid.'.x';
222 0           my $writer;
223 0           my %data = (
224             );
225 0           my %info;
226             my %tags;
227 0           my %_base_key_to_tagpool_info = (
228             st_ino => 'inode',
229             st_mtime => 'mtime',
230             size => 'size',
231             );
232              
233 0           foreach my $lifecycle (qw(current final)) {
234 0           my $c = $data{$lifecycle} = {};
235              
236 0           foreach my $base_key (qw(st_ino st_mtime size)) {
237 0           $c->{$_base_key_to_tagpool_info{$base_key}} = $inode->get($base_key, lifecycle => $lifecycle, default => undef);
238             }
239             }
240              
241 0           foreach my $lifecycle (qw(current final)) {
242 0           foreach my $tagpool_name (qw(sha1 sha512)) {
243 0 0         my $utag_name = $File::Information::Base::_digest_name_converter{$tagpool_name} or next;
244 0           my $digest = $inode->digest($utag_name, lifecycle => $lifecycle, as => 'hex', default => undef);
245 0 0         next unless defined $digest;
246 0           $data{$lifecycle}{'hash-'.$tagpool_name} = $digest;
247             }
248             }
249              
250 0           $data{current}{timestamp} = time();
251              
252             %info = (
253 0           (map {$_ => $inode->get($_, default => undef)} qw(title comment description)),
254 0           (map {'initial-'.$_ => $data{current}{$_}, 'last-'.$_ => $data{current}{$_}} keys %{$data{current}}),
  0            
255 0           (map {'final-'.$_ => $data{final}{$_}} keys %{$data{final}}),
  0            
  0            
256             'pool-name-suffix' => $pool_name_suffix,
257             );
258              
259             # Fixup:
260 0           foreach my $c (keys %info) {
261 0 0         delete($info{$c}) unless defined $info{$c};
262             }
263              
264 0           foreach my $base_key (qw(writemode mediatype finalmode)) {
265 0           my $uuid = $inode->get($base_key, as => 'uuid', default => undef);
266 0 0         next unless $uuid;
267 0           $tags{$uuid} = undef;
268             }
269              
270 0           warn $uuid;
271              
272 0 0         link($file->{path}, $self->_catfile('data', $pool_name_suffix)) or die $!;
273              
274 0           $writer = File::ValueFile::Simple::Writer->new($self->_catfile('data', 'info.'.$uuid));
275 0           $writer->write_hash(\%info);
276              
277 0           $writer = File::ValueFile::Simple::Writer->new($self->_catfile('data', 'tags.'.$uuid));
278 0           $writer->write('tagged-as', $_) foreach keys %tags;
279              
280 0           $sysfile_cache->{$key} = $pool_name_suffix;
281 0           $local_cache->{$key} = $pool_name_suffix;
282             }
283             }
284              
285             # ----------------
286              
287             sub DESTROY {
288 0     0     my ($self) = @_;
289              
290 0 0 0       if (defined($self->{locks}) && scalar(keys %{$self->{locks}})) {
  0            
291 0           warn 'DESTROY on locked pool. BUG.';
292 0   0       warn sprintf('LOCK: %s -> %s', $_, $self->{locks}{$_} // '<undef>') foreach keys %{$self->{locks}};
  0            
293 0           warn 'END OF LOCK LIST';
294             }
295             }
296              
297             sub _catfile {
298 0     0     my ($self, @c) = @_;
299 0           File::Spec->catfile($self->{path}, @c);
300             }
301              
302             sub _catdir {
303 0     0     my ($self, @c) = @_;
304 0           File::Spec->catdir($self->{path}, @c);
305             }
306              
307             sub _tempfile {
308 0     0     my ($self, $task, $instance) = @_;
309              
310 0   0       $task ||= 'UNKNOWN';
311 0   0       $instance ||= int($self);;
312              
313 0           return $self->_catfile('temp', sprintf('%s.%i.%s.%s', Sys::Hostname::hostname(), $$, $task, $instance));
314             }
315              
316             sub _unlock {
317 0     0     my ($self, $lock) = @_;
318 0           my $locks = $self->{locks};
319              
320 0           delete $locks->{$lock};
321              
322 0 0         unless (scalar keys %{$locks}) {
  0            
323 0 0         unlink($self->{lockfile}) if defined $self->{lockfile};
324 0 0         unlink($self->{lockname}) if defined $self->{lockname};
325 0           $self->{lockfile} = undef;
326 0           $self->{lockname} = undef;
327 0           $self->{sysfile_cache} = undef;
328             }
329             }
330              
331             sub _load_tagpool {
332 0     0     my ($self, $key, %opts) = @_;
333 0   0       my $pv = $self->{properties_values} //= {};
334 0           my $config;
335              
336 0 0         return if $self->{_loaded_tagpool_pool};
337 0           $self->{_loaded_tagpool_pool} = 1;
338              
339 0   0       $pv->{current} //= {};
340 0           $pv->{current}{tagpool_pool_path} = {raw => $self->{path}};
341              
342 0 0         return unless $HAVE_FILE_VALUEFILE;
343              
344 0           eval {
345 0           my $path = $self->_catfile('config');
346 0           my $reader = File::ValueFile::Simple::Reader->new($path, supported_formats => undef, supported_features => []);
347 0           $config = $reader->read_as_hash;
348             };
349              
350 0 0         return unless defined $config;
351              
352 0 0 0       $pv->{current}{tagpool_pool_uuid} = {raw => $config->{'pool-uuid'}} if defined($config->{'pool-uuid'}) && length($config->{'pool-uuid'}) == 36;
353             }
354              
355             1;
356              
357             __END__
358              
359             =pod
360              
361             =encoding UTF-8
362              
363             =head1 NAME
364              
365             File::Information::Tagpool - generic module for extracting information from filesystems
366              
367             =head1 VERSION
368              
369             version v0.16
370              
371             =head1 SYNOPSIS
372              
373             use File::Information;
374              
375             my @tagpool = $instance->tagpool;
376              
377             my @tagpool = $inode->tagpool;
378              
379             my File::Information::Tagpool $tagpool = ...;
380              
381             This module represents an instance of a tagpool.
382              
383             B<Note:> This package inherits from L<File::Information::Base>.
384              
385             =head1 METHODS
386              
387             =head2 lock
388              
389             my File::Information::Lock $lock = $pool->lock;
390              
391             Locks the pool and returns the lock.
392              
393             Some operations require the pool to be in locked state. Specifically all write operations.
394             When the pool is locked no other process or instance can access it.
395              
396             The lock stays valid as long as a reference to C<$lock> is kept alive. See L<File::Information::Lock> about locks.
397             It is possible to acquire multiple lock objects from the same instance (C<$pool>). In that case the pool stays locked
398             until all lock references are gone.
399              
400             B<Note:>
401             Locking the pool may take time as we might wait on other locks.
402             It may also fail (C<die>ing if it does) if no lock can be acquired.
403              
404             B<Note:>
405             If you perform multiple write operations it will generally improve performance significantly to keep it locked.
406             To do this acquire a lock before you start your operations and hold for as long as you keep working on the pool.
407             However you should not lock the pool while idle to allow other processes to interact with it as well.
408             How long is too long is hard to answer in a general manner.
409              
410             B<See also>:
411             L</locked>
412              
413             =head2 locked
414              
415             $pool->locked(sub {
416             # your code ...
417             });
418              
419             This call run the passed coderef with an active lock. It is similar to:
420              
421             {
422             my File::Information::Lock $lock = $pool->lock;
423             {
424             # your code ....
425             }
426             }
427              
428             B<Note:>
429             It is safe to use this method even if you already hold a lock. This allows code calling this method
430             to not need to take notice about the current state of locking.
431              
432             B<See also:>
433             L</lock>
434              
435             =head2 load_sysfile_cache
436              
437             my File::Information::Lock $lock ...;
438             $pool->load_sysfile_cache;
439              
440             This method loads the pool's sysfile cache into memory. It will do nothing if the cache is already loaded.
441             The sysfile cache is only valid as long as the pool is locked. It is automatically discarded on unlock.
442              
443             This method will also seed the instance's sysfile cache (see L<File::Information>).
444             The instance's cache may survive pool unlock.
445              
446             B<Note:>
447             This method is normally not needed to be called manually. However if you perform a lot of read operations on the pool
448             (such as calling L<File::Information/for_link> or L<File::Information/for_handle> on a large number of different files)
449             this can be beneficial. It also allows to seed the cache ahead of time to speed up lookups later on.
450              
451             B<Note:>
452             This method caches information on all sysfiles in the pool in memory. This can be memory expensive.
453             One should expect at least 1024 Byte of memory usage per file in the pool. For small pools this is of no concern.
454             However for larger pools it must be considered. Also, as this seeds the instance's cache not all of it may be gone
455             once the pool is unlocked. See L<File::Information> for it's cache handling.
456              
457             =head2 file_add
458              
459             $pool->file_add(\@files [, %opts ]);
460              
461             Adds the given files to the pool. On error this method C<die>s.
462              
463             The pool is automatically locked if it is not yet locked.
464             If you want to add multiple files you can pass them. If you want to call this method multiple times
465             it might be more performant to acquire a lock before and hold it until you're done.
466              
467             The following (all optional) options are supported:
468              
469             =over
470              
471             =item C<skip_already>
472              
473             Files that are already in the pool are silently skipped.
474              
475             =item C<skip_invalid>
476              
477             Silently skip files that are invalid for any reason.
478              
479             =back
480              
481             =head1 AUTHOR
482              
483             Philipp Schafft <lion@cpan.org>
484              
485             =head1 COPYRIGHT AND LICENSE
486              
487             This software is Copyright (c) 2024-2025 by Philipp Schafft <lion@cpan.org>.
488              
489             This is free software, licensed under:
490              
491             The Artistic License 2.0 (GPL Compatible)
492              
493             =cut