File Coverage

blib/lib/App/FileCleanerByDiskUage.pm
Criterion Covered Total %
statement 17 136 12.5
branch 0 68 0.0
condition 0 21 0.0
subroutine 6 7 85.7
pod 1 1 100.0
total 24 233 10.3


line stmt bran cond sub pod time code
1             package App::FileCleanerByDiskUage;
2              
3 1     1   129785 use 5.006;
  1         6  
4 1     1   3 use strict;
  1         2  
  1         15  
5 1     1   3 use warnings;
  1         1  
  1         41  
6 1     1   501 use File::Find::Rule;
  1         6415  
  1         5  
7 1     1   503 use Filesys::Df qw( df );
  1         1112  
  1         59  
8 1     1   443 use Net::Server::Daemonize qw(check_pid_file create_pid_file unlink_pid_file);
  1         9744  
  1         1118  
9              
10             =head1 NAME
11              
12             App::FileCleanerByDiskUage - Removes files based on disk space usage till it drops below the specified amount.
13              
14             =head1 VERSION
15              
16             Version 0.4.1
17              
18             =cut
19              
20             our $VERSION = '0.4.1';
21              
22             =head1 SYNOPSIS
23              
24             use App::FileCleanerByDiskUage;
25              
26             # remove files under /var/log/suricata/pcap when disk usage is over 90%
27             # If over 90% make sure there are atleast 32 files and if there are atleast 32, remove them based
28             # on age till we drop below 90%. The newest 32 will be ignored regardless of disk usage.
29             my $removed=App::FileCleanerByDiskUage->clean(path=>'/var/log/suricata/pcap/', du=>90, min_files=>32);
30             if (defined( $removed->{errors}[0] )){
31             die('One or more file could not be removed... '.join(' ', @{ $removed->{errors} }));
32             }
33             my $int=0;
34             while (defined( $removed->{unlined}[$int] )){
35             print 'Removed ' . $removed->{unlinked}[$int]{name} . "\n";
36              
37             $int++;
38             }
39              
40             This works via doing the following.
41              
42             1: Check if disk usage is above the specified threshold. If not it ends here.
43              
44             2: Search for files under the specified path.
45              
46             3: If the number of found files is less than the number of files to keep regardless
47             of disk size it ends here. So if min_files is set to 32 and there are only 3 files,
48             then it would just return.
49              
50             4: Get the stats for all the found files.
51              
52             5: If min_files is specified, remove that many of the files from the list, starting
53             with the newest.
54              
55             6: Removes the oldest file.
56              
57             7: Check disk usage again and if it is less it ends here.
58              
59             8: Go back to 6.
60              
61             =head1 Functions
62              
63             =head2 clean
64              
65             This performs the cleaning actions. As long as the path exists and .path and .du
66             are defined this will not die. But if any of those are undef or do not exist it will
67             die.
68              
69             The following hash values are taken by it.
70              
71             Minimum Required Vars: path, du
72              
73             - path :: The path to look for files under. May be a array of paths. Only the first is used
74             for getting the disk usage though, so this should not have paths in it that are on
75             a different partition.
76             Default :: undef
77              
78             - du :: Disk usage to remove files down till.
79             Default :: undef
80              
81             - min_files :: Minimum number of files to keep, regardless of disk usage.
82             Default :: undef
83              
84             - ignore :: A regexp to use for ignoring files. So lets say you want to ignore,
85             files matching /\.pcap$/, it would be '\.pcap$'.
86             Default :: undef
87              
88             - dry_run :: Do not actually remove anything. Just check to see if the file writable by the
89             current user.
90              
91             -use_pid :: Create a PID to make sure multiple instances can't run at once.
92             Default :: undef
93              
94             -pid_dir :: Create a PID to make sure multiple instances can't run at once.
95             Default :: /var/run
96              
97             - pid_name :: Append this to the the name of the pid file created. If specified with a
98             value of 'foo' then the file would be 'file_cleaner_by_du-foo.pid'.
99             Default :: undef
100              
101             The returned value is a hash ref.
102              
103             - dry_run :: Boolean for fir it was a dry run or not.
104              
105             - found_files :: Array of hashes of data for all files found. This will only be defined if du is above
106             threshold for removing files. If it is below it, the function will return instead of taking
107             the time required to run a search.
108              
109             - found_files_count :: A count of files found.
110              
111             - path :: The value of path that it was called with. This will always be a array, regardless of if a array or
112             scalar was passed as internally converts a scalars into a array containing just a single item.
113              
114             - missing_paths :: Paths that were passed to it, but don't exist.
115              
116             - unlinked :: Array of hashes of data for files that have been removed.
117              
118             - unlinked_count :: A count of how many files were unlinked
119              
120             - unlink_errors :: Array of strings containing error descriptions.
121              
122             - unlink_failed :: Array of hashes of data for files that could not removed. The corresponding
123             index in .errors will be the error in question. So $results->{unlink_failed}[0]
124             would be $results->{unlink_errors}[0]
125              
126             - unlink_fialed_count :: A count of how many files unlinking failed for.
127              
128             The files hash is composed as below.
129              
130             - name :: Name of the file, including it's path.
131              
132             # following are provided via the Perl function stat
133             - dev
134             - ino
135             - mode
136             - nlink
137             - uid
138             - gid
139             - rdev
140             - size
141             - atime
142             - mtime
143             - ctime
144             - blksize
145             - blocks
146              
147             =cut
148              
149             sub clean {
150 0     0 1   my ( $empty, %opts ) = @_;
151              
152 0           my $pid_file;
153 0 0         if ( $opts{use_pid} ) {
154 0 0         if ( !defined( $opts{pid_dir} ) ) {
155 0           $opts{pid_dir} = '/var/run';
156             } else {
157 0 0         if ( !-d $opts{pid_dir} ) {
158 0           die( $opts{pid_dir} . ' does not exist or is not a dir' );
159             }
160             }
161              
162 0 0         if ( !defined( $opts{pid_name} ) ) {
163 0           $pid_file = $opts{pid_dir} . '/file_cleaner_by_du.pid';
164             } else {
165 0 0 0       if ( $opts{pid_name} =~ /\// || $opts{pid_name} =~ /\\/ ) {
166 0           die( 'PID name of "' . $opts{pid_name} . '" can not contain either / or \\' );
167             }
168 0           $pid_file = $opts{pid_dir} . '/file_cleaner_by_du-' . $opts{pid_name} . '.pid';
169             }
170              
171 0           check_pid_file($pid_file);
172 0           create_pid_file($pid_file);
173             } ## end if ( $opts{use_pid} )
174              
175 0           my @missing_paths;
176             my @paths;
177              
178 0           my $du_path;
179             # file paths should end with / or other wise if it is a symlink File::Find::Rule will skip it
180             # so fix that up while we are doing the path check
181 0 0 0       if ( !defined( $opts{path} ) ) {
    0          
    0          
182 0 0         if ( $opts{use_pid} ) {
183 0           unlink_pid_file($pid_file);
184             }
185 0           die('$opts{path} is not defined');
186             } elsif ( ref( $opts{path} ) ne 'ARRAY' && !-d $opts{path} ) {
187 0           push( @missing_paths, $opts{path} );
188             } elsif ( ref( $opts{path} ) eq 'ARRAY' ) {
189 0 0         if ( !defined( $opts{path}[0] ) ) {
190 0 0         if ( $opts{use_pid} ) {
191 0           unlink_pid_file($pid_file);
192             }
193 0           die('$opts{path}[0] is not defined');
194             }
195 0           my $int = 0;
196 0           while ( defined( $opts{path}[$int] ) ) {
197 0           $opts{path}[$int] = $opts{path}[$int] . '/';
198 0           $opts{path}[$int] =~ s/\/+$/\//;
199 0 0         if ( !-d $opts{path}[$int] ) {
200 0           push( @missing_paths, $opts{path}[$int] );
201             } else {
202 0           push( @paths, $opts{path}[$int] );
203             }
204 0           $int++;
205             } ## end while ( defined( $opts{path}[$int] ) )
206 0           $du_path = $paths[0];
207             } else {
208 0           $opts{path} = $opts{path} . '/';
209 0           $opts{path} =~ s/\/+$/\//;
210 0           $du_path = $opts{path};
211 0           push( @paths, $opts{path} );
212             }
213              
214 0 0         if ( !defined( $paths[0] ) ) {
215 0           die('All specified paths are missing or not a directory');
216             }
217              
218 0 0         if ( !defined( $opts{du} ) ) {
    0          
219 0 0         if ( $opts{use_pid} ) {
220 0           unlink_pid_file($pid_file);
221             }
222 0           die('$opts{du} is not defined');
223             } elsif ( $opts{du} !~ /^\d+$/ ) {
224 0 0         if ( $opts{use_pid} ) {
225 0           unlink_pid_file($pid_file);
226             }
227 0           die( '$opts{du} is set to "' . $opts{du} . '" whish is not numeric' );
228             }
229              
230             # if we have a min_files specified, make sure the value is numeric
231 0 0 0       if ( defined( $opts{min_files} ) && $opts{min_files} !~ /^\d+$/ ) {
232 0 0         if ( $opts{use_pid} ) {
233 0           unlink_pid_file($pid_file);
234             }
235 0           die( '$opts{min_files} is set to "' . $opts{min_files} . '" whish is not numeric matching /^\d+$/' );
236             }
237              
238 0 0         if ( !$opts{dry_run} ) {
239 0           $opts{dry_run} = 0,;
240             } else {
241 0           $opts{dry_run} = 1,;
242             }
243              
244 0           my $df = df($du_path);
245              
246             # the results to be returned
247             my $results = {
248             unlinked => [],
249             unlink_failed => [],
250             unlink_errors => [],
251             found_files => [],
252             found_files_count => 0,
253             unlinked_count => 0,
254             unlink_failed_count => 0,
255             du_target => $opts{du},
256             du_starting => $df->{per},
257             du_ending => $df->{per},
258             min_files => 0,
259             dry_run => $opts{dry_run},
260 0           path => \@paths,
261             missing_paths => \@missing_paths,
262             };
263              
264 0 0         if ( !defined( $paths[0] ) ) {
265 0 0         if ( $opts{use_pid} ) {
266 0           unlink_pid_file($pid_file);
267             }
268 0           return $results;
269             }
270              
271 0 0         if ( $df->{per} < $opts{du} ) {
272 0 0         if ( $opts{use_pid} ) {
273 0           unlink_pid_file($pid_file);
274             }
275 0           return $results;
276             }
277              
278 0           my @files;
279 0 0         if ( defined( $opts{ignore} ) ) {
280 0           my $ignore_rule = File::Find::Rule->new;
281 0           $ignore_rule->name(qr/$opts{ignore}/);
282 0           @files = File::Find::Rule->file()->not($ignore_rule)->in(@paths);
283             } else {
284 0           @files = File::Find::Rule->file()->in(@paths);
285             }
286 0           $results->{found_files_count} = $#files + 1;
287              
288             # if we have a min number of files specified, make sure have that many defined
289 0 0 0       if ( $opts{min_files} && !defined( $files[ $opts{min_files} ] ) ) {
290 0           $results->{min_files} = $opts{min_files};
291 0 0         if ( $opts{use_pid} ) {
292 0           unlink_pid_file($pid_file);
293             }
294 0           return $results;
295             }
296              
297             # get the stats for all the files
298 0           my @files_info;
299 0           foreach my $file (@files) {
300 0           my %file_info;
301             my $not_used;
302             (
303             $not_used, $not_used, $not_used, $not_used, $not_used,
304             $not_used, $not_used, $not_used, $not_used, $file_info{mtime},
305 0           $not_used, $not_used, $not_used
306             ) = stat($file);
307 0           $file_info{name} = $file;
308 0           push( @files_info, \%file_info );
309             } ## end foreach my $file (@files)
310              
311             # sort files oldest to newest based on mtime
312 0           @files_info = sort { $a->{mtime} cmp $b->{mtime} } @files_info;
  0            
313             # set this here as we are saving it into the hashref as a array ref
314 0           my @files_info_copy = @files_info;
315 0           $results->{found_files} = \@files_info_copy;
316              
317             # remove the newest files if mins_files is greater than or equal to 1
318 0 0 0       if ( defined( $opts{min_files} ) && $opts{min_files} > 0 ) {
319 0           $results->{min_files} = $opts{min_files};
320 0           my $min_files_int = 1;
321 0           while ( $min_files_int <= $opts{min_files} ) {
322 0           pop(@files_info);
323              
324 0           $min_files_int++;
325             }
326             }
327              
328             # go through files and remove the oldest till we
329 0           my $int = 0;
330 0   0       while ( $df->{per} >= $opts{du} && defined( $files_info[$int] ) ) {
331 0           eval {
332 0 0 0       if ( $opts{dry_run} && !-w $files_info[$int]{name} ) {
333 0           die('file is not writable');
334             } else {
335 0 0         unlink( $files_info[$int]{name} ) or die($!);
336             }
337              
338             };
339 0           my %tmp_hash = %{ $files_info[$int] };
  0            
340 0 0         if ($@) {
341 0           push( @{ $results->{unlink_errors} }, 'Failed to remove "' . $files_info[$int]{name} . '"... ' . $@ );
  0            
342 0           push( @{ $results->{unlink_failed} }, \%tmp_hash );
  0            
343             } else {
344 0           push( @{ $results->{unlinked} }, \%tmp_hash );
  0            
345             }
346              
347 0           $int++;
348 0           $df = df($du_path);
349             } ## end while ( $df->{per} >= $opts{du} && defined( $files_info...))
350              
351 0           $results->{du_ending} = $df->{per};
352 0 0         if ( defined( $results->{unlinked}[0] ) ) {
353 0           $results->{unlinked_count} = $#{ $results->{unlinked} } + 1;
  0            
354             }
355 0 0         if ( defined( $results->{unlink_failed}[0] ) ) {
356 0           $results->{unlink_failed_count} = $#{ $results->{unlink_failed} } + 1;
  0            
357             }
358              
359 0 0         if ( $opts{use_pid} ) {
360 0           unlink_pid_file($pid_file);
361             }
362 0           return $results;
363             } ## end sub clean
364              
365             =head1 AUTHOR
366              
367             Zane C. Bowers-Hadley, C<< >>
368              
369             =head1 BUGS
370              
371             Please report any bugs or feature requests to C, or through
372             the web interface at L. I will be notified, and then you'll
373             automatically be notified of progress on your bug as I make changes.
374              
375              
376              
377              
378             =head1 SUPPORT
379              
380             You can find documentation for this module with the perldoc command.
381              
382             perldoc App::FileCleanerByDiskUage
383              
384              
385             You can also look for information at:
386              
387             =over 4
388              
389             =item * RT: CPAN's request tracker (report bugs here)
390              
391             L
392              
393             =item * CPAN Ratings
394              
395             L
396              
397             =item * Search CPAN
398              
399             L
400              
401             =back
402              
403              
404             =head1 ACKNOWLEDGEMENTS
405              
406              
407             =head1 LICENSE AND COPYRIGHT
408              
409             This software is Copyright (c) 2023 by Zane C. Bowers-Hadley.
410              
411             This is free software, licensed under:
412              
413             The GNU Lesser General Public License, Version 3, June 2007
414              
415              
416             =cut
417              
418             1; # End of App::FileCleanerByDiskUage