File Coverage

blib/lib/App/FileCleanerByDiskUage.pm
Criterion Covered Total %
statement 14 100 14.0
branch 0 38 0.0
condition 0 18 0.0
subroutine 5 6 83.3
pod 1 1 100.0
total 20 163 12.2


line stmt bran cond sub pod time code
1             package App::FileCleanerByDiskUage;
2              
3 1     1   67583 use 5.006;
  1         4  
4 1     1   9 use strict;
  1         4  
  1         35  
5 1     1   6 use warnings;
  1         2  
  1         35  
6 1     1   528 use File::Find::Rule;
  1         8721  
  1         8  
7 1     1   537 use Filesys::Df;
  1         1133  
  1         1050  
8              
9             =head1 NAME
10              
11             App::FileCleanerByDiskUage - Removes files based on disk space usage till it drops below the specified amount.
12              
13             =head1 VERSION
14              
15             Version 0.2.1
16              
17             =cut
18              
19             our $VERSION = '0.2.1';
20              
21             =head1 SYNOPSIS
22              
23             use App::FileCleanerByDiskUage;
24              
25             # remove files under /var/log/suricata/pcap when disk usage is over 90%
26             # If over 90% make sure there are atleast 32 files and if there are atleast 32, remove them based
27             # on age till we drop below 90%. The newest 32 will be ignored regardless of disk usage.
28             my $removed=App::FileCleanerByDiskUage->clean(path=>'/var/log/suricata/pcap/', du=>90, min_files=>32);
29             if (defined( $removed->{errors}[0] )){
30             die('One or more file could not be removed... '.join(' ', @{ $removed->{errors} }));
31             }
32             my $int=0;
33             while (defined( $removed->{unlined}[$int] )){
34             print 'Removed ' . $removed->{unlinked}[$int]{name} . "\n";
35              
36             $int++;
37             }
38              
39             This works via doing the following.
40              
41             1: Check if disk usage is above the specified threshold. If not it ends here.
42              
43             2: Search for files under the specified path.
44              
45             3: If the number of found files is less than the number of files to keep regardless
46             of disk size it ends here. So if min_files is set to 32 and there are only 3 files,
47             then it would just return.
48              
49             4: Get the stats for all the found files.
50              
51             5: If min_files is specified, remove that many of the files from the list, starting
52             with the newest.
53              
54             6: Removes the oldest file.
55              
56             7: Check disk usage again and if it is less it ends here.
57              
58             8: Go back to 6.
59              
60             =head1 Functions
61              
62             =head2 clean
63              
64             This performs the cleaning actions. As long as the path exists and .path and .du
65             are defined this will not die. But if any of those are undef or do not exist it will
66             die.
67              
68             The following hash values are taken by it.
69              
70             Minimum Required Vars: path, du
71              
72             - path :: The path to look for files under. May be a array of paths. Only the first is used
73             for getting the disk usage though, so this should not have paths in it that are on
74             a different partition.
75             Default :: undef
76              
77             - du :: Disk usage to remove files down till.
78             Default :: undef
79              
80             - min_files :: Minimum number of files to keep, regardless of disk usage.
81             Default :: undef
82              
83             - ignore :: A regexp to use for ignoring files. So lets say you want to ignore,
84             files matching /\.pcap$/, it would be '\.pcap$'.
85             Default :: undef
86              
87             - dry_run :: Do not actually remove anything. Just check to see if the file writable by the
88             current user.
89              
90             The returned value is a hash ref.
91              
92             - dry_run :: Boolean for fir it was a dry run or not.
93              
94             - found_files :: Array of hashes of data for all files found. This will only be defined if du is above
95             threshold for removing files. If it is below it, the function will return instead of taking
96             the time required to run a search.
97              
98             - found_files_count :: A count of files found.
99              
100             - path :: The value of path that it was called with. This will always be a array, regardless of if a array or
101             scalar was passed as internally converts a scalars into a array containing just a single item.
102              
103             - missing_paths :: Paths that were passed to it, but don't exist.
104              
105             - unlinked :: Array of hashes of data for files that have been removed.
106              
107             - unlinked_count :: A count of how many files were unlinked
108              
109             - unlink_errors :: Array of strings containing error descriptions.
110              
111             - unlink_failed :: Array of hashes of data for files that could not removed. The corresponding
112             index in .errors will be the error in question. So $results->{unlink_failed}[0]
113             would be $results->{unlink_errors}[0]
114              
115             - unlink_fialed_count :: A count of how many files unlinking failed for.
116              
117             The files hash is composed as below.
118              
119             - name :: Name of the file, including it's path.
120              
121             # following are provided via the Perl function stat
122             - dev
123             - ino
124             - mode
125             - nlink
126             - uid
127             - gid
128             - rdev
129             - size
130             - atime
131             - mtime
132             - ctime
133             - blksize
134             - blocks
135              
136             =cut
137              
138             sub clean {
139 0     0 1   my ( $empty, %opts ) = @_;
140              
141 0           my @missing_paths;
142             my @paths;
143              
144 0           my $du_path;
145             # file paths should end with / or other wise if it is a symlink File::Find::Rule will skip it
146             # so fix that up while we are doing the path check
147 0 0 0       if ( !defined( $opts{path} ) ) {
    0          
    0          
148 0           die('$opts{path} is not defined');
149             } elsif ( ref( $opts{path} ) ne 'ARRAY' && !-d $opts{path} ) {
150 0           push( @missing_paths, $opts{path} );
151             } elsif ( ref( $opts{path} ) eq 'ARRAY' ) {
152 0 0         if ( !defined( $opts{path}[0] ) ) {
153 0           die('$opts{path}[0] is not defined');
154             }
155 0           my $int = 0;
156 0           while ( defined( $opts{path}[$int] ) ) {
157 0           $opts{path}[$int] = $opts{path}[$int] . '/';
158 0           $opts{path}[$int] =~ s/\/+$/\//;
159 0 0         if ( !-d $opts{path}[$int] ) {
160 0           push( @missing_paths, $opts{path}[$int] );
161             } else {
162 0           push( @paths, $opts{path}[$int] );
163             }
164 0           $int++;
165             } ## end while ( defined( $opts{path}[$int] ) )
166 0           $du_path = $opts{path}[0];
167             } else {
168 0           $opts{path} = $opts{path} . '/';
169 0           $opts{path} =~ s/\/+$/\//;
170 0           $du_path = $opts{path};
171 0           push( @paths, $opts{path} );
172             }
173              
174 0 0         if ( !defined( $opts{du} ) ) {
    0          
175 0           die('$opts{du} is not defined');
176             } elsif ( $opts{du} !~ /^\d+$/ ) {
177 0           die( '$opts{du} is set to "' . $opts{du} . '" whish is not numeric' );
178             }
179              
180             # if we have a min_files specified, make sure the value is numeric
181 0 0 0       if ( defined( $opts{min_files} ) && $opts{min_files} !~ /^\d+$/ ) {
182 0           die( '$opts{min_files} is set to "' . $opts{min_files} . '" whish is not numeric matching /^\d+$/' );
183             }
184              
185 0 0         if ( !$opts{dry_run} ) {
186 0           $opts{dry_run} = 0,;
187             } else {
188 0           $opts{dry_run} = 1,;
189             }
190              
191 0           my $df = df($du_path);
192              
193             # the results to be returned
194             my $results = {
195             unlinked => [],
196             unlink_failed => [],
197             unlink_errors => [],
198             found_files => [],
199             found_files_count => 0,
200             unlinked_count => 0,
201             unlink_failed_count => 0,
202             du_target => $opts{du},
203             du_starting => $df->{per},
204             du_ending => $df->{per},
205             min_files => 0,
206             dry_run => $opts{dry_run},
207 0           path => \@paths,
208             missing_paths => \@missing_paths,
209             };
210              
211 0 0         if ( !defined( $paths[0] ) ) {
212 0           return $results;
213             }
214              
215 0 0         if ( $df->{per} < $opts{du} ) {
216 0           return $results;
217             }
218              
219 0           my @files;
220 0 0         if ( defined( $opts{ignore} ) ) {
221 0           my $ignore_rule = File::Find::Rule->new;
222 0           $ignore_rule->name(qr/$opts{ignore}/);
223 0           @files = File::Find::Rule->file()->not($ignore_rule)->in(@paths);
224             } else {
225 0           @files = File::Find::Rule->file()->in(@paths);
226             }
227 0           $results->{found_files_count} = $#files + 1;
228              
229             # if we have a min number of files specified, make sure have that many defined
230 0 0 0       if ( $opts{min_files} && !defined( $files[ $opts{min_files} ] ) ) {
231 0           $results->{min_files} = $opts{min_files};
232 0           return $results;
233             }
234              
235             # get the stats for all the files
236 0           my @files_info;
237 0           foreach my $file (@files) {
238 0           my %file_info;
239             (
240             $file_info{dev}, $file_info{ino}, $file_info{mode}, $file_info{nlink}, $file_info{uid},
241             $file_info{gid}, $file_info{rdev}, $file_info{size}, $file_info{atime}, $file_info{mtime},
242             $file_info{ctime}, $file_info{blksize}, $file_info{blocks}
243 0           ) = stat($file);
244 0           $file_info{name} = $file;
245 0           push( @files_info, \%file_info );
246             } ## end foreach my $file (@files)
247              
248             # sort files oldest to newest based on mtime
249 0           @files_info = sort { $a->{mtime} cmp $b->{mtime} } @files_info;
  0            
250             # set this here as we are saving it into the hashref as a array ref
251 0           my @files_info_copy = @files_info;
252 0           $results->{found_files} = \@files_info_copy;
253              
254             # remove the newest files if mins_files is greater than or equal to 1
255 0 0 0       if ( defined( $opts{min_files} ) && $opts{min_files} > 0 ) {
256 0           $results->{min_files} = $opts{min_files};
257 0           my $min_files_int = 1;
258 0           while ( $min_files_int <= $opts{min_files} ) {
259 0           pop(@files_info);
260              
261 0           $min_files_int++;
262             }
263             }
264              
265             # go through files and remove the oldest till we
266 0           my $int = 0;
267 0   0       while ( $df->{per} >= $opts{du} && defined( $files_info[$int] ) ) {
268 0           eval {
269 0 0 0       if ( $opts{dry_run} && !-w $files_info[$int]{name} ) {
270 0           die('file is not writable');
271             } else {
272 0 0         unlink( $files_info[$int]{name} ) or die($!);
273             }
274              
275             };
276 0           my %tmp_hash = %{ $files_info[$int] };
  0            
277 0 0         if ($@) {
278 0           push( @{ $results->{unlink_errors} }, 'Failed to remove "' . $files_info[$int]{name} . '"... ' . $@ );
  0            
279 0           push( @{ $results->{unlink_failed} }, \%tmp_hash );
  0            
280             } else {
281 0           push( @{ $results->{unlinked} }, \%tmp_hash );
  0            
282             }
283              
284 0           $int++;
285 0           $df = df($du_path);
286             } ## end while ( $df->{per} >= $opts{du} && defined( $files_info...))
287              
288 0           $results->{du_ending} = $df->{per};
289 0 0         if ( defined( $results->{unlinked}[0] ) ) {
290 0           $results->{unlinked_count} = $#{ $results->{unlinked} } + 1;
  0            
291             }
292 0 0         if ( defined( $results->{unlink_failed}[0] ) ) {
293 0           $results->{unlink_failed_count} = $#{ $results->{unlink_failed} } + 1;
  0            
294             }
295              
296 0           return $results;
297             } ## end sub clean
298              
299             =head1 AUTHOR
300              
301             Zane C. Bowers-Hadley, C<< >>
302              
303             =head1 BUGS
304              
305             Please report any bugs or feature requests to C, or through
306             the web interface at L. I will be notified, and then you'll
307             automatically be notified of progress on your bug as I make changes.
308              
309              
310              
311              
312             =head1 SUPPORT
313              
314             You can find documentation for this module with the perldoc command.
315              
316             perldoc App::FileCleanerByDiskUage
317              
318              
319             You can also look for information at:
320              
321             =over 4
322              
323             =item * RT: CPAN's request tracker (report bugs here)
324              
325             L
326              
327             =item * CPAN Ratings
328              
329             L
330              
331             =item * Search CPAN
332              
333             L
334              
335             =back
336              
337              
338             =head1 ACKNOWLEDGEMENTS
339              
340              
341             =head1 LICENSE AND COPYRIGHT
342              
343             This software is Copyright (c) 2023 by Zane C. Bowers-Hadley.
344              
345             This is free software, licensed under:
346              
347             The GNU Lesser General Public License, Version 3, June 2007
348              
349              
350             =cut
351              
352             1; # End of App::FileCleanerByDiskUage