File Coverage

blib/lib/App/FileCleanerByDiskUage.pm
Criterion Covered Total %
statement 14 100 14.0
branch 0 38 0.0
condition 0 18 0.0
subroutine 5 6 83.3
pod 1 1 100.0
total 20 163 12.2


line stmt bran cond sub pod time code
1             package App::FileCleanerByDiskUage;
2              
3 1     1   68393 use 5.006;
  1         5  
4 1     1   6 use strict;
  1         11  
  1         33  
5 1     1   7 use warnings;
  1         2  
  1         35  
6 1     1   532 use File::Find::Rule;
  1         8393  
  1         7  
7 1     1   575 use Filesys::Df;
  1         1195  
  1         1083  
8              
9             =head1 NAME
10              
11             App::FileCleanerByDiskUage - Removes files based on disk space usage till it drops below the specified amount.
12              
13             =head1 VERSION
14              
15             Version 0.2.0
16              
17             =cut
18              
19             our $VERSION = '0.2.0';
20              
21             =head1 SYNOPSIS
22              
23             use App::FileCleanerByDiskUage;
24              
25             # remove files under /var/log/suricata/pcap when disk usage is over 90%
26             # If over 90% make sure there are atleast 32 files and if there are atleast 32, remove them based
27             # on age till we drop below 90%. The newest 32 will be ignored regardless of disk usage.
28             my $removed=App::FileCleanerByDiskUage->clean(path=>'/var/log/suricata/pcap/', du=>90, min_files=>32);
29             if (defined( $removed->{errors}[0] )){
30             die('One or more file could not be removed... '.join(' ', @{ $removed->{errors} }));
31             }
32             my $int=0;
33             while (defined( $removed->{unlined}[$int] )){
34             print 'Removed ' . $removed->{unlinked}[$int]{name} . "\n";
35              
36             $int++;
37             }
38              
39             This works via doing the following.
40              
41             1: Check if disk usage is above the specified threshold. If not it ends here.
42              
43             2: Search for files under the specified path.
44              
45             3: If the number of found files is less than the number of files to keep regardless
46             of disk size it ends here. So if min_files is set to 32 and there are only 3 files,
47             then it would just return.
48              
49             4: Get the stats for all the found files.
50              
51             5: If min_files is specified, remove that many of the files from the list, starting
52             with the newest.
53              
54             6: Removes the oldest file.
55              
56             7: Check disk usage again and if it is less it ends here.
57              
58             8: Go back to 6.
59              
60             =head1 Functions
61              
62             =head2 clean
63              
64             This performs the cleaning actions. As long as the path exists and .path and .du
65             are defined this will not die. But if any of those are undef or do not exist it will
66             die.
67              
68             The following hash values are taken by it.
69              
70             Minimum Required Vars: path, du
71              
72             - path :: The path to look for files under. May be a array of paths. Only the first is used
73             for getting the disk usage though, so this should not have paths in it that are on
74             a different partition.
75             Default :: undef
76              
77             - du :: Disk usage to remove files down till.
78             Default :: undef
79              
80             - min_files :: Minimum number of files to keep, regardless of disk usage.
81             Default :: undef
82              
83             - ignore :: A regexp to use for ignoring files. So lets say you want to ignore,
84             files matching /\.pcap$/, it would be '\.pcap$'.
85             Default :: undef
86              
87             - dry_run :: Do not actually remove anything. Just check to see if the file writable by the
88             current user.
89              
90             The returned value is a hash ref.
91              
92             - dry_run :: Boolean for fir it was a dry run or not.
93              
94             - found_files :: Array of hashes of data for all files found. This will only be defined if du is above
95             threshold for removing files. If it is below it, the function will return instead of taking
96             the time required to run a search.
97              
98             - found_files_count :: A count of files found.
99              
100             - path :: The value of path that it was called with. This will always be a array, regardless of if a array or
101             scalar was passed as internally converts a scalars into a array containing just a single item.
102              
103             - missing_paths :: Paths that were passed to it, but don't exist.
104              
105             - unlinked :: Array of hashes of data for files that have been removed.
106              
107             - unlinked_count :: A count of how many files were unlinked
108              
109             - unlink_errors :: Array of strings containing error descriptions.
110              
111             - unlink_failed :: Array of hashes of data for files that could not removed. The corresponding
112             index in .errors will be the error in question. So $results->{unlink_failed}[0]
113             would be $results->{unlink_errors}[0]
114              
115             - unlink_fialed_count :: A count of how many files unlinking failed for.
116              
117             The files hash is composed as below.
118              
119             - name :: Name of the file, including it's path.
120              
121             # following are provided via the Perl function stat
122             - dev
123             - ino
124             - mode
125             - nlink
126             - uid
127             - gid
128             - rdev
129             - size
130             - atime
131             - mtime
132             - ctime
133             - blksize
134             - blocks
135              
136             =cut
137              
138             sub clean {
139 0     0 1   my ( $empty, %opts ) = @_;
140              
141 0           my @missing_paths;
142             my @paths;
143              
144 0           my $du_path;
145 0 0 0       if ( !defined( $opts{path} ) ) {
    0          
    0          
146 0           die('$opts{path} is not defined');
147             } elsif ( ref( $opts{path} ) ne 'ARRAY' && !-d $opts{path} ) {
148 0           push( @missing_paths, $opts{path} );
149             } elsif ( ref( $opts{path} ) eq 'ARRAY' ) {
150 0 0         if ( !defined( $opts{path}[0] ) ) {
151 0           die('$opts{path}[0] is not defined');
152             }
153 0           my $int = 0;
154 0           while ( defined( $opts{path}[$int] ) ) {
155 0           $opts{path}[$int] = $opts{path}[$int] . '/';
156 0           $opts{path}[$int] =~ s/\/+/\//;
157 0 0         if ( !-d $opts{path}[$int] ) {
158 0           push( @missing_paths, $opts{path}[$int] );
159             } else {
160 0           push( @paths, $opts{path}[$int] );
161             }
162 0           $int++;
163             } ## end while ( defined( $opts{path}[$int] ) )
164 0           $du_path = $opts{path}[0];
165             } else {
166 0           $opts{path} = $opts{path} . '/';
167 0           $opts{path} =~ s/\/+/\//;
168 0           $du_path = $opts{path};
169 0           push( @paths, $opts{path} );
170             }
171              
172 0 0         if ( !defined( $opts{du} ) ) {
    0          
173 0           die('$opts{du} is not defined');
174             } elsif ( $opts{du} !~ /^\d+$/ ) {
175 0           die( '$opts{du} is set to "' . $opts{du} . '" whish is not numeric' );
176             }
177              
178             # if we have a min_files specified, make sure the value is numeric
179 0 0 0       if ( defined( $opts{min_files} ) && $opts{min_files} !~ /^\d+$/ ) {
180 0           die( '$opts{min_files} is set to "' . $opts{min_files} . '" whish is not numeric matching /^\d+$/' );
181             }
182              
183 0 0         if ( !$opts{dry_run} ) {
184 0           $opts{dry_run} = 0,;
185             } else {
186 0           $opts{dry_run} = 1,;
187             }
188              
189 0           my $df = df($du_path);
190              
191             # the results to be returned
192             my $results = {
193             unlinked => [],
194             unlink_failed => [],
195             unlink_errors => [],
196             found_files => [],
197             found_files_count => 0,
198             unlinked_count => 0,
199             unlink_failed_count => 0,
200             du_target => $opts{du},
201             du_starting => $df->{per},
202             du_ending => $df->{per},
203             min_files => 0,
204             dry_run => $opts{dry_run},
205 0           path => \@paths,
206             missing_paths => \@missing_paths,
207             };
208              
209 0 0         if ( !defined( $paths[0] ) ) {
210 0           return $results;
211             }
212              
213 0 0         if ( $df->{per} < $opts{du} ) {
214 0           return $results;
215             }
216              
217 0           my @files;
218 0 0         if ( defined( $opts{ignore} ) ) {
219 0           my $ignore_rule = File::Find::Rule->new;
220 0           $ignore_rule->name(qr/$opts{ignore}/);
221 0           @files = File::Find::Rule->file()->not($ignore_rule)->in(@paths);
222             } else {
223 0           @files = File::Find::Rule->file()->in(@paths);
224             }
225 0           $results->{found_files_count} = $#files + 1;
226              
227             # if we have a min number of files specified, make sure have that many defined
228 0 0 0       if ( $opts{min_files} && !defined( $files[ $opts{min_files} ] ) ) {
229 0           $results->{min_files} = $opts{min_files};
230 0           return $results;
231             }
232              
233             # get the stats for all the files
234 0           my @files_info;
235 0           foreach my $file (@files) {
236 0           my %file_info;
237             (
238             $file_info{dev}, $file_info{ino}, $file_info{mode}, $file_info{nlink}, $file_info{uid},
239             $file_info{gid}, $file_info{rdev}, $file_info{size}, $file_info{atime}, $file_info{mtime},
240             $file_info{ctime}, $file_info{blksize}, $file_info{blocks}
241 0           ) = stat($file);
242 0           $file_info{name} = $file;
243 0           push( @files_info, \%file_info );
244             } ## end foreach my $file (@files)
245              
246             # sort files oldest to newest based on mtime
247 0           @files_info = sort { $a->{mtime} cmp $b->{mtime} } @files_info;
  0            
248             # set this here as we are saving it into the hashref as a array ref
249 0           my @files_info_copy = @files_info;
250 0           $results->{found_files} = \@files_info_copy;
251              
252             # remove the newest files if mins_files is greater than or equal to 1
253 0 0 0       if ( defined( $opts{min_files} ) && $opts{min_files} > 0 ) {
254 0           $results->{min_files} = $opts{min_files};
255 0           my $min_files_int = 1;
256 0           while ( $min_files_int <= $opts{min_files} ) {
257 0           pop(@files_info);
258              
259 0           $min_files_int++;
260             }
261             }
262              
263             # go through files and remove the oldest till we
264 0           my $int = 0;
265 0   0       while ( $df->{per} >= $opts{du} && defined( $files_info[$int] ) ) {
266 0           eval {
267 0 0 0       if ( $opts{dry_run} && !-w $files_info[$int]{name} ) {
268 0           die('file is not writable');
269             } else {
270 0 0         unlink( $files_info[$int]{name} ) or die($!);
271             }
272              
273             };
274 0           my %tmp_hash = %{ $files_info[$int] };
  0            
275 0 0         if ($@) {
276 0           push( @{ $results->{unlink_errors} }, 'Failed to remove "' . $files_info[$int]{name} . '"... ' . $@ );
  0            
277 0           push( @{ $results->{unlink_failed} }, \%tmp_hash );
  0            
278             } else {
279 0           push( @{ $results->{unlinked} }, \%tmp_hash );
  0            
280             }
281              
282 0           $int++;
283 0           $df = df($du_path);
284             } ## end while ( $df->{per} >= $opts{du} && defined( $files_info...))
285              
286 0           $results->{du_ending} = $df->{per};
287 0 0         if ( defined( $results->{unlinked}[0] ) ) {
288 0           $results->{unlinked_count} = $#{ $results->{unlinked} } + 1;
  0            
289             }
290 0 0         if ( defined( $results->{unlink_failed}[0] ) ) {
291 0           $results->{unlink_failed_count} = $#{ $results->{unlink_failed} } + 1;
  0            
292             }
293              
294 0           return $results;
295             } ## end sub clean
296              
297             =head1 AUTHOR
298              
299             Zane C. Bowers-Hadley, C<< >>
300              
301             =head1 BUGS
302              
303             Please report any bugs or feature requests to C, or through
304             the web interface at L. I will be notified, and then you'll
305             automatically be notified of progress on your bug as I make changes.
306              
307              
308              
309              
310             =head1 SUPPORT
311              
312             You can find documentation for this module with the perldoc command.
313              
314             perldoc App::FileCleanerByDiskUage
315              
316              
317             You can also look for information at:
318              
319             =over 4
320              
321             =item * RT: CPAN's request tracker (report bugs here)
322              
323             L
324              
325             =item * CPAN Ratings
326              
327             L
328              
329             =item * Search CPAN
330              
331             L
332              
333             =back
334              
335              
336             =head1 ACKNOWLEDGEMENTS
337              
338              
339             =head1 LICENSE AND COPYRIGHT
340              
341             This software is Copyright (c) 2023 by Zane C. Bowers-Hadley.
342              
343             This is free software, licensed under:
344              
345             The GNU Lesser General Public License, Version 3, June 2007
346              
347              
348             =cut
349              
350             1; # End of App::FileCleanerByDiskUage