File Coverage

blib/lib/App/FileCleanerByDiskUage.pm
Criterion Covered Total %
statement 14 93 15.0
branch 0 36 0.0
condition 0 18 0.0
subroutine 5 6 83.3
pod 1 1 100.0
total 20 154 12.9


line stmt bran cond sub pod time code
1             package App::FileCleanerByDiskUage;
2              
3 1     1   69020 use 5.006;
  1         4  
4 1     1   6 use strict;
  1         2  
  1         50  
5 1     1   6 use warnings;
  1         3  
  1         42  
6 1     1   542 use File::Find::Rule;
  1         8388  
  1         9  
7 1     1   572 use Filesys::Df;
  1         1187  
  1         1014  
8              
9             =head1 NAME
10              
11             App::FileCleanerByDiskUage - Removes files based on disk space usage till it drops below the specified amount.
12              
13             =head1 VERSION
14              
15             Version 0.1.0
16              
17             =cut
18              
19             our $VERSION = '0.1.0';
20              
21             =head1 SYNOPSIS
22              
23             use App::FileCleanerByDiskUage;
24              
25             # remove files under /var/log/suricata/pcap when disk usage is over 90%
26             # If over 90% make sure there are atleast 32 files and if there are atleast 32, remove them based
27             # on age till we drop below 90%. The newest 32 will be ignored regardless of disk usage.
28             my $removed=App::FileCleanerByDiskUage->clean(path=>'/var/log/suricata/pcap/', du=>90, min_files=>32);
29             if (defined( $removed->{errors}[0] )){
30             die('One or more file could not be removed... '.join(' ', @{ $removed->{errors} }));
31             }
32             my $int=0;
33             while (defined( $removed->{unlined}[$int] )){
34             print 'Removed ' . $removed->{unlinked}[$int]{name} . "\n";
35              
36             $int++;
37             }
38              
39             This works via doing the following.
40              
41             1: Check if disk usage is above the specified threshold. If not it ends here.
42              
43             2: Search for files under the specified path.
44              
45             3: If the number of found files is less than the number of files to keep regardless
46             of disk size it ends here. So if min_files is set to 32 and there are only 3 files,
47             then it would just return.
48              
49             4: Get the stats for all the found files.
50              
51             5: If min_files is specified, remove that many of the files from the list, starting
52             with the newest.
53              
54             6: Removes the oldest file.
55              
56             7: Check disk usage again and if it is less it ends here.
57              
58             8: Go back to 6.
59              
60             =head1 Functions
61              
62             =head2 clean
63              
64             This performs the cleaning actions. As long as the path exists and .path and .du
65             are defined this will not die. But if any of those are undef or do not exist it will
66             die.
67              
68             The following hash values are taken by it.
69              
70             Minimum Required Vars: path, du
71              
72             - path :: The path to look for files under. May be a array of paths. Only the first is used
73             for getting the disk usage though, so this should not have paths in it that are on
74             a different partition.
75             Default :: undef
76              
77             - du :: Disk usage to remove files down till.
78             Default :: undef
79              
80             - min_files :: Minimum number of files to keep, regardless of disk usage.
81             Default :: undef
82              
83             - ignore :: A regexp to use for ignoring files. So lets say you want to ignore,
84             files matching /\.pcap$/, it would be '\.pcap$'.
85             Default :: undef
86              
87             - dry_run :: Do not actually remove anything. Just check to see if the file writable by the
88             current user.
89              
90             The returned value is a hash ref.
91              
92             - dry_run :: Boolean for fir it was a dry run or not.
93              
94             - found_files :: Array of hashes of data for all files found. This will only be defined if du is above
95             threshold for removing files. If it is below it, the function will return instead of taking
96             the time required to run a search.
97              
98             - found_files_count :: A count of files found.
99              
100             - path :: The value of path that it was called with. This will always be a array, regardless of if a array or
101             scalar was passed as internally converts a scalars into a array containing just a single item.
102              
103             - unlinked :: Array of hashes of data for files that have been removed.
104              
105             - unlinked_count :: A count of how many files were unlinked
106              
107             - unlink_errors :: Array of strings containing error descriptions.
108              
109             - unlink_failed :: Array of hashes of data for files that could not removed. The corresponding
110             index in .errors will be the error in question. So $results->{unlink_failed}[0]
111             would be $results->{unlink_errors}[0]
112              
113             - unlink_fialed_count :: A count of how many files unlinking failed for.
114              
115             The files hash is composed as below.
116              
117             - name :: Name of the file, including it's path.
118              
119             # following are provided via the Perl function stat
120             - dev
121             - ino
122             - mode
123             - nlink
124             - uid
125             - gid
126             - rdev
127             - size
128             - atime
129             - mtime
130             - ctime
131             - blksize
132             - blocks
133              
134             =cut
135              
136             sub clean {
137 0     0 1   my ( $empty, %opts ) = @_;
138              
139 0           my $du_path;
140 0 0 0       if ( !defined( $opts{path} ) ) {
    0          
    0          
141 0           die('$opts{path} is not defined');
142             } elsif ( ref( $opts{path} ) ne 'ARRAY' && !-d $opts{path} ) {
143 0           die( '$opts{path} is set to "' . $opts{path} . '" which is not a directory or does not exist' );
144             } elsif ( ref( $opts{path} ) eq 'ARRAY' ) {
145 0 0         if ( !defined( $opts{path}[0] ) ) {
146 0           die('$opts{path}[0] is not defined');
147             }
148 0           my $int = 0;
149 0           while ( defined( $opts{path}[$int] ) ) {
150 0 0         if ( !-d $opts{path}[$int] ) {
151             die( '$opts{path}['
152             . $int
153             . '] is set to "'
154 0           . $opts{path}[$int]
155             . '" which is not a directory or does not exist' );
156             }
157 0           $int++;
158             } ## end while ( defined( $opts{path}[$int] ) )
159 0           $du_path = $opts{path}[0];
160             } else {
161 0           $du_path = $opts{path} = [ $opts{path} ];
162             }
163              
164 0 0         if ( !defined( $opts{du} ) ) {
    0          
165 0           die('$opts{du} is not defined');
166             } elsif ( $opts{du} !~ /^\d+$/ ) {
167 0           die( '$opts{du} is set to "' . $opts{du} . '" whish is not numeric' );
168             }
169              
170             # if we have a min_files specified, make sure the value is numeric
171 0 0 0       if ( defined( $opts{min_files} ) && $opts{min_files} !~ /^\d+$/ ) {
172 0           die( '$opts{min_files} is set to "' . $opts{min_files} . '" whish is not numeric matching /^\d+$/' );
173             }
174              
175 0 0         if ( !$opts{dry_run} ) {
176 0           $opts{dry_run} = 0,;
177             } else {
178 0           $opts{dry_run} = 1,;
179             }
180              
181 0           my $df = df($du_path);
182              
183             # the results to be returned
184             my $results = {
185             unlinked => [],
186             unlink_failed => [],
187             unlink_errors => [],
188             found_files => [],
189             found_files_count => 0,
190             unlinked_count => 0,
191             unlink_failed_count => 0,
192             du_target => $opts{du},
193             du_starting => $df->{per},
194             du_ending => $df->{per},
195             min_files => 0,
196             dry_run => $opts{dry_run},
197             path => $opts{path},
198 0           };
199              
200 0 0         if ( $df->{per} < $opts{du} ) {
201 0           return $results;
202             }
203              
204 0           my @files;
205 0 0         if ( defined( $opts{ignore} ) ) {
206 0           my $ignore_rule = File::Find::Rule->new;
207 0           $ignore_rule->name(qr/$opts{ignore}/);
208 0           @files = File::Find::Rule->file()->not($ignore_rule)->in( @{ $opts{path} } );
  0            
209             } else {
210 0           @files = File::Find::Rule->file()->in( @{ $opts{path} } );
  0            
211             }
212 0           $results->{found_files_count} = $#files + 1;
213              
214             # if we have a min number of files specified, make sure have that many defined
215 0 0 0       if ( $opts{min_files} && !defined( $files[ $opts{min_files} ] ) ) {
216 0           $results->{min_files} = $opts{min_files};
217 0           return $results;
218             }
219              
220             # get the stats for all the files
221 0           my @files_info;
222 0           foreach my $file (@files) {
223 0           my %file_info;
224             (
225             $file_info{dev}, $file_info{ino}, $file_info{mode}, $file_info{nlink}, $file_info{uid},
226             $file_info{gid}, $file_info{rdev}, $file_info{size}, $file_info{atime}, $file_info{mtime},
227             $file_info{ctime}, $file_info{blksize}, $file_info{blocks}
228 0           ) = stat($file);
229 0           $file_info{name} = $file;
230 0           push( @files_info, \%file_info );
231             } ## end foreach my $file (@files)
232              
233             # sort files oldest to newest based on mtime
234 0           @files_info = sort { $a->{mtime} cmp $b->{mtime} } @files_info;
  0            
235             # set this here as we are saving it into the hashref as a array ref
236 0           my @files_info_copy = @files_info;
237 0           $results->{found_files} = \@files_info_copy;
238              
239             # remove the newest files if mins_files is greater than or equal to 1
240 0 0 0       if ( defined( $opts{min_files} ) && $opts{min_files} > 0 ) {
241 0           $results->{min_files} = $opts{min_files};
242 0           my $min_files_int = 1;
243 0           while ( $min_files_int <= $opts{min_files} ) {
244 0           pop(@files_info);
245              
246 0           $min_files_int++;
247             }
248             }
249              
250             # go through files and remove the oldest till we
251 0           my $int = 0;
252 0   0       while ( $df->{per} >= $opts{du} && defined( $files_info[$int] ) ) {
253 0           eval {
254 0 0 0       if ( $opts{dry_run} && !-w $files_info[$int]{name} ) {
255 0           die('file is not writable');
256             } else {
257 0 0         unlink( $files_info[$int]{name} ) or die($!);
258             }
259              
260             };
261 0           my %tmp_hash = %{ $files_info[$int] };
  0            
262 0 0         if ($@) {
263 0           push( @{ $results->{unlink_errors} }, 'Failed to remove "' . $files_info[$int]{name} . '"... ' . $@ );
  0            
264 0           push( @{ $results->{unlink_failed} }, \%tmp_hash );
  0            
265             } else {
266 0           push( @{ $results->{unlinked} }, \%tmp_hash );
  0            
267             }
268              
269 0           $int++;
270 0           $df = df($du_path);
271             } ## end while ( $df->{per} >= $opts{du} && defined( $files_info...))
272              
273 0           $results->{du_ending} = $df->{per};
274 0 0         if ( defined( $results->{unlinked}[0] ) ) {
275 0           $results->{unlinked_count} = $#{ $results->{unlinked} } + 1;
  0            
276             }
277 0 0         if ( defined( $results->{unlink_failed}[0] ) ) {
278 0           $results->{unlink_failed_count} = $#{ $results->{unlink_failed} } + 1;
  0            
279             }
280              
281 0           return $results;
282             } ## end sub clean
283              
284             =head1 AUTHOR
285              
286             Zane C. Bowers-Hadley, C<< >>
287              
288             =head1 BUGS
289              
290             Please report any bugs or feature requests to C, or through
291             the web interface at L. I will be notified, and then you'll
292             automatically be notified of progress on your bug as I make changes.
293              
294              
295              
296              
297             =head1 SUPPORT
298              
299             You can find documentation for this module with the perldoc command.
300              
301             perldoc App::FileCleanerByDiskUage
302              
303              
304             You can also look for information at:
305              
306             =over 4
307              
308             =item * RT: CPAN's request tracker (report bugs here)
309              
310             L
311              
312             =item * CPAN Ratings
313              
314             L
315              
316             =item * Search CPAN
317              
318             L
319              
320             =back
321              
322              
323             =head1 ACKNOWLEDGEMENTS
324              
325              
326             =head1 LICENSE AND COPYRIGHT
327              
328             This software is Copyright (c) 2023 by Zane C. Bowers-Hadley.
329              
330             This is free software, licensed under:
331              
332             The GNU Lesser General Public License, Version 3, June 2007
333              
334              
335             =cut
336              
337             1; # End of App::FileCleanerByDiskUage