File Coverage

blib/lib/App/FileCleanerByDiskUage.pm

Criterion	Covered	Total	%
statement	14	100	14.0
branch	0	38	0.0
condition	0	18	0.0
subroutine	5	6	83.3
pod	1	1	100.0
total	20	163	12.2

line	stmt	bran	cond	sub	pod	time	code
1							package App::FileCleanerByDiskUage;
2
3	1			1		68393	use 5.006;
	1					5
4	1			1		6	use strict;
	1					11
	1					33
5	1			1		7	use warnings;
	1					2
	1					35
6	1			1		532	use File::Find::Rule;
	1					8393
	1					7
7	1			1		575	use Filesys::Df;
	1					1195
	1					1083
8
9							=head1 NAME
10
11							App::FileCleanerByDiskUage - Removes files based on disk space usage till it drops below the specified amount.
12
13							=head1 VERSION
14
15							Version 0.2.0
16
17							=cut
18
19							our $VERSION = '0.2.0';
20
21							=head1 SYNOPSIS
22
23							use App::FileCleanerByDiskUage;
24
25							# remove files under /var/log/suricata/pcap when disk usage is over 90%
26							# If over 90% make sure there are atleast 32 files and if there are atleast 32, remove them based
27							# on age till we drop below 90%. The newest 32 will be ignored regardless of disk usage.
28							my $removed=App::FileCleanerByDiskUage->clean(path=>'/var/log/suricata/pcap/', du=>90, min_files=>32);
29							if (defined( $removed->{errors}[0] )){
30							die('One or more file could not be removed... '.join(' ', @{ $removed->{errors} }));
31							}
32							my $int=0;
33							while (defined( $removed->{unlined}[$int] )){
34							print 'Removed ' . $removed->{unlinked}[$int]{name} . "\n";
35
36							$int++;
37							}
38
39							This works via doing the following.
40
41							1: Check if disk usage is above the specified threshold. If not it ends here.
42
43							2: Search for files under the specified path.
44
45							3: If the number of found files is less than the number of files to keep regardless
46							of disk size it ends here. So if min_files is set to 32 and there are only 3 files,
47							then it would just return.
48
49							4: Get the stats for all the found files.
50
51							5: If min_files is specified, remove that many of the files from the list, starting
52							with the newest.
53
54							6: Removes the oldest file.
55
56							7: Check disk usage again and if it is less it ends here.
57
58							8: Go back to 6.
59
60							=head1 Functions
61
62							=head2 clean
63
64							This performs the cleaning actions. As long as the path exists and .path and .du
65							are defined this will not die. But if any of those are undef or do not exist it will
66							die.
67
68							The following hash values are taken by it.
69
70							Minimum Required Vars: path, du
71
72							- path :: The path to look for files under. May be a array of paths. Only the first is used
73							for getting the disk usage though, so this should not have paths in it that are on
74							a different partition.
75							Default :: undef
76
77							- du :: Disk usage to remove files down till.
78							Default :: undef
79
80							- min_files :: Minimum number of files to keep, regardless of disk usage.
81							Default :: undef
82
83							- ignore :: A regexp to use for ignoring files. So lets say you want to ignore,
84							files matching /\.pcap$/, it would be '\.pcap$'.
85							Default :: undef
86
87							- dry_run :: Do not actually remove anything. Just check to see if the file writable by the
88							current user.
89
90							The returned value is a hash ref.
91
92							- dry_run :: Boolean for fir it was a dry run or not.
93
94							- found_files :: Array of hashes of data for all files found. This will only be defined if du is above
95							threshold for removing files. If it is below it, the function will return instead of taking
96							the time required to run a search.
97
98							- found_files_count :: A count of files found.
99
100							- path :: The value of path that it was called with. This will always be a array, regardless of if a array or
101							scalar was passed as internally converts a scalars into a array containing just a single item.
102
103							- missing_paths :: Paths that were passed to it, but don't exist.
104
105							- unlinked :: Array of hashes of data for files that have been removed.
106
107							- unlinked_count :: A count of how many files were unlinked
108
109							- unlink_errors :: Array of strings containing error descriptions.
110
111							- unlink_failed :: Array of hashes of data for files that could not removed. The corresponding
112							index in .errors will be the error in question. So $results->{unlink_failed}[0]
113							would be $results->{unlink_errors}[0]
114
115							- unlink_fialed_count :: A count of how many files unlinking failed for.
116
117							The files hash is composed as below.
118
119							- name :: Name of the file, including it's path.
120
121							# following are provided via the Perl function stat
122							- dev
123							- ino
124							- mode
125							- nlink
126							- uid
127							- gid
128							- rdev
129							- size
130							- atime
131							- mtime
132							- ctime
133							- blksize
134							- blocks
135
136							=cut
137
138							sub clean {
139	0			0	1		my ( $empty, %opts ) = @_;
140
141	0						my @missing_paths;
142							my @paths;
143
144	0						my $du_path;
145	0	0	0				if ( !defined( $opts{path} ) ) {
		0
		0
146	0						die('$opts{path} is not defined');
147							} elsif ( ref( $opts{path} ) ne 'ARRAY' && !-d $opts{path} ) {
148	0						push( @missing_paths, $opts{path} );
149							} elsif ( ref( $opts{path} ) eq 'ARRAY' ) {
150	0	0					if ( !defined( $opts{path}[0] ) ) {
151	0						die('$opts{path}[0] is not defined');
152							}
153	0						my $int = 0;
154	0						while ( defined( $opts{path}[$int] ) ) {
155	0						$opts{path}[$int] = $opts{path}[$int] . '/';
156	0						$opts{path}[$int] =~ s/\/+/\//;
157	0	0					if ( !-d $opts{path}[$int] ) {
158	0						push( @missing_paths, $opts{path}[$int] );
159							} else {
160	0						push( @paths, $opts{path}[$int] );
161							}
162	0						$int++;
163							} ## end while ( defined( $opts{path}[$int] ) )
164	0						$du_path = $opts{path}[0];
165							} else {
166	0						$opts{path} = $opts{path} . '/';
167	0						$opts{path} =~ s/\/+/\//;
168	0						$du_path = $opts{path};
169	0						push( @paths, $opts{path} );
170							}
171
172	0	0					if ( !defined( $opts{du} ) ) {
		0
173	0						die('$opts{du} is not defined');
174							} elsif ( $opts{du} !~ /^\d+$/ ) {
175	0						die( '$opts{du} is set to "' . $opts{du} . '" whish is not numeric' );
176							}
177
178							# if we have a min_files specified, make sure the value is numeric
179	0	0	0				if ( defined( $opts{min_files} ) && $opts{min_files} !~ /^\d+$/ ) {
180	0						die( '$opts{min_files} is set to "' . $opts{min_files} . '" whish is not numeric matching /^\d+$/' );
181							}
182
183	0	0					if ( !$opts{dry_run} ) {
184	0						$opts{dry_run} = 0,;
185							} else {
186	0						$opts{dry_run} = 1,;
187							}
188
189	0						my $df = df($du_path);
190
191							# the results to be returned
192							my $results = {
193							unlinked => [],
194							unlink_failed => [],
195							unlink_errors => [],
196							found_files => [],
197							found_files_count => 0,
198							unlinked_count => 0,
199							unlink_failed_count => 0,
200							du_target => $opts{du},
201							du_starting => $df->{per},
202							du_ending => $df->{per},
203							min_files => 0,
204							dry_run => $opts{dry_run},
205	0						path => \@paths,
206							missing_paths => \@missing_paths,
207							};
208
209	0	0					if ( !defined( $paths[0] ) ) {
210	0						return $results;
211							}
212
213	0	0					if ( $df->{per} < $opts{du} ) {
214	0						return $results;
215							}
216
217	0						my @files;
218	0	0					if ( defined( $opts{ignore} ) ) {
219	0						my $ignore_rule = File::Find::Rule->new;
220	0						$ignore_rule->name(qr/$opts{ignore}/);
221	0						@files = File::Find::Rule->file()->not($ignore_rule)->in(@paths);
222							} else {
223	0						@files = File::Find::Rule->file()->in(@paths);
224							}
225	0						$results->{found_files_count} = $#files + 1;
226
227							# if we have a min number of files specified, make sure have that many defined
228	0	0	0				if ( $opts{min_files} && !defined( $files[ $opts{min_files} ] ) ) {
229	0						$results->{min_files} = $opts{min_files};
230	0						return $results;
231							}
232
233							# get the stats for all the files
234	0						my @files_info;
235	0						foreach my $file (@files) {
236	0						my %file_info;
237							(
238							$file_info{dev}, $file_info{ino}, $file_info{mode}, $file_info{nlink}, $file_info{uid},
239							$file_info{gid}, $file_info{rdev}, $file_info{size}, $file_info{atime}, $file_info{mtime},
240							$file_info{ctime}, $file_info{blksize}, $file_info{blocks}
241	0						) = stat($file);
242	0						$file_info{name} = $file;
243	0						push( @files_info, \%file_info );
244							} ## end foreach my $file (@files)
245
246							# sort files oldest to newest based on mtime
247	0						@files_info = sort { $a->{mtime} cmp $b->{mtime} } @files_info;
	0
248							# set this here as we are saving it into the hashref as a array ref
249	0						my @files_info_copy = @files_info;
250	0						$results->{found_files} = \@files_info_copy;
251
252							# remove the newest files if mins_files is greater than or equal to 1
253	0	0	0				if ( defined( $opts{min_files} ) && $opts{min_files} > 0 ) {
254	0						$results->{min_files} = $opts{min_files};
255	0						my $min_files_int = 1;
256	0						while ( $min_files_int <= $opts{min_files} ) {
257	0						pop(@files_info);
258
259	0						$min_files_int++;
260							}
261							}
262
263							# go through files and remove the oldest till we
264	0						my $int = 0;
265	0		0				while ( $df->{per} >= $opts{du} && defined( $files_info[$int] ) ) {
266	0						eval {
267	0	0	0				if ( $opts{dry_run} && !-w $files_info[$int]{name} ) {
268	0						die('file is not writable');
269							} else {
270	0	0					unlink( $files_info[$int]{name} ) or die($!);
271							}
272
273							};
274	0						my %tmp_hash = %{ $files_info[$int] };
	0
275	0	0					if ($@) {
276	0						push( @{ $results->{unlink_errors} }, 'Failed to remove "' . $files_info[$int]{name} . '"... ' . $@ );
	0
277	0						push( @{ $results->{unlink_failed} }, \%tmp_hash );
	0
278							} else {
279	0						push( @{ $results->{unlinked} }, \%tmp_hash );
	0
280							}
281
282	0						$int++;
283	0						$df = df($du_path);
284							} ## end while ( $df->{per} >= $opts{du} && defined( $files_info...))
285
286	0						$results->{du_ending} = $df->{per};
287	0	0					if ( defined( $results->{unlinked}[0] ) ) {
288	0						$results->{unlinked_count} = $#{ $results->{unlinked} } + 1;
	0
289							}
290	0	0					if ( defined( $results->{unlink_failed}[0] ) ) {
291	0						$results->{unlink_failed_count} = $#{ $results->{unlink_failed} } + 1;
	0
292							}
293
294	0						return $results;
295							} ## end sub clean
296
297							=head1 AUTHOR
298
299							Zane C. Bowers-Hadley, C<< >>
300
301							=head1 BUGS
302
303							Please report any bugs or feature requests to C, or through
304							the web interface at L. I will be notified, and then you'll
305							automatically be notified of progress on your bug as I make changes.
306
307
308
309
310							=head1 SUPPORT
311
312							You can find documentation for this module with the perldoc command.
313
314							perldoc App::FileCleanerByDiskUage
315
316
317							You can also look for information at:
318
319							=over 4
320
321							=item * RT: CPAN's request tracker (report bugs here)
322
323							L
324
325							=item * CPAN Ratings
326
327							L
328
329							=item * Search CPAN
330
331							L
332
333							=back
334
335
336							=head1 ACKNOWLEDGEMENTS
337
338
339							=head1 LICENSE AND COPYRIGHT
340
341							This software is Copyright (c) 2023 by Zane C. Bowers-Hadley.
342
343							This is free software, licensed under:
344
345							The GNU Lesser General Public License, Version 3, June 2007
346
347
348							=cut
349
350							1; # End of App::FileCleanerByDiskUage