File Coverage

blib/lib/App/FileCleanerByDiskUage.pm

Criterion	Covered	Total	%
statement	17	136	12.5
branch	0	68	0.0
condition	0	21	0.0
subroutine	6	7	85.7
pod	1	1	100.0
total	24	233	10.3

line	stmt	bran	cond	sub	pod	time	code
1							package App::FileCleanerByDiskUage;
2
3	1			1		129785	use 5.006;
	1					6
4	1			1		3	use strict;
	1					2
	1					15
5	1			1		3	use warnings;
	1					1
	1					41
6	1			1		501	use File::Find::Rule;
	1					6415
	1					5
7	1			1		503	use Filesys::Df qw( df );
	1					1112
	1					59
8	1			1		443	use Net::Server::Daemonize qw(check_pid_file create_pid_file unlink_pid_file);
	1					9744
	1					1118
9
10							=head1 NAME
11
12							App::FileCleanerByDiskUage - Removes files based on disk space usage till it drops below the specified amount.
13
14							=head1 VERSION
15
16							Version 0.4.1
17
18							=cut
19
20							our $VERSION = '0.4.1';
21
22							=head1 SYNOPSIS
23
24							use App::FileCleanerByDiskUage;
25
26							# remove files under /var/log/suricata/pcap when disk usage is over 90%
27							# If over 90% make sure there are atleast 32 files and if there are atleast 32, remove them based
28							# on age till we drop below 90%. The newest 32 will be ignored regardless of disk usage.
29							my $removed=App::FileCleanerByDiskUage->clean(path=>'/var/log/suricata/pcap/', du=>90, min_files=>32);
30							if (defined( $removed->{errors}[0] )){
31							die('One or more file could not be removed... '.join(' ', @{ $removed->{errors} }));
32							}
33							my $int=0;
34							while (defined( $removed->{unlined}[$int] )){
35							print 'Removed ' . $removed->{unlinked}[$int]{name} . "\n";
36
37							$int++;
38							}
39
40							This works via doing the following.
41
42							1: Check if disk usage is above the specified threshold. If not it ends here.
43
44							2: Search for files under the specified path.
45
46							3: If the number of found files is less than the number of files to keep regardless
47							of disk size it ends here. So if min_files is set to 32 and there are only 3 files,
48							then it would just return.
49
50							4: Get the stats for all the found files.
51
52							5: If min_files is specified, remove that many of the files from the list, starting
53							with the newest.
54
55							6: Removes the oldest file.
56
57							7: Check disk usage again and if it is less it ends here.
58
59							8: Go back to 6.
60
61							=head1 Functions
62
63							=head2 clean
64
65							This performs the cleaning actions. As long as the path exists and .path and .du
66							are defined this will not die. But if any of those are undef or do not exist it will
67							die.
68
69							The following hash values are taken by it.
70
71							Minimum Required Vars: path, du
72
73							- path :: The path to look for files under. May be a array of paths. Only the first is used
74							for getting the disk usage though, so this should not have paths in it that are on
75							a different partition.
76							Default :: undef
77
78							- du :: Disk usage to remove files down till.
79							Default :: undef
80
81							- min_files :: Minimum number of files to keep, regardless of disk usage.
82							Default :: undef
83
84							- ignore :: A regexp to use for ignoring files. So lets say you want to ignore,
85							files matching /\.pcap$/, it would be '\.pcap$'.
86							Default :: undef
87
88							- dry_run :: Do not actually remove anything. Just check to see if the file writable by the
89							current user.
90
91							-use_pid :: Create a PID to make sure multiple instances can't run at once.
92							Default :: undef
93
94							-pid_dir :: Create a PID to make sure multiple instances can't run at once.
95							Default :: /var/run
96
97							- pid_name :: Append this to the the name of the pid file created. If specified with a
98							value of 'foo' then the file would be 'file_cleaner_by_du-foo.pid'.
99							Default :: undef
100
101							The returned value is a hash ref.
102
103							- dry_run :: Boolean for fir it was a dry run or not.
104
105							- found_files :: Array of hashes of data for all files found. This will only be defined if du is above
106							threshold for removing files. If it is below it, the function will return instead of taking
107							the time required to run a search.
108
109							- found_files_count :: A count of files found.
110
111							- path :: The value of path that it was called with. This will always be a array, regardless of if a array or
112							scalar was passed as internally converts a scalars into a array containing just a single item.
113
114							- missing_paths :: Paths that were passed to it, but don't exist.
115
116							- unlinked :: Array of hashes of data for files that have been removed.
117
118							- unlinked_count :: A count of how many files were unlinked
119
120							- unlink_errors :: Array of strings containing error descriptions.
121
122							- unlink_failed :: Array of hashes of data for files that could not removed. The corresponding
123							index in .errors will be the error in question. So $results->{unlink_failed}[0]
124							would be $results->{unlink_errors}[0]
125
126							- unlink_fialed_count :: A count of how many files unlinking failed for.
127
128							The files hash is composed as below.
129
130							- name :: Name of the file, including it's path.
131
132							# following are provided via the Perl function stat
133							- dev
134							- ino
135							- mode
136							- nlink
137							- uid
138							- gid
139							- rdev
140							- size
141							- atime
142							- mtime
143							- ctime
144							- blksize
145							- blocks
146
147							=cut
148
149							sub clean {
150	0			0	1		my ( $empty, %opts ) = @_;
151
152	0						my $pid_file;
153	0	0					if ( $opts{use_pid} ) {
154	0	0					if ( !defined( $opts{pid_dir} ) ) {
155	0						$opts{pid_dir} = '/var/run';
156							} else {
157	0	0					if ( !-d $opts{pid_dir} ) {
158	0						die( $opts{pid_dir} . ' does not exist or is not a dir' );
159							}
160							}
161
162	0	0					if ( !defined( $opts{pid_name} ) ) {
163	0						$pid_file = $opts{pid_dir} . '/file_cleaner_by_du.pid';
164							} else {
165	0	0	0				if ( $opts{pid_name} =~ /\// \|\| $opts{pid_name} =~ /\\/ ) {
166	0						die( 'PID name of "' . $opts{pid_name} . '" can not contain either / or \\' );
167							}
168	0						$pid_file = $opts{pid_dir} . '/file_cleaner_by_du-' . $opts{pid_name} . '.pid';
169							}
170
171	0						check_pid_file($pid_file);
172	0						create_pid_file($pid_file);
173							} ## end if ( $opts{use_pid} )
174
175	0						my @missing_paths;
176							my @paths;
177
178	0						my $du_path;
179							# file paths should end with / or other wise if it is a symlink File::Find::Rule will skip it
180							# so fix that up while we are doing the path check
181	0	0	0				if ( !defined( $opts{path} ) ) {
		0
		0
182	0	0					if ( $opts{use_pid} ) {
183	0						unlink_pid_file($pid_file);
184							}
185	0						die('$opts{path} is not defined');
186							} elsif ( ref( $opts{path} ) ne 'ARRAY' && !-d $opts{path} ) {
187	0						push( @missing_paths, $opts{path} );
188							} elsif ( ref( $opts{path} ) eq 'ARRAY' ) {
189	0	0					if ( !defined( $opts{path}[0] ) ) {
190	0	0					if ( $opts{use_pid} ) {
191	0						unlink_pid_file($pid_file);
192							}
193	0						die('$opts{path}[0] is not defined');
194							}
195	0						my $int = 0;
196	0						while ( defined( $opts{path}[$int] ) ) {
197	0						$opts{path}[$int] = $opts{path}[$int] . '/';
198	0						$opts{path}[$int] =~ s/\/+$/\//;
199	0	0					if ( !-d $opts{path}[$int] ) {
200	0						push( @missing_paths, $opts{path}[$int] );
201							} else {
202	0						push( @paths, $opts{path}[$int] );
203							}
204	0						$int++;
205							} ## end while ( defined( $opts{path}[$int] ) )
206	0						$du_path = $paths[0];
207							} else {
208	0						$opts{path} = $opts{path} . '/';
209	0						$opts{path} =~ s/\/+$/\//;
210	0						$du_path = $opts{path};
211	0						push( @paths, $opts{path} );
212							}
213
214	0	0					if ( !defined( $paths[0] ) ) {
215	0						die('All specified paths are missing or not a directory');
216							}
217
218	0	0					if ( !defined( $opts{du} ) ) {
		0
219	0	0					if ( $opts{use_pid} ) {
220	0						unlink_pid_file($pid_file);
221							}
222	0						die('$opts{du} is not defined');
223							} elsif ( $opts{du} !~ /^\d+$/ ) {
224	0	0					if ( $opts{use_pid} ) {
225	0						unlink_pid_file($pid_file);
226							}
227	0						die( '$opts{du} is set to "' . $opts{du} . '" whish is not numeric' );
228							}
229
230							# if we have a min_files specified, make sure the value is numeric
231	0	0	0				if ( defined( $opts{min_files} ) && $opts{min_files} !~ /^\d+$/ ) {
232	0	0					if ( $opts{use_pid} ) {
233	0						unlink_pid_file($pid_file);
234							}
235	0						die( '$opts{min_files} is set to "' . $opts{min_files} . '" whish is not numeric matching /^\d+$/' );
236							}
237
238	0	0					if ( !$opts{dry_run} ) {
239	0						$opts{dry_run} = 0,;
240							} else {
241	0						$opts{dry_run} = 1,;
242							}
243
244	0						my $df = df($du_path);
245
246							# the results to be returned
247							my $results = {
248							unlinked => [],
249							unlink_failed => [],
250							unlink_errors => [],
251							found_files => [],
252							found_files_count => 0,
253							unlinked_count => 0,
254							unlink_failed_count => 0,
255							du_target => $opts{du},
256							du_starting => $df->{per},
257							du_ending => $df->{per},
258							min_files => 0,
259							dry_run => $opts{dry_run},
260	0						path => \@paths,
261							missing_paths => \@missing_paths,
262							};
263
264	0	0					if ( !defined( $paths[0] ) ) {
265	0	0					if ( $opts{use_pid} ) {
266	0						unlink_pid_file($pid_file);
267							}
268	0						return $results;
269							}
270
271	0	0					if ( $df->{per} < $opts{du} ) {
272	0	0					if ( $opts{use_pid} ) {
273	0						unlink_pid_file($pid_file);
274							}
275	0						return $results;
276							}
277
278	0						my @files;
279	0	0					if ( defined( $opts{ignore} ) ) {
280	0						my $ignore_rule = File::Find::Rule->new;
281	0						$ignore_rule->name(qr/$opts{ignore}/);
282	0						@files = File::Find::Rule->file()->not($ignore_rule)->in(@paths);
283							} else {
284	0						@files = File::Find::Rule->file()->in(@paths);
285							}
286	0						$results->{found_files_count} = $#files + 1;
287
288							# if we have a min number of files specified, make sure have that many defined
289	0	0	0				if ( $opts{min_files} && !defined( $files[ $opts{min_files} ] ) ) {
290	0						$results->{min_files} = $opts{min_files};
291	0	0					if ( $opts{use_pid} ) {
292	0						unlink_pid_file($pid_file);
293							}
294	0						return $results;
295							}
296
297							# get the stats for all the files
298	0						my @files_info;
299	0						foreach my $file (@files) {
300	0						my %file_info;
301							my $not_used;
302							(
303							$not_used, $not_used, $not_used, $not_used, $not_used,
304							$not_used, $not_used, $not_used, $not_used, $file_info{mtime},
305	0						$not_used, $not_used, $not_used
306							) = stat($file);
307	0						$file_info{name} = $file;
308	0						push( @files_info, \%file_info );
309							} ## end foreach my $file (@files)
310
311							# sort files oldest to newest based on mtime
312	0						@files_info = sort { $a->{mtime} cmp $b->{mtime} } @files_info;
	0
313							# set this here as we are saving it into the hashref as a array ref
314	0						my @files_info_copy = @files_info;
315	0						$results->{found_files} = \@files_info_copy;
316
317							# remove the newest files if mins_files is greater than or equal to 1
318	0	0	0				if ( defined( $opts{min_files} ) && $opts{min_files} > 0 ) {
319	0						$results->{min_files} = $opts{min_files};
320	0						my $min_files_int = 1;
321	0						while ( $min_files_int <= $opts{min_files} ) {
322	0						pop(@files_info);
323
324	0						$min_files_int++;
325							}
326							}
327
328							# go through files and remove the oldest till we
329	0						my $int = 0;
330	0		0				while ( $df->{per} >= $opts{du} && defined( $files_info[$int] ) ) {
331	0						eval {
332	0	0	0				if ( $opts{dry_run} && !-w $files_info[$int]{name} ) {
333	0						die('file is not writable');
334							} else {
335	0	0					unlink( $files_info[$int]{name} ) or die($!);
336							}
337
338							};
339	0						my %tmp_hash = %{ $files_info[$int] };
	0
340	0	0					if ($@) {
341	0						push( @{ $results->{unlink_errors} }, 'Failed to remove "' . $files_info[$int]{name} . '"... ' . $@ );
	0
342	0						push( @{ $results->{unlink_failed} }, \%tmp_hash );
	0
343							} else {
344	0						push( @{ $results->{unlinked} }, \%tmp_hash );
	0
345							}
346
347	0						$int++;
348	0						$df = df($du_path);
349							} ## end while ( $df->{per} >= $opts{du} && defined( $files_info...))
350
351	0						$results->{du_ending} = $df->{per};
352	0	0					if ( defined( $results->{unlinked}[0] ) ) {
353	0						$results->{unlinked_count} = $#{ $results->{unlinked} } + 1;
	0
354							}
355	0	0					if ( defined( $results->{unlink_failed}[0] ) ) {
356	0						$results->{unlink_failed_count} = $#{ $results->{unlink_failed} } + 1;
	0
357							}
358
359	0	0					if ( $opts{use_pid} ) {
360	0						unlink_pid_file($pid_file);
361							}
362	0						return $results;
363							} ## end sub clean
364
365							=head1 AUTHOR
366
367							Zane C. Bowers-Hadley, C<< >>
368
369							=head1 BUGS
370
371							Please report any bugs or feature requests to C, or through
372							the web interface at L. I will be notified, and then you'll
373							automatically be notified of progress on your bug as I make changes.
374
375
376
377
378							=head1 SUPPORT
379
380							You can find documentation for this module with the perldoc command.
381
382							perldoc App::FileCleanerByDiskUage
383
384
385							You can also look for information at:
386
387							=over 4
388
389							=item * RT: CPAN's request tracker (report bugs here)
390
391							L
392
393							=item * CPAN Ratings
394
395							L
396
397							=item * Search CPAN
398
399							L
400
401							=back
402
403
404							=head1 ACKNOWLEDGEMENTS
405
406
407							=head1 LICENSE AND COPYRIGHT
408
409							This software is Copyright (c) 2023 by Zane C. Bowers-Hadley.
410
411							This is free software, licensed under:
412
413							The GNU Lesser General Public License, Version 3, June 2007
414
415
416							=cut
417
418							1; # End of App::FileCleanerByDiskUage