File Coverage

blib/lib/App/FileCleanerByDiskUage.pm

Criterion	Covered	Total	%
statement	14	100	14.0
branch	0	38	0.0
condition	0	18	0.0
subroutine	5	6	83.3
pod	1	1	100.0
total	20	163	12.2

line	stmt	bran	cond	sub	pod	time	code
1							package App::FileCleanerByDiskUage;
2
3	1			1		67583	use 5.006;
	1					4
4	1			1		9	use strict;
	1					4
	1					35
5	1			1		6	use warnings;
	1					2
	1					35
6	1			1		528	use File::Find::Rule;
	1					8721
	1					8
7	1			1		537	use Filesys::Df;
	1					1133
	1					1050
8
9							=head1 NAME
10
11							App::FileCleanerByDiskUage - Removes files based on disk space usage till it drops below the specified amount.
12
13							=head1 VERSION
14
15							Version 0.2.1
16
17							=cut
18
19							our $VERSION = '0.2.1';
20
21							=head1 SYNOPSIS
22
23							use App::FileCleanerByDiskUage;
24
25							# remove files under /var/log/suricata/pcap when disk usage is over 90%
26							# If over 90% make sure there are atleast 32 files and if there are atleast 32, remove them based
27							# on age till we drop below 90%. The newest 32 will be ignored regardless of disk usage.
28							my $removed=App::FileCleanerByDiskUage->clean(path=>'/var/log/suricata/pcap/', du=>90, min_files=>32);
29							if (defined( $removed->{errors}[0] )){
30							die('One or more file could not be removed... '.join(' ', @{ $removed->{errors} }));
31							}
32							my $int=0;
33							while (defined( $removed->{unlined}[$int] )){
34							print 'Removed ' . $removed->{unlinked}[$int]{name} . "\n";
35
36							$int++;
37							}
38
39							This works via doing the following.
40
41							1: Check if disk usage is above the specified threshold. If not it ends here.
42
43							2: Search for files under the specified path.
44
45							3: If the number of found files is less than the number of files to keep regardless
46							of disk size it ends here. So if min_files is set to 32 and there are only 3 files,
47							then it would just return.
48
49							4: Get the stats for all the found files.
50
51							5: If min_files is specified, remove that many of the files from the list, starting
52							with the newest.
53
54							6: Removes the oldest file.
55
56							7: Check disk usage again and if it is less it ends here.
57
58							8: Go back to 6.
59
60							=head1 Functions
61
62							=head2 clean
63
64							This performs the cleaning actions. As long as the path exists and .path and .du
65							are defined this will not die. But if any of those are undef or do not exist it will
66							die.
67
68							The following hash values are taken by it.
69
70							Minimum Required Vars: path, du
71
72							- path :: The path to look for files under. May be a array of paths. Only the first is used
73							for getting the disk usage though, so this should not have paths in it that are on
74							a different partition.
75							Default :: undef
76
77							- du :: Disk usage to remove files down till.
78							Default :: undef
79
80							- min_files :: Minimum number of files to keep, regardless of disk usage.
81							Default :: undef
82
83							- ignore :: A regexp to use for ignoring files. So lets say you want to ignore,
84							files matching /\.pcap$/, it would be '\.pcap$'.
85							Default :: undef
86
87							- dry_run :: Do not actually remove anything. Just check to see if the file writable by the
88							current user.
89
90							The returned value is a hash ref.
91
92							- dry_run :: Boolean for fir it was a dry run or not.
93
94							- found_files :: Array of hashes of data for all files found. This will only be defined if du is above
95							threshold for removing files. If it is below it, the function will return instead of taking
96							the time required to run a search.
97
98							- found_files_count :: A count of files found.
99
100							- path :: The value of path that it was called with. This will always be a array, regardless of if a array or
101							scalar was passed as internally converts a scalars into a array containing just a single item.
102
103							- missing_paths :: Paths that were passed to it, but don't exist.
104
105							- unlinked :: Array of hashes of data for files that have been removed.
106
107							- unlinked_count :: A count of how many files were unlinked
108
109							- unlink_errors :: Array of strings containing error descriptions.
110
111							- unlink_failed :: Array of hashes of data for files that could not removed. The corresponding
112							index in .errors will be the error in question. So $results->{unlink_failed}[0]
113							would be $results->{unlink_errors}[0]
114
115							- unlink_fialed_count :: A count of how many files unlinking failed for.
116
117							The files hash is composed as below.
118
119							- name :: Name of the file, including it's path.
120
121							# following are provided via the Perl function stat
122							- dev
123							- ino
124							- mode
125							- nlink
126							- uid
127							- gid
128							- rdev
129							- size
130							- atime
131							- mtime
132							- ctime
133							- blksize
134							- blocks
135
136							=cut
137
138							sub clean {
139	0			0	1		my ( $empty, %opts ) = @_;
140
141	0						my @missing_paths;
142							my @paths;
143
144	0						my $du_path;
145							# file paths should end with / or other wise if it is a symlink File::Find::Rule will skip it
146							# so fix that up while we are doing the path check
147	0	0	0				if ( !defined( $opts{path} ) ) {
		0
		0
148	0						die('$opts{path} is not defined');
149							} elsif ( ref( $opts{path} ) ne 'ARRAY' && !-d $opts{path} ) {
150	0						push( @missing_paths, $opts{path} );
151							} elsif ( ref( $opts{path} ) eq 'ARRAY' ) {
152	0	0					if ( !defined( $opts{path}[0] ) ) {
153	0						die('$opts{path}[0] is not defined');
154							}
155	0						my $int = 0;
156	0						while ( defined( $opts{path}[$int] ) ) {
157	0						$opts{path}[$int] = $opts{path}[$int] . '/';
158	0						$opts{path}[$int] =~ s/\/+$/\//;
159	0	0					if ( !-d $opts{path}[$int] ) {
160	0						push( @missing_paths, $opts{path}[$int] );
161							} else {
162	0						push( @paths, $opts{path}[$int] );
163							}
164	0						$int++;
165							} ## end while ( defined( $opts{path}[$int] ) )
166	0						$du_path = $opts{path}[0];
167							} else {
168	0						$opts{path} = $opts{path} . '/';
169	0						$opts{path} =~ s/\/+$/\//;
170	0						$du_path = $opts{path};
171	0						push( @paths, $opts{path} );
172							}
173
174	0	0					if ( !defined( $opts{du} ) ) {
		0
175	0						die('$opts{du} is not defined');
176							} elsif ( $opts{du} !~ /^\d+$/ ) {
177	0						die( '$opts{du} is set to "' . $opts{du} . '" whish is not numeric' );
178							}
179
180							# if we have a min_files specified, make sure the value is numeric
181	0	0	0				if ( defined( $opts{min_files} ) && $opts{min_files} !~ /^\d+$/ ) {
182	0						die( '$opts{min_files} is set to "' . $opts{min_files} . '" whish is not numeric matching /^\d+$/' );
183							}
184
185	0	0					if ( !$opts{dry_run} ) {
186	0						$opts{dry_run} = 0,;
187							} else {
188	0						$opts{dry_run} = 1,;
189							}
190
191	0						my $df = df($du_path);
192
193							# the results to be returned
194							my $results = {
195							unlinked => [],
196							unlink_failed => [],
197							unlink_errors => [],
198							found_files => [],
199							found_files_count => 0,
200							unlinked_count => 0,
201							unlink_failed_count => 0,
202							du_target => $opts{du},
203							du_starting => $df->{per},
204							du_ending => $df->{per},
205							min_files => 0,
206							dry_run => $opts{dry_run},
207	0						path => \@paths,
208							missing_paths => \@missing_paths,
209							};
210
211	0	0					if ( !defined( $paths[0] ) ) {
212	0						return $results;
213							}
214
215	0	0					if ( $df->{per} < $opts{du} ) {
216	0						return $results;
217							}
218
219	0						my @files;
220	0	0					if ( defined( $opts{ignore} ) ) {
221	0						my $ignore_rule = File::Find::Rule->new;
222	0						$ignore_rule->name(qr/$opts{ignore}/);
223	0						@files = File::Find::Rule->file()->not($ignore_rule)->in(@paths);
224							} else {
225	0						@files = File::Find::Rule->file()->in(@paths);
226							}
227	0						$results->{found_files_count} = $#files + 1;
228
229							# if we have a min number of files specified, make sure have that many defined
230	0	0	0				if ( $opts{min_files} && !defined( $files[ $opts{min_files} ] ) ) {
231	0						$results->{min_files} = $opts{min_files};
232	0						return $results;
233							}
234
235							# get the stats for all the files
236	0						my @files_info;
237	0						foreach my $file (@files) {
238	0						my %file_info;
239							(
240							$file_info{dev}, $file_info{ino}, $file_info{mode}, $file_info{nlink}, $file_info{uid},
241							$file_info{gid}, $file_info{rdev}, $file_info{size}, $file_info{atime}, $file_info{mtime},
242							$file_info{ctime}, $file_info{blksize}, $file_info{blocks}
243	0						) = stat($file);
244	0						$file_info{name} = $file;
245	0						push( @files_info, \%file_info );
246							} ## end foreach my $file (@files)
247
248							# sort files oldest to newest based on mtime
249	0						@files_info = sort { $a->{mtime} cmp $b->{mtime} } @files_info;
	0
250							# set this here as we are saving it into the hashref as a array ref
251	0						my @files_info_copy = @files_info;
252	0						$results->{found_files} = \@files_info_copy;
253
254							# remove the newest files if mins_files is greater than or equal to 1
255	0	0	0				if ( defined( $opts{min_files} ) && $opts{min_files} > 0 ) {
256	0						$results->{min_files} = $opts{min_files};
257	0						my $min_files_int = 1;
258	0						while ( $min_files_int <= $opts{min_files} ) {
259	0						pop(@files_info);
260
261	0						$min_files_int++;
262							}
263							}
264
265							# go through files and remove the oldest till we
266	0						my $int = 0;
267	0		0				while ( $df->{per} >= $opts{du} && defined( $files_info[$int] ) ) {
268	0						eval {
269	0	0	0				if ( $opts{dry_run} && !-w $files_info[$int]{name} ) {
270	0						die('file is not writable');
271							} else {
272	0	0					unlink( $files_info[$int]{name} ) or die($!);
273							}
274
275							};
276	0						my %tmp_hash = %{ $files_info[$int] };
	0
277	0	0					if ($@) {
278	0						push( @{ $results->{unlink_errors} }, 'Failed to remove "' . $files_info[$int]{name} . '"... ' . $@ );
	0
279	0						push( @{ $results->{unlink_failed} }, \%tmp_hash );
	0
280							} else {
281	0						push( @{ $results->{unlinked} }, \%tmp_hash );
	0
282							}
283
284	0						$int++;
285	0						$df = df($du_path);
286							} ## end while ( $df->{per} >= $opts{du} && defined( $files_info...))
287
288	0						$results->{du_ending} = $df->{per};
289	0	0					if ( defined( $results->{unlinked}[0] ) ) {
290	0						$results->{unlinked_count} = $#{ $results->{unlinked} } + 1;
	0
291							}
292	0	0					if ( defined( $results->{unlink_failed}[0] ) ) {
293	0						$results->{unlink_failed_count} = $#{ $results->{unlink_failed} } + 1;
	0
294							}
295
296	0						return $results;
297							} ## end sub clean
298
299							=head1 AUTHOR
300
301							Zane C. Bowers-Hadley, C<< >>
302
303							=head1 BUGS
304
305							Please report any bugs or feature requests to C, or through
306							the web interface at L. I will be notified, and then you'll
307							automatically be notified of progress on your bug as I make changes.
308
309
310
311
312							=head1 SUPPORT
313
314							You can find documentation for this module with the perldoc command.
315
316							perldoc App::FileCleanerByDiskUage
317
318
319							You can also look for information at:
320
321							=over 4
322
323							=item * RT: CPAN's request tracker (report bugs here)
324
325							L
326
327							=item * CPAN Ratings
328
329							L
330
331							=item * Search CPAN
332
333							L
334
335							=back
336
337
338							=head1 ACKNOWLEDGEMENTS
339
340
341							=head1 LICENSE AND COPYRIGHT
342
343							This software is Copyright (c) 2023 by Zane C. Bowers-Hadley.
344
345							This is free software, licensed under:
346
347							The GNU Lesser General Public License, Version 3, June 2007
348
349
350							=cut
351
352							1; # End of App::FileCleanerByDiskUage