File Coverage

blib/lib/App/FileCleanerByDiskUage.pm

Criterion	Covered	Total	%
statement	14	93	15.0
branch	0	36	0.0
condition	0	18	0.0
subroutine	5	6	83.3
pod	1	1	100.0
total	20	154	12.9

line	stmt	bran	cond	sub	pod	time	code
1							package App::FileCleanerByDiskUage;
2
3	1			1		69020	use 5.006;
	1					4
4	1			1		6	use strict;
	1					2
	1					50
5	1			1		6	use warnings;
	1					3
	1					42
6	1			1		542	use File::Find::Rule;
	1					8388
	1					9
7	1			1		572	use Filesys::Df;
	1					1187
	1					1014
8
9							=head1 NAME
10
11							App::FileCleanerByDiskUage - Removes files based on disk space usage till it drops below the specified amount.
12
13							=head1 VERSION
14
15							Version 0.1.0
16
17							=cut
18
19							our $VERSION = '0.1.0';
20
21							=head1 SYNOPSIS
22
23							use App::FileCleanerByDiskUage;
24
25							# remove files under /var/log/suricata/pcap when disk usage is over 90%
26							# If over 90% make sure there are atleast 32 files and if there are atleast 32, remove them based
27							# on age till we drop below 90%. The newest 32 will be ignored regardless of disk usage.
28							my $removed=App::FileCleanerByDiskUage->clean(path=>'/var/log/suricata/pcap/', du=>90, min_files=>32);
29							if (defined( $removed->{errors}[0] )){
30							die('One or more file could not be removed... '.join(' ', @{ $removed->{errors} }));
31							}
32							my $int=0;
33							while (defined( $removed->{unlined}[$int] )){
34							print 'Removed ' . $removed->{unlinked}[$int]{name} . "\n";
35
36							$int++;
37							}
38
39							This works via doing the following.
40
41							1: Check if disk usage is above the specified threshold. If not it ends here.
42
43							2: Search for files under the specified path.
44
45							3: If the number of found files is less than the number of files to keep regardless
46							of disk size it ends here. So if min_files is set to 32 and there are only 3 files,
47							then it would just return.
48
49							4: Get the stats for all the found files.
50
51							5: If min_files is specified, remove that many of the files from the list, starting
52							with the newest.
53
54							6: Removes the oldest file.
55
56							7: Check disk usage again and if it is less it ends here.
57
58							8: Go back to 6.
59
60							=head1 Functions
61
62							=head2 clean
63
64							This performs the cleaning actions. As long as the path exists and .path and .du
65							are defined this will not die. But if any of those are undef or do not exist it will
66							die.
67
68							The following hash values are taken by it.
69
70							Minimum Required Vars: path, du
71
72							- path :: The path to look for files under. May be a array of paths. Only the first is used
73							for getting the disk usage though, so this should not have paths in it that are on
74							a different partition.
75							Default :: undef
76
77							- du :: Disk usage to remove files down till.
78							Default :: undef
79
80							- min_files :: Minimum number of files to keep, regardless of disk usage.
81							Default :: undef
82
83							- ignore :: A regexp to use for ignoring files. So lets say you want to ignore,
84							files matching /\.pcap$/, it would be '\.pcap$'.
85							Default :: undef
86
87							- dry_run :: Do not actually remove anything. Just check to see if the file writable by the
88							current user.
89
90							The returned value is a hash ref.
91
92							- dry_run :: Boolean for fir it was a dry run or not.
93
94							- found_files :: Array of hashes of data for all files found. This will only be defined if du is above
95							threshold for removing files. If it is below it, the function will return instead of taking
96							the time required to run a search.
97
98							- found_files_count :: A count of files found.
99
100							- path :: The value of path that it was called with. This will always be a array, regardless of if a array or
101							scalar was passed as internally converts a scalars into a array containing just a single item.
102
103							- unlinked :: Array of hashes of data for files that have been removed.
104
105							- unlinked_count :: A count of how many files were unlinked
106
107							- unlink_errors :: Array of strings containing error descriptions.
108
109							- unlink_failed :: Array of hashes of data for files that could not removed. The corresponding
110							index in .errors will be the error in question. So $results->{unlink_failed}[0]
111							would be $results->{unlink_errors}[0]
112
113							- unlink_fialed_count :: A count of how many files unlinking failed for.
114
115							The files hash is composed as below.
116
117							- name :: Name of the file, including it's path.
118
119							# following are provided via the Perl function stat
120							- dev
121							- ino
122							- mode
123							- nlink
124							- uid
125							- gid
126							- rdev
127							- size
128							- atime
129							- mtime
130							- ctime
131							- blksize
132							- blocks
133
134							=cut
135
136							sub clean {
137	0			0	1		my ( $empty, %opts ) = @_;
138
139	0						my $du_path;
140	0	0	0				if ( !defined( $opts{path} ) ) {
		0
		0
141	0						die('$opts{path} is not defined');
142							} elsif ( ref( $opts{path} ) ne 'ARRAY' && !-d $opts{path} ) {
143	0						die( '$opts{path} is set to "' . $opts{path} . '" which is not a directory or does not exist' );
144							} elsif ( ref( $opts{path} ) eq 'ARRAY' ) {
145	0	0					if ( !defined( $opts{path}[0] ) ) {
146	0						die('$opts{path}[0] is not defined');
147							}
148	0						my $int = 0;
149	0						while ( defined( $opts{path}[$int] ) ) {
150	0	0					if ( !-d $opts{path}[$int] ) {
151							die( '$opts{path}['
152							. $int
153							. '] is set to "'
154	0						. $opts{path}[$int]
155							. '" which is not a directory or does not exist' );
156							}
157	0						$int++;
158							} ## end while ( defined( $opts{path}[$int] ) )
159	0						$du_path = $opts{path}[0];
160							} else {
161	0						$du_path = $opts{path} = [ $opts{path} ];
162							}
163
164	0	0					if ( !defined( $opts{du} ) ) {
		0
165	0						die('$opts{du} is not defined');
166							} elsif ( $opts{du} !~ /^\d+$/ ) {
167	0						die( '$opts{du} is set to "' . $opts{du} . '" whish is not numeric' );
168							}
169
170							# if we have a min_files specified, make sure the value is numeric
171	0	0	0				if ( defined( $opts{min_files} ) && $opts{min_files} !~ /^\d+$/ ) {
172	0						die( '$opts{min_files} is set to "' . $opts{min_files} . '" whish is not numeric matching /^\d+$/' );
173							}
174
175	0	0					if ( !$opts{dry_run} ) {
176	0						$opts{dry_run} = 0,;
177							} else {
178	0						$opts{dry_run} = 1,;
179							}
180
181	0						my $df = df($du_path);
182
183							# the results to be returned
184							my $results = {
185							unlinked => [],
186							unlink_failed => [],
187							unlink_errors => [],
188							found_files => [],
189							found_files_count => 0,
190							unlinked_count => 0,
191							unlink_failed_count => 0,
192							du_target => $opts{du},
193							du_starting => $df->{per},
194							du_ending => $df->{per},
195							min_files => 0,
196							dry_run => $opts{dry_run},
197							path => $opts{path},
198	0						};
199
200	0	0					if ( $df->{per} < $opts{du} ) {
201	0						return $results;
202							}
203
204	0						my @files;
205	0	0					if ( defined( $opts{ignore} ) ) {
206	0						my $ignore_rule = File::Find::Rule->new;
207	0						$ignore_rule->name(qr/$opts{ignore}/);
208	0						@files = File::Find::Rule->file()->not($ignore_rule)->in( @{ $opts{path} } );
	0
209							} else {
210	0						@files = File::Find::Rule->file()->in( @{ $opts{path} } );
	0
211							}
212	0						$results->{found_files_count} = $#files + 1;
213
214							# if we have a min number of files specified, make sure have that many defined
215	0	0	0				if ( $opts{min_files} && !defined( $files[ $opts{min_files} ] ) ) {
216	0						$results->{min_files} = $opts{min_files};
217	0						return $results;
218							}
219
220							# get the stats for all the files
221	0						my @files_info;
222	0						foreach my $file (@files) {
223	0						my %file_info;
224							(
225							$file_info{dev}, $file_info{ino}, $file_info{mode}, $file_info{nlink}, $file_info{uid},
226							$file_info{gid}, $file_info{rdev}, $file_info{size}, $file_info{atime}, $file_info{mtime},
227							$file_info{ctime}, $file_info{blksize}, $file_info{blocks}
228	0						) = stat($file);
229	0						$file_info{name} = $file;
230	0						push( @files_info, \%file_info );
231							} ## end foreach my $file (@files)
232
233							# sort files oldest to newest based on mtime
234	0						@files_info = sort { $a->{mtime} cmp $b->{mtime} } @files_info;
	0
235							# set this here as we are saving it into the hashref as a array ref
236	0						my @files_info_copy = @files_info;
237	0						$results->{found_files} = \@files_info_copy;
238
239							# remove the newest files if mins_files is greater than or equal to 1
240	0	0	0				if ( defined( $opts{min_files} ) && $opts{min_files} > 0 ) {
241	0						$results->{min_files} = $opts{min_files};
242	0						my $min_files_int = 1;
243	0						while ( $min_files_int <= $opts{min_files} ) {
244	0						pop(@files_info);
245
246	0						$min_files_int++;
247							}
248							}
249
250							# go through files and remove the oldest till we
251	0						my $int = 0;
252	0		0				while ( $df->{per} >= $opts{du} && defined( $files_info[$int] ) ) {
253	0						eval {
254	0	0	0				if ( $opts{dry_run} && !-w $files_info[$int]{name} ) {
255	0						die('file is not writable');
256							} else {
257	0	0					unlink( $files_info[$int]{name} ) or die($!);
258							}
259
260							};
261	0						my %tmp_hash = %{ $files_info[$int] };
	0
262	0	0					if ($@) {
263	0						push( @{ $results->{unlink_errors} }, 'Failed to remove "' . $files_info[$int]{name} . '"... ' . $@ );
	0
264	0						push( @{ $results->{unlink_failed} }, \%tmp_hash );
	0
265							} else {
266	0						push( @{ $results->{unlinked} }, \%tmp_hash );
	0
267							}
268
269	0						$int++;
270	0						$df = df($du_path);
271							} ## end while ( $df->{per} >= $opts{du} && defined( $files_info...))
272
273	0						$results->{du_ending} = $df->{per};
274	0	0					if ( defined( $results->{unlinked}[0] ) ) {
275	0						$results->{unlinked_count} = $#{ $results->{unlinked} } + 1;
	0
276							}
277	0	0					if ( defined( $results->{unlink_failed}[0] ) ) {
278	0						$results->{unlink_failed_count} = $#{ $results->{unlink_failed} } + 1;
	0
279							}
280
281	0						return $results;
282							} ## end sub clean
283
284							=head1 AUTHOR
285
286							Zane C. Bowers-Hadley, C<< >>
287
288							=head1 BUGS
289
290							Please report any bugs or feature requests to C, or through
291							the web interface at L. I will be notified, and then you'll
292							automatically be notified of progress on your bug as I make changes.
293
294
295
296
297							=head1 SUPPORT
298
299							You can find documentation for this module with the perldoc command.
300
301							perldoc App::FileCleanerByDiskUage
302
303
304							You can also look for information at:
305
306							=over 4
307
308							=item * RT: CPAN's request tracker (report bugs here)
309
310							L
311
312							=item * CPAN Ratings
313
314							L
315
316							=item * Search CPAN
317
318							L
319
320							=back
321
322
323							=head1 ACKNOWLEDGEMENTS
324
325
326							=head1 LICENSE AND COPYRIGHT
327
328							This software is Copyright (c) 2023 by Zane C. Bowers-Hadley.
329
330							This is free software, licensed under:
331
332							The GNU Lesser General Public License, Version 3, June 2007
333
334
335							=cut
336
337							1; # End of App::FileCleanerByDiskUage