File Coverage

blib/lib/App/Hashl.pm

Criterion	Covered	Total	%
statement	90	98	91.8
branch	19	32	59.3
condition	4	8	50.0
subroutine	20	22	90.9
pod	15	17	88.2
total	148	177	83.6

line	stmt	bran	cond	sub	pod	time	code
1							package App::Hashl;
2
3	1			1		553	use strict;
	1					1
	1					26
4	1			1		2	use warnings;
	1					1
	1					20
5	1			1		13	use 5.010;
	1					2
6
7	1			1		412	use Digest::SHA;
	1					2366
	1					40
8	1			1		500	use Storable qw(nstore retrieve);
	1					2152
	1					798
9
10							our $VERSION = '1.01';
11
12							sub new {
13	7			7	1	360	my ( $obj, %conf ) = @_;
14
15	7					14	my $ref = {
16							files => {},
17							ignored => {},
18							};
19
20	7					9	$ref->{config} = \%conf;
21	7		100			20	$ref->{config}->{read_size} //= ( 2*20 ) 4; # 4 MiB
22	7					9	$ref->{version} = $VERSION;
23
24	7					16	return bless( $ref, $obj );
25							}
26
27							sub new_from_file {
28	1			1	1	359773	my ( $obj, $file ) = @_;
29
30	1					5	my $ref = retrieve($file);
31
32	1	50				92	if ( not defined $ref->{version} ) {
33	0					0	$ref->{version} = '1.00';
34							}
35
36	1					2	return bless( $ref, $obj );
37							}
38
39							sub si_size {
40	5			5	1	5	my ( $self, $bytes ) = @_;
41	5					10	my @post = ( q{ }, qw(k M G T) );
42
43	5	100				9	if ( $bytes == 0 ) {
44	1					3	return 'infinite';
45							}
46
47	4					8	while ( $bytes >= 1024 ) {
48	4					5	$bytes /= 1024;
49	4					6	shift @post;
50							}
51
52	4					43	return sprintf( '%6.1f%s', $bytes, $post[0] );
53							}
54
55							sub hash_file {
56	22			22	1	16	my ( $self, $file ) = @_;
57	22					18	my $data;
58	22					69	my $digest = Digest::SHA->new(1);
59
60							# read() fails for empty files
61	22	50				390	if ( ( stat($file) )[7] == 0 ) {
62	0					0	return $digest->hexdigest;
63							}
64	22	100				44	if ( $self->{config}->{read_size} == 0 ) {
65	1					4	$digest->addfile($file);
66	1					56	return $digest->hexdigest;
67							}
68
69							#<<< perltidy has problems indenting 'or die' with tabs
70
71	21	50				400	open( my $fh, '<', $file )
72							or die("Can't open ${file} for reading: $!\n");
73	21	50				47	binmode($fh)
74							or die("Can't set binmode on ${file}: $!\n");
75							read( $fh, $data, $self->{config}->{read_size} )
76	21	50				178	or die("Can't read ${file}: $!\n");
77	21	50				112	close($fh)
78							or die("Can't close ${file}: $!\n");
79
80							#>>>
81	21					82	$digest->add($data);
82	21					170	return $digest->hexdigest;
83							}
84
85							sub hash_in_db {
86	10			10	1	16	my ( $self, $hash ) = @_;
87
88	10	100				20	if ( $self->{ignored}->{$hash} ) {
89	3					12	return '// ignored';
90							}
91
92	7					13	for my $name ( $self->files() ) {
93	6					10	my $file = $self->file($name);
94
95	6	100				13	if ( $file->{hash} eq $hash ) {
96	5					18	return $name;
97							}
98							}
99	2					6	return;
100							}
101
102							sub file_in_db {
103	8			8	1	333	my ( $self, $file ) = @_;
104
105	8					16	return $self->hash_in_db( $self->hash_file($file) );
106							}
107
108							sub read_size {
109	2			2	1	215	my ($self) = @_;
110
111	2					10	return $self->{config}->{read_size};
112							}
113
114							sub db_info {
115	0			0	0	0	my ($self) = @_;
116
117	0	0				0	return sprintf(
		0
118							"Database created by hashl v%s\n"
119							. "Read size: %d bytes (%s)\n"
120							. "contains: %d file%s and %d ignored hash%s\n",
121							$self->db_version,
122							$self->read_size,
123							$self->si_size( $self->read_size ),
124							scalar( $self->files ),
125							( scalar( $self->files ) == 1 ? q{} : 's' ),
126							scalar( $self->ignored ),
127							( scalar( $self->ignored ) == 1 ? q{} : 'es' ),
128							);
129							}
130
131							sub db_version {
132	0			0	0	0	my ($self) = @_;
133
134	0					0	return $self->{version};
135							}
136
137							sub file {
138	12			12	1	12	my ( $self, $name ) = @_;
139
140	12					26	return $self->{files}->{$name};
141							}
142
143							sub delete_file {
144	2			2	1	2	my ( $self, $name ) = @_;
145
146	2					3	delete $self->{files}->{$name};
147
148	2					2	return 1;
149							}
150
151							sub files {
152	12			12	1	12	my ($self) = @_;
153
154	12					9	return keys %{ $self->{files} };
	12					88
155							}
156
157							sub add_file {
158	5			5	1	25	my ( $self, %opt ) = @_;
159	5					5	my $file = $opt{file};
160	5					6	my $path = $opt{path};
161	5					53	my ( $size, $mtime ) = ( stat($path) )[ 7, 9 ];
162
163	5	0	33			12	if ( $self->file($file)
			33
164							and $self->file($file)->{mtime} == $mtime
165							and $self->file($file)->{size} == $size )
166							{
167	0					0	return;
168							}
169
170	5					7	my $hash = $self->hash_file($path);
171
172	5	100				13	if ( $self->{ignored}->{$hash} ) {
173	2	100				6	if ( $opt{unignore} ) {
174	1					2	$self->unignore($hash);
175							}
176							else {
177	1					2	return;
178							}
179							}
180
181	4					12	$self->{files}->{$file} = {
182							hash => $hash,
183							mtime => $mtime,
184							size => $size,
185							};
186
187	4					16	return 1;
188							}
189
190							sub ignored {
191	4			4	1	4	my ($self) = @_;
192
193	4	50				10	if ( exists $self->{ignored} ) {
194	4					3	return keys %{ $self->{ignored} };
	4					18
195							}
196
197	0					0	return ();
198							}
199
200							sub ignore {
201	2			2	1	3	my ( $self, $file, $path ) = @_;
202
203	2					4	$self->delete_file($file);
204	2					3	$self->{ignored}->{ $self->hash_file($path) } = 1;
205
206	2					7	return 1;
207							}
208
209							sub unignore {
210	2			2	1	5	my ( $self, $hash ) = @_;
211
212	2					3	delete $self->{ignored}->{$hash};
213
214	2					3	return 1;
215							}
216
217							sub save {
218	1			1	1	1	my ( $self, $file ) = @_;
219
220	1					3	return nstore( $self, $file );
221							}
222
223							1;
224
225							__END__
226
227							=head1 NAME
228
229							App::Hashl - Partially hash files, check if files are equal etc.
230
231							=head1 SYNOPSIS
232
233							use App::Hashl;
234
235							my $hashl = App::Hashl->new();
236							# or: App::Hashl->new_from_file($database_file);
237
238							=head1 VERSION
239
240							This manual documents App::Hashl version 1.01
241
242							=head1 DESCRIPTION
243
244							App::Hashl contains utilities to hash the first n bytes of files, store and
245							recall them, check if another file is already in the database and optionally
246							ignore file hashes.
247
248							=head1 METHODS
249
250							=over
251
252							=item $hashl = App::Hashl->new(I<%conf>)
253
254							Returns a new B<App::Hashl> object. Accepted parameters are:
255
256							=over
257
258							=item B<read_size> => I<bytes>
259
260							How many bytes of a file to consider for the hash. Defaults to 4 MiB (4 *
261							2**20 bytes). 0 means read the whole file.
262
263							=back
264
265							=item $hashl = App::Hashl->new_from_file(I<$file>)
266
267							Returns the B<App::Hashl> object saved to I<file> by a prior $hashl->save
268							call.
269
270							=item $hashl->si_size(I<$bytes>)
271
272							Returns I<bytes> as a human-readable SI-size, such as "1.0k", "50.7M", "2.1G".
273							The returned string is always six characters long.
274
275							=item $hashl->hash_file(I<$file>)
276
277							Returns the SHA1 hash of the first few bytes (as configured via B<read_size>) of
278							I<file>. Dies if I<file> cannot be read.
279
280							=item $hashl->hash_in_db(I<$hash>)
281
282							Checks if I<hash> is in the database. If it is, returns the filename it is
283							associated with. If it is ignored, returns "// ignored" (subject to change).
284							Otherwise, returns false.
285
286							=item $hashl->file_in_db(I<$file>)
287
288							Checks if I<file>'s hash is in the database. For the return value, see
289							B<hash_in_db>.
290
291							=item $hashl->read_size()
292
293							Returns the current read size. Note that once an B<App::Hashl> object has
294							been created, it is not possible to change the read size.
295
296							=item $hashl->file(I<$name>)
297
298							Returns a hashref describing the file. The layout is as follows:
299
300							hash => file's hash,
301							mtime => mtime as UNIX timestamp,
302							size => file size in bytes
303
304							If I<name> does not exist in the database, returns undef.
305
306							=item $hashl->delete_file(I<$name>)
307
308							Remove the file from the database.
309
310							=item $hashl->files()
311
312							Returns a list of all file names in the database.
313
314							=item $hashl->add_file(I<%data>)
315
316							Add a file to the database. Required keys in I<%data> are:
317
318							=over
319
320							=item B<file> => I<name>
321
322							relateve file name to store in the database
323
324							=item B<path> => I<path>
325
326							Full path to the file
327
328							=item B<unignore> => B<0>\|B<1>
329
330							If true: do not skip ignored files, unignore and re-add them instead
331
332							=back
333
334							If the file already is in the database, it is only updated if both the file
335							size and the mtime have changed.
336
337							Returns true if the file was actually added to the database, false if it is
338							ignored or already present (and up-to-date).
339
340							=item $hashl->ignored()
341
342							Returns a list of all ignored file hashes.
343
344							=item $hashl->ignore(I<$file>, I<$path>)
345
346							Removes I<$file> from the database and adds I<$path> to the list of ignored
347							file hashes.
348
349							=item $hashl->unignore(I<$path>)
350
351							Unignore the hash of I<$path>.
352
353							=item $hashl->save(I<$file>)
354
355							Save the B<App::Hashl> object with all data to I<$file>. It can later be
356							retrieved via B<new_from_file>.
357
358							=back
359
360							=head1 DIAGNOSTICS
361
362							None.
363
364							=head1 DEPENDENCIES
365
366							Digest::SHA(3pm);
367
368							=head1 BUGS AND LIMITATIONS
369
370							None known.
371
372							=head1 AUTHOR
373
374							Copyright (C) 2011-2017 by Daniel Friesel E<lt>derf@finalrewind.orgE<gt>
375
376							=head1 LICENSE
377
378							0. You just DO WHAT THE FUCK YOU WANT TO.