File Coverage

lib/App/dupfind/Guts.pm

Criterion	Covered	Total	%
statement	18	67	26.8
branch	0	14	0.0
condition			n/a
subroutine	6	12	50.0
pod			n/a
total	24	93	25.8

line	stmt	bran	sub	time	code
1					# ABSTRACT: Private methods supporting the App::dupfind::Common public interface
2
3	8		8	81708	use strict;
	8			16
	8			320
4	8		8	44	use warnings;
	8			13
	8			434
5
6					package App::dupfind::Guts;
7					{
8					$App::dupfind::Guts::VERSION = '0.140230'; # TRIAL
9					}
10
11	8		8	216	use 5.010;
	8			30
	8			378
12
13	8		8	3862	use File::Util;
	8			63998
	8			78
14
15	8		8	976	use Moo::Role;
	8			16
	8			68
16
17	8		8	2657	use lib 'lib';
	8			14
	8			63
18
19					requires 'opts';
20
21					with 'App::dupfind::Guts::Algorithms';
22
23					has weed_pass_map => ( is => 'ro', builder => '_build_wpmap' );
24
25					has ftl => ( is => 'ro', builder => '_build_ftl', lazy => 1 );
26
27	0		0		has stats => ( is => 'rw', builder => sub { {} } );
28
29
30					sub _build_ftl
31					{
32	0		0		my $self = shift;
33
34	0				return File::Util->new
35					(
36					{
37					use_flock => 0,
38					diag => 1,
39					read_limit => $self->opts->{bytes},
40					abort_depth => $self->opts->{maxdepth},
41					onfail => 'undefined',
42					}
43					);
44					}
45
46					sub _build_wpmap
47					{
48					{
49	0		0		last => '_get_last_bytes',
50					first => '_get_first_bytes',
51					middle => '_get_middle_byte',
52					middle_last => '_get_middle_last_bytes',
53					almost_middle => '_get_bytes_n_offset_n',
54					first_middle_last => '_get_first_middle_last_bytes',
55					}
56					}
57
58					sub _plan_weed_passes
59					{
60	0		0		my $self = shift;
61	0				my @plan = ();
62
63	0				for my $pass_type ( @{ $self->opts->{wpass} } )
	0
64					{
65	0	0			die "Unrecognized weed pass type $pass_type"
66					if ! exists $self->weed_pass_map->{ $pass_type };
67
68	0				push @plan, $self->weed_pass_map->{ $pass_type };
69					}
70
71	0				return @plan;
72					}
73
74					sub _do_weed_pass
75					{
76	0		0		my ( $self, $size_dups, $pass_type, $pass_count ) = @_;
77
78	0				my $dup_count = $self->count_dups( $size_dups );
79
80	0				my ( $new_count, $difference );
81
82	0				$self->say_stderr( " $dup_count POTENTIAL DUPLICATES" );
83
84	0				$size_dups = $self->_pull_weeds( $size_dups => $pass_type => $pass_count );
85
86	0				$new_count = $self->count_dups( $size_dups );
87
88	0				$difference = $dup_count - $new_count;
89
90	0				$dup_count = $new_count;
91
92	0				$self->say_stderr
93					(
94					sprintf ' ...ELIMINATED %d NON-DUPS IN PASS #%d. %d REMAIN',
95					$difference,
96					$pass_count,
97					$new_count
98					);
99
100	0				return $size_dups;
101					}
102
103					sub _pull_weeds
104					{
105					# weed out files that are obviously different, based on the last
106					# few bytes in the file. This saves us from unnecessary hashing
107
108	0		0		my ( $self, $size_dups, $weeder, $pass_count ) = @_;
109
110	0				my $len = $self->opts->{wpsize};
111
112	0				my ( $progress, $i );
113
114	0	0			if ( $self->opts->{progress} )
115					{
116	0				my $dup_count = $self->count_dups( $size_dups );
117
118	0				$progress = Term::ProgressBar->new
119					(
120					{
121					name => ' ...WEED-OUT PASS ' . $pass_count,
122					count => $dup_count,
123					remove => 1,
124					}
125					);
126					}
127
128	0				for my $same_size ( keys %$size_dups )
129					{
130	0				my $same_bytes = {};
131	0				my $weed_failed = [];
132
133	0				for my $file ( @{ $size_dups->{ $same_size } } )
	0
134					{
135	0				my $bytes_read = $self->$weeder( $file, $len, $same_size );
136
137	0	0			push @{ $same_bytes->{ $bytes_read } }, $file
	0
138					if defined $bytes_read;
139
140	0	0			push @$weed_failed, $file unless defined $bytes_read;
141
142	0	0			$progress->update( $i++ ) if $progress;
143					}
144
145					# delete obvious non-dupe files from the group of same-size files
146					# by virtue of the fact that they will be a single length arrayref
147
148	0				delete $same_bytes->{ $_ }
149	0				for grep { @{ $same_bytes->{ $_ } } == 1 }
	0
150					keys %$same_bytes;
151
152					# recompose the arrayref of filenames for the same-size file grouping
153					# but leave out the files we just weeded out from the group
154
155	0				$size_dups->{ $same_size } = []; # start fresh
156
157	0				@{ $size_dups->{ $same_size } } =
	0
158	0				map { @{ $same_bytes->{ $_ } } }
	0
159					keys %$same_bytes;
160
161	0	0			push @{ $size_dups->{ $same_size } }, @$weed_failed if @$weed_failed;
	0
162					}
163
164	0	0			$progress->update( $i ) if $progress;
165
166	0				return $size_dups;
167					}
168
169					1;
170
171					__END__