File Coverage

blib/lib/Algorithm/Diff/Apply.pm

Criterion	Covered	Total	%
statement	207	227	91.1
branch	61	80	76.2
condition	12	17	70.5
subroutine	17	18	94.4
pod	4	4	100.0
total	301	346	86.9

line	stmt	bran	cond	sub	pod	time	code
1							package Algorithm::Diff::Apply;
2	11			8		255817	use Carp;
	11					28
	11					787
3	11			8		70	use strict;
	11					32
	18					435
4	11			8		55	use constant DEFAULT_OPTIMISERS => (\&optimise_remove_duplicates);
	18					171
	18					761
5
6	18			8		61	use base qw{Exporter};
	16					28
	16					1022
7	16			8		62	use vars qw{@EXPORT_OK $VERSION};
	12					23
	12					24897
8							@EXPORT_OK = qw{
9							apply_diff
10							apply_diffs
11							mark_conflicts
12							optimise_remove_duplicates
13							};
14							$VERSION = '0.2.3';
15							# ^ incr. implies loss of backwards compatibility, no workaround
16							# ^ increment implies a new feature, or big under-bonnet changes
17							# ^ this gets incremented on bugfixes and minor
18
19
20							# Apply a single diff sequence. Nice and simple, and doesn't require
21							# any pre-passes.
22
23							sub apply_diff
24							{
25	8			5	1	3913	my @ary = @{shift()};
	5					20
26	5					9	my $diff = shift;
27
28	5					6	my $delta = 0;
29	5					9	foreach my $hunk (@$diff)
30							{
31	8					10	foreach my $change (@$hunk)
32							{
33	63					79	my ($op, $pos, $data) = @$change;
34	63	100				111	if ($op eq "-")
		100
35							{
36	30					41	splice(@ary, $pos+$delta, 1);
37	30					52	--$delta;
38							}
39							elsif ($op eq "+")
40							{
41	33					52	splice(@ary, $pos, 0, $data);
42	33					51	++$delta;
43							}
44							else
45							{
46	0					0	die "unknown operation: \"$op\"\n";
47							}
48							}
49							}
50	5	50				39	return wantarray ? @ary : \@ary;
51							}
52
53
54							# Apply one or more labelled diff sequences to a target array.
55							# Somewhat more complex; needs prepasses and consideration of
56							# conflicts.
57
58							sub apply_diffs
59							{
60							# Collect args:
61	17			17	1	25418	my @ary = @{shift(@_)};
	17					120
62	17					36	my %opt;
63	17	100	66			134	%opt = %{shift(@_)} if ref($_[0]) && (ref($_[0]) eq 'HASH');
	15					68
64	17					42	my %diffset;
65	17					128	while (my $tag = shift)
66							{
67	42	50				102	ref($tag) and croak("Tagnames must be scalar");
68	42					66	my $diff = shift;
69	42	50				224	ref($diff) eq 'ARRAY'
70							or croak("Diff sequences must be references of "
71							. "type \"ARRAY\"");
72	42					127	$diffset{$tag} = __homogenise_diff($diff, %opt);
73							}
74
75							# Trivial case
76	17	50				62	if (scalar keys %diffset < 1)
77							{
78	0	0				0	return wantarray ? @ary : \@ary;
79							}
80
81	17					61	my @alts = __optimise_conflicts(diffset => \%diffset,
82							opts => \%opt);
83	17					68	__apply_alternatives(target => \@ary,
84							alts => \@alts,
85							opts => \%opt);
86	17	50				270	return wantarray ? @ary : \@ary;
87							}
88
89
90							# Converts all the hunks in an Algorithm::Diff-style diff to a
91							# normalised form in which all hunks are a) still internally
92							# contiguous, and b) have start indices which refer to items in the
93							# original array, before any diffs are applied. Normally, hunks
94							# consisting of only inserts don't meet criterion b).
95							#
96							# Allso attaches hash data if the hashing function is defined.
97
98							sub __homogenise_diff
99							{
100	42			42		95	my ($orig_diff, %opt) = @_;
101	42					65	my @hdiff = ();
102	42					64	my $delta = 0; # difference between orig and resultant
103	42					98	foreach my $orig_hunk (@$orig_diff)
104							{
105	53					151	my ($first_op, $start) = @{$orig_hunk->[0]} [0, 1];
	53					126
106	53	100				136	$start -= $delta if $first_op eq '+';
107	53					436	my $hhunk = {
108							start => $start,
109							changes => [],
110							};
111	53					106	foreach my $change (@$orig_hunk)
112							{
113	211					1687	my ($op, $data);
114	211					524	($op, undef, $data) = @$change;
115	211	100				519	$delta += (($op eq '+') ? 1 : -1);
116	211	100				420	my $hash = (exists($opt{key_generator})
117							? $opt{key_generator}->($data)
118							: undef);
119	211					347	push @{$hhunk->{changes}}, [$op, $data, $hash];
	211					809
120							}
121	53					142	push @hdiff, $hhunk;
122							}
123	42					272	return \@hdiff;
124							}
125
126
127							# Calls the specified optimisation callbacks, returning a list of discrete
128							# alternative blocks in a format that __apply_alternatives() can handle.
129
130							sub __optimise_conflicts
131							{
132	17			17		124	my %args = @_;
133	17	50				35	my %diffset = %{$args{diffset} \|\| confess "\"diffset\" not specified"};
	17					105
134	17	50				36	my %opt = %{$args{opts} \|\| confess "\"opts\" not specified"};
	17					76
135
136	17					12614	my @optim;
137	17	100	66			2209	if ($opt{optimisers} or $opt{optimizers})
138							{
139	4	50				5	push @optim, @{$opt{optimisers} \|\| []};
	4					26
140	4	50				8	push @optim, @{$opt{optimizers} \|\| []};
	4					39
141							}
142							else
143							{
144	13					3768	@optim = &DEFAULT_OPTIMISERS;
145							}
146	17					1213	my @alts;
147	17					61	while (my ($u_min, $u_max, %u_alt)
148							= __shift_next_alternatives(\%diffset))
149							{
150							# Non-conflict case:
151	32	100				91	if (scalar(keys(%u_alt)) <= 1)
152							{
153	15					41	push(@alts, [$u_min, $u_max, %u_alt]);
154	15					54	next;
155							}
156
157							# Conflict case: pass each optimiser over it once.
158	17					30	foreach my $o (@optim)
159							{
160	16					65	%u_alt = $o->("conflict_block" => \%u_alt);
161	16					69	%u_alt = __diffset_discard_empties(%u_alt);
162							}
163							#__dump_diffset(%u_alt);
164
165							# An optimiser could turn one block of conflicts into
166							# two or more, so re-detect any remaining conflicts
167							# within the block.
168
169	17					195	while (my ($o_min, $o_max, %o_alt)
170							= __shift_next_alternatives(\%u_alt))
171							{
172	17					123	push(@alts, [$o_min, $o_max, %o_alt]);
173							}
174							}
175	17					85	return @alts;
176							}
177
178
179							# Extracts the array ($min, $max, %alts) from %$diffset where $min and
180							# $max describe the range of lines affected by all the diff hunks in
181							# %alts, and %alts is a diffset containing at least one alternative.
182							# Returns an empty array if there are no diff hunks left.
183
184							sub __shift_next_alternatives
185							{
186	83			83		174	my $diffset = shift;
187	83					168	my $id = __next_hunk_id($diffset);
188	83	100				550	defined($id) or return ();
189	49					59	my ($cflict_max, $cflict_min);
190	0					0	my %cflict;
191	49					59	my $hunk = shift @{$diffset->{$id}};
	49					98
192	49					125	$cflict{$id} = [ $hunk ];
193
194							# Seed range with $hunk:
195	49					61	my @ch = @{$hunk->{changes}};
	49					120
196	49					80	my $span = grep { $_->[0] eq '-' } @ch;
	195					477
197	49					80	$cflict_min = $hunk->{start};
198	49					59	$cflict_max = $cflict_min + $span;
199
200							# Detect conflicting hunks, and add those in too.
201	49					58	my %ignore;
202	49					106	while (my $tmp_id = __next_hunk_id($diffset, %ignore))
203							{
204	68					114	my $tmp_hunk = $diffset->{$tmp_id}->[0];
205	68					83	@ch = @{$tmp_hunk->{changes}};
	68					237
206	68					109	my $tmp_span = grep { $_->[0] eq '-' } @ch;
	225					458
207	68					113	my $tmp_max = $tmp_hunk->{start} + $tmp_span;
208	68	100				174	if ($tmp_hunk->{start} <= $cflict_max)
209							{
210	33	50				107	exists $cflict{$tmp_id} or $cflict{$tmp_id} = [];
211	33					39	shift @{$diffset->{$tmp_id}};
	33					60
212	33					39	push @{$cflict{$tmp_id}}, $tmp_hunk;
	33					60
213	33	100				159	$cflict_max = $tmp_max if $tmp_max > $cflict_max;
214							}
215							else
216							{
217	35					150	$ignore{$tmp_id} = 1;
218							}
219							}
220
221	49					4948	return ($cflict_min, $cflict_max, %cflict);
222							}
223
224
225							# Returns the ID of the hunk in %$diffset whose ->{start} is lowest,
226							# or undef. %ignore{SOMEID} can be set to a true value to cause a
227							# given sequence to be skipped over.
228
229							sub __next_hunk_id
230							{
231	200			200		339	my ($diffset, %ignore) = @_;
232	200					231	my ($lo_id, $lo_start);
233	200					448	foreach my $id (keys %$diffset)
234							{
235	563	100				5360	next if $ignore{$id};
236	495					816	my $diff = $diffset->{$id};
237	495	100				1301	next if $#$diff < 0;
238	217					358	my $start = $diff->[0]->{start};
239	217	100	100			801	if ((! defined($lo_start))
240							\|\| $start < $lo_start)
241							{
242	133					161	$lo_id = $id;
243	133					267	$lo_start = $start;
244							}
245							}
246	200					662	return $lo_id;
247							}
248
249
250							sub __diffset_discard_empties
251							{
252	16			16		73	my %dset = @_;
253	36					181	return map {
254	16	100				34	($#{$dset{$_}} < 0) ? () : ($_ => $dset{$_});
	36					51
255							} keys %dset;
256							}
257
258
259							sub __apply_alternatives
260							{
261	17			17		63	my %args = @_;
262	17	50				27	my %opt = %{$args{opts} \|\| confess "\"opts\" not specified"};
	17					87
263	17		33			57	my $ary = $args{target} \|\| confess "\"target\" not specified";
264	17	50				25	my @alts = @{$args{alts} \|\| confess "\"alts\" not specified"};
	17					206
265	17		100			72	my $resolver = $opt{resolver} \|\| \&mark_conflicts;
266
267	17					30	my $delta = 0;
268	17					55	while (my $alt = shift @alts)
269							{
270	32					136	my ($min, $max, %alts) = @$alt;
271	32					85	my @orig = @{$ary}[$min + $delta .. $max + $delta - 1];
	32					91
272	32					45	my @replacement;
273
274							my %alt_txts;
275	32					108	foreach my $id (sort keys %alts)
276							{
277	44					87	my @tmp = @orig;
278	44					149	my $tmp_delta = -$min;
279	44					54	foreach my $hunk (@{ $alts{$id} })
	44					96
280							{
281	44					182	__apply_hunk(\@tmp, \$tmp_delta, $hunk);
282							}
283	44					142	$alt_txts{$id} = \@tmp;
284							}
285
286	32	100				97	if (scalar keys %alt_txts == 1)
287							{
288	20					36	my ($r) = values %alt_txts;
289	20					48	@replacement = @$r;
290							}
291							else
292							{
293	12					63	@replacement = $resolver->(src_range_end => $max,
294							src_range_start => $min,
295							src_range => \@orig,
296							alt_txts => \%alt_txts,
297							invoc_opts => \%opt);
298							}
299	32					414	splice(@$ary, $min + $delta, $#orig+1, @replacement);
300	32					216	$delta += ($#replacement - $#orig);
301							}
302							}
303
304
305							# Applies a hunk to an array, and calculates the lines lost or gained
306							# by doing so.
307
308							sub __apply_hunk
309							{
310	44			44		244	my ($ary, $rdelta, $hunk) = @_;
311	44					149	my $pos = $hunk->{start} + $$rdelta;
312	44					65	foreach my $change (@{$hunk->{changes}})
	44					87
313							{
314	161	100				452	if ($change->[0] eq '+')
315							{
316	72					185	splice(@$ary, $pos, 0, $change->[1]);
317	72					75	++$$rdelta;
318	72					208	++$pos;
319							}
320							else
321							{
322	89					127	splice(@$ary, $pos, 1);
323	89					163	--$$rdelta;
324							}
325							}
326							}
327
328
329							# The default conflict resolution subroutine. Returns all alternative
330							# texts with conflict markers inserted around them.
331
332							sub mark_conflicts (%)
333							{
334	3			3	1	17	my %opt = @_;
335	3	50				12	defined $opt{alt_txts} or confess("alt_txts not defined\n");
336	3					5	my %alt = %{$opt{alt_txts}};
	3					11
337	3					7	my @ret;
338	3					12	foreach my $id (sort keys %alt)
339							{
340	6					18	push @ret, ">>>>>> $id\n";
341	6					42	push @ret, @{$alt{$id}};
	6					20
342							}
343	3					9	push @ret, "<<<<<<\n";
344	3					24	return @ret;
345							}
346
347
348							sub optimise_remove_duplicates (%)
349							{
350	13			16	1	122	my %opt = @_;
351	13					24	my $block = $opt{conflict_block};
352	13	50				98	defined $block or confess("conflict_block not defined\n");
353	13					130	my @tags = reverse sort keys(%$block);
354	13					90	my %ret = map {$_ => []} @tags;
	26					74
355							REFTAG:
356	13					42	while (my $tag = shift @tags)
357							{
358	26					122	REFHUNK:
359	26					77	for my $hunk (@{$block->{$tag}})
360							{
361	26					36	for my $t (@tags)
362							{
363	13					18	for my $h (@{$block->{$t}})
	13					24
364							{
365	13	100				38	__hunks_identical($hunk, $h)
366							and next REFHUNK;
367							}
368							}
369	23					32	push @{$ret{$tag}}, $hunk;
	23					107
370							}
371							}
372	13					68	return %ret;
373							}
374
375
376							sub __hunks_identical
377							{
378	21			21		31	my ($h1, $h2) = @_;
379	21	100				79	$h1->{start} == $h2->{start} or return 0;
380	18	100				21	$#{$h1->{changes}} == $#{$h2->{changes}} or return 0;
	18					47
	18					58
381	15					23	foreach my $i (0 .. $#{$h1->{changes}})
	15					44
382							{
383	65					113	my ($op1, $data1, $hash1) = @{ $h1->{changes}->[$i] };
	65					141
384	65					79	my ($op2, $data2, $hash2) = @{ $h2->{changes}->[$i] };
	65					146
385	65	50				146	$op1 eq $op2 or return 0;
386	65	100	66			176	if (defined($hash1) && defined($hash2))
387							{
388	18	100				48	$hash1 eq $hash2 or return 0;
389							}
390							else
391							{
392	47	100				163	$data1 eq $data2 or return 0;
393							}
394							}
395	9					65	return 1;
396							}
397
398
399							sub __dump_diffset
400							{
401	0			0			my %dset = @_;
402	0						print STDERR "-- begin diffset --\n";
403	0						for my $tag (sort keys %dset)
404							{
405	0						print STDERR "-- begin seq tag=\"$tag\" --\n";
406	0						my @diff = @{$dset{$tag}};
	0
407	0						for my $diff (@diff)
408							{
409	0						print STDERR "\n\@".$diff->{start}."\n";
410	0						for my $e (@{$diff->{changes}})
	0
411							{
412	0						my ($op, $data) = @$e;
413	0						$data = quotemeta($data);
414	0						$data =~ s{^(.{0,75})(.*)}{
415	0	0					$1 . ($2 eq "" ? "" : "...");
416							}se;
417	0						print STDERR "$op $data\n";
418							}
419							}
420	0						print STDERR "\n-- end seq tag=\"$tag\" --\n";
421							}
422	0						print STDERR "-- end diffset --\n";
423							}
424
425
426							# Terminology
427							#
428							# A "diffset" is a hash of IDs whose values are arrays holding
429							# sequences of diffs generated from different sources. There may be
430							# conflicts within a diffset.
431							#
432							# An "alternatives" diffset is a minimal diffset which contains no
433							# more than one conflict. I can't think of a better name for it, as
434							# there's a special case where it only consists of a single key
435							# pointing at a single hunk.
436
437							1;