File Coverage

deps/libgit2/src/xdiff/xdiffi.c

Criterion	Covered	Total	%
statement	153	413	37.0
branch	89	322	27.6
condition			n/a
subroutine			n/a
pod			n/a
total	242	735	32.9

line	stmt	bran	code
1			/*
2			* LibXDiff by Davide Libenzi ( File Differential Library )
3			* Copyright (C) 2003 Davide Libenzi
4			*
5			* This library is free software; you can redistribute it and/or
6			* modify it under the terms of the GNU Lesser General Public
7			* License as published by the Free Software Foundation; either
8			* version 2.1 of the License, or (at your option) any later version.
9			*
10			* This library is distributed in the hope that it will be useful,
11			* but WITHOUT ANY WARRANTY; without even the implied warranty of
12			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13			* Lesser General Public License for more details.
14			*
15			* You should have received a copy of the GNU Lesser General Public
16			* License along with this library; if not, see
17			* .
18			*
19			* Davide Libenzi
20			*
21			*/
22
23			#include "xinclude.h"
24			#include "integer.h"
25
26
27			#define XDL_MAX_COST_MIN 256
28			#define XDL_HEUR_MIN_COST 256
29			#define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
30			#define XDL_SNAKE_CNT 20
31			#define XDL_K_HEUR 4
32
33			/** Declare a function as always inlined. */
34			#if defined(_MSC_VER)
35			# define XDL_INLINE(type) static __inline type
36			#elif defined(__GNUC__)
37			# define XDL_INLINE(type) static __inline__ type
38			#else
39			# define XDL_INLINE(type) static type
40			#endif
41
42			typedef struct s_xdpsplit {
43			long i1, i2;
44			int min_lo, min_hi;
45			} xdpsplit_t;
46
47
48
49
50			static long xdl_split(unsigned long const *ha1, long off1, long lim1,
51			unsigned long const *ha2, long off2, long lim2,
52			long kvdf, long kvdb, int need_min, xdpsplit_t *spl,
53			xdalgoenv_t *xenv);
54			static xdchange_t xdl_add_change(xdchange_t xscr, long i1, long i2, long chg1, long chg2);
55
56
57
58
59
60			/*
61			* See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
62			* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
63			* the forward diagonal starting from (off1, off2) and the backward diagonal
64			* starting from (lim1, lim2). If the K values on the same diagonal crosses
65			* returns the furthest point of reach. We might end up having to expensive
66			* cases using this algorithm is full, so a little bit of heuristic is needed
67			* to cut the search and to return a suboptimal point.
68			*/
69	0		static long xdl_split(unsigned long const *ha1, long off1, long lim1,
70			unsigned long const *ha2, long off2, long lim2,
71			long kvdf, long kvdb, int need_min, xdpsplit_t *spl,
72			xdalgoenv_t *xenv) {
73	0		long dmin = off1 - lim2, dmax = lim1 - off2;
74	0		long fmid = off1 - off2, bmid = lim1 - lim2;
75	0		long odd = (fmid - bmid) & 1;
76	0		long fmin = fmid, fmax = fmid;
77	0		long bmin = bmid, bmax = bmid;
78			long ec, d, i1, i2, prev1, best, dd, v, k;
79
80			/*
81			* Set initial diagonal values for both forward and backward path.
82			*/
83	0		kvdf[fmid] = off1;
84	0		kvdb[bmid] = lim1;
85
86	0		for (ec = 1;; ec++) {
87	0		int got_snake = 0;
88
89			/*
90			* We need to extent the diagonal "domain" by one. If the next
91			* values exits the box boundaries we need to change it in the
92			* opposite direction because (max - min) must be a power of two.
93			* Also we initialize the external K value to -1 so that we can
94			* avoid extra conditions check inside the core loop.
95			*/
96	0	0	if (fmin > dmin)
97	0		kvdf[--fmin - 1] = -1;
98			else
99	0		++fmin;
100	0	0	if (fmax < dmax)
101	0		kvdf[++fmax + 1] = -1;
102			else
103	0		--fmax;
104
105	0	0	for (d = fmax; d >= fmin; d -= 2) {
106	0	0	if (kvdf[d - 1] >= kvdf[d + 1])
107	0		i1 = kvdf[d - 1] + 1;
108			else
109	0		i1 = kvdf[d + 1];
110	0		prev1 = i1;
111	0		i2 = i1 - d;
112	0	0	for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
		0
		0
113	0	0	if (i1 - prev1 > xenv->snake_cnt)
114	0		got_snake = 1;
115	0		kvdf[d] = i1;
116	0	0	if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {
		0
		0
		0
117	0		spl->i1 = i1;
118	0		spl->i2 = i2;
119	0		spl->min_lo = spl->min_hi = 1;
120	0		return ec;
121			}
122			}
123
124			/*
125			* We need to extent the diagonal "domain" by one. If the next
126			* values exits the box boundaries we need to change it in the
127			* opposite direction because (max - min) must be a power of two.
128			* Also we initialize the external K value to -1 so that we can
129			* avoid extra conditions check inside the core loop.
130			*/
131	0	0	if (bmin > dmin)
132	0		kvdb[--bmin - 1] = XDL_LINE_MAX;
133			else
134	0		++bmin;
135	0	0	if (bmax < dmax)
136	0		kvdb[++bmax + 1] = XDL_LINE_MAX;
137			else
138	0		--bmax;
139
140	0	0	for (d = bmax; d >= bmin; d -= 2) {
141	0	0	if (kvdb[d - 1] < kvdb[d + 1])
142	0		i1 = kvdb[d - 1];
143			else
144	0		i1 = kvdb[d + 1] - 1;
145	0		prev1 = i1;
146	0		i2 = i1 - d;
147	0	0	for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
		0
		0
148	0	0	if (prev1 - i1 > xenv->snake_cnt)
149	0		got_snake = 1;
150	0		kvdb[d] = i1;
151	0	0	if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {
		0
		0
		0
152	0		spl->i1 = i1;
153	0		spl->i2 = i2;
154	0		spl->min_lo = spl->min_hi = 1;
155	0		return ec;
156			}
157			}
158
159	0	0	if (need_min)
160	0		continue;
161
162			/*
163			* If the edit cost is above the heuristic trigger and if
164			* we got a good snake, we sample current diagonals to see
165			* if some of the, have reached an "interesting" path. Our
166			* measure is a function of the distance from the diagonal
167			* corner (i1 + i2) penalized with the distance from the
168			* mid diagonal itself. If this value is above the current
169			* edit cost times a magic factor (XDL_K_HEUR) we consider
170			* it interesting.
171			*/
172	0	0	if (got_snake && ec > xenv->heur_min) {
		0
173	0	0	for (best = 0, d = fmax; d >= fmin; d -= 2) {
174	0	0	dd = d > fmid ? d - fmid: fmid - d;
175	0		i1 = kvdf[d];
176	0		i2 = i1 - d;
177	0		v = (i1 - off1) + (i2 - off2) - dd;
178
179	0	0	if (v > XDL_K_HEUR * ec && v > best &&
		0
		0
180	0	0	off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
		0
181	0	0	off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
182	0	0	for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
183	0	0	if (k == xenv->snake_cnt) {
184	0		best = v;
185	0		spl->i1 = i1;
186	0		spl->i2 = i2;
187	0		break;
188			}
189			}
190			}
191	0	0	if (best > 0) {
192	0		spl->min_lo = 1;
193	0		spl->min_hi = 0;
194	0		return ec;
195			}
196
197	0	0	for (best = 0, d = bmax; d >= bmin; d -= 2) {
198	0	0	dd = d > bmid ? d - bmid: bmid - d;
199	0		i1 = kvdb[d];
200	0		i2 = i1 - d;
201	0		v = (lim1 - i1) + (lim2 - i2) - dd;
202
203	0	0	if (v > XDL_K_HEUR * ec && v > best &&
		0
		0
204	0	0	off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
		0
205	0	0	off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
206	0	0	for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
207	0	0	if (k == xenv->snake_cnt - 1) {
208	0		best = v;
209	0		spl->i1 = i1;
210	0		spl->i2 = i2;
211	0		break;
212			}
213			}
214			}
215	0	0	if (best > 0) {
216	0		spl->min_lo = 0;
217	0		spl->min_hi = 1;
218	0		return ec;
219			}
220			}
221
222			/*
223			* Enough is enough. We spent too much time here and now we collect
224			* the furthest reaching path using the (i1 + i2) measure.
225			*/
226	0	0	if (ec >= xenv->mxcost) {
227			long fbest, fbest1, bbest, bbest1;
228
229	0		fbest = fbest1 = -1;
230	0	0	for (d = fmax; d >= fmin; d -= 2) {
231	0		i1 = XDL_MIN(kvdf[d], lim1);
232	0		i2 = i1 - d;
233	0	0	if (lim2 < i2)
234	0		i1 = lim2 + d, i2 = lim2;
235	0	0	if (fbest < i1 + i2) {
236	0		fbest = i1 + i2;
237	0		fbest1 = i1;
238			}
239			}
240
241	0		bbest = bbest1 = XDL_LINE_MAX;
242	0	0	for (d = bmax; d >= bmin; d -= 2) {
243	0		i1 = XDL_MAX(off1, kvdb[d]);
244	0		i2 = i1 - d;
245	0	0	if (i2 < off2)
246	0		i1 = off2 + d, i2 = off2;
247	0	0	if (i1 + i2 < bbest) {
248	0		bbest = i1 + i2;
249	0		bbest1 = i1;
250			}
251			}
252
253	0	0	if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {
254	0		spl->i1 = fbest1;
255	0		spl->i2 = fbest - fbest1;
256	0		spl->min_lo = 1;
257	0		spl->min_hi = 0;
258			} else {
259	0		spl->i1 = bbest1;
260	0		spl->i2 = bbest - bbest1;
261	0		spl->min_lo = 0;
262	0		spl->min_hi = 1;
263			}
264	0		return ec;
265			}
266	0		}
267			}
268
269
270			/*
271			* Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
272			* the box splitting function. Note that the real job (marking changed lines)
273			* is done in the two boundary reaching checks.
274			*/
275	73		int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
276			diffdata_t *dd2, long off2, long lim2,
277			long kvdf, long kvdb, int need_min, xdalgoenv_t *xenv) {
278	73		unsigned long const ha1 = dd1->ha, ha2 = dd2->ha;
279
280			/*
281			* Shrink the box by walking through each diagonal snake (SW and NE).
282			*/
283	75	100	for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);
		100
		50
284	73	100	for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);
		50
		0
285
286			/*
287			* If one dimension is empty, then all records on the other one must
288			* be obviously changed.
289			*/
290	73	100	if (off1 == lim1) {
291	71		char *rchg2 = dd2->rchg;
292	71		long *rindex2 = dd2->rindex;
293
294	71	50	for (; off2 < lim2; off2++)
295	0		rchg2[rindex2[off2]] = 1;
296	2	50	} else if (off2 == lim2) {
297	2		char *rchg1 = dd1->rchg;
298	2		long *rindex1 = dd1->rindex;
299
300	6	100	for (; off1 < lim1; off1++)
301	4		rchg1[rindex1[off1]] = 1;
302			} else {
303			xdpsplit_t spl;
304	0		spl.i1 = spl.i2 = 0;
305
306			/*
307			* Divide ...
308			*/
309	0	0	if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
310			need_min, &spl, xenv) < 0) {
311
312	0		return -1;
313			}
314
315			/*
316			* ... et Impera.
317			*/
318	0	0	if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
319	0	0	kvdf, kvdb, spl.min_lo, xenv) < 0 \|\|
320	0		xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
321			kvdf, kvdb, spl.min_hi, xenv) < 0) {
322
323	0		return -1;
324			}
325			}
326
327	73		return 0;
328			}
329
330
331	73		int xdl_do_diff(mmfile_t mf1, mmfile_t mf2, xpparam_t const *xpp,
332			xdfenv_t *xe) {
333			size_t ndiags, allocsize;
334			long kvd, kvdf, *kvdb;
335			xdalgoenv_t xenv;
336			diffdata_t dd1, dd2;
337
338	73	50	if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF)
339	0		return xdl_do_patience_diff(mf1, mf2, xpp, xe);
340
341	73	50	if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
342	0		return xdl_do_histogram_diff(mf1, mf2, xpp, xe);
343
344	73	50	if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
345
346	0		return -1;
347			}
348
349			/*
350			* Allocate and setup K vectors to be used by the differential algorithm.
351			* One is to store the forward path and one to store the backward path.
352			*/
353	73	50	GIT_ERROR_CHECK_ALLOC_ADD3(&ndiags, xe->xdf1.nreff, xe->xdf2.nreff, 3);
		50
354	73	50	GIT_ERROR_CHECK_ALLOC_MULTIPLY(&allocsize, ndiags, 2);
		50
355	73	50	GIT_ERROR_CHECK_ALLOC_ADD(&allocsize, allocsize, 2);
		50
356	73	50	GIT_ERROR_CHECK_ALLOC_MULTIPLY(&allocsize, allocsize, sizeof(long));
		50
357
358	73	50	if (!(kvd = (long *) xdl_malloc(allocsize))) {
359	0		xdl_free_env(xe);
360	0		return -1;
361			}
362	73		kvdf = kvd;
363	73		kvdb = kvdf + ndiags;
364	73		kvdf += xe->xdf2.nreff + 1;
365	73		kvdb += xe->xdf2.nreff + 1;
366
367	73		xenv.mxcost = xdl_bogosqrt(ndiags);
368	73	50	if (xenv.mxcost < XDL_MAX_COST_MIN)
369	73		xenv.mxcost = XDL_MAX_COST_MIN;
370	73		xenv.snake_cnt = XDL_SNAKE_CNT;
371	73		xenv.heur_min = XDL_HEUR_MIN_COST;
372
373	73		dd1.nrec = xe->xdf1.nreff;
374	73		dd1.ha = xe->xdf1.ha;
375	73		dd1.rchg = xe->xdf1.rchg;
376	73		dd1.rindex = xe->xdf1.rindex;
377	73		dd2.nrec = xe->xdf2.nreff;
378	73		dd2.ha = xe->xdf2.ha;
379	73		dd2.rchg = xe->xdf2.rchg;
380	73		dd2.rindex = xe->xdf2.rindex;
381
382	73	50	if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
383	73		kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {
384
385	0		xdl_free(kvd);
386	0		xdl_free_env(xe);
387	0		return -1;
388			}
389
390	73		xdl_free(kvd);
391
392	73		return 0;
393			}
394
395
396	75		static xdchange_t xdl_add_change(xdchange_t xscr, long i1, long i2, long chg1, long chg2) {
397			xdchange_t *xch;
398
399	75	50	if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
400	0		return NULL;
401
402	75		xch->next = xscr;
403	75		xch->i1 = i1;
404	75		xch->i2 = i2;
405	75		xch->chg1 = chg1;
406	75		xch->chg2 = chg2;
407	75		xch->ignore = 0;
408
409	75		return xch;
410			}
411
412
413	30		static int recs_match(xrecord_t rec1, xrecord_t rec2, long flags)
414			{
415	42		return (rec1->ha == rec2->ha &&
416	12		xdl_recmatch(rec1->ptr, rec1->size,
417			rec2->ptr, rec2->size,
418			flags));
419			}
420
421			/*
422			* If a line is indented more than this, get_indent() just returns this value.
423			* This avoids having to do absurd amounts of work for data that are not
424			* human-readable text, and also ensures that the output of get_indent fits within
425			* an int.
426			*/
427			#define MAX_INDENT 200
428
429			/*
430			* Return the amount of indentation of the specified line, treating TAB as 8
431			* columns. Return -1 if line is empty or contains only whitespace. Clamp the
432			* output value at MAX_INDENT.
433			*/
434	0		static int get_indent(xrecord_t *rec)
435			{
436			long i;
437	0		int ret = 0;
438
439	0	0	for (i = 0; i < rec->size; i++) {
440	0		char c = rec->ptr[i];
441
442	0	0	if (!XDL_ISSPACE(c))
443	0		return ret;
444	0	0	else if (c == ' ')
445	0		ret += 1;
446	0	0	else if (c == '\t')
447	0		ret += 8 - ret % 8;
448			/* ignore other whitespace characters */
449
450	0	0	if (ret >= MAX_INDENT)
451	0		return MAX_INDENT;
452			}
453
454			/* The line contains only whitespace. */
455	0		return -1;
456			}
457
458			/*
459			* If more than this number of consecutive blank rows are found, just return this
460			* value. This avoids requiring O(N^2) work for pathological cases, and also
461			* ensures that the output of score_split fits in an int.
462			*/
463			#define MAX_BLANKS 20
464
465			/* Characteristics measured about a hypothetical split position. */
466			struct split_measurement {
467			/*
468			* Is the split at the end of the file (aside from any blank lines)?
469			*/
470			int end_of_file;
471
472			/*
473			* How much is the line immediately following the split indented (or -1 if
474			* the line is blank):
475			*/
476			int indent;
477
478			/*
479			* How many consecutive lines above the split are blank?
480			*/
481			int pre_blank;
482
483			/*
484			* How much is the nearest non-blank line above the split indented (or -1
485			* if there is no such line)?
486			*/
487			int pre_indent;
488
489			/*
490			* How many lines after the line following the split are blank?
491			*/
492			int post_blank;
493
494			/*
495			* How much is the nearest non-blank line after the line following the
496			* split indented (or -1 if there is no such line)?
497			*/
498			int post_indent;
499			};
500
501			struct split_score {
502			/* The effective indent of this split (smaller is preferred). */
503			int effective_indent;
504
505			/* Penalty for this split (smaller is preferred). */
506			int penalty;
507			};
508
509			/*
510			* Fill m with information about a hypothetical split of xdf above line split.
511			*/
512	0		static void measure_split(const xdfile_t *xdf, long split,
513			struct split_measurement *m)
514			{
515			long i;
516
517	0	0	if (split >= xdf->nrec) {
518	0		m->end_of_file = 1;
519	0		m->indent = -1;
520			} else {
521	0		m->end_of_file = 0;
522	0		m->indent = get_indent(xdf->recs[split]);
523			}
524
525	0		m->pre_blank = 0;
526	0		m->pre_indent = -1;
527	0	0	for (i = split - 1; i >= 0; i--) {
528	0		m->pre_indent = get_indent(xdf->recs[i]);
529	0	0	if (m->pre_indent != -1)
530	0		break;
531	0		m->pre_blank += 1;
532	0	0	if (m->pre_blank == MAX_BLANKS) {
533	0		m->pre_indent = 0;
534	0		break;
535			}
536			}
537
538	0		m->post_blank = 0;
539	0		m->post_indent = -1;
540	0	0	for (i = split + 1; i < xdf->nrec; i++) {
541	0		m->post_indent = get_indent(xdf->recs[i]);
542	0	0	if (m->post_indent != -1)
543	0		break;
544	0		m->post_blank += 1;
545	0	0	if (m->post_blank == MAX_BLANKS) {
546	0		m->post_indent = 0;
547	0		break;
548			}
549			}
550	0		}
551
552			/*
553			* The empirically-determined weight factors used by score_split() below.
554			* Larger values means that the position is a less favorable place to split.
555			*
556			* Note that scores are only ever compared against each other, so multiplying
557			* all of these weight/penalty values by the same factor wouldn't change the
558			* heuristic's behavior. Still, we need to set that arbitrary scale somehow.
559			* In practice, these numbers are chosen to be large enough that they can be
560			* adjusted relative to each other with sufficient precision despite using
561			* integer math.
562			*/
563
564			/* Penalty if there are no non-blank lines before the split */
565			#define START_OF_FILE_PENALTY 1
566
567			/* Penalty if there are no non-blank lines after the split */
568			#define END_OF_FILE_PENALTY 21
569
570			/* Multiplier for the number of blank lines around the split */
571			#define TOTAL_BLANK_WEIGHT (-30)
572
573			/* Multiplier for the number of blank lines after the split */
574			#define POST_BLANK_WEIGHT 6
575
576			/*
577			* Penalties applied if the line is indented more than its predecessor
578			*/
579			#define RELATIVE_INDENT_PENALTY (-4)
580			#define RELATIVE_INDENT_WITH_BLANK_PENALTY 10
581
582			/*
583			* Penalties applied if the line is indented less than both its predecessor and
584			* its successor
585			*/
586			#define RELATIVE_OUTDENT_PENALTY 24
587			#define RELATIVE_OUTDENT_WITH_BLANK_PENALTY 17
588
589			/*
590			* Penalties applied if the line is indented less than its predecessor but not
591			* less than its successor
592			*/
593			#define RELATIVE_DEDENT_PENALTY 23
594			#define RELATIVE_DEDENT_WITH_BLANK_PENALTY 17
595
596			/*
597			* We only consider whether the sum of the effective indents for splits are
598			* less than (-1), equal to (0), or greater than (+1) each other. The resulting
599			* value is multiplied by the following weight and combined with the penalty to
600			* determine the better of two scores.
601			*/
602			#define INDENT_WEIGHT 60
603
604			/*
605			* Compute a badness score for the hypothetical split whose measurements are
606			* stored in m. The weight factors were determined empirically using the tools and
607			* corpus described in
608			*
609			* https://github.com/mhagger/diff-slider-tools
610			*
611			* Also see that project if you want to improve the weights based on, for example,
612			* a larger or more diverse corpus.
613			*/
614	0		static void score_add_split(const struct split_measurement m, struct split_score s)
615			{
616			/*
617			* A place to accumulate penalty factors (positive makes this index more
618			* favored):
619			*/
620			int post_blank, total_blank, indent, any_blanks;
621
622	0	0	if (m->pre_indent == -1 && m->pre_blank == 0)
		0
623	0		s->penalty += START_OF_FILE_PENALTY;
624
625	0	0	if (m->end_of_file)
626	0		s->penalty += END_OF_FILE_PENALTY;
627
628			/*
629			* Set post_blank to the number of blank lines following the split,
630			* including the line immediately after the split:
631			*/
632	0	0	post_blank = (m->indent == -1) ? 1 + m->post_blank : 0;
633	0		total_blank = m->pre_blank + post_blank;
634
635			/* Penalties based on nearby blank lines: */
636	0		s->penalty += TOTAL_BLANK_WEIGHT * total_blank;
637	0		s->penalty += POST_BLANK_WEIGHT * post_blank;
638
639	0	0	if (m->indent != -1)
640	0		indent = m->indent;
641			else
642	0		indent = m->post_indent;
643
644	0		any_blanks = (total_blank != 0);
645
646			/* Note that the effective indent is -1 at the end of the file: */
647	0		s->effective_indent += indent;
648
649	0	0	if (indent == -1) {
650			/* No additional adjustments needed. */
651	0	0	} else if (m->pre_indent == -1) {
652			/* No additional adjustments needed. */
653	0	0	} else if (indent > m->pre_indent) {
654			/*
655			* The line is indented more than its predecessor.
656			*/
657	0		s->penalty += any_blanks ?
658	0	0	RELATIVE_INDENT_WITH_BLANK_PENALTY :
659			RELATIVE_INDENT_PENALTY;
660	0	0	} else if (indent == m->pre_indent) {
661			/*
662			* The line has the same indentation level as its predecessor.
663			* No additional adjustments needed.
664			*/
665			} else {
666			/*
667			* The line is indented less than its predecessor. It could be
668			* the block terminator of the previous block, but it could
669			* also be the start of a new block (e.g., an "else" block, or
670			* maybe the previous block didn't have a block terminator).
671			* Try to distinguish those cases based on what comes next:
672			*/
673	0	0	if (m->post_indent != -1 && m->post_indent > indent) {
		0
674			/*
675			* The following line is indented more. So it is likely
676			* that this line is the start of a block.
677			*/
678	0		s->penalty += any_blanks ?
679	0	0	RELATIVE_OUTDENT_WITH_BLANK_PENALTY :
680			RELATIVE_OUTDENT_PENALTY;
681			} else {
682			/*
683			* That was probably the end of a block.
684			*/
685	0		s->penalty += any_blanks ?
686	0	0	RELATIVE_DEDENT_WITH_BLANK_PENALTY :
687			RELATIVE_DEDENT_PENALTY;
688			}
689			}
690	0		}
691
692	0		static int score_cmp(struct split_score s1, struct split_score s2)
693			{
694			/* -1 if s1.effective_indent < s2->effective_indent, etc. */
695	0		int cmp_indents = ((s1->effective_indent > s2->effective_indent) -
696	0		(s1->effective_indent < s2->effective_indent));
697
698	0		return INDENT_WEIGHT * cmp_indents + (s1->penalty - s2->penalty);
699			}
700
701			/*
702			* Represent a group of changed lines in an xdfile_t (i.e., a contiguous group
703			* of lines that was inserted or deleted from the corresponding version of the
704			* file). We consider there to be such a group at the beginning of the file, at
705			* the end of the file, and between any two unchanged lines, though most such
706			* groups will usually be empty.
707			*
708			* If the first line in a group is equal to the line following the group, then
709			* the group can be slid down. Similarly, if the last line in a group is equal
710			* to the line preceding the group, then the group can be slid up. See
711			* group_slide_down() and group_slide_up().
712			*
713			* Note that loops that are testing for changed lines in xdf->rchg do not need
714			* index bounding since the array is prepared with a zero at position -1 and N.
715			*/
716			struct xdlgroup {
717			/*
718			* The index of the first changed line in the group, or the index of
719			* the unchanged line above which the (empty) group is located.
720			*/
721			long start;
722
723			/*
724			* The index of the first unchanged line after the group. For an empty
725			* group, end is equal to start.
726			*/
727			long end;
728			};
729
730			/*
731			* Initialize g to point at the first group in xdf.
732			*/
733	292		static void group_init(xdfile_t xdf, struct xdlgroup g)
734			{
735	292		g->start = g->end = 0;
736	516	100	while (xdf->rchg[g->end])
737	224		g->end++;
738	292		}
739
740			/*
741			* Move g to describe the next (possibly empty) group in xdf and return 0. If g
742			* is already at the end of the file, do nothing and return -1.
743			*/
744	366		XDL_INLINE(int) group_next(xdfile_t xdf, struct xdlgroup g)
745			{
746	366	100	if (g->end == xdf->nrec)
747	292		return -1;
748
749	74		g->start = g->end + 1;
750	111	100	for (g->end = g->start; xdf->rchg[g->end]; g->end++)
751			;
752
753	74		return 0;
754			}
755
756			/*
757			* Move g to describe the previous (possibly empty) group in xdf and return 0.
758			* If g is already at the beginning of the file, do nothing and return -1.
759			*/
760	6		XDL_INLINE(int) group_previous(xdfile_t xdf, struct xdlgroup g)
761			{
762	6	50	if (g->start == 0)
763	0		return -1;
764
765	6		g->end = g->start - 1;
766	9	100	for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--)
767			;
768
769	6		return 0;
770			}
771
772			/*
773			* If g can be slid toward the end of the file, do so, and if it bumps into a
774			* following group, expand this group to include it. Return 0 on success or -1
775			* if g cannot be slid down.
776			*/
777	125		static int group_slide_down(xdfile_t xdf, struct xdlgroup g, long flags)
778			{
779	138		if (g->end < xdf->nrec &&
780	13		recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) {
781	6		xdf->rchg[g->start++] = 0;
782	6		xdf->rchg[g->end++] = 1;
783
784	6	50	while (xdf->rchg[g->end])
785	0		g->end++;
786
787	6		return 0;
788			} else {
789	119		return -1;
790			}
791			}
792
793			/*
794			* If g can be slid toward the beginning of the file, do so, and if it bumps
795			* into a previous group, expand this group to include it. Return 0 on success
796			* or -1 if g cannot be slid up.
797			*/
798	125		static int group_slide_up(xdfile_t xdf, struct xdlgroup g, long flags)
799			{
800	142		if (g->start > 0 &&
801	17		recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) {
802	6		xdf->rchg[--g->start] = 1;
803	6		xdf->rchg[--g->end] = 0;
804
805	6	50	while (xdf->rchg[g->start - 1])
806	0		g->start--;
807
808	6		return 0;
809			} else {
810	119		return -1;
811			}
812			}
813
814	0		static void xdl_bug(const char *msg)
815			{
816	0		fprintf(stderr, "BUG: %s\n", msg);
817	0		exit(1);
818			}
819
820			/*
821			* Move back and forward change groups for a consistent and pretty diff output.
822			* This also helps in finding joinable change groups and reducing the diff
823			* size.
824			*/
825	146		int xdl_change_compact(xdfile_t xdf, xdfile_t xdfo, long flags) {
826			struct xdlgroup g, go;
827			long earliest_end, end_matching_other;
828			long groupsize;
829
830	146		group_init(xdf, &g);
831	146		group_init(xdfo, &go);
832
833			while (1) {
834			/* If the group is empty in the to-be-compacted file, skip it: */
835	180	100	if (g.end == g.start)
836	61		goto next;
837
838			/*
839			* Now shift the change up and then down as far as possible in
840			* each direction. If it bumps into any other changes, merge them.
841			*/
842			do {
843	119		groupsize = g.end - g.start;
844
845			/*
846			* Keep track of the last "end" index that causes this
847			* group to align with a group of changed lines in the
848			* other file. -1 indicates that we haven't found such
849			* a match yet:
850			*/
851	119		end_matching_other = -1;
852
853			/* Shift the group backward as much as possible: */
854	123	100	while (!group_slide_up(xdf, &g, flags))
855	4	50	if (group_previous(xdfo, &go))
856	0		xdl_bug("group sync broken sliding up");
857
858			/*
859			* This is this highest that this group can be shifted.
860			* Record its end index:
861			*/
862	119		earliest_end = g.end;
863
864	119	100	if (go.end > go.start)
865	87		end_matching_other = g.end;
866
867			/* Now shift the group forward as far as possible: */
868			while (1) {
869	125	100	if (group_slide_down(xdf, &g, flags))
870	119		break;
871	6	50	if (group_next(xdfo, &go))
872	0		xdl_bug("group sync broken sliding down");
873
874	6	100	if (go.end > go.start)
875	3		end_matching_other = g.end;
876	6		}
877	119	50	} while (groupsize != g.end - g.start);
878
879			/*
880			* If the group can be shifted, then we can possibly use this
881			* freedom to produce a more intuitive diff.
882			*
883			* The group is currently shifted as far down as possible, so the
884			* heuristics below only have to handle upwards shifts.
885			*/
886
887	119	100	if (g.end == earliest_end) {
888			/* no shifting was possible */
889	2	50	} else if (end_matching_other != -1) {
890			/*
891			* Move the possibly merged group of changes back to line
892			* up with the last group of changes from the other file
893			* that it can align with.
894			*/
895	4	100	while (go.end == go.start) {
896	2	50	if (group_slide_up(xdf, &g, flags))
897	0		xdl_bug("match disappeared");
898	2	50	if (group_previous(xdfo, &go))
899	0		xdl_bug("group sync broken sliding to match");
900			}
901	0	0	} else if (flags & XDF_INDENT_HEURISTIC) {
902			/*
903			* Indent heuristic: a group of pure add/delete lines
904			* implies two splits, one between the end of the "before"
905			* context and the start of the group, and another between
906			* the end of the group and the beginning of the "after"
907			* context. Some splits are aesthetically better and some
908			* are worse. We compute a badness "score" for each split,
909			* and add the scores for the two splits to define a
910			* "score" for each position that the group can be shifted
911			* to. Then we pick the shift with the lowest score.
912			*/
913	0		long shift, best_shift = -1;
914			struct split_score best_score;
915
916	0	0	for (shift = earliest_end; shift <= g.end; shift++) {
917			struct split_measurement m;
918	0		struct split_score score = {0, 0};
919
920	0		measure_split(xdf, shift, &m);
921	0		score_add_split(&m, &score);
922	0		measure_split(xdf, shift - groupsize, &m);
923	0		score_add_split(&m, &score);
924	0		if (best_shift == -1 \|\|
925	0		score_cmp(&score, &best_score) <= 0) {
926	0		best_score.effective_indent = score.effective_indent;
927	0		best_score.penalty = score.penalty;
928	0		best_shift = shift;
929			}
930			}
931
932	0	0	while (g.end > best_shift) {
933	0	0	if (group_slide_up(xdf, &g, flags))
934	0		xdl_bug("best shift unreached");
935	0	0	if (group_previous(xdfo, &go))
936	0		xdl_bug("group sync broken sliding to blank line");
937			}
938			}
939
940			next:
941			/* Move past the just-processed group: */
942	180	100	if (group_next(xdf, &g))
943	146		break;
944	34	50	if (group_next(xdfo, &go))
945	0		xdl_bug("group sync broken moving to next group");
946	34		}
947
948	146	50	if (!group_next(xdfo, &go))
949	0		xdl_bug("group sync broken at end of file");
950
951	146		return 0;
952			}
953
954
955	73		int xdl_build_script(xdfenv_t xe, xdchange_t *xscr) {
956	73		xdchange_t cscr = NULL, xch;
957	73		char rchg1 = xe->xdf1.rchg, rchg2 = xe->xdf2.rchg;
958			long i1, i2, l1, l2;
959
960			/*
961			* Trivial. Collects "groups" of changes and creates an edit script.
962			*/
963	163	100	for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 \|\| i2 >= 0; i1--, i2--)
		50
964	90	100	if (rchg1[i1 - 1] \|\| rchg2[i2 - 1]) {
		100
965	124	100	for (l1 = i1; rchg1[i1 - 1]; i1--);
966	155	100	for (l2 = i2; rchg2[i2 - 1]; i2--);
967
968	75	50	if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) {
969	0		xdl_free_script(cscr);
970	0		return -1;
971			}
972	75		cscr = xch;
973			}
974
975	73		*xscr = cscr;
976
977	73		return 0;
978			}
979
980
981	73		void xdl_free_script(xdchange_t *xscr) {
982			xdchange_t *xch;
983
984	148	100	while ((xch = xscr) != NULL) {
985	75		xscr = xscr->next;
986	75		xdl_free(xch);
987			}
988	73		}
989
990	2		static int xdl_call_hunk_func(xdfenv_t xe, xdchange_t xscr, xdemitcb_t *ecb,
991			xdemitconf_t const *xecfg)
992			{
993			xdchange_t xch, xche;
994
995			(void)xe;
996
997	4	100	for (xch = xscr; xch; xch = xche->next) {
998	2		xche = xdl_get_hunk(&xch, xecfg);
999	2	50	if (!xch)
1000	0		break;
1001	2	50	if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1,
1002	4		xch->i2, xche->i2 + xche->chg2 - xch->i2,
1003			ecb->priv) < 0)
1004	0		return -1;
1005			}
1006	2		return 0;
1007			}
1008
1009	0		static void xdl_mark_ignorable(xdchange_t xscr, xdfenv_t xe, long flags)
1010			{
1011			xdchange_t *xch;
1012
1013	0	0	for (xch = xscr; xch; xch = xch->next) {
1014	0		int ignore = 1;
1015			xrecord_t **rec;
1016			long i;
1017
1018	0		rec = &xe->xdf1.recs[xch->i1];
1019	0	0	for (i = 0; i < xch->chg1 && ignore; i++)
		0
1020	0		ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
1021
1022	0		rec = &xe->xdf2.recs[xch->i2];
1023	0	0	for (i = 0; i < xch->chg2 && ignore; i++)
		0
1024	0		ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
1025
1026	0		xch->ignore = ignore;
1027			}
1028	0		}
1029
1030	40		int xdl_diff(mmfile_t mf1, mmfile_t mf2, xpparam_t const *xpp,
1031			xdemitconf_t const xecfg, xdemitcb_t ecb) {
1032			xdchange_t *xscr;
1033			xdfenv_t xe;
1034	40	100	emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
1035
1036	40	50	if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
1037
1038	0		return -1;
1039			}
1040	80		if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 \|\|
1041	80	50	xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 \|\|
1042	40		xdl_build_script(&xe, &xscr) < 0) {
1043
1044	0		xdl_free_env(&xe);
1045	0		return -1;
1046			}
1047	40	50	if (xscr) {
1048	40	50	if (xpp->flags & XDF_IGNORE_BLANK_LINES)
1049	0		xdl_mark_ignorable(xscr, &xe, xpp->flags);
1050
1051	40	50	if (ef(&xe, xscr, ecb, xecfg) < 0) {
1052
1053	0		xdl_free_script(xscr);
1054	0		xdl_free_env(&xe);
1055	0		return -1;
1056			}
1057	40		xdl_free_script(xscr);
1058			}
1059	40		xdl_free_env(&xe);
1060
1061	40		return 0;
1062			}