line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
|
* Copyright (C) the libgit2 contributors. All rights reserved. |
3
|
|
|
|
|
|
|
* |
4
|
|
|
|
|
|
|
* This file is part of libgit2, distributed under the GNU GPL v2 with |
5
|
|
|
|
|
|
|
* a Linking Exception. For full terms see the included COPYING file. |
6
|
|
|
|
|
|
|
*/ |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
#include "diff_tform.h" |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#include "git2/config.h" |
11
|
|
|
|
|
|
|
#include "git2/blob.h" |
12
|
|
|
|
|
|
|
#include "git2/sys/hashsig.h" |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
#include "diff.h" |
15
|
|
|
|
|
|
|
#include "diff_generate.h" |
16
|
|
|
|
|
|
|
#include "path.h" |
17
|
|
|
|
|
|
|
#include "futils.h" |
18
|
|
|
|
|
|
|
#include "config.h" |
19
|
|
|
|
|
|
|
|
20
|
27
|
|
|
|
|
|
/*
 * Create a heap-allocated copy of delta `d`.
 *
 * The delta record itself comes from git__malloc(); the path strings are
 * duplicated into `pool`.  Internal flag bits are cleared on the copy.
 * When the source's new path is NULL or is the very same pointer as its
 * old path, the copy's new path aliases the copy's old path instead of
 * being duplicated a second time.
 *
 * Returns the new delta, or NULL if any allocation fails.
 */
git_diff_delta *git_diff__delta_dup(
	const git_diff_delta *d, git_pool *pool)
{
	git_diff_delta *copy;
	bool has_distinct_new_path;

	if ((copy = git__malloc(sizeof(git_diff_delta))) == NULL)
		return NULL;

	memcpy(copy, d, sizeof(git_diff_delta));
	GIT_DIFF_FLAG__CLEAR_INTERNAL(copy->flags);

	if (d->old_file.path != NULL) {
		copy->old_file.path = git_pool_strdup(pool, d->old_file.path);
		if (copy->old_file.path == NULL) {
			git__free(copy);
			return NULL;
		}
	}

	has_distinct_new_path =
		d->new_file.path != NULL && d->new_file.path != d->old_file.path;

	if (!has_distinct_new_path) {
		/* share the single duplicated string between both sides */
		copy->new_file.path = copy->old_file.path;
		return copy;
	}

	copy->new_file.path = git_pool_strdup(pool, d->new_file.path);
	if (copy->new_file.path != NULL)
		return copy;

	git__free(copy);
	return NULL;
}
50
|
|
|
|
|
|
|
|
51
|
3
|
|
|
|
|
|
git_diff_delta *git_diff__merge_like_cgit( |
52
|
|
|
|
|
|
|
const git_diff_delta *a, |
53
|
|
|
|
|
|
|
const git_diff_delta *b, |
54
|
|
|
|
|
|
|
git_pool *pool) |
55
|
|
|
|
|
|
|
{ |
56
|
|
|
|
|
|
|
git_diff_delta *dup; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
/* Emulate C git for merging two diffs (a la 'git diff '). |
59
|
|
|
|
|
|
|
* |
60
|
|
|
|
|
|
|
* When C git does a diff between the work dir and a tree, it actually |
61
|
|
|
|
|
|
|
* diffs with the index but uses the workdir contents. This emulates |
62
|
|
|
|
|
|
|
* those choices so we can emulate the type of diff. |
63
|
|
|
|
|
|
|
* |
64
|
|
|
|
|
|
|
* We have three file descriptions here, let's call them: |
65
|
|
|
|
|
|
|
* f1 = a->old_file |
66
|
|
|
|
|
|
|
* f2 = a->new_file AND b->old_file |
67
|
|
|
|
|
|
|
* f3 = b->new_file |
68
|
|
|
|
|
|
|
*/ |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
/* If one of the diffs is a conflict, just dup it */ |
71
|
3
|
50
|
|
|
|
|
if (b->status == GIT_DELTA_CONFLICTED) |
72
|
0
|
|
|
|
|
|
return git_diff__delta_dup(b, pool); |
73
|
3
|
50
|
|
|
|
|
if (a->status == GIT_DELTA_CONFLICTED) |
74
|
0
|
|
|
|
|
|
return git_diff__delta_dup(a, pool); |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
/* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */ |
77
|
3
|
50
|
|
|
|
|
if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED) |
|
|
50
|
|
|
|
|
|
78
|
0
|
|
|
|
|
|
return git_diff__delta_dup(a, pool); |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
/* otherwise, base this diff on the 'b' diff */ |
81
|
3
|
50
|
|
|
|
|
if ((dup = git_diff__delta_dup(b, pool)) == NULL) |
82
|
0
|
|
|
|
|
|
return NULL; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
/* If 'a' status is uninteresting, then we're done */ |
85
|
3
|
50
|
|
|
|
|
if (a->status == GIT_DELTA_UNMODIFIED || |
|
|
50
|
|
|
|
|
|
86
|
3
|
50
|
|
|
|
|
a->status == GIT_DELTA_UNTRACKED || |
87
|
3
|
|
|
|
|
|
a->status == GIT_DELTA_UNREADABLE) |
88
|
0
|
|
|
|
|
|
return dup; |
89
|
|
|
|
|
|
|
|
90
|
3
|
50
|
|
|
|
|
assert(b->status != GIT_DELTA_UNMODIFIED); |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
/* A cgit exception is that the diff of a file that is only in the |
93
|
|
|
|
|
|
|
* index (i.e. not in HEAD nor workdir) is given as empty. |
94
|
|
|
|
|
|
|
*/ |
95
|
3
|
50
|
|
|
|
|
if (dup->status == GIT_DELTA_DELETED) { |
96
|
0
|
0
|
|
|
|
|
if (a->status == GIT_DELTA_ADDED) { |
97
|
0
|
|
|
|
|
|
dup->status = GIT_DELTA_UNMODIFIED; |
98
|
0
|
|
|
|
|
|
dup->nfiles = 2; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
/* else don't overwrite DELETE status */ |
101
|
|
|
|
|
|
|
} else { |
102
|
3
|
|
|
|
|
|
dup->status = a->status; |
103
|
3
|
|
|
|
|
|
dup->nfiles = a->nfiles; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
3
|
|
|
|
|
|
git_oid_cpy(&dup->old_file.id, &a->old_file.id); |
107
|
3
|
|
|
|
|
|
dup->old_file.mode = a->old_file.mode; |
108
|
3
|
|
|
|
|
|
dup->old_file.size = a->old_file.size; |
109
|
3
|
|
|
|
|
|
dup->old_file.flags = a->old_file.flags; |
110
|
|
|
|
|
|
|
|
111
|
3
|
|
|
|
|
|
return dup; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
6
|
|
|
|
|
|
/*
 * Merge the deltas of `from` into `onto`.
 *
 * Both delta lists are walked in parallel, comparing old_file.path
 * (case-insensitively when GIT_DIFF_IGNORE_CASE is set on `onto`).  A path
 * present on one side only is duplicated as is; a path present on both
 * sides is handed to `cb`, with the arguments swapped when the diffs are
 * reversed.  Merged deltas that `onto`'s options say should be skipped are
 * dropped.
 * NOTE(review): the parallel walk presumably relies on both lists being
 * sorted by old path -- confirm against the callers.
 *
 * On success the new delta list and its string pool replace those of
 * `onto`; on failure `onto` is left with its original deltas.
 *
 * Returns 0 on success (including when `from` has no deltas), -1 on error.
 */
int git_diff__merge(
	git_diff *onto, const git_diff *from, git_diff__merge_cb cb)
{
	int error = 0;
	git_pool onto_pool;
	git_vector onto_new;
	git_diff_delta *delta;
	bool ignore_case, reversed;
	unsigned int i, j;

	assert(onto && from);

	if (!from->deltas.length)
		return 0;

	ignore_case = ((onto->opts.flags & GIT_DIFF_IGNORE_CASE) != 0);
	reversed    = ((onto->opts.flags & GIT_DIFF_REVERSE) != 0);

	if (ignore_case != ((from->opts.flags & GIT_DIFF_IGNORE_CASE) != 0) ||
		reversed    != ((from->opts.flags & GIT_DIFF_REVERSE) != 0)) {
		git_error_set(GIT_ERROR_INVALID,
			"attempt to merge diffs created with conflicting options");
		return -1;
	}

	if (git_vector_init(&onto_new, onto->deltas.length, git_diff_delta__cmp) < 0)
		return -1;

	git_pool_init(&onto_pool, 1);

	for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) {
		git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i);
		const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j);
		int cmp = !f ? -1 : !o ? 1 :
			STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path);

		if (cmp < 0) {
			delta = git_diff__delta_dup(o, &onto_pool);
			i++;
		} else if (cmp > 0) {
			delta = git_diff__delta_dup(f, &onto_pool);
			j++;
		} else {
			const git_diff_delta *left = reversed ? f : o;
			const git_diff_delta *right = reversed ? o : f;

			delta = cb(left, right, &onto_pool);
			i++;
			j++;
		}

		/* duplication or the merge callback failed */
		if (!delta) {
			error = -1;
			break;
		}

		/* the ignore rules for the target may not match the source
		 * or the result of a merged delta could be skippable...
		 */
		if (git_diff_delta__should_skip(&onto->opts, delta)) {
			git__free(delta);
			continue;
		}

		if ((error = git_vector_insert(&onto_new, delta)) < 0) {
			/* the delta never made it into onto_new, so the
			 * git_vector_free_deep() below cannot reach it
			 */
			git__free(delta);
			break;
		}
	}

	if (!error) {
		git_vector_swap(&onto->deltas, &onto_new);
		git_pool_swap(&onto->pool, &onto_pool);

		if (reversed)
			onto->old_src = from->old_src;
		else
			onto->new_src = from->new_src;

		/* prefix strings also come from old pool, so recreate those
		 * before the old pool is cleared below
		 */
		onto->opts.old_prefix =
			git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix);
		onto->opts.new_prefix =
			git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix);
	}

	/* after a successful swap these hold the replaced list and pool;
	 * after a failure they hold the partially built ones
	 */
	git_vector_free_deep(&onto_new);
	git_pool_clear(&onto_pool);

	return error;
}
198
|
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
|
/*
 * Public entry point: merge `from` into `onto`, resolving paths present in
 * both diffs with the C-git-compatible callback.
 */
int git_diff_merge(git_diff *onto, const git_diff *from)
{
	git_diff__merge_cb merge_cb = git_diff__merge_like_cgit;

	return git_diff__merge(onto, from, merge_cb);
}
203
|
|
|
|
|
|
|
|
204
|
5
|
|
|
|
|
|
int git_diff_find_similar__hashsig_for_file( |
205
|
|
|
|
|
|
|
void **out, const git_diff_file *f, const char *path, void *p) |
206
|
|
|
|
|
|
|
{ |
207
|
5
|
|
|
|
|
|
git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
GIT_UNUSED(f); |
210
|
5
|
|
|
|
|
|
return git_hashsig_create_fromfile((git_hashsig **)out, path, opt); |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
6
|
|
|
|
|
|
int git_diff_find_similar__hashsig_for_buf( |
214
|
|
|
|
|
|
|
void **out, const git_diff_file *f, const char *buf, size_t len, void *p) |
215
|
|
|
|
|
|
|
{ |
216
|
6
|
|
|
|
|
|
git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
GIT_UNUSED(f); |
219
|
6
|
|
|
|
|
|
return git_hashsig_create((git_hashsig **)out, buf, len, opt); |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
11
|
|
|
|
|
|
void git_diff_find_similar__hashsig_free(void *sig, void *payload) |
223
|
|
|
|
|
|
|
{ |
224
|
|
|
|
|
|
|
GIT_UNUSED(payload); |
225
|
11
|
|
|
|
|
|
git_hashsig_free(sig); |
226
|
11
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
|
228
|
7
|
|
|
|
|
|
/*
 * Default similarity callback: compare two hashsig signatures.
 *
 * git_hashsig_compare() reports either a negative error code or the
 * score itself; the score is stored in `*score` and 0 is returned, while
 * an error is passed through and `*score` is left untouched.  The payload
 * is not used.
 */
int git_diff_find_similar__calc_similarity(
	int *score, void *siga, void *sigb, void *payload)
{
	int result;

	GIT_UNUSED(payload);

	result = git_hashsig_compare(siga, sigb);
	if (result >= 0) {
		*score = result;
		return 0;
	}

	return result;
}
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
#define DEFAULT_THRESHOLD 50 |
243
|
|
|
|
|
|
|
#define DEFAULT_BREAK_REWRITE_THRESHOLD 60 |
244
|
|
|
|
|
|
|
#define DEFAULT_RENAME_LIMIT 200 |
245
|
|
|
|
|
|
|
|
246
|
11
|
|
|
|
|
|
static int normalize_find_opts( |
247
|
|
|
|
|
|
|
git_diff *diff, |
248
|
|
|
|
|
|
|
git_diff_find_options *opts, |
249
|
|
|
|
|
|
|
const git_diff_find_options *given) |
250
|
|
|
|
|
|
|
{ |
251
|
11
|
|
|
|
|
|
git_config *cfg = NULL; |
252
|
|
|
|
|
|
|
git_hashsig_option_t hashsig_opts; |
253
|
|
|
|
|
|
|
|
254
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_VERSION(given, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options"); |
255
|
|
|
|
|
|
|
|
256
|
22
|
|
|
|
|
|
if (diff->repo != NULL && |
257
|
11
|
|
|
|
|
|
git_repository_config__weakptr(&cfg, diff->repo) < 0) |
258
|
0
|
|
|
|
|
|
return -1; |
259
|
|
|
|
|
|
|
|
260
|
11
|
50
|
|
|
|
|
if (given) |
261
|
11
|
|
|
|
|
|
memcpy(opts, given, sizeof(*opts)); |
262
|
|
|
|
|
|
|
|
263
|
11
|
50
|
|
|
|
|
if (!given || |
|
|
100
|
|
|
|
|
|
264
|
11
|
|
|
|
|
|
(given->flags & GIT_DIFF_FIND_ALL) == GIT_DIFF_FIND_BY_CONFIG) |
265
|
|
|
|
|
|
|
{ |
266
|
2
|
50
|
|
|
|
|
if (cfg) { |
267
|
2
|
|
|
|
|
|
char *rule = |
268
|
2
|
|
|
|
|
|
git_config__get_string_force(cfg, "diff.renames", "true"); |
269
|
|
|
|
|
|
|
int boolval; |
270
|
|
|
|
|
|
|
|
271
|
2
|
50
|
|
|
|
|
if (!git__parse_bool(&boolval, rule) && !boolval) |
|
|
50
|
|
|
|
|
|
272
|
|
|
|
|
|
|
/* don't set FIND_RENAMES if bool value is false */; |
273
|
2
|
50
|
|
|
|
|
else if (!strcasecmp(rule, "copies") || !strcasecmp(rule, "copy")) |
|
|
50
|
|
|
|
|
|
274
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; |
275
|
|
|
|
|
|
|
else |
276
|
2
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES; |
277
|
|
|
|
|
|
|
|
278
|
2
|
|
|
|
|
|
git__free(rule); |
279
|
|
|
|
|
|
|
} else { |
280
|
|
|
|
|
|
|
/* set default flag */ |
281
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
/* some flags imply others */ |
286
|
|
|
|
|
|
|
|
287
|
11
|
50
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) { |
288
|
|
|
|
|
|
|
/* if we are only looking for exact matches, then don't turn |
289
|
|
|
|
|
|
|
* MODIFIED items into ADD/DELETE pairs because it's too picky |
290
|
|
|
|
|
|
|
*/ |
291
|
0
|
|
|
|
|
|
opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES); |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
/* similarly, don't look for self-rewrites to split */ |
294
|
0
|
|
|
|
|
|
opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
11
|
50
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES) |
298
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES; |
299
|
|
|
|
|
|
|
|
300
|
11
|
50
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED) |
301
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_COPIES; |
302
|
|
|
|
|
|
|
|
303
|
11
|
100
|
|
|
|
|
if (opts->flags & GIT_DIFF_BREAK_REWRITES) |
304
|
1
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_REWRITES; |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
#define USE_DEFAULT(X) ((X) == 0 || (X) > 100) |
307
|
|
|
|
|
|
|
|
308
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->rename_threshold)) |
|
|
50
|
|
|
|
|
|
309
|
10
|
|
|
|
|
|
opts->rename_threshold = DEFAULT_THRESHOLD; |
310
|
|
|
|
|
|
|
|
311
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->rename_from_rewrite_threshold)) |
|
|
50
|
|
|
|
|
|
312
|
10
|
|
|
|
|
|
opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD; |
313
|
|
|
|
|
|
|
|
314
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->copy_threshold)) |
|
|
50
|
|
|
|
|
|
315
|
10
|
|
|
|
|
|
opts->copy_threshold = DEFAULT_THRESHOLD; |
316
|
|
|
|
|
|
|
|
317
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->break_rewrite_threshold)) |
|
|
50
|
|
|
|
|
|
318
|
10
|
|
|
|
|
|
opts->break_rewrite_threshold = DEFAULT_BREAK_REWRITE_THRESHOLD; |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
#undef USE_DEFAULT |
321
|
|
|
|
|
|
|
|
322
|
11
|
100
|
|
|
|
|
if (!opts->rename_limit) { |
323
|
10
|
50
|
|
|
|
|
if (cfg) { |
324
|
10
|
|
|
|
|
|
opts->rename_limit = git_config__get_int_force( |
325
|
|
|
|
|
|
|
cfg, "diff.renamelimit", DEFAULT_RENAME_LIMIT); |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
10
|
50
|
|
|
|
|
if (opts->rename_limit <= 0) |
329
|
0
|
|
|
|
|
|
opts->rename_limit = DEFAULT_RENAME_LIMIT; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
/* assign the internal metric with whitespace flag as payload */ |
333
|
11
|
50
|
|
|
|
|
if (!opts->metric) { |
334
|
11
|
|
|
|
|
|
opts->metric = git__malloc(sizeof(git_diff_similarity_metric)); |
335
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(opts->metric); |
336
|
|
|
|
|
|
|
|
337
|
11
|
|
|
|
|
|
opts->metric->file_signature = git_diff_find_similar__hashsig_for_file; |
338
|
11
|
|
|
|
|
|
opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf; |
339
|
11
|
|
|
|
|
|
opts->metric->free_signature = git_diff_find_similar__hashsig_free; |
340
|
11
|
|
|
|
|
|
opts->metric->similarity = git_diff_find_similar__calc_similarity; |
341
|
|
|
|
|
|
|
|
342
|
11
|
100
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) |
343
|
1
|
|
|
|
|
|
hashsig_opts = GIT_HASHSIG_IGNORE_WHITESPACE; |
344
|
10
|
50
|
|
|
|
|
else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) |
345
|
0
|
|
|
|
|
|
hashsig_opts = GIT_HASHSIG_NORMAL; |
346
|
|
|
|
|
|
|
else |
347
|
10
|
|
|
|
|
|
hashsig_opts = GIT_HASHSIG_SMART_WHITESPACE; |
348
|
11
|
|
|
|
|
|
hashsig_opts |= GIT_HASHSIG_ALLOW_SMALL_FILES; |
349
|
11
|
|
|
|
|
|
opts->metric->payload = (void *)hashsig_opts; |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
11
|
|
|
|
|
|
return 0; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
0
|
|
|
|
|
|
static int insert_delete_side_of_split( |
356
|
|
|
|
|
|
|
git_diff *diff, git_vector *onto, const git_diff_delta *delta) |
357
|
|
|
|
|
|
|
{ |
358
|
|
|
|
|
|
|
/* make new record for DELETED side of split */ |
359
|
0
|
|
|
|
|
|
git_diff_delta *deleted = git_diff__delta_dup(delta, &diff->pool); |
360
|
0
|
0
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(deleted); |
361
|
|
|
|
|
|
|
|
362
|
0
|
|
|
|
|
|
deleted->status = GIT_DELTA_DELETED; |
363
|
0
|
|
|
|
|
|
deleted->nfiles = 1; |
364
|
0
|
|
|
|
|
|
memset(&deleted->new_file, 0, sizeof(deleted->new_file)); |
365
|
0
|
|
|
|
|
|
deleted->new_file.path = deleted->old_file.path; |
366
|
0
|
|
|
|
|
|
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
367
|
|
|
|
|
|
|
|
368
|
0
|
|
|
|
|
|
return git_vector_insert(onto, deleted); |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
6
|
|
|
|
|
|
static int apply_splits_and_deletes( |
372
|
|
|
|
|
|
|
git_diff *diff, size_t expected_size, bool actually_split) |
373
|
|
|
|
|
|
|
{ |
374
|
6
|
|
|
|
|
|
git_vector onto = GIT_VECTOR_INIT; |
375
|
|
|
|
|
|
|
size_t i; |
376
|
|
|
|
|
|
|
git_diff_delta *delta; |
377
|
|
|
|
|
|
|
|
378
|
6
|
50
|
|
|
|
|
if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0) |
379
|
0
|
|
|
|
|
|
return -1; |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
/* build new delta list without TO_DELETE and splitting TO_SPLIT */ |
382
|
21
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, i, delta) { |
383
|
15
|
100
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
384
|
6
|
|
|
|
|
|
continue; |
385
|
|
|
|
|
|
|
|
386
|
9
|
50
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) { |
|
|
0
|
|
|
|
|
|
387
|
0
|
|
|
|
|
|
delta->similarity = 0; |
388
|
|
|
|
|
|
|
|
389
|
0
|
0
|
|
|
|
|
if (insert_delete_side_of_split(diff, &onto, delta) < 0) |
390
|
0
|
|
|
|
|
|
goto on_error; |
391
|
|
|
|
|
|
|
|
392
|
0
|
0
|
|
|
|
|
if (diff->new_src == GIT_ITERATOR_WORKDIR) |
393
|
0
|
|
|
|
|
|
delta->status = GIT_DELTA_UNTRACKED; |
394
|
|
|
|
|
|
|
else |
395
|
0
|
|
|
|
|
|
delta->status = GIT_DELTA_ADDED; |
396
|
0
|
|
|
|
|
|
delta->nfiles = 1; |
397
|
0
|
|
|
|
|
|
memset(&delta->old_file, 0, sizeof(delta->old_file)); |
398
|
0
|
|
|
|
|
|
delta->old_file.path = delta->new_file.path; |
399
|
0
|
|
|
|
|
|
delta->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
/* clean up delta before inserting into new list */ |
403
|
9
|
|
|
|
|
|
GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); |
404
|
|
|
|
|
|
|
|
405
|
9
|
50
|
|
|
|
|
if (delta->status != GIT_DELTA_COPIED && |
|
|
100
|
|
|
|
|
|
406
|
3
|
50
|
|
|
|
|
delta->status != GIT_DELTA_RENAMED && |
407
|
0
|
0
|
|
|
|
|
(delta->status != GIT_DELTA_MODIFIED || actually_split)) |
408
|
3
|
|
|
|
|
|
delta->similarity = 0; |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
/* insert into new list */ |
411
|
9
|
50
|
|
|
|
|
if (git_vector_insert(&onto, delta) < 0) |
412
|
0
|
|
|
|
|
|
goto on_error; |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
/* cannot return an error past this point */ |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
/* free deltas from old list that didn't make it to the new one */ |
418
|
21
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, i, delta) { |
419
|
15
|
100
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
420
|
6
|
|
|
|
|
|
git__free(delta); |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
/* swap new delta list into place */ |
424
|
6
|
|
|
|
|
|
git_vector_swap(&diff->deltas, &onto); |
425
|
6
|
|
|
|
|
|
git_vector_free(&onto); |
426
|
6
|
|
|
|
|
|
git_vector_sort(&diff->deltas); |
427
|
|
|
|
|
|
|
|
428
|
6
|
|
|
|
|
|
return 0; |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
on_error: |
431
|
0
|
|
|
|
|
|
git_vector_free_deep(&onto); |
432
|
|
|
|
|
|
|
|
433
|
6
|
|
|
|
|
|
return -1; |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
31
|
|
|
|
|
|
/*
 * Map a file index onto a side of a delta: index 2*N is the old file of
 * delta N, index 2*N+1 is its new file.
 */
GIT_INLINE(git_diff_file *) similarity_get_file(git_diff *diff, size_t idx)
{
	git_diff_delta *owner = git_vector_get(&diff->deltas, idx / 2);

	if (idx & 1)
		return &owner->new_file;

	return &owner->old_file;
}
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
typedef struct { |
443
|
|
|
|
|
|
|
size_t idx; |
444
|
|
|
|
|
|
|
git_iterator_t src; |
445
|
|
|
|
|
|
|
git_repository *repo; |
446
|
|
|
|
|
|
|
git_diff_file *file; |
447
|
|
|
|
|
|
|
git_buf data; |
448
|
|
|
|
|
|
|
git_odb_object *odb_obj; |
449
|
|
|
|
|
|
|
git_blob *blob; |
450
|
|
|
|
|
|
|
} similarity_info; |
451
|
|
|
|
|
|
|
|
452
|
11
|
|
|
|
|
|
static int similarity_init( |
453
|
|
|
|
|
|
|
similarity_info *info, git_diff *diff, size_t file_idx) |
454
|
|
|
|
|
|
|
{ |
455
|
11
|
|
|
|
|
|
info->idx = file_idx; |
456
|
11
|
100
|
|
|
|
|
info->src = (file_idx & 1) ? diff->new_src : diff->old_src; |
457
|
11
|
|
|
|
|
|
info->repo = diff->repo; |
458
|
11
|
|
|
|
|
|
info->file = similarity_get_file(diff, file_idx); |
459
|
11
|
|
|
|
|
|
info->odb_obj = NULL; |
460
|
11
|
|
|
|
|
|
info->blob = NULL; |
461
|
11
|
|
|
|
|
|
git_buf_init(&info->data, 0); |
462
|
|
|
|
|
|
|
|
463
|
11
|
100
|
|
|
|
|
if (info->file->size > 0 || info->src == GIT_ITERATOR_WORKDIR) |
|
|
50
|
|
|
|
|
|
464
|
7
|
|
|
|
|
|
return 0; |
465
|
|
|
|
|
|
|
|
466
|
4
|
|
|
|
|
|
return git_diff_file__resolve_zero_size( |
467
|
|
|
|
|
|
|
info->file, &info->odb_obj, info->repo); |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
|
470
|
11
|
|
|
|
|
|
static int similarity_sig( |
471
|
|
|
|
|
|
|
similarity_info *info, |
472
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
473
|
|
|
|
|
|
|
void **cache) |
474
|
|
|
|
|
|
|
{ |
475
|
11
|
|
|
|
|
|
int error = 0; |
476
|
11
|
|
|
|
|
|
git_diff_file *file = info->file; |
477
|
|
|
|
|
|
|
|
478
|
11
|
100
|
|
|
|
|
if (info->src == GIT_ITERATOR_WORKDIR) { |
479
|
5
|
50
|
|
|
|
|
if ((error = git_buf_joinpath( |
480
|
5
|
|
|
|
|
|
&info->data, git_repository_workdir(info->repo), file->path)) < 0) |
481
|
0
|
|
|
|
|
|
return error; |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
/* if path is not a regular file, just skip this item */ |
484
|
5
|
50
|
|
|
|
|
if (!git_path_isfile(info->data.ptr)) |
485
|
0
|
|
|
|
|
|
return 0; |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
/* TODO: apply wd-to-odb filters to file data if necessary */ |
488
|
|
|
|
|
|
|
|
489
|
5
|
|
|
|
|
|
error = opts->metric->file_signature( |
490
|
5
|
|
|
|
|
|
&cache[info->idx], info->file, |
491
|
10
|
|
|
|
|
|
info->data.ptr, opts->metric->payload); |
492
|
|
|
|
|
|
|
} else { |
493
|
|
|
|
|
|
|
/* if we didn't initially know the size, we might have an odb_obj |
494
|
|
|
|
|
|
|
* around from earlier, so convert that, otherwise load the blob now |
495
|
|
|
|
|
|
|
*/ |
496
|
6
|
50
|
|
|
|
|
if (info->odb_obj != NULL) |
497
|
0
|
|
|
|
|
|
error = git_object__from_odb_object( |
498
|
0
|
|
|
|
|
|
(git_object **)&info->blob, info->repo, |
499
|
|
|
|
|
|
|
info->odb_obj, GIT_OBJECT_BLOB); |
500
|
|
|
|
|
|
|
else |
501
|
6
|
|
|
|
|
|
error = git_blob_lookup(&info->blob, info->repo, &file->id); |
502
|
|
|
|
|
|
|
|
503
|
6
|
50
|
|
|
|
|
if (error < 0) { |
504
|
|
|
|
|
|
|
/* if lookup fails, just skip this item in similarity calc */ |
505
|
0
|
|
|
|
|
|
git_error_clear(); |
506
|
|
|
|
|
|
|
} else { |
507
|
|
|
|
|
|
|
size_t sz; |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
/* index size may not be actual blob size if filtered */ |
510
|
6
|
50
|
|
|
|
|
if (file->size != git_blob_rawsize(info->blob)) |
511
|
0
|
|
|
|
|
|
file->size = git_blob_rawsize(info->blob); |
512
|
|
|
|
|
|
|
|
513
|
6
|
50
|
|
|
|
|
sz = git__is_sizet(file->size) ? (size_t)file->size : (size_t)-1; |
514
|
|
|
|
|
|
|
|
515
|
6
|
|
|
|
|
|
error = opts->metric->buffer_signature( |
516
|
6
|
|
|
|
|
|
&cache[info->idx], info->file, |
517
|
6
|
|
|
|
|
|
git_blob_rawcontent(info->blob), sz, opts->metric->payload); |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
|
|
521
|
11
|
|
|
|
|
|
return error; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
14
|
|
|
|
|
|
/*
 * Release what `info` picked up while a signature was computed: the odb
 * object if there is one, and then either the blob or, when no blob was
 * loaded, the path buffer.
 */
static void similarity_unload(similarity_info *info)
{
	if (info->odb_obj != NULL)
		git_odb_object_free(info->odb_obj);

	if (info->blob == NULL) {
		git_buf_dispose(&info->data);
		return;
	}

	git_blob_free(info->blob);
}
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
#define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0) |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
/* - score < 0 means files cannot be compared |
538
|
|
|
|
|
|
|
* - score >= 100 means files are exact match |
539
|
|
|
|
|
|
|
* - score == 0 means files are completely different |
540
|
|
|
|
|
|
|
*/ |
541
|
10
|
|
|
|
|
|
static int similarity_measure( |
542
|
|
|
|
|
|
|
int *score, |
543
|
|
|
|
|
|
|
git_diff *diff, |
544
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
545
|
|
|
|
|
|
|
void **cache, |
546
|
|
|
|
|
|
|
size_t a_idx, |
547
|
|
|
|
|
|
|
size_t b_idx) |
548
|
|
|
|
|
|
|
{ |
549
|
10
|
|
|
|
|
|
git_diff_file *a_file = similarity_get_file(diff, a_idx); |
550
|
10
|
|
|
|
|
|
git_diff_file *b_file = similarity_get_file(diff, b_idx); |
551
|
10
|
|
|
|
|
|
bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY); |
552
|
10
|
|
|
|
|
|
int error = 0; |
553
|
|
|
|
|
|
|
similarity_info a_info, b_info; |
554
|
|
|
|
|
|
|
|
555
|
10
|
|
|
|
|
|
*score = -1; |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
/* don't try to compare things that aren't files */ |
558
|
10
|
50
|
|
|
|
|
if (!GIT_MODE_ISBLOB(a_file->mode) || !GIT_MODE_ISBLOB(b_file->mode)) |
|
|
50
|
|
|
|
|
|
559
|
0
|
|
|
|
|
|
return 0; |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
/* if exact match is requested, force calculation of missing OIDs now */ |
562
|
10
|
50
|
|
|
|
|
if (exact_match) { |
563
|
0
|
0
|
|
|
|
|
if (git_oid_is_zero(&a_file->id) && |
|
|
0
|
|
|
|
|
|
564
|
0
|
0
|
|
|
|
|
diff->old_src == GIT_ITERATOR_WORKDIR && |
565
|
0
|
|
|
|
|
|
!git_diff__oid_for_file(&a_file->id, |
566
|
0
|
|
|
|
|
|
diff, a_file->path, a_file->mode, a_file->size)) |
567
|
0
|
|
|
|
|
|
a_file->flags |= GIT_DIFF_FLAG_VALID_ID; |
568
|
|
|
|
|
|
|
|
569
|
0
|
0
|
|
|
|
|
if (git_oid_is_zero(&b_file->id) && |
|
|
0
|
|
|
|
|
|
570
|
0
|
0
|
|
|
|
|
diff->new_src == GIT_ITERATOR_WORKDIR && |
571
|
0
|
|
|
|
|
|
!git_diff__oid_for_file(&b_file->id, |
572
|
0
|
|
|
|
|
|
diff, b_file->path, b_file->mode, b_file->size)) |
573
|
0
|
|
|
|
|
|
b_file->flags |= GIT_DIFF_FLAG_VALID_ID; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
/* check OID match as a quick test */ |
577
|
10
|
100
|
|
|
|
|
if (git_oid__cmp(&a_file->id, &b_file->id) == 0) { |
578
|
3
|
|
|
|
|
|
*score = 100; |
579
|
3
|
|
|
|
|
|
return 0; |
580
|
|
|
|
|
|
|
} |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
/* don't calculate signatures if we are doing exact match */ |
583
|
7
|
50
|
|
|
|
|
if (exact_match) { |
584
|
0
|
|
|
|
|
|
*score = 0; |
585
|
0
|
|
|
|
|
|
return 0; |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
7
|
|
|
|
|
|
memset(&a_info, 0, sizeof(a_info)); |
589
|
7
|
|
|
|
|
|
memset(&b_info, 0, sizeof(b_info)); |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
/* set up similarity data (will try to update missing file sizes) */ |
592
|
7
|
100
|
|
|
|
|
if (!cache[a_idx] && (error = similarity_init(&a_info, diff, a_idx)) < 0) |
|
|
50
|
|
|
|
|
|
593
|
0
|
|
|
|
|
|
return error; |
594
|
7
|
50
|
|
|
|
|
if (!cache[b_idx] && (error = similarity_init(&b_info, diff, b_idx)) < 0) |
|
|
50
|
|
|
|
|
|
595
|
0
|
|
|
|
|
|
goto cleanup; |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
/* check if file sizes are nowhere near each other */ |
598
|
7
|
50
|
|
|
|
|
if (a_file->size > 127 && |
|
|
0
|
|
|
|
|
|
599
|
0
|
0
|
|
|
|
|
b_file->size > 127 && |
600
|
0
|
0
|
|
|
|
|
(a_file->size > (b_file->size << 3) || |
601
|
0
|
|
|
|
|
|
b_file->size > (a_file->size << 3))) |
602
|
|
|
|
|
|
|
goto cleanup; |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
/* update signature cache if needed */ |
605
|
7
|
100
|
|
|
|
|
if (!cache[a_idx]) { |
606
|
4
|
50
|
|
|
|
|
if ((error = similarity_sig(&a_info, opts, cache)) < 0) |
607
|
0
|
|
|
|
|
|
goto cleanup; |
608
|
|
|
|
|
|
|
} |
609
|
7
|
50
|
|
|
|
|
if (!cache[b_idx]) { |
610
|
7
|
50
|
|
|
|
|
if ((error = similarity_sig(&b_info, opts, cache)) < 0) |
611
|
0
|
|
|
|
|
|
goto cleanup; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
/* calculate similarity provided that the metric choose to process |
615
|
|
|
|
|
|
|
* both the a and b files (some may not if file is too big, etc). |
616
|
|
|
|
|
|
|
*/ |
617
|
7
|
50
|
|
|
|
|
if (cache[a_idx] && cache[b_idx]) |
|
|
50
|
|
|
|
|
|
618
|
7
|
|
|
|
|
|
error = opts->metric->similarity( |
619
|
21
|
|
|
|
|
|
score, cache[a_idx], cache[b_idx], opts->metric->payload); |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
cleanup: |
622
|
7
|
|
|
|
|
|
similarity_unload(&a_info); |
623
|
7
|
|
|
|
|
|
similarity_unload(&b_info); |
624
|
|
|
|
|
|
|
|
625
|
10
|
|
|
|
|
|
return error; |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
0
|
|
|
|
|
|
static int calc_self_similarity( |
629
|
|
|
|
|
|
|
git_diff *diff, |
630
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
631
|
|
|
|
|
|
|
size_t delta_idx, |
632
|
|
|
|
|
|
|
void **cache) |
633
|
|
|
|
|
|
|
{ |
634
|
0
|
|
|
|
|
|
int error, similarity = -1; |
635
|
0
|
0
|
|
|
|
|
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); |
636
|
|
|
|
|
|
|
|
637
|
0
|
0
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0) |
638
|
0
|
|
|
|
|
|
return 0; |
639
|
|
|
|
|
|
|
|
640
|
0
|
|
|
|
|
|
error = similarity_measure( |
641
|
0
|
|
|
|
|
|
&similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1); |
642
|
0
|
0
|
|
|
|
|
if (error < 0) |
643
|
0
|
|
|
|
|
|
return error; |
644
|
|
|
|
|
|
|
|
645
|
0
|
0
|
|
|
|
|
if (similarity >= 0) { |
646
|
0
|
|
|
|
|
|
delta->similarity = (uint16_t)similarity; |
647
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY; |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
|
650
|
0
|
|
|
|
|
|
return 0; |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
25
|
|
|
|
|
|
static bool is_rename_target( |
654
|
|
|
|
|
|
|
git_diff *diff, |
655
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
656
|
|
|
|
|
|
|
size_t delta_idx, |
657
|
|
|
|
|
|
|
void **cache) |
658
|
|
|
|
|
|
|
{ |
659
|
25
|
50
|
|
|
|
|
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
/* skip things that aren't plain blobs */ |
662
|
25
|
100
|
|
|
|
|
if (!GIT_MODE_ISBLOB(delta->new_file.mode)) |
663
|
10
|
|
|
|
|
|
return false; |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
/* only consider ADDED, RENAMED, COPIED, and split MODIFIED as |
666
|
|
|
|
|
|
|
* targets; maybe include UNTRACKED if requested. |
667
|
|
|
|
|
|
|
*/ |
668
|
15
|
|
|
|
|
|
switch (delta->status) { |
669
|
|
|
|
|
|
|
case GIT_DELTA_UNMODIFIED: |
670
|
|
|
|
|
|
|
case GIT_DELTA_DELETED: |
671
|
|
|
|
|
|
|
case GIT_DELTA_IGNORED: |
672
|
|
|
|
|
|
|
case GIT_DELTA_CONFLICTED: |
673
|
0
|
|
|
|
|
|
return false; |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
case GIT_DELTA_MODIFIED: |
676
|
2
|
50
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && |
|
|
50
|
|
|
|
|
|
677
|
2
|
|
|
|
|
|
!FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) |
678
|
2
|
|
|
|
|
|
return false; |
679
|
|
|
|
|
|
|
|
680
|
0
|
0
|
|
|
|
|
if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) |
681
|
0
|
|
|
|
|
|
return false; |
682
|
|
|
|
|
|
|
|
683
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && |
|
|
0
|
|
|
|
|
|
684
|
0
|
|
|
|
|
|
delta->similarity < opts->break_rewrite_threshold) { |
685
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; |
686
|
0
|
|
|
|
|
|
break; |
687
|
|
|
|
|
|
|
} |
688
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && |
|
|
0
|
|
|
|
|
|
689
|
0
|
|
|
|
|
|
delta->similarity < opts->rename_from_rewrite_threshold) { |
690
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; |
691
|
0
|
|
|
|
|
|
break; |
692
|
|
|
|
|
|
|
} |
693
|
|
|
|
|
|
|
|
694
|
0
|
|
|
|
|
|
return false; |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
case GIT_DELTA_UNTRACKED: |
697
|
7
|
50
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED)) |
698
|
0
|
|
|
|
|
|
return false; |
699
|
7
|
|
|
|
|
|
break; |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
default: /* all other status values should be checked */ |
702
|
6
|
|
|
|
|
|
break; |
703
|
|
|
|
|
|
|
} |
704
|
|
|
|
|
|
|
|
705
|
13
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET; |
706
|
13
|
|
|
|
|
|
return true; |
707
|
|
|
|
|
|
|
} |
708
|
|
|
|
|
|
|
|
709
|
25
|
|
|
|
|
|
static bool is_rename_source( |
710
|
|
|
|
|
|
|
git_diff *diff, |
711
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
712
|
|
|
|
|
|
|
size_t delta_idx, |
713
|
|
|
|
|
|
|
void **cache) |
714
|
|
|
|
|
|
|
{ |
715
|
25
|
50
|
|
|
|
|
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
/* skip things that aren't blobs */ |
718
|
25
|
100
|
|
|
|
|
if (!GIT_MODE_ISBLOB(delta->old_file.mode)) |
719
|
16
|
|
|
|
|
|
return false; |
720
|
|
|
|
|
|
|
|
721
|
9
|
|
|
|
|
|
switch (delta->status) { |
722
|
|
|
|
|
|
|
case GIT_DELTA_ADDED: |
723
|
|
|
|
|
|
|
case GIT_DELTA_UNTRACKED: |
724
|
|
|
|
|
|
|
case GIT_DELTA_UNREADABLE: |
725
|
|
|
|
|
|
|
case GIT_DELTA_IGNORED: |
726
|
|
|
|
|
|
|
case GIT_DELTA_CONFLICTED: |
727
|
0
|
|
|
|
|
|
return false; |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
case GIT_DELTA_DELETED: |
730
|
|
|
|
|
|
|
case GIT_DELTA_TYPECHANGE: |
731
|
7
|
|
|
|
|
|
break; |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
case GIT_DELTA_UNMODIFIED: |
734
|
0
|
0
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) |
735
|
0
|
|
|
|
|
|
return false; |
736
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED)) |
737
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_DELETE; |
738
|
0
|
|
|
|
|
|
break; |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
default: /* MODIFIED, RENAMED, COPIED */ |
741
|
|
|
|
|
|
|
/* if we're finding copies, this could be a source */ |
742
|
2
|
50
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) |
743
|
0
|
|
|
|
|
|
break; |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
/* otherwise, this is only a source if we can split it */ |
746
|
2
|
50
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && |
|
|
50
|
|
|
|
|
|
747
|
2
|
|
|
|
|
|
!FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) |
748
|
2
|
|
|
|
|
|
return false; |
749
|
|
|
|
|
|
|
|
750
|
0
|
0
|
|
|
|
|
if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) |
751
|
0
|
|
|
|
|
|
return false; |
752
|
|
|
|
|
|
|
|
753
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && |
|
|
0
|
|
|
|
|
|
754
|
0
|
|
|
|
|
|
delta->similarity < opts->break_rewrite_threshold) { |
755
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; |
756
|
0
|
|
|
|
|
|
break; |
757
|
|
|
|
|
|
|
} |
758
|
|
|
|
|
|
|
|
759
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && |
|
|
0
|
|
|
|
|
|
760
|
0
|
|
|
|
|
|
delta->similarity < opts->rename_from_rewrite_threshold) |
761
|
0
|
|
|
|
|
|
break; |
762
|
|
|
|
|
|
|
|
763
|
0
|
|
|
|
|
|
return false; |
764
|
|
|
|
|
|
|
} |
765
|
|
|
|
|
|
|
|
766
|
7
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE; |
767
|
7
|
|
|
|
|
|
return true; |
768
|
|
|
|
|
|
|
} |
769
|
|
|
|
|
|
|
|
770
|
0
|
|
|
|
|
|
GIT_INLINE(bool) delta_is_split(git_diff_delta *delta) |
771
|
|
|
|
|
|
|
{ |
772
|
0
|
0
|
|
|
|
|
return (delta->status == GIT_DELTA_TYPECHANGE || |
|
|
0
|
|
|
|
|
|
773
|
0
|
|
|
|
|
|
(delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0); |
774
|
|
|
|
|
|
|
} |
775
|
|
|
|
|
|
|
|
776
|
6
|
|
|
|
|
|
GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta) |
777
|
|
|
|
|
|
|
{ |
778
|
7
|
50
|
|
|
|
|
return (delta->status == GIT_DELTA_ADDED || |
779
|
0
|
0
|
|
|
|
|
delta->status == GIT_DELTA_UNTRACKED || |
780
|
7
|
100
|
|
|
|
|
delta->status == GIT_DELTA_UNREADABLE || |
|
|
0
|
|
|
|
|
|
781
|
0
|
|
|
|
|
|
delta->status == GIT_DELTA_IGNORED); |
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
|
784
|
6
|
|
|
|
|
|
GIT_INLINE(void) delta_make_rename( |
785
|
|
|
|
|
|
|
git_diff_delta *to, const git_diff_delta *from, uint16_t similarity) |
786
|
|
|
|
|
|
|
{ |
787
|
6
|
|
|
|
|
|
to->status = GIT_DELTA_RENAMED; |
788
|
6
|
|
|
|
|
|
to->similarity = similarity; |
789
|
6
|
|
|
|
|
|
to->nfiles = 2; |
790
|
6
|
|
|
|
|
|
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); |
791
|
6
|
|
|
|
|
|
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
792
|
6
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
typedef struct { |
795
|
|
|
|
|
|
|
size_t idx; |
796
|
|
|
|
|
|
|
uint16_t similarity; |
797
|
|
|
|
|
|
|
} diff_find_match; |
798
|
|
|
|
|
|
|
|
799
|
11
|
|
|
|
|
|
int git_diff_find_similar( |
800
|
|
|
|
|
|
|
git_diff *diff, |
801
|
|
|
|
|
|
|
const git_diff_find_options *given_opts) |
802
|
|
|
|
|
|
|
{ |
803
|
|
|
|
|
|
|
size_t s, t; |
804
|
11
|
|
|
|
|
|
int error = 0, result; |
805
|
|
|
|
|
|
|
uint16_t similarity; |
806
|
|
|
|
|
|
|
git_diff_delta *src, *tgt; |
807
|
11
|
|
|
|
|
|
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT; |
808
|
11
|
|
|
|
|
|
size_t num_deltas, num_srcs = 0, num_tgts = 0; |
809
|
11
|
|
|
|
|
|
size_t tried_srcs = 0, tried_tgts = 0; |
810
|
11
|
|
|
|
|
|
size_t num_rewrites = 0, num_updates = 0, num_bumped = 0; |
811
|
|
|
|
|
|
|
size_t sigcache_size; |
812
|
11
|
|
|
|
|
|
void **sigcache = NULL; /* cache of similarity metric file signatures */ |
813
|
11
|
|
|
|
|
|
diff_find_match *tgt2src = NULL; |
814
|
11
|
|
|
|
|
|
diff_find_match *src2tgt = NULL; |
815
|
11
|
|
|
|
|
|
diff_find_match *tgt2src_copy = NULL; |
816
|
|
|
|
|
|
|
diff_find_match *best_match; |
817
|
|
|
|
|
|
|
git_diff_file swap; |
818
|
|
|
|
|
|
|
|
819
|
11
|
50
|
|
|
|
|
assert(diff); |
820
|
|
|
|
|
|
|
|
821
|
11
|
50
|
|
|
|
|
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) |
822
|
0
|
|
|
|
|
|
return error; |
823
|
|
|
|
|
|
|
|
824
|
11
|
|
|
|
|
|
num_deltas = diff->deltas.length; |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
/* TODO: maybe abort if deltas.length > rename_limit ??? */ |
827
|
11
|
50
|
|
|
|
|
if (!num_deltas || !git__is_uint32(num_deltas)) |
|
|
50
|
|
|
|
|
|
828
|
|
|
|
|
|
|
goto cleanup; |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
/* No flags set; nothing to do */ |
831
|
11
|
50
|
|
|
|
|
if ((opts.flags & GIT_DIFF_FIND_ALL) == 0) |
832
|
0
|
|
|
|
|
|
goto cleanup; |
833
|
|
|
|
|
|
|
|
834
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC_MULTIPLY(&sigcache_size, num_deltas, 2); |
|
|
50
|
|
|
|
|
|
835
|
11
|
|
|
|
|
|
sigcache = git__calloc(sigcache_size, sizeof(void *)); |
836
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(sigcache); |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
/* Label rename sources and targets |
839
|
|
|
|
|
|
|
* |
840
|
|
|
|
|
|
|
* This will also set self-similarity scores for MODIFIED files and |
841
|
|
|
|
|
|
|
* mark them for splitting if break-rewrites is enabled |
842
|
|
|
|
|
|
|
*/ |
843
|
36
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, t, tgt) { |
844
|
25
|
100
|
|
|
|
|
if (is_rename_source(diff, &opts, t, sigcache)) |
845
|
7
|
|
|
|
|
|
++num_srcs; |
846
|
|
|
|
|
|
|
|
847
|
25
|
100
|
|
|
|
|
if (is_rename_target(diff, &opts, t, sigcache)) |
848
|
13
|
|
|
|
|
|
++num_tgts; |
849
|
|
|
|
|
|
|
|
850
|
25
|
50
|
|
|
|
|
if ((tgt->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) |
851
|
0
|
|
|
|
|
|
num_rewrites++; |
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
/* if there are no candidate srcs or tgts, we're done */ |
855
|
11
|
100
|
|
|
|
|
if (!num_srcs || !num_tgts) |
|
|
50
|
|
|
|
|
|
856
|
|
|
|
|
|
|
goto cleanup; |
857
|
|
|
|
|
|
|
|
858
|
7
|
|
|
|
|
|
src2tgt = git__calloc(num_deltas, sizeof(diff_find_match)); |
859
|
7
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(src2tgt); |
860
|
7
|
|
|
|
|
|
tgt2src = git__calloc(num_deltas, sizeof(diff_find_match)); |
861
|
7
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(tgt2src); |
862
|
|
|
|
|
|
|
|
863
|
7
|
50
|
|
|
|
|
if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { |
864
|
0
|
|
|
|
|
|
tgt2src_copy = git__calloc(num_deltas, sizeof(diff_find_match)); |
865
|
0
|
0
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(tgt2src_copy); |
866
|
|
|
|
|
|
|
} |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
/* |
869
|
|
|
|
|
|
|
* Find best-fit matches for rename / copy candidates |
870
|
|
|
|
|
|
|
*/ |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
find_best_matches: |
873
|
7
|
|
|
|
|
|
tried_tgts = num_bumped = 0; |
874
|
|
|
|
|
|
|
|
875
|
17
|
50
|
|
|
|
|
git_vector_foreach(&diff->deltas, t, tgt) { |
876
|
|
|
|
|
|
|
/* skip things that are not rename targets */ |
877
|
17
|
100
|
|
|
|
|
if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
878
|
7
|
|
|
|
|
|
continue; |
879
|
|
|
|
|
|
|
|
880
|
10
|
|
|
|
|
|
tried_srcs = 0; |
881
|
|
|
|
|
|
|
|
882
|
17
|
50
|
|
|
|
|
git_vector_foreach(&diff->deltas, s, src) { |
883
|
|
|
|
|
|
|
/* skip things that are not rename sources */ |
884
|
17
|
100
|
|
|
|
|
if ((src->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0) |
885
|
7
|
|
|
|
|
|
continue; |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
/* calculate similarity for this pair and find best match */ |
888
|
10
|
50
|
|
|
|
|
if (s == t) |
889
|
0
|
|
|
|
|
|
result = -1; /* don't measure self-similarity here */ |
890
|
10
|
50
|
|
|
|
|
else if ((error = similarity_measure( |
891
|
10
|
|
|
|
|
|
&result, diff, &opts, sigcache, 2 * s, 2 * t + 1)) < 0) |
892
|
0
|
|
|
|
|
|
goto cleanup; |
893
|
|
|
|
|
|
|
|
894
|
10
|
50
|
|
|
|
|
if (result < 0) |
895
|
0
|
|
|
|
|
|
continue; |
896
|
10
|
|
|
|
|
|
similarity = (uint16_t)result; |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
/* is this a better rename? */ |
899
|
10
|
100
|
|
|
|
|
if (tgt2src[t].similarity < similarity && |
|
|
50
|
|
|
|
|
|
900
|
6
|
|
|
|
|
|
src2tgt[s].similarity < similarity) |
901
|
|
|
|
|
|
|
{ |
902
|
|
|
|
|
|
|
/* eject old mapping */ |
903
|
6
|
50
|
|
|
|
|
if (src2tgt[s].similarity > 0) { |
904
|
0
|
|
|
|
|
|
tgt2src[src2tgt[s].idx].similarity = 0; |
905
|
0
|
|
|
|
|
|
num_bumped++; |
906
|
|
|
|
|
|
|
} |
907
|
6
|
50
|
|
|
|
|
if (tgt2src[t].similarity > 0) { |
908
|
0
|
|
|
|
|
|
src2tgt[tgt2src[t].idx].similarity = 0; |
909
|
0
|
|
|
|
|
|
num_bumped++; |
910
|
|
|
|
|
|
|
} |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
/* write new mapping */ |
913
|
6
|
|
|
|
|
|
tgt2src[t].idx = s; |
914
|
6
|
|
|
|
|
|
tgt2src[t].similarity = similarity; |
915
|
6
|
|
|
|
|
|
src2tgt[s].idx = t; |
916
|
6
|
|
|
|
|
|
src2tgt[s].similarity = similarity; |
917
|
|
|
|
|
|
|
} |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
/* keep best absolute match for copies */ |
920
|
10
|
50
|
|
|
|
|
if (tgt2src_copy != NULL && |
|
|
0
|
|
|
|
|
|
921
|
0
|
|
|
|
|
|
tgt2src_copy[t].similarity < similarity) |
922
|
|
|
|
|
|
|
{ |
923
|
0
|
|
|
|
|
|
tgt2src_copy[t].idx = s; |
924
|
0
|
|
|
|
|
|
tgt2src_copy[t].similarity = similarity; |
925
|
|
|
|
|
|
|
} |
926
|
|
|
|
|
|
|
|
927
|
10
|
50
|
|
|
|
|
if (++tried_srcs >= num_srcs) |
928
|
10
|
|
|
|
|
|
break; |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
/* cap on maximum targets we'll examine (per "tgt" file) */ |
931
|
0
|
0
|
|
|
|
|
if (tried_srcs > opts.rename_limit) |
932
|
0
|
|
|
|
|
|
break; |
933
|
|
|
|
|
|
|
} |
934
|
|
|
|
|
|
|
|
935
|
10
|
100
|
|
|
|
|
if (++tried_tgts >= num_tgts) |
936
|
7
|
|
|
|
|
|
break; |
937
|
|
|
|
|
|
|
} |
938
|
|
|
|
|
|
|
|
939
|
7
|
50
|
|
|
|
|
if (num_bumped > 0) /* try again if we bumped some items */ |
940
|
0
|
|
|
|
|
|
goto find_best_matches; |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
/* |
943
|
|
|
|
|
|
|
* Rewrite the diffs with renames / copies |
944
|
|
|
|
|
|
|
*/ |
945
|
|
|
|
|
|
|
|
946
|
26
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, t, tgt) { |
947
|
|
|
|
|
|
|
/* skip things that are not rename targets */ |
948
|
19
|
100
|
|
|
|
|
if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
949
|
9
|
|
|
|
|
|
continue; |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
/* check if this delta was the target of a similarity */ |
952
|
10
|
100
|
|
|
|
|
if (tgt2src[t].similarity) |
953
|
6
|
|
|
|
|
|
best_match = &tgt2src[t]; |
954
|
4
|
50
|
|
|
|
|
else if (tgt2src_copy && tgt2src_copy[t].similarity) |
|
|
0
|
|
|
|
|
|
955
|
0
|
|
|
|
|
|
best_match = &tgt2src_copy[t]; |
956
|
|
|
|
|
|
|
else |
957
|
4
|
|
|
|
|
|
continue; |
958
|
|
|
|
|
|
|
|
959
|
6
|
|
|
|
|
|
s = best_match->idx; |
960
|
6
|
50
|
|
|
|
|
src = GIT_VECTOR_GET(&diff->deltas, s); |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
/* possible scenarios: |
963
|
|
|
|
|
|
|
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME |
964
|
|
|
|
|
|
|
* 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE |
965
|
|
|
|
|
|
|
* 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME |
966
|
|
|
|
|
|
|
* 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT |
967
|
|
|
|
|
|
|
* 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY |
968
|
|
|
|
|
|
|
*/ |
969
|
|
|
|
|
|
|
|
970
|
6
|
50
|
|
|
|
|
if (src->status == GIT_DELTA_DELETED) { |
971
|
|
|
|
|
|
|
|
972
|
6
|
50
|
|
|
|
|
if (delta_is_new_only(tgt)) { |
973
|
|
|
|
|
|
|
|
974
|
6
|
50
|
|
|
|
|
if (best_match->similarity < opts.rename_threshold) |
975
|
0
|
|
|
|
|
|
continue; |
976
|
|
|
|
|
|
|
|
977
|
6
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
978
|
|
|
|
|
|
|
|
979
|
6
|
|
|
|
|
|
src->flags |= GIT_DIFF_FLAG__TO_DELETE; |
980
|
6
|
|
|
|
|
|
num_rewrites++; |
981
|
|
|
|
|
|
|
} else { |
982
|
0
|
0
|
|
|
|
|
assert(delta_is_split(tgt)); |
983
|
|
|
|
|
|
|
|
984
|
0
|
0
|
|
|
|
|
if (best_match->similarity < opts.rename_from_rewrite_threshold) |
985
|
0
|
|
|
|
|
|
continue; |
986
|
|
|
|
|
|
|
|
987
|
0
|
|
|
|
|
|
memcpy(&swap, &tgt->old_file, sizeof(swap)); |
988
|
|
|
|
|
|
|
|
989
|
0
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
990
|
0
|
|
|
|
|
|
num_rewrites--; |
991
|
|
|
|
|
|
|
|
992
|
0
|
0
|
|
|
|
|
assert(src->status == GIT_DELTA_DELETED); |
993
|
0
|
|
|
|
|
|
memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
994
|
0
|
|
|
|
|
|
memset(&src->new_file, 0, sizeof(src->new_file)); |
995
|
0
|
|
|
|
|
|
src->new_file.path = src->old_file.path; |
996
|
0
|
|
|
|
|
|
src->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
997
|
|
|
|
|
|
|
|
998
|
0
|
|
|
|
|
|
num_updates++; |
999
|
|
|
|
|
|
|
|
1000
|
0
|
0
|
|
|
|
|
if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { |
|
|
0
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
/* what used to be at src t is now at src s */ |
1002
|
6
|
|
|
|
|
|
tgt2src[src2tgt[t].idx].idx = s; |
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
} |
1005
|
|
|
|
|
|
|
} |
1006
|
|
|
|
|
|
|
|
1007
|
0
|
0
|
|
|
|
|
else if (delta_is_split(src)) { |
1008
|
|
|
|
|
|
|
|
1009
|
0
|
0
|
|
|
|
|
if (delta_is_new_only(tgt)) { |
1010
|
|
|
|
|
|
|
|
1011
|
0
|
0
|
|
|
|
|
if (best_match->similarity < opts.rename_threshold) |
1012
|
0
|
|
|
|
|
|
continue; |
1013
|
|
|
|
|
|
|
|
1014
|
0
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
1015
|
|
|
|
|
|
|
|
1016
|
0
|
0
|
|
|
|
|
src->status = (diff->new_src == GIT_ITERATOR_WORKDIR) ? |
1017
|
|
|
|
|
|
|
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED; |
1018
|
0
|
|
|
|
|
|
src->nfiles = 1; |
1019
|
0
|
|
|
|
|
|
memset(&src->old_file, 0, sizeof(src->old_file)); |
1020
|
0
|
|
|
|
|
|
src->old_file.path = src->new_file.path; |
1021
|
0
|
|
|
|
|
|
src->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
1022
|
|
|
|
|
|
|
|
1023
|
0
|
|
|
|
|
|
src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
1024
|
0
|
|
|
|
|
|
num_rewrites--; |
1025
|
|
|
|
|
|
|
|
1026
|
0
|
|
|
|
|
|
num_updates++; |
1027
|
|
|
|
|
|
|
} else { |
1028
|
0
|
0
|
|
|
|
|
assert(delta_is_split(src)); |
1029
|
|
|
|
|
|
|
|
1030
|
0
|
0
|
|
|
|
|
if (best_match->similarity < opts.rename_from_rewrite_threshold) |
1031
|
0
|
|
|
|
|
|
continue; |
1032
|
|
|
|
|
|
|
|
1033
|
0
|
|
|
|
|
|
memcpy(&swap, &tgt->old_file, sizeof(swap)); |
1034
|
|
|
|
|
|
|
|
1035
|
0
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
1036
|
0
|
|
|
|
|
|
num_rewrites--; |
1037
|
0
|
|
|
|
|
|
num_updates++; |
1038
|
|
|
|
|
|
|
|
1039
|
0
|
|
|
|
|
|
memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
/* if we've just swapped the new element into the correct |
1042
|
|
|
|
|
|
|
* place, clear the SPLIT flag |
1043
|
|
|
|
|
|
|
*/ |
1044
|
0
|
0
|
|
|
|
|
if (tgt2src[s].idx == t && |
|
|
0
|
|
|
|
|
|
1045
|
0
|
|
|
|
|
|
tgt2src[s].similarity > |
1046
|
0
|
|
|
|
|
|
opts.rename_from_rewrite_threshold) { |
1047
|
0
|
|
|
|
|
|
src->status = GIT_DELTA_RENAMED; |
1048
|
0
|
|
|
|
|
|
src->similarity = tgt2src[s].similarity; |
1049
|
0
|
|
|
|
|
|
tgt2src[s].similarity = 0; |
1050
|
0
|
|
|
|
|
|
src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
1051
|
0
|
|
|
|
|
|
num_rewrites--; |
1052
|
|
|
|
|
|
|
} |
1053
|
|
|
|
|
|
|
/* otherwise, if we just overwrote a source, update mapping */ |
1054
|
0
|
0
|
|
|
|
|
else if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { |
|
|
0
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
/* what used to be at src t is now at src s */ |
1056
|
0
|
|
|
|
|
|
tgt2src[src2tgt[t].idx].idx = s; |
1057
|
|
|
|
|
|
|
} |
1058
|
|
|
|
|
|
|
|
1059
|
0
|
|
|
|
|
|
num_updates++; |
1060
|
|
|
|
|
|
|
} |
1061
|
|
|
|
|
|
|
} |
1062
|
|
|
|
|
|
|
|
1063
|
0
|
0
|
|
|
|
|
else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { |
1064
|
0
|
0
|
|
|
|
|
if (tgt2src_copy[t].similarity < opts.copy_threshold) |
1065
|
0
|
|
|
|
|
|
continue; |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
/* always use best possible source for copy */ |
1068
|
0
|
|
|
|
|
|
best_match = &tgt2src_copy[t]; |
1069
|
0
|
0
|
|
|
|
|
src = GIT_VECTOR_GET(&diff->deltas, best_match->idx); |
1070
|
|
|
|
|
|
|
|
1071
|
0
|
0
|
|
|
|
|
if (delta_is_split(tgt)) { |
1072
|
0
|
|
|
|
|
|
error = insert_delete_side_of_split(diff, &diff->deltas, tgt); |
1073
|
0
|
0
|
|
|
|
|
if (error < 0) |
1074
|
0
|
|
|
|
|
|
goto cleanup; |
1075
|
0
|
|
|
|
|
|
num_rewrites--; |
1076
|
|
|
|
|
|
|
} |
1077
|
|
|
|
|
|
|
|
1078
|
0
|
0
|
|
|
|
|
if (!delta_is_split(tgt) && !delta_is_new_only(tgt)) |
|
|
0
|
|
|
|
|
|
1079
|
0
|
|
|
|
|
|
continue; |
1080
|
|
|
|
|
|
|
|
1081
|
0
|
|
|
|
|
|
tgt->status = GIT_DELTA_COPIED; |
1082
|
0
|
|
|
|
|
|
tgt->similarity = best_match->similarity; |
1083
|
0
|
|
|
|
|
|
tgt->nfiles = 2; |
1084
|
0
|
|
|
|
|
|
memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file)); |
1085
|
0
|
|
|
|
|
|
tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
1086
|
|
|
|
|
|
|
|
1087
|
0
|
|
|
|
|
|
num_updates++; |
1088
|
|
|
|
|
|
|
} |
1089
|
|
|
|
|
|
|
} |
1090
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
/* |
1092
|
|
|
|
|
|
|
* Actually split and delete entries as needed |
1093
|
|
|
|
|
|
|
*/ |
1094
|
|
|
|
|
|
|
|
1095
|
7
|
100
|
|
|
|
|
if (num_rewrites > 0 || num_updates > 0) |
|
|
50
|
|
|
|
|
|
1096
|
6
|
|
|
|
|
|
error = apply_splits_and_deletes( |
1097
|
6
|
|
|
|
|
|
diff, diff->deltas.length - num_rewrites, |
1098
|
6
|
100
|
|
|
|
|
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES) && |
|
|
50
|
|
|
|
|
|
1099
|
1
|
|
|
|
|
|
!FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY)); |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
cleanup: |
1102
|
11
|
|
|
|
|
|
git__free(tgt2src); |
1103
|
11
|
|
|
|
|
|
git__free(src2tgt); |
1104
|
11
|
|
|
|
|
|
git__free(tgt2src_copy); |
1105
|
|
|
|
|
|
|
|
1106
|
11
|
50
|
|
|
|
|
if (sigcache) { |
1107
|
61
|
100
|
|
|
|
|
for (t = 0; t < num_deltas * 2; ++t) { |
1108
|
50
|
100
|
|
|
|
|
if (sigcache[t] != NULL) |
1109
|
11
|
|
|
|
|
|
opts.metric->free_signature(sigcache[t], opts.metric->payload); |
1110
|
|
|
|
|
|
|
} |
1111
|
11
|
|
|
|
|
|
git__free(sigcache); |
1112
|
|
|
|
|
|
|
} |
1113
|
|
|
|
|
|
|
|
1114
|
11
|
50
|
|
|
|
|
if (!given_opts || !given_opts->metric) |
|
|
50
|
|
|
|
|
|
1115
|
11
|
|
|
|
|
|
git__free(opts.metric); |
1116
|
|
|
|
|
|
|
|
1117
|
11
|
|
|
|
|
|
return error; |
1118
|
|
|
|
|
|
|
} |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
#undef FLAG_SET |