line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
|
* Copyright (C) the libgit2 contributors. All rights reserved. |
3
|
|
|
|
|
|
|
* |
4
|
|
|
|
|
|
|
* This file is part of libgit2, distributed under the GNU GPL v2 with |
5
|
|
|
|
|
|
|
* a Linking Exception. For full terms see the included COPYING file. |
6
|
|
|
|
|
|
|
*/ |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
#include "diff_tform.h" |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#include "git2/config.h" |
11
|
|
|
|
|
|
|
#include "git2/blob.h" |
12
|
|
|
|
|
|
|
#include "git2/sys/hashsig.h" |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
#include "diff.h" |
15
|
|
|
|
|
|
|
#include "diff_generate.h" |
16
|
|
|
|
|
|
|
#include "path.h" |
17
|
|
|
|
|
|
|
#include "futils.h" |
18
|
|
|
|
|
|
|
#include "config.h" |
19
|
|
|
|
|
|
|
|
20
|
27
|
|
|
|
|
|
git_diff_delta *git_diff__delta_dup( |
21
|
|
|
|
|
|
|
const git_diff_delta *d, git_pool *pool) |
22
|
|
|
|
|
|
|
{ |
23
|
27
|
|
|
|
|
|
git_diff_delta *delta = git__malloc(sizeof(git_diff_delta)); |
24
|
27
|
50
|
|
|
|
|
if (!delta) |
25
|
0
|
|
|
|
|
|
return NULL; |
26
|
|
|
|
|
|
|
|
27
|
27
|
|
|
|
|
|
memcpy(delta, d, sizeof(git_diff_delta)); |
28
|
27
|
|
|
|
|
|
GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); |
29
|
|
|
|
|
|
|
|
30
|
27
|
50
|
|
|
|
|
if (d->old_file.path != NULL) { |
31
|
27
|
|
|
|
|
|
delta->old_file.path = git_pool_strdup(pool, d->old_file.path); |
32
|
27
|
50
|
|
|
|
|
if (delta->old_file.path == NULL) |
33
|
0
|
|
|
|
|
|
goto fail; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
27
|
50
|
|
|
|
|
if (d->new_file.path != d->old_file.path && d->new_file.path != NULL) { |
|
|
0
|
|
|
|
|
|
37
|
0
|
|
|
|
|
|
delta->new_file.path = git_pool_strdup(pool, d->new_file.path); |
38
|
0
|
0
|
|
|
|
|
if (delta->new_file.path == NULL) |
39
|
0
|
|
|
|
|
|
goto fail; |
40
|
|
|
|
|
|
|
} else { |
41
|
27
|
|
|
|
|
|
delta->new_file.path = delta->old_file.path; |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
27
|
|
|
|
|
|
return delta; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
fail: |
47
|
0
|
|
|
|
|
|
git__free(delta); |
48
|
0
|
|
|
|
|
|
return NULL; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
3
|
|
|
|
|
|
git_diff_delta *git_diff__merge_like_cgit( |
52
|
|
|
|
|
|
|
const git_diff_delta *a, |
53
|
|
|
|
|
|
|
const git_diff_delta *b, |
54
|
|
|
|
|
|
|
git_pool *pool) |
55
|
|
|
|
|
|
|
{ |
56
|
|
|
|
|
|
|
git_diff_delta *dup; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
/* Emulate C git for merging two diffs (a la 'git diff <sha>'). |
59
|
|
|
|
|
|
|
* |
60
|
|
|
|
|
|
|
* When C git does a diff between the work dir and a tree, it actually |
61
|
|
|
|
|
|
|
* diffs with the index but uses the workdir contents. This emulates |
62
|
|
|
|
|
|
|
* those choices so we can emulate the type of diff. |
63
|
|
|
|
|
|
|
* |
64
|
|
|
|
|
|
|
* We have three file descriptions here, let's call them: |
65
|
|
|
|
|
|
|
* f1 = a->old_file |
66
|
|
|
|
|
|
|
* f2 = a->new_file AND b->old_file |
67
|
|
|
|
|
|
|
* f3 = b->new_file |
68
|
|
|
|
|
|
|
*/ |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
/* If one of the diffs is a conflict, just dup it */ |
71
|
3
|
50
|
|
|
|
|
if (b->status == GIT_DELTA_CONFLICTED) |
72
|
0
|
|
|
|
|
|
return git_diff__delta_dup(b, pool); |
73
|
3
|
50
|
|
|
|
|
if (a->status == GIT_DELTA_CONFLICTED) |
74
|
0
|
|
|
|
|
|
return git_diff__delta_dup(a, pool); |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
/* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */ |
77
|
3
|
50
|
|
|
|
|
if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED) |
|
|
50
|
|
|
|
|
|
78
|
0
|
|
|
|
|
|
return git_diff__delta_dup(a, pool); |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
/* otherwise, base this diff on the 'b' diff */ |
81
|
3
|
50
|
|
|
|
|
if ((dup = git_diff__delta_dup(b, pool)) == NULL) |
82
|
0
|
|
|
|
|
|
return NULL; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
/* If 'a' status is uninteresting, then we're done */ |
85
|
3
|
50
|
|
|
|
|
if (a->status == GIT_DELTA_UNMODIFIED || |
|
|
50
|
|
|
|
|
|
86
|
3
|
50
|
|
|
|
|
a->status == GIT_DELTA_UNTRACKED || |
87
|
3
|
|
|
|
|
|
a->status == GIT_DELTA_UNREADABLE) |
88
|
0
|
|
|
|
|
|
return dup; |
89
|
|
|
|
|
|
|
|
90
|
3
|
50
|
|
|
|
|
assert(b->status != GIT_DELTA_UNMODIFIED); |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
/* A cgit exception is that the diff of a file that is only in the |
93
|
|
|
|
|
|
|
* index (i.e. not in HEAD nor workdir) is given as empty. |
94
|
|
|
|
|
|
|
*/ |
95
|
3
|
50
|
|
|
|
|
if (dup->status == GIT_DELTA_DELETED) { |
96
|
0
|
0
|
|
|
|
|
if (a->status == GIT_DELTA_ADDED) { |
97
|
0
|
|
|
|
|
|
dup->status = GIT_DELTA_UNMODIFIED; |
98
|
0
|
|
|
|
|
|
dup->nfiles = 2; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
/* else don't overwrite DELETE status */ |
101
|
|
|
|
|
|
|
} else { |
102
|
3
|
|
|
|
|
|
dup->status = a->status; |
103
|
3
|
|
|
|
|
|
dup->nfiles = a->nfiles; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
3
|
|
|
|
|
|
git_oid_cpy(&dup->old_file.id, &a->old_file.id); |
107
|
3
|
|
|
|
|
|
dup->old_file.mode = a->old_file.mode; |
108
|
3
|
|
|
|
|
|
dup->old_file.size = a->old_file.size; |
109
|
3
|
|
|
|
|
|
dup->old_file.flags = a->old_file.flags; |
110
|
|
|
|
|
|
|
|
111
|
3
|
|
|
|
|
|
return dup; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
6
|
|
|
|
|
|
int git_diff__merge( |
115
|
|
|
|
|
|
|
git_diff *onto, const git_diff *from, git_diff__merge_cb cb) |
116
|
|
|
|
|
|
|
{ |
117
|
6
|
|
|
|
|
|
int error = 0; |
118
|
|
|
|
|
|
|
git_pool onto_pool; |
119
|
|
|
|
|
|
|
git_vector onto_new; |
120
|
|
|
|
|
|
|
git_diff_delta *delta; |
121
|
|
|
|
|
|
|
bool ignore_case, reversed; |
122
|
|
|
|
|
|
|
unsigned int i, j; |
123
|
|
|
|
|
|
|
|
124
|
6
|
50
|
|
|
|
|
assert(onto && from); |
|
|
50
|
|
|
|
|
|
125
|
|
|
|
|
|
|
|
126
|
6
|
100
|
|
|
|
|
if (!from->deltas.length) |
127
|
2
|
|
|
|
|
|
return 0; |
128
|
|
|
|
|
|
|
|
129
|
4
|
|
|
|
|
|
ignore_case = ((onto->opts.flags & GIT_DIFF_IGNORE_CASE) != 0); |
130
|
4
|
|
|
|
|
|
reversed = ((onto->opts.flags & GIT_DIFF_REVERSE) != 0); |
131
|
|
|
|
|
|
|
|
132
|
4
|
50
|
|
|
|
|
if (ignore_case != ((from->opts.flags & GIT_DIFF_IGNORE_CASE) != 0) || |
|
|
50
|
|
|
|
|
|
133
|
4
|
|
|
|
|
|
reversed != ((from->opts.flags & GIT_DIFF_REVERSE) != 0)) { |
134
|
0
|
|
|
|
|
|
git_error_set(GIT_ERROR_INVALID, |
135
|
|
|
|
|
|
|
"attempt to merge diffs created with conflicting options"); |
136
|
0
|
|
|
|
|
|
return -1; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
8
|
|
|
|
|
|
if (git_vector_init(&onto_new, onto->deltas.length, git_diff_delta__cmp) < 0 || |
140
|
4
|
|
|
|
|
|
git_pool_init(&onto_pool, 1) < 0) |
141
|
0
|
|
|
|
|
|
return -1; |
142
|
|
|
|
|
|
|
|
143
|
31
|
100
|
|
|
|
|
for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) { |
|
|
100
|
|
|
|
|
|
144
|
27
|
100
|
|
|
|
|
git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i); |
145
|
27
|
50
|
|
|
|
|
const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j); |
146
|
45
|
50
|
|
|
|
|
int cmp = !f ? -1 : !o ? 1 : |
|
|
100
|
|
|
|
|
|
147
|
18
|
50
|
|
|
|
|
STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path); |
148
|
|
|
|
|
|
|
|
149
|
27
|
50
|
|
|
|
|
if (cmp < 0) { |
150
|
0
|
|
|
|
|
|
delta = git_diff__delta_dup(o, &onto_pool); |
151
|
0
|
|
|
|
|
|
i++; |
152
|
27
|
100
|
|
|
|
|
} else if (cmp > 0) { |
153
|
24
|
|
|
|
|
|
delta = git_diff__delta_dup(f, &onto_pool); |
154
|
24
|
|
|
|
|
|
j++; |
155
|
|
|
|
|
|
|
} else { |
156
|
3
|
50
|
|
|
|
|
const git_diff_delta *left = reversed ? f : o; |
157
|
3
|
50
|
|
|
|
|
const git_diff_delta *right = reversed ? o : f; |
158
|
|
|
|
|
|
|
|
159
|
3
|
|
|
|
|
|
delta = cb(left, right, &onto_pool); |
160
|
3
|
|
|
|
|
|
i++; |
161
|
3
|
|
|
|
|
|
j++; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
/* the ignore rules for the target may not match the source |
165
|
|
|
|
|
|
|
* or the result of a merged delta could be skippable... |
166
|
|
|
|
|
|
|
*/ |
167
|
27
|
50
|
|
|
|
|
if (delta && git_diff_delta__should_skip(&onto->opts, delta)) { |
|
|
50
|
|
|
|
|
|
168
|
0
|
|
|
|
|
|
git__free(delta); |
169
|
0
|
|
|
|
|
|
continue; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
27
|
50
|
|
|
|
|
if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0) |
|
|
50
|
|
|
|
|
|
173
|
0
|
|
|
|
|
|
break; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
4
|
50
|
|
|
|
|
if (!error) { |
177
|
4
|
|
|
|
|
|
git_vector_swap(&onto->deltas, &onto_new); |
178
|
4
|
|
|
|
|
|
git_pool_swap(&onto->pool, &onto_pool); |
179
|
|
|
|
|
|
|
|
180
|
4
|
50
|
|
|
|
|
if ((onto->opts.flags & GIT_DIFF_REVERSE) != 0) |
181
|
0
|
|
|
|
|
|
onto->old_src = from->old_src; |
182
|
|
|
|
|
|
|
else |
183
|
4
|
|
|
|
|
|
onto->new_src = from->new_src; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
/* prefix strings also come from old pool, so recreate those. */ |
186
|
4
|
|
|
|
|
|
onto->opts.old_prefix = |
187
|
4
|
|
|
|
|
|
git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix); |
188
|
4
|
|
|
|
|
|
onto->opts.new_prefix = |
189
|
4
|
|
|
|
|
|
git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix); |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
4
|
|
|
|
|
|
git_vector_free_deep(&onto_new); |
193
|
4
|
|
|
|
|
|
git_pool_clear(&onto_pool); |
194
|
|
|
|
|
|
|
|
195
|
6
|
|
|
|
|
|
return error; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
0
|
|
|
|
|
|
int git_diff_merge(git_diff *onto, const git_diff *from) |
199
|
|
|
|
|
|
|
{ |
200
|
0
|
|
|
|
|
|
return git_diff__merge(onto, from, git_diff__merge_like_cgit); |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
5
|
|
|
|
|
|
int git_diff_find_similar__hashsig_for_file( |
204
|
|
|
|
|
|
|
void **out, const git_diff_file *f, const char *path, void *p) |
205
|
|
|
|
|
|
|
{ |
206
|
5
|
|
|
|
|
|
git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
GIT_UNUSED(f); |
209
|
5
|
|
|
|
|
|
return git_hashsig_create_fromfile((git_hashsig **)out, path, opt); |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
6
|
|
|
|
|
|
int git_diff_find_similar__hashsig_for_buf( |
213
|
|
|
|
|
|
|
void **out, const git_diff_file *f, const char *buf, size_t len, void *p) |
214
|
|
|
|
|
|
|
{ |
215
|
6
|
|
|
|
|
|
git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
GIT_UNUSED(f); |
218
|
6
|
|
|
|
|
|
return git_hashsig_create((git_hashsig **)out, buf, len, opt); |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
11
|
|
|
|
|
|
void git_diff_find_similar__hashsig_free(void *sig, void *payload) |
222
|
|
|
|
|
|
|
{ |
223
|
|
|
|
|
|
|
GIT_UNUSED(payload); |
224
|
11
|
|
|
|
|
|
git_hashsig_free(sig); |
225
|
11
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
7
|
|
|
|
|
|
int git_diff_find_similar__calc_similarity( |
228
|
|
|
|
|
|
|
int *score, void *siga, void *sigb, void *payload) |
229
|
|
|
|
|
|
|
{ |
230
|
|
|
|
|
|
|
int error; |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
GIT_UNUSED(payload); |
233
|
7
|
|
|
|
|
|
error = git_hashsig_compare(siga, sigb); |
234
|
7
|
50
|
|
|
|
|
if (error < 0) |
235
|
0
|
|
|
|
|
|
return error; |
236
|
|
|
|
|
|
|
|
237
|
7
|
|
|
|
|
|
*score = error; |
238
|
7
|
|
|
|
|
|
return 0; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
#define DEFAULT_THRESHOLD 50 |
242
|
|
|
|
|
|
|
#define DEFAULT_BREAK_REWRITE_THRESHOLD 60 |
243
|
|
|
|
|
|
|
#define DEFAULT_RENAME_LIMIT 200 |
244
|
|
|
|
|
|
|
|
245
|
11
|
|
|
|
|
|
static int normalize_find_opts( |
246
|
|
|
|
|
|
|
git_diff *diff, |
247
|
|
|
|
|
|
|
git_diff_find_options *opts, |
248
|
|
|
|
|
|
|
const git_diff_find_options *given) |
249
|
|
|
|
|
|
|
{ |
250
|
11
|
|
|
|
|
|
git_config *cfg = NULL; |
251
|
|
|
|
|
|
|
git_hashsig_option_t hashsig_opts; |
252
|
|
|
|
|
|
|
|
253
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_VERSION(given, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options"); |
254
|
|
|
|
|
|
|
|
255
|
22
|
|
|
|
|
|
if (diff->repo != NULL && |
256
|
11
|
|
|
|
|
|
git_repository_config__weakptr(&cfg, diff->repo) < 0) |
257
|
0
|
|
|
|
|
|
return -1; |
258
|
|
|
|
|
|
|
|
259
|
11
|
50
|
|
|
|
|
if (given) |
260
|
11
|
|
|
|
|
|
memcpy(opts, given, sizeof(*opts)); |
261
|
|
|
|
|
|
|
|
262
|
11
|
50
|
|
|
|
|
if (!given || |
|
|
100
|
|
|
|
|
|
263
|
11
|
|
|
|
|
|
(given->flags & GIT_DIFF_FIND_ALL) == GIT_DIFF_FIND_BY_CONFIG) |
264
|
|
|
|
|
|
|
{ |
265
|
2
|
50
|
|
|
|
|
if (cfg) { |
266
|
2
|
|
|
|
|
|
char *rule = |
267
|
2
|
|
|
|
|
|
git_config__get_string_force(cfg, "diff.renames", "true"); |
268
|
|
|
|
|
|
|
int boolval; |
269
|
|
|
|
|
|
|
|
270
|
2
|
50
|
|
|
|
|
if (!git__parse_bool(&boolval, rule) && !boolval) |
|
|
50
|
|
|
|
|
|
271
|
|
|
|
|
|
|
/* don't set FIND_RENAMES if bool value is false */; |
272
|
2
|
50
|
|
|
|
|
else if (!strcasecmp(rule, "copies") || !strcasecmp(rule, "copy")) |
|
|
50
|
|
|
|
|
|
273
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; |
274
|
|
|
|
|
|
|
else |
275
|
2
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES; |
276
|
|
|
|
|
|
|
|
277
|
2
|
|
|
|
|
|
git__free(rule); |
278
|
|
|
|
|
|
|
} else { |
279
|
|
|
|
|
|
|
/* set default flag */ |
280
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
/* some flags imply others */ |
285
|
|
|
|
|
|
|
|
286
|
11
|
50
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) { |
287
|
|
|
|
|
|
|
/* if we are only looking for exact matches, then don't turn |
288
|
|
|
|
|
|
|
* MODIFIED items into ADD/DELETE pairs because it's too picky |
289
|
|
|
|
|
|
|
*/ |
290
|
0
|
|
|
|
|
|
opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES); |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
/* similarly, don't look for self-rewrites to split */ |
293
|
0
|
|
|
|
|
|
opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
11
|
50
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES) |
297
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES; |
298
|
|
|
|
|
|
|
|
299
|
11
|
50
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED) |
300
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_COPIES; |
301
|
|
|
|
|
|
|
|
302
|
11
|
100
|
|
|
|
|
if (opts->flags & GIT_DIFF_BREAK_REWRITES) |
303
|
1
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_REWRITES; |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
#define USE_DEFAULT(X) ((X) == 0 || (X) > 100) |
306
|
|
|
|
|
|
|
|
307
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->rename_threshold)) |
|
|
50
|
|
|
|
|
|
308
|
10
|
|
|
|
|
|
opts->rename_threshold = DEFAULT_THRESHOLD; |
309
|
|
|
|
|
|
|
|
310
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->rename_from_rewrite_threshold)) |
|
|
50
|
|
|
|
|
|
311
|
10
|
|
|
|
|
|
opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD; |
312
|
|
|
|
|
|
|
|
313
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->copy_threshold)) |
|
|
50
|
|
|
|
|
|
314
|
10
|
|
|
|
|
|
opts->copy_threshold = DEFAULT_THRESHOLD; |
315
|
|
|
|
|
|
|
|
316
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->break_rewrite_threshold)) |
|
|
50
|
|
|
|
|
|
317
|
10
|
|
|
|
|
|
opts->break_rewrite_threshold = DEFAULT_BREAK_REWRITE_THRESHOLD; |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
#undef USE_DEFAULT |
320
|
|
|
|
|
|
|
|
321
|
11
|
100
|
|
|
|
|
if (!opts->rename_limit) { |
322
|
10
|
50
|
|
|
|
|
if (cfg) { |
323
|
10
|
|
|
|
|
|
opts->rename_limit = git_config__get_int_force( |
324
|
|
|
|
|
|
|
cfg, "diff.renamelimit", DEFAULT_RENAME_LIMIT); |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
10
|
50
|
|
|
|
|
if (opts->rename_limit <= 0) |
328
|
0
|
|
|
|
|
|
opts->rename_limit = DEFAULT_RENAME_LIMIT; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
/* assign the internal metric with whitespace flag as payload */ |
332
|
11
|
50
|
|
|
|
|
if (!opts->metric) { |
333
|
11
|
|
|
|
|
|
opts->metric = git__malloc(sizeof(git_diff_similarity_metric)); |
334
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(opts->metric); |
335
|
|
|
|
|
|
|
|
336
|
11
|
|
|
|
|
|
opts->metric->file_signature = git_diff_find_similar__hashsig_for_file; |
337
|
11
|
|
|
|
|
|
opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf; |
338
|
11
|
|
|
|
|
|
opts->metric->free_signature = git_diff_find_similar__hashsig_free; |
339
|
11
|
|
|
|
|
|
opts->metric->similarity = git_diff_find_similar__calc_similarity; |
340
|
|
|
|
|
|
|
|
341
|
11
|
100
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) |
342
|
1
|
|
|
|
|
|
hashsig_opts = GIT_HASHSIG_IGNORE_WHITESPACE; |
343
|
10
|
50
|
|
|
|
|
else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) |
344
|
0
|
|
|
|
|
|
hashsig_opts = GIT_HASHSIG_NORMAL; |
345
|
|
|
|
|
|
|
else |
346
|
10
|
|
|
|
|
|
hashsig_opts = GIT_HASHSIG_SMART_WHITESPACE; |
347
|
11
|
|
|
|
|
|
hashsig_opts |= GIT_HASHSIG_ALLOW_SMALL_FILES; |
348
|
11
|
|
|
|
|
|
opts->metric->payload = (void *)hashsig_opts; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
11
|
|
|
|
|
|
return 0; |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
|
354
|
0
|
|
|
|
|
|
static int insert_delete_side_of_split( |
355
|
|
|
|
|
|
|
git_diff *diff, git_vector *onto, const git_diff_delta *delta) |
356
|
|
|
|
|
|
|
{ |
357
|
|
|
|
|
|
|
/* make new record for DELETED side of split */ |
358
|
0
|
|
|
|
|
|
git_diff_delta *deleted = git_diff__delta_dup(delta, &diff->pool); |
359
|
0
|
0
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(deleted); |
360
|
|
|
|
|
|
|
|
361
|
0
|
|
|
|
|
|
deleted->status = GIT_DELTA_DELETED; |
362
|
0
|
|
|
|
|
|
deleted->nfiles = 1; |
363
|
0
|
|
|
|
|
|
memset(&deleted->new_file, 0, sizeof(deleted->new_file)); |
364
|
0
|
|
|
|
|
|
deleted->new_file.path = deleted->old_file.path; |
365
|
0
|
|
|
|
|
|
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
366
|
|
|
|
|
|
|
|
367
|
0
|
|
|
|
|
|
return git_vector_insert(onto, deleted); |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
|
370
|
6
|
|
|
|
|
|
static int apply_splits_and_deletes( |
371
|
|
|
|
|
|
|
git_diff *diff, size_t expected_size, bool actually_split) |
372
|
|
|
|
|
|
|
{ |
373
|
6
|
|
|
|
|
|
git_vector onto = GIT_VECTOR_INIT; |
374
|
|
|
|
|
|
|
size_t i; |
375
|
|
|
|
|
|
|
git_diff_delta *delta; |
376
|
|
|
|
|
|
|
|
377
|
6
|
50
|
|
|
|
|
if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0) |
378
|
0
|
|
|
|
|
|
return -1; |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
/* build new delta list without TO_DELETE and splitting TO_SPLIT */ |
381
|
21
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, i, delta) { |
382
|
15
|
100
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
383
|
6
|
|
|
|
|
|
continue; |
384
|
|
|
|
|
|
|
|
385
|
9
|
50
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) { |
|
|
0
|
|
|
|
|
|
386
|
0
|
|
|
|
|
|
delta->similarity = 0; |
387
|
|
|
|
|
|
|
|
388
|
0
|
0
|
|
|
|
|
if (insert_delete_side_of_split(diff, &onto, delta) < 0) |
389
|
0
|
|
|
|
|
|
goto on_error; |
390
|
|
|
|
|
|
|
|
391
|
0
|
0
|
|
|
|
|
if (diff->new_src == GIT_ITERATOR_WORKDIR) |
392
|
0
|
|
|
|
|
|
delta->status = GIT_DELTA_UNTRACKED; |
393
|
|
|
|
|
|
|
else |
394
|
0
|
|
|
|
|
|
delta->status = GIT_DELTA_ADDED; |
395
|
0
|
|
|
|
|
|
delta->nfiles = 1; |
396
|
0
|
|
|
|
|
|
memset(&delta->old_file, 0, sizeof(delta->old_file)); |
397
|
0
|
|
|
|
|
|
delta->old_file.path = delta->new_file.path; |
398
|
0
|
|
|
|
|
|
delta->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
/* clean up delta before inserting into new list */ |
402
|
9
|
|
|
|
|
|
GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); |
403
|
|
|
|
|
|
|
|
404
|
9
|
50
|
|
|
|
|
if (delta->status != GIT_DELTA_COPIED && |
|
|
100
|
|
|
|
|
|
405
|
3
|
50
|
|
|
|
|
delta->status != GIT_DELTA_RENAMED && |
406
|
0
|
0
|
|
|
|
|
(delta->status != GIT_DELTA_MODIFIED || actually_split)) |
407
|
3
|
|
|
|
|
|
delta->similarity = 0; |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
/* insert into new list */ |
410
|
9
|
50
|
|
|
|
|
if (git_vector_insert(&onto, delta) < 0) |
411
|
0
|
|
|
|
|
|
goto on_error; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
/* cannot return an error past this point */ |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
/* free deltas from old list that didn't make it to the new one */ |
417
|
21
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, i, delta) { |
418
|
15
|
100
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
419
|
6
|
|
|
|
|
|
git__free(delta); |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
/* swap new delta list into place */ |
423
|
6
|
|
|
|
|
|
git_vector_swap(&diff->deltas, &onto); |
424
|
6
|
|
|
|
|
|
git_vector_free(&onto); |
425
|
6
|
|
|
|
|
|
git_vector_sort(&diff->deltas); |
426
|
|
|
|
|
|
|
|
427
|
6
|
|
|
|
|
|
return 0; |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
on_error: |
430
|
0
|
|
|
|
|
|
git_vector_free_deep(&onto); |
431
|
|
|
|
|
|
|
|
432
|
6
|
|
|
|
|
|
return -1; |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
|
435
|
31
|
|
|
|
|
|
GIT_INLINE(git_diff_file *) similarity_get_file(git_diff *diff, size_t idx) |
436
|
|
|
|
|
|
|
{ |
437
|
31
|
|
|
|
|
|
git_diff_delta *delta = git_vector_get(&diff->deltas, idx / 2); |
438
|
31
|
100
|
|
|
|
|
return (idx & 1) ? &delta->new_file : &delta->old_file; |
439
|
|
|
|
|
|
|
} |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
typedef struct { |
442
|
|
|
|
|
|
|
size_t idx; |
443
|
|
|
|
|
|
|
git_iterator_t src; |
444
|
|
|
|
|
|
|
git_repository *repo; |
445
|
|
|
|
|
|
|
git_diff_file *file; |
446
|
|
|
|
|
|
|
git_buf data; |
447
|
|
|
|
|
|
|
git_odb_object *odb_obj; |
448
|
|
|
|
|
|
|
git_blob *blob; |
449
|
|
|
|
|
|
|
} similarity_info; |
450
|
|
|
|
|
|
|
|
451
|
11
|
|
|
|
|
|
static int similarity_init( |
452
|
|
|
|
|
|
|
similarity_info *info, git_diff *diff, size_t file_idx) |
453
|
|
|
|
|
|
|
{ |
454
|
11
|
|
|
|
|
|
info->idx = file_idx; |
455
|
11
|
100
|
|
|
|
|
info->src = (file_idx & 1) ? diff->new_src : diff->old_src; |
456
|
11
|
|
|
|
|
|
info->repo = diff->repo; |
457
|
11
|
|
|
|
|
|
info->file = similarity_get_file(diff, file_idx); |
458
|
11
|
|
|
|
|
|
info->odb_obj = NULL; |
459
|
11
|
|
|
|
|
|
info->blob = NULL; |
460
|
11
|
|
|
|
|
|
git_buf_init(&info->data, 0); |
461
|
|
|
|
|
|
|
|
462
|
11
|
100
|
|
|
|
|
if (info->file->size > 0 || info->src == GIT_ITERATOR_WORKDIR) |
|
|
50
|
|
|
|
|
|
463
|
7
|
|
|
|
|
|
return 0; |
464
|
|
|
|
|
|
|
|
465
|
4
|
|
|
|
|
|
return git_diff_file__resolve_zero_size( |
466
|
|
|
|
|
|
|
info->file, &info->odb_obj, info->repo); |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
11
|
|
|
|
|
|
static int similarity_sig( |
470
|
|
|
|
|
|
|
similarity_info *info, |
471
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
472
|
|
|
|
|
|
|
void **cache) |
473
|
|
|
|
|
|
|
{ |
474
|
11
|
|
|
|
|
|
int error = 0; |
475
|
11
|
|
|
|
|
|
git_diff_file *file = info->file; |
476
|
|
|
|
|
|
|
|
477
|
11
|
100
|
|
|
|
|
if (info->src == GIT_ITERATOR_WORKDIR) { |
478
|
5
|
50
|
|
|
|
|
if ((error = git_buf_joinpath( |
479
|
5
|
|
|
|
|
|
&info->data, git_repository_workdir(info->repo), file->path)) < 0) |
480
|
0
|
|
|
|
|
|
return error; |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
/* if path is not a regular file, just skip this item */ |
483
|
5
|
50
|
|
|
|
|
if (!git_path_isfile(info->data.ptr)) |
484
|
0
|
|
|
|
|
|
return 0; |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
/* TODO: apply wd-to-odb filters to file data if necessary */ |
487
|
|
|
|
|
|
|
|
488
|
5
|
|
|
|
|
|
error = opts->metric->file_signature( |
489
|
5
|
|
|
|
|
|
&cache[info->idx], info->file, |
490
|
10
|
|
|
|
|
|
info->data.ptr, opts->metric->payload); |
491
|
|
|
|
|
|
|
} else { |
492
|
|
|
|
|
|
|
/* if we didn't initially know the size, we might have an odb_obj |
493
|
|
|
|
|
|
|
* around from earlier, so convert that, otherwise load the blob now |
494
|
|
|
|
|
|
|
*/ |
495
|
6
|
50
|
|
|
|
|
if (info->odb_obj != NULL) |
496
|
0
|
|
|
|
|
|
error = git_object__from_odb_object( |
497
|
0
|
|
|
|
|
|
(git_object **)&info->blob, info->repo, |
498
|
|
|
|
|
|
|
info->odb_obj, GIT_OBJECT_BLOB); |
499
|
|
|
|
|
|
|
else |
500
|
6
|
|
|
|
|
|
error = git_blob_lookup(&info->blob, info->repo, &file->id); |
501
|
|
|
|
|
|
|
|
502
|
6
|
50
|
|
|
|
|
if (error < 0) { |
503
|
|
|
|
|
|
|
/* if lookup fails, just skip this item in similarity calc */ |
504
|
0
|
|
|
|
|
|
git_error_clear(); |
505
|
|
|
|
|
|
|
} else { |
506
|
|
|
|
|
|
|
size_t sz; |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
/* index size may not be actual blob size if filtered */ |
509
|
6
|
50
|
|
|
|
|
if (file->size != git_blob_rawsize(info->blob)) |
510
|
0
|
|
|
|
|
|
file->size = git_blob_rawsize(info->blob); |
511
|
|
|
|
|
|
|
|
512
|
6
|
50
|
|
|
|
|
sz = git__is_sizet(file->size) ? (size_t)file->size : (size_t)-1; |
513
|
|
|
|
|
|
|
|
514
|
6
|
|
|
|
|
|
error = opts->metric->buffer_signature( |
515
|
6
|
|
|
|
|
|
&cache[info->idx], info->file, |
516
|
6
|
|
|
|
|
|
git_blob_rawcontent(info->blob), sz, opts->metric->payload); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
|
520
|
11
|
|
|
|
|
|
return error; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
14
|
|
|
|
|
|
static void similarity_unload(similarity_info *info) |
524
|
|
|
|
|
|
|
{ |
525
|
14
|
50
|
|
|
|
|
if (info->odb_obj) |
526
|
0
|
|
|
|
|
|
git_odb_object_free(info->odb_obj); |
527
|
|
|
|
|
|
|
|
528
|
14
|
100
|
|
|
|
|
if (info->blob) |
529
|
6
|
|
|
|
|
|
git_blob_free(info->blob); |
530
|
|
|
|
|
|
|
else |
531
|
8
|
|
|
|
|
|
git_buf_dispose(&info->data); |
532
|
14
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
#define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0) |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
/* - score < 0 means files cannot be compared |
537
|
|
|
|
|
|
|
* - score >= 100 means files are exact match |
538
|
|
|
|
|
|
|
* - score == 0 means files are completely different |
539
|
|
|
|
|
|
|
*/ |
540
|
10
|
|
|
|
|
|
static int similarity_measure( |
541
|
|
|
|
|
|
|
int *score, |
542
|
|
|
|
|
|
|
git_diff *diff, |
543
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
544
|
|
|
|
|
|
|
void **cache, |
545
|
|
|
|
|
|
|
size_t a_idx, |
546
|
|
|
|
|
|
|
size_t b_idx) |
547
|
|
|
|
|
|
|
{ |
548
|
10
|
|
|
|
|
|
git_diff_file *a_file = similarity_get_file(diff, a_idx); |
549
|
10
|
|
|
|
|
|
git_diff_file *b_file = similarity_get_file(diff, b_idx); |
550
|
10
|
|
|
|
|
|
bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY); |
551
|
10
|
|
|
|
|
|
int error = 0; |
552
|
|
|
|
|
|
|
similarity_info a_info, b_info; |
553
|
|
|
|
|
|
|
|
554
|
10
|
|
|
|
|
|
*score = -1; |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
/* don't try to compare things that aren't files */ |
557
|
10
|
50
|
|
|
|
|
if (!GIT_MODE_ISBLOB(a_file->mode) || !GIT_MODE_ISBLOB(b_file->mode)) |
|
|
50
|
|
|
|
|
|
558
|
0
|
|
|
|
|
|
return 0; |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
/* if exact match is requested, force calculation of missing OIDs now */ |
561
|
10
|
50
|
|
|
|
|
if (exact_match) { |
562
|
0
|
0
|
|
|
|
|
if (git_oid_is_zero(&a_file->id) && |
|
|
0
|
|
|
|
|
|
563
|
0
|
0
|
|
|
|
|
diff->old_src == GIT_ITERATOR_WORKDIR && |
564
|
0
|
|
|
|
|
|
!git_diff__oid_for_file(&a_file->id, |
565
|
0
|
|
|
|
|
|
diff, a_file->path, a_file->mode, a_file->size)) |
566
|
0
|
|
|
|
|
|
a_file->flags |= GIT_DIFF_FLAG_VALID_ID; |
567
|
|
|
|
|
|
|
|
568
|
0
|
0
|
|
|
|
|
if (git_oid_is_zero(&b_file->id) && |
|
|
0
|
|
|
|
|
|
569
|
0
|
0
|
|
|
|
|
diff->new_src == GIT_ITERATOR_WORKDIR && |
570
|
0
|
|
|
|
|
|
!git_diff__oid_for_file(&b_file->id, |
571
|
0
|
|
|
|
|
|
diff, b_file->path, b_file->mode, b_file->size)) |
572
|
0
|
|
|
|
|
|
b_file->flags |= GIT_DIFF_FLAG_VALID_ID; |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
/* check OID match as a quick test */ |
576
|
10
|
100
|
|
|
|
|
if (git_oid__cmp(&a_file->id, &b_file->id) == 0) { |
577
|
3
|
|
|
|
|
|
*score = 100; |
578
|
3
|
|
|
|
|
|
return 0; |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
/* don't calculate signatures if we are doing exact match */ |
582
|
7
|
50
|
|
|
|
|
if (exact_match) { |
583
|
0
|
|
|
|
|
|
*score = 0; |
584
|
0
|
|
|
|
|
|
return 0; |
585
|
|
|
|
|
|
|
} |
586
|
|
|
|
|
|
|
|
587
|
7
|
|
|
|
|
|
memset(&a_info, 0, sizeof(a_info)); |
588
|
7
|
|
|
|
|
|
memset(&b_info, 0, sizeof(b_info)); |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
/* set up similarity data (will try to update missing file sizes) */ |
591
|
7
|
100
|
|
|
|
|
if (!cache[a_idx] && (error = similarity_init(&a_info, diff, a_idx)) < 0) |
|
|
50
|
|
|
|
|
|
592
|
0
|
|
|
|
|
|
return error; |
593
|
7
|
50
|
|
|
|
|
if (!cache[b_idx] && (error = similarity_init(&b_info, diff, b_idx)) < 0) |
|
|
50
|
|
|
|
|
|
594
|
0
|
|
|
|
|
|
goto cleanup; |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
/* check if file sizes are nowhere near each other */ |
597
|
7
|
50
|
|
|
|
|
if (a_file->size > 127 && |
|
|
0
|
|
|
|
|
|
598
|
0
|
0
|
|
|
|
|
b_file->size > 127 && |
599
|
0
|
0
|
|
|
|
|
(a_file->size > (b_file->size << 3) || |
600
|
0
|
|
|
|
|
|
b_file->size > (a_file->size << 3))) |
601
|
|
|
|
|
|
|
goto cleanup; |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
/* update signature cache if needed */ |
604
|
7
|
100
|
|
|
|
|
if (!cache[a_idx]) { |
605
|
4
|
50
|
|
|
|
|
if ((error = similarity_sig(&a_info, opts, cache)) < 0) |
606
|
0
|
|
|
|
|
|
goto cleanup; |
607
|
|
|
|
|
|
|
} |
608
|
7
|
50
|
|
|
|
|
if (!cache[b_idx]) { |
609
|
7
|
50
|
|
|
|
|
if ((error = similarity_sig(&b_info, opts, cache)) < 0) |
610
|
0
|
|
|
|
|
|
goto cleanup; |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
/* calculate similarity provided that the metric chooses to process |
614
|
|
|
|
|
|
|
* both the a and b files (some may not if file is too big, etc). |
615
|
|
|
|
|
|
|
*/ |
616
|
7
|
50
|
|
|
|
|
if (cache[a_idx] && cache[b_idx]) |
|
|
50
|
|
|
|
|
|
617
|
7
|
|
|
|
|
|
error = opts->metric->similarity( |
618
|
21
|
|
|
|
|
|
score, cache[a_idx], cache[b_idx], opts->metric->payload); |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
cleanup: |
621
|
7
|
|
|
|
|
|
similarity_unload(&a_info); |
622
|
7
|
|
|
|
|
|
similarity_unload(&b_info); |
623
|
|
|
|
|
|
|
|
624
|
10
|
|
|
|
|
|
return error; |
625
|
|
|
|
|
|
|
} |
626
|
|
|
|
|
|
|
|
627
|
0
|
|
|
|
|
|
static int calc_self_similarity( |
628
|
|
|
|
|
|
|
git_diff *diff, |
629
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
630
|
|
|
|
|
|
|
size_t delta_idx, |
631
|
|
|
|
|
|
|
void **cache) |
632
|
|
|
|
|
|
|
{ |
633
|
0
|
|
|
|
|
|
int error, similarity = -1; |
634
|
0
|
0
|
|
|
|
|
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); |
635
|
|
|
|
|
|
|
|
636
|
0
|
0
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0) |
637
|
0
|
|
|
|
|
|
return 0; |
638
|
|
|
|
|
|
|
|
639
|
0
|
|
|
|
|
|
error = similarity_measure( |
640
|
0
|
|
|
|
|
|
&similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1); |
641
|
0
|
0
|
|
|
|
|
if (error < 0) |
642
|
0
|
|
|
|
|
|
return error; |
643
|
|
|
|
|
|
|
|
644
|
0
|
0
|
|
|
|
|
if (similarity >= 0) { |
645
|
0
|
|
|
|
|
|
delta->similarity = (uint16_t)similarity; |
646
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY; |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
|
649
|
0
|
|
|
|
|
|
return 0; |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
|
652
|
25
|
|
|
|
|
|
static bool is_rename_target( |
653
|
|
|
|
|
|
|
git_diff *diff, |
654
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
655
|
|
|
|
|
|
|
size_t delta_idx, |
656
|
|
|
|
|
|
|
void **cache) |
657
|
|
|
|
|
|
|
{ |
658
|
25
|
50
|
|
|
|
|
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
/* skip things that aren't plain blobs */ |
661
|
25
|
100
|
|
|
|
|
if (!GIT_MODE_ISBLOB(delta->new_file.mode)) |
662
|
10
|
|
|
|
|
|
return false; |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
/* only consider ADDED, RENAMED, COPIED, and split MODIFIED as |
665
|
|
|
|
|
|
|
* targets; maybe include UNTRACKED if requested. |
666
|
|
|
|
|
|
|
*/ |
667
|
15
|
|
|
|
|
|
switch (delta->status) { |
668
|
|
|
|
|
|
|
case GIT_DELTA_UNMODIFIED: |
669
|
|
|
|
|
|
|
case GIT_DELTA_DELETED: |
670
|
|
|
|
|
|
|
case GIT_DELTA_IGNORED: |
671
|
|
|
|
|
|
|
case GIT_DELTA_CONFLICTED: |
672
|
0
|
|
|
|
|
|
return false; |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
case GIT_DELTA_MODIFIED: |
675
|
2
|
50
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && |
|
|
50
|
|
|
|
|
|
676
|
2
|
|
|
|
|
|
!FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) |
677
|
2
|
|
|
|
|
|
return false; |
678
|
|
|
|
|
|
|
|
679
|
0
|
0
|
|
|
|
|
if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) |
680
|
0
|
|
|
|
|
|
return false; |
681
|
|
|
|
|
|
|
|
682
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && |
|
|
0
|
|
|
|
|
|
683
|
0
|
|
|
|
|
|
delta->similarity < opts->break_rewrite_threshold) { |
684
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; |
685
|
0
|
|
|
|
|
|
break; |
686
|
|
|
|
|
|
|
} |
687
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && |
|
|
0
|
|
|
|
|
|
688
|
0
|
|
|
|
|
|
delta->similarity < opts->rename_from_rewrite_threshold) { |
689
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; |
690
|
0
|
|
|
|
|
|
break; |
691
|
|
|
|
|
|
|
} |
692
|
|
|
|
|
|
|
|
693
|
0
|
|
|
|
|
|
return false; |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
case GIT_DELTA_UNTRACKED: |
696
|
7
|
50
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED)) |
697
|
0
|
|
|
|
|
|
return false; |
698
|
7
|
|
|
|
|
|
break; |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
default: /* all other status values should be checked */ |
701
|
6
|
|
|
|
|
|
break; |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
|
704
|
13
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET; |
705
|
13
|
|
|
|
|
|
return true; |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
25
|
|
|
|
|
|
static bool is_rename_source( |
709
|
|
|
|
|
|
|
git_diff *diff, |
710
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
711
|
|
|
|
|
|
|
size_t delta_idx, |
712
|
|
|
|
|
|
|
void **cache) |
713
|
|
|
|
|
|
|
{ |
714
|
25
|
50
|
|
|
|
|
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
/* skip things that aren't blobs */ |
717
|
25
|
100
|
|
|
|
|
if (!GIT_MODE_ISBLOB(delta->old_file.mode)) |
718
|
16
|
|
|
|
|
|
return false; |
719
|
|
|
|
|
|
|
|
720
|
9
|
|
|
|
|
|
switch (delta->status) { |
721
|
|
|
|
|
|
|
case GIT_DELTA_ADDED: |
722
|
|
|
|
|
|
|
case GIT_DELTA_UNTRACKED: |
723
|
|
|
|
|
|
|
case GIT_DELTA_UNREADABLE: |
724
|
|
|
|
|
|
|
case GIT_DELTA_IGNORED: |
725
|
|
|
|
|
|
|
case GIT_DELTA_CONFLICTED: |
726
|
0
|
|
|
|
|
|
return false; |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
case GIT_DELTA_DELETED: |
729
|
|
|
|
|
|
|
case GIT_DELTA_TYPECHANGE: |
730
|
7
|
|
|
|
|
|
break; |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
case GIT_DELTA_UNMODIFIED: |
733
|
0
|
0
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) |
734
|
0
|
|
|
|
|
|
return false; |
735
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED)) |
736
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_DELETE; |
737
|
0
|
|
|
|
|
|
break; |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
default: /* MODIFIED, RENAMED, COPIED */ |
740
|
|
|
|
|
|
|
/* if we're finding copies, this could be a source */ |
741
|
2
|
50
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) |
742
|
0
|
|
|
|
|
|
break; |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
/* otherwise, this is only a source if we can split it */ |
745
|
2
|
50
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && |
|
|
50
|
|
|
|
|
|
746
|
2
|
|
|
|
|
|
!FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) |
747
|
2
|
|
|
|
|
|
return false; |
748
|
|
|
|
|
|
|
|
749
|
0
|
0
|
|
|
|
|
if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) |
750
|
0
|
|
|
|
|
|
return false; |
751
|
|
|
|
|
|
|
|
752
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && |
|
|
0
|
|
|
|
|
|
753
|
0
|
|
|
|
|
|
delta->similarity < opts->break_rewrite_threshold) { |
754
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; |
755
|
0
|
|
|
|
|
|
break; |
756
|
|
|
|
|
|
|
} |
757
|
|
|
|
|
|
|
|
758
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && |
|
|
0
|
|
|
|
|
|
759
|
0
|
|
|
|
|
|
delta->similarity < opts->rename_from_rewrite_threshold) |
760
|
0
|
|
|
|
|
|
break; |
761
|
|
|
|
|
|
|
|
762
|
0
|
|
|
|
|
|
return false; |
763
|
|
|
|
|
|
|
} |
764
|
|
|
|
|
|
|
|
765
|
7
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE; |
766
|
7
|
|
|
|
|
|
return true; |
767
|
|
|
|
|
|
|
} |
768
|
|
|
|
|
|
|
|
769
|
0
|
|
|
|
|
|
GIT_INLINE(bool) delta_is_split(git_diff_delta *delta) |
770
|
|
|
|
|
|
|
{ |
771
|
0
|
0
|
|
|
|
|
return (delta->status == GIT_DELTA_TYPECHANGE || |
|
|
0
|
|
|
|
|
|
772
|
0
|
|
|
|
|
|
(delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0); |
773
|
|
|
|
|
|
|
} |
774
|
|
|
|
|
|
|
|
775
|
6
|
|
|
|
|
|
GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta) |
776
|
|
|
|
|
|
|
{ |
777
|
7
|
50
|
|
|
|
|
return (delta->status == GIT_DELTA_ADDED || |
778
|
0
|
0
|
|
|
|
|
delta->status == GIT_DELTA_UNTRACKED || |
779
|
7
|
100
|
|
|
|
|
delta->status == GIT_DELTA_UNREADABLE || |
|
|
0
|
|
|
|
|
|
780
|
0
|
|
|
|
|
|
delta->status == GIT_DELTA_IGNORED); |
781
|
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
|
|
783
|
6
|
|
|
|
|
|
GIT_INLINE(void) delta_make_rename( |
784
|
|
|
|
|
|
|
git_diff_delta *to, const git_diff_delta *from, uint16_t similarity) |
785
|
|
|
|
|
|
|
{ |
786
|
6
|
|
|
|
|
|
to->status = GIT_DELTA_RENAMED; |
787
|
6
|
|
|
|
|
|
to->similarity = similarity; |
788
|
6
|
|
|
|
|
|
to->nfiles = 2; |
789
|
6
|
|
|
|
|
|
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); |
790
|
6
|
|
|
|
|
|
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
791
|
6
|
|
|
|
|
|
} |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
typedef struct { |
794
|
|
|
|
|
|
|
size_t idx; |
795
|
|
|
|
|
|
|
uint16_t similarity; |
796
|
|
|
|
|
|
|
} diff_find_match; |
797
|
|
|
|
|
|
|
|
798
|
11
|
|
|
|
|
|
int git_diff_find_similar( |
799
|
|
|
|
|
|
|
git_diff *diff, |
800
|
|
|
|
|
|
|
const git_diff_find_options *given_opts) |
801
|
|
|
|
|
|
|
{ |
802
|
|
|
|
|
|
|
size_t s, t; |
803
|
11
|
|
|
|
|
|
int error = 0, result; |
804
|
|
|
|
|
|
|
uint16_t similarity; |
805
|
|
|
|
|
|
|
git_diff_delta *src, *tgt; |
806
|
11
|
|
|
|
|
|
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT; |
807
|
11
|
|
|
|
|
|
size_t num_deltas, num_srcs = 0, num_tgts = 0; |
808
|
11
|
|
|
|
|
|
size_t tried_srcs = 0, tried_tgts = 0; |
809
|
11
|
|
|
|
|
|
size_t num_rewrites = 0, num_updates = 0, num_bumped = 0; |
810
|
|
|
|
|
|
|
size_t sigcache_size; |
811
|
11
|
|
|
|
|
|
void **sigcache = NULL; /* cache of similarity metric file signatures */ |
812
|
11
|
|
|
|
|
|
diff_find_match *tgt2src = NULL; |
813
|
11
|
|
|
|
|
|
diff_find_match *src2tgt = NULL; |
814
|
11
|
|
|
|
|
|
diff_find_match *tgt2src_copy = NULL; |
815
|
|
|
|
|
|
|
diff_find_match *best_match; |
816
|
|
|
|
|
|
|
git_diff_file swap; |
817
|
|
|
|
|
|
|
|
818
|
11
|
50
|
|
|
|
|
assert(diff); |
819
|
|
|
|
|
|
|
|
820
|
11
|
50
|
|
|
|
|
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) |
821
|
0
|
|
|
|
|
|
return error; |
822
|
|
|
|
|
|
|
|
823
|
11
|
|
|
|
|
|
num_deltas = diff->deltas.length; |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
/* TODO: maybe abort if deltas.length > rename_limit ??? */ |
826
|
11
|
50
|
|
|
|
|
if (!num_deltas || !git__is_uint32(num_deltas)) |
|
|
50
|
|
|
|
|
|
827
|
|
|
|
|
|
|
goto cleanup; |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
/* No flags set; nothing to do */ |
830
|
11
|
50
|
|
|
|
|
if ((opts.flags & GIT_DIFF_FIND_ALL) == 0) |
831
|
0
|
|
|
|
|
|
goto cleanup; |
832
|
|
|
|
|
|
|
|
833
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC_MULTIPLY(&sigcache_size, num_deltas, 2); |
|
|
50
|
|
|
|
|
|
834
|
11
|
|
|
|
|
|
sigcache = git__calloc(sigcache_size, sizeof(void *)); |
835
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(sigcache); |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
/* Label rename sources and targets |
838
|
|
|
|
|
|
|
* |
839
|
|
|
|
|
|
|
* This will also set self-similarity scores for MODIFIED files and |
840
|
|
|
|
|
|
|
* mark them for splitting if break-rewrites is enabled |
841
|
|
|
|
|
|
|
*/ |
842
|
36
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, t, tgt) { |
843
|
25
|
100
|
|
|
|
|
if (is_rename_source(diff, &opts, t, sigcache)) |
844
|
7
|
|
|
|
|
|
++num_srcs; |
845
|
|
|
|
|
|
|
|
846
|
25
|
100
|
|
|
|
|
if (is_rename_target(diff, &opts, t, sigcache)) |
847
|
13
|
|
|
|
|
|
++num_tgts; |
848
|
|
|
|
|
|
|
|
849
|
25
|
50
|
|
|
|
|
if ((tgt->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) |
850
|
0
|
|
|
|
|
|
num_rewrites++; |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
/* if there are no candidate srcs or tgts, we're done */ |
854
|
11
|
100
|
|
|
|
|
if (!num_srcs || !num_tgts) |
|
|
50
|
|
|
|
|
|
855
|
|
|
|
|
|
|
goto cleanup; |
856
|
|
|
|
|
|
|
|
857
|
7
|
|
|
|
|
|
src2tgt = git__calloc(num_deltas, sizeof(diff_find_match)); |
858
|
7
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(src2tgt); |
859
|
7
|
|
|
|
|
|
tgt2src = git__calloc(num_deltas, sizeof(diff_find_match)); |
860
|
7
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(tgt2src); |
861
|
|
|
|
|
|
|
|
862
|
7
|
50
|
|
|
|
|
if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { |
863
|
0
|
|
|
|
|
|
tgt2src_copy = git__calloc(num_deltas, sizeof(diff_find_match)); |
864
|
0
|
0
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(tgt2src_copy); |
865
|
|
|
|
|
|
|
} |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
/* |
868
|
|
|
|
|
|
|
* Find best-fit matches for rename / copy candidates |
869
|
|
|
|
|
|
|
*/ |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
find_best_matches: |
872
|
7
|
|
|
|
|
|
tried_tgts = num_bumped = 0; |
873
|
|
|
|
|
|
|
|
874
|
17
|
50
|
|
|
|
|
git_vector_foreach(&diff->deltas, t, tgt) { |
875
|
|
|
|
|
|
|
/* skip things that are not rename targets */ |
876
|
17
|
100
|
|
|
|
|
if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
877
|
7
|
|
|
|
|
|
continue; |
878
|
|
|
|
|
|
|
|
879
|
10
|
|
|
|
|
|
tried_srcs = 0; |
880
|
|
|
|
|
|
|
|
881
|
17
|
50
|
|
|
|
|
git_vector_foreach(&diff->deltas, s, src) { |
882
|
|
|
|
|
|
|
/* skip things that are not rename sources */ |
883
|
17
|
100
|
|
|
|
|
if ((src->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0) |
884
|
7
|
|
|
|
|
|
continue; |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
/* calculate similarity for this pair and find best match */ |
887
|
10
|
50
|
|
|
|
|
if (s == t) |
888
|
0
|
|
|
|
|
|
result = -1; /* don't measure self-similarity here */ |
889
|
10
|
50
|
|
|
|
|
else if ((error = similarity_measure( |
890
|
10
|
|
|
|
|
|
&result, diff, &opts, sigcache, 2 * s, 2 * t + 1)) < 0) |
891
|
0
|
|
|
|
|
|
goto cleanup; |
892
|
|
|
|
|
|
|
|
893
|
10
|
50
|
|
|
|
|
if (result < 0) |
894
|
0
|
|
|
|
|
|
continue; |
895
|
10
|
|
|
|
|
|
similarity = (uint16_t)result; |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
/* is this a better rename? */ |
898
|
10
|
100
|
|
|
|
|
if (tgt2src[t].similarity < similarity && |
|
|
50
|
|
|
|
|
|
899
|
6
|
|
|
|
|
|
src2tgt[s].similarity < similarity) |
900
|
|
|
|
|
|
|
{ |
901
|
|
|
|
|
|
|
/* eject old mapping */ |
902
|
6
|
50
|
|
|
|
|
if (src2tgt[s].similarity > 0) { |
903
|
0
|
|
|
|
|
|
tgt2src[src2tgt[s].idx].similarity = 0; |
904
|
0
|
|
|
|
|
|
num_bumped++; |
905
|
|
|
|
|
|
|
} |
906
|
6
|
50
|
|
|
|
|
if (tgt2src[t].similarity > 0) { |
907
|
0
|
|
|
|
|
|
src2tgt[tgt2src[t].idx].similarity = 0; |
908
|
0
|
|
|
|
|
|
num_bumped++; |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
/* write new mapping */ |
912
|
6
|
|
|
|
|
|
tgt2src[t].idx = s; |
913
|
6
|
|
|
|
|
|
tgt2src[t].similarity = similarity; |
914
|
6
|
|
|
|
|
|
src2tgt[s].idx = t; |
915
|
6
|
|
|
|
|
|
src2tgt[s].similarity = similarity; |
916
|
|
|
|
|
|
|
} |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
/* keep best absolute match for copies */ |
919
|
10
|
50
|
|
|
|
|
if (tgt2src_copy != NULL && |
|
|
0
|
|
|
|
|
|
920
|
0
|
|
|
|
|
|
tgt2src_copy[t].similarity < similarity) |
921
|
|
|
|
|
|
|
{ |
922
|
0
|
|
|
|
|
|
tgt2src_copy[t].idx = s; |
923
|
0
|
|
|
|
|
|
tgt2src_copy[t].similarity = similarity; |
924
|
|
|
|
|
|
|
} |
925
|
|
|
|
|
|
|
|
926
|
10
|
50
|
|
|
|
|
if (++tried_srcs >= num_srcs) |
927
|
10
|
|
|
|
|
|
break; |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
/* cap on maximum targets we'll examine (per "tgt" file) */ |
930
|
0
|
0
|
|
|
|
|
if (tried_srcs > opts.rename_limit) |
931
|
0
|
|
|
|
|
|
break; |
932
|
|
|
|
|
|
|
} |
933
|
|
|
|
|
|
|
|
934
|
10
|
100
|
|
|
|
|
if (++tried_tgts >= num_tgts) |
935
|
7
|
|
|
|
|
|
break; |
936
|
|
|
|
|
|
|
} |
937
|
|
|
|
|
|
|
|
938
|
7
|
50
|
|
|
|
|
if (num_bumped > 0) /* try again if we bumped some items */ |
939
|
0
|
|
|
|
|
|
goto find_best_matches; |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
/* |
942
|
|
|
|
|
|
|
* Rewrite the diffs with renames / copies |
943
|
|
|
|
|
|
|
*/ |
944
|
|
|
|
|
|
|
|
945
|
26
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, t, tgt) { |
946
|
|
|
|
|
|
|
/* skip things that are not rename targets */ |
947
|
19
|
100
|
|
|
|
|
if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
948
|
9
|
|
|
|
|
|
continue; |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
/* check if this delta was the target of a similarity */ |
951
|
10
|
100
|
|
|
|
|
if (tgt2src[t].similarity) |
952
|
6
|
|
|
|
|
|
best_match = &tgt2src[t]; |
953
|
4
|
50
|
|
|
|
|
else if (tgt2src_copy && tgt2src_copy[t].similarity) |
|
|
0
|
|
|
|
|
|
954
|
0
|
|
|
|
|
|
best_match = &tgt2src_copy[t]; |
955
|
|
|
|
|
|
|
else |
956
|
4
|
|
|
|
|
|
continue; |
957
|
|
|
|
|
|
|
|
958
|
6
|
|
|
|
|
|
s = best_match->idx; |
959
|
6
|
50
|
|
|
|
|
src = GIT_VECTOR_GET(&diff->deltas, s); |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
/* possible scenarios: |
962
|
|
|
|
|
|
|
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME |
963
|
|
|
|
|
|
|
* 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE |
964
|
|
|
|
|
|
|
* 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME |
965
|
|
|
|
|
|
|
* 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT |
966
|
|
|
|
|
|
|
* 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY |
967
|
|
|
|
|
|
|
*/ |
968
|
|
|
|
|
|
|
|
969
|
6
|
50
|
|
|
|
|
if (src->status == GIT_DELTA_DELETED) { |
970
|
|
|
|
|
|
|
|
971
|
6
|
50
|
|
|
|
|
if (delta_is_new_only(tgt)) { |
972
|
|
|
|
|
|
|
|
973
|
6
|
50
|
|
|
|
|
if (best_match->similarity < opts.rename_threshold) |
974
|
0
|
|
|
|
|
|
continue; |
975
|
|
|
|
|
|
|
|
976
|
6
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
977
|
|
|
|
|
|
|
|
978
|
6
|
|
|
|
|
|
src->flags |= GIT_DIFF_FLAG__TO_DELETE; |
979
|
6
|
|
|
|
|
|
num_rewrites++; |
980
|
|
|
|
|
|
|
} else { |
981
|
0
|
0
|
|
|
|
|
assert(delta_is_split(tgt)); |
982
|
|
|
|
|
|
|
|
983
|
0
|
0
|
|
|
|
|
if (best_match->similarity < opts.rename_from_rewrite_threshold) |
984
|
0
|
|
|
|
|
|
continue; |
985
|
|
|
|
|
|
|
|
986
|
0
|
|
|
|
|
|
memcpy(&swap, &tgt->old_file, sizeof(swap)); |
987
|
|
|
|
|
|
|
|
988
|
0
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
989
|
0
|
|
|
|
|
|
num_rewrites--; |
990
|
|
|
|
|
|
|
|
991
|
0
|
0
|
|
|
|
|
assert(src->status == GIT_DELTA_DELETED); |
992
|
0
|
|
|
|
|
|
memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
993
|
0
|
|
|
|
|
|
memset(&src->new_file, 0, sizeof(src->new_file)); |
994
|
0
|
|
|
|
|
|
src->new_file.path = src->old_file.path; |
995
|
0
|
|
|
|
|
|
src->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
996
|
|
|
|
|
|
|
|
997
|
0
|
|
|
|
|
|
num_updates++; |
998
|
|
|
|
|
|
|
|
999
|
0
|
0
|
|
|
|
|
if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { |
|
|
0
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
/* what used to be at src t is now at src s */ |
1001
|
6
|
|
|
|
|
|
tgt2src[src2tgt[t].idx].idx = s; |
1002
|
|
|
|
|
|
|
} |
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
} |
1005
|
|
|
|
|
|
|
|
1006
|
0
|
0
|
|
|
|
|
else if (delta_is_split(src)) { |
1007
|
|
|
|
|
|
|
|
1008
|
0
|
0
|
|
|
|
|
if (delta_is_new_only(tgt)) { |
1009
|
|
|
|
|
|
|
|
1010
|
0
|
0
|
|
|
|
|
if (best_match->similarity < opts.rename_threshold) |
1011
|
0
|
|
|
|
|
|
continue; |
1012
|
|
|
|
|
|
|
|
1013
|
0
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
1014
|
|
|
|
|
|
|
|
1015
|
0
|
0
|
|
|
|
|
src->status = (diff->new_src == GIT_ITERATOR_WORKDIR) ? |
1016
|
|
|
|
|
|
|
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED; |
1017
|
0
|
|
|
|
|
|
src->nfiles = 1; |
1018
|
0
|
|
|
|
|
|
memset(&src->old_file, 0, sizeof(src->old_file)); |
1019
|
0
|
|
|
|
|
|
src->old_file.path = src->new_file.path; |
1020
|
0
|
|
|
|
|
|
src->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
1021
|
|
|
|
|
|
|
|
1022
|
0
|
|
|
|
|
|
src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
1023
|
0
|
|
|
|
|
|
num_rewrites--; |
1024
|
|
|
|
|
|
|
|
1025
|
0
|
|
|
|
|
|
num_updates++; |
1026
|
|
|
|
|
|
|
} else { |
1027
|
0
|
0
|
|
|
|
|
assert(delta_is_split(src)); |
1028
|
|
|
|
|
|
|
|
1029
|
0
|
0
|
|
|
|
|
if (best_match->similarity < opts.rename_from_rewrite_threshold) |
1030
|
0
|
|
|
|
|
|
continue; |
1031
|
|
|
|
|
|
|
|
1032
|
0
|
|
|
|
|
|
memcpy(&swap, &tgt->old_file, sizeof(swap)); |
1033
|
|
|
|
|
|
|
|
1034
|
0
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
1035
|
0
|
|
|
|
|
|
num_rewrites--; |
1036
|
0
|
|
|
|
|
|
num_updates++; |
1037
|
|
|
|
|
|
|
|
1038
|
0
|
|
|
|
|
|
memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
/* if we've just swapped the new element into the correct |
1041
|
|
|
|
|
|
|
* place, clear the SPLIT flag |
1042
|
|
|
|
|
|
|
*/ |
1043
|
0
|
0
|
|
|
|
|
if (tgt2src[s].idx == t && |
|
|
0
|
|
|
|
|
|
1044
|
0
|
|
|
|
|
|
tgt2src[s].similarity > |
1045
|
0
|
|
|
|
|
|
opts.rename_from_rewrite_threshold) { |
1046
|
0
|
|
|
|
|
|
src->status = GIT_DELTA_RENAMED; |
1047
|
0
|
|
|
|
|
|
src->similarity = tgt2src[s].similarity; |
1048
|
0
|
|
|
|
|
|
tgt2src[s].similarity = 0; |
1049
|
0
|
|
|
|
|
|
src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
1050
|
0
|
|
|
|
|
|
num_rewrites--; |
1051
|
|
|
|
|
|
|
} |
1052
|
|
|
|
|
|
|
/* otherwise, if we just overwrote a source, update mapping */ |
1053
|
0
|
0
|
|
|
|
|
else if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { |
|
|
0
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
/* what used to be at src t is now at src s */ |
1055
|
0
|
|
|
|
|
|
tgt2src[src2tgt[t].idx].idx = s; |
1056
|
|
|
|
|
|
|
} |
1057
|
|
|
|
|
|
|
|
1058
|
0
|
|
|
|
|
|
num_updates++; |
1059
|
|
|
|
|
|
|
} |
1060
|
|
|
|
|
|
|
} |
1061
|
|
|
|
|
|
|
|
1062
|
0
|
0
|
|
|
|
|
else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { |
1063
|
0
|
0
|
|
|
|
|
if (tgt2src_copy[t].similarity < opts.copy_threshold) |
1064
|
0
|
|
|
|
|
|
continue; |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
/* always use best possible source for copy */ |
1067
|
0
|
|
|
|
|
|
best_match = &tgt2src_copy[t]; |
1068
|
0
|
0
|
|
|
|
|
src = GIT_VECTOR_GET(&diff->deltas, best_match->idx); |
1069
|
|
|
|
|
|
|
|
1070
|
0
|
0
|
|
|
|
|
if (delta_is_split(tgt)) { |
1071
|
0
|
|
|
|
|
|
error = insert_delete_side_of_split(diff, &diff->deltas, tgt); |
1072
|
0
|
0
|
|
|
|
|
if (error < 0) |
1073
|
0
|
|
|
|
|
|
goto cleanup; |
1074
|
0
|
|
|
|
|
|
num_rewrites--; |
1075
|
|
|
|
|
|
|
} |
1076
|
|
|
|
|
|
|
|
1077
|
0
|
0
|
|
|
|
|
if (!delta_is_split(tgt) && !delta_is_new_only(tgt)) |
|
|
0
|
|
|
|
|
|
1078
|
0
|
|
|
|
|
|
continue; |
1079
|
|
|
|
|
|
|
|
1080
|
0
|
|
|
|
|
|
tgt->status = GIT_DELTA_COPIED; |
1081
|
0
|
|
|
|
|
|
tgt->similarity = best_match->similarity; |
1082
|
0
|
|
|
|
|
|
tgt->nfiles = 2; |
1083
|
0
|
|
|
|
|
|
memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file)); |
1084
|
0
|
|
|
|
|
|
tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
1085
|
|
|
|
|
|
|
|
1086
|
0
|
|
|
|
|
|
num_updates++; |
1087
|
|
|
|
|
|
|
} |
1088
|
|
|
|
|
|
|
} |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
/* |
1091
|
|
|
|
|
|
|
* Actually split and delete entries as needed |
1092
|
|
|
|
|
|
|
*/ |
1093
|
|
|
|
|
|
|
|
1094
|
7
|
100
|
|
|
|
|
if (num_rewrites > 0 || num_updates > 0) |
|
|
50
|
|
|
|
|
|
1095
|
6
|
|
|
|
|
|
error = apply_splits_and_deletes( |
1096
|
6
|
|
|
|
|
|
diff, diff->deltas.length - num_rewrites, |
1097
|
6
|
100
|
|
|
|
|
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES) && |
|
|
50
|
|
|
|
|
|
1098
|
1
|
|
|
|
|
|
!FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY)); |
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
cleanup: |
1101
|
11
|
|
|
|
|
|
git__free(tgt2src); |
1102
|
11
|
|
|
|
|
|
git__free(src2tgt); |
1103
|
11
|
|
|
|
|
|
git__free(tgt2src_copy); |
1104
|
|
|
|
|
|
|
|
1105
|
11
|
50
|
|
|
|
|
if (sigcache) { |
1106
|
61
|
100
|
|
|
|
|
for (t = 0; t < num_deltas * 2; ++t) { |
1107
|
50
|
100
|
|
|
|
|
if (sigcache[t] != NULL) |
1108
|
11
|
|
|
|
|
|
opts.metric->free_signature(sigcache[t], opts.metric->payload); |
1109
|
|
|
|
|
|
|
} |
1110
|
11
|
|
|
|
|
|
git__free(sigcache); |
1111
|
|
|
|
|
|
|
} |
1112
|
|
|
|
|
|
|
|
1113
|
11
|
50
|
|
|
|
|
if (!given_opts || !given_opts->metric) |
|
|
50
|
|
|
|
|
|
1114
|
11
|
|
|
|
|
|
git__free(opts.metric); |
1115
|
|
|
|
|
|
|
|
1116
|
11
|
|
|
|
|
|
return error; |
1117
|
|
|
|
|
|
|
} |
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
#undef FLAG_SET |