| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
* Copyright (C) the libgit2 contributors. All rights reserved. |
|
3
|
|
|
|
|
|
|
* |
|
4
|
|
|
|
|
|
|
* This file is part of libgit2, distributed under the GNU GPL v2 with |
|
5
|
|
|
|
|
|
|
* a Linking Exception. For full terms see the included COPYING file. |
|
6
|
|
|
|
|
|
|
*/ |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
#include "diff_tform.h" |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#include "git2/config.h" |
|
11
|
|
|
|
|
|
|
#include "git2/blob.h" |
|
12
|
|
|
|
|
|
|
#include "git2/sys/hashsig.h" |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
#include "diff.h" |
|
15
|
|
|
|
|
|
|
#include "diff_generate.h" |
|
16
|
|
|
|
|
|
|
#include "fs_path.h" |
|
17
|
|
|
|
|
|
|
#include "futils.h" |
|
18
|
|
|
|
|
|
|
#include "config.h" |
|
19
|
|
|
|
|
|
|
|
|
20
|
27
|
|
|
|
|
|
/*
 * Create a copy of delta `d`.
 *
 * The delta record itself is allocated with git__malloc (release it with
 * git__free); the path strings are duplicated into `pool`.  When the source
 * delta has no distinct new path (it is NULL or the very same pointer as
 * the old path), the copy's new path aliases the copy's old path.
 *
 * Internal-only flag bits are cleared on the copy.
 *
 * Returns the new delta, or NULL if any allocation fails.
 */
git_diff_delta *git_diff__delta_dup(
	const git_diff_delta *d, git_pool *pool)
{
	const char *src_old = d->old_file.path;
	const char *src_new = d->new_file.path;
	git_diff_delta *copy = git__malloc(sizeof(*copy));

	if (copy == NULL)
		return NULL;

	memcpy(copy, d, sizeof(*copy));
	GIT_DIFF_FLAG__CLEAR_INTERNAL(copy->flags);

	if (src_old != NULL) {
		copy->old_file.path = git_pool_strdup(pool, src_old);
		if (copy->old_file.path == NULL)
			goto fail;
	}

	if (src_new == NULL || src_new == src_old) {
		/* share the (possibly NULL) old path rather than duplicating */
		copy->new_file.path = copy->old_file.path;
	} else {
		copy->new_file.path = git_pool_strdup(pool, src_new);
		if (copy->new_file.path == NULL)
			goto fail;
	}

	return copy;

fail:
	git__free(copy);
	return NULL;
}
|
50
|
|
|
|
|
|
|
|
|
51
|
3
|
|
|
|
|
|
git_diff_delta *git_diff__merge_like_cgit( |
|
52
|
|
|
|
|
|
|
const git_diff_delta *a, |
|
53
|
|
|
|
|
|
|
const git_diff_delta *b, |
|
54
|
|
|
|
|
|
|
git_pool *pool) |
|
55
|
|
|
|
|
|
|
{ |
|
56
|
|
|
|
|
|
|
git_diff_delta *dup; |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
/* Emulate C git for merging two diffs (a la 'git diff '). |
|
59
|
|
|
|
|
|
|
* |
|
60
|
|
|
|
|
|
|
* When C git does a diff between the work dir and a tree, it actually |
|
61
|
|
|
|
|
|
|
* diffs with the index but uses the workdir contents. This emulates |
|
62
|
|
|
|
|
|
|
* those choices so we can emulate the type of diff. |
|
63
|
|
|
|
|
|
|
* |
|
64
|
|
|
|
|
|
|
* We have three file descriptions here, let's call them: |
|
65
|
|
|
|
|
|
|
* f1 = a->old_file |
|
66
|
|
|
|
|
|
|
* f2 = a->new_file AND b->old_file |
|
67
|
|
|
|
|
|
|
* f3 = b->new_file |
|
68
|
|
|
|
|
|
|
*/ |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
/* If one of the diffs is a conflict, just dup it */ |
|
71
|
3
|
50
|
|
|
|
|
if (b->status == GIT_DELTA_CONFLICTED) |
|
72
|
0
|
|
|
|
|
|
return git_diff__delta_dup(b, pool); |
|
73
|
3
|
50
|
|
|
|
|
if (a->status == GIT_DELTA_CONFLICTED) |
|
74
|
0
|
|
|
|
|
|
return git_diff__delta_dup(a, pool); |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
/* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */ |
|
77
|
3
|
50
|
|
|
|
|
if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED) |
|
|
|
50
|
|
|
|
|
|
|
78
|
0
|
|
|
|
|
|
return git_diff__delta_dup(a, pool); |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
/* otherwise, base this diff on the 'b' diff */ |
|
81
|
3
|
50
|
|
|
|
|
if ((dup = git_diff__delta_dup(b, pool)) == NULL) |
|
82
|
0
|
|
|
|
|
|
return NULL; |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
/* If 'a' status is uninteresting, then we're done */ |
|
85
|
3
|
50
|
|
|
|
|
if (a->status == GIT_DELTA_UNMODIFIED || |
|
|
|
50
|
|
|
|
|
|
|
86
|
3
|
50
|
|
|
|
|
a->status == GIT_DELTA_UNTRACKED || |
|
87
|
3
|
|
|
|
|
|
a->status == GIT_DELTA_UNREADABLE) |
|
88
|
0
|
|
|
|
|
|
return dup; |
|
89
|
|
|
|
|
|
|
|
|
90
|
3
|
50
|
|
|
|
|
GIT_ASSERT_WITH_RETVAL(b->status != GIT_DELTA_UNMODIFIED, NULL); |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
/* A cgit exception is that the diff of a file that is only in the |
|
93
|
|
|
|
|
|
|
* index (i.e. not in HEAD nor workdir) is given as empty. |
|
94
|
|
|
|
|
|
|
*/ |
|
95
|
3
|
50
|
|
|
|
|
if (dup->status == GIT_DELTA_DELETED) { |
|
96
|
0
|
0
|
|
|
|
|
if (a->status == GIT_DELTA_ADDED) { |
|
97
|
0
|
|
|
|
|
|
dup->status = GIT_DELTA_UNMODIFIED; |
|
98
|
0
|
|
|
|
|
|
dup->nfiles = 2; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
/* else don't overwrite DELETE status */ |
|
101
|
|
|
|
|
|
|
} else { |
|
102
|
3
|
|
|
|
|
|
dup->status = a->status; |
|
103
|
3
|
|
|
|
|
|
dup->nfiles = a->nfiles; |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
|
|
106
|
3
|
|
|
|
|
|
git_oid_cpy(&dup->old_file.id, &a->old_file.id); |
|
107
|
3
|
|
|
|
|
|
dup->old_file.mode = a->old_file.mode; |
|
108
|
3
|
|
|
|
|
|
dup->old_file.size = a->old_file.size; |
|
109
|
3
|
|
|
|
|
|
dup->old_file.flags = a->old_file.flags; |
|
110
|
|
|
|
|
|
|
|
|
111
|
3
|
|
|
|
|
|
return dup; |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
6
|
|
|
|
|
|
/*
 * Merge the deltas of `from` into `onto`.
 *
 * Both delta lists are walked in parallel, ordered by old_file.path
 * (compared case-insensitively when GIT_DIFF_IGNORE_CASE is set on
 * `onto`).  A delta present in only one list is duplicated; when both
 * lists have a delta for the same path, `cb` decides the merged result
 * (its arguments are swapped for reversed diffs).  Deltas that
 * git_diff_delta__should_skip() rejects under onto's options are dropped.
 *
 * On success the new list and its string pool replace the ones in `onto`.
 * On failure `onto` is left with its original deltas.
 *
 * Returns 0 on success (including when `from` is empty), or a negative
 * value on error; merging diffs whose IGNORE_CASE / REVERSE options
 * disagree is rejected with GIT_ERROR_INVALID.
 */
int git_diff__merge(
	git_diff *onto, const git_diff *from, git_diff__merge_cb cb)
{
	int error = 0;
	git_pool onto_pool;
	git_vector onto_new;
	git_diff_delta *delta;
	bool ignore_case, reversed;
	size_t i, j; /* same width as the vector lengths they are compared to */

	GIT_ASSERT_ARG(onto);
	GIT_ASSERT_ARG(from);

	if (!from->deltas.length)
		return 0;

	ignore_case = ((onto->opts.flags & GIT_DIFF_IGNORE_CASE) != 0);
	reversed    = ((onto->opts.flags & GIT_DIFF_REVERSE) != 0);

	if (ignore_case != ((from->opts.flags & GIT_DIFF_IGNORE_CASE) != 0) ||
	    reversed    != ((from->opts.flags & GIT_DIFF_REVERSE) != 0)) {
		git_error_set(GIT_ERROR_INVALID,
			"attempt to merge diffs created with conflicting options");
		return -1;
	}

	if (git_vector_init(&onto_new, onto->deltas.length, git_diff_delta__cmp) < 0)
		return -1;

	if (git_pool_init(&onto_pool, 1) < 0) {
		/* don't leak the vector that was just set up */
		git_vector_free(&onto_new);
		return -1;
	}

	for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) {
		git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i);
		const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j);
		int cmp = !f ? -1 : !o ? 1 :
			STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path);

		if (cmp < 0) {
			delta = git_diff__delta_dup(o, &onto_pool);
			i++;
		} else if (cmp > 0) {
			delta = git_diff__delta_dup(f, &onto_pool);
			j++;
		} else {
			const git_diff_delta *left = reversed ? f : o;
			const git_diff_delta *right = reversed ? o : f;

			delta = cb(left, right, &onto_pool);
			i++;
			j++;
		}

		/* the ignore rules for the target may not match the source
		 * or the result of a merged delta could be skippable...
		 */
		if (delta && git_diff_delta__should_skip(&onto->opts, delta)) {
			git__free(delta);
			continue;
		}

		/* a NULL delta here means the dup / callback failed */
		if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0)
			break;
	}

	if (!error) {
		git_vector_swap(&onto->deltas, &onto_new);
		git_pool_swap(&onto->pool, &onto_pool);

		if (reversed)
			onto->old_src = from->old_src;
		else
			onto->new_src = from->new_src;

		/* prefix strings also come from old pool, so recreate those.*/
		onto->opts.old_prefix =
			git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix);
		onto->opts.new_prefix =
			git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix);
	}

	/* after a successful swap these hold the *previous* list and pool;
	 * after a failure they hold the partially built replacements
	 */
	git_vector_free_deep(&onto_new);
	git_pool_clear(&onto_pool);

	return error;
}
|
198
|
|
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
|
/*
 * Merge `from` into `onto`, resolving deltas that exist in both diffs
 * with the C-git-compatible strategy (git_diff__merge_like_cgit).
 */
int git_diff_merge(git_diff *onto, const git_diff *from)
{
	git_diff__merge_cb merge_delta = git_diff__merge_like_cgit;

	return git_diff__merge(onto, from, merge_delta);
}
|
203
|
|
|
|
|
|
|
|
|
204
|
5
|
|
|
|
|
|
int git_diff_find_similar__hashsig_for_file( |
|
205
|
|
|
|
|
|
|
void **out, const git_diff_file *f, const char *path, void *p) |
|
206
|
|
|
|
|
|
|
{ |
|
207
|
5
|
|
|
|
|
|
git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; |
|
208
|
|
|
|
|
|
|
|
|
209
|
5
|
|
|
|
|
|
GIT_UNUSED(f); |
|
210
|
5
|
|
|
|
|
|
return git_hashsig_create_fromfile((git_hashsig **)out, path, opt); |
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
|
|
213
|
6
|
|
|
|
|
|
int git_diff_find_similar__hashsig_for_buf( |
|
214
|
|
|
|
|
|
|
void **out, const git_diff_file *f, const char *buf, size_t len, void *p) |
|
215
|
|
|
|
|
|
|
{ |
|
216
|
6
|
|
|
|
|
|
git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; |
|
217
|
|
|
|
|
|
|
|
|
218
|
6
|
|
|
|
|
|
GIT_UNUSED(f); |
|
219
|
6
|
|
|
|
|
|
return git_hashsig_create((git_hashsig **)out, buf, len, opt); |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
|
|
222
|
11
|
|
|
|
|
|
/*
 * Signature destructor callback: releases a signature produced by one of
 * the hashsig_for_* callbacks.  `payload` is ignored.
 */
void git_diff_find_similar__hashsig_free(void *sig, void *payload)
{
	GIT_UNUSED(payload);

	git_hashsig_free(sig);
}
|
227
|
|
|
|
|
|
|
|
|
228
|
7
|
|
|
|
|
|
/*
 * Similarity callback: compares two signatures with git_hashsig_compare.
 *
 * A non-negative comparison result is stored in `*score` and 0 is
 * returned; a negative result is returned as the error code and `*score`
 * is left untouched.  `payload` is ignored.
 */
int git_diff_find_similar__calc_similarity(
	int *score, void *siga, void *sigb, void *payload)
{
	int result;

	GIT_UNUSED(payload);

	result = git_hashsig_compare(siga, sigb);

	if (result >= 0) {
		*score = result;
		result = 0;
	}

	return result;
}
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
/* fallback for the rename, rename-from-rewrite and copy thresholds */
#define DEFAULT_THRESHOLD 50
/* fallback for the break-rewrite threshold */
#define DEFAULT_BREAK_REWRITE_THRESHOLD 60
/* fallback rename limit when neither the options nor the
 * "diff.renamelimit" config setting provide a usable value
 */
#define DEFAULT_RENAME_LIMIT 1000
|
245
|
|
|
|
|
|
|
|
|
246
|
11
|
|
|
|
|
|
static int normalize_find_opts( |
|
247
|
|
|
|
|
|
|
git_diff *diff, |
|
248
|
|
|
|
|
|
|
git_diff_find_options *opts, |
|
249
|
|
|
|
|
|
|
const git_diff_find_options *given) |
|
250
|
|
|
|
|
|
|
{ |
|
251
|
11
|
|
|
|
|
|
git_config *cfg = NULL; |
|
252
|
|
|
|
|
|
|
git_hashsig_option_t hashsig_opts; |
|
253
|
|
|
|
|
|
|
|
|
254
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_VERSION(given, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options"); |
|
255
|
|
|
|
|
|
|
|
|
256
|
22
|
|
|
|
|
|
if (diff->repo != NULL && |
|
257
|
11
|
|
|
|
|
|
git_repository_config__weakptr(&cfg, diff->repo) < 0) |
|
258
|
0
|
|
|
|
|
|
return -1; |
|
259
|
|
|
|
|
|
|
|
|
260
|
11
|
50
|
|
|
|
|
if (given) |
|
261
|
11
|
|
|
|
|
|
memcpy(opts, given, sizeof(*opts)); |
|
262
|
|
|
|
|
|
|
|
|
263
|
11
|
50
|
|
|
|
|
if (!given || |
|
|
|
100
|
|
|
|
|
|
|
264
|
11
|
|
|
|
|
|
(given->flags & GIT_DIFF_FIND_ALL) == GIT_DIFF_FIND_BY_CONFIG) |
|
265
|
|
|
|
|
|
|
{ |
|
266
|
2
|
50
|
|
|
|
|
if (cfg) { |
|
267
|
2
|
|
|
|
|
|
char *rule = |
|
268
|
2
|
|
|
|
|
|
git_config__get_string_force(cfg, "diff.renames", "true"); |
|
269
|
|
|
|
|
|
|
int boolval; |
|
270
|
|
|
|
|
|
|
|
|
271
|
2
|
50
|
|
|
|
|
if (!git__parse_bool(&boolval, rule) && !boolval) |
|
|
|
50
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
/* don't set FIND_RENAMES if bool value is false */; |
|
273
|
2
|
50
|
|
|
|
|
else if (!strcasecmp(rule, "copies") || !strcasecmp(rule, "copy")) |
|
|
|
50
|
|
|
|
|
|
|
274
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; |
|
275
|
|
|
|
|
|
|
else |
|
276
|
2
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES; |
|
277
|
|
|
|
|
|
|
|
|
278
|
2
|
|
|
|
|
|
git__free(rule); |
|
279
|
|
|
|
|
|
|
} else { |
|
280
|
|
|
|
|
|
|
/* set default flag */ |
|
281
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES; |
|
282
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
/* some flags imply others */ |
|
286
|
|
|
|
|
|
|
|
|
287
|
11
|
50
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) { |
|
288
|
|
|
|
|
|
|
/* if we are only looking for exact matches, then don't turn |
|
289
|
|
|
|
|
|
|
* MODIFIED items into ADD/DELETE pairs because it's too picky |
|
290
|
|
|
|
|
|
|
*/ |
|
291
|
0
|
|
|
|
|
|
opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES); |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
/* similarly, don't look for self-rewrites to split */ |
|
294
|
0
|
|
|
|
|
|
opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES; |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
|
|
297
|
11
|
50
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES) |
|
298
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_RENAMES; |
|
299
|
|
|
|
|
|
|
|
|
300
|
11
|
50
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED) |
|
301
|
0
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_COPIES; |
|
302
|
|
|
|
|
|
|
|
|
303
|
11
|
100
|
|
|
|
|
if (opts->flags & GIT_DIFF_BREAK_REWRITES) |
|
304
|
1
|
|
|
|
|
|
opts->flags |= GIT_DIFF_FIND_REWRITES; |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
#define USE_DEFAULT(X) ((X) == 0 || (X) > 100) |
|
307
|
|
|
|
|
|
|
|
|
308
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->rename_threshold)) |
|
|
|
50
|
|
|
|
|
|
|
309
|
10
|
|
|
|
|
|
opts->rename_threshold = DEFAULT_THRESHOLD; |
|
310
|
|
|
|
|
|
|
|
|
311
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->rename_from_rewrite_threshold)) |
|
|
|
50
|
|
|
|
|
|
|
312
|
10
|
|
|
|
|
|
opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD; |
|
313
|
|
|
|
|
|
|
|
|
314
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->copy_threshold)) |
|
|
|
50
|
|
|
|
|
|
|
315
|
10
|
|
|
|
|
|
opts->copy_threshold = DEFAULT_THRESHOLD; |
|
316
|
|
|
|
|
|
|
|
|
317
|
11
|
100
|
|
|
|
|
if (USE_DEFAULT(opts->break_rewrite_threshold)) |
|
|
|
50
|
|
|
|
|
|
|
318
|
10
|
|
|
|
|
|
opts->break_rewrite_threshold = DEFAULT_BREAK_REWRITE_THRESHOLD; |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
#undef USE_DEFAULT |
|
321
|
|
|
|
|
|
|
|
|
322
|
11
|
100
|
|
|
|
|
if (!opts->rename_limit) { |
|
323
|
10
|
50
|
|
|
|
|
if (cfg) { |
|
324
|
10
|
|
|
|
|
|
opts->rename_limit = git_config__get_int_force( |
|
325
|
|
|
|
|
|
|
cfg, "diff.renamelimit", DEFAULT_RENAME_LIMIT); |
|
326
|
|
|
|
|
|
|
} |
|
327
|
|
|
|
|
|
|
|
|
328
|
10
|
50
|
|
|
|
|
if (opts->rename_limit <= 0) |
|
329
|
0
|
|
|
|
|
|
opts->rename_limit = DEFAULT_RENAME_LIMIT; |
|
330
|
|
|
|
|
|
|
} |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
/* assign the internal metric with whitespace flag as payload */ |
|
333
|
11
|
50
|
|
|
|
|
if (!opts->metric) { |
|
334
|
11
|
|
|
|
|
|
opts->metric = git__malloc(sizeof(git_diff_similarity_metric)); |
|
335
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(opts->metric); |
|
336
|
|
|
|
|
|
|
|
|
337
|
11
|
|
|
|
|
|
opts->metric->file_signature = git_diff_find_similar__hashsig_for_file; |
|
338
|
11
|
|
|
|
|
|
opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf; |
|
339
|
11
|
|
|
|
|
|
opts->metric->free_signature = git_diff_find_similar__hashsig_free; |
|
340
|
11
|
|
|
|
|
|
opts->metric->similarity = git_diff_find_similar__calc_similarity; |
|
341
|
|
|
|
|
|
|
|
|
342
|
11
|
100
|
|
|
|
|
if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) |
|
343
|
1
|
|
|
|
|
|
hashsig_opts = GIT_HASHSIG_IGNORE_WHITESPACE; |
|
344
|
10
|
50
|
|
|
|
|
else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) |
|
345
|
0
|
|
|
|
|
|
hashsig_opts = GIT_HASHSIG_NORMAL; |
|
346
|
|
|
|
|
|
|
else |
|
347
|
10
|
|
|
|
|
|
hashsig_opts = GIT_HASHSIG_SMART_WHITESPACE; |
|
348
|
11
|
|
|
|
|
|
hashsig_opts |= GIT_HASHSIG_ALLOW_SMALL_FILES; |
|
349
|
11
|
|
|
|
|
|
opts->metric->payload = (void *)hashsig_opts; |
|
350
|
|
|
|
|
|
|
} |
|
351
|
|
|
|
|
|
|
|
|
352
|
11
|
|
|
|
|
|
return 0; |
|
353
|
|
|
|
|
|
|
} |
|
354
|
|
|
|
|
|
|
|
|
355
|
0
|
|
|
|
|
|
static int insert_delete_side_of_split( |
|
356
|
|
|
|
|
|
|
git_diff *diff, git_vector *onto, const git_diff_delta *delta) |
|
357
|
|
|
|
|
|
|
{ |
|
358
|
|
|
|
|
|
|
/* make new record for DELETED side of split */ |
|
359
|
0
|
|
|
|
|
|
git_diff_delta *deleted = git_diff__delta_dup(delta, &diff->pool); |
|
360
|
0
|
0
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(deleted); |
|
361
|
|
|
|
|
|
|
|
|
362
|
0
|
|
|
|
|
|
deleted->status = GIT_DELTA_DELETED; |
|
363
|
0
|
|
|
|
|
|
deleted->nfiles = 1; |
|
364
|
0
|
|
|
|
|
|
memset(&deleted->new_file, 0, sizeof(deleted->new_file)); |
|
365
|
0
|
|
|
|
|
|
deleted->new_file.path = deleted->old_file.path; |
|
366
|
0
|
|
|
|
|
|
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
|
367
|
|
|
|
|
|
|
|
|
368
|
0
|
|
|
|
|
|
return git_vector_insert(onto, deleted); |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
|
|
371
|
6
|
|
|
|
|
|
static int apply_splits_and_deletes( |
|
372
|
|
|
|
|
|
|
git_diff *diff, size_t expected_size, bool actually_split) |
|
373
|
|
|
|
|
|
|
{ |
|
374
|
6
|
|
|
|
|
|
git_vector onto = GIT_VECTOR_INIT; |
|
375
|
|
|
|
|
|
|
size_t i; |
|
376
|
|
|
|
|
|
|
git_diff_delta *delta; |
|
377
|
|
|
|
|
|
|
|
|
378
|
6
|
50
|
|
|
|
|
if (git_vector_init(&onto, expected_size, diff->deltas._cmp) < 0) |
|
379
|
0
|
|
|
|
|
|
return -1; |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
/* build new delta list without TO_DELETE and splitting TO_SPLIT */ |
|
382
|
21
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, i, delta) { |
|
383
|
15
|
100
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
|
384
|
6
|
|
|
|
|
|
continue; |
|
385
|
|
|
|
|
|
|
|
|
386
|
9
|
50
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) { |
|
|
|
0
|
|
|
|
|
|
|
387
|
0
|
|
|
|
|
|
delta->similarity = 0; |
|
388
|
|
|
|
|
|
|
|
|
389
|
0
|
0
|
|
|
|
|
if (insert_delete_side_of_split(diff, &onto, delta) < 0) |
|
390
|
0
|
|
|
|
|
|
goto on_error; |
|
391
|
|
|
|
|
|
|
|
|
392
|
0
|
0
|
|
|
|
|
if (diff->new_src == GIT_ITERATOR_WORKDIR) |
|
393
|
0
|
|
|
|
|
|
delta->status = GIT_DELTA_UNTRACKED; |
|
394
|
|
|
|
|
|
|
else |
|
395
|
0
|
|
|
|
|
|
delta->status = GIT_DELTA_ADDED; |
|
396
|
0
|
|
|
|
|
|
delta->nfiles = 1; |
|
397
|
0
|
|
|
|
|
|
memset(&delta->old_file, 0, sizeof(delta->old_file)); |
|
398
|
0
|
|
|
|
|
|
delta->old_file.path = delta->new_file.path; |
|
399
|
0
|
|
|
|
|
|
delta->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
|
400
|
|
|
|
|
|
|
} |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
/* clean up delta before inserting into new list */ |
|
403
|
9
|
|
|
|
|
|
GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); |
|
404
|
|
|
|
|
|
|
|
|
405
|
9
|
50
|
|
|
|
|
if (delta->status != GIT_DELTA_COPIED && |
|
|
|
100
|
|
|
|
|
|
|
406
|
3
|
50
|
|
|
|
|
delta->status != GIT_DELTA_RENAMED && |
|
407
|
0
|
0
|
|
|
|
|
(delta->status != GIT_DELTA_MODIFIED || actually_split)) |
|
408
|
3
|
|
|
|
|
|
delta->similarity = 0; |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
/* insert into new list */ |
|
411
|
9
|
50
|
|
|
|
|
if (git_vector_insert(&onto, delta) < 0) |
|
412
|
0
|
|
|
|
|
|
goto on_error; |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
/* cannot return an error past this point */ |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
/* free deltas from old list that didn't make it to the new one */ |
|
418
|
21
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, i, delta) { |
|
419
|
15
|
100
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
|
420
|
6
|
|
|
|
|
|
git__free(delta); |
|
421
|
|
|
|
|
|
|
} |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
/* swap new delta list into place */ |
|
424
|
6
|
|
|
|
|
|
git_vector_swap(&diff->deltas, &onto); |
|
425
|
6
|
|
|
|
|
|
git_vector_free(&onto); |
|
426
|
6
|
|
|
|
|
|
git_vector_sort(&diff->deltas); |
|
427
|
|
|
|
|
|
|
|
|
428
|
6
|
|
|
|
|
|
return 0; |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
on_error: |
|
431
|
0
|
|
|
|
|
|
git_vector_free_deep(&onto); |
|
432
|
|
|
|
|
|
|
|
|
433
|
6
|
|
|
|
|
|
return -1; |
|
434
|
|
|
|
|
|
|
} |
|
435
|
|
|
|
|
|
|
|
|
436
|
31
|
|
|
|
|
|
/*
 * Map a "file index" onto a side of a delta: index 2*n is the old file of
 * delta n, index 2*n+1 is its new file.
 */
GIT_INLINE(git_diff_file *) similarity_get_file(git_diff *diff, size_t idx)
{
	git_diff_delta *delta = git_vector_get(&diff->deltas, idx >> 1);

	if (idx & 1)
		return &delta->new_file;

	return &delta->old_file;
}
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
typedef struct { |
|
443
|
|
|
|
|
|
|
size_t idx; |
|
444
|
|
|
|
|
|
|
git_iterator_t src; |
|
445
|
|
|
|
|
|
|
git_repository *repo; |
|
446
|
|
|
|
|
|
|
git_diff_file *file; |
|
447
|
|
|
|
|
|
|
git_str data; |
|
448
|
|
|
|
|
|
|
git_odb_object *odb_obj; |
|
449
|
|
|
|
|
|
|
git_blob *blob; |
|
450
|
|
|
|
|
|
|
} similarity_info; |
|
451
|
|
|
|
|
|
|
|
|
452
|
11
|
|
|
|
|
|
static int similarity_init( |
|
453
|
|
|
|
|
|
|
similarity_info *info, git_diff *diff, size_t file_idx) |
|
454
|
|
|
|
|
|
|
{ |
|
455
|
11
|
|
|
|
|
|
info->idx = file_idx; |
|
456
|
11
|
100
|
|
|
|
|
info->src = (file_idx & 1) ? diff->new_src : diff->old_src; |
|
457
|
11
|
|
|
|
|
|
info->repo = diff->repo; |
|
458
|
11
|
|
|
|
|
|
info->file = similarity_get_file(diff, file_idx); |
|
459
|
11
|
|
|
|
|
|
info->odb_obj = NULL; |
|
460
|
11
|
|
|
|
|
|
info->blob = NULL; |
|
461
|
11
|
|
|
|
|
|
git_str_init(&info->data, 0); |
|
462
|
|
|
|
|
|
|
|
|
463
|
11
|
100
|
|
|
|
|
if ((info->file->flags & GIT_DIFF_FLAG_VALID_SIZE) || |
|
|
|
50
|
|
|
|
|
|
|
464
|
4
|
|
|
|
|
|
info->src == GIT_ITERATOR_WORKDIR) |
|
465
|
7
|
|
|
|
|
|
return 0; |
|
466
|
|
|
|
|
|
|
|
|
467
|
4
|
|
|
|
|
|
return git_diff_file__resolve_zero_size( |
|
468
|
|
|
|
|
|
|
info->file, &info->odb_obj, info->repo); |
|
469
|
|
|
|
|
|
|
} |
|
470
|
|
|
|
|
|
|
|
|
471
|
11
|
|
|
|
|
|
static int similarity_sig( |
|
472
|
|
|
|
|
|
|
similarity_info *info, |
|
473
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
|
474
|
|
|
|
|
|
|
void **cache) |
|
475
|
|
|
|
|
|
|
{ |
|
476
|
11
|
|
|
|
|
|
int error = 0; |
|
477
|
11
|
|
|
|
|
|
git_diff_file *file = info->file; |
|
478
|
|
|
|
|
|
|
|
|
479
|
11
|
100
|
|
|
|
|
if (info->src == GIT_ITERATOR_WORKDIR) { |
|
480
|
5
|
50
|
|
|
|
|
if ((error = git_repository_workdir_path( |
|
481
|
|
|
|
|
|
|
&info->data, info->repo, file->path)) < 0) |
|
482
|
0
|
|
|
|
|
|
return error; |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
/* if path is not a regular file, just skip this item */ |
|
485
|
5
|
50
|
|
|
|
|
if (!git_fs_path_isfile(info->data.ptr)) |
|
486
|
0
|
|
|
|
|
|
return 0; |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
/* TODO: apply wd-to-odb filters to file data if necessary */ |
|
489
|
|
|
|
|
|
|
|
|
490
|
5
|
|
|
|
|
|
error = opts->metric->file_signature( |
|
491
|
5
|
|
|
|
|
|
&cache[info->idx], info->file, |
|
492
|
10
|
|
|
|
|
|
info->data.ptr, opts->metric->payload); |
|
493
|
|
|
|
|
|
|
} else { |
|
494
|
|
|
|
|
|
|
/* if we didn't initially know the size, we might have an odb_obj |
|
495
|
|
|
|
|
|
|
* around from earlier, so convert that, otherwise load the blob now |
|
496
|
|
|
|
|
|
|
*/ |
|
497
|
6
|
50
|
|
|
|
|
if (info->odb_obj != NULL) |
|
498
|
0
|
|
|
|
|
|
error = git_object__from_odb_object( |
|
499
|
0
|
|
|
|
|
|
(git_object **)&info->blob, info->repo, |
|
500
|
|
|
|
|
|
|
info->odb_obj, GIT_OBJECT_BLOB); |
|
501
|
|
|
|
|
|
|
else |
|
502
|
6
|
|
|
|
|
|
error = git_blob_lookup(&info->blob, info->repo, &file->id); |
|
503
|
|
|
|
|
|
|
|
|
504
|
6
|
50
|
|
|
|
|
if (error < 0) { |
|
505
|
|
|
|
|
|
|
/* if lookup fails, just skip this item in similarity calc */ |
|
506
|
0
|
|
|
|
|
|
git_error_clear(); |
|
507
|
|
|
|
|
|
|
} else { |
|
508
|
|
|
|
|
|
|
size_t sz; |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
/* index size may not be actual blob size if filtered */ |
|
511
|
6
|
50
|
|
|
|
|
if (file->size != git_blob_rawsize(info->blob)) |
|
512
|
0
|
|
|
|
|
|
file->size = git_blob_rawsize(info->blob); |
|
513
|
|
|
|
|
|
|
|
|
514
|
6
|
50
|
|
|
|
|
sz = git__is_sizet(file->size) ? (size_t)file->size : (size_t)-1; |
|
515
|
|
|
|
|
|
|
|
|
516
|
6
|
|
|
|
|
|
error = opts->metric->buffer_signature( |
|
517
|
6
|
|
|
|
|
|
&cache[info->idx], info->file, |
|
518
|
6
|
|
|
|
|
|
git_blob_rawcontent(info->blob), sz, opts->metric->payload); |
|
519
|
|
|
|
|
|
|
} |
|
520
|
|
|
|
|
|
|
} |
|
521
|
|
|
|
|
|
|
|
|
522
|
11
|
|
|
|
|
|
return error; |
|
523
|
|
|
|
|
|
|
} |
|
524
|
|
|
|
|
|
|
|
|
525
|
14
|
|
|
|
|
|
/*
 * Release whatever similarity_init / similarity_sig attached to `info`:
 * the odb object if there is one, and then either the blob or, when no
 * blob was loaded, the path buffer.
 */
static void similarity_unload(similarity_info *info)
{
	if (info->odb_obj != NULL)
		git_odb_object_free(info->odb_obj);

	if (info->blob == NULL) {
		git_str_dispose(&info->data);
		return;
	}

	git_blob_free(info->blob);
}
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
/* true if any bit of `flag_name` is set in (opts)->flags; the argument is
 * parenthesized so that compound masks such as (A | B) work as expected
 */
#define FLAG_SET(opts,flag_name) (((opts)->flags & (flag_name)) != 0)
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
/* - score < 0 means files cannot be compared |
|
539
|
|
|
|
|
|
|
* - score >= 100 means files are exact match |
|
540
|
|
|
|
|
|
|
* - score == 0 means files are completely different |
|
541
|
|
|
|
|
|
|
*/ |
|
542
|
10
|
|
|
|
|
|
static int similarity_measure( |
|
543
|
|
|
|
|
|
|
int *score, |
|
544
|
|
|
|
|
|
|
git_diff *diff, |
|
545
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
|
546
|
|
|
|
|
|
|
void **cache, |
|
547
|
|
|
|
|
|
|
size_t a_idx, |
|
548
|
|
|
|
|
|
|
size_t b_idx) |
|
549
|
|
|
|
|
|
|
{ |
|
550
|
10
|
|
|
|
|
|
git_diff_file *a_file = similarity_get_file(diff, a_idx); |
|
551
|
10
|
|
|
|
|
|
git_diff_file *b_file = similarity_get_file(diff, b_idx); |
|
552
|
10
|
|
|
|
|
|
bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY); |
|
553
|
10
|
|
|
|
|
|
int error = 0; |
|
554
|
|
|
|
|
|
|
similarity_info a_info, b_info; |
|
555
|
|
|
|
|
|
|
|
|
556
|
10
|
|
|
|
|
|
*score = -1; |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
/* don't try to compare things that aren't files */ |
|
559
|
10
|
50
|
|
|
|
|
if (!GIT_MODE_ISBLOB(a_file->mode) || !GIT_MODE_ISBLOB(b_file->mode)) |
|
|
|
50
|
|
|
|
|
|
|
560
|
0
|
|
|
|
|
|
return 0; |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
/* if exact match is requested, force calculation of missing OIDs now */ |
|
563
|
10
|
50
|
|
|
|
|
if (exact_match) { |
|
564
|
0
|
0
|
|
|
|
|
if (git_oid_is_zero(&a_file->id) && |
|
|
|
0
|
|
|
|
|
|
|
565
|
0
|
0
|
|
|
|
|
diff->old_src == GIT_ITERATOR_WORKDIR && |
|
566
|
0
|
|
|
|
|
|
!git_diff__oid_for_file(&a_file->id, |
|
567
|
0
|
|
|
|
|
|
diff, a_file->path, a_file->mode, a_file->size)) |
|
568
|
0
|
|
|
|
|
|
a_file->flags |= GIT_DIFF_FLAG_VALID_ID; |
|
569
|
|
|
|
|
|
|
|
|
570
|
0
|
0
|
|
|
|
|
if (git_oid_is_zero(&b_file->id) && |
|
|
|
0
|
|
|
|
|
|
|
571
|
0
|
0
|
|
|
|
|
diff->new_src == GIT_ITERATOR_WORKDIR && |
|
572
|
0
|
|
|
|
|
|
!git_diff__oid_for_file(&b_file->id, |
|
573
|
0
|
|
|
|
|
|
diff, b_file->path, b_file->mode, b_file->size)) |
|
574
|
0
|
|
|
|
|
|
b_file->flags |= GIT_DIFF_FLAG_VALID_ID; |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
/* check OID match as a quick test */ |
|
578
|
10
|
100
|
|
|
|
|
if (git_oid__cmp(&a_file->id, &b_file->id) == 0) { |
|
579
|
3
|
|
|
|
|
|
*score = 100; |
|
580
|
3
|
|
|
|
|
|
return 0; |
|
581
|
|
|
|
|
|
|
} |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
/* don't calculate signatures if we are doing exact match */ |
|
584
|
7
|
50
|
|
|
|
|
if (exact_match) { |
|
585
|
0
|
|
|
|
|
|
*score = 0; |
|
586
|
0
|
|
|
|
|
|
return 0; |
|
587
|
|
|
|
|
|
|
} |
|
588
|
|
|
|
|
|
|
|
|
589
|
7
|
|
|
|
|
|
memset(&a_info, 0, sizeof(a_info)); |
|
590
|
7
|
|
|
|
|
|
memset(&b_info, 0, sizeof(b_info)); |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
/* set up similarity data (will try to update missing file sizes) */ |
|
593
|
7
|
100
|
|
|
|
|
if (!cache[a_idx] && (error = similarity_init(&a_info, diff, a_idx)) < 0) |
|
|
|
50
|
|
|
|
|
|
|
594
|
0
|
|
|
|
|
|
return error; |
|
595
|
7
|
50
|
|
|
|
|
if (!cache[b_idx] && (error = similarity_init(&b_info, diff, b_idx)) < 0) |
|
|
|
50
|
|
|
|
|
|
|
596
|
0
|
|
|
|
|
|
goto cleanup; |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
/* check if file sizes are nowhere near each other */ |
|
599
|
7
|
50
|
|
|
|
|
if (a_file->size > 127 && |
|
|
|
0
|
|
|
|
|
|
|
600
|
0
|
0
|
|
|
|
|
b_file->size > 127 && |
|
601
|
0
|
0
|
|
|
|
|
(a_file->size > (b_file->size << 3) || |
|
602
|
0
|
|
|
|
|
|
b_file->size > (a_file->size << 3))) |
|
603
|
|
|
|
|
|
|
goto cleanup; |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
/* update signature cache if needed */ |
|
606
|
7
|
100
|
|
|
|
|
if (!cache[a_idx]) { |
|
607
|
4
|
50
|
|
|
|
|
if ((error = similarity_sig(&a_info, opts, cache)) < 0) |
|
608
|
0
|
|
|
|
|
|
goto cleanup; |
|
609
|
|
|
|
|
|
|
} |
|
610
|
7
|
50
|
|
|
|
|
if (!cache[b_idx]) { |
|
611
|
7
|
50
|
|
|
|
|
if ((error = similarity_sig(&b_info, opts, cache)) < 0) |
|
612
|
0
|
|
|
|
|
|
goto cleanup; |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
/* calculate similarity provided that the metric choose to process |
|
616
|
|
|
|
|
|
|
* both the a and b files (some may not if file is too big, etc). |
|
617
|
|
|
|
|
|
|
*/ |
|
618
|
7
|
50
|
|
|
|
|
if (cache[a_idx] && cache[b_idx]) |
|
|
|
50
|
|
|
|
|
|
|
619
|
7
|
|
|
|
|
|
error = opts->metric->similarity( |
|
620
|
21
|
|
|
|
|
|
score, cache[a_idx], cache[b_idx], opts->metric->payload); |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
cleanup: |
|
623
|
7
|
|
|
|
|
|
similarity_unload(&a_info); |
|
624
|
7
|
|
|
|
|
|
similarity_unload(&b_info); |
|
625
|
|
|
|
|
|
|
|
|
626
|
10
|
|
|
|
|
|
return error; |
|
627
|
|
|
|
|
|
|
} |
|
628
|
|
|
|
|
|
|
|
|
629
|
0
|
|
|
|
|
|
static int calc_self_similarity( |
|
630
|
|
|
|
|
|
|
git_diff *diff, |
|
631
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
|
632
|
|
|
|
|
|
|
size_t delta_idx, |
|
633
|
|
|
|
|
|
|
void **cache) |
|
634
|
|
|
|
|
|
|
{ |
|
635
|
0
|
|
|
|
|
|
int error, similarity = -1; |
|
636
|
0
|
0
|
|
|
|
|
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); |
|
637
|
|
|
|
|
|
|
|
|
638
|
0
|
0
|
|
|
|
|
if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0) |
|
639
|
0
|
|
|
|
|
|
return 0; |
|
640
|
|
|
|
|
|
|
|
|
641
|
0
|
|
|
|
|
|
error = similarity_measure( |
|
642
|
0
|
|
|
|
|
|
&similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1); |
|
643
|
0
|
0
|
|
|
|
|
if (error < 0) |
|
644
|
0
|
|
|
|
|
|
return error; |
|
645
|
|
|
|
|
|
|
|
|
646
|
0
|
0
|
|
|
|
|
if (similarity >= 0) { |
|
647
|
0
|
|
|
|
|
|
delta->similarity = (uint16_t)similarity; |
|
648
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY; |
|
649
|
|
|
|
|
|
|
} |
|
650
|
|
|
|
|
|
|
|
|
651
|
0
|
|
|
|
|
|
return 0; |
|
652
|
|
|
|
|
|
|
} |
|
653
|
|
|
|
|
|
|
|
|
654
|
25
|
|
|
|
|
|
static bool is_rename_target( |
|
655
|
|
|
|
|
|
|
git_diff *diff, |
|
656
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
|
657
|
|
|
|
|
|
|
size_t delta_idx, |
|
658
|
|
|
|
|
|
|
void **cache) |
|
659
|
|
|
|
|
|
|
{ |
|
660
|
25
|
50
|
|
|
|
|
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
/* skip things that aren't plain blobs */ |
|
663
|
25
|
100
|
|
|
|
|
if (!GIT_MODE_ISBLOB(delta->new_file.mode)) |
|
664
|
10
|
|
|
|
|
|
return false; |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
/* only consider ADDED, RENAMED, COPIED, and split MODIFIED as |
|
667
|
|
|
|
|
|
|
* targets; maybe include UNTRACKED if requested. |
|
668
|
|
|
|
|
|
|
*/ |
|
669
|
15
|
|
|
|
|
|
switch (delta->status) { |
|
670
|
|
|
|
|
|
|
case GIT_DELTA_UNMODIFIED: |
|
671
|
|
|
|
|
|
|
case GIT_DELTA_DELETED: |
|
672
|
|
|
|
|
|
|
case GIT_DELTA_IGNORED: |
|
673
|
|
|
|
|
|
|
case GIT_DELTA_CONFLICTED: |
|
674
|
0
|
|
|
|
|
|
return false; |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
case GIT_DELTA_MODIFIED: |
|
677
|
2
|
50
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && |
|
|
|
50
|
|
|
|
|
|
|
678
|
2
|
|
|
|
|
|
!FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) |
|
679
|
2
|
|
|
|
|
|
return false; |
|
680
|
|
|
|
|
|
|
|
|
681
|
0
|
0
|
|
|
|
|
if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) |
|
682
|
0
|
|
|
|
|
|
return false; |
|
683
|
|
|
|
|
|
|
|
|
684
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && |
|
|
|
0
|
|
|
|
|
|
|
685
|
0
|
|
|
|
|
|
delta->similarity < opts->break_rewrite_threshold) { |
|
686
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; |
|
687
|
0
|
|
|
|
|
|
break; |
|
688
|
|
|
|
|
|
|
} |
|
689
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && |
|
|
|
0
|
|
|
|
|
|
|
690
|
0
|
|
|
|
|
|
delta->similarity < opts->rename_from_rewrite_threshold) { |
|
691
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; |
|
692
|
0
|
|
|
|
|
|
break; |
|
693
|
|
|
|
|
|
|
} |
|
694
|
|
|
|
|
|
|
|
|
695
|
0
|
|
|
|
|
|
return false; |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
case GIT_DELTA_UNTRACKED: |
|
698
|
7
|
50
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED)) |
|
699
|
0
|
|
|
|
|
|
return false; |
|
700
|
7
|
|
|
|
|
|
break; |
|
701
|
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
default: /* all other status values should be checked */ |
|
703
|
6
|
|
|
|
|
|
break; |
|
704
|
|
|
|
|
|
|
} |
|
705
|
|
|
|
|
|
|
|
|
706
|
13
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET; |
|
707
|
13
|
|
|
|
|
|
return true; |
|
708
|
|
|
|
|
|
|
} |
|
709
|
|
|
|
|
|
|
|
|
710
|
25
|
|
|
|
|
|
static bool is_rename_source( |
|
711
|
|
|
|
|
|
|
git_diff *diff, |
|
712
|
|
|
|
|
|
|
const git_diff_find_options *opts, |
|
713
|
|
|
|
|
|
|
size_t delta_idx, |
|
714
|
|
|
|
|
|
|
void **cache) |
|
715
|
|
|
|
|
|
|
{ |
|
716
|
25
|
50
|
|
|
|
|
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); |
|
717
|
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
/* skip things that aren't blobs */ |
|
719
|
25
|
100
|
|
|
|
|
if (!GIT_MODE_ISBLOB(delta->old_file.mode)) |
|
720
|
16
|
|
|
|
|
|
return false; |
|
721
|
|
|
|
|
|
|
|
|
722
|
9
|
|
|
|
|
|
switch (delta->status) { |
|
723
|
|
|
|
|
|
|
case GIT_DELTA_ADDED: |
|
724
|
|
|
|
|
|
|
case GIT_DELTA_UNTRACKED: |
|
725
|
|
|
|
|
|
|
case GIT_DELTA_UNREADABLE: |
|
726
|
|
|
|
|
|
|
case GIT_DELTA_IGNORED: |
|
727
|
|
|
|
|
|
|
case GIT_DELTA_CONFLICTED: |
|
728
|
0
|
|
|
|
|
|
return false; |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
case GIT_DELTA_DELETED: |
|
731
|
|
|
|
|
|
|
case GIT_DELTA_TYPECHANGE: |
|
732
|
7
|
|
|
|
|
|
break; |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
case GIT_DELTA_UNMODIFIED: |
|
735
|
0
|
0
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) |
|
736
|
0
|
|
|
|
|
|
return false; |
|
737
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED)) |
|
738
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_DELETE; |
|
739
|
0
|
|
|
|
|
|
break; |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
default: /* MODIFIED, RENAMED, COPIED */ |
|
742
|
|
|
|
|
|
|
/* if we're finding copies, this could be a source */ |
|
743
|
2
|
50
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) |
|
744
|
0
|
|
|
|
|
|
break; |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
/* otherwise, this is only a source if we can split it */ |
|
747
|
2
|
50
|
|
|
|
|
if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && |
|
|
|
50
|
|
|
|
|
|
|
748
|
2
|
|
|
|
|
|
!FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) |
|
749
|
2
|
|
|
|
|
|
return false; |
|
750
|
|
|
|
|
|
|
|
|
751
|
0
|
0
|
|
|
|
|
if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) |
|
752
|
0
|
|
|
|
|
|
return false; |
|
753
|
|
|
|
|
|
|
|
|
754
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && |
|
|
|
0
|
|
|
|
|
|
|
755
|
0
|
|
|
|
|
|
delta->similarity < opts->break_rewrite_threshold) { |
|
756
|
0
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; |
|
757
|
0
|
|
|
|
|
|
break; |
|
758
|
|
|
|
|
|
|
} |
|
759
|
|
|
|
|
|
|
|
|
760
|
0
|
0
|
|
|
|
|
if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && |
|
|
|
0
|
|
|
|
|
|
|
761
|
0
|
|
|
|
|
|
delta->similarity < opts->rename_from_rewrite_threshold) |
|
762
|
0
|
|
|
|
|
|
break; |
|
763
|
|
|
|
|
|
|
|
|
764
|
0
|
|
|
|
|
|
return false; |
|
765
|
|
|
|
|
|
|
} |
|
766
|
|
|
|
|
|
|
|
|
767
|
7
|
|
|
|
|
|
delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE; |
|
768
|
7
|
|
|
|
|
|
return true; |
|
769
|
|
|
|
|
|
|
} |
|
770
|
|
|
|
|
|
|
|
|
771
|
0
|
|
|
|
|
|
GIT_INLINE(bool) delta_is_split(git_diff_delta *delta) |
|
772
|
|
|
|
|
|
|
{ |
|
773
|
0
|
0
|
|
|
|
|
return (delta->status == GIT_DELTA_TYPECHANGE || |
|
|
|
0
|
|
|
|
|
|
|
774
|
0
|
|
|
|
|
|
(delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0); |
|
775
|
|
|
|
|
|
|
} |
|
776
|
|
|
|
|
|
|
|
|
777
|
6
|
|
|
|
|
|
GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta) |
|
778
|
|
|
|
|
|
|
{ |
|
779
|
7
|
50
|
|
|
|
|
return (delta->status == GIT_DELTA_ADDED || |
|
780
|
0
|
0
|
|
|
|
|
delta->status == GIT_DELTA_UNTRACKED || |
|
781
|
7
|
100
|
|
|
|
|
delta->status == GIT_DELTA_UNREADABLE || |
|
|
|
0
|
|
|
|
|
|
|
782
|
0
|
|
|
|
|
|
delta->status == GIT_DELTA_IGNORED); |
|
783
|
|
|
|
|
|
|
} |
|
784
|
|
|
|
|
|
|
|
|
785
|
6
|
|
|
|
|
|
GIT_INLINE(void) delta_make_rename( |
|
786
|
|
|
|
|
|
|
git_diff_delta *to, const git_diff_delta *from, uint16_t similarity) |
|
787
|
|
|
|
|
|
|
{ |
|
788
|
6
|
|
|
|
|
|
to->status = GIT_DELTA_RENAMED; |
|
789
|
6
|
|
|
|
|
|
to->similarity = similarity; |
|
790
|
6
|
|
|
|
|
|
to->nfiles = 2; |
|
791
|
6
|
|
|
|
|
|
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); |
|
792
|
6
|
|
|
|
|
|
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
|
793
|
6
|
|
|
|
|
|
} |
|
794
|
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
/*
 * One entry of the source<->target match tables built by
 * git_diff_find_similar().  The tables are zero-initialized, and a
 * similarity of 0 is treated there as "no match recorded".
 */
typedef struct {
	size_t idx;          /* index (into diff->deltas) of the matched delta */
	uint16_t similarity; /* score of that match; 0 when there is none */
} diff_find_match;
|
799
|
|
|
|
|
|
|
|
|
800
|
11
|
|
|
|
|
|
int git_diff_find_similar( |
|
801
|
|
|
|
|
|
|
git_diff *diff, |
|
802
|
|
|
|
|
|
|
const git_diff_find_options *given_opts) |
|
803
|
|
|
|
|
|
|
{ |
|
804
|
|
|
|
|
|
|
size_t s, t; |
|
805
|
11
|
|
|
|
|
|
int error = 0, result; |
|
806
|
|
|
|
|
|
|
uint16_t similarity; |
|
807
|
|
|
|
|
|
|
git_diff_delta *src, *tgt; |
|
808
|
11
|
|
|
|
|
|
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT; |
|
809
|
11
|
|
|
|
|
|
size_t num_deltas, num_srcs = 0, num_tgts = 0; |
|
810
|
11
|
|
|
|
|
|
size_t tried_srcs = 0, tried_tgts = 0; |
|
811
|
11
|
|
|
|
|
|
size_t num_rewrites = 0, num_updates = 0, num_bumped = 0; |
|
812
|
|
|
|
|
|
|
size_t sigcache_size; |
|
813
|
11
|
|
|
|
|
|
void **sigcache = NULL; /* cache of similarity metric file signatures */ |
|
814
|
11
|
|
|
|
|
|
diff_find_match *tgt2src = NULL; |
|
815
|
11
|
|
|
|
|
|
diff_find_match *src2tgt = NULL; |
|
816
|
11
|
|
|
|
|
|
diff_find_match *tgt2src_copy = NULL; |
|
817
|
|
|
|
|
|
|
diff_find_match *best_match; |
|
818
|
|
|
|
|
|
|
git_diff_file swap; |
|
819
|
|
|
|
|
|
|
|
|
820
|
11
|
50
|
|
|
|
|
GIT_ASSERT_ARG(diff); |
|
821
|
|
|
|
|
|
|
|
|
822
|
11
|
50
|
|
|
|
|
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) |
|
823
|
0
|
|
|
|
|
|
return error; |
|
824
|
|
|
|
|
|
|
|
|
825
|
11
|
|
|
|
|
|
num_deltas = diff->deltas.length; |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
/* TODO: maybe abort if deltas.length > rename_limit ??? */ |
|
828
|
11
|
50
|
|
|
|
|
if (!num_deltas || !git__is_uint32(num_deltas)) |
|
|
|
50
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
goto cleanup; |
|
830
|
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
/* No flags set; nothing to do */ |
|
832
|
11
|
50
|
|
|
|
|
if ((opts.flags & GIT_DIFF_FIND_ALL) == 0) |
|
833
|
0
|
|
|
|
|
|
goto cleanup; |
|
834
|
|
|
|
|
|
|
|
|
835
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC_MULTIPLY(&sigcache_size, num_deltas, 2); |
|
|
|
50
|
|
|
|
|
|
|
836
|
11
|
|
|
|
|
|
sigcache = git__calloc(sigcache_size, sizeof(void *)); |
|
837
|
11
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(sigcache); |
|
838
|
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
/* Label rename sources and targets |
|
840
|
|
|
|
|
|
|
* |
|
841
|
|
|
|
|
|
|
* This will also set self-similarity scores for MODIFIED files and |
|
842
|
|
|
|
|
|
|
* mark them for splitting if break-rewrites is enabled |
|
843
|
|
|
|
|
|
|
*/ |
|
844
|
36
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, t, tgt) { |
|
845
|
25
|
100
|
|
|
|
|
if (is_rename_source(diff, &opts, t, sigcache)) |
|
846
|
7
|
|
|
|
|
|
++num_srcs; |
|
847
|
|
|
|
|
|
|
|
|
848
|
25
|
100
|
|
|
|
|
if (is_rename_target(diff, &opts, t, sigcache)) |
|
849
|
13
|
|
|
|
|
|
++num_tgts; |
|
850
|
|
|
|
|
|
|
|
|
851
|
25
|
50
|
|
|
|
|
if ((tgt->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) |
|
852
|
0
|
|
|
|
|
|
num_rewrites++; |
|
853
|
|
|
|
|
|
|
} |
|
854
|
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
/* if there are no candidate srcs or tgts, we're done */ |
|
856
|
11
|
100
|
|
|
|
|
if (!num_srcs || !num_tgts) |
|
|
|
50
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
goto cleanup; |
|
858
|
|
|
|
|
|
|
|
|
859
|
7
|
|
|
|
|
|
src2tgt = git__calloc(num_deltas, sizeof(diff_find_match)); |
|
860
|
7
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(src2tgt); |
|
861
|
7
|
|
|
|
|
|
tgt2src = git__calloc(num_deltas, sizeof(diff_find_match)); |
|
862
|
7
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(tgt2src); |
|
863
|
|
|
|
|
|
|
|
|
864
|
7
|
50
|
|
|
|
|
if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { |
|
865
|
0
|
|
|
|
|
|
tgt2src_copy = git__calloc(num_deltas, sizeof(diff_find_match)); |
|
866
|
0
|
0
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC(tgt2src_copy); |
|
867
|
|
|
|
|
|
|
} |
|
868
|
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
/* |
|
870
|
|
|
|
|
|
|
* Find best-fit matches for rename / copy candidates |
|
871
|
|
|
|
|
|
|
*/ |
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
find_best_matches: |
|
874
|
7
|
|
|
|
|
|
tried_tgts = num_bumped = 0; |
|
875
|
|
|
|
|
|
|
|
|
876
|
17
|
50
|
|
|
|
|
git_vector_foreach(&diff->deltas, t, tgt) { |
|
877
|
|
|
|
|
|
|
/* skip things that are not rename targets */ |
|
878
|
17
|
100
|
|
|
|
|
if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
|
879
|
7
|
|
|
|
|
|
continue; |
|
880
|
|
|
|
|
|
|
|
|
881
|
10
|
|
|
|
|
|
tried_srcs = 0; |
|
882
|
|
|
|
|
|
|
|
|
883
|
17
|
50
|
|
|
|
|
git_vector_foreach(&diff->deltas, s, src) { |
|
884
|
|
|
|
|
|
|
/* skip things that are not rename sources */ |
|
885
|
17
|
100
|
|
|
|
|
if ((src->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0) |
|
886
|
7
|
|
|
|
|
|
continue; |
|
887
|
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
/* calculate similarity for this pair and find best match */ |
|
889
|
10
|
50
|
|
|
|
|
if (s == t) |
|
890
|
0
|
|
|
|
|
|
result = -1; /* don't measure self-similarity here */ |
|
891
|
10
|
50
|
|
|
|
|
else if ((error = similarity_measure( |
|
892
|
10
|
|
|
|
|
|
&result, diff, &opts, sigcache, 2 * s, 2 * t + 1)) < 0) |
|
893
|
0
|
|
|
|
|
|
goto cleanup; |
|
894
|
|
|
|
|
|
|
|
|
895
|
10
|
50
|
|
|
|
|
if (result < 0) |
|
896
|
0
|
|
|
|
|
|
continue; |
|
897
|
10
|
|
|
|
|
|
similarity = (uint16_t)result; |
|
898
|
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
/* is this a better rename? */ |
|
900
|
10
|
100
|
|
|
|
|
if (tgt2src[t].similarity < similarity && |
|
|
|
50
|
|
|
|
|
|
|
901
|
6
|
|
|
|
|
|
src2tgt[s].similarity < similarity) |
|
902
|
|
|
|
|
|
|
{ |
|
903
|
|
|
|
|
|
|
/* eject old mapping */ |
|
904
|
6
|
50
|
|
|
|
|
if (src2tgt[s].similarity > 0) { |
|
905
|
0
|
|
|
|
|
|
tgt2src[src2tgt[s].idx].similarity = 0; |
|
906
|
0
|
|
|
|
|
|
num_bumped++; |
|
907
|
|
|
|
|
|
|
} |
|
908
|
6
|
50
|
|
|
|
|
if (tgt2src[t].similarity > 0) { |
|
909
|
0
|
|
|
|
|
|
src2tgt[tgt2src[t].idx].similarity = 0; |
|
910
|
0
|
|
|
|
|
|
num_bumped++; |
|
911
|
|
|
|
|
|
|
} |
|
912
|
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
/* write new mapping */ |
|
914
|
6
|
|
|
|
|
|
tgt2src[t].idx = s; |
|
915
|
6
|
|
|
|
|
|
tgt2src[t].similarity = similarity; |
|
916
|
6
|
|
|
|
|
|
src2tgt[s].idx = t; |
|
917
|
6
|
|
|
|
|
|
src2tgt[s].similarity = similarity; |
|
918
|
|
|
|
|
|
|
} |
|
919
|
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
/* keep best absolute match for copies */ |
|
921
|
10
|
50
|
|
|
|
|
if (tgt2src_copy != NULL && |
|
|
|
0
|
|
|
|
|
|
|
922
|
0
|
|
|
|
|
|
tgt2src_copy[t].similarity < similarity) |
|
923
|
|
|
|
|
|
|
{ |
|
924
|
0
|
|
|
|
|
|
tgt2src_copy[t].idx = s; |
|
925
|
0
|
|
|
|
|
|
tgt2src_copy[t].similarity = similarity; |
|
926
|
|
|
|
|
|
|
} |
|
927
|
|
|
|
|
|
|
|
|
928
|
10
|
50
|
|
|
|
|
if (++tried_srcs >= num_srcs) |
|
929
|
10
|
|
|
|
|
|
break; |
|
930
|
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
/* cap on maximum targets we'll examine (per "tgt" file) */ |
|
932
|
0
|
0
|
|
|
|
|
if (tried_srcs > opts.rename_limit) |
|
933
|
0
|
|
|
|
|
|
break; |
|
934
|
|
|
|
|
|
|
} |
|
935
|
|
|
|
|
|
|
|
|
936
|
10
|
100
|
|
|
|
|
if (++tried_tgts >= num_tgts) |
|
937
|
7
|
|
|
|
|
|
break; |
|
938
|
|
|
|
|
|
|
} |
|
939
|
|
|
|
|
|
|
|
|
940
|
7
|
50
|
|
|
|
|
if (num_bumped > 0) /* try again if we bumped some items */ |
|
941
|
0
|
|
|
|
|
|
goto find_best_matches; |
|
942
|
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
/* |
|
944
|
|
|
|
|
|
|
* Rewrite the diffs with renames / copies |
|
945
|
|
|
|
|
|
|
*/ |
|
946
|
|
|
|
|
|
|
|
|
947
|
26
|
100
|
|
|
|
|
git_vector_foreach(&diff->deltas, t, tgt) { |
|
948
|
|
|
|
|
|
|
/* skip things that are not rename targets */ |
|
949
|
19
|
100
|
|
|
|
|
if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
|
950
|
9
|
|
|
|
|
|
continue; |
|
951
|
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
/* check if this delta was the target of a similarity */ |
|
953
|
10
|
100
|
|
|
|
|
if (tgt2src[t].similarity) |
|
954
|
6
|
|
|
|
|
|
best_match = &tgt2src[t]; |
|
955
|
4
|
50
|
|
|
|
|
else if (tgt2src_copy && tgt2src_copy[t].similarity) |
|
|
|
0
|
|
|
|
|
|
|
956
|
0
|
|
|
|
|
|
best_match = &tgt2src_copy[t]; |
|
957
|
|
|
|
|
|
|
else |
|
958
|
4
|
|
|
|
|
|
continue; |
|
959
|
|
|
|
|
|
|
|
|
960
|
6
|
|
|
|
|
|
s = best_match->idx; |
|
961
|
6
|
50
|
|
|
|
|
src = GIT_VECTOR_GET(&diff->deltas, s); |
|
962
|
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
/* possible scenarios: |
|
964
|
|
|
|
|
|
|
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME |
|
965
|
|
|
|
|
|
|
* 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE |
|
966
|
|
|
|
|
|
|
* 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME |
|
967
|
|
|
|
|
|
|
* 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT |
|
968
|
|
|
|
|
|
|
* 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY |
|
969
|
|
|
|
|
|
|
*/ |
|
970
|
|
|
|
|
|
|
|
|
971
|
6
|
50
|
|
|
|
|
if (src->status == GIT_DELTA_DELETED) { |
|
972
|
|
|
|
|
|
|
|
|
973
|
6
|
50
|
|
|
|
|
if (delta_is_new_only(tgt)) { |
|
974
|
|
|
|
|
|
|
|
|
975
|
6
|
50
|
|
|
|
|
if (best_match->similarity < opts.rename_threshold) |
|
976
|
0
|
|
|
|
|
|
continue; |
|
977
|
|
|
|
|
|
|
|
|
978
|
6
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
|
979
|
|
|
|
|
|
|
|
|
980
|
6
|
|
|
|
|
|
src->flags |= GIT_DIFF_FLAG__TO_DELETE; |
|
981
|
6
|
|
|
|
|
|
num_rewrites++; |
|
982
|
|
|
|
|
|
|
} else { |
|
983
|
0
|
0
|
|
|
|
|
GIT_ASSERT(delta_is_split(tgt)); |
|
984
|
|
|
|
|
|
|
|
|
985
|
0
|
0
|
|
|
|
|
if (best_match->similarity < opts.rename_from_rewrite_threshold) |
|
986
|
0
|
|
|
|
|
|
continue; |
|
987
|
|
|
|
|
|
|
|
|
988
|
0
|
|
|
|
|
|
memcpy(&swap, &tgt->old_file, sizeof(swap)); |
|
989
|
|
|
|
|
|
|
|
|
990
|
0
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
|
991
|
0
|
|
|
|
|
|
num_rewrites--; |
|
992
|
|
|
|
|
|
|
|
|
993
|
0
|
0
|
|
|
|
|
GIT_ASSERT(src->status == GIT_DELTA_DELETED); |
|
994
|
0
|
|
|
|
|
|
memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
|
995
|
0
|
|
|
|
|
|
memset(&src->new_file, 0, sizeof(src->new_file)); |
|
996
|
0
|
|
|
|
|
|
src->new_file.path = src->old_file.path; |
|
997
|
0
|
|
|
|
|
|
src->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
|
998
|
|
|
|
|
|
|
|
|
999
|
0
|
|
|
|
|
|
num_updates++; |
|
1000
|
|
|
|
|
|
|
|
|
1001
|
0
|
0
|
|
|
|
|
if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { |
|
|
|
0
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
/* what used to be at src t is now at src s */ |
|
1003
|
6
|
|
|
|
|
|
tgt2src[src2tgt[t].idx].idx = s; |
|
1004
|
|
|
|
|
|
|
} |
|
1005
|
|
|
|
|
|
|
} |
|
1006
|
|
|
|
|
|
|
} |
|
1007
|
|
|
|
|
|
|
|
|
1008
|
0
|
0
|
|
|
|
|
else if (delta_is_split(src)) { |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
0
|
0
|
|
|
|
|
if (delta_is_new_only(tgt)) { |
|
1011
|
|
|
|
|
|
|
|
|
1012
|
0
|
0
|
|
|
|
|
if (best_match->similarity < opts.rename_threshold) |
|
1013
|
0
|
|
|
|
|
|
continue; |
|
1014
|
|
|
|
|
|
|
|
|
1015
|
0
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
|
1016
|
|
|
|
|
|
|
|
|
1017
|
0
|
0
|
|
|
|
|
src->status = (diff->new_src == GIT_ITERATOR_WORKDIR) ? |
|
1018
|
|
|
|
|
|
|
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED; |
|
1019
|
0
|
|
|
|
|
|
src->nfiles = 1; |
|
1020
|
0
|
|
|
|
|
|
memset(&src->old_file, 0, sizeof(src->old_file)); |
|
1021
|
0
|
|
|
|
|
|
src->old_file.path = src->new_file.path; |
|
1022
|
0
|
|
|
|
|
|
src->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
0
|
|
|
|
|
|
src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
|
1025
|
0
|
|
|
|
|
|
num_rewrites--; |
|
1026
|
|
|
|
|
|
|
|
|
1027
|
0
|
|
|
|
|
|
num_updates++; |
|
1028
|
|
|
|
|
|
|
} else { |
|
1029
|
0
|
0
|
|
|
|
|
GIT_ASSERT(delta_is_split(src)); |
|
1030
|
|
|
|
|
|
|
|
|
1031
|
0
|
0
|
|
|
|
|
if (best_match->similarity < opts.rename_from_rewrite_threshold) |
|
1032
|
0
|
|
|
|
|
|
continue; |
|
1033
|
|
|
|
|
|
|
|
|
1034
|
0
|
|
|
|
|
|
memcpy(&swap, &tgt->old_file, sizeof(swap)); |
|
1035
|
|
|
|
|
|
|
|
|
1036
|
0
|
|
|
|
|
|
delta_make_rename(tgt, src, best_match->similarity); |
|
1037
|
0
|
|
|
|
|
|
num_rewrites--; |
|
1038
|
0
|
|
|
|
|
|
num_updates++; |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
0
|
|
|
|
|
|
memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
|
1041
|
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
/* if we've just swapped the new element into the correct |
|
1043
|
|
|
|
|
|
|
* place, clear the SPLIT and RENAME_TARGET flags |
|
1044
|
|
|
|
|
|
|
*/ |
|
1045
|
0
|
0
|
|
|
|
|
if (tgt2src[s].idx == t && |
|
|
|
0
|
|
|
|
|
|
|
1046
|
0
|
|
|
|
|
|
tgt2src[s].similarity > |
|
1047
|
0
|
|
|
|
|
|
opts.rename_from_rewrite_threshold) { |
|
1048
|
0
|
|
|
|
|
|
src->status = GIT_DELTA_RENAMED; |
|
1049
|
0
|
|
|
|
|
|
src->similarity = tgt2src[s].similarity; |
|
1050
|
0
|
|
|
|
|
|
tgt2src[s].similarity = 0; |
|
1051
|
0
|
|
|
|
|
|
src->flags &= ~(GIT_DIFF_FLAG__TO_SPLIT | GIT_DIFF_FLAG__IS_RENAME_TARGET); |
|
1052
|
0
|
|
|
|
|
|
num_rewrites--; |
|
1053
|
|
|
|
|
|
|
} |
|
1054
|
|
|
|
|
|
|
/* otherwise, if we just overwrote a source, update mapping */ |
|
1055
|
0
|
0
|
|
|
|
|
else if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { |
|
|
|
0
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
/* what used to be at src t is now at src s */ |
|
1057
|
0
|
|
|
|
|
|
tgt2src[src2tgt[t].idx].idx = s; |
|
1058
|
|
|
|
|
|
|
} |
|
1059
|
|
|
|
|
|
|
|
|
1060
|
0
|
|
|
|
|
|
num_updates++; |
|
1061
|
|
|
|
|
|
|
} |
|
1062
|
|
|
|
|
|
|
} |
|
1063
|
|
|
|
|
|
|
|
|
1064
|
0
|
0
|
|
|
|
|
else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { |
|
1065
|
0
|
0
|
|
|
|
|
if (tgt2src_copy[t].similarity < opts.copy_threshold) |
|
1066
|
0
|
|
|
|
|
|
continue; |
|
1067
|
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
/* always use best possible source for copy */ |
|
1069
|
0
|
|
|
|
|
|
best_match = &tgt2src_copy[t]; |
|
1070
|
0
|
0
|
|
|
|
|
src = GIT_VECTOR_GET(&diff->deltas, best_match->idx); |
|
1071
|
|
|
|
|
|
|
|
|
1072
|
0
|
0
|
|
|
|
|
if (delta_is_split(tgt)) { |
|
1073
|
0
|
|
|
|
|
|
error = insert_delete_side_of_split(diff, &diff->deltas, tgt); |
|
1074
|
0
|
0
|
|
|
|
|
if (error < 0) |
|
1075
|
0
|
|
|
|
|
|
goto cleanup; |
|
1076
|
0
|
|
|
|
|
|
num_rewrites--; |
|
1077
|
|
|
|
|
|
|
} |
|
1078
|
|
|
|
|
|
|
|
|
1079
|
0
|
0
|
|
|
|
|
if (!delta_is_split(tgt) && !delta_is_new_only(tgt)) |
|
|
|
0
|
|
|
|
|
|
|
1080
|
0
|
|
|
|
|
|
continue; |
|
1081
|
|
|
|
|
|
|
|
|
1082
|
0
|
|
|
|
|
|
tgt->status = GIT_DELTA_COPIED; |
|
1083
|
0
|
|
|
|
|
|
tgt->similarity = best_match->similarity; |
|
1084
|
0
|
|
|
|
|
|
tgt->nfiles = 2; |
|
1085
|
0
|
|
|
|
|
|
memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file)); |
|
1086
|
0
|
|
|
|
|
|
tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
|
1087
|
|
|
|
|
|
|
|
|
1088
|
0
|
|
|
|
|
|
num_updates++; |
|
1089
|
|
|
|
|
|
|
} |
|
1090
|
|
|
|
|
|
|
} |
|
1091
|
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
/* |
|
1093
|
|
|
|
|
|
|
* Actually split and delete entries as needed |
|
1094
|
|
|
|
|
|
|
*/ |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
7
|
100
|
|
|
|
|
if (num_rewrites > 0 || num_updates > 0) |
|
|
|
50
|
|
|
|
|
|
|
1097
|
6
|
|
|
|
|
|
error = apply_splits_and_deletes( |
|
1098
|
6
|
|
|
|
|
|
diff, diff->deltas.length - num_rewrites, |
|
1099
|
6
|
100
|
|
|
|
|
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES) && |
|
|
|
50
|
|
|
|
|
|
|
1100
|
1
|
|
|
|
|
|
!FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY)); |
|
1101
|
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
cleanup: |
|
1103
|
11
|
|
|
|
|
|
git__free(tgt2src); |
|
1104
|
11
|
|
|
|
|
|
git__free(src2tgt); |
|
1105
|
11
|
|
|
|
|
|
git__free(tgt2src_copy); |
|
1106
|
|
|
|
|
|
|
|
|
1107
|
11
|
50
|
|
|
|
|
if (sigcache) { |
|
1108
|
61
|
100
|
|
|
|
|
for (t = 0; t < num_deltas * 2; ++t) { |
|
1109
|
50
|
100
|
|
|
|
|
if (sigcache[t] != NULL) |
|
1110
|
11
|
|
|
|
|
|
opts.metric->free_signature(sigcache[t], opts.metric->payload); |
|
1111
|
|
|
|
|
|
|
} |
|
1112
|
11
|
|
|
|
|
|
git__free(sigcache); |
|
1113
|
|
|
|
|
|
|
} |
|
1114
|
|
|
|
|
|
|
|
|
1115
|
11
|
50
|
|
|
|
|
if (!given_opts || !given_opts->metric) |
|
|
|
50
|
|
|
|
|
|
|
1116
|
11
|
|
|
|
|
|
git__free(opts.metric); |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
11
|
|
|
|
|
|
return error; |
|
1119
|
|
|
|
|
|
|
} |
|
1120
|
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
#undef FLAG_SET |