| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | /* | 
| 2 |  |  |  |  |  |  | * Copyright (C) the libgit2 contributors. All rights reserved. | 
| 3 |  |  |  |  |  |  | * | 
| 4 |  |  |  |  |  |  | * This file is part of libgit2, distributed under the GNU GPL v2 with | 
| 5 |  |  |  |  |  |  | * a Linking Exception. For full terms see the included COPYING file. | 
| 6 |  |  |  |  |  |  | */ | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | #include "common.h" | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | #include "git2/sys/hashsig.h" | 
| 11 |  |  |  |  |  |  | #include "futils.h" | 
| 12 |  |  |  |  |  |  | #include "util.h" | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | typedef uint32_t hashsig_t; | 
| 15 |  |  |  |  |  |  | typedef uint64_t hashsig_state; | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | #define HASHSIG_SCALE 100 | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | #define HASHSIG_MAX_RUN 80 | 
| 20 |  |  |  |  |  |  | #define HASHSIG_HASH_START	0x012345678ABCDEF0LL | 
| 21 |  |  |  |  |  |  | #define HASHSIG_HASH_SHIFT  5 | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | #define HASHSIG_HASH_MIX(S,CH) \ | 
| 24 |  |  |  |  |  |  | (S) = ((S) << HASHSIG_HASH_SHIFT) - (S) + (hashsig_state)(CH) | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | #define HASHSIG_HEAP_SIZE ((1 << 7) - 1) | 
| 27 |  |  |  |  |  |  | #define HASHSIG_HEAP_MIN_SIZE 4 | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | typedef int (*hashsig_cmp)(const void *a, const void *b, void *); | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | typedef struct { | 
| 32 |  |  |  |  |  |  | int size, asize; | 
| 33 |  |  |  |  |  |  | hashsig_cmp cmp; | 
| 34 |  |  |  |  |  |  | hashsig_t values[HASHSIG_HEAP_SIZE]; | 
| 35 |  |  |  |  |  |  | } hashsig_heap; | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | struct git_hashsig { | 
| 38 |  |  |  |  |  |  | hashsig_heap mins; | 
| 39 |  |  |  |  |  |  | hashsig_heap maxs; | 
| 40 |  |  |  |  |  |  | size_t lines; | 
| 41 |  |  |  |  |  |  | git_hashsig_option_t opt; | 
| 42 |  |  |  |  |  |  | }; | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | #define HEAP_LCHILD_OF(I) (((I)<<1)+1) | 
| 45 |  |  |  |  |  |  | #define HEAP_RCHILD_OF(I) (((I)<<1)+2) | 
| 46 |  |  |  |  |  |  | #define HEAP_PARENT_OF(I) (((I)-1)>>1) | 
| 47 |  |  |  |  |  |  |  | 
| 48 | 22 |  |  |  |  |  | static void hashsig_heap_init(hashsig_heap *h, hashsig_cmp cmp) | 
| 49 |  |  |  |  |  |  | { | 
| 50 | 22 |  |  |  |  |  | h->size  = 0; | 
| 51 | 22 |  |  |  |  |  | h->asize = HASHSIG_HEAP_SIZE; | 
| 52 | 22 |  |  |  |  |  | h->cmp   = cmp; | 
| 53 | 22 |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  |  | 
| 55 | 54 |  |  |  |  |  | static int hashsig_cmp_max(const void *a, const void *b, void *payload) | 
| 56 |  |  |  |  |  |  | { | 
| 57 | 54 |  |  |  |  |  | hashsig_t av = *(const hashsig_t *)a, bv = *(const hashsig_t *)b; | 
| 58 |  |  |  |  |  |  | GIT_UNUSED(payload); | 
| 59 | 54 | 100 |  |  |  |  | return (av < bv) ? -1 : (av > bv) ? 1 : 0; | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  |  | 
| 62 | 68 |  |  |  |  |  | static int hashsig_cmp_min(const void *a, const void *b, void *payload) | 
| 63 |  |  |  |  |  |  | { | 
| 64 | 68 |  |  |  |  |  | hashsig_t av = *(const hashsig_t *)a, bv = *(const hashsig_t *)b; | 
| 65 |  |  |  |  |  |  | GIT_UNUSED(payload); | 
| 66 | 68 | 50 |  |  |  |  | return (av > bv) ? -1 : (av < bv) ? 1 : 0; | 
| 67 |  |  |  |  |  |  | } | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 72 |  |  |  |  |  | static void hashsig_heap_up(hashsig_heap *h, int el) | 
| 70 |  |  |  |  |  |  | { | 
| 71 | 72 |  |  |  |  |  | int parent_el = HEAP_PARENT_OF(el); | 
| 72 |  |  |  |  |  |  |  | 
| 73 | 74 | 100 |  |  |  |  | while (el > 0 && h->cmp(&h->values[parent_el], &h->values[el], NULL) > 0) { | 
|  |  | 100 |  |  |  |  |  | 
| 74 | 2 |  |  |  |  |  | hashsig_t t = h->values[el]; | 
| 75 | 2 |  |  |  |  |  | h->values[el] = h->values[parent_el]; | 
| 76 | 2 |  |  |  |  |  | h->values[parent_el] = t; | 
| 77 |  |  |  |  |  |  |  | 
| 78 | 2 |  |  |  |  |  | el = parent_el; | 
| 79 | 2 |  |  |  |  |  | parent_el = HEAP_PARENT_OF(el); | 
| 80 |  |  |  |  |  |  | } | 
| 81 | 72 |  |  |  |  |  | } | 
| 82 |  |  |  |  |  |  |  | 
| 83 | 0 |  |  |  |  |  | static void hashsig_heap_down(hashsig_heap *h, int el) | 
| 84 |  |  |  |  |  |  | { | 
| 85 |  |  |  |  |  |  | hashsig_t v, lv, rv; | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | /* 'el < h->size / 2' tests if el is bottom row of heap */ | 
| 88 |  |  |  |  |  |  |  | 
| 89 | 0 | 0 |  |  |  |  | while (el < h->size / 2) { | 
| 90 | 0 |  |  |  |  |  | int lel = HEAP_LCHILD_OF(el), rel = HEAP_RCHILD_OF(el), swapel; | 
| 91 |  |  |  |  |  |  |  | 
| 92 | 0 |  |  |  |  |  | v  = h->values[el]; | 
| 93 | 0 |  |  |  |  |  | lv = h->values[lel]; | 
| 94 | 0 |  |  |  |  |  | rv = h->values[rel]; | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 0 | 0 |  |  |  |  | if (h->cmp(&v, &lv, NULL) < 0 && h->cmp(&v, &rv, NULL) < 0) | 
|  |  | 0 |  |  |  |  |  | 
| 97 | 0 |  |  |  |  |  | break; | 
| 98 |  |  |  |  |  |  |  | 
| 99 | 0 | 0 |  |  |  |  | swapel = (h->cmp(&lv, &rv, NULL) < 0) ? lel : rel; | 
| 100 |  |  |  |  |  |  |  | 
| 101 | 0 |  |  |  |  |  | h->values[el] = h->values[swapel]; | 
| 102 | 0 |  |  |  |  |  | h->values[swapel] = v; | 
| 103 |  |  |  |  |  |  |  | 
| 104 | 0 |  |  |  |  |  | el = swapel; | 
| 105 |  |  |  |  |  |  | } | 
| 106 | 0 |  |  |  |  |  | } | 
| 107 |  |  |  |  |  |  |  | 
| 108 | 22 |  |  |  |  |  | static void hashsig_heap_sort(hashsig_heap *h) | 
| 109 |  |  |  |  |  |  | { | 
| 110 |  |  |  |  |  |  | /* only need to do this at the end for signature comparison */ | 
| 111 | 22 |  |  |  |  |  | git__qsort_r(h->values, h->size, sizeof(hashsig_t), h->cmp, NULL); | 
| 112 | 22 |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 | 72 |  |  |  |  |  | static void hashsig_heap_insert(hashsig_heap *h, hashsig_t val) | 
| 115 |  |  |  |  |  |  | { | 
| 116 |  |  |  |  |  |  | /* if heap is not full, insert new element */ | 
| 117 | 72 | 50 |  |  |  |  | if (h->size < h->asize) { | 
| 118 | 72 |  |  |  |  |  | h->values[h->size++] = val; | 
| 119 | 72 |  |  |  |  |  | hashsig_heap_up(h, h->size - 1); | 
| 120 |  |  |  |  |  |  | } | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | /* if heap is full, pop top if new element should replace it */ | 
| 123 | 0 | 0 |  |  |  |  | else if (h->cmp(&val, &h->values[0], NULL) > 0) { | 
| 124 | 0 |  |  |  |  |  | h->size--; | 
| 125 | 0 |  |  |  |  |  | h->values[0] = h->values[h->size]; | 
| 126 | 0 |  |  |  |  |  | hashsig_heap_down(h, 0); | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  |  | 
| 129 | 72 |  |  |  |  |  | } | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | typedef struct { | 
| 132 |  |  |  |  |  |  | int use_ignores; | 
| 133 |  |  |  |  |  |  | uint8_t ignore_ch[256]; | 
| 134 |  |  |  |  |  |  | } hashsig_in_progress; | 
| 135 |  |  |  |  |  |  |  | 
| 136 | 11 |  |  |  |  |  | static void hashsig_in_progress_init( | 
| 137 |  |  |  |  |  |  | hashsig_in_progress *prog, git_hashsig *sig) | 
| 138 |  |  |  |  |  |  | { | 
| 139 |  |  |  |  |  |  | int i; | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | /* no more than one can be set */ | 
| 142 | 11 | 100 |  |  |  |  | assert(!(sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) || | 
|  |  | 50 |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | !(sig->opt & GIT_HASHSIG_SMART_WHITESPACE)); | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 11 | 100 |  |  |  |  | if (sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) { | 
| 146 | 514 | 100 |  |  |  |  | for (i = 0; i < 256; ++i) | 
| 147 | 512 |  |  |  |  |  | prog->ignore_ch[i] = git__isspace_nonlf(i); | 
| 148 | 2 |  |  |  |  |  | prog->use_ignores = 1; | 
| 149 | 9 | 50 |  |  |  |  | } else if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE) { | 
| 150 | 2313 | 100 |  |  |  |  | for (i = 0; i < 256; ++i) | 
| 151 | 2304 |  |  |  |  |  | prog->ignore_ch[i] = git__isspace(i); | 
| 152 | 9 |  |  |  |  |  | prog->use_ignores = 1; | 
| 153 |  |  |  |  |  |  | } else { | 
| 154 | 0 |  |  |  |  |  | memset(prog, 0, sizeof(*prog)); | 
| 155 |  |  |  |  |  |  | } | 
| 156 | 11 |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  |  | 
| 158 | 11 |  |  |  |  |  | static int hashsig_add_hashes( | 
| 159 |  |  |  |  |  |  | git_hashsig *sig, | 
| 160 |  |  |  |  |  |  | const uint8_t *data, | 
| 161 |  |  |  |  |  |  | size_t size, | 
| 162 |  |  |  |  |  |  | hashsig_in_progress *prog) | 
| 163 |  |  |  |  |  |  | { | 
| 164 | 11 |  |  |  |  |  | const uint8_t *scan = data, *end = data + size; | 
| 165 | 11 |  |  |  |  |  | hashsig_state state = HASHSIG_HASH_START; | 
| 166 | 11 |  |  |  |  |  | int use_ignores = prog->use_ignores, len; | 
| 167 |  |  |  |  |  |  | uint8_t ch; | 
| 168 |  |  |  |  |  |  |  | 
| 169 | 50 | 100 |  |  |  |  | while (scan < end) { | 
| 170 | 39 |  |  |  |  |  | state = HASHSIG_HASH_START; | 
| 171 |  |  |  |  |  |  |  | 
| 172 | 579 | 100 |  |  |  |  | for (len = 0; scan < end && len < HASHSIG_MAX_RUN; ) { | 
|  |  | 50 |  |  |  |  |  | 
| 173 | 575 |  |  |  |  |  | ch = *scan; | 
| 174 |  |  |  |  |  |  |  | 
| 175 | 575 | 100 |  |  |  |  | if (use_ignores) | 
| 176 | 141 | 50 |  |  |  |  | for (; scan < end && git__isspace_nonlf(ch); ch = *scan) | 
|  |  | 100 |  |  |  |  |  | 
| 177 | 2 |  |  |  |  |  | ++scan; | 
| 178 | 436 | 50 |  |  |  |  | else if (sig->opt & | 
| 179 |  |  |  |  |  |  | (GIT_HASHSIG_IGNORE_WHITESPACE | GIT_HASHSIG_SMART_WHITESPACE)) | 
| 180 | 436 | 50 |  |  |  |  | for (; scan < end && ch == '\r'; ch = *scan) | 
|  |  | 50 |  |  |  |  |  | 
| 181 | 0 |  |  |  |  |  | ++scan; | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | /* peek at next character to decide what to do next */ | 
| 184 | 575 | 100 |  |  |  |  | if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE) | 
| 185 | 465 |  |  |  |  |  | use_ignores = (ch == '\n'); | 
| 186 |  |  |  |  |  |  |  | 
| 187 | 575 | 50 |  |  |  |  | if (scan >= end) | 
| 188 | 0 |  |  |  |  |  | break; | 
| 189 | 575 |  |  |  |  |  | ++scan; | 
| 190 |  |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  | /* check run terminator */ | 
| 192 | 575 | 100 |  |  |  |  | if (ch == '\n' || ch == '\0') { | 
|  |  | 50 |  |  |  |  |  | 
| 193 | 35 |  |  |  |  |  | sig->lines++; | 
| 194 | 35 |  |  |  |  |  | break; | 
| 195 |  |  |  |  |  |  | } | 
| 196 |  |  |  |  |  |  |  | 
| 197 | 540 |  |  |  |  |  | ++len; | 
| 198 | 540 |  |  |  |  |  | HASHSIG_HASH_MIX(state, ch); | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  |  | 
| 201 | 39 | 100 |  |  |  |  | if (len > 0) { | 
| 202 | 36 |  |  |  |  |  | hashsig_heap_insert(&sig->mins, (hashsig_t)state); | 
| 203 | 36 |  |  |  |  |  | hashsig_heap_insert(&sig->maxs, (hashsig_t)state); | 
| 204 |  |  |  |  |  |  |  | 
| 205 | 36 | 100 |  |  |  |  | while (scan < end && (*scan == '\n' || !*scan)) | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 206 | 0 |  |  |  |  |  | ++scan; | 
| 207 |  |  |  |  |  |  | } | 
| 208 |  |  |  |  |  |  | } | 
| 209 |  |  |  |  |  |  |  | 
| 210 | 11 |  |  |  |  |  | prog->use_ignores = use_ignores; | 
| 211 |  |  |  |  |  |  |  | 
| 212 | 11 |  |  |  |  |  | return 0; | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  |  | 
| 215 | 11 |  |  |  |  |  | static int hashsig_finalize_hashes(git_hashsig *sig) | 
| 216 |  |  |  |  |  |  | { | 
| 217 | 11 | 100 |  |  |  |  | if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE && | 
|  |  | 50 |  |  |  |  |  | 
| 218 | 4 |  |  |  |  |  | !(sig->opt & GIT_HASHSIG_ALLOW_SMALL_FILES)) { | 
| 219 | 0 |  |  |  |  |  | git_error_set(GIT_ERROR_INVALID, | 
| 220 |  |  |  |  |  |  | "file too small for similarity signature calculation"); | 
| 221 | 0 |  |  |  |  |  | return GIT_EBUFS; | 
| 222 |  |  |  |  |  |  | } | 
| 223 |  |  |  |  |  |  |  | 
| 224 | 11 |  |  |  |  |  | hashsig_heap_sort(&sig->mins); | 
| 225 | 11 |  |  |  |  |  | hashsig_heap_sort(&sig->maxs); | 
| 226 |  |  |  |  |  |  |  | 
| 227 | 11 |  |  |  |  |  | return 0; | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  |  | 
| 230 | 11 |  |  |  |  |  | static git_hashsig *hashsig_alloc(git_hashsig_option_t opts) | 
| 231 |  |  |  |  |  |  | { | 
| 232 | 11 |  |  |  |  |  | git_hashsig *sig = git__calloc(1, sizeof(git_hashsig)); | 
| 233 | 11 | 50 |  |  |  |  | if (!sig) | 
| 234 | 0 |  |  |  |  |  | return NULL; | 
| 235 |  |  |  |  |  |  |  | 
| 236 | 11 |  |  |  |  |  | hashsig_heap_init(&sig->mins, hashsig_cmp_min); | 
| 237 | 11 |  |  |  |  |  | hashsig_heap_init(&sig->maxs, hashsig_cmp_max); | 
| 238 | 11 |  |  |  |  |  | sig->opt = opts; | 
| 239 |  |  |  |  |  |  |  | 
| 240 | 11 |  |  |  |  |  | return sig; | 
| 241 |  |  |  |  |  |  | } | 
| 242 |  |  |  |  |  |  |  | 
| 243 | 6 |  |  |  |  |  | int git_hashsig_create( | 
| 244 |  |  |  |  |  |  | git_hashsig **out, | 
| 245 |  |  |  |  |  |  | const char *buf, | 
| 246 |  |  |  |  |  |  | size_t buflen, | 
| 247 |  |  |  |  |  |  | git_hashsig_option_t opts) | 
| 248 |  |  |  |  |  |  | { | 
| 249 |  |  |  |  |  |  | int error; | 
| 250 |  |  |  |  |  |  | hashsig_in_progress prog; | 
| 251 | 6 |  |  |  |  |  | git_hashsig *sig = hashsig_alloc(opts); | 
| 252 | 6 | 50 |  |  |  |  | GIT_ERROR_CHECK_ALLOC(sig); | 
| 253 |  |  |  |  |  |  |  | 
| 254 | 6 |  |  |  |  |  | hashsig_in_progress_init(&prog, sig); | 
| 255 |  |  |  |  |  |  |  | 
| 256 | 6 |  |  |  |  |  | error = hashsig_add_hashes(sig, (const uint8_t *)buf, buflen, &prog); | 
| 257 |  |  |  |  |  |  |  | 
| 258 | 6 | 50 |  |  |  |  | if (!error) | 
| 259 | 6 |  |  |  |  |  | error = hashsig_finalize_hashes(sig); | 
| 260 |  |  |  |  |  |  |  | 
| 261 | 6 | 50 |  |  |  |  | if (!error) | 
| 262 | 6 |  |  |  |  |  | *out = sig; | 
| 263 |  |  |  |  |  |  | else | 
| 264 | 0 |  |  |  |  |  | git_hashsig_free(sig); | 
| 265 |  |  |  |  |  |  |  | 
| 266 | 6 |  |  |  |  |  | return error; | 
| 267 |  |  |  |  |  |  | } | 
| 268 |  |  |  |  |  |  |  | 
| 269 | 5 |  |  |  |  |  | int git_hashsig_create_fromfile( | 
| 270 |  |  |  |  |  |  | git_hashsig **out, | 
| 271 |  |  |  |  |  |  | const char *path, | 
| 272 |  |  |  |  |  |  | git_hashsig_option_t opts) | 
| 273 |  |  |  |  |  |  | { | 
| 274 |  |  |  |  |  |  | uint8_t buf[0x1000]; | 
| 275 | 5 |  |  |  |  |  | ssize_t buflen = 0; | 
| 276 | 5 |  |  |  |  |  | int error = 0, fd; | 
| 277 |  |  |  |  |  |  | hashsig_in_progress prog; | 
| 278 | 5 |  |  |  |  |  | git_hashsig *sig = hashsig_alloc(opts); | 
| 279 | 5 | 50 |  |  |  |  | GIT_ERROR_CHECK_ALLOC(sig); | 
| 280 |  |  |  |  |  |  |  | 
| 281 | 5 | 50 |  |  |  |  | if ((fd = git_futils_open_ro(path)) < 0) { | 
| 282 | 0 |  |  |  |  |  | git__free(sig); | 
| 283 | 0 |  |  |  |  |  | return fd; | 
| 284 |  |  |  |  |  |  | } | 
| 285 |  |  |  |  |  |  |  | 
| 286 | 5 |  |  |  |  |  | hashsig_in_progress_init(&prog, sig); | 
| 287 |  |  |  |  |  |  |  | 
| 288 | 10 | 50 |  |  |  |  | while (!error) { | 
| 289 | 10 | 100 |  |  |  |  | if ((buflen = p_read(fd, buf, sizeof(buf))) <= 0) { | 
| 290 | 5 | 50 |  |  |  |  | if ((error = (int)buflen) < 0) | 
| 291 | 0 |  |  |  |  |  | git_error_set(GIT_ERROR_OS, | 
| 292 |  |  |  |  |  |  | "read error on '%s' calculating similarity hashes", path); | 
| 293 | 5 |  |  |  |  |  | break; | 
| 294 |  |  |  |  |  |  | } | 
| 295 |  |  |  |  |  |  |  | 
| 296 | 5 |  |  |  |  |  | error = hashsig_add_hashes(sig, buf, buflen, &prog); | 
| 297 |  |  |  |  |  |  | } | 
| 298 |  |  |  |  |  |  |  | 
| 299 | 5 |  |  |  |  |  | p_close(fd); | 
| 300 |  |  |  |  |  |  |  | 
| 301 | 5 | 50 |  |  |  |  | if (!error) | 
| 302 | 5 |  |  |  |  |  | error = hashsig_finalize_hashes(sig); | 
| 303 |  |  |  |  |  |  |  | 
| 304 | 5 | 50 |  |  |  |  | if (!error) | 
| 305 | 5 |  |  |  |  |  | *out = sig; | 
| 306 |  |  |  |  |  |  | else | 
| 307 | 0 |  |  |  |  |  | git_hashsig_free(sig); | 
| 308 |  |  |  |  |  |  |  | 
| 309 | 5 |  |  |  |  |  | return error; | 
| 310 |  |  |  |  |  |  | } | 
| 311 |  |  |  |  |  |  |  | 
| 312 | 11 |  |  |  |  |  | void git_hashsig_free(git_hashsig *sig) | 
| 313 |  |  |  |  |  |  | { | 
| 314 | 11 |  |  |  |  |  | git__free(sig); | 
| 315 | 11 |  |  |  |  |  | } | 
| 316 |  |  |  |  |  |  |  | 
| 317 | 7 |  |  |  |  |  | static int hashsig_heap_compare(const hashsig_heap *a, const hashsig_heap *b) | 
| 318 |  |  |  |  |  |  | { | 
| 319 | 7 |  |  |  |  |  | int matches = 0, i, j, cmp; | 
| 320 |  |  |  |  |  |  |  | 
| 321 | 7 | 50 |  |  |  |  | assert(a->cmp == b->cmp); | 
| 322 |  |  |  |  |  |  |  | 
| 323 |  |  |  |  |  |  | /* hash heaps are sorted - just look for overlap vs total */ | 
| 324 |  |  |  |  |  |  |  | 
| 325 | 25 | 100 |  |  |  |  | for (i = 0, j = 0; i < a->size && j < b->size; ) { | 
|  |  | 100 |  |  |  |  |  | 
| 326 | 18 |  |  |  |  |  | cmp = a->cmp(&a->values[i], &b->values[j], NULL); | 
| 327 |  |  |  |  |  |  |  | 
| 328 | 18 | 50 |  |  |  |  | if (cmp < 0) | 
| 329 | 0 |  |  |  |  |  | ++i; | 
| 330 | 18 | 100 |  |  |  |  | else if (cmp > 0) | 
| 331 | 6 |  |  |  |  |  | ++j; | 
| 332 |  |  |  |  |  |  | else { | 
| 333 | 12 |  |  |  |  |  | ++i; ++j; ++matches; | 
| 334 |  |  |  |  |  |  | } | 
| 335 |  |  |  |  |  |  | } | 
| 336 |  |  |  |  |  |  |  | 
| 337 | 7 |  |  |  |  |  | return HASHSIG_SCALE * (matches * 2) / (a->size + b->size); | 
| 338 |  |  |  |  |  |  | } | 
| 339 |  |  |  |  |  |  |  | 
| 340 | 7 |  |  |  |  |  | int git_hashsig_compare(const git_hashsig *a, const git_hashsig *b) | 
| 341 |  |  |  |  |  |  | { | 
| 342 |  |  |  |  |  |  | /* if we have no elements in either file then each file is either | 
| 343 |  |  |  |  |  |  | * empty or blank.  if we're ignoring whitespace then the files are | 
| 344 |  |  |  |  |  |  | * similar, otherwise they're dissimilar. | 
| 345 |  |  |  |  |  |  | */ | 
| 346 | 7 | 50 |  |  |  |  | if (a->mins.size == 0 && b->mins.size == 0) { | 
|  |  | 0 |  |  |  |  |  | 
| 347 | 0 | 0 |  |  |  |  | if ((!a->lines && !b->lines) || | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 348 | 0 |  |  |  |  |  | (a->opt & GIT_HASHSIG_IGNORE_WHITESPACE)) | 
| 349 | 0 |  |  |  |  |  | return HASHSIG_SCALE; | 
| 350 |  |  |  |  |  |  | else | 
| 351 | 0 |  |  |  |  |  | return 0; | 
| 352 |  |  |  |  |  |  | } | 
| 353 |  |  |  |  |  |  |  | 
| 354 |  |  |  |  |  |  | /* if we have fewer than the maximum number of elements, then just use | 
| 355 |  |  |  |  |  |  | * one array since the two arrays will be the same | 
| 356 |  |  |  |  |  |  | */ | 
| 357 | 7 | 50 |  |  |  |  | if (a->mins.size < HASHSIG_HEAP_SIZE) | 
| 358 | 7 |  |  |  |  |  | return hashsig_heap_compare(&a->mins, &b->mins); | 
| 359 |  |  |  |  |  |  | else | 
| 360 | 0 |  |  |  |  |  | return (hashsig_heap_compare(&a->mins, &b->mins) + | 
| 361 | 0 |  |  |  |  |  | hashsig_heap_compare(&a->maxs, &b->maxs)) / 2; | 
| 362 |  |  |  |  |  |  | } |