| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* CityHash128, v1.0.2 ("cityhash102"). Ported from Google CityHash |
|
2
|
|
|
|
|
|
|
* v1.0.2 (Geoff Pike, Jyrki Alakuijala; MIT license), the same revision |
|
3
|
|
|
|
|
|
|
* ClickHouse keeps under contrib/cityhash102/ and uses for its |
|
4
|
|
|
|
|
|
|
* compressed-block 16-byte checksum. |
|
5
|
|
|
|
|
|
|
* |
|
6
|
|
|
|
|
|
|
* This file only implements CityHash128 (without seed) - the seeded |
|
7
|
|
|
|
|
|
|
* helpers, CityHash64, and the CRC32-accelerated CityHashCrc128 are |
|
8
|
|
|
|
|
|
|
* all skipped because the only ClickHouse-Encoder caller is the |
|
9
|
|
|
|
|
|
|
* CompressedReadBuffer / CompressedWriteBuffer block-prefix hash. */ |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
#include |
|
12
|
|
|
|
|
|
|
#include |
|
13
|
|
|
|
|
|
|
#include |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
#include "cityhash.h" |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
/* Magic constants from CityHash v1.0.2. */ |
|
18
|
|
|
|
|
|
|
static const uint64_t k0 = 0xc3a5c85c97cb3127ULL; |
|
19
|
|
|
|
|
|
|
static const uint64_t k1 = 0xb492b66fbe98f273ULL; |
|
20
|
|
|
|
|
|
|
static const uint64_t k2 = 0x9ae16a3b2f90404fULL; |
|
21
|
|
|
|
|
|
|
static const uint64_t k3 = 0xc949d7c7509e6557ULL; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
/* Little-endian byte loads via memcpy. On LE hosts the compiler |
|
24
|
|
|
|
|
|
|
* collapses these to a single load; on BE hosts we explicitly byte- |
|
25
|
|
|
|
|
|
|
* swap so the algorithm sees identical integer values across hosts. */ |
|
26
|
1013
|
|
|
|
|
|
static inline uint64_t Fetch64(const char *p) { |
|
27
|
|
|
|
|
|
|
uint64_t v; |
|
28
|
1013
|
|
|
|
|
|
memcpy(&v, p, 8); |
|
29
|
|
|
|
|
|
|
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ |
|
30
|
|
|
|
|
|
|
v = ((v & 0x00000000000000ffULL) << 56) |
|
31
|
|
|
|
|
|
|
| ((v & 0x000000000000ff00ULL) << 40) |
|
32
|
|
|
|
|
|
|
| ((v & 0x0000000000ff0000ULL) << 24) |
|
33
|
|
|
|
|
|
|
| ((v & 0x00000000ff000000ULL) << 8) |
|
34
|
|
|
|
|
|
|
| ((v & 0x000000ff00000000ULL) >> 8) |
|
35
|
|
|
|
|
|
|
| ((v & 0x0000ff0000000000ULL) >> 24) |
|
36
|
|
|
|
|
|
|
| ((v & 0x00ff000000000000ULL) >> 40) |
|
37
|
|
|
|
|
|
|
| ((v & 0xff00000000000000ULL) >> 56); |
|
38
|
|
|
|
|
|
|
#endif |
|
39
|
1013
|
|
|
|
|
|
return v; |
|
40
|
|
|
|
|
|
|
} |
|
41
|
|
|
|
|
|
|
|
|
42
|
8
|
|
|
|
|
|
static inline uint32_t Fetch32(const char *p) { |
|
43
|
|
|
|
|
|
|
uint32_t v; |
|
44
|
8
|
|
|
|
|
|
memcpy(&v, p, 4); |
|
45
|
|
|
|
|
|
|
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ |
|
46
|
|
|
|
|
|
|
v = ((v & 0x000000ffU) << 24) |
|
47
|
|
|
|
|
|
|
| ((v & 0x0000ff00U) << 8) |
|
48
|
|
|
|
|
|
|
| ((v & 0x00ff0000U) >> 8) |
|
49
|
|
|
|
|
|
|
| ((v & 0xff000000U) >> 24); |
|
50
|
|
|
|
|
|
|
#endif |
|
51
|
8
|
|
|
|
|
|
return v; |
|
52
|
|
|
|
|
|
|
} |
|
53
|
|
|
|
|
|
|
|
|
54
|
640
|
|
|
|
|
|
static inline uint64_t Rotate(uint64_t v, int n) { |
|
55
|
|
|
|
|
|
|
/* Reference impl returns `v` unchanged when n == 0 (avoiding UB |
|
56
|
|
|
|
|
|
|
* from `v >> 64`). All call sites pass n in 1..63, but mirror the |
|
57
|
|
|
|
|
|
|
* guard so a future-added caller doesn't trip on it. */ |
|
58
|
640
|
50
|
|
|
|
|
return n == 0 ? v : ((v >> n) | (v << (64 - n))); |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
4
|
|
|
|
|
|
static inline uint64_t RotateByAtLeast1(uint64_t v, int n) { |
|
62
|
4
|
|
|
|
|
|
return (v >> n) | (v << (64 - n)); |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
|
|
65
|
63
|
|
|
|
|
|
static inline uint64_t ShiftMix(uint64_t v) { |
|
66
|
63
|
|
|
|
|
|
return v ^ (v >> 47); |
|
67
|
|
|
|
|
|
|
} |
|
68
|
|
|
|
|
|
|
|
|
69
|
175
|
|
|
|
|
|
static inline uint64_t Hash128to64(uint64_t low, uint64_t high) { |
|
70
|
|
|
|
|
|
|
static const uint64_t kMul = 0x9ddfea08eb382d69ULL; |
|
71
|
175
|
|
|
|
|
|
uint64_t a = (low ^ high) * kMul; |
|
72
|
175
|
|
|
|
|
|
a ^= (a >> 47); |
|
73
|
175
|
|
|
|
|
|
uint64_t b = (high ^ a) * kMul; |
|
74
|
175
|
|
|
|
|
|
b ^= (b >> 47); |
|
75
|
175
|
|
|
|
|
|
b *= kMul; |
|
76
|
175
|
|
|
|
|
|
return b; |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
|
|
79
|
175
|
|
|
|
|
|
static inline uint64_t HashLen16(uint64_t u, uint64_t v) { |
|
80
|
175
|
|
|
|
|
|
return Hash128to64(u, v); |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
|
|
83
|
12
|
|
|
|
|
|
static uint64_t HashLen0to16(const char *s, size_t len) { |
|
84
|
12
|
100
|
|
|
|
|
if (len > 8) { |
|
85
|
4
|
|
|
|
|
|
uint64_t a = Fetch64(s); |
|
86
|
4
|
|
|
|
|
|
uint64_t b = Fetch64(s + len - 8); |
|
87
|
4
|
|
|
|
|
|
return HashLen16(a, RotateByAtLeast1(b + len, (int)len)) ^ b; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
8
|
100
|
|
|
|
|
if (len >= 4) { |
|
90
|
4
|
|
|
|
|
|
uint64_t a = (uint64_t)Fetch32(s); |
|
91
|
4
|
|
|
|
|
|
return HashLen16(len + (a << 3), |
|
92
|
4
|
|
|
|
|
|
(uint64_t)Fetch32(s + len - 4)); |
|
93
|
|
|
|
|
|
|
} |
|
94
|
4
|
100
|
|
|
|
|
if (len > 0) { |
|
95
|
1
|
|
|
|
|
|
uint8_t a = (uint8_t)s[0]; |
|
96
|
1
|
|
|
|
|
|
uint8_t b = (uint8_t)s[len >> 1]; |
|
97
|
1
|
|
|
|
|
|
uint8_t c = (uint8_t)s[len - 1]; |
|
98
|
1
|
|
|
|
|
|
uint32_t y = (uint32_t)a + ((uint32_t)b << 8); |
|
99
|
1
|
|
|
|
|
|
uint32_t z = (uint32_t)len + ((uint32_t)c << 2); |
|
100
|
1
|
|
|
|
|
|
return ShiftMix((uint64_t)y * k2 ^ (uint64_t)z * k3) * k2; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
3
|
|
|
|
|
|
return k2; |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
/* Return a 16-byte hash for 32 bytes plus two seed words. |
|
106
|
|
|
|
|
|
|
* Out is written as out[0] = low, out[1] = high. */ |
|
107
|
164
|
|
|
|
|
|
static void WeakHashLen32WithSeeds_raw( |
|
108
|
|
|
|
|
|
|
uint64_t w, uint64_t x, uint64_t y, uint64_t z, |
|
109
|
|
|
|
|
|
|
uint64_t a, uint64_t b, uint64_t out[2]) { |
|
110
|
164
|
|
|
|
|
|
a += w; |
|
111
|
164
|
|
|
|
|
|
b = Rotate(b + a + z, 21); |
|
112
|
164
|
|
|
|
|
|
uint64_t c = a; |
|
113
|
164
|
|
|
|
|
|
a += x; |
|
114
|
164
|
|
|
|
|
|
a += y; |
|
115
|
164
|
|
|
|
|
|
b += Rotate(a, 44); |
|
116
|
164
|
|
|
|
|
|
out[0] = a + z; |
|
117
|
164
|
|
|
|
|
|
out[1] = b + c; |
|
118
|
164
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
164
|
|
|
|
|
|
static inline void WeakHashLen32WithSeeds( |
|
121
|
|
|
|
|
|
|
const char *s, uint64_t a, uint64_t b, uint64_t out[2]) { |
|
122
|
164
|
|
|
|
|
|
WeakHashLen32WithSeeds_raw(Fetch64(s), |
|
123
|
|
|
|
|
|
|
Fetch64(s + 8), |
|
124
|
|
|
|
|
|
|
Fetch64(s + 16), |
|
125
|
|
|
|
|
|
|
Fetch64(s + 24), |
|
126
|
|
|
|
|
|
|
a, b, out); |
|
127
|
164
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
/* CityMurmur: handles len < 128 inside CityHash128WithSeed. */ |
|
130
|
31
|
|
|
|
|
|
static void CityMurmur(const char *s, size_t len, |
|
131
|
|
|
|
|
|
|
uint64_t seed_low, uint64_t seed_high, |
|
132
|
|
|
|
|
|
|
uint64_t out[2]) { |
|
133
|
31
|
|
|
|
|
|
uint64_t a = seed_low; |
|
134
|
31
|
|
|
|
|
|
uint64_t b = seed_high; |
|
135
|
31
|
|
|
|
|
|
uint64_t c = 0; |
|
136
|
31
|
|
|
|
|
|
uint64_t d = 0; |
|
137
|
|
|
|
|
|
|
/* The reference uses `signed long l = len - 16` then `if (l <= 0)` |
|
138
|
|
|
|
|
|
|
* to bifurcate; with `size_t` we test `len <= 16` directly. */ |
|
139
|
31
|
100
|
|
|
|
|
if (len <= 16) { |
|
140
|
12
|
|
|
|
|
|
a = ShiftMix(a * k1) * k1; |
|
141
|
12
|
|
|
|
|
|
c = b * k1 + HashLen0to16(s, len); |
|
142
|
12
|
100
|
|
|
|
|
d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); |
|
143
|
|
|
|
|
|
|
} else { |
|
144
|
19
|
|
|
|
|
|
c = HashLen16(Fetch64(s + len - 8) + k1, a); |
|
145
|
19
|
|
|
|
|
|
d = HashLen16(b + len, c + Fetch64(s + len - 16)); |
|
146
|
19
|
|
|
|
|
|
a += d; |
|
147
|
19
|
|
|
|
|
|
size_t l = len - 16; |
|
148
|
|
|
|
|
|
|
do { |
|
149
|
19
|
|
|
|
|
|
a ^= ShiftMix(Fetch64(s) * k1) * k1; |
|
150
|
19
|
|
|
|
|
|
a *= k1; |
|
151
|
19
|
|
|
|
|
|
b ^= a; |
|
152
|
19
|
|
|
|
|
|
c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; |
|
153
|
19
|
|
|
|
|
|
c *= k1; |
|
154
|
19
|
|
|
|
|
|
d ^= c; |
|
155
|
19
|
|
|
|
|
|
s += 16; |
|
156
|
19
|
|
|
|
|
|
l = (l > 16) ? (l - 16) : 0; |
|
157
|
19
|
50
|
|
|
|
|
} while (l > 0); |
|
158
|
|
|
|
|
|
|
} |
|
159
|
31
|
|
|
|
|
|
a = HashLen16(a, c); |
|
160
|
31
|
|
|
|
|
|
b = HashLen16(d, b); |
|
161
|
31
|
|
|
|
|
|
out[0] = a ^ b; |
|
162
|
31
|
|
|
|
|
|
out[1] = HashLen16(b, a); |
|
163
|
31
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
|
|
165
|
40
|
|
|
|
|
|
static void CityHash128WithSeed(const char *s, size_t len, |
|
166
|
|
|
|
|
|
|
uint64_t seed_low, uint64_t seed_high, |
|
167
|
|
|
|
|
|
|
uint64_t out[2]) { |
|
168
|
40
|
100
|
|
|
|
|
if (len < 128) { |
|
169
|
31
|
|
|
|
|
|
CityMurmur(s, len, seed_low, seed_high, out); |
|
170
|
31
|
|
|
|
|
|
return; |
|
171
|
|
|
|
|
|
|
} |
|
172
|
|
|
|
|
|
|
/* Long-input path: 56 bytes of rolling state, eats input in |
|
173
|
|
|
|
|
|
|
* 128-byte chunks (two 64-byte halves per loop iteration). */ |
|
174
|
|
|
|
|
|
|
uint64_t v[2], w[2]; |
|
175
|
9
|
|
|
|
|
|
uint64_t x = seed_low; |
|
176
|
9
|
|
|
|
|
|
uint64_t y = seed_high; |
|
177
|
9
|
|
|
|
|
|
uint64_t z = len * k1; |
|
178
|
9
|
|
|
|
|
|
v[0] = Rotate(y ^ k1, 49) * k1 + Fetch64(s); |
|
179
|
9
|
|
|
|
|
|
v[1] = Rotate(v[0], 42) * k1 + Fetch64(s + 8); |
|
180
|
9
|
|
|
|
|
|
w[0] = Rotate(y + z, 35) * k1 + x; |
|
181
|
9
|
|
|
|
|
|
w[1] = Rotate(x + Fetch64(s + 88), 53) * k1; |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
do { |
|
184
|
35
|
|
|
|
|
|
x = Rotate(x + y + v[0] + Fetch64(s + 8), 37) * k1; |
|
185
|
35
|
|
|
|
|
|
y = Rotate(y + v[1] + Fetch64(s + 48), 42) * k1; |
|
186
|
35
|
|
|
|
|
|
x ^= w[1]; |
|
187
|
35
|
|
|
|
|
|
y ^= v[0]; |
|
188
|
35
|
|
|
|
|
|
z = Rotate(z ^ w[0], 33); |
|
189
|
35
|
|
|
|
|
|
WeakHashLen32WithSeeds(s, v[1] * k1, x + w[0], v); |
|
190
|
35
|
|
|
|
|
|
WeakHashLen32WithSeeds(s + 32, z + w[1], y, w); |
|
191
|
35
|
|
|
|
|
|
{ uint64_t t = z; z = x; x = t; } |
|
192
|
35
|
|
|
|
|
|
s += 64; |
|
193
|
35
|
|
|
|
|
|
x = Rotate(x + y + v[0] + Fetch64(s + 8), 37) * k1; |
|
194
|
35
|
|
|
|
|
|
y = Rotate(y + v[1] + Fetch64(s + 48), 42) * k1; |
|
195
|
35
|
|
|
|
|
|
x ^= w[1]; |
|
196
|
35
|
|
|
|
|
|
y ^= v[0]; |
|
197
|
35
|
|
|
|
|
|
z = Rotate(z ^ w[0], 33); |
|
198
|
35
|
|
|
|
|
|
WeakHashLen32WithSeeds(s, v[1] * k1, x + w[0], v); |
|
199
|
35
|
|
|
|
|
|
WeakHashLen32WithSeeds(s + 32, z + w[1], y, w); |
|
200
|
35
|
|
|
|
|
|
{ uint64_t t = z; z = x; x = t; } |
|
201
|
35
|
|
|
|
|
|
s += 64; |
|
202
|
35
|
|
|
|
|
|
len -= 128; |
|
203
|
35
|
100
|
|
|
|
|
} while (len >= 128); |
|
204
|
|
|
|
|
|
|
|
|
205
|
9
|
|
|
|
|
|
y += Rotate(w[0], 37) * k0 + z; |
|
206
|
9
|
|
|
|
|
|
x += Rotate(v[0] + z, 49) * k0; |
|
207
|
|
|
|
|
|
|
/* If 0 < len < 128, hash the tail in 32-byte chunks from the END |
|
208
|
|
|
|
|
|
|
* of the input. tail_done starts at 0 and grows by 32 each iter. */ |
|
209
|
|
|
|
|
|
|
{ |
|
210
|
9
|
|
|
|
|
|
size_t tail_done = 0; |
|
211
|
33
|
100
|
|
|
|
|
while (tail_done < len) { |
|
212
|
24
|
|
|
|
|
|
tail_done += 32; |
|
213
|
24
|
|
|
|
|
|
y = Rotate(y - x, 42) * k0 + v[1]; |
|
214
|
24
|
|
|
|
|
|
w[0] += Fetch64(s + len - tail_done + 16); |
|
215
|
24
|
|
|
|
|
|
x = Rotate(x, 49) * k0 + w[0]; |
|
216
|
24
|
|
|
|
|
|
w[0] += v[0]; |
|
217
|
24
|
|
|
|
|
|
WeakHashLen32WithSeeds(s + len - tail_done, |
|
218
|
24
|
|
|
|
|
|
v[0] + z, v[1], v); |
|
219
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
/* Final mix. */ |
|
222
|
9
|
|
|
|
|
|
x = HashLen16(x, v[0]); |
|
223
|
9
|
|
|
|
|
|
y = HashLen16(y, w[0]); |
|
224
|
9
|
|
|
|
|
|
out[0] = HashLen16(x + v[1], w[1]) + y; |
|
225
|
9
|
|
|
|
|
|
out[1] = HashLen16(x + w[1], y + v[1]); |
|
226
|
|
|
|
|
|
|
} |
|
227
|
|
|
|
|
|
|
|
|
228
|
40
|
|
|
|
|
|
void cityhash128_v102(const char *s, size_t len, unsigned char out[16]) { |
|
229
|
|
|
|
|
|
|
uint64_t hash[2]; |
|
230
|
40
|
100
|
|
|
|
|
if (len >= 16) { |
|
231
|
72
|
|
|
|
|
|
CityHash128WithSeed(s + 16, len - 16, |
|
232
|
36
|
|
|
|
|
|
Fetch64(s) ^ k3, Fetch64(s + 8), |
|
233
|
|
|
|
|
|
|
hash); |
|
234
|
4
|
100
|
|
|
|
|
} else if (len >= 8) { |
|
235
|
4
|
|
|
|
|
|
CityHash128WithSeed(NULL, 0, |
|
236
|
2
|
|
|
|
|
|
Fetch64(s) ^ ((uint64_t)len * k0), |
|
237
|
2
|
|
|
|
|
|
Fetch64(s + len - 8) ^ k1, |
|
238
|
|
|
|
|
|
|
hash); |
|
239
|
|
|
|
|
|
|
} else { |
|
240
|
2
|
|
|
|
|
|
CityHash128WithSeed(s, len, k0, k1, hash); |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
/* Pack low half then high half as 16 little-endian bytes. */ |
|
243
|
|
|
|
|
|
|
int i; |
|
244
|
360
|
100
|
|
|
|
|
for (i = 0; i < 8; i++) out[i] = (unsigned char)(hash[0] >> (i * 8)); |
|
245
|
360
|
100
|
|
|
|
|
for (i = 0; i < 8; i++) out[8 + i] = (unsigned char)(hash[1] >> (i * 8)); |
|
246
|
40
|
|
|
|
|
|
} |