File Coverage

cityhash.c
Criterion Covered Total %
statement 136 136 100.0
branch 26 28 92.8
condition n/a
subroutine n/a
pod n/a
total 162 164 98.7


line stmt bran cond sub pod time code
1             /* CityHash128, v1.0.2 ("cityhash102"). Ported from Google CityHash
2             * v1.0.2 (Geoff Pike, Jyrki Alakuijala; MIT license), the same revision
3             * ClickHouse keeps under contrib/cityhash102/ and uses for its
4             * compressed-block 16-byte checksum.
5             *
6             * This file only implements CityHash128 (without seed) - the seeded
7             * helpers, CityHash64, and the CRC32-accelerated CityHashCrc128 are
8             * all skipped because the only ClickHouse-Encoder caller is the
9             * CompressedReadBuffer / CompressedWriteBuffer block-prefix hash. */
10              
11             #include
12             #include
13             #include
14              
15             #include "cityhash.h"
16              
17             /* Magic constants from CityHash v1.0.2. */
18             static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
19             static const uint64_t k1 = 0xb492b66fbe98f273ULL;
20             static const uint64_t k2 = 0x9ae16a3b2f90404fULL;
21             static const uint64_t k3 = 0xc949d7c7509e6557ULL;
22              
23             /* Little-endian byte loads via memcpy. On LE hosts the compiler
24             * collapses these to a single load; on BE hosts we explicitly byte-
25             * swap so the algorithm sees identical integer values across hosts. */
26 1013           static inline uint64_t Fetch64(const char *p) {
27             uint64_t v;
28 1013           memcpy(&v, p, 8);
29             #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
30             v = ((v & 0x00000000000000ffULL) << 56)
31             | ((v & 0x000000000000ff00ULL) << 40)
32             | ((v & 0x0000000000ff0000ULL) << 24)
33             | ((v & 0x00000000ff000000ULL) << 8)
34             | ((v & 0x000000ff00000000ULL) >> 8)
35             | ((v & 0x0000ff0000000000ULL) >> 24)
36             | ((v & 0x00ff000000000000ULL) >> 40)
37             | ((v & 0xff00000000000000ULL) >> 56);
38             #endif
39 1013           return v;
40             }
41              
42 8           static inline uint32_t Fetch32(const char *p) {
43             uint32_t v;
44 8           memcpy(&v, p, 4);
45             #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
46             v = ((v & 0x000000ffU) << 24)
47             | ((v & 0x0000ff00U) << 8)
48             | ((v & 0x00ff0000U) >> 8)
49             | ((v & 0xff000000U) >> 24);
50             #endif
51 8           return v;
52             }
53              
54 640           static inline uint64_t Rotate(uint64_t v, int n) {
55             /* Reference impl returns `v` unchanged when n == 0 (avoiding UB
56             * from `v >> 64`). All call sites pass n in 1..63, but mirror the
57             * guard so a future-added caller doesn't trip on it. */
58 640 50         return n == 0 ? v : ((v >> n) | (v << (64 - n)));
59             }
60              
61 4           static inline uint64_t RotateByAtLeast1(uint64_t v, int n) {
62 4           return (v >> n) | (v << (64 - n));
63             }
64              
65 63           static inline uint64_t ShiftMix(uint64_t v) {
66 63           return v ^ (v >> 47);
67             }
68              
69 175           static inline uint64_t Hash128to64(uint64_t low, uint64_t high) {
70             static const uint64_t kMul = 0x9ddfea08eb382d69ULL;
71 175           uint64_t a = (low ^ high) * kMul;
72 175           a ^= (a >> 47);
73 175           uint64_t b = (high ^ a) * kMul;
74 175           b ^= (b >> 47);
75 175           b *= kMul;
76 175           return b;
77             }
78              
79 175           static inline uint64_t HashLen16(uint64_t u, uint64_t v) {
80 175           return Hash128to64(u, v);
81             }
82              
83 12           static uint64_t HashLen0to16(const char *s, size_t len) {
84 12 100         if (len > 8) {
85 4           uint64_t a = Fetch64(s);
86 4           uint64_t b = Fetch64(s + len - 8);
87 4           return HashLen16(a, RotateByAtLeast1(b + len, (int)len)) ^ b;
88             }
89 8 100         if (len >= 4) {
90 4           uint64_t a = (uint64_t)Fetch32(s);
91 4           return HashLen16(len + (a << 3),
92 4           (uint64_t)Fetch32(s + len - 4));
93             }
94 4 100         if (len > 0) {
95 1           uint8_t a = (uint8_t)s[0];
96 1           uint8_t b = (uint8_t)s[len >> 1];
97 1           uint8_t c = (uint8_t)s[len - 1];
98 1           uint32_t y = (uint32_t)a + ((uint32_t)b << 8);
99 1           uint32_t z = (uint32_t)len + ((uint32_t)c << 2);
100 1           return ShiftMix((uint64_t)y * k2 ^ (uint64_t)z * k3) * k2;
101             }
102 3           return k2;
103             }
104              
105             /* Return a 16-byte hash for 32 bytes plus two seed words.
106             * Out is written as out[0] = low, out[1] = high. */
107 164           static void WeakHashLen32WithSeeds_raw(
108             uint64_t w, uint64_t x, uint64_t y, uint64_t z,
109             uint64_t a, uint64_t b, uint64_t out[2]) {
110 164           a += w;
111 164           b = Rotate(b + a + z, 21);
112 164           uint64_t c = a;
113 164           a += x;
114 164           a += y;
115 164           b += Rotate(a, 44);
116 164           out[0] = a + z;
117 164           out[1] = b + c;
118 164           }
119              
120 164           static inline void WeakHashLen32WithSeeds(
121             const char *s, uint64_t a, uint64_t b, uint64_t out[2]) {
122 164           WeakHashLen32WithSeeds_raw(Fetch64(s),
123             Fetch64(s + 8),
124             Fetch64(s + 16),
125             Fetch64(s + 24),
126             a, b, out);
127 164           }
128              
129             /* CityMurmur: handles len < 128 inside CityHash128WithSeed. */
130 31           static void CityMurmur(const char *s, size_t len,
131             uint64_t seed_low, uint64_t seed_high,
132             uint64_t out[2]) {
133 31           uint64_t a = seed_low;
134 31           uint64_t b = seed_high;
135 31           uint64_t c = 0;
136 31           uint64_t d = 0;
137             /* The reference uses `signed long l = len - 16` then `if (l <= 0)`
138             * to bifurcate; with `size_t` we test `len <= 16` directly. */
139 31 100         if (len <= 16) {
140 12           a = ShiftMix(a * k1) * k1;
141 12           c = b * k1 + HashLen0to16(s, len);
142 12 100         d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
143             } else {
144 19           c = HashLen16(Fetch64(s + len - 8) + k1, a);
145 19           d = HashLen16(b + len, c + Fetch64(s + len - 16));
146 19           a += d;
147 19           size_t l = len - 16;
148             do {
149 19           a ^= ShiftMix(Fetch64(s) * k1) * k1;
150 19           a *= k1;
151 19           b ^= a;
152 19           c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
153 19           c *= k1;
154 19           d ^= c;
155 19           s += 16;
156 19           l = (l > 16) ? (l - 16) : 0;
157 19 50         } while (l > 0);
158             }
159 31           a = HashLen16(a, c);
160 31           b = HashLen16(d, b);
161 31           out[0] = a ^ b;
162 31           out[1] = HashLen16(b, a);
163 31           }
164              
165 40           static void CityHash128WithSeed(const char *s, size_t len,
166             uint64_t seed_low, uint64_t seed_high,
167             uint64_t out[2]) {
168 40 100         if (len < 128) {
169 31           CityMurmur(s, len, seed_low, seed_high, out);
170 31           return;
171             }
172             /* Long-input path: 56 bytes of rolling state, eats input in
173             * 128-byte chunks (two 64-byte halves per loop iteration). */
174             uint64_t v[2], w[2];
175 9           uint64_t x = seed_low;
176 9           uint64_t y = seed_high;
177 9           uint64_t z = len * k1;
178 9           v[0] = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
179 9           v[1] = Rotate(v[0], 42) * k1 + Fetch64(s + 8);
180 9           w[0] = Rotate(y + z, 35) * k1 + x;
181 9           w[1] = Rotate(x + Fetch64(s + 88), 53) * k1;
182              
183             do {
184 35           x = Rotate(x + y + v[0] + Fetch64(s + 8), 37) * k1;
185 35           y = Rotate(y + v[1] + Fetch64(s + 48), 42) * k1;
186 35           x ^= w[1];
187 35           y ^= v[0];
188 35           z = Rotate(z ^ w[0], 33);
189 35           WeakHashLen32WithSeeds(s, v[1] * k1, x + w[0], v);
190 35           WeakHashLen32WithSeeds(s + 32, z + w[1], y, w);
191 35           { uint64_t t = z; z = x; x = t; }
192 35           s += 64;
193 35           x = Rotate(x + y + v[0] + Fetch64(s + 8), 37) * k1;
194 35           y = Rotate(y + v[1] + Fetch64(s + 48), 42) * k1;
195 35           x ^= w[1];
196 35           y ^= v[0];
197 35           z = Rotate(z ^ w[0], 33);
198 35           WeakHashLen32WithSeeds(s, v[1] * k1, x + w[0], v);
199 35           WeakHashLen32WithSeeds(s + 32, z + w[1], y, w);
200 35           { uint64_t t = z; z = x; x = t; }
201 35           s += 64;
202 35           len -= 128;
203 35 100         } while (len >= 128);
204              
205 9           y += Rotate(w[0], 37) * k0 + z;
206 9           x += Rotate(v[0] + z, 49) * k0;
207             /* If 0 < len < 128, hash the tail in 32-byte chunks from the END
208             * of the input. tail_done starts at 0 and grows by 32 each iter. */
209             {
210 9           size_t tail_done = 0;
211 33 100         while (tail_done < len) {
212 24           tail_done += 32;
213 24           y = Rotate(y - x, 42) * k0 + v[1];
214 24           w[0] += Fetch64(s + len - tail_done + 16);
215 24           x = Rotate(x, 49) * k0 + w[0];
216 24           w[0] += v[0];
217 24           WeakHashLen32WithSeeds(s + len - tail_done,
218 24           v[0] + z, v[1], v);
219             }
220             }
221             /* Final mix. */
222 9           x = HashLen16(x, v[0]);
223 9           y = HashLen16(y, w[0]);
224 9           out[0] = HashLen16(x + v[1], w[1]) + y;
225 9           out[1] = HashLen16(x + w[1], y + v[1]);
226             }
227              
228 40           void cityhash128_v102(const char *s, size_t len, unsigned char out[16]) {
229             uint64_t hash[2];
230 40 100         if (len >= 16) {
231 72           CityHash128WithSeed(s + 16, len - 16,
232 36           Fetch64(s) ^ k3, Fetch64(s + 8),
233             hash);
234 4 100         } else if (len >= 8) {
235 4           CityHash128WithSeed(NULL, 0,
236 2           Fetch64(s) ^ ((uint64_t)len * k0),
237 2           Fetch64(s + len - 8) ^ k1,
238             hash);
239             } else {
240 2           CityHash128WithSeed(s, len, k0, k1, hash);
241             }
242             /* Pack low half then high half as 16 little-endian bytes. */
243             int i;
244 360 100         for (i = 0; i < 8; i++) out[i] = (unsigned char)(hash[0] >> (i * 8));
245 360 100         for (i = 0; i < 8; i++) out[8 + i] = (unsigned char)(hash[1] >> (i * 8));
246 40           }