File Coverage

sphash.h
Criterion Covered Total %
statement 637 765 83.2
branch 470 708 66.3
condition n/a
subroutine n/a
pod n/a
total 1107 1473 75.1


line stmt bran cond sub pod time code
1             /*
2             * sphash.h -- Shared-memory spatial hash index for Linux
3             *
4             * Unbounded sparse spatial hash. Points are bucketed by integer cell
5             * coordinates (floor(pos / cell_size)); cells map to hash buckets via XXH3.
6             * Entries live in a fixed pool allocated from a free-list, chained into
7             * per-bucket singly/doubly linked lists. A write-preferring futex rwlock
8             * with reader-slot dead-process recovery guards mutations.
9             *
10             * Entry: double pos[3]; int64_t value; uint32_t next, prev
11             * Layout: Header -> reader_slots[1024] -> buckets -> bitmap -> entries
12             */
13              
14             #ifndef SPHASH_H
15             #define SPHASH_H
16              
17             #include
18             #include
19             #include
20             #include
21             #include
22             #include
23             #include
24             #include
25             #include
26             #include
27             #include
28             #include
29             #include
30             #include
31             #include
32             #include
33             #include
34             #include
35              
36             #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
37             #error "sphash.h: requires little-endian architecture"
38             #endif
39              
40             #define XXH_INLINE_ALL
41             #include "xxhash.h"
42              
43             /* ================================================================
44             * Constants
45             * ================================================================ */
46              
47             #define SPH_MAGIC 0x53504831U /* "SPH1" */
48             #define SPH_VERSION 1
49             #define SPH_NONE UINT32_MAX
50             #define SPH_ERR_BUFLEN 256
51             #define SPH_READER_SLOTS 1024 /* max concurrent reader processes for dead-process recovery */
52              
53             #define SPH_MAX_QUERY_CELLS (1u << 26) /* ~67M cell ceiling per query; raise + rebuild if you genuinely need larger regions */
54             /* query function return codes */
55             #define SPH_Q_OK 1
56             #define SPH_Q_OOM 0
57             #define SPH_Q_TOOBIG (-1)
58              
59             #define SPH_ERR(fmt, ...) do { if (errbuf) snprintf(errbuf, SPH_ERR_BUFLEN, fmt, ##__VA_ARGS__); } while (0)
60              
61             /* ================================================================
62             * Structs
63             * ================================================================ */
64              
65             typedef struct {
66             double pos[3]; /* 0 */
67             int64_t value; /* 24 */
68             double radius; /* 32 per-entry interaction radius (0 = point) */
69             uint32_t next; /* 40 */
70             uint32_t prev; /* 44 */
71             } SpatialEntry; /* 48 */
72              
73             /* Per-process slot for dead-process recovery. Each shared rwlock counter
74             * (the main rwlock-reader count, rwlock_waiters, rwlock_writers_waiting)
75             * is mirrored here so a wrlock timeout can attribute and reverse a dead
76             * process's contribution instead of waiting for the slow per-op timeout
77             * drain. */
78             typedef struct {
79             uint32_t pid; /* 0 = unclaimed */
80             uint32_t subcount; /* in-flight rdlock acquisitions for this process */
81             uint32_t waiters_parked; /* contribution to hdr->rwlock_waiters */
82             uint32_t writers_parked; /* contribution to hdr->rwlock_writers_waiting */
83             } SphReaderSlot;
84              
85             struct SphHeader {
86             uint32_t magic, version; /* 0,4 */
87             uint32_t max_entries, num_buckets;/* 8,12 */
88             double cell_size; /* 16 */
89             uint32_t count; /* 24 */
90             uint32_t free_head; /* 28 */
91             uint64_t total_size; /* 32 */
92             uint64_t reader_slots_off; /* 40 */
93             uint64_t buckets_off; /* 48 */
94             uint64_t bitmap_off; /* 56 */
95             uint64_t entries_off; /* 64 */
96             uint32_t rwlock; /* 72 */
97             uint32_t rwlock_waiters; /* 76 */
98             uint32_t rwlock_writers_waiting; /* 80 */
99             uint32_t _pad0; /* 84 */
100             uint64_t stat_ops; /* 88 */
101             double world[3]; /* 96 toroidal wrap extents per axis (0 = no wrap) */
102             uint32_t flags; /* 120 bit0 = SPH_FLAG_WRAP */
103             uint32_t _pad0b; /* 124 */
104             double sphere_radius; /* 128 body radius for geo methods (0 = geo disabled) */
105             uint8_t _pad1[120]; /* 136..255 */
106             };
107             typedef struct SphHeader SphHeader;
108              
109             #define SPH_FLAG_WRAP 1u
110              
111             _Static_assert(sizeof(SpatialEntry) == 48, "SpatialEntry must be 48 bytes");
112             _Static_assert(sizeof(SphHeader) == 256, "SphHeader must be 256 bytes");
113              
114             /* ---- Process-local handle ---- */
115              
116             typedef struct SpatialHandle {
117             SphHeader *hdr;
118             SphReaderSlot *reader_slots; /* SPH_READER_SLOTS entries */
119             uint32_t *buckets;
120             uint64_t *bitmap;
121             SpatialEntry *entries;
122             uint32_t bitmap_words;
123             size_t mmap_size;
124             char *path; /* backing file path (strdup'd) */
125             int notify_fd;
126             int backing_fd; /* memfd fd to close on destroy, -1 otherwise */
127             uint32_t my_slot_idx; /* UINT32_MAX if all slots taken (no recovery for this handle) */
128             uint32_t cached_pid; /* getpid() cached at last slot claim */
129             uint32_t cached_fork_gen; /* sph_fork_gen value at last slot claim */
130             double world[3]; /* cached wrap extents (0 = no wrap on axis) */
131             int64_t wrap_cells[3]; /* ceil(world/cell) per axis, 0 = no wrap */
132             int wrap; /* nonzero if any axis wraps */
133             } SpatialHandle;
134              
135             /* ================================================================
136             * Hashing + cell helpers
137             * ================================================================ */
138              
139 83           static inline uint32_t sph_next_pow2(uint32_t v) {
140 83 100         if (v < 2) return 1;
141 82           return 1u << (32 - __builtin_clz(v - 1));
142             }
143             /* Largest magnitude a cell index may take. Clamping to +/-2^62 keeps cells far
144             * inside int64 range, so the cell-box and knn-shell loops -- which expand from
145             * center +/- g over a span already capped at SPH_MAX_QUERY_CELLS -- can never
146             * overflow int64. 2^62 is far beyond any coordinate a double represents exactly
147             * (2^53), so nothing reachable is lost. */
148             #define SPH_CELL_LIMIT ((int64_t)1 << 62)
149              
150             /* Floor one coordinate to its integer cell. Defined for every double: NaN/Inf
151             * map to cell 0, and the result is clamped to +/-SPH_CELL_LIMIT, both to avoid
152             * the UB of converting an out-of-range double to int64_t and to bound the
153             * query loops (see SPH_CELL_LIMIT). */
154 2753844           static inline int64_t sph_floor_cell(double v, double cs) {
155 2753844           double d = floor(v / cs);
156 2753844 100         if (!isfinite(d)) return 0;
157 2753840 100         if (d >= (double)SPH_CELL_LIMIT) return SPH_CELL_LIMIT;
158 2753832 100         if (d <= -(double)SPH_CELL_LIMIT) return -SPH_CELL_LIMIT;
159 2753831           return (int64_t)d;
160             }
161             /* positive modulo of a cell index into [0, n); n <= 0 means "no wrap on this axis" */
162 34033           static inline int64_t sph_wrap_cell(int64_t c, int64_t n) {
163 34033 100         if (n <= 0) return c;
164 30707           int64_t m = c % n;
165 30707 100         return m < 0 ? m + n : m;
166             }
167             /* shortest per-axis separation; with wrap, the minimum-image (toroidal) distance */
168 56105           static inline double sph_axis_delta(double a, double b, double world) {
169 56105           double d = fabs(a - b);
170 56105 100         return (world > 0.0 && d > world * 0.5) ? world - d : d;
    100          
171             }
172             /* raw integer cell of a position (no wrap) -- used for box-corner span math */
173 917948           static inline void sph_cell_raw(const SpatialHandle *h, const double pos[3], int64_t cell[3]) {
174 917948           double cs = h->hdr->cell_size;
175 917948           cell[0] = sph_floor_cell(pos[0], cs);
176 917948           cell[1] = sph_floor_cell(pos[1], cs);
177 917948           cell[2] = sph_floor_cell(pos[2], cs);
178 917948           }
179             /* storage cell: the raw cell wrapped into [0,nx) per axis when toroidal, so a
180             point near x=0 and one near x=world land in adjacent cells (0 and nx-1) */
181 915874           static inline void sph_cell_of(const SpatialHandle *h, const double pos[3], int64_t cell[3]) {
182 915874           sph_cell_raw(h, pos, cell);
183 915874 100         if (h->wrap) {
184 5287           cell[0] = sph_wrap_cell(cell[0], h->wrap_cells[0]);
185 5287           cell[1] = sph_wrap_cell(cell[1], h->wrap_cells[1]);
186 5287           cell[2] = sph_wrap_cell(cell[2], h->wrap_cells[2]);
187             }
188 915874           }
189 2014540           static inline uint32_t sph_bucket_of_cell(const SpatialHandle *h, const int64_t cell[3]) {
190 2014540           return (uint32_t)(XXH3_64bits(cell, 3 * sizeof(int64_t)) & (h->hdr->num_buckets - 1));
191             }
192 901830           static inline int sph_cell_eq(const int64_t a[3], const int64_t b[3]) {
193 901830 100         return a[0] == b[0] && a[1] == b[1] && a[2] == b[2];
    100          
    100          
194             }
195              
196             /* ================================================================
197             * Futex-based write-preferring read-write lock
198             * with reader-slot dead-process recovery
199             * ================================================================ */
200              
201             #define SPH_RWLOCK_SPIN_LIMIT 32
202             #define SPH_LOCK_TIMEOUT_SEC 2 /* FUTEX_WAIT timeout for stale lock detection */
203              
204 0           static inline void sph_rwlock_spin_pause(void) {
205             #if defined(__x86_64__) || defined(__i386__)
206 0           __asm__ volatile("pause" ::: "memory");
207             #elif defined(__aarch64__)
208             __asm__ volatile("yield" ::: "memory");
209             #else
210             __asm__ volatile("" ::: "memory");
211             #endif
212 0           }
213              
214             /* Extract writer PID from rwlock value (lower 31 bits when write-locked). */
215             #define SPH_RWLOCK_WRITER_BIT 0x80000000U
216             #define SPH_RWLOCK_PID_MASK 0x7FFFFFFFU
217             #define SPH_RWLOCK_WR(pid) (SPH_RWLOCK_WRITER_BIT | ((uint32_t)(pid) & SPH_RWLOCK_PID_MASK))
218              
219             /* Check if a PID is alive. Returns 1 if alive or unknown, 0 if definitely dead. */
220             /* Liveness via kill(pid,0). NOTE: cannot detect PID reuse -- if a dead
221             * lock-holder's PID is recycled to an unrelated live process before recovery
222             * runs, this reports "alive" and that slot's orphaned contribution is not
223             * reclaimed until the recycled process exits. Robust detection would require
224             * a per-slot process-start-time epoch (a header-layout/version change).
225             * Documented under "Crash Safety" in the POD. */
226 0           static inline int sph_pid_alive(uint32_t pid) {
227 0 0         if (pid == 0) return 1; /* no owner recorded, assume alive */
228 0 0         return !(kill((pid_t)pid, 0) == -1 && errno == ESRCH);
    0          
229             }
230              
231             /* Force-recover a stale write lock left by a dead process.
232             * CAS to OUR pid to hold the lock while fixing shared state, then release.
233             * Using our pid (not a bare WRITER_BIT sentinel) means a subsequent
234             * recovering process can detect and re-recover if we crash mid-recovery. */
235 0           static inline void sph_recover_stale_lock(SpatialHandle *h, uint32_t observed_rwlock) {
236 0           SphHeader *hdr = h->hdr;
237 0           uint32_t mypid = SPH_RWLOCK_WR((uint32_t)getpid());
238 0 0         if (!__atomic_compare_exchange_n(&hdr->rwlock, &observed_rwlock,
239             mypid, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
240 0           return;
241             /* We now hold the write lock as mypid. No additional shared state needs
242             * repair here (this module has no seqlock); just release the lock. */
243 0           __atomic_store_n(&hdr->rwlock, 0, __ATOMIC_RELEASE);
244 0 0         if (__atomic_load_n(&hdr->rwlock_waiters, __ATOMIC_RELAXED) > 0)
245 0           syscall(SYS_futex, &hdr->rwlock, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
246             }
247              
248             static const struct timespec sph_lock_timeout = { SPH_LOCK_TIMEOUT_SEC, 0 };
249              
250             /* Process-global fork-generation counter. Incremented in the pthread_atfork
251             * child callback so every open handle detects a fork transition on the next
252             * lock call without paying a getpid() syscall on the hot path. */
253             static uint32_t sph_fork_gen = 1;
254             static pthread_once_t sph_atfork_once = PTHREAD_ONCE_INIT;
255 0           static void sph_on_fork_child(void) {
256 0           __atomic_add_fetch(&sph_fork_gen, 1, __ATOMIC_RELAXED);
257 0           }
258 17           static void sph_atfork_init(void) {
259 17           pthread_atfork(NULL, NULL, sph_on_fork_child);
260 17           }
261              
262             /* Ensure this process owns a reader slot. Called from the lock helpers so
263             * that fork()'d children pick up their own slot lazily instead of sharing
264             * the parent's. Hot-path is a single relaxed load + compare; only on a
265             * fork-generation mismatch do we touch getpid() and scan slots. */
266 12173           static inline void sph_claim_reader_slot(SpatialHandle *h) {
267 12173           uint32_t cur_gen = __atomic_load_n(&sph_fork_gen, __ATOMIC_RELAXED);
268 12173 100         if (__builtin_expect(cur_gen == h->cached_fork_gen && h->my_slot_idx != UINT32_MAX, 1))
    50          
269 12114           return;
270             /* Cold path -- register the atfork hook once per process, then claim. */
271 59           pthread_once(&sph_atfork_once, sph_atfork_init);
272             /* Re-read after pthread_once: sph_on_fork_child may have bumped it. */
273 59           cur_gen = __atomic_load_n(&sph_fork_gen, __ATOMIC_RELAXED);
274 59           uint32_t now_pid = (uint32_t)getpid();
275 59           h->cached_pid = now_pid;
276 59           h->cached_fork_gen = cur_gen;
277 59           h->my_slot_idx = UINT32_MAX;
278 59           uint32_t start = now_pid % SPH_READER_SLOTS;
279 67 50         for (uint32_t i = 0; i < SPH_READER_SLOTS; i++) {
280 67           uint32_t s = (start + i) % SPH_READER_SLOTS;
281 67           uint32_t expected = 0;
282 67 100         if (__atomic_compare_exchange_n(&h->reader_slots[s].pid,
283             &expected, now_pid, 0,
284             __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
285             /* Zero all mirror fields, not just subcount: a SIGKILL'd
286             * predecessor may have left waiters_parked/writers_parked
287             * non-zero, and sph_recover_dead_readers won't drain them
288             * once we own the slot (the CAS expects the dead PID). */
289 59           __atomic_store_n(&h->reader_slots[s].subcount, 0, __ATOMIC_RELAXED);
290 59           __atomic_store_n(&h->reader_slots[s].waiters_parked, 0, __ATOMIC_RELAXED);
291 59           __atomic_store_n(&h->reader_slots[s].writers_parked, 0, __ATOMIC_RELAXED);
292 59           h->my_slot_idx = s;
293 59           return;
294             }
295             }
296             /* Table full -- leave my_slot_idx = UINT32_MAX so we silently skip
297             * tracking for this handle (lock still works; just no recovery). */
298             }
299              
300             /* Atomically subtract `sub` from a counter, capped at 0 (never underflows). */
301 0           static inline void sph_atomic_sub_cap(uint32_t *p, uint32_t sub) {
302 0 0         if (!sub) return;
303 0           uint32_t cur = __atomic_load_n(p, __ATOMIC_RELAXED);
304 0           for (;;) {
305 0 0         uint32_t want = (cur > sub) ? cur - sub : 0;
306 0 0         if (__atomic_compare_exchange_n(p, &cur, want,
307             1, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
308 0           return;
309             }
310             }
311              
312             /* Try to claim a dead slot (CAS pid -> 0) and drain its parked-waiter
313             * contributions back to the global counters. A no-op if the slot was stolen
314             * by another recoverer or had no waiter contribution to drain.
315             *
316             * Note: subcount/waiters_parked/writers_parked are NOT zeroed here.
317             * Between our CAS and a follow-up store, a new process could claim the
318             * slot and start populating these fields -- our stores would clobber its
319             * state. sph_claim_reader_slot zeros all three on every claim, so
320             * leaving stale values is harmless. */
321 0           static inline void sph_drain_dead_slot(SpatialHandle *h, uint32_t i, uint32_t pid) {
322 0           SphHeader *hdr = h->hdr;
323 0           uint32_t expected = pid;
324             /* ACQ_REL on success: RELEASE publishes pid=0 to other observers;
325             * ACQUIRE syncs us with prior writes from the dead process to
326             * waiters_parked/writers_parked. On weakly-ordered archs (aarch64)
327             * a plain RELAXED load before the CAS could miss those writes;
328             * loading them after the CAS keeps them inside the acquire window. */
329 0 0         if (!__atomic_compare_exchange_n(&h->reader_slots[i].pid, &expected, 0,
330             0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED))
331 0           return;
332 0           uint32_t wp = __atomic_load_n(&h->reader_slots[i].waiters_parked, __ATOMIC_RELAXED);
333 0           uint32_t writp = __atomic_load_n(&h->reader_slots[i].writers_parked, __ATOMIC_RELAXED);
334 0 0         if (wp) sph_atomic_sub_cap(&hdr->rwlock_waiters, wp);
335 0 0         if (writp) sph_atomic_sub_cap(&hdr->rwlock_writers_waiting, writp);
336             }
337              
338             /* Scan reader slots for dead-process recovery.
339             *
340             * For each dead PID with non-zero contributions to the shared rwlock,
341             * rwlock_waiters, or rwlock_writers_waiting counters, drain its share back
342             * out so live processes don't have to wait for the slow per-op timeout
343             * decrement to drain it for them.
344             *
345             * For the main rwlock counter we use the "no live reader holds -> force-
346             * reset to 0" trick (precise) because per-process attribution of the
347             * subcount is racy across the inc-counter-then-inc-subcount window. */
348 0           static inline void sph_recover_dead_readers(SpatialHandle *h) {
349 0 0         if (!h->reader_slots) return;
350 0           SphHeader *hdr = h->hdr;
351 0           int any_live_reader = 0;
352 0           int found_dead_reader = 0;
353              
354             /* Pass 1: classify slots. Slots with dead pid and sc == 0 (no rwlock
355             * contribution to lose) are wiped immediately to free the slot for
356             * future claimants and drain any orphan parked-waiter counters. Slots
357             * with dead pid and sc > 0 are left intact in this pass: if force-
358             * reset cannot fire (because a live reader is concurrently present),
359             * wiping the dead slot would lose the only record of its orphan
360             * rwlock contribution and strand writers permanently once the live
361             * reader releases. */
362 0 0         for (uint32_t i = 0; i < SPH_READER_SLOTS; i++) {
363 0           uint32_t pid = __atomic_load_n(&h->reader_slots[i].pid, __ATOMIC_ACQUIRE);
364 0 0         if (pid == 0) continue;
365 0           uint32_t sc = __atomic_load_n(&h->reader_slots[i].subcount, __ATOMIC_RELAXED);
366 0 0         if (sph_pid_alive(pid)) {
367 0 0         if (sc > 0) any_live_reader = 1;
368 0           continue;
369             }
370 0 0         if (sc > 0) { found_dead_reader = 1; continue; }
371 0           sph_drain_dead_slot(h, i, pid);
372             }
373              
374             /* Pass 2: only if force-reset will fire. Issue the rwlock force-
375             * reset CAS FIRST, while the window since pass 1's last scan is
376             * still narrow (a handful of instructions, as in the original
377             * single-pass code). A new reader that started rdlock between
378             * pass 1's scan and the CAS will either:
379             * (a) have already CAS'd rwlock from cur to cur+1 -- our CAS then
380             * fails (cur mismatched), recovery yields and a future
381             * cycle retries; or
382             * (b) be still in the subcount-bump phase -- our CAS sees the
383             * stale cur and resets to 0; the new reader's subsequent CAS
384             * rwlock(0 -> 1) succeeds cleanly.
385             * Only after the CAS resolves do we wipe the deferred dead slots,
386             * keeping that work outside the race-sensitive window. */
387 0 0         if (found_dead_reader && !any_live_reader) {
    0          
388 0           uint32_t cur = __atomic_load_n(&hdr->rwlock, __ATOMIC_RELAXED);
389 0 0         if (cur > 0 && cur < SPH_RWLOCK_WRITER_BIT) {
    0          
390 0 0         if (__atomic_compare_exchange_n(&hdr->rwlock, &cur, 0,
391             0, __ATOMIC_RELEASE, __ATOMIC_RELAXED)) {
392 0 0         if (__atomic_load_n(&hdr->rwlock_waiters, __ATOMIC_RELAXED) > 0)
393 0           syscall(SYS_futex, &hdr->rwlock, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
394             }
395             }
396 0 0         for (uint32_t i = 0; i < SPH_READER_SLOTS; i++) {
397 0           uint32_t pid = __atomic_load_n(&h->reader_slots[i].pid, __ATOMIC_ACQUIRE);
398 0 0         if (pid == 0 || sph_pid_alive(pid)) continue;
    0          
399 0           sph_drain_dead_slot(h, i, pid);
400             }
401             }
402             }
403              
404             /* Inspect the lock word after a futex-wait timeout. If a dead writer
405             * holds it, force-recover the lock. Otherwise drain dead readers' shares
406             * of the rwlock/waiter counters. Called from rdlock and wrlock ETIMEDOUT
407             * branches -- identical recovery logic in both. */
408 0           static inline void sph_recover_after_timeout(SpatialHandle *h) {
409 0           SphHeader *hdr = h->hdr;
410 0           uint32_t val = __atomic_load_n(&hdr->rwlock, __ATOMIC_RELAXED);
411 0 0         if (val >= SPH_RWLOCK_WRITER_BIT) {
412 0           uint32_t pid = val & SPH_RWLOCK_PID_MASK;
413 0 0         if (!sph_pid_alive(pid))
414 0           sph_recover_stale_lock(h, val);
415             } else {
416 0           sph_recover_dead_readers(h);
417             }
418 0           }
419              
420             /* Park/unpark helpers: bump the global waiter counters together with this
421             * process's mirrored slot counters so a wrlock-timeout recovery scan can
422             * attribute and reverse a dead PID's contribution. Kept paired to make
423             * accidental drift between global and per-slot counts impossible. */
424 0           static inline void sph_park_reader(SpatialHandle *h) {
425 0 0         if (h->my_slot_idx != UINT32_MAX)
426 0           __atomic_add_fetch(&h->reader_slots[h->my_slot_idx].waiters_parked, 1, __ATOMIC_RELAXED);
427 0           __atomic_add_fetch(&h->hdr->rwlock_waiters, 1, __ATOMIC_RELAXED);
428 0           }
429 0           static inline void sph_unpark_reader(SpatialHandle *h) {
430 0           __atomic_sub_fetch(&h->hdr->rwlock_waiters, 1, __ATOMIC_RELAXED);
431 0 0         if (h->my_slot_idx != UINT32_MAX)
432 0           __atomic_sub_fetch(&h->reader_slots[h->my_slot_idx].waiters_parked, 1, __ATOMIC_RELAXED);
433 0           }
434 0           static inline void sph_park_writer(SpatialHandle *h) {
435 0 0         if (h->my_slot_idx != UINT32_MAX) {
436 0           __atomic_add_fetch(&h->reader_slots[h->my_slot_idx].waiters_parked, 1, __ATOMIC_RELAXED);
437 0           __atomic_add_fetch(&h->reader_slots[h->my_slot_idx].writers_parked, 1, __ATOMIC_RELAXED);
438             }
439 0           __atomic_add_fetch(&h->hdr->rwlock_waiters, 1, __ATOMIC_RELAXED);
440 0           __atomic_add_fetch(&h->hdr->rwlock_writers_waiting, 1, __ATOMIC_RELAXED);
441 0           }
442 0           static inline void sph_unpark_writer(SpatialHandle *h) {
443 0           __atomic_sub_fetch(&h->hdr->rwlock_waiters, 1, __ATOMIC_RELAXED);
444 0           __atomic_sub_fetch(&h->hdr->rwlock_writers_waiting, 1, __ATOMIC_RELAXED);
445 0 0         if (h->my_slot_idx != UINT32_MAX) {
446 0           __atomic_sub_fetch(&h->reader_slots[h->my_slot_idx].waiters_parked, 1, __ATOMIC_RELAXED);
447 0           __atomic_sub_fetch(&h->reader_slots[h->my_slot_idx].writers_parked, 1, __ATOMIC_RELAXED);
448             }
449 0           }
450              
451 685           static inline void sph_rwlock_rdlock(SpatialHandle *h) {
452 685           sph_claim_reader_slot(h);
453 685           SphHeader *hdr = h->hdr;
454 685           uint32_t *lock = &hdr->rwlock;
455 685           uint32_t *writers_waiting = &hdr->rwlock_writers_waiting;
456             /* Claim subcount BEFORE bumping the shared rwlock counter. This way
457             * a concurrent writer-side recovery scan that sees our PID alive with
458             * subcount > 0 will (correctly) defer force-reset, even while we are
459             * still spinning trying to win the rwlock CAS. Without this, a reader
460             * killed between rwlock CAS-success and subcount++ would let recovery
461             * force-reset rwlock to 0 underneath us, causing a UINT32_MAX wrap on
462             * our eventual rdunlock dec. */
463 685 50         if (h->my_slot_idx != UINT32_MAX)
464 685           __atomic_add_fetch(&h->reader_slots[h->my_slot_idx].subcount, 1, __ATOMIC_RELAXED);
465 685           for (int spin = 0; ; spin++) {
466 685           uint32_t cur = __atomic_load_n(lock, __ATOMIC_RELAXED);
467             /* Write-preferring: when lock is free (cur==0) and writers are
468             * waiting, yield to let the writer acquire. When readers are
469             * already active (cur>=1), new readers may join freely. */
470 685 50         if (cur > 0 && cur < SPH_RWLOCK_WRITER_BIT) {
    0          
471 0 0         if (__atomic_compare_exchange_n(lock, &cur, cur + 1,
472             1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
473 685           return;
474 685 50         } else if (cur == 0 && !__atomic_load_n(writers_waiting, __ATOMIC_RELAXED)) {
    50          
475 685 50         if (__atomic_compare_exchange_n(lock, &cur, 1,
476             1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
477 685           return;
478             }
479 0 0         if (__builtin_expect(spin < SPH_RWLOCK_SPIN_LIMIT, 1)) {
480 0           sph_rwlock_spin_pause();
481 0           continue;
482             }
483 0           sph_park_reader(h);
484 0           cur = __atomic_load_n(lock, __ATOMIC_RELAXED);
485             /* Sleep when write-locked OR when yielding to waiting writers */
486 0 0         if (cur >= SPH_RWLOCK_WRITER_BIT || cur == 0) {
    0          
487 0           long rc = syscall(SYS_futex, lock, FUTEX_WAIT, cur,
488             &sph_lock_timeout, NULL, 0);
489 0 0         if (rc == -1 && errno == ETIMEDOUT) {
    0          
490 0           sph_unpark_reader(h);
491 0           sph_recover_after_timeout(h);
492 0           spin = 0;
493 0           continue;
494             }
495             }
496 0           sph_unpark_reader(h);
497 0           spin = 0;
498             }
499             }
500              
501 685           static inline void sph_rwlock_rdunlock(SpatialHandle *h) {
502 685           SphHeader *hdr = h->hdr;
503             /* Release the shared counter BEFORE dropping our subcount so that
504             * "any live PID with subcount > 0" is a reliable in-flight indicator
505             * for the writer-side recovery scan. Inverting these would create a
506             * window where we still own a unit of rwlock but our slot subcount is
507             * 0, letting recovery force-reset rwlock underneath us. */
508 685           uint32_t after = __atomic_sub_fetch(&hdr->rwlock, 1, __ATOMIC_RELEASE);
509 685 50         if (h->my_slot_idx != UINT32_MAX)
510 685           __atomic_sub_fetch(&h->reader_slots[h->my_slot_idx].subcount, 1, __ATOMIC_RELAXED);
511 685 50         if (after == 0 && __atomic_load_n(&hdr->rwlock_waiters, __ATOMIC_RELAXED) > 0)
    50          
512 0           syscall(SYS_futex, &hdr->rwlock, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
513 685           }
514              
515 11488           static inline void sph_rwlock_wrlock(SpatialHandle *h) {
516 11488           sph_claim_reader_slot(h); /* refresh cached_pid across fork */
517 11488           SphHeader *hdr = h->hdr;
518 11488           uint32_t *lock = &hdr->rwlock;
519             /* Encode PID in the rwlock word itself (0x80000000 | pid) to eliminate
520             * any crash window between acquiring the lock and storing the owner. */
521 11488           uint32_t mypid = SPH_RWLOCK_WR(h->cached_pid);
522 11488           for (int spin = 0; ; spin++) {
523 11488           uint32_t expected = 0;
524 11488 50         if (__atomic_compare_exchange_n(lock, &expected, mypid,
525             1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
526 11488           return;
527 0 0         if (__builtin_expect(spin < SPH_RWLOCK_SPIN_LIMIT, 1)) {
528 0           sph_rwlock_spin_pause();
529 0           continue;
530             }
531 0           sph_park_writer(h);
532 0           uint32_t cur = __atomic_load_n(lock, __ATOMIC_RELAXED);
533 0 0         if (cur != 0) {
534 0           long rc = syscall(SYS_futex, lock, FUTEX_WAIT, cur,
535             &sph_lock_timeout, NULL, 0);
536 0 0         if (rc == -1 && errno == ETIMEDOUT) {
    0          
537 0           sph_unpark_writer(h);
538 0           sph_recover_after_timeout(h);
539 0           spin = 0;
540 0           continue;
541             }
542             }
543 0           sph_unpark_writer(h);
544 0           spin = 0;
545             }
546             }
547              
548 11488           static inline void sph_rwlock_wrunlock(SpatialHandle *h) {
549 11488           SphHeader *hdr = h->hdr;
550 11488           __atomic_store_n(&hdr->rwlock, 0, __ATOMIC_RELEASE);
551 11488 50         if (__atomic_load_n(&hdr->rwlock_waiters, __ATOMIC_RELAXED) > 0)
552 0           syscall(SYS_futex, &hdr->rwlock, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
553 11488           }
554              
555             /* ================================================================
556             * Layout math + create / open / destroy
557             *
558             * Layout: Header -> reader_slots[1024] -> buckets -> bitmap -> entries
559             * ================================================================ */
560              
561             /* Largest max_entries / num_buckets accepted at create time. 2^30 keeps the
562             * power-of-two rounding (sph_next_pow2) and every byte offset well within
563             * range, and is far beyond any realistic shared-memory map. */
564             #define SPH_MAX_CAPACITY 0x40000000u
565              
566             /* Single source of truth for the mmap region layout offsets. */
567             typedef struct { uint64_t reader_slots, buckets, bitmap, entries; } SphLayout;
568              
569 173           static inline SphLayout sph_layout(uint32_t max_entries, uint32_t num_buckets) {
570 173           uint32_t bw = (max_entries + 63) / 64;
571             SphLayout L;
572 173           L.reader_slots = sizeof(SphHeader);
573 173           L.buckets = L.reader_slots + (uint64_t)SPH_READER_SLOTS * sizeof(SphReaderSlot);
574 173           L.bitmap = L.buckets + (uint64_t)num_buckets * sizeof(uint32_t);
575 173           L.bitmap = (L.bitmap + 7) & ~(uint64_t)7; /* 8-byte align: bitmap is uint64[] and entries (doubles) follow it */
576 173           L.entries = L.bitmap + (uint64_t)bw * sizeof(uint64_t);
577 173           return L;
578             }
579              
580 93           static inline uint64_t sph_total_size(uint32_t max_entries, uint32_t num_buckets) {
581 93           SphLayout L = sph_layout(max_entries, num_buckets);
582 93           return L.entries + (uint64_t)max_entries * sizeof(SpatialEntry);
583             }
584              
585 70           static inline void sph_init_header(void *base, uint32_t max_entries, uint32_t num_buckets,
586             double cell_size, const double world[3], double sphere_radius, uint64_t total) {
587 70           SphLayout L = sph_layout(max_entries, num_buckets);
588              
589 70           SphHeader *hdr = (SphHeader *)base;
590 70           memset(base, 0, (size_t)total);
591 70           hdr->magic = SPH_MAGIC;
592 70           hdr->version = SPH_VERSION;
593 70           hdr->max_entries = max_entries;
594 70           hdr->num_buckets = num_buckets;
595 70           hdr->cell_size = cell_size;
596             /* toroidal wrap extents: any axis > 0 enables wrap */
597 70           int wrap = 0;
598 280 100         for (int i = 0; i < 3; i++) {
599 210 100         double w = world ? world[i] : 0.0;
600 210           hdr->world[i] = w;
601 210 100         if (w > 0.0) wrap = 1;
602             }
603 70 100         hdr->flags = wrap ? SPH_FLAG_WRAP : 0;
604 70           hdr->sphere_radius = sphere_radius; /* caller validated finite and >= 0 */
605 70           hdr->count = 0;
606 70           hdr->total_size = total;
607 70           hdr->reader_slots_off = L.reader_slots;
608 70           hdr->buckets_off = L.buckets;
609 70           hdr->bitmap_off = L.bitmap;
610 70           hdr->entries_off = L.entries;
611              
612             /* Set every bucket head to SPH_NONE (empty). */
613 70           uint32_t *buckets = (uint32_t *)((uint8_t *)base + L.buckets);
614 118047 100         for (uint32_t i = 0; i < num_buckets; i++) buckets[i] = SPH_NONE;
615              
616             /* Thread the free-list through the entries pool. */
617 70           SpatialEntry *entries = (SpatialEntry *)((uint8_t *)base + L.entries);
618 89764 100         for (uint32_t i = 0; i < max_entries; i++)
619 89694 100         entries[i].next = (i + 1 < max_entries) ? (i + 1) : SPH_NONE;
620 70           hdr->free_head = 0; /* max_entries >= 1 (validated at create) */
621              
622             /* Alloc bitmap left zeroed (all entries free). */
623 70           __atomic_thread_fence(__ATOMIC_SEQ_CST);
624 70           }
625              
626 79           static inline SpatialHandle *sph_setup(void *base, size_t map_size,
627             const char *path, int backing_fd) {
628 79           SphHeader *hdr = (SphHeader *)base;
629 79           SpatialHandle *h = (SpatialHandle *)calloc(1, sizeof(SpatialHandle));
630 79 50         if (!h) {
631 0           munmap(base, map_size);
632 0 0         if (backing_fd >= 0) close(backing_fd);
633 0           return NULL;
634             }
635 79           h->hdr = hdr;
636 79           h->reader_slots = (SphReaderSlot *)((uint8_t *)base + hdr->reader_slots_off);
637 79           h->buckets = (uint32_t *)((uint8_t *)base + hdr->buckets_off);
638 79           h->bitmap = (uint64_t *)((uint8_t *)base + hdr->bitmap_off);
639 79           h->entries = (SpatialEntry *)((uint8_t *)base + hdr->entries_off);
640 79           h->bitmap_words = (hdr->max_entries + 63) / 64;
641 79           h->mmap_size = map_size;
642 79 100         h->path = path ? strdup(path) : NULL;
643 79           h->notify_fd = -1;
644 79           h->backing_fd = backing_fd;
645 79           h->my_slot_idx = UINT32_MAX;
646 79           h->wrap = (hdr->flags & SPH_FLAG_WRAP) ? 1 : 0;
647 316 100         for (int i = 0; i < 3; i++) {
648 237 100         h->world[i] = h->wrap ? hdr->world[i] : 0.0;
649 264 100         h->wrap_cells[i] = (h->wrap && hdr->world[i] > 0.0)
650 264 100         ? (int64_t)floor(hdr->world[i] / hdr->cell_size + 0.5) : 0; /* exact: extent is a multiple */
651             }
652 79           return h;
653             }
654              
655             /* Validate a mapped header (shared by sph_create reopen and sph_open_fd). */
656 14           static inline int sph_validate_header(const SphHeader *hdr, uint64_t file_size) {
657 14 100         if (hdr->magic != SPH_MAGIC) return 0;
658 13 100         if (hdr->version != SPH_VERSION) return 0;
659 12 50         if (hdr->max_entries == 0 || hdr->num_buckets == 0) return 0;
    50          
660 12 50         if (hdr->max_entries > SPH_MAX_CAPACITY || hdr->num_buckets > SPH_MAX_CAPACITY) return 0;
    50          
661 12 50         if ((hdr->num_buckets & (hdr->num_buckets - 1)) != 0) return 0; /* power of two */
662 12 50         if (!(hdr->cell_size > 0) || !isfinite(hdr->cell_size)) return 0;
    50          
663 12 100         if (hdr->total_size != file_size) return 0;
664 10 50         if (hdr->total_size != sph_total_size(hdr->max_entries, hdr->num_buckets)) return 0;
665              
666 10           SphLayout L = sph_layout(hdr->max_entries, hdr->num_buckets);
667 10 50         if (hdr->reader_slots_off != L.reader_slots) return 0;
668 10 50         if (hdr->buckets_off != L.buckets) return 0;
669 10 50         if (hdr->bitmap_off != L.bitmap) return 0;
670 10 50         if (hdr->entries_off != L.entries) return 0;
671 10 50         if (hdr->flags & ~SPH_FLAG_WRAP) return 0; /* no unknown flags */
672 10           { int w = 0;
673 40 100         for (int i = 0; i < 3; i++) {
674 30 50         if (!isfinite(hdr->world[i]) || hdr->world[i] < 0.0) return 0;
    50          
675 30 100         if (hdr->world[i] > 0.0) { w = 1;
676 4           double nx = floor(hdr->world[i] / hdr->cell_size + 0.5);
677 4 50         if (nx < 1.0 || fabs(nx * hdr->cell_size - hdr->world[i]) > 1e-9 * hdr->world[i]) return 0;
    50          
678             }
679             }
680 10 50         if (((hdr->flags & SPH_FLAG_WRAP) ? 1 : 0) != w) return 0; /* flag must match extents */
681             }
682 10 50         if (!isfinite(hdr->sphere_radius) || hdr->sphere_radius < 0.0) return 0;
    50          
683 10 100         if (hdr->sphere_radius > 0.0 && (hdr->flags & SPH_FLAG_WRAP)) return 0; /* sphere and wrap are mutually exclusive */
    100          
684 9           return 1;
685             }
686              
687             /* Validate optional wrap extents against cell_size (world may be NULL); each
688             wrapped axis must be a positive multiple of cell_size so cells tile exactly. */
689 87           static inline int sph_validate_world(const double *world, double cell_size, char *errbuf) {
690 87 100         if (!world) return 1;
691 37 100         for (int i = 0; i < 3; i++) {
692 28 50         if (!(world[i] >= 0.0) || !isfinite(world[i])) { SPH_ERR("world extent must be finite and >= 0"); return 0; }
    50          
    0          
693 28 100         if (world[i] > 0.0) {
694 21           double nx = floor(world[i] / cell_size + 0.5);
695 21 50         if (nx < 1.0 || fabs(nx * cell_size - world[i]) > 1e-9 * world[i]) {
    100          
696 1 50         SPH_ERR("wrap extent must be a positive multiple of cell_size"); return 0;
697             }
698             }
699             }
700 9           return 1;
701             }
702              
703             /* ================================================================
704             * Geo helpers (lat/lon/alt <-> xyz); R = body radius (hdr->sphere_radius).
705             * Angles in radians: lat in [-pi/2, pi/2], lon in (-pi, pi].
706             * ================================================================ */
707 1134           static inline void sph_geo_to_xyz(double R, double lat, double lon, double alt, double out[3]) {
708 1134           double r = R + alt;
709 1134           double cl = cos(lat);
710 1134           out[0] = r * cl * cos(lon);
711 1134           out[1] = r * cl * sin(lon);
712 1134           out[2] = r * sin(lat);
713 1134           }
714 306           static inline void sph_geo_of_xyz(double R, const double in[3], double *lat, double *lon, double *alt) {
715 306           double r = sqrt(in[0]*in[0] + in[1]*in[1] + in[2]*in[2]);
716 306 50         double s = (r > 0.0) ? in[2] / r : 0.0;
717 306 50         if (s > 1.0) s = 1.0; else if (s < -1.0) s = -1.0; /* guard asin domain vs rounding */
    50          
718 306           *lat = asin(s);
719 306           *lon = atan2(in[1], in[0]); /* 0 at the poles */
720 306           *alt = r - R;
721 306           }
722              
723             /* ================================================================
724             * Cube-sphere cell IDs (stateless; a tiny S2-like scheme).
725             * Equal-angle (tangent-warp) projection of a direction onto one of 6 cube
726             * faces, row-major i,j at a level. Packed id: level(5) face(3) i(24) j(24).
727             * Faces: 0=+X 1=-X 2=+Y 3=-Y 4=+Z 5=-Z. Per face the major axis carries the
728             * sign and the other two coords are (s,t); reconstruction is d=(face basis).
729             * ================================================================ */
730             #define SPH_CUBE_MAX_LEVEL 24
731             #define SPH_PI_4 0.78539816339744830961 /* pi/4 */
732              
733 198253           static inline int sph_cube_level(uint64_t id) { return (int)((id >> 51) & 0x1Fu); }
734 178249           static inline int sph_cube_face (uint64_t id) { return (int)((id >> 48) & 0x7u); }
735              
736             /* well-formed id: no stray high bits, level <= MAX, face < 6, i,j < 2^level */
737 99132           static inline int sph_cube_valid(uint64_t id) {
738 99132 100         if (id >> 56) return 0;
739 99127           int level = sph_cube_level(id);
740 99127 100         if (level > SPH_CUBE_MAX_LEVEL) return 0;
741 99126 50         if (sph_cube_face(id) > 5) return 0;
742 99126           uint64_t N = (uint64_t)1 << level;
743 99126           uint64_t i = (id >> 24) & 0xFFFFFFu, j = id & 0xFFFFFFu;
744 99126 50         return (i < N && j < N);
    50          
745             }
746              
747             /* direction (need not be unit) -> cell id at level [0, SPH_CUBE_MAX_LEVEL] */
748 162087           static inline uint64_t sph_cube_cell(const double dir[3], int level) {
749 162087           double x = dir[0], y = dir[1], z = dir[2];
750 162087           double ax = fabs(x), ay = fabs(y), az = fabs(z);
751             int face; double mag, s, t;
752 162087 100         if (ax >= ay && ax >= az) { mag = ax; face = (x >= 0) ? 0 : 1; s = y / mag; t = z / mag; }
    100          
753 108374 100         else if (ay >= az) { mag = ay; face = (y >= 0) ? 2 : 3; s = x / mag; t = z / mag; }
    100          
754 54118 100         else { mag = az; face = (z >= 0) ? 4 : 5; s = x / mag; t = y / mag; }
755 162087           double u = atan(s) / SPH_PI_4; /* equal-angle warp to [-1,1] */
756 162087           double v = atan(t) / SPH_PI_4;
757 162087 100         if (!isfinite(u)) u = 0.0; /* dir==0 -> s,t NaN; keep defined */
758 162087 100         if (!isfinite(v)) v = 0.0;
759 162087           int64_t N = (int64_t)1 << level;
760 162087           int64_t i = (int64_t)floor((u + 1.0) * 0.5 * (double)N);
761 162087           int64_t j = (int64_t)floor((v + 1.0) * 0.5 * (double)N);
762 162087 50         if (i < 0) i = 0; else if (i >= N) i = N - 1;
    50          
763 162087 50         if (j < 0) j = 0; else if (j >= N) j = N - 1;
    50          
764 162087           return ((uint64_t)level << 51) | ((uint64_t)face << 48)
765 162087           | ((uint64_t)i << 24) | (uint64_t)j;
766             }
767              
768             /* reconstruct a face-basis direction from (face, s, t): the major axis carries
769             the face sign, the two minor coords are (s, t). Inverse of the face/s/t split
770             in sph_cube_cell; shared by sph_cube_center and sph_cube_neighbors. */
771 159483           static inline void sph_face_dir(int face, double s, double t, double d[3]) {
772 159483           switch (face) {
773 26550           case 0: d[0]= 1; d[1]= s; d[2]= t; break;
774 26123           case 1: d[0]=-1; d[1]= s; d[2]= t; break;
775 27206           case 2: d[0]= s; d[1]= 1; d[2]= t; break;
776 26193           case 3: d[0]= s; d[1]=-1; d[2]= t; break;
777 26642           case 4: d[0]= s; d[1]= t; d[2]= 1; break;
778 26769           default: d[0]= s; d[1]= t; d[2]=-1; break;
779             }
780 159483           }
781              
782             /* cell id -> its centre as a unit direction */
783 39003           static inline void sph_cube_center(uint64_t id, double out[3]) {
784 39003           int level = sph_cube_level(id), face = sph_cube_face(id);
785 39003           int64_t N = (int64_t)1 << level;
786 39003           int64_t i = (int64_t)((id >> 24) & 0xFFFFFFu), j = (int64_t)(id & 0xFFFFFFu);
787 39003           double u = ((double)i + 0.5) / (double)N * 2.0 - 1.0;
788 39003           double v = ((double)j + 0.5) / (double)N * 2.0 - 1.0;
789 39003           double s = tan(u * SPH_PI_4), t = tan(v * SPH_PI_4);
790             double d[3];
791 39003           sph_face_dir(face, s, t, d);
792 39003           double n = sqrt(d[0]*d[0] + d[1]*d[1] + d[2]*d[2]);
793 39003           out[0] = d[0]/n; out[1] = d[1]/n; out[2] = d[2]/n;
794 39003           }
795              
796             /* parent at level-1 (returns 0 at level 0); children at level+1 (0 at MAX) */
797 8001           static inline int sph_cube_parent(uint64_t id, uint64_t *out) {
798 8001           int level = sph_cube_level(id);
799 8001 100         if (level == 0) return 0;
800 8000           int64_t i = (int64_t)((id >> 24) & 0xFFFFFFu), j = (int64_t)(id & 0xFFFFFFu);
801 8000           *out = ((uint64_t)(level - 1) << 51) | ((uint64_t)sph_cube_face(id) << 48)
802 8000           | ((uint64_t)(i >> 1) << 24) | (uint64_t)(j >> 1);
803 8000           return 1;
804             }
805 2001           static inline int sph_cube_children(uint64_t id, uint64_t out[4]) {
806 2001           int level = sph_cube_level(id);
807 2001 100         if (level >= SPH_CUBE_MAX_LEVEL) return 0;
808 2000           uint64_t face = (uint64_t)sph_cube_face(id);
809 2000           int64_t i = (int64_t)((id >> 24) & 0xFFFFFFu), j = (int64_t)(id & 0xFFFFFFu);
810 2000           int k = 0;
811 14000 100         for (int di = 0; di < 2; di++) for (int dj = 0; dj < 2; dj++)
    100          
812 8000           out[k++] = ((uint64_t)(level + 1) << 51) | (face << 48)
813 8000           | ((uint64_t)(2*i + di) << 24) | (uint64_t)(2*j + dj);
814 2000           return 1;
815             }
816              
817             /* 4 edge-adjacent neighbors (seam-aware) by perturb-and-reproject: step one
818             cell width from the centre along +-u / +-v in THIS face's basis, rebuild the
819             direction (|coord| may exceed 1, crossing to the adjacent face), and re-run
820             sph_cube_cell. The major-axis flip handles the seam + orientation; a full
821             cell-width step lands at the neighbour's centre, so it is rounding-robust. */
822 30120           static inline void sph_cube_neighbors(uint64_t id, uint64_t out[4]) {
823 30120           int level = sph_cube_level(id), face = sph_cube_face(id);
824 30120           int64_t N = (int64_t)1 << level;
825 30120           int64_t i = (int64_t)((id >> 24) & 0xFFFFFFu), j = (int64_t)(id & 0xFFFFFFu);
826 30120           double uc = ((double)i + 0.5) / (double)N * 2.0 - 1.0;
827 30120           double vc = ((double)j + 0.5) / (double)N * 2.0 - 1.0;
828 30120           double step = 2.0 / (double)N;
829             static const double du[4] = { +1.0, -1.0, 0.0, 0.0 };
830             static const double dv[4] = { 0.0, 0.0, +1.0, -1.0 };
831 150600 100         for (int k = 0; k < 4; k++) {
832 120480           double s = tan((uc + du[k]*step) * SPH_PI_4);
833 120480           double t = tan((vc + dv[k]*step) * SPH_PI_4);
834             double d[3];
835 120480           sph_face_dir(face, s, t, d);
836 120480           out[k] = sph_cube_cell(d, level);
837             }
838 30120           }
839              
840             /* validate + normalize create() args (shared by sph_create + sph_create_memfd);
841             on success rounds *num_buckets up to a power of two */
842 89           static int sph_validate_create_args(uint32_t max_entries, uint32_t *num_buckets,
843             double cell_size, const double *world,
844             double sphere_radius, char *errbuf) {
845 89 50         if (errbuf) errbuf[0] = '\0';
846 89 100         if (!(cell_size > 0) || !isfinite(cell_size)) { SPH_ERR("cell_size must be a finite number > 0"); return 0; }
    50          
    50          
847 88 100         if (max_entries == 0) { SPH_ERR("max_entries must be > 0"); return 0; }
    50          
848 87 100         if (!sph_validate_world(world, cell_size, errbuf)) return 0;
849 86 50         if (!(sphere_radius >= 0.0) || !isfinite(sphere_radius)) { SPH_ERR("sphere radius must be finite and >= 0"); return 0; }
    50          
    0          
850 86 100         if (sphere_radius > 0.0 && world && (world[0] > 0.0 || world[1] > 0.0 || world[2] > 0.0)) { SPH_ERR("sphere and wrap are mutually exclusive"); return 0; }
    100          
    50          
    0          
    0          
    50          
851 85 100         if (max_entries > SPH_MAX_CAPACITY) { SPH_ERR("max_entries too large (max %u)", SPH_MAX_CAPACITY); return 0; }
    50          
852 84 100         if (*num_buckets > SPH_MAX_CAPACITY) { SPH_ERR("num_buckets too large (max %u)", SPH_MAX_CAPACITY); return 0; }
    50          
853 83 100         *num_buckets = sph_next_pow2(*num_buckets == 0 ? max_entries : *num_buckets);
854 83           return 1;
855             }
856              
857 87           static SpatialHandle *sph_create(const char *path, uint32_t max_entries,
858             uint32_t num_buckets, double cell_size,
859             const double *world, double sphere_radius, char *errbuf) {
860 87 100         if (!sph_validate_create_args(max_entries, &num_buckets, cell_size, world, sphere_radius, errbuf)) return NULL;
861              
862 81           uint64_t total = sph_total_size(max_entries, num_buckets);
863 81           int anonymous = (path == NULL);
864 81           int fd = -1;
865             size_t map_size;
866             void *base;
867              
868 81 100         if (anonymous) {
869 57           map_size = (size_t)total;
870 57           base = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
871 57 50         if (base == MAP_FAILED) { SPH_ERR("mmap: %s", strerror(errno)); return NULL; }
    0          
872             } else {
873 24           fd = open(path, O_RDWR|O_CREAT, 0666);
874 37 50         if (fd < 0) { SPH_ERR("open: %s", strerror(errno)); return NULL; }
    0          
875 24 50         if (flock(fd, LOCK_EX) < 0) { SPH_ERR("flock: %s", strerror(errno)); close(fd); return NULL; }
    0          
876             struct stat st;
877 24 50         if (fstat(fd, &st) < 0) { SPH_ERR("fstat: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL; }
    0          
878 24           int is_new = (st.st_size == 0);
879 24 100         if (!is_new && (uint64_t)st.st_size < sizeof(SphHeader)) {
    100          
880 1 50         SPH_ERR("%s: file too small (%lld)", path, (long long)st.st_size);
881 1           flock(fd, LOCK_UN); close(fd); return NULL;
882             }
883 23 100         if (is_new && ftruncate(fd, (off_t)total) < 0) {
    50          
884 0 0         SPH_ERR("ftruncate: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL;
885             }
886 23 100         map_size = is_new ? (size_t)total : (size_t)st.st_size;
887 23           base = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
888 23 50         if (base == MAP_FAILED) { SPH_ERR("mmap: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL; }
    0          
889 23 100         if (!is_new) {
890 12 100         if (!sph_validate_header((SphHeader *)base, (uint64_t)st.st_size)) {
891 5 50         SPH_ERR("invalid spatial hash file"); munmap(base, map_size); flock(fd, LOCK_UN); close(fd); return NULL;
892             }
893 7           flock(fd, LOCK_UN); close(fd);
894 7           return sph_setup(base, map_size, path, -1);
895             }
896             }
897 68           sph_init_header(base, max_entries, num_buckets, cell_size, world, sphere_radius, total);
898 68 100         if (fd >= 0) { flock(fd, LOCK_UN); close(fd); }
899 68           return sph_setup(base, map_size, path, -1);
900             }
901              
902 2           static SpatialHandle *sph_create_memfd(const char *name, uint32_t max_entries,
903             uint32_t num_buckets, double cell_size,
904             const double *world, double sphere_radius, char *errbuf) {
905 2 50         if (!sph_validate_create_args(max_entries, &num_buckets, cell_size, world, sphere_radius, errbuf)) return NULL;
906              
907 2           uint64_t total = sph_total_size(max_entries, num_buckets);
908 2 50         int fd = memfd_create(name ? name : "sphash", MFD_CLOEXEC | MFD_ALLOW_SEALING);
909 2 50         if (fd < 0) { SPH_ERR("memfd_create: %s", strerror(errno)); return NULL; }
    0          
910 2 50         if (ftruncate(fd, (off_t)total) < 0) {
911 0 0         SPH_ERR("ftruncate: %s", strerror(errno)); close(fd); return NULL;
912             }
913 2           (void)fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);
914 2           void *base = mmap(NULL, (size_t)total, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
915 2 50         if (base == MAP_FAILED) { SPH_ERR("mmap: %s", strerror(errno)); close(fd); return NULL; }
    0          
916 2           sph_init_header(base, max_entries, num_buckets, cell_size, world, sphere_radius, total);
917 2           return sph_setup(base, (size_t)total, NULL, fd);
918             }
919              
920 2           static SpatialHandle *sph_open_fd(int fd, char *errbuf) {
921 2 50         if (errbuf) errbuf[0] = '\0';
922             struct stat st;
923 2 50         if (fstat(fd, &st) < 0) { SPH_ERR("fstat: %s", strerror(errno)); return NULL; }
    0          
924 2 50         if ((uint64_t)st.st_size < sizeof(SphHeader)) { SPH_ERR("too small"); return NULL; }
    0          
925 2           size_t ms = (size_t)st.st_size;
926 2           void *base = mmap(NULL, ms, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
927 2 50         if (base == MAP_FAILED) { SPH_ERR("mmap: %s", strerror(errno)); return NULL; }
    0          
928 2 50         if (!sph_validate_header((SphHeader *)base, (uint64_t)st.st_size)) {
929 0 0         SPH_ERR("invalid spatial hash"); munmap(base, ms); return NULL;
930             }
931 2           int myfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
932 2 50         if (myfd < 0) { SPH_ERR("fcntl: %s", strerror(errno)); munmap(base, ms); return NULL; }
    0          
933 2           return sph_setup(base, ms, NULL, myfd);
934             }
935              
936 79           static void sph_destroy(SpatialHandle *h) {
937 79 50         if (!h) return;
938 79 100         if (h->notify_fd >= 0) close(h->notify_fd);
939 79 100         if (h->backing_fd >= 0) close(h->backing_fd);
940 79 50         if (h->hdr) munmap(h->hdr, h->mmap_size);
941 79           free(h->path);
942 79           free(h);
943             }
944              
945 9           static inline int sph_msync(SpatialHandle *h) {
946 9 50         if (!h || !h->hdr) return 0;
    50          
947 9           return msync(h->hdr, h->mmap_size, MS_SYNC);
948             }
949              
950 2           static int sph_create_eventfd(SpatialHandle *h) {
951 2 100         if (h->notify_fd >= 0) return h->notify_fd;
952 1           int efd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
953 1 50         if (efd < 0) return -1;
954 1           h->notify_fd = efd;
955 1           return efd;
956             }
957              
958 2           static int sph_notify(SpatialHandle *h) {
959 2 50         if (h->notify_fd < 0) return 0;
960 2           uint64_t v = 1;
961 2           return write(h->notify_fd, &v, sizeof(v)) == sizeof(v);
962             }
963              
964 3           static int64_t sph_eventfd_consume(SpatialHandle *h) {
965 3 100         if (h->notify_fd < 0) return -1;
966 2           uint64_t v = 0;
967 2 100         if (read(h->notify_fd, &v, sizeof(v)) != sizeof(v)) return -1;
968 1           return (int64_t)v;
969             }
970              
971             /* ================================================================
972             * Slot pool: alloc / free / liveness
973             * ================================================================ */
974              
975 34439           static inline int sph_is_live(const SpatialHandle *h, uint32_t idx) {
976 34439 100         if (idx >= h->hdr->max_entries) return 0;
977 34435           uint64_t w = h->bitmap[idx / 64];
978 34435           return (w >> (idx % 64)) & 1;
979             }
980              
981 9906           static inline uint32_t sph_alloc_slot(SpatialHandle *h) {
982 9906           uint32_t idx = h->hdr->free_head;
983 9906 100         if (idx == SPH_NONE) return SPH_NONE;
984 9902           h->hdr->free_head = h->entries[idx].next; /* pop free-list */
985 9902           h->bitmap[idx / 64] |= (uint64_t)1 << (idx % 64);
986 9902           h->entries[idx].next = SPH_NONE;
987 9902           h->entries[idx].prev = SPH_NONE;
988 9902           return idx;
989             }
990              
991 462           static inline void sph_free_slot(SpatialHandle *h, uint32_t idx) {
992 462           h->bitmap[idx / 64] &= ~((uint64_t)1 << (idx % 64));
993 462           h->entries[idx].next = h->hdr->free_head; /* push free-list */
994 462           h->hdr->free_head = idx;
995 462           }
996              
997             /* ================================================================
998             * Bucket chain: link / unlink (callers hold write lock)
999             * ================================================================ */
1000              
1001 10565           static inline void sph_bucket_link(SpatialHandle *h, uint32_t idx) {
1002 10565           int64_t cell[3]; sph_cell_of(h, h->entries[idx].pos, cell);
1003 10565           uint32_t b = sph_bucket_of_cell(h, cell);
1004 10565           uint32_t head = h->buckets[b];
1005 10565           h->entries[idx].prev = SPH_NONE;
1006 10565           h->entries[idx].next = head;
1007 10565 100         if (head != SPH_NONE) h->entries[head].prev = idx;
1008 10565           h->buckets[b] = idx;
1009 10565           }
1010              
1011 1125           static inline void sph_bucket_unlink(SpatialHandle *h, uint32_t idx) {
1012 1125           int64_t cell[3]; sph_cell_of(h, h->entries[idx].pos, cell);
1013 1125           uint32_t b = sph_bucket_of_cell(h, cell);
1014 1125           uint32_t p = h->entries[idx].prev, n = h->entries[idx].next;
1015 1125 100         if (p != SPH_NONE) h->entries[p].next = n; else h->buckets[b] = n;
1016 1125 100         if (n != SPH_NONE) h->entries[n].prev = p;
1017 1125           }
1018              
1019             /* ================================================================
1020             * Mutation ops (callers hold write lock)
1021             * ================================================================ */
1022              
1023 9906           static inline uint32_t sph_insert_locked(SpatialHandle *h, double x, double y, double z,
1024             int64_t value, double radius) {
1025 9906           uint32_t idx = sph_alloc_slot(h);
1026 9906 100         if (idx == SPH_NONE) return SPH_NONE;
1027 9902           h->entries[idx].pos[0] = x; h->entries[idx].pos[1] = y; h->entries[idx].pos[2] = z;
1028 9902           h->entries[idx].value = value;
1029 9902           h->entries[idx].radius = radius;
1030 9902           sph_bucket_link(h, idx);
1031 9902           h->hdr->count++;
1032 9902           return idx;
1033             }
1034              
1035 463           static inline int sph_remove_locked(SpatialHandle *h, uint32_t idx) {
1036 463 100         if (!sph_is_live(h, idx)) return 0;
1037 462           sph_bucket_unlink(h, idx);
1038 462           sph_free_slot(h, idx);
1039 462           h->hdr->count--;
1040 462           return 1;
1041             }
1042              
1043 667           static inline int sph_move_locked(SpatialHandle *h, uint32_t idx, double x, double y, double z) {
1044 667 100         if (!sph_is_live(h, idx)) return 0;
1045             int64_t oc[3], nc[3];
1046 664           sph_cell_of(h, h->entries[idx].pos, oc);
1047 664           double np[3] = { x, y, z }; sph_cell_of(h, np, nc);
1048 664 100         if (sph_cell_eq(oc, nc)) { /* same cell: just rewrite pos */
1049 1           h->entries[idx].pos[0] = x; h->entries[idx].pos[1] = y; h->entries[idx].pos[2] = z;
1050 1           return 1;
1051             }
1052 663           sph_bucket_unlink(h, idx); /* unlink uses OLD pos */
1053 663           h->entries[idx].pos[0] = x; h->entries[idx].pos[1] = y; h->entries[idx].pos[2] = z;
1054 663           sph_bucket_link(h, idx); /* link uses NEW pos */
1055 663           return 1;
1056             }
1057              
1058             /* ================================================================
1059             * Region query: collector + walk helpers
1060             * ================================================================ */
1061              
1062             typedef struct { int64_t *vals; size_t n, cap; } sph_collect_t;
1063              
1064 13780           static int sph_collect_push(sph_collect_t *c, int64_t v) {
1065 13780 100         if (c->n == c->cap) {
1066 341 100         size_t nc = c->cap ? c->cap * 2 : 64;
1067 341           int64_t *nv = (int64_t *)realloc(c->vals, nc * sizeof(int64_t));
1068 341 50         if (!nv) return 0;
1069 341           c->vals = nv; c->cap = nc;
1070             }
1071 13780           c->vals[c->n++] = v;
1072 13780           return 1;
1073             }
1074              
1075             /* squared distance from a to b over `dims` axes; minimum-image when toroidal */
1076 24546           static inline double sph_dist2_w(const double world[3], const double a[3], const double b[3], int dims) {
1077 24546           double dx = sph_axis_delta(a[0], b[0], world[0]);
1078 24546           double dy = sph_axis_delta(a[1], b[1], world[1]);
1079 24546           double d = dx*dx + dy*dy;
1080 24546 100         if (dims == 3) { double dz = sph_axis_delta(a[2], b[2], world[2]); d += dz*dz; }
1081 24546           return d;
1082             }
1083 18789           static inline double sph_dist2(const SpatialHandle *h, const double a[3], const double b[3], int dims) {
1084 18789           return sph_dist2_w(h->world, a, b, dims);
1085             }
1086              
1087             /* Walk one cell C's bucket chain; for each entry whose recomputed cell == C
1088             and which passes `accept`, push its value. Returns 1 ok, 0 OOM. */
1089 587347           static int sph_walk_cell(SpatialHandle *h, const int64_t C[3], sph_collect_t *out,
1090             int (*accept)(const double pos[3], void *ctx), void *ctx) {
1091 587347           uint32_t b = sph_bucket_of_cell(h, C);
1092 775932 100         for (uint32_t idx = h->buckets[b]; idx != SPH_NONE; idx = h->entries[idx].next) {
1093 188585           int64_t ec[3]; sph_cell_of(h, h->entries[idx].pos, ec);
1094 190376 100         if (!sph_cell_eq(ec, C)) continue; /* collision / dedup guard */
1095 5982 100         if (accept && !accept(h->entries[idx].pos, ctx)) continue;
    100          
1096 4191 50         if (!sph_collect_push(out, h->entries[idx].value)) return 0;
1097             }
1098 587347           return 1;
1099             }
1100              
1101 17           static int sph_query_cell(SpatialHandle *h, const double p[3], int dims, sph_collect_t *out) {
1102 17 100         double pp[3] = { p[0], p[1], dims == 3 ? p[2] : 0.0 };
1103 17           int64_t C[3]; sph_cell_of(h, pp, C);
1104 17           return sph_walk_cell(h, C, out, NULL, NULL);
1105             }
1106              
1107             struct sph_box { double lo[3], hi[3]; int dims; };
1108             struct sph_disk { double c[3]; double r2; int dims; const double *world; };
1109              
1110 201           static int sph_accept_box(const double pos[3], void *ctx) {
1111 201           struct sph_box *q = (struct sph_box *)ctx;
1112 201 50         if (pos[0] < q->lo[0] || pos[0] > q->hi[0]) return 0;
    100          
1113 172 50         if (pos[1] < q->lo[1] || pos[1] > q->hi[1]) return 0;
    100          
1114 149 100         if (q->dims == 3 && (pos[2] < q->lo[2] || pos[2] > q->hi[2])) return 0;
    50          
    100          
1115 133           return 1;
1116             }
1117 5757           static int sph_accept_disk(const double pos[3], void *ctx) {
1118 5757           struct sph_disk *q = (struct sph_disk *)ctx;
1119 5757           return sph_dist2_w(q->world, pos, q->c, q->dims) <= q->r2;
1120             }
1121              
1122             /* enumerate the inclusive cell box [lo..hi] (per axis), z fixed to its single
1123             cell when dims==2, walking each cell with `accept`. */
1124 335           static int sph_enum_cellbox(SpatialHandle *h, const double lo[3], const double hi[3],
1125             int dims, sph_collect_t *out,
1126             int (*accept)(const double[3], void *), void *ctx) {
1127             int64_t cl[3], ch[3];
1128 335           sph_cell_raw(h, lo, cl); sph_cell_raw(h, hi, ch); /* raw corners for span math */
1129 335           int64_t cnt[3] = { 1, 1, 1 };
1130 335           uint64_t cells = 1;
1131 1144 100         for (int i = 0; i < dims; i++) {
1132 812 50         if (ch[i] < cl[i]) return SPH_Q_OK; /* empty box */
1133 812           uint64_t span = (uint64_t)ch[i] - (uint64_t)cl[i] + 1; /* exact, since ch>=cl */
1134 812 100         if (h->wrap_cells[i] > 0 && span > (uint64_t)h->wrap_cells[i])
    50          
1135 0           span = (uint64_t)h->wrap_cells[i]; /* whole ring, each cell once */
1136 812 100         if (span >= (uint64_t)SPH_MAX_QUERY_CELLS) return SPH_Q_TOOBIG;
1137 809 50         if (cells > (uint64_t)SPH_MAX_QUERY_CELLS / span) return SPH_Q_TOOBIG;
1138 809           cells *= span; cnt[i] = (int64_t)span;
1139             }
1140             int64_t C[3];
1141 6585 100         for (int64_t i0 = 0; i0 < cnt[0]; i0++) {
1142 6253 100         C[0] = h->wrap ? sph_wrap_cell(cl[0] + i0, h->wrap_cells[0]) : cl[0] + i0;
1143 342630 100         for (int64_t i1 = 0; i1 < cnt[1]; i1++) {
1144 336377 100         C[1] = h->wrap ? sph_wrap_cell(cl[1] + i1, h->wrap_cells[1]) : cl[1] + i1;
1145 336377 100         if (dims == 3) {
1146 288653 100         for (int64_t i2 = 0; i2 < cnt[2]; i2++) {
1147 269803 100         C[2] = h->wrap ? sph_wrap_cell(cl[2] + i2, h->wrap_cells[2]) : cl[2] + i2;
1148 269803 50         if (!sph_walk_cell(h, C, out, accept, ctx)) return SPH_Q_OOM;
1149             }
1150             } else {
1151 317527           C[2] = 0;
1152 317527 50         if (!sph_walk_cell(h, C, out, accept, ctx)) return SPH_Q_OOM;
1153             }
1154             }
1155             }
1156 332           return SPH_Q_OK;
1157             }
1158              
1159 12           static int sph_query_aabb(SpatialHandle *h, const double lo[3], const double hi[3],
1160             int dims, sph_collect_t *out) {
1161 12 100         struct sph_box q = { { lo[0], lo[1], dims==3?lo[2]:0 }, { hi[0], hi[1], dims==3?hi[2]:0 }, dims };
    100          
1162 12           return sph_enum_cellbox(h, q.lo, q.hi, dims, out, sph_accept_box, &q);
1163             }
1164              
1165 323           static int sph_query_radius(SpatialHandle *h, const double c[3], double r, int dims, sph_collect_t *out) {
1166 323 100         struct sph_disk q = { { c[0], c[1], dims==3?c[2]:0 }, r*r, dims, h->world };
1167 323 100         double lo[3] = { c[0]-r, c[1]-r, (dims==3?c[2]-r:0) };
1168 323 100         double hi[3] = { c[0]+r, c[1]+r, (dims==3?c[2]+r:0) };
1169 323           return sph_enum_cellbox(h, lo, hi, dims, out, sph_accept_disk, &q);
1170             }
1171              
1172             /* ================================================================
1173             * k-nearest-neighbour query
1174             * ================================================================ */
1175              
1176             /* Candidate kept during the search: squared distance + value. */
1177             typedef struct { double d2; int64_t val; } sph_cand_t;
1178              
1179             /* Bounded keep-k: maintain a max-heap of size k keyed by d2 so the worst is
1180             at [0]. Insert only if better than current worst once full. */
1181 2371           static void sph_heap_offer(sph_cand_t *heap, uint32_t *n, uint32_t k, double d2, int64_t val) {
1182 2371 100         if (*n < k) {
1183 293           uint32_t i = (*n)++; /* sift up */
1184 293           heap[i].d2 = d2; heap[i].val = val;
1185 527 100         while (i) { uint32_t p = (i-1)/2; if (heap[p].d2 >= heap[i].d2) break;
    100          
1186 234           sph_cand_t t = heap[p]; heap[p] = heap[i]; heap[i] = t; i = p; }
1187 2078 50         } else if (k && d2 < heap[0].d2) {
    100          
1188 177           heap[0].d2 = d2; heap[0].val = val; /* replace worst, sift down */
1189 177           uint32_t i = 0;
1190 581           for (;;) { uint32_t l=2*i+1, r=2*i+2, m=i;
1191 379 100         if (lheap[m].d2) m=l;
    100          
1192 379 100         if (rheap[m].d2) m=r;
    100          
1193 379 100         if (m==i) break; sph_cand_t t=heap[m]; heap[m]=heap[i]; heap[i]=t; i=m; }
1194             }
1195 2371           }
1196 439           static int sph_cmp_d2(const void *a, const void *b) {
1197 439           double x = ((const sph_cand_t*)a)->d2, y = ((const sph_cand_t*)b)->d2;
1198 439 100         return (x < y) ? -1 : (x > y) ? 1 : 0;
1199             }
1200             /* Walk a single cell, offering live entries (cell-guarded) to the heap.
1201             Returns the number of live, cell-matched entries examined in this cell, so
1202             the caller can terminate once every live entry has been seen (not by a cell
1203             count, which is unbounded since coords are continuous). */
1204 1086357           static uint32_t sph_knn_walk(SpatialHandle *h, const int64_t C[3], const double c[3],
1205             int dims, sph_cand_t *heap, uint32_t *n, uint32_t k) {
1206 1086357           uint32_t b = sph_bucket_of_cell(h, C);
1207 1086357           uint32_t walked = 0;
1208 1271465 100         for (uint32_t idx = h->buckets[b]; idx != SPH_NONE; idx = h->entries[idx].next) {
1209 185108           int64_t ec[3]; sph_cell_of(h, h->entries[idx].pos, ec);
1210 185108 100         if (!sph_cell_eq(ec, C)) continue;
1211 871           walked++;
1212 871           sph_heap_offer(heap, n, k, sph_dist2(h, h->entries[idx].pos, c, dims), h->entries[idx].value);
1213             }
1214 1086357           return walked;
1215             }
1216 56           static int sph_query_knn(SpatialHandle *h, const double c[3], uint32_t k, int dims, sph_collect_t *out) {
1217 56 100         double cc[3] = { c[0], c[1], dims==3 ? c[2] : 0 };
1218 56           uint32_t total = h->hdr->count;
1219 56 100         if (k > total) k = total; /* can never return more than exist */
1220 56 100         if (k == 0) return 1; /* empty hash (or k clamped to 0): empty result */
1221 55           sph_cand_t *heap = (sph_cand_t *)malloc((size_t)k * sizeof(sph_cand_t));
1222 55 50         if (!heap) return 0;
1223 55           uint32_t n = 0; /* candidates currently in the heap */
1224              
1225 55 100         if (h->wrap) {
1226             /* Toroidal: expanding shells would re-visit wrapped cells and double-offer
1227             the same entry to the heap. The world is bounded, so scan every live
1228             entry once with the min-image distance -- O(max_entries), fine for a torus. */
1229 4           uint32_t me = h->hdr->max_entries;
1230 15004 100         for (uint32_t idx = 0; idx < me; idx++)
1231 15000 100         if (sph_is_live(h, idx))
1232 1500           sph_heap_offer(heap, &n, k, sph_dist2(h, h->entries[idx].pos, cc, dims),
1233 1500           h->entries[idx].value);
1234             } else {
1235 51           int64_t center[3]; sph_cell_of(h, cc, center);
1236 51           double cs = h->hdr->cell_size;
1237             /* Termination is by how many live entries we have EXAMINED, not by a cell
1238             count: cell distance is unbounded (continuous coords), so a far point can
1239             sit arbitrarily many shells out. Snapshot the population; once seen==total
1240             every live entry has been visited and nothing remains to find. */
1241 51           uint32_t seen = 0; /* live, cell-matched entries examined */
1242 51           uint64_t cells_visited = 0;
1243             /* g < INT32_MAX is only a corruption guard against a bogus count; the real
1244             terminators below (have-k bound, or seen>=total) end the loop normally. */
1245 522 50         for (int64_t g = 0; g < INT32_MAX; g++) {
1246             /* Enumerate the Chebyshev shell at distance g -- its SURFACE only, not
1247             the full (2g+1)^d box, so per-shell work is O(g) in 2D / O(g^2) in 3D.
1248             cells_visited counts every cell walked, so the cap bounds real work. */
1249             #define SPH_KNN_PROCESS(CX, CY, CZ) do { \
1250             if (++cells_visited > (uint64_t)SPH_MAX_QUERY_CELLS) { \
1251             free(heap); return SPH_Q_TOOBIG; \
1252             } \
1253             int64_t C_[3] = { (CX), (CY), (CZ) }; \
1254             seen += sph_knn_walk(h, C_, cc, dims, heap, &n, k); \
1255             } while (0)
1256 522           int64_t cx = center[0], cy = center[1], cz = center[2];
1257 522 100         if (g == 0) {
1258 51 50         SPH_KNN_PROCESS(cx, cy, cz);
1259 471 100         } else if (dims == 2) {
1260 3146 100         for (int64_t dx = -g; dx <= g; dx++) { /* top + bottom rows */
1261 3024 50         SPH_KNN_PROCESS(cx + dx, cy - g, cz);
1262 3024 50         SPH_KNN_PROCESS(cx + dx, cy + g, cz);
1263             }
1264 2902 100         for (int64_t dy = -g + 1; dy <= g - 1; dy++) { /* left + right cols */
1265 2780 50         SPH_KNN_PROCESS(cx - g, cy + dy, cz);
1266 2780 50         SPH_KNN_PROCESS(cx + g, cy + dy, cz);
1267             }
1268             } else {
1269 1047 100         for (int64_t dz = -g; dz <= g; dz += 2 * g) { /* two z caps (full faces) */
1270 14796 100         for (int64_t dx = -g; dx <= g; dx++)
1271 399596 100         for (int64_t dy = -g; dy <= g; dy++)
1272 385498 50         SPH_KNN_PROCESS(cx + dx, cy + dy, cz + dz);
1273             }
1274 6700 100         for (int64_t dz = -g + 1; dz <= g - 1; dz++) { /* middle layers: xy perimeter */
1275 185002 100         for (int64_t dx = -g; dx <= g; dx++) {
1276 178651 50         SPH_KNN_PROCESS(cx + dx, cy - g, cz + dz);
1277 178651 50         SPH_KNN_PROCESS(cx + dx, cy + g, cz + dz);
1278             }
1279 172300 100         for (int64_t dy = -g + 1; dy <= g - 1; dy++) {
1280 165949 50         SPH_KNN_PROCESS(cx - g, cy + dy, cz + dz);
1281 165949 50         SPH_KNN_PROCESS(cx + g, cy + dy, cz + dz);
1282             }
1283             }
1284             }
1285             #undef SPH_KNN_PROCESS
1286             /* stop once full and the next shell cannot contain anything closer:
1287             min distance from the query point to shell g+1 is g*cell_size. */
1288 522 100         if (n >= k) { double bound = (double)g * cs; if (bound*bound >= heap[0].d2) break; }
    100          
1289             /* stop once every live entry has been examined: nothing left to find. */
1290 473 100         if (seen >= total) break;
1291             }
1292             } /* end !wrap */
1293 55           qsort(heap, n, sizeof(sph_cand_t), sph_cmp_d2); /* nearest-first */
1294 348 100         for (uint32_t i = 0; i < n; i++)
1295 293 50         if (!sph_collect_push(out, heap[i].val)) { free(heap); return 0; }
1296 55           free(heap);
1297 55           return 1;
1298             }
1299              
1300             /* ================================================================
1301             * Collision-pair emitters (callers hold the read lock)
1302             * ================================================================ */
1303              
1304             /* emit returns 1 to continue, 0 to abort (collector OOM). */
1305             typedef int (*sph_pair_cb)(int64_t va, int64_t vb, void *ctx);
1306              
1307 4648           static int sph_pair_to_collect(int64_t va, int64_t vb, void *ctx) {
1308 4648           sph_collect_t *c = (sph_collect_t *)ctx;
1309 4648 50         return sph_collect_push(c, va) && sph_collect_push(c, vb);
    50          
1310             }
1311              
1312             /* Emit every unordered pair once. fixed_r >= 0: pairs with min-image centre
1313             distance < fixed_r. fixed_r < 0: collision mode -- distance < radius_a+radius_b.
1314             Enumeration/distance are 3D when any entry has a non-zero z (entries are
1315             bucketed by their full 3D cell) or the world wraps in z, else 2D.
1316             Returns SPH_Q_OK / OOM / TOOBIG. */
1317 10           static int sph_pairs(SpatialHandle *h, double fixed_r, sph_pair_cb emit, void *ctx) {
1318 10           double cs = h->hdr->cell_size;
1319 10           uint32_t me = h->hdr->max_entries;
1320 10           int collide = (fixed_r < 0.0);
1321             /* one pass: largest radius (collision reach) + whether any entry is 3D, so the
1322             enumeration covers the real z-cell range rather than only z-cell 0 */
1323 10           double maxr = 0.0; int has3d = 0;
1324 9310 100         for (uint32_t i = 0; i < me; i++) {
1325 9300 100         if (!sph_is_live(h, i)) continue;
1326 1656 100         if (collide && h->entries[i].radius > maxr) maxr = h->entries[i].radius;
    100          
1327 1656 100         if (h->entries[i].pos[2] != 0.0) has3d = 1;
1328             }
1329 10 100         if (collide) { if (maxr <= 0.0) return SPH_Q_OK; } /* no radii -> points never collide */
    100          
1330 6 100         else if (!(fixed_r > 0.0)) return SPH_Q_OK; /* non-positive radius -> nothing */
1331 8 100         int dims = (h->world[2] > 0.0 || has3d) ? 3 : 2;
    100          
1332 8208 100         for (uint32_t a = 0; a < me; a++) {
1333 8200 100         if (!sph_is_live(h, a)) continue;
1334 1404           const double *pa = h->entries[a].pos;
1335 1404           double ra = h->entries[a].radius;
1336 1404 100         double reach_d = ceil((collide ? (ra + maxr) : fixed_r) / cs);
1337             /* defensive: the API validates radii, but a corrupt stored radius could make this
1338             NaN, which would slip past the cap check below (NaN >= X is false) into the cast */
1339 1404 50         if (!(reach_d >= 0)) reach_d = 0;
1340 1404 50         if (reach_d >= (double)SPH_MAX_QUERY_CELLS) return SPH_Q_TOOBIG; /* avoid int64 overflow */
1341 1404           int64_t reach = (int64_t)reach_d;
1342 1404           int64_t ac[3]; sph_cell_raw(h, pa, ac);
1343 1404           int64_t cnt[3] = { 1, 1, 1 };
1344 1404           uint64_t cells = 1;
1345 4912 100         for (int i = 0; i < dims; i++) {
1346 3508           uint64_t span = (uint64_t)(2 * reach + 1);
1347 3508 100         if (h->wrap_cells[i] > 0 && span > (uint64_t)h->wrap_cells[i]) span = (uint64_t)h->wrap_cells[i];
    50          
1348 3508 50         if (span >= (uint64_t)SPH_MAX_QUERY_CELLS) return SPH_Q_TOOBIG;
1349 3508 50         if (cells > (uint64_t)SPH_MAX_QUERY_CELLS / span) return SPH_Q_TOOBIG;
1350 3508           cells *= span; cnt[i] = (int64_t)span;
1351             }
1352 1404           int64_t b0 = ac[0]-reach, b1 = ac[1]-reach, b2 = ac[2]-reach;
1353 11204 100         for (int64_t i0 = 0; i0 < cnt[0]; i0++) {
1354 9800 100         int64_t C0 = h->wrap ? sph_wrap_cell(b0+i0, h->wrap_cells[0]) : b0+i0;
1355 94924 100         for (int64_t i1 = 0; i1 < cnt[1]; i1++) {
1356 85124 100         int64_t C1 = h->wrap ? sph_wrap_cell(b1+i1, h->wrap_cells[1]) : b1+i1;
1357 414270 100         for (int64_t i2 = 0; i2 < cnt[2]; i2++) { /* cnt[2]==1 when dims==2 */
1358 329146 100         int64_t C2 = (dims == 3) ? (h->wrap ? sph_wrap_cell(b2+i2, h->wrap_cells[2]) : b2+i2) : 0;
    100          
1359 329146           int64_t C[3] = { C0, C1, C2 };
1360 329146           uint32_t bkt = sph_bucket_of_cell(h, C);
1361 425090 100         for (uint32_t idx = h->buckets[bkt]; idx != SPH_NONE; idx = h->entries[idx].next) {
1362 126547 100         if (idx <= a) continue; /* unordered: emit each pair once */
1363 47021           int64_t ec[3]; sph_cell_of(h, h->entries[idx].pos, ec);
1364 47021 100         if (!sph_cell_eq(ec, C)) continue; /* hash-collision guard */
1365 16418 100         double thr = collide ? (ra + h->entries[idx].radius) : fixed_r;
1366 16418 100         if (sph_dist2(h, pa, h->entries[idx].pos, dims) < thr*thr)
1367 4648 50         if (!emit(h->entries[a].value, h->entries[idx].value, ctx)) return SPH_Q_OOM;
1368             }
1369             }
1370             }
1371             }
1372             }
1373 8           return SPH_Q_OK;
1374             }
1375              
1376             /* ================================================================
1377             * Lifecycle helpers: clear, chain stats
1378             * ================================================================ */
1379              
1380 1           static void sph_clear_locked(SpatialHandle *h) {
1381 1           uint32_t me = h->hdr->max_entries, nb = h->hdr->num_buckets;
1382 1025 100         for (uint32_t b = 0; b < nb; b++) h->buckets[b] = SPH_NONE;
1383 1           memset(h->bitmap, 0, (size_t)h->bitmap_words * sizeof(uint64_t));
1384 1001 100         for (uint32_t i = 0; i < me; i++) h->entries[i].next = (i+1 < me) ? i+1 : SPH_NONE;
    100          
1385 1           h->hdr->free_head = 0; /* max_entries >= 1 (validated at create) */
1386 1           h->hdr->count = 0;
1387 1           }
1388 7           static void sph_chain_stats(SpatialHandle *h, uint32_t *occupied, uint32_t *max_chain,
1389             uint32_t *max_cell) {
1390 7           uint32_t occ = 0, mx = 0, mxc = 0, nb = h->hdr->num_buckets;
1391 1293 100         for (uint32_t b = 0; b < nb; b++) {
1392 1286           uint32_t len = 0;
1393 2908 100         for (uint32_t idx = h->buckets[b]; idx != SPH_NONE; idx = h->entries[idx].next) {
1394 1622           len++;
1395             /* per-cell occupancy: count chain entries sharing idx's cell (entries of
1396             one cell always hash to one bucket, so a cell is a subset of a chain) */
1397 1622           int64_t ci[3]; sph_cell_of(h, h->entries[idx].pos, ci);
1398 1622           uint32_t cc = 0;
1399 482074 100         for (uint32_t j = h->buckets[b]; j != SPH_NONE; j = h->entries[j].next) {
1400 480452           int64_t cj[3]; sph_cell_of(h, h->entries[j].pos, cj);
1401 480452 100         if (sph_cell_eq(ci, cj)) cc++;
1402             }
1403 1622 100         if (cc > mxc) mxc = cc;
1404             }
1405 1286 100         if (len) { occ++; if (len > mx) mx = len; }
    100          
1406             }
1407 7           *occupied = occ; *max_chain = mx; *max_cell = mxc;
1408 7           }
1409              
1410             #endif /* SPHASH_H */