File Coverage

pool.h
Criterion Covered Total %
statement 346 405 85.4
branch 144 284 50.7
condition n/a
subroutine n/a
pod n/a
total 490 689 71.1


line stmt bran cond sub pod time code
1             /*
2             * pool.h -- Fixed-size shared-memory object pool for Linux
3             *
4             * Bitmap-based slot allocation with CAS (lock-free).
5             * Futex-based blocking when pool is exhausted.
6             * PID-based stale slot recovery.
7             *
8             * Variants:
9             * Raw — opaque byte slots of arbitrary elem_size
10             * I64 — int64_t slots with atomic CAS/add
11             * F64 — double slots
12             * I32 — int32_t slots with atomic CAS/add
13             * Str — fixed-length string slots (4-byte length prefix + data)
14             */
15              
16             #ifndef POOL_H
17             #define POOL_H
18              
19             #include
20             #include
21             #include
22             #include
23             #include
24             #include
25             #include
26             #include
27             #include
28             #include
29             #include
30             #include
31             #include
32             #include
33             #include
34             #include
35              
36             /* ================================================================
37             * Constants
38             * ================================================================ */
39              
/* File-format identity: magic and version are written by pool_init_header()
 * and compared by pool_validate_header(). */
40             #define POOL_MAGIC 0x504F4C31U /* "POL1" */
41             #define POOL_VERSION 1
/* Size passed to snprintf() by POOL_ERR; caller error buffers must be at
 * least this many bytes. */
42             #define POOL_ERR_BUFLEN 256
43              
/* Variant identifiers stored in PoolHeader.variant_id (see the variant list
 * in the file header comment). */
44             #define POOL_VAR_RAW 0
45             #define POOL_VAR_I64 1
46             #define POOL_VAR_F64 2
47             #define POOL_VAR_I32 3
48             #define POOL_VAR_STR 4
49              
/* Round x up to the next multiple of 8; the result has type uint64_t. */
50             #define POOL_ALIGN8(x) (((x) + 7) & ~(uint64_t)7)
51              
52             /* ================================================================
53             * Header (128 bytes = 2 cache lines)
54             * ================================================================ */
55              
/*
 * Header stored at offset 0 of the mapping.  It is laid out as two 64-byte
 * halves: the first is written once by pool_init_header(), the second holds
 * the counters that allocators and freers update atomically.
 */
typedef struct {
    /* ---- bytes 0-63: written at creation, read-only afterwards ---- */
    uint32_t magic;            /* offset  0: POOL_MAGIC */
    uint32_t version;          /* offset  4: POOL_VERSION */
    uint32_t elem_size;        /* offset  8: bytes per slot */
    uint32_t variant_id;       /* offset 12: one of POOL_VAR_* */
    uint64_t capacity;         /* offset 16: number of slots */
    uint64_t total_size;       /* offset 24: size of the whole mapping */
    uint64_t data_off;         /* offset 32: start of slot data */
    uint64_t bitmap_off;       /* offset 40: start of allocation bitmap */
    uint64_t owners_off;       /* offset 48: start of per-slot owner PIDs */
    uint8_t  _pad0[8];         /* offset 56: pad to 64 */

    /* ---- bytes 64-127: mutable shared state ---- */
    uint32_t used;             /* offset  64: allocated count; also the futex word */
    uint32_t waiters;          /* offset  68: callers currently blocked in alloc */
    uint8_t  _pad1[8];         /* offset  72 */
    uint64_t stat_allocs;      /* offset  80 */
    uint64_t stat_frees;       /* offset  88 */
    uint64_t stat_waits;       /* offset  96 */
    uint64_t stat_timeouts;    /* offset 104 */
    uint64_t stat_recoveries;  /* offset 112 */
    uint8_t  _pad2[8];         /* offset 120: pad to 128 */
} PoolHeader;

/* The layout above is part of the shared-memory format: pin its size. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
_Static_assert(sizeof(PoolHeader) == 128, "PoolHeader must be 128 bytes");
#endif
84              
85             /* ================================================================
86             * Process-local handle
87             * ================================================================ */
88              
89             typedef struct {
90             PoolHeader *hdr;
91             uint64_t *bitmap;
92             uint32_t *owners;
93             uint8_t *data;
94             size_t mmap_size;
95             uint32_t bitmap_words;
96             char *path;
97             int notify_fd;
98             int backing_fd;
99             uint32_t scan_hint;
100             } PoolHandle;
101              
102             /* ================================================================
103             * Utility
104             * ================================================================ */
105              
/*
 * Probe whether `pid` still refers to a process.
 *
 * Returns 1 for pid 0 (treated as "no owner recorded") and whenever the
 * kill(pid, 0) probe does anything other than fail with ESRCH; returns 0
 * only when the kernel reports that no such process exists.
 */
static inline int pool_pid_alive(uint32_t pid) {
    if (pid == 0)
        return 1;
    if (kill((pid_t)pid, 0) != -1)
        return 1;
    /* Probe failed: only ESRCH means the process is gone. */
    return errno != ESRCH;
}
110              
/*
 * Fill `*deadline` with the CLOCK_MONOTONIC time `timeout` seconds from now.
 * The fractional part of `timeout` is converted to nanoseconds and tv_nsec
 * is normalised back below one second with a single carry.
 */
static inline void pool_make_deadline(double timeout, struct timespec *deadline) {
    const time_t whole_sec = (time_t)timeout;
    const long frac_nsec = (long)((timeout - (double)whole_sec) * 1e9);

    clock_gettime(CLOCK_MONOTONIC, deadline);
    deadline->tv_sec += whole_sec;
    deadline->tv_nsec += frac_nsec;

    if (deadline->tv_nsec < 1000000000L)
        return;
    deadline->tv_sec += 1;
    deadline->tv_nsec -= 1000000000L;
}
120              
/*
 * Store in `*remaining` the time left until `*deadline` on CLOCK_MONOTONIC.
 * Returns 1 while the seconds part of the difference is non-negative and 0
 * once the deadline has passed.
 */
static inline int pool_remaining_time(const struct timespec *deadline,
                                      struct timespec *remaining) {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    time_t sec = deadline->tv_sec - now.tv_sec;
    long nsec = deadline->tv_nsec - now.tv_nsec;
    if (nsec < 0) {
        /* borrow one second so tv_nsec stays non-negative */
        sec -= 1;
        nsec += 1000000000L;
    }

    remaining->tv_sec = sec;
    remaining->tv_nsec = nsec;
    return sec >= 0;
}
133              
134             /* ================================================================
135             * Slot access
136             * ================================================================ */
137              
138 1697           static inline uint8_t *pool_slot_ptr(PoolHandle *h, uint64_t slot) {
139 1697           return h->data + slot * h->hdr->elem_size;
140             }
141              
142 1028           static inline int pool_is_allocated(PoolHandle *h, uint64_t slot) {
143 1028           uint32_t widx = (uint32_t)(slot / 64);
144 1028           int bit = (int)(slot % 64);
145 1028           uint64_t word = __atomic_load_n(&h->bitmap[widx], __ATOMIC_RELAXED);
146 1028           return (word >> bit) & 1;
147             }
148              
149             /* ================================================================
150             * Allocation (lock-free bitmap scan + CAS)
151             * ================================================================ */
152              
153 943           static inline int64_t pool_try_alloc(PoolHandle *h) {
154 943           uint32_t nwords = h->bitmap_words;
155 943           uint64_t cap = h->hdr->capacity;
156 943           uint32_t start = h->scan_hint;
157 943           uint32_t mypid = (uint32_t)getpid();
158              
159 971 100         for (uint32_t i = 0; i < nwords; i++) {
160 955           uint32_t widx = (start + i) % nwords;
161 955           uint64_t word = __atomic_load_n(&h->bitmap[widx], __ATOMIC_RELAXED);
162              
163 955 100         while (word != ~(uint64_t)0) {
164 944           int bit = __builtin_ctzll(~word);
165 944           uint64_t slot = (uint64_t)widx * 64 + bit;
166 944 100         if (slot >= cap) break;
167              
168 927           uint64_t new_word = word | ((uint64_t)1 << bit);
169 927 50         if (__atomic_compare_exchange_n(&h->bitmap[widx], &word, new_word,
170             1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
171 927           __atomic_store_n(&h->owners[slot], mypid, __ATOMIC_RELAXED);
172 927           memset(pool_slot_ptr(h, slot), 0, h->hdr->elem_size);
173 927           __atomic_add_fetch(&h->hdr->used, 1, __ATOMIC_RELEASE);
174 927           __atomic_add_fetch(&h->hdr->stat_allocs, 1, __ATOMIC_RELAXED);
175             /* Advance hint past full word to reduce next scan */
176 11 100         h->scan_hint = (new_word == ~(uint64_t)0 && nwords > 1)
177 938 100         ? (widx + 1) % nwords : widx;
178 927           return (int64_t)slot;
179             }
180             /* CAS failed — word now holds current value, retry */
181             }
182             }
183 16           return -1;
184             }
185              
186             /* Pin the per-handle allocation scan to start at the given slot's word. Pass 0 for
187             * deterministic, sequential (low-to-high) allocation order, overriding the
188             * getpid()-derived spread start. The spread only reduces CAS contention when many
189             * processes allocate concurrently; a single allocator wants reproducible order, and
190             * the bitmap stays the source of truth either way. An out-of-range slot wraps. */
191 8           static inline void pool_scan_from(PoolHandle *h, uint64_t slot) {
192 8           h->scan_hint = (uint32_t)((slot / 64) % h->bitmap_words);
193 8           }
194              
195             /* Blocking alloc. timeout<0 = infinite, 0 = non-blocking, >0 = seconds. */
196 922           static inline int64_t pool_alloc(PoolHandle *h, double timeout) {
197 922           int64_t slot = pool_try_alloc(h);
198 922 100         if (slot >= 0) return slot;
199 3 100         if (timeout == 0) return -1;
200              
201 2           PoolHeader *hdr = h->hdr;
202             struct timespec deadline, remaining;
203 2           int has_deadline = (timeout > 0);
204 2 50         if (has_deadline) pool_make_deadline(timeout, &deadline);
205              
206 2           __atomic_add_fetch(&hdr->stat_waits, 1, __ATOMIC_RELAXED);
207              
208 0           for (;;) {
209 2           uint32_t cur_used = __atomic_load_n(&hdr->used, __ATOMIC_RELAXED);
210 2 50         if ((uint64_t)cur_used < hdr->capacity) {
211 0           slot = pool_try_alloc(h);
212 0 0         if (slot >= 0) return slot;
213             }
214              
215 2           __atomic_add_fetch(&hdr->waiters, 1, __ATOMIC_RELEASE);
216              
217 2           cur_used = __atomic_load_n(&hdr->used, __ATOMIC_ACQUIRE);
218 2 50         if ((uint64_t)cur_used >= hdr->capacity) {
219 2           struct timespec *pts = NULL;
220 2 50         if (has_deadline) {
221 2 50         if (!pool_remaining_time(&deadline, &remaining)) {
222 0           __atomic_sub_fetch(&hdr->waiters, 1, __ATOMIC_RELAXED);
223 0           __atomic_add_fetch(&hdr->stat_timeouts, 1, __ATOMIC_RELAXED);
224 0           return -1;
225             }
226 2           pts = &remaining;
227             }
228 2           syscall(SYS_futex, &hdr->used, FUTEX_WAIT, cur_used, pts, NULL, 0);
229             }
230              
231 2           __atomic_sub_fetch(&hdr->waiters, 1, __ATOMIC_RELAXED);
232              
233 2           slot = pool_try_alloc(h);
234 2 100         if (slot >= 0) return slot;
235              
236 1 50         if (has_deadline) {
237 1 50         if (!pool_remaining_time(&deadline, &remaining)) {
238 1           __atomic_add_fetch(&hdr->stat_timeouts, 1, __ATOMIC_RELAXED);
239 1           return -1;
240             }
241             }
242             }
243             }
244              
245             /* ================================================================
246             * Free
247             * ================================================================ */
248              
249 691           static inline int pool_free_slot(PoolHandle *h, uint64_t slot) {
250 691           PoolHeader *hdr = h->hdr;
251 691 50         if (slot >= hdr->capacity) return 0;
252              
253 691           uint32_t widx = (uint32_t)(slot / 64);
254 691           int bit = (int)(slot % 64);
255 691           uint64_t mask = (uint64_t)1 << bit;
256              
257 0           for (;;) {
258 691           uint64_t word = __atomic_load_n(&h->bitmap[widx], __ATOMIC_RELAXED);
259 1380 100         if (!(word & mask)) return 0;
260              
261 689           uint64_t new_word = word & ~mask;
262 689 50         if (__atomic_compare_exchange_n(&h->bitmap[widx], &word, new_word,
263             1, __ATOMIC_RELEASE, __ATOMIC_RELAXED)) {
264 689           __atomic_store_n(&h->owners[slot], 0, __ATOMIC_RELAXED);
265 689           __atomic_sub_fetch(&hdr->used, 1, __ATOMIC_RELEASE);
266 689           __atomic_add_fetch(&hdr->stat_frees, 1, __ATOMIC_RELAXED);
267 689 50         if (__atomic_load_n(&hdr->waiters, __ATOMIC_RELAXED) > 0)
268 0           syscall(SYS_futex, &hdr->used, FUTEX_WAKE, 1, NULL, NULL, 0);
269 689           return 1;
270             }
271             }
272             }
273              
274             /* ================================================================
275             * Batch free — single used decrement + single futex wake
276             * ================================================================ */
277              
278 3           static inline uint32_t pool_free_n(PoolHandle *h, uint64_t *slots, uint32_t count) {
279 3           uint32_t freed = 0;
280 3           PoolHeader *hdr = h->hdr;
281              
282 10 100         for (uint32_t i = 0; i < count; i++) {
283 7           uint64_t slot = slots[i];
284 7 50         if (slot >= hdr->capacity) continue;
285              
286 7           uint32_t widx = (uint32_t)(slot / 64);
287 7           int bit = (int)(slot % 64);
288 7           uint64_t mask = (uint64_t)1 << bit;
289              
290 0           for (;;) {
291 7           uint64_t word = __atomic_load_n(&h->bitmap[widx], __ATOMIC_RELAXED);
292 14 50         if (!(word & mask)) break;
293 7           uint64_t new_word = word & ~mask;
294 7 50         if (__atomic_compare_exchange_n(&h->bitmap[widx], &word, new_word,
295             1, __ATOMIC_RELEASE, __ATOMIC_RELAXED)) {
296 7           __atomic_store_n(&h->owners[slot], 0, __ATOMIC_RELAXED);
297 7           freed++;
298 7           break;
299             }
300             }
301             }
302              
303 3 50         if (freed > 0) {
304 3           __atomic_sub_fetch(&hdr->used, freed, __ATOMIC_RELEASE);
305 3           __atomic_add_fetch(&hdr->stat_frees, freed, __ATOMIC_RELAXED);
306 3 50         if (__atomic_load_n(&hdr->waiters, __ATOMIC_RELAXED) > 0) {
307 0 0         int wake = freed < (uint32_t)INT_MAX ? (int)freed : INT_MAX;
308 0           syscall(SYS_futex, &hdr->used, FUTEX_WAKE, wake, NULL, NULL, 0);
309             }
310             }
311 3           return freed;
312             }
313              
314             /* ================================================================
315             * Batch alloc — all-or-nothing, shared deadline
316             * ================================================================ */
317              
318 2           static inline int pool_alloc_n(PoolHandle *h, uint64_t *out, uint32_t count,
319             double timeout) {
320 2 50         if (count == 0) return 1;
321              
322 2 100         if (timeout == 0) {
323 2 50         for (uint32_t i = 0; i < count; i++) {
324 2           int64_t slot = pool_try_alloc(h);
325 2 100         if (slot < 0) {
326 1 50         if (i > 0) pool_free_n(h, out, i);
327 1           return 0;
328             }
329 1           out[i] = (uint64_t)slot;
330             }
331 0           return 1;
332             }
333              
334             struct timespec deadline, remaining;
335 1           int has_deadline = (timeout > 0);
336 1 50         if (has_deadline) pool_make_deadline(timeout, &deadline);
337              
338 4 100         for (uint32_t i = 0; i < count; i++) {
339 3           double t = timeout;
340 3 50         if (has_deadline) {
341 0 0         if (!pool_remaining_time(&deadline, &remaining)) {
342 0           __atomic_add_fetch(&h->hdr->stat_timeouts, 1, __ATOMIC_RELAXED);
343 0 0         if (i > 0) pool_free_n(h, out, i);
344 0           return 0;
345             }
346 0           t = (double)remaining.tv_sec + (double)remaining.tv_nsec / 1e9;
347             }
348 3           int64_t slot = pool_alloc(h, t);
349 3 50         if (slot < 0) {
350 0 0         if (i > 0) pool_free_n(h, out, i);
351 0           return 0;
352             }
353 3           out[i] = (uint64_t)slot;
354             }
355 1           return 1;
356             }
357              
358             /* ================================================================
359             * Stale recovery — CAS owner to narrow race window
360             * ================================================================ */
361              
/* Reclaim slots whose recorded owner process no longer exists.
 *
 * Every slot below capacity is examined. A slot is a candidate when its
 * bitmap bit is set, its owner entry is non-zero, and pool_pid_alive()
 * returns 0 for that owner. For each candidate the owner entry is swapped
 * from the dead PID to 0 (strong CAS); only the caller that wins this swap
 * goes on to clear the bitmap bit, decrement `used` (saturating at 0),
 * increment `stat_frees`, and wake one waiter if any are registered.
 *
 * Returns the number of slots counted as recovered; the same number is
 * added to `stat_recoveries`.
 *
 * NOTE(review): on the post-CAS path that restores the bit with
 * __atomic_or_fetch, the code still decrements `used`, bumps `stat_frees`
 * and counts the slot in `recovered`. Whether that keeps `used` equal to the
 * bitmap popcount in every interleaving should be confirmed. */
362 3           static inline uint32_t pool_recover_stale(PoolHandle *h) {
363 3           uint32_t recovered = 0;
364 3           uint64_t cap = h->hdr->capacity;
365              
366 267 100         for (uint64_t slot = 0; slot < cap; slot++) {
367 265 100         if (!pool_is_allocated(h, slot)) continue;
368 22           uint32_t owner = __atomic_load_n(&h->owners[slot], __ATOMIC_ACQUIRE);
                  /* owner == 0: no PID recorded for this slot, so there is nothing to test */
369 22 50         if (owner == 0 || pool_pid_alive(owner)) continue;
    100          
370              
371             /* CAS owner from dead PID to 0 — if it fails, slot was
372             * re-allocated or already recovered by another process */
373 21 50         if (!__atomic_compare_exchange_n(&h->owners[slot], &owner, 0,
374             0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED))
375 0           continue;
376              
377             /* We now own the right to free this slot's bitmap bit.
378             *
379             * Race window: between our owner-CAS and the bitmap-CAS below,
380             * the bitmap word can transition via concurrent allocators (or,
381             * with API misuse, a free+alloc cycle on the same bit). The bit
382             * may be reset by an allocator before our CAS reaches it, or our
383             * CAS may clear a bit a fresh allocator has just claimed.
384             *
385             * Mitigation: pre-CAS owner check (narrows window) + post-CAS
386             * recovery accounting (always decrement used, since our CAS
387             * succeeded against an "expected = bit set" state — that bit is
388             * gone from popcount). If post-CAS observes an owner already
389             * stored, an allocator's CAS landed inside our window; we restore
390             * the bit so their slot stays claimed. Their own used++ pairs
391             * with our used-- to keep the counter in sync with popcount. */
392 21           uint32_t widx = (uint32_t)(slot / 64);
393 21           int bit = (int)(slot % 64);
394 21           uint64_t mask = (uint64_t)1 << bit;
395              
396 0           for (;;) {
397 21           uint64_t word = __atomic_load_n(&h->bitmap[widx], __ATOMIC_RELAXED);
                  /* bit already clear: someone else released the slot, nothing to account */
398 42 50         if (!(word & mask)) break;
399              
400             /* Pre-CAS: if a new allocator already populated owner, abort */
401 21 50         if (__atomic_load_n(&h->owners[slot], __ATOMIC_ACQUIRE) != 0)
402 0           break;
403              
404 21           uint64_t new_word = word & ~mask;
                  /* weak CAS: a failure (including a spurious one) simply repeats the loop */
405 21 50         if (__atomic_compare_exchange_n(&h->bitmap[widx], &word, new_word,
406             1, __ATOMIC_RELEASE, __ATOMIC_RELAXED)) {
407             /* Post-CAS: if owner is now non-zero, a live allocator's
408             * CAS landed inside our window. Restore the bit so their
409             * claim stays valid; their used++ already happened. */
410 21 50         if (__atomic_load_n(&h->owners[slot], __ATOMIC_ACQUIRE) != 0)
411 0           __atomic_or_fetch(&h->bitmap[widx], mask, __ATOMIC_RELEASE);
412             /* Account for the stale slot's bit we just cleared. Use a
413             * saturating decrement: if a prior allocator was killed
414             * between owner-store and used++, used may already reflect
415             * fewer "real" allocations than popcount suggests, and a
416             * plain sub would underflow on subsequent recoveries. */
417 21           uint32_t cur = __atomic_load_n(&h->hdr->used, __ATOMIC_RELAXED);
418 21 50         while (cur > 0 && !__atomic_compare_exchange_n(&h->hdr->used,
419 21 50         &cur, cur - 1, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED))
420             ; /* CAS failed: cur reloaded with current value; retry */
421 21           __atomic_add_fetch(&h->hdr->stat_frees, 1, __ATOMIC_RELAXED);
422 21 50         if (__atomic_load_n(&h->hdr->waiters, __ATOMIC_RELAXED) > 0)
423 0           syscall(SYS_futex, &h->hdr->used, FUTEX_WAKE, 1, NULL, NULL, 0);
424 21           recovered++;
425 21           break;
426             }
427             }
428             }
429              
430 3 100         if (recovered > 0)
431 2           __atomic_add_fetch(&h->hdr->stat_recoveries, recovered, __ATOMIC_RELAXED);
432              
433 3           return recovered;
434             }
435              
436             /* ================================================================
437             * Layout calculation
438             * ================================================================ */
439              
440 60           static inline void pool_calc_layout(uint64_t capacity, uint32_t elem_size,
441             uint64_t *bitmap_off, uint64_t *owners_off,
442             uint64_t *data_off, uint64_t *total_size) {
443 60           uint64_t bwords = (capacity + 63) / 64;
444 60           uint64_t bitmap_sz = bwords * 8;
445 60           uint64_t owners_sz = POOL_ALIGN8(capacity * 4);
446 60           uint64_t data_sz = (uint64_t)capacity * elem_size;
447              
448 60           *bitmap_off = sizeof(PoolHeader);
449 60           *owners_off = *bitmap_off + bitmap_sz;
450 60           *data_off = *owners_off + owners_sz;
451 60           *total_size = *data_off + data_sz;
452 60           }
453              
454             /* ================================================================
455             * Header initialization (shared by pool_create and pool_create_memfd)
456             * ================================================================ */
457              
458 51           static inline void pool_init_header(void *base, uint64_t total,
459             uint32_t elem_size, uint32_t variant_id,
460             uint64_t capacity, uint64_t bm_off,
461             uint64_t own_off, uint64_t dat_off) {
462 51           PoolHeader *hdr = (PoolHeader *)base;
463 51           memset(base, 0, (size_t)total);
464 51           hdr->magic = POOL_MAGIC;
465 51           hdr->version = POOL_VERSION;
466 51           hdr->elem_size = elem_size;
467 51           hdr->variant_id = variant_id;
468 51           hdr->capacity = capacity;
469 51           hdr->total_size = total;
470 51           hdr->data_off = dat_off;
471 51           hdr->bitmap_off = bm_off;
472 51           hdr->owners_off = own_off;
473 51           __atomic_thread_fence(__ATOMIC_SEQ_CST);
474 51           }
475              
476             /* ================================================================
477             * Create / Open / Close
478             * ================================================================ */
479              
/* Format an error message into a variable named `errbuf` that must be in
 * scope at the point of use. Nothing is written when `errbuf` is NULL;
 * otherwise at most POOL_ERR_BUFLEN bytes are written, so the buffer must be
 * at least that large. `##__VA_ARGS__` is a GNU extension that allows the
 * macro to be used with no variadic arguments. */
480             #define POOL_ERR(fmt, ...) do { if (errbuf) snprintf(errbuf, POOL_ERR_BUFLEN, fmt, ##__VA_ARGS__); } while(0)
481              
482             /* Max capacity to prevent bitmap_words (uint32_t) truncation */
483             #define POOL_MAX_CAPACITY ((uint64_t)UINT32_MAX * 64)
484              
485             /* Validate header: magic, version, variant, sizes, AND layout offsets. */
486 7           static inline int pool_validate_header(const PoolHeader *hdr, uint64_t file_size,
487             uint32_t expected_variant) {
488 7 50         if (hdr->magic != POOL_MAGIC) return 0;
489 7 50         if (hdr->version != POOL_VERSION) return 0;
490 7 50         if (hdr->variant_id != expected_variant) return 0;
491 7 50         if (hdr->capacity == 0 || hdr->capacity > POOL_MAX_CAPACITY) return 0;
    50          
492 7 50         if (hdr->elem_size == 0) return 0;
493 7 50         if (hdr->capacity > (UINT64_MAX - sizeof(PoolHeader)) / hdr->elem_size) return 0;
494 7 50         if (hdr->total_size != file_size) return 0;
495              
496             uint64_t bm_off, own_off, dat_off, total;
497 7           pool_calc_layout(hdr->capacity, hdr->elem_size, &bm_off, &own_off, &dat_off, &total);
498              
499 7 50         if (hdr->bitmap_off != bm_off) return 0;
500 7 50         if (hdr->owners_off != own_off) return 0;
501 7 50         if (hdr->data_off != dat_off) return 0;
502 7 50         if (hdr->total_size != total) return 0;
503 7           return 1;
504             }
505              
506 58           static inline PoolHandle *pool_setup_handle(void *base, size_t map_size,
507             const char *path, int backing_fd) {
508 58           PoolHeader *hdr = (PoolHeader *)base;
509 58           PoolHandle *h = (PoolHandle *)calloc(1, sizeof(PoolHandle));
510 58 50         if (!h) { munmap(base, map_size); return NULL; }
511              
512 58           h->hdr = hdr;
513 58           h->bitmap = (uint64_t *)((uint8_t *)base + hdr->bitmap_off);
514 58           h->owners = (uint32_t *)((uint8_t *)base + hdr->owners_off);
515 58           h->data = (uint8_t *)base + hdr->data_off;
516 58           h->mmap_size = map_size;
517 58           h->bitmap_words = (uint32_t)((hdr->capacity + 63) / 64);
518 58 100         h->path = path ? strdup(path) : NULL;
519 58           h->notify_fd = -1;
520 58           h->backing_fd = backing_fd;
521 58           h->scan_hint = (uint32_t)getpid() % h->bitmap_words;
522              
523 58           return h;
524             }
525              
526 47           static PoolHandle *pool_create(const char *path, uint64_t capacity,
527             uint32_t elem_size, uint32_t variant_id,
528             char *errbuf) {
529 47 50         if (errbuf) errbuf[0] = '\0';
530              
531 47 50         if (capacity == 0) { POOL_ERR("capacity must be > 0"); return NULL; }
    0          
532 47 50         if (elem_size == 0) { POOL_ERR("elem_size must be > 0"); return NULL; }
    0          
533 47 50         if (capacity > POOL_MAX_CAPACITY) { POOL_ERR("capacity too large"); return NULL; }
    0          
534 47 50         if (capacity > (UINT64_MAX - sizeof(PoolHeader)) / elem_size) {
535 0 0         POOL_ERR("capacity * elem_size overflow"); return NULL;
536             }
537              
538             uint64_t bm_off, own_off, dat_off, total;
539 47           pool_calc_layout(capacity, elem_size, &bm_off, &own_off, &dat_off, &total);
540              
541 47           int fd = -1;
542 47           int anonymous = (path == NULL);
543             size_t map_size;
544             void *base;
545              
546 47 100         if (anonymous) {
547 35           map_size = (size_t)total;
548 35           base = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
549             MAP_SHARED | MAP_ANONYMOUS, -1, 0);
550 35 50         if (base == MAP_FAILED) {
551 0 0         POOL_ERR("mmap(anonymous): %s", strerror(errno));
552 0           return NULL;
553             }
554             } else {
555 12           fd = open(path, O_RDWR | O_CREAT, 0666);
556 14 50         if (fd < 0) { POOL_ERR("open(%s): %s", path, strerror(errno)); return NULL; }
    0          
557              
558 12 50         if (flock(fd, LOCK_EX) < 0) {
559 0 0         POOL_ERR("flock(%s): %s", path, strerror(errno));
560 0           close(fd); return NULL;
561             }
562              
563             struct stat st;
564 12 50         if (fstat(fd, &st) < 0) {
565 0 0         POOL_ERR("fstat(%s): %s", path, strerror(errno));
566 0           flock(fd, LOCK_UN); close(fd); return NULL;
567             }
568              
569 12           int is_new = (st.st_size == 0);
570              
571 12 100         if (!is_new && (uint64_t)st.st_size < sizeof(PoolHeader)) {
    50          
572 0 0         POOL_ERR("%s: file too small (%lld)", path, (long long)st.st_size);
573 0           flock(fd, LOCK_UN); close(fd); return NULL;
574             }
575              
576 12 100         if (is_new) {
577 10 50         if (ftruncate(fd, (off_t)total) < 0) {
578 0 0         POOL_ERR("ftruncate(%s): %s", path, strerror(errno));
579 0           flock(fd, LOCK_UN); close(fd); return NULL;
580             }
581             }
582              
583 12 100         map_size = is_new ? (size_t)total : (size_t)st.st_size;
584 12           base = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
585 12 50         if (base == MAP_FAILED) {
586 0 0         POOL_ERR("mmap(%s): %s", path, strerror(errno));
587 0           flock(fd, LOCK_UN); close(fd); return NULL;
588             }
589              
590 12 100         if (!is_new) {
591 2 50         if (!pool_validate_header((PoolHeader *)base, (uint64_t)st.st_size, variant_id)) {
592 0 0         POOL_ERR("%s: invalid or incompatible pool file", path);
593 0           munmap(base, map_size); flock(fd, LOCK_UN); close(fd); return NULL;
594             }
595 2           flock(fd, LOCK_UN);
596 2           close(fd);
597 2           return pool_setup_handle(base, map_size, path, -1);
598             }
599             }
600              
601             /* Initialize header — flock still held for file-backed new files */
602 45           pool_init_header(base, total, elem_size, variant_id, capacity,
603             bm_off, own_off, dat_off);
604              
605 45 100         if (fd >= 0) {
606 10           flock(fd, LOCK_UN);
607 10           close(fd);
608             }
609              
610 45           return pool_setup_handle(base, map_size, path, -1);
611             }
612              
613 6           static PoolHandle *pool_create_memfd(const char *name, uint64_t capacity,
614             uint32_t elem_size, uint32_t variant_id,
615             char *errbuf) {
616 6 50         if (errbuf) errbuf[0] = '\0';
617              
618 6 50         if (capacity == 0) { POOL_ERR("capacity must be > 0"); return NULL; }
    0          
619 6 50         if (elem_size == 0) { POOL_ERR("elem_size must be > 0"); return NULL; }
    0          
620 6 50         if (capacity > POOL_MAX_CAPACITY) { POOL_ERR("capacity too large"); return NULL; }
    0          
621 6 50         if (capacity > (UINT64_MAX - sizeof(PoolHeader)) / elem_size) {
622 0 0         POOL_ERR("capacity * elem_size overflow"); return NULL;
623             }
624              
625             uint64_t bm_off, own_off, dat_off, total;
626 6           pool_calc_layout(capacity, elem_size, &bm_off, &own_off, &dat_off, &total);
627              
628 6 50         int fd = memfd_create(name ? name : "pool", MFD_CLOEXEC | MFD_ALLOW_SEALING);
629 6 50         if (fd < 0) { POOL_ERR("memfd_create: %s", strerror(errno)); return NULL; }
    0          
630              
631 6 50         if (ftruncate(fd, (off_t)total) < 0) {
632 0 0         POOL_ERR("ftruncate(memfd): %s", strerror(errno));
633 0           close(fd); return NULL;
634             }
635              
636             /* Seal against shrink/grow to block ftruncate-based SIGBUS attacks via
637             * SCM_RIGHTS-shared fds. Peers can still write; only size is immutable. */
638 6           (void)fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);
639              
640 6           void *base = mmap(NULL, (size_t)total, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
641 6 50         if (base == MAP_FAILED) {
642 0 0         POOL_ERR("mmap(memfd): %s", strerror(errno));
643 0           close(fd); return NULL;
644             }
645              
646 6           pool_init_header(base, total, elem_size, variant_id, capacity,
647             bm_off, own_off, dat_off);
648              
649 6           return pool_setup_handle(base, (size_t)total, NULL, fd);
650             }
651              
652 5           static PoolHandle *pool_open_fd(int fd, uint32_t variant_id, char *errbuf) {
653 5 50         if (errbuf) errbuf[0] = '\0';
654              
655             struct stat st;
656 5 50         if (fstat(fd, &st) < 0) {
657 0 0         POOL_ERR("fstat(fd=%d): %s", fd, strerror(errno));
658 0           return NULL;
659             }
660              
661 5 50         if ((uint64_t)st.st_size < sizeof(PoolHeader)) {
662 0 0         POOL_ERR("fd %d: too small (%lld)", fd, (long long)st.st_size);
663 0           return NULL;
664             }
665              
666 5           size_t map_size = (size_t)st.st_size;
667 5           void *base = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
668 5 50         if (base == MAP_FAILED) {
669 0 0         POOL_ERR("mmap(fd=%d): %s", fd, strerror(errno));
670 0           return NULL;
671             }
672              
673 5 50         if (!pool_validate_header((PoolHeader *)base, (uint64_t)st.st_size, variant_id)) {
674 0 0         POOL_ERR("fd %d: invalid or incompatible pool", fd);
675 0           munmap(base, map_size);
676 0           return NULL;
677             }
678              
679 5           int myfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
680 5 50         if (myfd < 0) {
681 0 0         POOL_ERR("fcntl(F_DUPFD_CLOEXEC): %s", strerror(errno));
682 0           munmap(base, map_size);
683 0           return NULL;
684             }
685              
686 5           return pool_setup_handle(base, map_size, NULL, myfd);
687             }
688              
689 58           static void pool_destroy(PoolHandle *h) {
690 58 50         if (!h) return;
691 58 100         if (h->notify_fd >= 0) close(h->notify_fd);
692 58 100         if (h->backing_fd >= 0) close(h->backing_fd);
693 58 50         if (h->hdr) munmap(h->hdr, h->mmap_size);
694 58           free(h->path);
695 58           free(h);
696             }
697              
698             /* ================================================================
699             * Eventfd integration
700             * ================================================================ */
701              
702 3           static int pool_create_eventfd(PoolHandle *h) {
703 3 100         if (h->notify_fd >= 0) return h->notify_fd;
704 2           int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
705 2 50         if (efd < 0) return -1;
706 2           h->notify_fd = efd;
707 2           return efd;
708             }
709              
710 4           static int pool_notify(PoolHandle *h) {
711 4 50         if (h->notify_fd < 0) return 0;
712 4           uint64_t val = 1;
713 4           return write(h->notify_fd, &val, sizeof(val)) == sizeof(val);
714             }
715              
716 4           static int64_t pool_eventfd_consume(PoolHandle *h) {
717 4 50         if (h->notify_fd < 0) return -1;
718 4           uint64_t val = 0;
719 4 100         if (read(h->notify_fd, &val, sizeof(val)) != sizeof(val)) return -1;
720 3           return (int64_t)val;
721             }
722              
723 1           static int pool_msync(PoolHandle *h) {
724 1           return msync(h->hdr, h->mmap_size, MS_SYNC);
725             }
726              
727             /* ================================================================
728             * Typed accessors — integers (atomic)
729             * ================================================================ */
730              
731 62           static inline int64_t pool_get_i64(PoolHandle *h, uint64_t slot) {
732 62           return __atomic_load_n((int64_t *)pool_slot_ptr(h, slot), __ATOMIC_RELAXED);
733             }
734              
735 613           static inline void pool_set_i64(PoolHandle *h, uint64_t slot, int64_t val) {
736 613           __atomic_store_n((int64_t *)pool_slot_ptr(h, slot), val, __ATOMIC_RELAXED);
737 613           }
738              
739 3           static inline int pool_cas_i64(PoolHandle *h, uint64_t slot,
740             int64_t expected, int64_t desired) {
741 3           return __atomic_compare_exchange_n((int64_t *)pool_slot_ptr(h, slot),
742             &expected, desired, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
743             }
744              
745 2           static inline int64_t pool_cmpxchg_i64(PoolHandle *h, uint64_t slot,
746             int64_t expected, int64_t desired) {
747 2           __atomic_compare_exchange_n((int64_t *)pool_slot_ptr(h, slot),
748             &expected, desired, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
749 2           return expected;
750             }
751              
752 1           static inline int64_t pool_xchg_i64(PoolHandle *h, uint64_t slot, int64_t val) {
753 1           return __atomic_exchange_n((int64_t *)pool_slot_ptr(h, slot), val, __ATOMIC_ACQ_REL);
754             }
755              
756 7           static inline int64_t pool_add_i64(PoolHandle *h, uint64_t slot, int64_t delta) {
757 7           return __atomic_add_fetch((int64_t *)pool_slot_ptr(h, slot), delta, __ATOMIC_ACQ_REL);
758             }
759              
760 5           static inline int32_t pool_get_i32(PoolHandle *h, uint64_t slot) {
761 5           return __atomic_load_n((int32_t *)pool_slot_ptr(h, slot), __ATOMIC_RELAXED);
762             }
763              
764 6           static inline void pool_set_i32(PoolHandle *h, uint64_t slot, int32_t val) {
765 6           __atomic_store_n((int32_t *)pool_slot_ptr(h, slot), val, __ATOMIC_RELAXED);
766 6           }
767              
768 1           static inline int pool_cas_i32(PoolHandle *h, uint64_t slot,
769             int32_t expected, int32_t desired) {
770 1           return __atomic_compare_exchange_n((int32_t *)pool_slot_ptr(h, slot),
771             &expected, desired, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
772             }
773              
774 2           static inline int32_t pool_cmpxchg_i32(PoolHandle *h, uint64_t slot,
775             int32_t expected, int32_t desired) {
776 2           __atomic_compare_exchange_n((int32_t *)pool_slot_ptr(h, slot),
777             &expected, desired, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
778 2           return expected;
779             }
780              
781 1           static inline int32_t pool_xchg_i32(PoolHandle *h, uint64_t slot, int32_t val) {
782 1           return __atomic_exchange_n((int32_t *)pool_slot_ptr(h, slot), val, __ATOMIC_ACQ_REL);
783             }
784              
785 2           static inline int32_t pool_add_i32(PoolHandle *h, uint64_t slot, int32_t delta) {
786 2           return __atomic_add_fetch((int32_t *)pool_slot_ptr(h, slot), delta, __ATOMIC_ACQ_REL);
787             }
788              
789             /* ================================================================
790             * Typed accessors — float (non-atomic)
791             * ================================================================ */
792              
793 7           static inline double pool_get_f64(PoolHandle *h, uint64_t slot) {
794             double v;
795 7           memcpy(&v, pool_slot_ptr(h, slot), sizeof(double));
796 7           return v;
797             }
798              
799 7           static inline void pool_set_f64(PoolHandle *h, uint64_t slot, double val) {
800 7           memcpy(pool_slot_ptr(h, slot), &val, sizeof(double));
801 7           }
802              
803             /* ================================================================
804             * Typed accessors — string (4-byte length prefix + data)
805             * ================================================================ */
806              
807 11           static inline uint32_t pool_get_str_len(PoolHandle *h, uint64_t slot) {
808             uint32_t len;
809 11           memcpy(&len, pool_slot_ptr(h, slot), sizeof(uint32_t));
810 11           uint32_t max_len = h->hdr->elem_size - sizeof(uint32_t);
811 11 50         if (len > max_len) len = max_len;
812 11           return len;
813             }
814              
815 11           static inline const char *pool_get_str_ptr(PoolHandle *h, uint64_t slot) {
816 11           return (const char *)(pool_slot_ptr(h, slot) + sizeof(uint32_t));
817             }
818              
819 8           static inline void pool_set_str(PoolHandle *h, uint64_t slot,
820             const char *str, uint32_t len) {
821 8           uint32_t max_len = h->hdr->elem_size - sizeof(uint32_t);
822 8 100         if (len > max_len) len = max_len;
823 8           memcpy(pool_slot_ptr(h, slot), &len, sizeof(uint32_t));
824 8           memcpy(pool_slot_ptr(h, slot) + sizeof(uint32_t), str, len);
825 8           }
826              
827             /* ================================================================
828             * Reset — free all slots (NOT concurrency-safe, caller must
829             * ensure no other process is accessing the pool)
830             * ================================================================ */
831              
832 10           static inline void pool_reset(PoolHandle *h) {
833 10           PoolHeader *hdr = h->hdr;
834 10           memset(h->bitmap, 0, (size_t)h->bitmap_words * 8);
835 10           memset(h->owners, 0, (size_t)hdr->capacity * 4);
836 10           __atomic_store_n(&hdr->used, 0, __ATOMIC_RELEASE);
837 10 50         if (__atomic_load_n(&hdr->waiters, __ATOMIC_RELAXED) > 0)
838 0           syscall(SYS_futex, &hdr->used, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
839 10           }
840              
841             #endif /* POOL_H */