File Coverage

ring.h
Criterion Covered Total %
statement 206 222 92.7
branch 99 208 47.6
condition n/a
subroutine n/a
pod n/a
total 305 430 70.9


line stmt bran cond sub pod time code
1             /*
2             * ring.h -- Shared-memory fixed-size ring buffer for Linux
3             *
4             * Lock-free circular buffer: writes overwrite oldest when full.
5             * Readers access by relative position (0=latest) or absolute sequence.
6             * No consumer tracking — data persists until overwritten.
7             *
8             * v2 layout adds per-slot publication sequence (seqlock-per-slot), so
9             * readers never observe a partially-written or cross-epoch torn slot:
10             * read_seq / read_latest return 0 if the slot is mid-write or has been
11             * overwritten to a different epoch. Safe under MPMC writers.
12             *
13             * Unlike Queue (consumed on read) or PubSub (subscription tracking),
14             * RingBuffer is a simple overwriting circular window.
15             */
16              
17             #ifndef RING_H
18             #define RING_H
19              
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <linux/futex.h>
#include <sys/eventfd.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
33              
34             #define RING_MAGIC 0x524E4732U /* "RNG2" — v2 layout: per-slot publication seq */
35             #define RING_VERSION 2
36             #define RING_ERR_BUFLEN 256
37              
38             #define RING_VAR_INT 0
39             #define RING_VAR_F64 1
40              
41             /* ================================================================
42             * Header (128 bytes)
43             * ================================================================ */
44              
/* Shared header at offset 0 of the mapping. Fixed 128-byte layout
 * (static-asserted below): first half is immutable geometry written once
 * by ring_init_header; second half (from `head`) holds the hot,
 * atomically-updated cursors. */
typedef struct {
    uint32_t magic;           /* RING_MAGIC ("RNG2") */
    uint32_t version;         /* RING_VERSION (2) */
    uint32_t elem_size;       /* bytes per slot */
    uint32_t variant_id;      /* RING_VAR_* payload tag */
    uint64_t capacity;        /* number of slots */
    uint64_t total_size;      /* whole mapping size in bytes */
    uint64_t data_off;        /* 32: offset to data array */
    uint64_t seq_off;         /* 40: offset to per-slot publication seq array */
    uint8_t _pad0[16];        /* 48-63 */

    uint64_t head;            /* 64: monotonic write cursor (next write position) */
    uint64_t count;           /* 72: total writes (for overwrite detection) */
    uint32_t waiters;         /* 80: blocked on new data */
    uint32_t wake_seq;        /* 84: FUTEX_WAIT target (avoids 64-bit count wraparound) */
    uint64_t stat_writes;     /* 88 */
    uint64_t stat_overwrites; /* 96 */
    uint8_t _pad2[24];        /* 104-127 */
} RingHeader;
64              
65             #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
66             _Static_assert(sizeof(RingHeader) == 128, "RingHeader must be 128 bytes");
67             #endif
68              
/* Per-process view of one mapped ring. `hdr`, `seq` and `data` all alias
 * the single mmap'd region of `mmap_size` bytes. */
typedef struct {
    RingHeader *hdr;    /* mapping base */
    uint8_t *data;      /* element array (capacity * elem_size bytes) */
    uint64_t *seq;      /* per-slot publication sequence (cap entries) */
    size_t mmap_size;   /* total bytes mapped */
    uint32_t elem_size; /* cached copy of hdr->elem_size */
    char *path;         /* strdup'd backing path, or NULL (anonymous/memfd) */
    int notify_fd;      /* optional eventfd for pollers; -1 until created */
    int backing_fd;     /* owned backing fd (memfd / dup), or -1 */
} RingHandle;
79              
80             /* ================================================================
81             * Slot access
82             * ================================================================ */
83              
84 165           static inline uint8_t *ring_slot(RingHandle *h, uint64_t seq) {
85 165           return h->data + (seq % h->hdr->capacity) * h->elem_size;
86             }
87              
88             /* ================================================================
89             * Write — overwrites oldest when full, always succeeds
90             *
91             * Per-slot seq encoding (uint64_t, initial 0):
92             * bit 0 = 1 (odd): writer in progress for pos = (seq >> 1) - 1
93             * bit 0 = 0 (even): data for pos = (seq >> 1) - 1 is published and stable
94             * Writers serialize on the slot via CAS (two writers at pos N and pos N+cap
95             * racing on the same slot index). Readers use a seqlock-style double-load
96             * to detect mid-write tearing.
97             * ================================================================ */
98              
/* Publish one element; never fails. Claims the next monotonic position,
 * copies `val` (truncated or zero-padded to elem_size), commits the slot's
 * publication mark, then bumps counters and wakes futex waiters.
 * Returns the absolute sequence number written. Safe under MPMC writers. */
static inline uint64_t ring_write(RingHandle *h, const void *val, uint32_t vlen) {
    RingHeader *hdr = h->hdr;
    /* Claim a unique position via fetch_add — ring overwrites, no capacity check. */
    uint64_t pos = __atomic_fetch_add(&hdr->head, 1, __ATOMIC_ACQ_REL);
    uint32_t slot_idx = (uint32_t)(pos % hdr->capacity);
    uint64_t my_writing = ((pos + 1) << 1) | 1; /* odd: writing for pos */
    uint64_t my_done = (pos + 1) << 1;          /* even: pos is committed */

    /* CAS per-slot seq from a committed (even) mark to our writing-mark.
     * If another writer is in progress (odd), spin until they commit —
     * otherwise we'd race data writes to the same slot. If a newer writer
     * has already committed (seq >> 1 > pos+1), skip: their data wins. */
    uint64_t cur = __atomic_load_n(&h->seq[slot_idx], __ATOMIC_ACQUIRE);
    int wrote = 0;
    for (;;) {
        if (cur & 1) {
            /* Another writer owns the slot; wait for them to publish. */
            cur = __atomic_load_n(&h->seq[slot_idx], __ATOMIC_ACQUIRE);
            continue;
        }
        uint64_t cur_committed = cur >> 1;
        if (cur_committed > pos + 1) break; /* newer writer already here */
        /* On CAS failure `cur` is reloaded by the builtin; re-examine it. */
        if (__atomic_compare_exchange_n(&h->seq[slot_idx], &cur, my_writing,
                                        0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
            wrote = 1; break;
        }
    }
    if (wrote) {
        uint32_t sz = h->elem_size;
        uint32_t cp = vlen < sz ? vlen : sz; /* truncate oversize input */
        memcpy(ring_slot(h, pos), val, cp);
        if (cp < sz) memset(ring_slot(h, pos) + cp, 0, sz - cp); /* zero-pad short input */
        /* RELEASE: the data writes above become visible before the commit mark. */
        __atomic_store_n(&h->seq[slot_idx], my_done, __ATOMIC_RELEASE);
    }

    uint64_t cnt = __atomic_add_fetch(&hdr->count, 1, __ATOMIC_RELEASE);
    if (cnt > hdr->capacity)
        __atomic_add_fetch(&hdr->stat_overwrites, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&hdr->stat_writes, 1, __ATOMIC_RELAXED);
    /* wake_seq is the 32-bit futex word ring_wait sleeps on. */
    __atomic_add_fetch(&hdr->wake_seq, 1, __ATOMIC_RELEASE);

    /* Wake readers */
    if (__atomic_load_n(&hdr->waiters, __ATOMIC_RELAXED) > 0)
        syscall(SYS_futex, &hdr->wake_seq, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);

    return pos;
}
146              
147             /* ================================================================
148             * Read — by relative position (0=latest) or absolute sequence
149             * ================================================================ */
150              
151             /* Read by absolute sequence number. Returns 1 if data for that seq was
152             * observed intact, 0 if not-yet-written / overwritten / mid-write. */
153 59           static inline int ring_read_seq(RingHandle *h, uint64_t seq, void *out) {
154 59           uint64_t head = __atomic_load_n(&h->hdr->head, __ATOMIC_ACQUIRE);
155 59 100         if (seq >= head) return 0; /* not yet written */
156 58 100         uint64_t oldest = (head > h->hdr->capacity) ? head - h->hdr->capacity : 0;
157 58 100         if (seq < oldest) return 0; /* already overwritten */
158              
159 57           uint32_t slot_idx = (uint32_t)(seq % h->hdr->capacity);
160 57           uint64_t expected = (seq + 1) << 1; /* even mark: pos=seq committed */
161              
162 57 50         for (int retry = 0; retry < 8; retry++) {
163 57           uint64_t s1 = __atomic_load_n(&h->seq[slot_idx], __ATOMIC_ACQUIRE);
164 57 50         if (s1 & 1) continue; /* writer in progress: spin and retry */
165 57 50         if (s1 != expected) return 0; /* stale epoch (overwritten) */
166 57           memcpy(out, ring_slot(h, seq), h->elem_size);
167 57           uint64_t s2 = __atomic_load_n(&h->seq[slot_idx], __ATOMIC_ACQUIRE);
168 57 50         if (s1 == s2) return 1; /* stable: no concurrent writer touched us */
169             }
170 0           return 0; /* too much contention to get a clean read */
171             }
172              
173             /* Read the nth most recent value (0=latest, 1=previous, ...).
174             * Returns 1 on success, 0 if n >= available entries or slot is unstable. */
175 56           static inline int ring_read_latest(RingHandle *h, uint32_t n, void *out) {
176 56           uint64_t head = __atomic_load_n(&h->hdr->head, __ATOMIC_ACQUIRE);
177 56 50         if (head == 0) return 0;
178 56           uint64_t avail = head < h->hdr->capacity ? head : h->hdr->capacity;
179 56 100         if (n >= avail) return 0;
180 54           return ring_read_seq(h, head - 1 - n, out);
181             }
182              
183             /* ================================================================
184             * Status
185             * ================================================================ */
186              
187 4           static inline uint64_t ring_head(RingHandle *h) {
188 4           return __atomic_load_n(&h->hdr->head, __ATOMIC_ACQUIRE);
189             }
190              
191 12           static inline uint64_t ring_size(RingHandle *h) {
192 12           uint64_t head = __atomic_load_n(&h->hdr->head, __ATOMIC_ACQUIRE);
193 12           return head < h->hdr->capacity ? head : h->hdr->capacity;
194             }
195              
196             /* ================================================================
197             * Wait — block until new data arrives
198             * ================================================================ */
199              
/* Turn a relative timeout in seconds (t > 0) into an absolute
 * CLOCK_MONOTONIC deadline, normalizing tv_nsec into [0, 1e9). */
static inline void ring_make_deadline(double t, struct timespec *dl) {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);
    time_t whole = (time_t)t;
    long frac_ns = (long)((t - (double)whole) * 1e9);
    dl->tv_sec = now.tv_sec + whole;
    dl->tv_nsec = now.tv_nsec + frac_ns;
    if (dl->tv_nsec >= 1000000000L) {
        dl->tv_sec += 1;
        dl->tv_nsec -= 1000000000L;
    }
}
206              
/* Store the time left until `dl` into `rem` (tv_nsec normalized).
 * Returns nonzero while the deadline has not yet passed. */
static inline int ring_remaining(const struct timespec *dl, struct timespec *rem) {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);
    time_t dsec = dl->tv_sec - now.tv_sec;
    long dnsec = dl->tv_nsec - now.tv_nsec;
    if (dnsec < 0) {
        dsec -= 1;
        dnsec += 1000000000L;
    }
    rem->tv_sec = dsec;
    rem->tv_nsec = dnsec;
    return dsec >= 0;
}
215              
/* Block until hdr->count differs from expected_count (new data arrived)
 * or the timeout expires.
 *   timeout  < 0 : wait indefinitely
 *   timeout == 0 : non-blocking poll
 *   timeout  > 0 : bounded wait against a CLOCK_MONOTONIC deadline
 * Returns 1 when a change in count was observed, 0 on timeout.
 * Sleeps on the 32-bit wake_seq word (futex needs a 32-bit comparand);
 * count is re-checked after every wake, so spurious wakeups are harmless. */
static inline int ring_wait(RingHandle *h, uint64_t expected_count, double timeout) {
    /* Fast path: data already there. */
    if (__atomic_load_n(&h->hdr->count, __ATOMIC_ACQUIRE) != expected_count) return 1;
    if (timeout == 0) return 0;

    struct timespec dl, rem;
    int has_dl = (timeout > 0);
    if (has_dl) ring_make_deadline(timeout, &dl);

    for (;;) {
        /* Register as a waiter BEFORE sampling wake_seq/count: a writer
         * that bumps wake_seq after our sample will see waiters > 0 and
         * issue FUTEX_WAKE, so no wakeup can be lost. */
        __atomic_add_fetch(&h->hdr->waiters, 1, __ATOMIC_RELEASE);
        uint32_t seq = __atomic_load_n(&h->hdr->wake_seq, __ATOMIC_ACQUIRE);
        uint64_t cur = __atomic_load_n(&h->hdr->count, __ATOMIC_ACQUIRE);
        if (cur == expected_count) {
            struct timespec *pts = NULL; /* NULL = wait forever */
            if (has_dl) {
                if (!ring_remaining(&dl, &rem)) {
                    __atomic_sub_fetch(&h->hdr->waiters, 1, __ATOMIC_RELAXED);
                    return 0; /* deadline already passed */
                }
                pts = &rem; /* FUTEX_WAIT takes a relative timeout */
            }
            /* Kernel atomically re-checks wake_seq == seq before sleeping;
             * if a writer bumped it in the meantime this returns at once. */
            syscall(SYS_futex, &h->hdr->wake_seq, FUTEX_WAIT, seq, pts, NULL, 0);
        }
        __atomic_sub_fetch(&h->hdr->waiters, 1, __ATOMIC_RELAXED);
        if (__atomic_load_n(&h->hdr->count, __ATOMIC_ACQUIRE) != expected_count) return 1;
        if (has_dl && !ring_remaining(&dl, &rem)) return 0;
    }
}
244              
245             /* ================================================================
246             * Create / Open / Close
247             * ================================================================ */
248              
249             #define RING_ERR(fmt, ...) do { if (errbuf) snprintf(errbuf, RING_ERR_BUFLEN, fmt, ##__VA_ARGS__); } while(0)
250              
251             /* Layout offsets:
252             * seq_off = sizeof(RingHeader) (128)
253             * data_off = sizeof(RingHeader) + capacity * sizeof(uint64_t)
254             * total = data_off + capacity * elem_size
255             */
256 18           static inline uint64_t ring_seq_off(void) { return sizeof(RingHeader); }
257 35           static inline uint64_t ring_data_off(uint64_t capacity) {
258 35           return sizeof(RingHeader) + capacity * sizeof(uint64_t);
259             }
260              
261 16           static inline void ring_init_header(void *base, uint64_t total,
262             uint32_t elem_size, uint32_t variant_id,
263             uint64_t capacity) {
264 16           RingHeader *hdr = (RingHeader *)base;
265 16           memset(base, 0, (size_t)total);
266 16           hdr->magic = RING_MAGIC;
267 16           hdr->version = RING_VERSION;
268 16           hdr->elem_size = elem_size;
269 16           hdr->variant_id = variant_id;
270 16           hdr->capacity = capacity;
271 16           hdr->total_size = total;
272 16           hdr->seq_off = ring_seq_off();
273 16           hdr->data_off = ring_data_off(capacity);
274 16           __atomic_thread_fence(__ATOMIC_SEQ_CST);
275 16           }
276              
277             /* Validate a mapped header (shared by ring_create reopen and ring_open_fd). */
278 2           static inline int ring_validate_header(const RingHeader *hdr, uint64_t file_size,
279             uint32_t expected_variant) {
280 2 50         if (hdr->magic != RING_MAGIC) return 0;
281 2 50         if (hdr->version != RING_VERSION) return 0;
282 2 50         if (hdr->variant_id != expected_variant) return 0;
283 2 50         if (hdr->elem_size == 0 || hdr->capacity == 0) return 0;
    50          
284             /* capacity * 8 + capacity * elem_size + header must fit in uint64_t */
285 2 50         if (hdr->capacity > (UINT64_MAX - sizeof(RingHeader)) / (sizeof(uint64_t) + hdr->elem_size)) return 0;
286 2 50         if (hdr->total_size != file_size) return 0;
287 2 50         if (hdr->seq_off != ring_seq_off()) return 0;
288 2 50         if (hdr->data_off != ring_data_off(hdr->capacity)) return 0;
289 2 50         if (hdr->total_size != hdr->data_off + hdr->capacity * hdr->elem_size) return 0;
290 2           return 1;
291             }
292              
293 18           static inline RingHandle *ring_setup(void *base, size_t ms, const char *path, int bfd) {
294 18           RingHeader *hdr = (RingHeader *)base;
295 18           RingHandle *h = (RingHandle *)calloc(1, sizeof(RingHandle));
296 18 50         if (!h) { munmap(base, ms); return NULL; }
297 18           h->hdr = hdr;
298 18           h->seq = (uint64_t *)((uint8_t *)base + hdr->seq_off);
299 18           h->data = (uint8_t *)base + hdr->data_off;
300 18           h->mmap_size = ms;
301 18           h->elem_size = hdr->elem_size;
302 18 100         h->path = path ? strdup(path) : NULL;
303 18           h->notify_fd = -1;
304 18           h->backing_fd = bfd;
305 18           return h;
306             }
307              
/* Create (or reopen) a ring. path == NULL gives a private anonymous
 * mapping (shared only with forked children); otherwise the file at
 * `path` is created/opened and mapped. Reopening an existing non-empty
 * file validates its header instead of reinitializing it.
 * Returns a handle, or NULL with a message in errbuf (RING_ERR_BUFLEN). */
static RingHandle *ring_create(const char *path, uint64_t capacity,
                               uint32_t elem_size, uint32_t variant_id,
                               char *errbuf) {
    if (errbuf) errbuf[0] = '\0';
    if (capacity == 0) { RING_ERR("capacity must be > 0"); return NULL; }
    if (elem_size == 0) { RING_ERR("elem_size must be > 0"); return NULL; }
    /* Reject sizes whose layout arithmetic would wrap uint64_t. */
    if (capacity > (UINT64_MAX - sizeof(RingHeader)) / (sizeof(uint64_t) + elem_size)) {
        RING_ERR("capacity * elem_size overflow"); return NULL;
    }

    uint64_t total = ring_data_off(capacity) + capacity * elem_size;
    int anonymous = (path == NULL);
    int fd = -1;
    size_t map_size;
    void *base;

    if (anonymous) {
        map_size = (size_t)total;
        base = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
        if (base == MAP_FAILED) { RING_ERR("mmap: %s", strerror(errno)); return NULL; }
    } else {
        fd = open(path, O_RDWR|O_CREAT, 0666);
        if (fd < 0) { RING_ERR("open: %s", strerror(errno)); return NULL; }
        /* Exclusive flock serializes concurrent creators: only one process
         * sizes and initializes the file; others reopen-and-validate. */
        if (flock(fd, LOCK_EX) < 0) { RING_ERR("flock: %s", strerror(errno)); close(fd); return NULL; }
        struct stat st;
        if (fstat(fd, &st) < 0) { RING_ERR("fstat: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL; }
        int is_new = (st.st_size == 0); /* freshly O_CREAT'd file is empty */
        if (!is_new && (uint64_t)st.st_size < sizeof(RingHeader)) {
            RING_ERR("%s: file too small (%lld)", path, (long long)st.st_size);
            flock(fd, LOCK_UN); close(fd); return NULL;
        }
        if (is_new && ftruncate(fd, (off_t)total) < 0) {
            RING_ERR("ftruncate: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL;
        }
        /* Existing file: map its actual size; validation checks it matches. */
        map_size = is_new ? (size_t)total : (size_t)st.st_size;
        base = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if (base == MAP_FAILED) { RING_ERR("mmap: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL; }
        if (!is_new) {
            if (!ring_validate_header((RingHeader *)base, (uint64_t)st.st_size, variant_id)) {
                RING_ERR("invalid ring file"); munmap(base, map_size); flock(fd, LOCK_UN); close(fd); return NULL;
            }
            /* The mapping outlives the fd; the fd is only needed for setup. */
            flock(fd, LOCK_UN); close(fd);
            return ring_setup(base, map_size, path, -1);
        }
    }
    ring_init_header(base, total, elem_size, variant_id, capacity);
    if (fd >= 0) { flock(fd, LOCK_UN); close(fd); }
    return ring_setup(base, map_size, path, -1);
}
357              
358 7           static RingHandle *ring_create_memfd(const char *name, uint64_t capacity,
359             uint32_t elem_size, uint32_t variant_id,
360             char *errbuf) {
361 7 50         if (errbuf) errbuf[0] = '\0';
362 7 50         if (capacity == 0) { RING_ERR("capacity must be > 0"); return NULL; }
    0          
363 7 50         if (elem_size == 0) { RING_ERR("elem_size must be > 0"); return NULL; }
    0          
364 7 50         if (capacity > (UINT64_MAX - sizeof(RingHeader)) / (sizeof(uint64_t) + elem_size)) {
365 0 0         RING_ERR("capacity * elem_size overflow"); return NULL;
366             }
367 7           uint64_t total = ring_data_off(capacity) + capacity * elem_size;
368 7 50         int fd = memfd_create(name ? name : "ring", MFD_CLOEXEC | MFD_ALLOW_SEALING);
369 7 50         if (fd < 0) { RING_ERR("memfd_create: %s", strerror(errno)); return NULL; }
    0          
370 7 50         if (ftruncate(fd, (off_t)total) < 0) { RING_ERR("ftruncate: %s", strerror(errno)); close(fd); return NULL; }
    0          
371 7           (void)fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);
372 7           void *base = mmap(NULL, (size_t)total, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
373 7 50         if (base == MAP_FAILED) { RING_ERR("mmap: %s", strerror(errno)); close(fd); return NULL; }
    0          
374 7           ring_init_header(base, total, elem_size, variant_id, capacity);
375 7           return ring_setup(base, (size_t)total, NULL, fd);
376             }
377              
378 1           static RingHandle *ring_open_fd(int fd, uint32_t variant_id, char *errbuf) {
379 1 50         if (errbuf) errbuf[0] = '\0';
380             struct stat st;
381 1 50         if (fstat(fd, &st) < 0) { RING_ERR("fstat: %s", strerror(errno)); return NULL; }
    0          
382 1 50         if ((uint64_t)st.st_size < sizeof(RingHeader)) { RING_ERR("too small"); return NULL; }
    0          
383 1           size_t ms = (size_t)st.st_size;
384 1           void *base = mmap(NULL, ms, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
385 1 50         if (base == MAP_FAILED) { RING_ERR("mmap: %s", strerror(errno)); return NULL; }
    0          
386 1 50         if (!ring_validate_header((RingHeader *)base, (uint64_t)st.st_size, variant_id)) {
387 0 0         RING_ERR("invalid ring"); munmap(base, ms); return NULL;
388             }
389 1           int myfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
390 1 50         if (myfd < 0) { RING_ERR("fcntl: %s", strerror(errno)); munmap(base, ms); return NULL; }
    0          
391 1           return ring_setup(base, ms, NULL, myfd);
392             }
393              
394 18           static void ring_destroy(RingHandle *h) {
395 18 50         if (!h) return;
396 18 100         if (h->notify_fd >= 0) close(h->notify_fd);
397 18 100         if (h->backing_fd >= 0) close(h->backing_fd);
398 18 50         if (h->hdr) munmap(h->hdr, h->mmap_size);
399 18           free(h->path);
400 18           free(h);
401             }
402              
403             /* NOT concurrency-safe — caller must ensure no concurrent writers/readers. */
404 1           static void ring_clear(RingHandle *h) {
405 1           uint64_t cap = h->hdr->capacity;
406             /* Reset per-slot seq: otherwise new writes at pos=0 look stale against
407             * old high seq marks. */
408 6 100         for (uint64_t i = 0; i < cap; i++)
409 5           __atomic_store_n(&h->seq[i], 0, __ATOMIC_RELAXED);
410 1           __atomic_store_n(&h->hdr->head, 0, __ATOMIC_RELEASE);
411 1           __atomic_store_n(&h->hdr->count, 0, __ATOMIC_RELEASE);
412 1           __atomic_add_fetch(&h->hdr->wake_seq, 1, __ATOMIC_RELEASE);
413             /* Wake any ring_wait callers parked with timeout=-1 so they re-check. */
414 1 50         if (__atomic_load_n(&h->hdr->waiters, __ATOMIC_RELAXED) > 0)
415 0           syscall(SYS_futex, &h->hdr->wake_seq, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
416 1           }
417              
418 2           static int ring_create_eventfd(RingHandle *h) {
419 2 50         if (h->notify_fd >= 0) return h->notify_fd;
420 2           int efd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
421 2 50         if (efd < 0) return -1;
422 2           h->notify_fd = efd; return efd;
423             }
424 2           static int ring_notify(RingHandle *h) {
425 2 50         if (h->notify_fd < 0) return 0;
426 2           uint64_t v = 1; return write(h->notify_fd, &v, sizeof(v)) == sizeof(v);
427             }
428 2           static int64_t ring_eventfd_consume(RingHandle *h) {
429 2 50         if (h->notify_fd < 0) return -1;
430 2           uint64_t v = 0;
431 2 50         if (read(h->notify_fd, &v, sizeof(v)) != sizeof(v)) return -1;
432 2           return (int64_t)v;
433             }
434 1           static int ring_msync(RingHandle *h) { return msync(h->hdr, h->mmap_size, MS_SYNC); }
435              
436             #endif /* RING_H */