| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
* ring.h -- Shared-memory fixed-size ring buffer for Linux |
|
3
|
|
|
|
|
|
|
* |
|
4
|
|
|
|
|
|
|
* Lock-free circular buffer: writes overwrite oldest when full. |
|
5
|
|
|
|
|
|
|
* Readers access by relative position (0=latest) or absolute sequence. |
|
6
|
|
|
|
|
|
|
* No consumer tracking — data persists until overwritten. |
|
7
|
|
|
|
|
|
|
* |
|
8
|
|
|
|
|
|
|
* v2 layout adds per-slot publication sequence (seqlock-per-slot), so |
|
9
|
|
|
|
|
|
|
* readers never observe a partially-written or cross-epoch torn slot: |
|
10
|
|
|
|
|
|
|
* read_seq / read_latest return 0 if the slot is mid-write or has been |
|
11
|
|
|
|
|
|
|
* overwritten to a different epoch. Safe under MPMC writers. |
|
12
|
|
|
|
|
|
|
* |
|
13
|
|
|
|
|
|
|
* Unlike Queue (consumed on read) or PubSub (subscription tracking), |
|
14
|
|
|
|
|
|
|
* RingBuffer is a simple overwriting circular window. |
|
15
|
|
|
|
|
|
|
*/ |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
#ifndef RING_H |
|
18
|
|
|
|
|
|
|
#define RING_H |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
#include |
|
21
|
|
|
|
|
|
|
#include |
|
22
|
|
|
|
|
|
|
#include |
|
23
|
|
|
|
|
|
|
#include |
|
24
|
|
|
|
|
|
|
#include |
|
25
|
|
|
|
|
|
|
#include |
|
26
|
|
|
|
|
|
|
#include |
|
27
|
|
|
|
|
|
|
#include |
|
28
|
|
|
|
|
|
|
#include |
|
29
|
|
|
|
|
|
|
#include |
|
30
|
|
|
|
|
|
|
#include |
|
31
|
|
|
|
|
|
|
#include |
|
32
|
|
|
|
|
|
|
#include |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#define RING_MAGIC 0x524E4732U /* "RNG2" — v2 layout: per-slot publication seq */
#define RING_VERSION 2
#define RING_ERR_BUFLEN 256 /* caller-supplied errbuf must be at least this many bytes */

/* variant_id tags — identify the element type stored in the ring */
#define RING_VAR_INT 0
#define RING_VAR_F64 1
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
/* ================================================================ |
|
42
|
|
|
|
|
|
|
* Header (128 bytes) |
|
43
|
|
|
|
|
|
|
* ================================================================ */ |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
/* Shared header at offset 0 of the mapping.
 * Bytes 0-63 are immutable geometry written once by ring_init_header;
 * bytes 64-127 hold the mutable cursors/counters updated by writers. */
typedef struct {
    uint32_t magic;            /* 0: RING_MAGIC ("RNG2") */
    uint32_t version;          /* 4: RING_VERSION */
    uint32_t elem_size;        /* 8: bytes per slot */
    uint32_t variant_id;       /* 12: element-type tag (RING_VAR_*) */
    uint64_t capacity;         /* 16: number of slots */
    uint64_t total_size;       /* 24: full mapping size in bytes */
    uint64_t data_off;         /* 32: offset to data array */
    uint64_t seq_off;          /* 40: offset to per-slot publication seq array */
    uint8_t _pad0[16];         /* 48-63 */

    uint64_t head;             /* 64: monotonic write cursor (next write position) */
    uint64_t count;            /* 72: total writes (for overwrite detection) */
    uint32_t waiters;          /* 80: blocked on new data */
    uint32_t wake_seq;         /* 84: FUTEX_WAIT target (avoids 64-bit count wraparound) */
    uint64_t stat_writes;      /* 88 */
    uint64_t stat_overwrites;  /* 96 */
    uint8_t _pad2[24];         /* 104-127 */
} RingHeader;

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
_Static_assert(sizeof(RingHeader) == 128, "RingHeader must be 128 bytes");
#endif
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
/* Per-process view of a mapped ring. Built by ring_setup; released by
 * ring_destroy. */
typedef struct {
    RingHeader *hdr;    /* mapping base — header lives at offset 0 */
    uint8_t *data;      /* slot payload array (base + hdr->data_off) */
    uint64_t *seq;      /* per-slot publication sequence (cap entries) */
    size_t mmap_size;   /* length passed to munmap on destroy */
    uint32_t elem_size; /* cached copy of hdr->elem_size */
    char *path;         /* strdup'd backing path, or NULL (anonymous / memfd) */
    int notify_fd;      /* optional eventfd; -1 until ring_create_eventfd */
    int backing_fd;     /* dup'd backing fd (memfd / open_fd), or -1 */
} RingHandle;
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
/* ================================================================ |
|
81
|
|
|
|
|
|
|
* Slot access |
|
82
|
|
|
|
|
|
|
* ================================================================ */ |
|
83
|
|
|
|
|
|
|
|
|
84
|
165
|
|
|
|
|
|
/* Map a monotonic sequence number to its payload slot in the data array. */
static inline uint8_t *ring_slot(RingHandle *h, uint64_t seq) {
    uint64_t idx = seq % h->hdr->capacity;
    return &h->data[idx * h->elem_size];
}
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
/* ================================================================
 * Write — overwrites oldest when full, always succeeds
 *
 * Per-slot seq encoding (uint64_t, initial 0):
 *   bit 0 = 1 (odd):  writer in progress for pos = (seq >> 1) - 1
 *   bit 0 = 0 (even): data for pos = (seq >> 1) - 1 is published and stable
 * Writers serialize on the slot via CAS (two writers at pos N and pos N+cap
 * racing on the same slot index). Readers use a seqlock-style double-load
 * to detect mid-write tearing.
 * ================================================================ */

/* Append one element; returns the sequence number assigned to it.
 * val/vlen: payload; copies min(vlen, elem_size) bytes and zero-fills the
 * rest of the slot. Never blocks on a full ring — the oldest entry is
 * overwritten instead. Safe for multiple concurrent writers. */
static inline uint64_t ring_write(RingHandle *h, const void *val, uint32_t vlen) {
    RingHeader *hdr = h->hdr;
    /* Claim a unique position via fetch_add — ring overwrites, no capacity check. */
    uint64_t pos = __atomic_fetch_add(&hdr->head, 1, __ATOMIC_ACQ_REL);
    uint32_t slot_idx = (uint32_t)(pos % hdr->capacity);
    uint64_t my_writing = ((pos + 1) << 1) | 1; /* odd: writing for pos */
    uint64_t my_done = (pos + 1) << 1; /* even: pos is committed */

    /* CAS per-slot seq from a committed (even) mark to our writing-mark.
     * If another writer is in progress (odd), spin until they commit —
     * otherwise we'd race data writes to the same slot. If a newer writer
     * has already committed (seq >> 1 > pos+1), skip: their data wins. */
    uint64_t cur = __atomic_load_n(&h->seq[slot_idx], __ATOMIC_ACQUIRE);
    int wrote = 0;
    for (;;) {
        if (cur & 1) {
            /* Another writer owns the slot; wait for them to publish. */
            cur = __atomic_load_n(&h->seq[slot_idx], __ATOMIC_ACQUIRE);
            continue;
        }
        uint64_t cur_committed = cur >> 1;
        if (cur_committed > pos + 1) break; /* newer writer already here */
        /* On CAS failure `cur` is refreshed with the observed value. */
        if (__atomic_compare_exchange_n(&h->seq[slot_idx], &cur, my_writing,
                                        0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
            wrote = 1; break;
        }
    }
    if (wrote) {
        uint32_t sz = h->elem_size;
        uint32_t cp = vlen < sz ? vlen : sz;
        memcpy(ring_slot(h, pos), val, cp);
        if (cp < sz) memset(ring_slot(h, pos) + cp, 0, sz - cp);
        /* RELEASE publish: readers that see my_done also see the payload. */
        __atomic_store_n(&h->seq[slot_idx], my_done, __ATOMIC_RELEASE);
    }

    /* Bump counters even when our write was superseded — pos was consumed. */
    uint64_t cnt = __atomic_add_fetch(&hdr->count, 1, __ATOMIC_RELEASE);
    if (cnt > hdr->capacity)
        __atomic_add_fetch(&hdr->stat_overwrites, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&hdr->stat_writes, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&hdr->wake_seq, 1, __ATOMIC_RELEASE);

    /* Wake readers parked in ring_wait (futex on the 32-bit wake_seq). */
    if (__atomic_load_n(&hdr->waiters, __ATOMIC_RELAXED) > 0)
        syscall(SYS_futex, &hdr->wake_seq, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);

    return pos;
}
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
/* ================================================================ |
|
148
|
|
|
|
|
|
|
* Read — by relative position (0=latest) or absolute sequence |
|
149
|
|
|
|
|
|
|
* ================================================================ */ |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
/* Read by absolute sequence number. Returns 1 if data for that seq was
 * observed intact, 0 if not-yet-written / overwritten / mid-write.
 *
 * Seqlock-style read: load the slot's publication mark, copy the payload,
 * load the mark again — a consistent pair of even marks equal to this
 * seq's epoch proves the copy was not torn by a concurrent writer. */
static inline int ring_read_seq(RingHandle *h, uint64_t seq, void *out) {
    uint64_t head = __atomic_load_n(&h->hdr->head, __ATOMIC_ACQUIRE);
    if (seq >= head) return 0; /* not yet written */
    uint64_t oldest = (head > h->hdr->capacity) ? head - h->hdr->capacity : 0;
    if (seq < oldest) return 0; /* already overwritten */

    uint32_t slot_idx = (uint32_t)(seq % h->hdr->capacity);
    uint64_t expected = (seq + 1) << 1; /* even mark: pos=seq committed */

    /* Bounded retries: a 'continue' on a mid-write mark also consumes one. */
    for (int retry = 0; retry < 8; retry++) {
        uint64_t s1 = __atomic_load_n(&h->seq[slot_idx], __ATOMIC_ACQUIRE);
        if (s1 & 1) continue; /* writer in progress: spin and retry */
        if (s1 != expected) return 0; /* stale epoch (overwritten) */
        memcpy(out, ring_slot(h, seq), h->elem_size);
        uint64_t s2 = __atomic_load_n(&h->seq[slot_idx], __ATOMIC_ACQUIRE);
        if (s1 == s2) return 1; /* stable: no concurrent writer touched us */
    }
    return 0; /* too much contention to get a clean read */
}
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
/* Read the nth most recent value (0=latest, 1=previous, ...). |
|
174
|
|
|
|
|
|
|
* Returns 1 on success, 0 if n >= available entries or slot is unstable. */ |
|
175
|
56
|
|
|
|
|
|
static inline int ring_read_latest(RingHandle *h, uint32_t n, void *out) { |
|
176
|
56
|
|
|
|
|
|
uint64_t head = __atomic_load_n(&h->hdr->head, __ATOMIC_ACQUIRE); |
|
177
|
56
|
50
|
|
|
|
|
if (head == 0) return 0; |
|
178
|
56
|
|
|
|
|
|
uint64_t avail = head < h->hdr->capacity ? head : h->hdr->capacity; |
|
179
|
56
|
100
|
|
|
|
|
if (n >= avail) return 0; |
|
180
|
54
|
|
|
|
|
|
return ring_read_seq(h, head - 1 - n, out); |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
/* ================================================================ |
|
184
|
|
|
|
|
|
|
* Status |
|
185
|
|
|
|
|
|
|
* ================================================================ */ |
|
186
|
|
|
|
|
|
|
|
|
187
|
4
|
|
|
|
|
|
/* Current monotonic write cursor (sequence number of the next write). */
static inline uint64_t ring_head(RingHandle *h) {
    RingHeader *hdr = h->hdr;
    return __atomic_load_n(&hdr->head, __ATOMIC_ACQUIRE);
}
|
190
|
|
|
|
|
|
|
|
|
191
|
12
|
|
|
|
|
|
/* Number of readable entries: equals head until the ring fills, then capacity. */
static inline uint64_t ring_size(RingHandle *h) {
    uint64_t cap = h->hdr->capacity;
    uint64_t written = __atomic_load_n(&h->hdr->head, __ATOMIC_ACQUIRE);
    return (written > cap) ? cap : written;
}
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
/* ================================================================ |
|
197
|
|
|
|
|
|
|
* Wait — block until new data arrives |
|
198
|
|
|
|
|
|
|
* ================================================================ */ |
|
199
|
|
|
|
|
|
|
|
|
200
|
2
|
|
|
|
|
|
static inline void ring_make_deadline(double t, struct timespec *dl) { |
|
201
|
2
|
|
|
|
|
|
clock_gettime(CLOCK_MONOTONIC, dl); |
|
202
|
2
|
|
|
|
|
|
dl->tv_sec += (time_t)t; |
|
203
|
2
|
|
|
|
|
|
dl->tv_nsec += (long)((t - (double)(time_t)t) * 1e9); |
|
204
|
2
|
100
|
|
|
|
|
if (dl->tv_nsec >= 1000000000L) { dl->tv_sec++; dl->tv_nsec -= 1000000000L; } |
|
205
|
2
|
|
|
|
|
|
} |
|
206
|
|
|
|
|
|
|
|
|
207
|
3
|
|
|
|
|
|
static inline int ring_remaining(const struct timespec *dl, struct timespec *rem) { |
|
208
|
|
|
|
|
|
|
struct timespec now; |
|
209
|
3
|
|
|
|
|
|
clock_gettime(CLOCK_MONOTONIC, &now); |
|
210
|
3
|
|
|
|
|
|
rem->tv_sec = dl->tv_sec - now.tv_sec; |
|
211
|
3
|
|
|
|
|
|
rem->tv_nsec = dl->tv_nsec - now.tv_nsec; |
|
212
|
3
|
50
|
|
|
|
|
if (rem->tv_nsec < 0) { rem->tv_sec--; rem->tv_nsec += 1000000000L; } |
|
213
|
3
|
|
|
|
|
|
return rem->tv_sec >= 0; |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
|
|
216
|
2
|
|
|
|
|
|
/* Block until hdr->count differs from expected_count (new data arrived).
 * Returns 1 when new data was observed, 0 on timeout.
 * timeout: 0 = non-blocking poll; > 0 = seconds to wait; < 0 = wait forever.
 * The futex word is the 32-bit wake_seq, not the 64-bit count, because
 * FUTEX_WAIT compares a 32-bit value. */
static inline int ring_wait(RingHandle *h, uint64_t expected_count, double timeout) {
    if (__atomic_load_n(&h->hdr->count, __ATOMIC_ACQUIRE) != expected_count) return 1;
    if (timeout == 0) return 0;

    struct timespec dl, rem;
    int has_dl = (timeout > 0);
    if (has_dl) ring_make_deadline(timeout, &dl);

    for (;;) {
        /* Register as waiter BEFORE sampling wake_seq/count so a writer
         * publishing in between is guaranteed to issue FUTEX_WAKE. */
        __atomic_add_fetch(&h->hdr->waiters, 1, __ATOMIC_RELEASE);
        uint32_t seq = __atomic_load_n(&h->hdr->wake_seq, __ATOMIC_ACQUIRE);
        uint64_t cur = __atomic_load_n(&h->hdr->count, __ATOMIC_ACQUIRE);
        if (cur == expected_count) {
            struct timespec *pts = NULL;
            if (has_dl) {
                if (!ring_remaining(&dl, &rem)) {
                    /* Deadline already passed — deregister and report timeout. */
                    __atomic_sub_fetch(&h->hdr->waiters, 1, __ATOMIC_RELAXED);
                    return 0;
                }
                pts = &rem;
            }
            /* Kernel sleeps only if wake_seq still equals seq (no lost wakeup). */
            syscall(SYS_futex, &h->hdr->wake_seq, FUTEX_WAIT, seq, pts, NULL, 0);
        }
        __atomic_sub_fetch(&h->hdr->waiters, 1, __ATOMIC_RELAXED);
        if (__atomic_load_n(&h->hdr->count, __ATOMIC_ACQUIRE) != expected_count) return 1;
        if (has_dl && !ring_remaining(&dl, &rem)) return 0;
    }
}
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
/* ================================================================ |
|
246
|
|
|
|
|
|
|
* Create / Open / Close |
|
247
|
|
|
|
|
|
|
* ================================================================ */ |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
#define RING_ERR(fmt, ...) do { if (errbuf) snprintf(errbuf, RING_ERR_BUFLEN, fmt, ##__VA_ARGS__); } while(0) |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
/* Layout offsets: |
|
252
|
|
|
|
|
|
|
* seq_off = sizeof(RingHeader) (128) |
|
253
|
|
|
|
|
|
|
* data_off = sizeof(RingHeader) + capacity * sizeof(uint64_t) |
|
254
|
|
|
|
|
|
|
* total = data_off + capacity * elem_size |
|
255
|
|
|
|
|
|
|
*/ |
|
256
|
18
|
|
|
|
|
|
/* Byte offset of the per-slot publication-sequence array: it begins
 * immediately after the fixed 128-byte header. */
static inline uint64_t ring_seq_off(void) {
    return sizeof(RingHeader);
}
|
257
|
35
|
|
|
|
|
|
static inline uint64_t ring_data_off(uint64_t capacity) { |
|
258
|
35
|
|
|
|
|
|
return sizeof(RingHeader) + capacity * sizeof(uint64_t); |
|
259
|
|
|
|
|
|
|
} |
|
260
|
|
|
|
|
|
|
|
|
261
|
16
|
|
|
|
|
|
static inline void ring_init_header(void *base, uint64_t total, |
|
262
|
|
|
|
|
|
|
uint32_t elem_size, uint32_t variant_id, |
|
263
|
|
|
|
|
|
|
uint64_t capacity) { |
|
264
|
16
|
|
|
|
|
|
RingHeader *hdr = (RingHeader *)base; |
|
265
|
16
|
|
|
|
|
|
memset(base, 0, (size_t)total); |
|
266
|
16
|
|
|
|
|
|
hdr->magic = RING_MAGIC; |
|
267
|
16
|
|
|
|
|
|
hdr->version = RING_VERSION; |
|
268
|
16
|
|
|
|
|
|
hdr->elem_size = elem_size; |
|
269
|
16
|
|
|
|
|
|
hdr->variant_id = variant_id; |
|
270
|
16
|
|
|
|
|
|
hdr->capacity = capacity; |
|
271
|
16
|
|
|
|
|
|
hdr->total_size = total; |
|
272
|
16
|
|
|
|
|
|
hdr->seq_off = ring_seq_off(); |
|
273
|
16
|
|
|
|
|
|
hdr->data_off = ring_data_off(capacity); |
|
274
|
16
|
|
|
|
|
|
__atomic_thread_fence(__ATOMIC_SEQ_CST); |
|
275
|
16
|
|
|
|
|
|
} |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
/* Validate a mapped header (shared by ring_create reopen and ring_open_fd). */ |
|
278
|
2
|
|
|
|
|
|
static inline int ring_validate_header(const RingHeader *hdr, uint64_t file_size, |
|
279
|
|
|
|
|
|
|
uint32_t expected_variant) { |
|
280
|
2
|
50
|
|
|
|
|
if (hdr->magic != RING_MAGIC) return 0; |
|
281
|
2
|
50
|
|
|
|
|
if (hdr->version != RING_VERSION) return 0; |
|
282
|
2
|
50
|
|
|
|
|
if (hdr->variant_id != expected_variant) return 0; |
|
283
|
2
|
50
|
|
|
|
|
if (hdr->elem_size == 0 || hdr->capacity == 0) return 0; |
|
|
|
50
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
/* capacity * 8 + capacity * elem_size + header must fit in uint64_t */ |
|
285
|
2
|
50
|
|
|
|
|
if (hdr->capacity > (UINT64_MAX - sizeof(RingHeader)) / (sizeof(uint64_t) + hdr->elem_size)) return 0; |
|
286
|
2
|
50
|
|
|
|
|
if (hdr->total_size != file_size) return 0; |
|
287
|
2
|
50
|
|
|
|
|
if (hdr->seq_off != ring_seq_off()) return 0; |
|
288
|
2
|
50
|
|
|
|
|
if (hdr->data_off != ring_data_off(hdr->capacity)) return 0; |
|
289
|
2
|
50
|
|
|
|
|
if (hdr->total_size != hdr->data_off + hdr->capacity * hdr->elem_size) return 0; |
|
290
|
2
|
|
|
|
|
|
return 1; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
18
|
|
|
|
|
|
static inline RingHandle *ring_setup(void *base, size_t ms, const char *path, int bfd) { |
|
294
|
18
|
|
|
|
|
|
RingHeader *hdr = (RingHeader *)base; |
|
295
|
18
|
|
|
|
|
|
RingHandle *h = (RingHandle *)calloc(1, sizeof(RingHandle)); |
|
296
|
18
|
50
|
|
|
|
|
if (!h) { munmap(base, ms); return NULL; } |
|
297
|
18
|
|
|
|
|
|
h->hdr = hdr; |
|
298
|
18
|
|
|
|
|
|
h->seq = (uint64_t *)((uint8_t *)base + hdr->seq_off); |
|
299
|
18
|
|
|
|
|
|
h->data = (uint8_t *)base + hdr->data_off; |
|
300
|
18
|
|
|
|
|
|
h->mmap_size = ms; |
|
301
|
18
|
|
|
|
|
|
h->elem_size = hdr->elem_size; |
|
302
|
18
|
100
|
|
|
|
|
h->path = path ? strdup(path) : NULL; |
|
303
|
18
|
|
|
|
|
|
h->notify_fd = -1; |
|
304
|
18
|
|
|
|
|
|
h->backing_fd = bfd; |
|
305
|
18
|
|
|
|
|
|
return h; |
|
306
|
|
|
|
|
|
|
} |
|
307
|
|
|
|
|
|
|
|
|
308
|
10
|
|
|
|
|
|
/* Create (or reopen) a ring.
 * path == NULL  -> MAP_ANONYMOUS|MAP_SHARED mapping (shareable via fork only).
 * path != NULL  -> file-backed: a new/zero-length file is sized with
 *                  ftruncate and initialized; an existing file is validated
 *                  and reopened as-is (requested capacity/elem_size are
 *                  ignored in that case — the file's header wins).
 * An exclusive flock() covers the whole inspect/truncate/init sequence so
 * two creators cannot both initialize the header.
 * Returns NULL with a message in errbuf (RING_ERR_BUFLEN bytes) on failure. */
static RingHandle *ring_create(const char *path, uint64_t capacity,
                               uint32_t elem_size, uint32_t variant_id,
                               char *errbuf) {
    if (errbuf) errbuf[0] = '\0';
    if (capacity == 0) { RING_ERR("capacity must be > 0"); return NULL; }
    if (elem_size == 0) { RING_ERR("elem_size must be > 0"); return NULL; }
    if (capacity > (UINT64_MAX - sizeof(RingHeader)) / (sizeof(uint64_t) + elem_size)) {
        RING_ERR("capacity * elem_size overflow"); return NULL;
    }

    uint64_t total = ring_data_off(capacity) + capacity * elem_size;
    int anonymous = (path == NULL);
    int fd = -1;
    size_t map_size;
    void *base;

    if (anonymous) {
        map_size = (size_t)total;
        base = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
        if (base == MAP_FAILED) { RING_ERR("mmap: %s", strerror(errno)); return NULL; }
    } else {
        fd = open(path, O_RDWR|O_CREAT, 0666);
        if (fd < 0) { RING_ERR("open: %s", strerror(errno)); return NULL; }
        /* Exclusive lock: serializes concurrent creators on the same file. */
        if (flock(fd, LOCK_EX) < 0) { RING_ERR("flock: %s", strerror(errno)); close(fd); return NULL; }
        struct stat st;
        if (fstat(fd, &st) < 0) { RING_ERR("fstat: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL; }
        int is_new = (st.st_size == 0);
        /* Non-empty but smaller than a header: not a ring file. */
        if (!is_new && (uint64_t)st.st_size < sizeof(RingHeader)) {
            RING_ERR("%s: file too small (%lld)", path, (long long)st.st_size);
            flock(fd, LOCK_UN); close(fd); return NULL;
        }
        if (is_new && ftruncate(fd, (off_t)total) < 0) {
            RING_ERR("ftruncate: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL;
        }
        /* Existing file: map its actual size, not the requested geometry. */
        map_size = is_new ? (size_t)total : (size_t)st.st_size;
        base = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if (base == MAP_FAILED) { RING_ERR("mmap: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL; }
        if (!is_new) {
            if (!ring_validate_header((RingHeader *)base, (uint64_t)st.st_size, variant_id)) {
                RING_ERR("invalid ring file"); munmap(base, map_size); flock(fd, LOCK_UN); close(fd); return NULL;
            }
            /* Reopen path: fd not kept; the mapping outlives the descriptor. */
            flock(fd, LOCK_UN); close(fd);
            return ring_setup(base, map_size, path, -1);
        }
    }
    /* Fresh mapping (anonymous or newly-truncated file): stamp the header
     * while still holding the flock so no other opener sees a half-init. */
    ring_init_header(base, total, elem_size, variant_id, capacity);
    if (fd >= 0) { flock(fd, LOCK_UN); close(fd); }
    return ring_setup(base, map_size, path, -1);
}
|
357
|
|
|
|
|
|
|
|
|
358
|
7
|
|
|
|
|
|
static RingHandle *ring_create_memfd(const char *name, uint64_t capacity, |
|
359
|
|
|
|
|
|
|
uint32_t elem_size, uint32_t variant_id, |
|
360
|
|
|
|
|
|
|
char *errbuf) { |
|
361
|
7
|
50
|
|
|
|
|
if (errbuf) errbuf[0] = '\0'; |
|
362
|
7
|
50
|
|
|
|
|
if (capacity == 0) { RING_ERR("capacity must be > 0"); return NULL; } |
|
|
|
0
|
|
|
|
|
|
|
363
|
7
|
50
|
|
|
|
|
if (elem_size == 0) { RING_ERR("elem_size must be > 0"); return NULL; } |
|
|
|
0
|
|
|
|
|
|
|
364
|
7
|
50
|
|
|
|
|
if (capacity > (UINT64_MAX - sizeof(RingHeader)) / (sizeof(uint64_t) + elem_size)) { |
|
365
|
0
|
0
|
|
|
|
|
RING_ERR("capacity * elem_size overflow"); return NULL; |
|
366
|
|
|
|
|
|
|
} |
|
367
|
7
|
|
|
|
|
|
uint64_t total = ring_data_off(capacity) + capacity * elem_size; |
|
368
|
7
|
50
|
|
|
|
|
int fd = memfd_create(name ? name : "ring", MFD_CLOEXEC | MFD_ALLOW_SEALING); |
|
369
|
7
|
50
|
|
|
|
|
if (fd < 0) { RING_ERR("memfd_create: %s", strerror(errno)); return NULL; } |
|
|
|
0
|
|
|
|
|
|
|
370
|
7
|
50
|
|
|
|
|
if (ftruncate(fd, (off_t)total) < 0) { RING_ERR("ftruncate: %s", strerror(errno)); close(fd); return NULL; } |
|
|
|
0
|
|
|
|
|
|
|
371
|
7
|
|
|
|
|
|
(void)fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW); |
|
372
|
7
|
|
|
|
|
|
void *base = mmap(NULL, (size_t)total, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); |
|
373
|
7
|
50
|
|
|
|
|
if (base == MAP_FAILED) { RING_ERR("mmap: %s", strerror(errno)); close(fd); return NULL; } |
|
|
|
0
|
|
|
|
|
|
|
374
|
7
|
|
|
|
|
|
ring_init_header(base, total, elem_size, variant_id, capacity); |
|
375
|
7
|
|
|
|
|
|
return ring_setup(base, (size_t)total, NULL, fd); |
|
376
|
|
|
|
|
|
|
} |
|
377
|
|
|
|
|
|
|
|
|
378
|
1
|
|
|
|
|
|
static RingHandle *ring_open_fd(int fd, uint32_t variant_id, char *errbuf) { |
|
379
|
1
|
50
|
|
|
|
|
if (errbuf) errbuf[0] = '\0'; |
|
380
|
|
|
|
|
|
|
struct stat st; |
|
381
|
1
|
50
|
|
|
|
|
if (fstat(fd, &st) < 0) { RING_ERR("fstat: %s", strerror(errno)); return NULL; } |
|
|
|
0
|
|
|
|
|
|
|
382
|
1
|
50
|
|
|
|
|
if ((uint64_t)st.st_size < sizeof(RingHeader)) { RING_ERR("too small"); return NULL; } |
|
|
|
0
|
|
|
|
|
|
|
383
|
1
|
|
|
|
|
|
size_t ms = (size_t)st.st_size; |
|
384
|
1
|
|
|
|
|
|
void *base = mmap(NULL, ms, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); |
|
385
|
1
|
50
|
|
|
|
|
if (base == MAP_FAILED) { RING_ERR("mmap: %s", strerror(errno)); return NULL; } |
|
|
|
0
|
|
|
|
|
|
|
386
|
1
|
50
|
|
|
|
|
if (!ring_validate_header((RingHeader *)base, (uint64_t)st.st_size, variant_id)) { |
|
387
|
0
|
0
|
|
|
|
|
RING_ERR("invalid ring"); munmap(base, ms); return NULL; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
1
|
|
|
|
|
|
int myfd = fcntl(fd, F_DUPFD_CLOEXEC, 0); |
|
390
|
1
|
50
|
|
|
|
|
if (myfd < 0) { RING_ERR("fcntl: %s", strerror(errno)); munmap(base, ms); return NULL; } |
|
|
|
0
|
|
|
|
|
|
|
391
|
1
|
|
|
|
|
|
return ring_setup(base, ms, NULL, myfd); |
|
392
|
|
|
|
|
|
|
} |
|
393
|
|
|
|
|
|
|
|
|
394
|
18
|
|
|
|
|
|
/* Release all per-process resources: eventfd, backing fd, mapping, handle.
 * Does not delete a file-backed ring's file. Safe to call with NULL. */
static void ring_destroy(RingHandle *h) {
    if (h == NULL)
        return;
    if (h->notify_fd >= 0)
        close(h->notify_fd);
    if (h->backing_fd >= 0)
        close(h->backing_fd);
    if (h->hdr != NULL)
        munmap(h->hdr, h->mmap_size);
    free(h->path);
    free(h);
}
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
/* NOT concurrency-safe — caller must ensure no concurrent writers/readers. */ |
|
404
|
1
|
|
|
|
|
|
static void ring_clear(RingHandle *h) { |
|
405
|
1
|
|
|
|
|
|
uint64_t cap = h->hdr->capacity; |
|
406
|
|
|
|
|
|
|
/* Reset per-slot seq: otherwise new writes at pos=0 look stale against |
|
407
|
|
|
|
|
|
|
* old high seq marks. */ |
|
408
|
6
|
100
|
|
|
|
|
for (uint64_t i = 0; i < cap; i++) |
|
409
|
5
|
|
|
|
|
|
__atomic_store_n(&h->seq[i], 0, __ATOMIC_RELAXED); |
|
410
|
1
|
|
|
|
|
|
__atomic_store_n(&h->hdr->head, 0, __ATOMIC_RELEASE); |
|
411
|
1
|
|
|
|
|
|
__atomic_store_n(&h->hdr->count, 0, __ATOMIC_RELEASE); |
|
412
|
1
|
|
|
|
|
|
__atomic_add_fetch(&h->hdr->wake_seq, 1, __ATOMIC_RELEASE); |
|
413
|
|
|
|
|
|
|
/* Wake any ring_wait callers parked with timeout=-1 so they re-check. */ |
|
414
|
1
|
50
|
|
|
|
|
if (__atomic_load_n(&h->hdr->waiters, __ATOMIC_RELAXED) > 0) |
|
415
|
0
|
|
|
|
|
|
syscall(SYS_futex, &h->hdr->wake_seq, FUTEX_WAKE, INT_MAX, NULL, NULL, 0); |
|
416
|
1
|
|
|
|
|
|
} |
|
417
|
|
|
|
|
|
|
|
|
418
|
2
|
|
|
|
|
|
/* Lazily create the ring's notification eventfd (non-blocking, CLOEXEC).
 * Returns the fd, reusing an existing one; -1 if eventfd() fails. */
static int ring_create_eventfd(RingHandle *h) {
    if (h->notify_fd >= 0)
        return h->notify_fd;
    int fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
    if (fd < 0)
        return -1;
    h->notify_fd = fd;
    return fd;
}
|
424
|
2
|
|
|
|
|
|
/* Signal the eventfd (increment its counter by 1).
 * Returns 1 on a full 8-byte write, 0 if no eventfd exists or write fails. */
static int ring_notify(RingHandle *h) {
    if (h->notify_fd < 0)
        return 0;
    uint64_t one = 1;
    ssize_t n = write(h->notify_fd, &one, sizeof one);
    return n == (ssize_t)sizeof one;
}
|
428
|
2
|
|
|
|
|
|
/* Drain the eventfd and return its accumulated counter value.
 * Returns -1 if no eventfd exists or the (non-blocking) read fails. */
static int64_t ring_eventfd_consume(RingHandle *h) {
    if (h->notify_fd < 0)
        return -1;
    uint64_t counter = 0;
    ssize_t n = read(h->notify_fd, &counter, sizeof counter);
    if (n != (ssize_t)sizeof counter)
        return -1;
    return (int64_t)counter;
}
|
434
|
1
|
|
|
|
|
|
/* Synchronously flush the whole mapping to its backing store.
 * Returns msync's result: 0 on success, -1 on error. */
static int ring_msync(RingHandle *h) {
    return msync(h->hdr, h->mmap_size, MS_SYNC);
}
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
#endif /* RING_H */ |