File Coverage

hist.h
Criterion Covered Total %
statement 300 430 69.7
branch 134 318 42.1
condition n/a
subroutine n/a
pod n/a
total 434 748 58.0


line stmt bran cond sub pod time code
1             /*
2             * hist.h -- Shared-memory HdrHistogram for Linux
3             *
4             * A High Dynamic Range histogram: records integer values across a very wide
5             * range and answers percentile / min / max / mean queries within a fixed,
6             * configurable relative error. Values are bucketed logarithmically (one bucket
7             * per power of two of magnitude) and linearly within each bucket (a fixed
8             * number of sub-buckets per power of two), so a constant number of significant
9             * figures is preserved across the whole range. The counts array lives in a
10             * shared mapping so several processes share one histogram; a write-preferring
11             * futex rwlock with reader-slot dead-process recovery guards mutation. Two
12             * histograms of equal geometry can be merged (cellwise add -> combined stream).
13             *
14             * Layout: Header -> reader_slots[1024] -> counts[counts_len] (each int64_t)
15             */
16              
17             #ifndef HIST_H
18             #define HIST_H
19              
20             #include
21             #include
22             #include
23             #include
24             #include
25             #include
26             #include
27             #include
28             #include
29             #include
30             #include
31             #include
32             #include
33             #include
34             #include
35             #include
36             #include
37              
38             #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
39             #error "hist.h: requires little-endian architecture"
40             #endif
41              
42              
43             /* ================================================================
44             * Constants
45             * ================================================================ */
46              
/* Identity of the shared mapping: the magic word and layout version that
 * are written into the header when it is initialised. */
#define HIST_MAGIC 0x54534948U /* "HIST" (little-endian) */
#define HIST_VERSION 1
/* Size handed to snprintf by HIST_ERR; an error buffer passed to any
 * function using HIST_ERR must be at least this many bytes. */
#define HIST_ERR_BUFLEN 256
#define HIST_READER_SLOTS 1024 /* max concurrent reader processes for dead-process recovery */
#define HIST_MIN_SIG 1 /* significant figures range */
#define HIST_MAX_SIG 5

/* Format an error message into a variable named `errbuf`, which must be in
 * scope at the expansion site. Does nothing when `errbuf` is NULL; output
 * is truncated by snprintf to HIST_ERR_BUFLEN bytes (terminator included). */
#define HIST_ERR(fmt, ...) do { if (errbuf) snprintf(errbuf, HIST_ERR_BUFLEN, fmt, ##__VA_ARGS__); } while (0)
55              
56             /* ================================================================
57             * Structs
58             * ================================================================ */
59              
/*
 * One entry of the per-process reader-slot table used for dead-process
 * recovery.
 *
 * Every shared rwlock counter (the reader count in the rwlock word,
 * rwlock_waiters and rwlock_writers_waiting) has a per-process mirror
 * here. When a lock wait times out, the recovery scan can therefore
 * attribute a dead process's share of those counters and reverse it,
 * instead of waiting for the slow per-operation timeout drain.
 */
typedef struct {
    /* Owning process id; 0 means the slot is unclaimed. */
    uint32_t pid;
    /* Number of rdlock acquisitions this process currently has in flight. */
    uint32_t subcount;
    /* This process's contribution to hdr->rwlock_waiters. */
    uint32_t waiters_parked;
    /* This process's contribution to hdr->rwlock_writers_waiting. */
    uint32_t writers_parked;
} HistReaderSlot;
71              
/*
 * Header at offset 0 of the shared mapping. The layout is fixed at exactly
 * 256 bytes (enforced by the static assertion below); the number in each
 * member comment is that member's byte offset.
 */
struct HistHeader {
    uint32_t magic;                           /*   0 */
    uint32_t version;                         /*   4 */
    uint32_t _pad0;                           /*   8 */
    uint32_t _pad1;                           /*  12 */

    /* ---- configuration ---- */
    int64_t  lowest;                          /*  16 lowest trackable value (>= 1) */
    int64_t  highest;                         /*  24 highest trackable value */
    int32_t  sig_figs;                        /*  32 significant figures, [1,5] */
    int32_t  unit_magnitude;                  /*  36 floor(log2(lowest)) */

    /* ---- derived geometry ---- */
    int32_t  sub_bucket_count_magnitude;      /*  40 */
    int32_t  sub_bucket_half_count_magnitude; /*  44 */
    int32_t  sub_bucket_count;                /*  48 */
    int32_t  sub_bucket_half_count;           /*  52 */
    int64_t  sub_bucket_mask;                 /*  56 */
    int32_t  bucket_count;                    /*  64 */
    int32_t  _pad2;                           /*  68 */
    int64_t  counts_len;                      /*  72 number of int64 counts */

    /* ---- recorded data ---- */
    int64_t  total_count;                     /*  80 sum of all recorded counts */
    int64_t  min_value;                       /*  88 min recorded value (INT64_MAX init) */
    int64_t  max_value;                       /*  96 max recorded value (0 init) */

    /* ---- offsets / size ---- */
    uint64_t total_size;                      /* 104 */
    uint64_t reader_slots_off;                /* 112 */
    uint64_t counts_off;                      /* 120 */

    /* ---- lock + stats ---- */
    uint32_t rwlock;                          /* 128 */
    uint32_t rwlock_waiters;                  /* 132 */
    uint32_t rwlock_writers_waiting;          /* 136 */
    uint32_t _pad3;                           /* 140 */
    uint64_t stat_ops;                        /* 144 */
    uint8_t  _pad[104];                       /* 152..255 */
};
typedef struct HistHeader HistHeader;

_Static_assert(sizeof(HistHeader) == 256, "HistHeader must be 256 bytes");
114              
115             /* ---- Process-local handle ---- */
116              
117             typedef struct HistHandle {
118             HistHeader *hdr;
119             HistReaderSlot *reader_slots; /* HIST_READER_SLOTS entries */
120             void *base; /* mmap base */
121             size_t mmap_size;
122             char *path; /* backing file path (strdup'd) */
123             int backing_fd; /* memfd or reopened-fd to close on destroy, -1 for file/anon */
124             uint32_t my_slot_idx; /* UINT32_MAX if all slots taken (no recovery for this handle) */
125             uint32_t cached_pid; /* getpid() cached at last slot claim */
126             uint32_t cached_fork_gen; /* hist_fork_gen value at last slot claim */
127             } HistHandle;
128              
129             /* ================================================================
130             * Futex-based write-preferring read-write lock
131             * with reader-slot dead-process recovery
132             * ================================================================ */
133              
#define HIST_RWLOCK_SPIN_LIMIT 32
#define HIST_LOCK_TIMEOUT_SEC 2 /* FUTEX_WAIT timeout for stale lock detection */

/*
 * Busy-wait hint executed between lock acquisition retries.
 *
 * Emits the architecture's spin hint where one is known ("yield" on
 * aarch64, "pause" on x86/x86-64) and an empty asm statement elsewhere.
 * Every variant carries a "memory" clobber, so it also acts as a compiler
 * barrier.
 */
static inline void hist_rwlock_spin_pause(void) {
#if defined(__aarch64__)
    __asm__ volatile("yield" ::: "memory");
#elif defined(__x86_64__) || defined(__i386__)
    __asm__ volatile("pause" ::: "memory");
#else
    __asm__ volatile("" ::: "memory");
#endif
}
146              
/* Encoding of the rwlock word while write-locked: the top bit marks
 * "held by a writer" and the lower 31 bits carry the writer's PID.
 * HIST_RWLOCK_WR(pid) builds that word; mask a write-locked value with
 * HIST_RWLOCK_PID_MASK to extract the writer PID again. */
#define HIST_RWLOCK_WRITER_BIT 0x80000000U
#define HIST_RWLOCK_PID_MASK 0x7FFFFFFFU
#define HIST_RWLOCK_WR(pid) (HIST_RWLOCK_WRITER_BIT | ((uint32_t)(pid) & HIST_RWLOCK_PID_MASK))
151              
/*
 * Report whether process `pid` may still be running.
 *
 * Returns 0 only when the process is definitely gone, i.e. kill(pid, 0)
 * fails with ESRCH. Returns 1 when the process is alive or its state is
 * unknown: pid 0 (no owner recorded) and any other kill() failure both
 * count as alive.
 *
 * NOTE: the kill(pid, 0) probe cannot detect PID reuse. If a dead
 * lock-holder's PID has been recycled to an unrelated live process before
 * recovery runs, this reports "alive" and that slot's orphaned
 * contribution is not reclaimed until the recycled process exits. Robust
 * detection would require a per-slot process-start-time epoch (a
 * header-layout/version change). Documented under "Crash Safety" in the
 * POD.
 */
static inline int hist_pid_alive(uint32_t pid) {
    if (pid == 0)
        return 1; /* no owner recorded, assume alive */
    if (kill((pid_t)pid, 0) != -1)
        return 1;
    return errno != ESRCH;
}
163              
164             /* Force-recover a stale write lock left by a dead process.
165             * CAS to OUR pid to hold the lock while fixing shared state, then release.
166             * Using our pid (not a bare WRITER_BIT sentinel) means a subsequent
167             * recovering process can detect and re-recover if we crash mid-recovery. */
168 0           static inline void hist_recover_stale_lock(HistHandle *h, uint32_t observed_rwlock) {
169 0           HistHeader *hdr = h->hdr;
170 0           uint32_t mypid = HIST_RWLOCK_WR((uint32_t)getpid());
171 0 0         if (!__atomic_compare_exchange_n(&hdr->rwlock, &observed_rwlock,
172             mypid, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
173 0           return;
174             /* We now hold the write lock as mypid. No additional shared state needs
175             * repair here (this module has no seqlock); just release the lock. */
176 0           __atomic_store_n(&hdr->rwlock, 0, __ATOMIC_RELEASE);
177 0 0         if (__atomic_load_n(&hdr->rwlock_waiters, __ATOMIC_RELAXED) > 0)
178 0           syscall(SYS_futex, &hdr->rwlock, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
179             }
180              
181             static const struct timespec hist_lock_timeout = { HIST_LOCK_TIMEOUT_SEC, 0 };
182              
/*
 * Process-global fork-generation counter.
 *
 * The pthread_atfork child callback bumps it, so every open handle notices
 * a fork transition on its next lock call by comparing against its cached
 * generation -- no getpid() syscall is needed on the hot path.
 */
static uint32_t hist_fork_gen = 1;
static pthread_once_t hist_atfork_once = PTHREAD_ONCE_INIT;

/* atfork child hook: advance the generation in the freshly forked child. */
static void hist_on_fork_child(void) {
    (void)__atomic_fetch_add(&hist_fork_gen, 1, __ATOMIC_RELAXED);
}

/* pthread_once target: install the child hook (no prepare/parent hooks). */
static void hist_atfork_init(void) {
    (void)pthread_atfork(NULL, NULL, hist_on_fork_child);
}
194              
195             /* Ensure this process owns a reader slot. Called from the lock helpers so
196             * that fork()'d children pick up their own slot lazily instead of sharing
197             * the parent's. Hot-path is a single relaxed load + compare; only on a
198             * fork-generation mismatch do we touch getpid() and scan slots. */
199 321234           static inline void hist_claim_reader_slot(HistHandle *h) {
200 321234           uint32_t cur_gen = __atomic_load_n(&hist_fork_gen, __ATOMIC_RELAXED);
201 321234 100         if (__builtin_expect(cur_gen == h->cached_fork_gen && h->my_slot_idx != UINT32_MAX, 1))
    50          
202 321197           return;
203             /* Cold path -- register the atfork hook once per process, then claim. */
204 37           pthread_once(&hist_atfork_once, hist_atfork_init);
205             /* Re-read after pthread_once: hist_on_fork_child may have bumped it. */
206 37           cur_gen = __atomic_load_n(&hist_fork_gen, __ATOMIC_RELAXED);
207 37           uint32_t now_pid = (uint32_t)getpid();
208 37           h->cached_pid = now_pid;
209 37           h->cached_fork_gen = cur_gen;
210 37           h->my_slot_idx = UINT32_MAX;
211 37           uint32_t start = now_pid % HIST_READER_SLOTS;
212 41 50         for (uint32_t i = 0; i < HIST_READER_SLOTS; i++) {
213 41           uint32_t s = (start + i) % HIST_READER_SLOTS;
214 41           uint32_t expected = 0;
215 41 100         if (__atomic_compare_exchange_n(&h->reader_slots[s].pid,
216             &expected, now_pid, 0,
217             __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
218             /* Zero all mirror fields, not just subcount: a SIGKILL'd
219             * predecessor may have left waiters_parked/writers_parked
220             * non-zero, and hist_recover_dead_readers won't drain them
221             * once we own the slot (the CAS expects the dead PID). */
222 37           __atomic_store_n(&h->reader_slots[s].subcount, 0, __ATOMIC_RELAXED);
223 37           __atomic_store_n(&h->reader_slots[s].waiters_parked, 0, __ATOMIC_RELAXED);
224 37           __atomic_store_n(&h->reader_slots[s].writers_parked, 0, __ATOMIC_RELAXED);
225 37           h->my_slot_idx = s;
226 37           return;
227             }
228             }
229             /* Table full -- leave my_slot_idx = UINT32_MAX so we silently skip
230             * tracking for this handle (lock still works; just no recovery). */
231             }
232              
/*
 * Atomically subtract `sub` from *p, saturating at 0 so the counter never
 * wraps below zero. A `sub` of 0 returns without touching *p.
 */
static inline void hist_atomic_sub_cap(uint32_t *p, uint32_t sub) {
    if (sub == 0)
        return;

    uint32_t seen = __atomic_load_n(p, __ATOMIC_RELAXED);
    uint32_t next;
    do {
        next = (seen > sub) ? seen - sub : 0;
        /* A failed (weak) CAS refreshes `seen` with the current value. */
    } while (!__atomic_compare_exchange_n(p, &seen, next,
                                          1, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}
244              
245             /* Try to claim a dead slot (CAS pid -> 0) and drain its parked-waiter
246             * contributions back to the global counters. A no-op if the slot was stolen
247             * by another recoverer or had no waiter contribution to drain.
248             *
249             * Note: subcount/waiters_parked/writers_parked are NOT zeroed here.
250             * Between our CAS and a follow-up store, a new process could claim the
251             * slot and start populating these fields -- our stores would clobber its
252             * state. hist_claim_reader_slot zeros all three on every claim, so
253             * leaving stale values is harmless. */
254 0           static inline void hist_drain_dead_slot(HistHandle *h, uint32_t i, uint32_t pid) {
255 0           HistHeader *hdr = h->hdr;
256 0           uint32_t expected = pid;
257             /* ACQ_REL on success: RELEASE publishes pid=0 to other observers;
258             * ACQUIRE syncs us with prior writes from the dead process to
259             * waiters_parked/writers_parked. On weakly-ordered archs (aarch64)
260             * a plain RELAXED load before the CAS could miss those writes;
261             * loading them after the CAS keeps them inside the acquire window. */
262 0 0         if (!__atomic_compare_exchange_n(&h->reader_slots[i].pid, &expected, 0,
263             0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED))
264 0           return;
265 0           uint32_t wp = __atomic_load_n(&h->reader_slots[i].waiters_parked, __ATOMIC_RELAXED);
266 0           uint32_t writp = __atomic_load_n(&h->reader_slots[i].writers_parked, __ATOMIC_RELAXED);
267 0 0         if (wp) hist_atomic_sub_cap(&hdr->rwlock_waiters, wp);
268 0 0         if (writp) hist_atomic_sub_cap(&hdr->rwlock_writers_waiting, writp);
269             }
270              
271             /* Scan reader slots for dead-process recovery.
272             *
273             * For each dead PID with non-zero contributions to the shared rwlock,
274             * rwlock_waiters, or rwlock_writers_waiting counters, drain its share back
275             * out so live processes don't have to wait for the slow per-op timeout
276             * decrement to drain it for them.
277             *
278             * For the main rwlock counter we use the "no live reader holds -> force-
279             * reset to 0" trick (precise) because per-process attribution of the
280             * subcount is racy across the inc-counter-then-inc-subcount window. */
281 0           static inline void hist_recover_dead_readers(HistHandle *h) {
282 0 0         if (!h->reader_slots) return;
283 0           HistHeader *hdr = h->hdr;
284 0           int any_live_reader = 0;
285 0           int found_dead_reader = 0;
286              
287             /* Pass 1: classify slots. Slots with dead pid and sc == 0 (no rwlock
288             * contribution to lose) are wiped immediately to free the slot for
289             * future claimants and drain any orphan parked-waiter counters. Slots
290             * with dead pid and sc > 0 are left intact in this pass: if force-
291             * reset cannot fire (because a live reader is concurrently present),
292             * wiping the dead slot would lose the only record of its orphan
293             * rwlock contribution and strand writers permanently once the live
294             * reader releases. */
295 0 0         for (uint32_t i = 0; i < HIST_READER_SLOTS; i++) {
296 0           uint32_t pid = __atomic_load_n(&h->reader_slots[i].pid, __ATOMIC_ACQUIRE);
297 0 0         if (pid == 0) continue;
298 0           uint32_t sc = __atomic_load_n(&h->reader_slots[i].subcount, __ATOMIC_RELAXED);
299 0 0         if (hist_pid_alive(pid)) {
300 0 0         if (sc > 0) any_live_reader = 1;
301 0           continue;
302             }
303 0 0         if (sc > 0) { found_dead_reader = 1; continue; }
304 0           hist_drain_dead_slot(h, i, pid);
305             }
306              
307             /* Pass 2: only if force-reset will fire. Issue the rwlock force-
308             * reset CAS FIRST, while the window since pass 1's last scan is
309             * still narrow (a handful of instructions, as in the original
310             * single-pass code). A new reader that started rdlock between
311             * pass 1's scan and the CAS will either:
312             * (a) have already CAS'd rwlock from cur to cur+1 -- our CAS then
313             * fails (cur mismatched), recovery yields and a future
314             * cycle retries; or
315             * (b) be still in the subcount-bump phase -- our CAS sees the
316             * stale cur and resets to 0; the new reader's subsequent CAS
317             * rwlock(0 -> 1) succeeds cleanly.
318             * Only after the CAS resolves do we wipe the deferred dead slots,
319             * keeping that work outside the race-sensitive window. */
320 0 0         if (found_dead_reader && !any_live_reader) {
    0          
321 0           uint32_t cur = __atomic_load_n(&hdr->rwlock, __ATOMIC_RELAXED);
322 0 0         if (cur > 0 && cur < HIST_RWLOCK_WRITER_BIT) {
    0          
323 0 0         if (__atomic_compare_exchange_n(&hdr->rwlock, &cur, 0,
324             0, __ATOMIC_RELEASE, __ATOMIC_RELAXED)) {
325 0 0         if (__atomic_load_n(&hdr->rwlock_waiters, __ATOMIC_RELAXED) > 0)
326 0           syscall(SYS_futex, &hdr->rwlock, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
327             }
328             }
329 0 0         for (uint32_t i = 0; i < HIST_READER_SLOTS; i++) {
330 0           uint32_t pid = __atomic_load_n(&h->reader_slots[i].pid, __ATOMIC_ACQUIRE);
331 0 0         if (pid == 0 || hist_pid_alive(pid)) continue;
    0          
332 0           hist_drain_dead_slot(h, i, pid);
333             }
334             }
335             }
336              
337             /* Inspect the lock word after a futex-wait timeout. If a dead writer
338             * holds it, force-recover the lock. Otherwise drain dead readers' shares
339             * of the rwlock/waiter counters. Called from rdlock and wrlock ETIMEDOUT
340             * branches -- identical recovery logic in both. */
341 0           static inline void hist_recover_after_timeout(HistHandle *h) {
342 0           HistHeader *hdr = h->hdr;
343 0           uint32_t val = __atomic_load_n(&hdr->rwlock, __ATOMIC_RELAXED);
344 0 0         if (val >= HIST_RWLOCK_WRITER_BIT) {
345 0           uint32_t pid = val & HIST_RWLOCK_PID_MASK;
346 0 0         if (!hist_pid_alive(pid))
347 0           hist_recover_stale_lock(h, val);
348             } else {
349 0           hist_recover_dead_readers(h);
350             }
351 0           }
352              
353             /* Park/unpark helpers: bump the global waiter counters together with this
354             * process's mirrored slot counters so a wrlock-timeout recovery scan can
355             * attribute and reverse a dead PID's contribution. Kept paired to make
356             * accidental drift between global and per-slot counts impossible. */
357 0           static inline void hist_park_reader(HistHandle *h) {
358 0 0         if (h->my_slot_idx != UINT32_MAX)
359 0           __atomic_add_fetch(&h->reader_slots[h->my_slot_idx].waiters_parked, 1, __ATOMIC_RELAXED);
360 0           __atomic_add_fetch(&h->hdr->rwlock_waiters, 1, __ATOMIC_RELAXED);
361 0           }
362 0           static inline void hist_unpark_reader(HistHandle *h) {
363 0           __atomic_sub_fetch(&h->hdr->rwlock_waiters, 1, __ATOMIC_RELAXED);
364 0 0         if (h->my_slot_idx != UINT32_MAX)
365 0           __atomic_sub_fetch(&h->reader_slots[h->my_slot_idx].waiters_parked, 1, __ATOMIC_RELAXED);
366 0           }
367 0           static inline void hist_park_writer(HistHandle *h) {
368 0 0         if (h->my_slot_idx != UINT32_MAX) {
369 0           __atomic_add_fetch(&h->reader_slots[h->my_slot_idx].waiters_parked, 1, __ATOMIC_RELAXED);
370 0           __atomic_add_fetch(&h->reader_slots[h->my_slot_idx].writers_parked, 1, __ATOMIC_RELAXED);
371             }
372 0           __atomic_add_fetch(&h->hdr->rwlock_waiters, 1, __ATOMIC_RELAXED);
373 0           __atomic_add_fetch(&h->hdr->rwlock_writers_waiting, 1, __ATOMIC_RELAXED);
374 0           }
375 0           static inline void hist_unpark_writer(HistHandle *h) {
376 0           __atomic_sub_fetch(&h->hdr->rwlock_waiters, 1, __ATOMIC_RELAXED);
377 0           __atomic_sub_fetch(&h->hdr->rwlock_writers_waiting, 1, __ATOMIC_RELAXED);
378 0 0         if (h->my_slot_idx != UINT32_MAX) {
379 0           __atomic_sub_fetch(&h->reader_slots[h->my_slot_idx].waiters_parked, 1, __ATOMIC_RELAXED);
380 0           __atomic_sub_fetch(&h->reader_slots[h->my_slot_idx].writers_parked, 1, __ATOMIC_RELAXED);
381             }
382 0           }
383              
384 113           static inline void hist_rwlock_rdlock(HistHandle *h) {
385 113           hist_claim_reader_slot(h);
386 113           HistHeader *hdr = h->hdr;
387 113           uint32_t *lock = &hdr->rwlock;
388 113           uint32_t *writers_waiting = &hdr->rwlock_writers_waiting;
389             /* Claim subcount BEFORE bumping the shared rwlock counter. This way
390             * a concurrent writer-side recovery scan that sees our PID alive with
391             * subcount > 0 will (correctly) defer force-reset, even while we are
392             * still spinning trying to win the rwlock CAS. Without this, a reader
393             * killed between rwlock CAS-success and subcount++ would let recovery
394             * force-reset rwlock to 0 underneath us, causing a UINT32_MAX wrap on
395             * our eventual rdunlock dec. */
396 113 50         if (h->my_slot_idx != UINT32_MAX)
397 113           __atomic_add_fetch(&h->reader_slots[h->my_slot_idx].subcount, 1, __ATOMIC_RELAXED);
398 113           for (int spin = 0; ; spin++) {
399 113           uint32_t cur = __atomic_load_n(lock, __ATOMIC_RELAXED);
400             /* Write-preferring: when lock is free (cur==0) and writers are
401             * waiting, yield to let the writer acquire. When readers are
402             * already active (cur>=1), new readers may join freely. */
403 113 50         if (cur > 0 && cur < HIST_RWLOCK_WRITER_BIT) {
    0          
404 0 0         if (__atomic_compare_exchange_n(lock, &cur, cur + 1,
405             1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
406 113           return;
407 113 50         } else if (cur == 0 && !__atomic_load_n(writers_waiting, __ATOMIC_RELAXED)) {
    50          
408 113 50         if (__atomic_compare_exchange_n(lock, &cur, 1,
409             1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
410 113           return;
411             }
412 0 0         if (__builtin_expect(spin < HIST_RWLOCK_SPIN_LIMIT, 1)) {
413 0           hist_rwlock_spin_pause();
414 0           continue;
415             }
416 0           hist_park_reader(h);
417 0           cur = __atomic_load_n(lock, __ATOMIC_RELAXED);
418             /* Sleep when write-locked OR when yielding to waiting writers */
419 0 0         if (cur >= HIST_RWLOCK_WRITER_BIT || cur == 0) {
    0          
420 0           long rc = syscall(SYS_futex, lock, FUTEX_WAIT, cur,
421             &hist_lock_timeout, NULL, 0);
422 0 0         if (rc == -1 && errno == ETIMEDOUT) {
    0          
423 0           hist_unpark_reader(h);
424 0           hist_recover_after_timeout(h);
425 0           spin = 0;
426 0           continue;
427             }
428             }
429 0           hist_unpark_reader(h);
430 0           spin = 0;
431             }
432             }
433              
434 113           static inline void hist_rwlock_rdunlock(HistHandle *h) {
435 113           HistHeader *hdr = h->hdr;
436             /* Release the shared counter BEFORE dropping our subcount so that
437             * "any live PID with subcount > 0" is a reliable in-flight indicator
438             * for the writer-side recovery scan. Inverting these would create a
439             * window where we still own a unit of rwlock but our slot subcount is
440             * 0, letting recovery force-reset rwlock underneath us. */
441 113           uint32_t after = __atomic_sub_fetch(&hdr->rwlock, 1, __ATOMIC_RELEASE);
442 113 50         if (h->my_slot_idx != UINT32_MAX)
443 113           __atomic_sub_fetch(&h->reader_slots[h->my_slot_idx].subcount, 1, __ATOMIC_RELAXED);
444 113 50         if (after == 0 && __atomic_load_n(&hdr->rwlock_waiters, __ATOMIC_RELAXED) > 0)
    50          
445 0           syscall(SYS_futex, &hdr->rwlock, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
446 113           }
447              
448 321121           static inline void hist_rwlock_wrlock(HistHandle *h) {
449 321121           hist_claim_reader_slot(h); /* refresh cached_pid across fork */
450 321121           HistHeader *hdr = h->hdr;
451 321121           uint32_t *lock = &hdr->rwlock;
452             /* Encode PID in the rwlock word itself (0x80000000 | pid) to eliminate
453             * any crash window between acquiring the lock and storing the owner. */
454 321121           uint32_t mypid = HIST_RWLOCK_WR(h->cached_pid);
455 321121           for (int spin = 0; ; spin++) {
456 321121           uint32_t expected = 0;
457 321121 50         if (__atomic_compare_exchange_n(lock, &expected, mypid,
458             1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
459 321121           return;
460 0 0         if (__builtin_expect(spin < HIST_RWLOCK_SPIN_LIMIT, 1)) {
461 0           hist_rwlock_spin_pause();
462 0           continue;
463             }
464 0           hist_park_writer(h);
465 0           uint32_t cur = __atomic_load_n(lock, __ATOMIC_RELAXED);
466 0 0         if (cur != 0) {
467 0           long rc = syscall(SYS_futex, lock, FUTEX_WAIT, cur,
468             &hist_lock_timeout, NULL, 0);
469 0 0         if (rc == -1 && errno == ETIMEDOUT) {
    0          
470 0           hist_unpark_writer(h);
471 0           hist_recover_after_timeout(h);
472 0           spin = 0;
473 0           continue;
474             }
475             }
476 0           hist_unpark_writer(h);
477 0           spin = 0;
478             }
479             }
480              
481 321121           static inline void hist_rwlock_wrunlock(HistHandle *h) {
482 321121           HistHeader *hdr = h->hdr;
483 321121           __atomic_store_n(&hdr->rwlock, 0, __ATOMIC_RELEASE);
484 321121 50         if (__atomic_load_n(&hdr->rwlock_waiters, __ATOMIC_RELAXED) > 0)
485 0           syscall(SYS_futex, &hdr->rwlock, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
486 321121           }
487              
488             /* ================================================================
489             * Layout math + create / open / destroy
490             *
491             * Layout: Header -> reader_slots[1024] -> counts[counts_len] (int64_t)
492             * ================================================================ */
493              
/*
 * Byte offsets of the regions that follow the header inside the mapping.
 * This is the single source of truth for the mmap region layout offsets.
 */
typedef struct {
    uint64_t reader_slots; /* offset of the reader-slot table */
    uint64_t counts;       /* offset of the counts array */
} HistLayout;
496              
497 101           static inline HistLayout hist_layout(void) {
498             HistLayout L;
499 101           L.reader_slots = sizeof(HistHeader);
500 101           L.counts = L.reader_slots + (uint64_t)HIST_READER_SLOTS * sizeof(HistReaderSlot);
501 101           L.counts = (L.counts + 7) & ~(uint64_t)7; /* 8-byte align the counts array (int64_t words) */
502 101           return L;
503             }
504              
505 52           static inline uint64_t hist_total_size(int64_t counts_len) {
506 52           HistLayout L = hist_layout();
507 52           return L.counts + (uint64_t)counts_len * sizeof(int64_t); /* counts_len int64_t cells */
508             }
509              
510 322690           static inline int64_t *hist_counts(HistHandle *h) {
511 322690           return (int64_t *)((char *)h->base + h->hdr->counts_off);
512             }
513              
514             /* ================================================================
515             * HdrHistogram geometry -- canonical formulas (see HdrHistogram_c).
516             * All derived fields are computed once here and stored in the header.
517             * ================================================================ */
518              
/*
 * Validated creation parameters plus every geometry value derived from
 * them. Filled in by hist_validate_create_args.
 */
typedef struct {
    /* ---- caller-supplied configuration ---- */
    int64_t lowest;
    int64_t highest;
    int32_t sig_figs;

    /* ---- derived values ---- */
    int32_t unit_magnitude;
    int32_t sub_bucket_count_magnitude;
    int32_t sub_bucket_half_count_magnitude;
    int32_t sub_bucket_count;
    int32_t sub_bucket_half_count;
    int64_t sub_bucket_mask;
    int32_t bucket_count;
    int64_t counts_len;
} HistGeometry;
532              
533             /* Validate args + compute the full geometry. Single source of truth: the XS
534             * layer does NOT duplicate these range checks. */
535 59           static int hist_validate_create_args(int64_t lowest, int64_t highest, int32_t sig_figs,
536             HistGeometry *g, char *errbuf) {
537 59 100         if (errbuf) errbuf[0] = '\0';
538 59 100         if (lowest < 1) { HIST_ERR("lowest must be >= 1"); return 0; }
    50          
539 57 100         if (highest < 2 * lowest) { HIST_ERR("highest must be >= 2 * lowest"); return 0; }
    50          
540 56 100         if (sig_figs < HIST_MIN_SIG || sig_figs > HIST_MAX_SIG) {
    100          
541 3 50         HIST_ERR("sig_figs must be between %d and %d", HIST_MIN_SIG, HIST_MAX_SIG); return 0;
542             }
543              
544 53           int32_t unit_magnitude = (int32_t)floor(log2((double)lowest));
545 53           int32_t sbc_magnitude = (int32_t)ceil(log2(2.0 * pow(10.0, (double)sig_figs)));
546 53 50         if (sbc_magnitude < 1) sbc_magnitude = 1;
547 53           int32_t shc_magnitude = sbc_magnitude - 1;
548 53 100         if (unit_magnitude + shc_magnitude > 61) {
549 1 50         HIST_ERR("lowest too large for sig_figs (unit_magnitude %d + sub_bucket_half_count_magnitude %d exceeds 61)", unit_magnitude, shc_magnitude);
550 1           return 0;
551             }
552 52           int32_t sub_bucket_count = (int32_t)(1 << sbc_magnitude);
553 52           int32_t sub_bucket_half_count = sub_bucket_count / 2;
554 52           int64_t sub_bucket_mask = ((int64_t)sub_bucket_count - 1) << unit_magnitude;
555              
556             /* bucket_count: smallest count of buckets covering 'highest' */
557 52           int64_t smallest_untrackable = (int64_t)sub_bucket_count << unit_magnitude;
558 52           int32_t bucket_count = 1;
559 502 100         while (smallest_untrackable <= highest) {
560 450 50         if (smallest_untrackable > (INT64_MAX / 2)) { bucket_count++; break; }
561 450           smallest_untrackable <<= 1;
562 450           bucket_count++;
563             }
564 52           int64_t counts_len = (int64_t)(bucket_count + 1) * sub_bucket_half_count;
565              
566 52           g->lowest = lowest;
567 52           g->highest = highest;
568 52           g->sig_figs = sig_figs;
569 52           g->unit_magnitude = unit_magnitude;
570 52           g->sub_bucket_count_magnitude = sbc_magnitude;
571 52           g->sub_bucket_half_count_magnitude = shc_magnitude;
572 52           g->sub_bucket_count = sub_bucket_count;
573 52           g->sub_bucket_half_count = sub_bucket_half_count;
574 52           g->sub_bucket_mask = sub_bucket_mask;
575 52           g->bucket_count = bucket_count;
576 52           g->counts_len = counts_len;
577 52           return 1;
578             }
579              
580 46           static inline void hist_init_header(void *base, const HistGeometry *g, uint64_t total_size) {
581 46           HistLayout L = hist_layout();
582 46           HistHeader *hdr = (HistHeader *)base;
583             /* Explicitly zero the header + reader-slot region (lock-recovery state);
584             the counts array relies on the fresh mapping being OS zero-filled. */
585 46           memset(base, 0, (size_t)L.counts);
586 46           hdr->magic = HIST_MAGIC;
587 46           hdr->version = HIST_VERSION;
588 46           hdr->lowest = g->lowest;
589 46           hdr->highest = g->highest;
590 46           hdr->sig_figs = g->sig_figs;
591 46           hdr->unit_magnitude = g->unit_magnitude;
592 46           hdr->sub_bucket_count_magnitude = g->sub_bucket_count_magnitude;
593 46           hdr->sub_bucket_half_count_magnitude = g->sub_bucket_half_count_magnitude;
594 46           hdr->sub_bucket_count = g->sub_bucket_count;
595 46           hdr->sub_bucket_half_count = g->sub_bucket_half_count;
596 46           hdr->sub_bucket_mask = g->sub_bucket_mask;
597 46           hdr->bucket_count = g->bucket_count;
598 46           hdr->counts_len = g->counts_len;
599 46           hdr->total_count = 0;
600 46           hdr->min_value = INT64_MAX;
601 46           hdr->max_value = 0;
602 46           hdr->total_size = total_size;
603 46           hdr->reader_slots_off = L.reader_slots;
604 46           hdr->counts_off = L.counts;
605 46           __atomic_thread_fence(__ATOMIC_SEQ_CST);
606 46           }
607              
608 49           static inline HistHandle *hist_setup(void *base, size_t map_size,
609             const char *path, int backing_fd) {
610 49           HistHeader *hdr = (HistHeader *)base;
611 49           HistHandle *h = (HistHandle *)calloc(1, sizeof(HistHandle));
612 49 50         if (!h) {
613 0           munmap(base, map_size);
614 0 0         if (backing_fd >= 0) close(backing_fd);
615 0           return NULL;
616             }
617 49           h->hdr = hdr;
618 49           h->base = base;
619 49           h->reader_slots = (HistReaderSlot *)((uint8_t *)base + hdr->reader_slots_off);
620 49           h->mmap_size = map_size;
621 49 100         h->path = path ? strdup(path) : NULL;
622 49           h->backing_fd = backing_fd;
623 49           h->my_slot_idx = UINT32_MAX;
624 49           return h;
625             }
626              
627             /* Validate a mapped header (shared by hist_create reopen and hist_open_fd).
628             * Stored geometry wins on reopen; we re-derive it from lowest/highest/sig_figs
629             * and require every cached field to match, then require total_size == the size
630             * the geometry implies and == the actual file size. */
631 3           static inline int hist_validate_header(const HistHeader *hdr, uint64_t file_size) {
632 3 50         if (hdr->magic != HIST_MAGIC) return 0;
633 3 50         if (hdr->version != HIST_VERSION) return 0;
634 3 50         if (hdr->sig_figs < HIST_MIN_SIG || hdr->sig_figs > HIST_MAX_SIG) return 0;
    50          
635 3 50         if (hdr->lowest < 1) return 0;
636 3 50         if (hdr->highest < 2 * hdr->lowest) return 0;
637              
638             HistGeometry g;
639 3 50         if (!hist_validate_create_args(hdr->lowest, hdr->highest, hdr->sig_figs, &g, NULL))
640 0           return 0;
641 3 50         if (hdr->unit_magnitude != g.unit_magnitude) return 0;
642 3 50         if (hdr->sub_bucket_count_magnitude != g.sub_bucket_count_magnitude) return 0;
643 3 50         if (hdr->sub_bucket_half_count_magnitude != g.sub_bucket_half_count_magnitude) return 0;
644 3 50         if (hdr->sub_bucket_count != g.sub_bucket_count) return 0;
645 3 50         if (hdr->sub_bucket_half_count != g.sub_bucket_half_count) return 0;
646 3 50         if (hdr->sub_bucket_mask != g.sub_bucket_mask) return 0;
647 3 50         if (hdr->bucket_count != g.bucket_count) return 0;
648 3 50         if (hdr->counts_len != g.counts_len) return 0;
649              
650 3 50         if (hdr->total_size != file_size) return 0;
651 3 50         if (hdr->total_size != hist_total_size(hdr->counts_len)) return 0;
652 3           HistLayout L = hist_layout();
653 3 50         if (hdr->reader_slots_off != L.reader_slots) return 0;
654 3 50         if (hdr->counts_off != L.counts) return 0;
655 3           return 1;
656             }
657              
658 52           static HistHandle *hist_create(const char *path, int64_t lowest, int64_t highest,
659             int32_t sig_figs, char *errbuf) {
660             HistGeometry g;
661 52 100         if (!hist_validate_create_args(lowest, highest, sig_figs, &g, errbuf)) return NULL;
662              
663 47           uint64_t total = hist_total_size(g.counts_len);
664 47           int anonymous = (path == NULL);
665 47           int fd = -1;
666             size_t map_size;
667             void *base;
668              
669 47 100         if (anonymous) {
670 41           map_size = (size_t)total;
671 41           base = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
672 41 50         if (base == MAP_FAILED) { HIST_ERR("mmap: %s", strerror(errno)); return NULL; }
    0          
673             } else {
674 6           fd = open(path, O_RDWR|O_CREAT, 0666);
675 9 50         if (fd < 0) { HIST_ERR("open: %s", strerror(errno)); return NULL; }
    0          
676 6 50         if (flock(fd, LOCK_EX) < 0) { HIST_ERR("flock: %s", strerror(errno)); close(fd); return NULL; }
    0          
677             struct stat st;
678 6 50         if (fstat(fd, &st) < 0) { HIST_ERR("fstat: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL; }
    0          
679 6           int is_new = (st.st_size == 0);
680 6 100         if (!is_new && (uint64_t)st.st_size < sizeof(HistHeader)) {
    100          
681 1 50         HIST_ERR("%s: file too small (%lld)", path, (long long)st.st_size);
682 1           flock(fd, LOCK_UN); close(fd); return NULL;
683             }
684 5 100         if (is_new && ftruncate(fd, (off_t)total) < 0) {
    50          
685 0 0         HIST_ERR("ftruncate: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL;
686             }
687 5 100         map_size = is_new ? (size_t)total : (size_t)st.st_size;
688 5           base = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
689 5 50         if (base == MAP_FAILED) { HIST_ERR("mmap: %s", strerror(errno)); flock(fd, LOCK_UN); close(fd); return NULL; }
    0          
690 5 100         if (!is_new) {
691 2 50         if (!hist_validate_header((HistHeader *)base, (uint64_t)st.st_size)) {
692 0 0         HIST_ERR("invalid histogram file"); munmap(base, map_size); flock(fd, LOCK_UN); close(fd); return NULL;
693             }
694 2           flock(fd, LOCK_UN); close(fd);
695 2           return hist_setup(base, map_size, path, -1);
696             }
697             }
698 44           hist_init_header(base, &g, total);
699 44 100         if (fd >= 0) { flock(fd, LOCK_UN); close(fd); }
700 44           return hist_setup(base, map_size, path, -1);
701             }
702              
703 4           static HistHandle *hist_create_memfd(const char *name, int64_t lowest, int64_t highest,
704             int32_t sig_figs, char *errbuf) {
705             HistGeometry g;
706 4 100         if (!hist_validate_create_args(lowest, highest, sig_figs, &g, errbuf)) return NULL;
707              
708 2           uint64_t total = hist_total_size(g.counts_len);
709 2 100         int fd = memfd_create(name ? name : "hist", MFD_CLOEXEC | MFD_ALLOW_SEALING);
710 2 50         if (fd < 0) { HIST_ERR("memfd_create: %s", strerror(errno)); return NULL; }
    0          
711 2 50         if (ftruncate(fd, (off_t)total) < 0) {
712 0 0         HIST_ERR("ftruncate: %s", strerror(errno)); close(fd); return NULL;
713             }
714 2           (void)fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);
715 2           void *base = mmap(NULL, (size_t)total, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
716 2 50         if (base == MAP_FAILED) { HIST_ERR("mmap: %s", strerror(errno)); close(fd); return NULL; }
    0          
717 2           hist_init_header(base, &g, total);
718 2           return hist_setup(base, (size_t)total, NULL, fd);
719             }
720              
721 2           static HistHandle *hist_open_fd(int fd, char *errbuf) {
722 2 50         if (errbuf) errbuf[0] = '\0';
723             struct stat st;
724 2 50         if (fstat(fd, &st) < 0) { HIST_ERR("fstat: %s", strerror(errno)); return NULL; }
    0          
725 2 100         if ((uint64_t)st.st_size < sizeof(HistHeader)) { HIST_ERR("too small"); return NULL; }
    50          
726 1           size_t ms = (size_t)st.st_size;
727 1           void *base = mmap(NULL, ms, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
728 1 50         if (base == MAP_FAILED) { HIST_ERR("mmap: %s", strerror(errno)); return NULL; }
    0          
729 1 50         if (!hist_validate_header((HistHeader *)base, (uint64_t)st.st_size)) {
730 0 0         HIST_ERR("invalid histogram table"); munmap(base, ms); return NULL;
731             }
732 1           int myfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
733 1 50         if (myfd < 0) { HIST_ERR("fcntl: %s", strerror(errno)); munmap(base, ms); return NULL; }
    0          
734 1           return hist_setup(base, ms, NULL, myfd);
735             }
736              
737 49           static void hist_destroy(HistHandle *h) {
738 49 50         if (!h) return;
739 49 100         if (h->backing_fd >= 0) close(h->backing_fd);
740 49 50         if (h->base) munmap(h->base, h->mmap_size);
741 49           free(h->path);
742 49           free(h);
743             }
744              
745 2           static inline int hist_msync(HistHandle *h) {
746 2 50         if (!h || !h->base) return 0;
    50          
747 2           return msync(h->base, h->mmap_size, MS_SYNC);
748             }
749              
750             /* ================================================================
751             * HdrHistogram index helpers (lock-free; pure functions of geometry)
752             *
753             * value | sub_bucket_mask is always >= 1 (sub_bucket_mask >= 1 since
754             * sub_bucket_count >= 2 and unit_magnitude >= 0), so __builtin_clzll is
755             * never called with 0.
756             * ================================================================ */
757              
758 672391           static inline int32_t hist_bucket_index(HistHandle *h, int64_t v) {
759 1344782           return (int32_t)((64 - __builtin_clzll((uint64_t)(v | h->hdr->sub_bucket_mask)))
760 672391           - h->hdr->unit_magnitude - (h->hdr->sub_bucket_half_count_magnitude + 1));
761             }
762              
763 672387           static inline int32_t hist_sub_bucket_index(HistHandle *h, int64_t v, int32_t bi) {
764 672387           return (int32_t)((uint64_t)v >> (bi + h->hdr->unit_magnitude));
765             }
766              
767 658821           static inline int64_t hist_counts_index(HistHandle *h, int32_t bi, int32_t sbi) {
768 658821           return ((int64_t)(bi + 1) << h->hdr->sub_bucket_half_count_magnitude)
769 658821           + (sbi - h->hdr->sub_bucket_half_count);
770             }
771              
772 336178           static inline int64_t hist_counts_index_for(HistHandle *h, int64_t v) {
773 336178           int32_t bi = hist_bucket_index(h, v);
774 336178           int32_t sbi = hist_sub_bucket_index(h, v, bi);
775 336178           return hist_counts_index(h, bi, sbi);
776             }
777              
778             /* reverse: lowest value stored at counts[index] */
779 27132           static inline int64_t hist_value_at_index(HistHandle *h, int64_t index) {
780 27132           int32_t bi = (int32_t)(index >> h->hdr->sub_bucket_half_count_magnitude) - 1;
781 27132           int32_t sbi = (int32_t)(index & (h->hdr->sub_bucket_half_count - 1)) + h->hdr->sub_bucket_half_count;
782 27132 100         if (bi < 0) { sbi -= h->hdr->sub_bucket_half_count; bi = 0; }
783 27132           return (int64_t)sbi << (bi + h->hdr->unit_magnitude);
784             }
785              
786 13566           static inline int64_t hist_size_of_equiv_range(HistHandle *h, int64_t v) {
787 13566           int32_t bi = hist_bucket_index(h, v);
788 13566           int32_t sbi = hist_sub_bucket_index(h, v, bi);
789 13566 50         int32_t adj = (sbi >= h->hdr->sub_bucket_count) ? bi + 1 : bi;
790 13566           return (int64_t)1 << (h->hdr->unit_magnitude + adj);
791             }
792              
793 13566           static inline int64_t hist_lowest_equiv(HistHandle *h, int64_t v) {
794 13566           return hist_value_at_index(h, hist_counts_index_for(h, v));
795             }
796              
797 38           static inline int64_t hist_highest_equiv(HistHandle *h, int64_t v) {
798 38           return hist_lowest_equiv(h, v) + hist_size_of_equiv_range(h, v) - 1;
799             }
800              
801 13528           static inline int64_t hist_median_equiv(HistHandle *h, int64_t v) {
802 13528           return hist_lowest_equiv(h, v) + (hist_size_of_equiv_range(h, v) >> 1);
803             }
804              
805             /* Non-locking index resolver for the XS range-check before taking the lock.
806             * Returns the counts index for v, or -1 if v falls outside the trackable
807             * range (idx < 0 or idx >= counts_len). v must be >= 0. */
808 322647           static inline int64_t hist_index_for(HistHandle *h, int64_t v) {
809 322647           int32_t bi = hist_bucket_index(h, v);
810 322647 50         if (bi < 0 || bi >= h->hdr->bucket_count) return -1;
    100          
811 322643           int32_t sbi = hist_sub_bucket_index(h, v, bi);
812 322643           int64_t idx = hist_counts_index(h, bi, sbi);
813 322643 50         if (idx < 0 || idx >= h->hdr->counts_len) return -1;
    50          
814 322643           return idx;
815             }
816              
817             /* ================================================================
818             * HdrHistogram operations (callers hold the lock)
819             * ================================================================ */
820              
821             /* Record `count` occurrences of `value`. The XS caller has ALREADY range-
822             * checked 0 <= value <= highest and idx < counts_len before locking. */
823 322612           static void hist_record_locked(HistHandle *h, int64_t value, int64_t count) {
824 322612           int64_t idx = hist_counts_index_for(h, value);
825 322612           int64_t *counts = hist_counts(h);
826 322612           counts[idx] += count;
827 322612           h->hdr->total_count += count;
828 322612 100         if (value < h->hdr->min_value) h->hdr->min_value = value;
829 322612 100         if (value > h->hdr->max_value) h->hdr->max_value = value;
830 322612           }
831              
832             /* Highest equivalent value at or below which `p` percent of recorded values
833             * lie. Returns 0 for an empty histogram. */
834 39           static int64_t hist_value_at_percentile_locked(HistHandle *h, double p) {
835 39           int64_t total = h->hdr->total_count;
836 39 100         if (total == 0) return 0;
837 38           int64_t want = (int64_t)ceil((p / 100.0) * (double)total);
838 38 50         if (want < 1) want = 1;
839 38 50         if (want > total) want = total;
840 38           int64_t *counts = hist_counts(h);
841 38           int64_t running = 0;
842 38           int64_t len = h->hdr->counts_len;
843 190360 50         for (int64_t idx = 0; idx < len; idx++) {
844 190360 100         if (!counts[idx]) continue; /* skip empty cells (sparse); a 0 cell can never be the first to reach want */
845 151200           running += counts[idx];
846 151200 100         if (running >= want)
847 38           return hist_highest_equiv(h, hist_value_at_index(h, idx));
848             }
849 0           return 0;
850             }
851              
852             /* Arithmetic mean of all recorded values (using each bucket's median-equivalent
853             * value as the representative). Returns 0.0 for an empty histogram. */
854 5           static double hist_mean_locked(HistHandle *h) {
855 5           int64_t total = h->hdr->total_count;
856 5 50         if (total == 0) return 0.0;
857 5           int64_t *counts = hist_counts(h);
858 5           int64_t len = h->hdr->counts_len;
859 5           double sum = 0.0;
860 56325 100         for (int64_t idx = 0; idx < len; idx++) {
861 56320           int64_t c = counts[idx];
862 56320 100         if (c)
863 13528           sum += (double)c * (double)hist_median_equiv(h, hist_value_at_index(h, idx));
864             }
865 5           return sum / (double)total;
866             }
867              
/* Add every positive cell of src onto the matching cell of dst. The caller
 * guarantees both arrays share one geometry (counts_len cells each) and holds
 * dst's write lock. A sum that would exceed INT64_MAX is pinned to INT64_MAX;
 * zero or negative source cells are ignored. */
static void hist_merge_counts(int64_t *dst, const int64_t *src, int64_t counts_len) {
    int64_t i = 0;
    while (i < counts_len) {
        const int64_t incoming = src[i];
        if (incoming > 0) {
            const int64_t headroom = INT64_MAX - incoming;
            dst[i] = (dst[i] > headroom) ? INT64_MAX : dst[i] + incoming;
        }
        i++;
    }
}
877              
878             /* reset all counts to 0; reset total/min/max (caller holds the write lock) */
879 1           static inline void hist_reset_locked(HistHandle *h) {
880 1           memset(hist_counts(h), 0, (size_t)((uint64_t)h->hdr->counts_len * sizeof(int64_t)));
881 1           h->hdr->total_count = 0;
882 1           h->hdr->min_value = INT64_MAX;
883 1           h->hdr->max_value = 0;
884 1           }
885              
886             #endif /* HIST_H */