xref: /freebsd-src/sys/contrib/ck/include/ck_ec.h (revision 74e9b5f29ad0056bbe11a30c91dfa0705fa19cd5)
1*74e9b5f2SOlivier Houchard /*
2*74e9b5f2SOlivier Houchard  * Copyright 2018 Paul Khuong, Google LLC.
3*74e9b5f2SOlivier Houchard  * All rights reserved.
4*74e9b5f2SOlivier Houchard  *
5*74e9b5f2SOlivier Houchard  * Redistribution and use in source and binary forms, with or without
6*74e9b5f2SOlivier Houchard  * modification, are permitted provided that the following conditions
7*74e9b5f2SOlivier Houchard  * are met:
8*74e9b5f2SOlivier Houchard  * 1. Redistributions of source code must retain the above copyright
9*74e9b5f2SOlivier Houchard  *    notice, this list of conditions and the following disclaimer.
10*74e9b5f2SOlivier Houchard  * 2. Redistributions in binary form must reproduce the above copyright
11*74e9b5f2SOlivier Houchard  *    notice, this list of conditions and the following disclaimer in the
12*74e9b5f2SOlivier Houchard  *    documentation and/or other materials provided with the distribution.
13*74e9b5f2SOlivier Houchard  *
14*74e9b5f2SOlivier Houchard  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15*74e9b5f2SOlivier Houchard  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16*74e9b5f2SOlivier Houchard  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17*74e9b5f2SOlivier Houchard  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18*74e9b5f2SOlivier Houchard  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19*74e9b5f2SOlivier Houchard  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20*74e9b5f2SOlivier Houchard  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21*74e9b5f2SOlivier Houchard  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22*74e9b5f2SOlivier Houchard  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23*74e9b5f2SOlivier Houchard  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24*74e9b5f2SOlivier Houchard  * SUCH DAMAGE.
25*74e9b5f2SOlivier Houchard  */
26*74e9b5f2SOlivier Houchard 
27*74e9b5f2SOlivier Houchard /*
28*74e9b5f2SOlivier Houchard  * Overview
29*74e9b5f2SOlivier Houchard  * ========
30*74e9b5f2SOlivier Houchard  *
31*74e9b5f2SOlivier Houchard  * ck_ec implements 32- and 64- bit event counts. Event counts let us
32*74e9b5f2SOlivier Houchard  * easily integrate OS-level blocking (e.g., futexes) in lock-free
33*74e9b5f2SOlivier Houchard  * protocols. Waiters block conditionally, if the event count's value
34*74e9b5f2SOlivier Houchard  * is still equal to some old value.
35*74e9b5f2SOlivier Houchard  *
36*74e9b5f2SOlivier Houchard  * Event counts come in four variants: 32 and 64 bit (with one bit
37*74e9b5f2SOlivier Houchard  * stolen for internal signaling, so 31 and 63 bit counters), and
38*74e9b5f2SOlivier Houchard  * single or multiple producers (wakers). Waiters are always multiple
39*74e9b5f2SOlivier Houchard  * consumers. The 32 bit variants are smaller, and more efficient,
40*74e9b5f2SOlivier Houchard  * especially in single producer mode. The 64 bit variants are larger,
41*74e9b5f2SOlivier Houchard  * but practically invulnerable to ABA.
42*74e9b5f2SOlivier Houchard  *
43*74e9b5f2SOlivier Houchard  * The 32 bit variant is always available. The 64 bit variant is only
44*74e9b5f2SOlivier Houchard  * available if CK supports 64-bit atomic operations. Currently,
45*74e9b5f2SOlivier Houchard  * specialization for single producer is only implemented for x86 and
46*74e9b5f2SOlivier Houchard  * x86-64, on compilers that support GCC extended inline assembly;
47*74e9b5f2SOlivier Houchard  * other platforms fall back to the multiple producer code path.
48*74e9b5f2SOlivier Houchard  *
49*74e9b5f2SOlivier Houchard  * A typical usage pattern is:
50*74e9b5f2SOlivier Houchard  *
51*74e9b5f2SOlivier Houchard  *  1. On the producer side:
52*74e9b5f2SOlivier Houchard  *
53*74e9b5f2SOlivier Houchard  *    - Make changes to some shared data structure, without involving
54*74e9b5f2SOlivier Houchard  *	the event count at all.
55*74e9b5f2SOlivier Houchard  *    - After each change, call ck_ec_inc on the event count. The call
56*74e9b5f2SOlivier Houchard  *	acts as a write-write barrier, and wakes up any consumer blocked
57*74e9b5f2SOlivier Houchard  *	on the event count (waiting for new changes).
58*74e9b5f2SOlivier Houchard  *
59*74e9b5f2SOlivier Houchard  *  2. On the consumer side:
60*74e9b5f2SOlivier Houchard  *
61*74e9b5f2SOlivier Houchard  *    - Snapshot ck_ec_value of the event count. The call acts as a
62*74e9b5f2SOlivier Houchard  *	read barrier.
63*74e9b5f2SOlivier Houchard  *    - Read and process the shared data structure.
64*74e9b5f2SOlivier Houchard  *    - Wait for new changes by calling ck_ec_wait with the snapshot value.
65*74e9b5f2SOlivier Houchard  *
66*74e9b5f2SOlivier Houchard  * Some data structures may opt for tighter integration with their
67*74e9b5f2SOlivier Houchard  * event count. For example, an SPMC ring buffer or disruptor might
68*74e9b5f2SOlivier Houchard  * use the event count's value as the write pointer. If the buffer is
69*74e9b5f2SOlivier Houchard  * regularly full, it might also make sense to store the read pointer
70*74e9b5f2SOlivier Houchard  * in an MP event count.
71*74e9b5f2SOlivier Houchard  *
72*74e9b5f2SOlivier Houchard  * This event count implementation supports tighter integration in two
73*74e9b5f2SOlivier Houchard  * ways.
74*74e9b5f2SOlivier Houchard  *
75*74e9b5f2SOlivier Houchard  * Producers may opt to increment by an arbitrary value (less than
76*74e9b5f2SOlivier Houchard  * INT32_MAX / INT64_MAX), in order to encode, e.g., byte
77*74e9b5f2SOlivier Houchard  * offsets. Larger increment values make wraparound more likely, so
78*74e9b5f2SOlivier Houchard  * the increments should still be relatively small.
79*74e9b5f2SOlivier Houchard  *
80*74e9b5f2SOlivier Houchard  * Consumers may pass a predicate to ck_ec_wait_pred. This predicate
81*74e9b5f2SOlivier Houchard  * can make `ck_ec_wait_pred` return early, before the event count's
82*74e9b5f2SOlivier Houchard  * value changes, and can override the deadline passed to futex_wait.
 83*74e9b5f2SOlivier Houchard  * This lets consumers block on one event count, while optimistically
84*74e9b5f2SOlivier Houchard  * looking at other waking conditions.
85*74e9b5f2SOlivier Houchard  *
86*74e9b5f2SOlivier Houchard  * API Reference
87*74e9b5f2SOlivier Houchard  * =============
88*74e9b5f2SOlivier Houchard  *
89*74e9b5f2SOlivier Houchard  * When compiled as C11 or later, this header defines type-generic
90*74e9b5f2SOlivier Houchard  * macros for ck_ec32 and ck_ec64; the reference describes this
91*74e9b5f2SOlivier Houchard  * type-generic API.
92*74e9b5f2SOlivier Houchard  *
93*74e9b5f2SOlivier Houchard  * ck_ec needs additional OS primitives to determine the current time,
94*74e9b5f2SOlivier Houchard  * to wait on an address, and to wake all threads waiting on a given
95*74e9b5f2SOlivier Houchard  * address. These are defined with fields in a struct ck_ec_ops.  Each
96*74e9b5f2SOlivier Houchard  * ck_ec_ops may additionally define the number of spin loop
97*74e9b5f2SOlivier Houchard  * iterations in the slow path, as well as the initial wait time in
98*74e9b5f2SOlivier Houchard  * the internal exponential backoff, the exponential scale factor, and
99*74e9b5f2SOlivier Houchard  * the right shift count (< 32).
100*74e9b5f2SOlivier Houchard  *
101*74e9b5f2SOlivier Houchard  * The ops, in addition to the single/multiple producer flag, are
102*74e9b5f2SOlivier Houchard  * encapsulated in a struct ck_ec_mode, passed to most ck_ec
103*74e9b5f2SOlivier Houchard  * operations.
104*74e9b5f2SOlivier Houchard  *
105*74e9b5f2SOlivier Houchard  * ec is a struct ck_ec32 *, or a struct ck_ec64 *.
106*74e9b5f2SOlivier Houchard  *
107*74e9b5f2SOlivier Houchard  * value is an uint32_t for ck_ec32, and an uint64_t for ck_ec64. It
108*74e9b5f2SOlivier Houchard  * never exceeds INT32_MAX and INT64_MAX respectively.
109*74e9b5f2SOlivier Houchard  *
110*74e9b5f2SOlivier Houchard  * mode is a struct ck_ec_mode *.
111*74e9b5f2SOlivier Houchard  *
112*74e9b5f2SOlivier Houchard  * deadline is either NULL, or a `const struct timespec *` that will
113*74e9b5f2SOlivier Houchard  * be treated as an absolute deadline.
114*74e9b5f2SOlivier Houchard  *
115*74e9b5f2SOlivier Houchard  * `void ck_ec_init(ec, value)`: initializes the event count to value.
116*74e9b5f2SOlivier Houchard  *
117*74e9b5f2SOlivier Houchard  * `value ck_ec_value(ec)`: returns the current value of the event
118*74e9b5f2SOlivier Houchard  *  counter.  This read acts as a read (acquire) barrier.
119*74e9b5f2SOlivier Houchard  *
120*74e9b5f2SOlivier Houchard  * `bool ck_ec_has_waiters(ec)`: returns whether some thread has
121*74e9b5f2SOlivier Houchard  *  marked the event count as requiring an OS wakeup.
122*74e9b5f2SOlivier Houchard  *
123*74e9b5f2SOlivier Houchard  * `void ck_ec_inc(ec, mode)`: increments the value of the event
 124*74e9b5f2SOlivier Houchard  *  counter by one. This write acts as a write barrier. Wakes up
125*74e9b5f2SOlivier Houchard  *  any waiting thread.
126*74e9b5f2SOlivier Houchard  *
127*74e9b5f2SOlivier Houchard  * `value ck_ec_add(ec, mode, value)`: increments the event counter by
128*74e9b5f2SOlivier Houchard  *  `value`, and returns the event counter's previous value. This
129*74e9b5f2SOlivier Houchard  *  write acts as a write barrier. Wakes up any waiting thread.
130*74e9b5f2SOlivier Houchard  *
131*74e9b5f2SOlivier Houchard  * `int ck_ec_deadline(struct timespec *new_deadline,
132*74e9b5f2SOlivier Houchard  *		       mode,
133*74e9b5f2SOlivier Houchard  *		       const struct timespec *timeout)`:
134*74e9b5f2SOlivier Houchard  *  computes a deadline `timeout` away from the current time. If
135*74e9b5f2SOlivier Houchard  *  timeout is NULL, computes a deadline in the infinite future. The
136*74e9b5f2SOlivier Houchard  *  resulting deadline is written to `new_deadline`. Returns 0 on
137*74e9b5f2SOlivier Houchard  *  success, and -1 if ops->gettime failed (without touching errno).
138*74e9b5f2SOlivier Houchard  *
139*74e9b5f2SOlivier Houchard  * `int ck_ec_wait(ec, mode, value, deadline)`: waits until the event
140*74e9b5f2SOlivier Houchard  *  counter's value differs from `value`, or, if `deadline` is
141*74e9b5f2SOlivier Houchard  *  provided and non-NULL, until the current time is after that
142*74e9b5f2SOlivier Houchard  *  deadline. Use a deadline with tv_sec = 0 for a non-blocking
143*74e9b5f2SOlivier Houchard  *  execution. Returns 0 if the event counter has changed, and -1 on
144*74e9b5f2SOlivier Houchard  *  timeout. This function acts as a read (acquire) barrier.
145*74e9b5f2SOlivier Houchard  *
146*74e9b5f2SOlivier Houchard  * `int ck_ec_wait_pred(ec, mode, value, pred, data, deadline)`: waits
147*74e9b5f2SOlivier Houchard  * until the event counter's value differs from `value`, or until
148*74e9b5f2SOlivier Houchard  * `pred` returns non-zero, or, if `deadline` is provided and
149*74e9b5f2SOlivier Houchard  * non-NULL, until the current time is after that deadline. Use a
150*74e9b5f2SOlivier Houchard  * deadline with tv_sec = 0 for a non-blocking execution. Returns 0 if
151*74e9b5f2SOlivier Houchard  * the event counter has changed, `pred`'s return value if non-zero,
152*74e9b5f2SOlivier Houchard  * and -1 on timeout. This function acts as a read (acquire) barrier.
153*74e9b5f2SOlivier Houchard  *
154*74e9b5f2SOlivier Houchard  * `pred` is always called as `pred(data, iteration_deadline, now)`,
155*74e9b5f2SOlivier Houchard  * where `iteration_deadline` is a timespec of the deadline for this
156*74e9b5f2SOlivier Houchard  * exponential backoff iteration, and `now` is the current time. If
157*74e9b5f2SOlivier Houchard  * `pred` returns a non-zero value, that value is immediately returned
158*74e9b5f2SOlivier Houchard  * to the waiter. Otherwise, `pred` is free to modify
159*74e9b5f2SOlivier Houchard  * `iteration_deadline` (moving it further in the future is a bad
160*74e9b5f2SOlivier Houchard  * idea).
161*74e9b5f2SOlivier Houchard  *
162*74e9b5f2SOlivier Houchard  * Implementation notes
163*74e9b5f2SOlivier Houchard  * ====================
164*74e9b5f2SOlivier Houchard  *
165*74e9b5f2SOlivier Houchard  * The multiple producer implementation is a regular locked event
166*74e9b5f2SOlivier Houchard  * count, with a single flag bit to denote the need to wake up waiting
167*74e9b5f2SOlivier Houchard  * threads.
168*74e9b5f2SOlivier Houchard  *
169*74e9b5f2SOlivier Houchard  * The single producer specialization is heavily tied to
170*74e9b5f2SOlivier Houchard  * [x86-TSO](https://www.cl.cam.ac.uk/~pes20/weakmemory/cacm.pdf), and
171*74e9b5f2SOlivier Houchard  * to non-atomic read-modify-write instructions (e.g., `inc mem`);
172*74e9b5f2SOlivier Houchard  * these non-atomic RMW let us write to the same memory locations with
173*74e9b5f2SOlivier Houchard  * atomic and non-atomic instructions, without suffering from process
174*74e9b5f2SOlivier Houchard  * scheduling stalls.
175*74e9b5f2SOlivier Houchard  *
176*74e9b5f2SOlivier Houchard  * The reason we can mix atomic and non-atomic writes to the `counter`
177*74e9b5f2SOlivier Houchard  * word is that every non-atomic write obviates the need for the
178*74e9b5f2SOlivier Houchard  * atomically flipped flag bit: we only use non-atomic writes to
179*74e9b5f2SOlivier Houchard  * update the event count, and the atomic flag only informs the
180*74e9b5f2SOlivier Houchard  * producer that we would like a futex_wake, because of the update.
181*74e9b5f2SOlivier Houchard  * We only require the non-atomic RMW counter update to prevent
182*74e9b5f2SOlivier Houchard  * preemption from introducing arbitrarily long worst case delays.
183*74e9b5f2SOlivier Houchard  *
184*74e9b5f2SOlivier Houchard  * Correctness does not rely on the usual ordering argument: in the
185*74e9b5f2SOlivier Houchard  * absence of fences, there is no strict ordering between atomic and
186*74e9b5f2SOlivier Houchard  * non-atomic writes. The key is instead x86-TSO's guarantee that a
187*74e9b5f2SOlivier Houchard  * read is satisfied from the most recent buffered write in the local
188*74e9b5f2SOlivier Houchard  * store queue if there is one, or from memory if there is no write to
189*74e9b5f2SOlivier Houchard  * that address in the store queue.
190*74e9b5f2SOlivier Houchard  *
191*74e9b5f2SOlivier Houchard  * x86-TSO's constraint on reads suffices to guarantee that the
192*74e9b5f2SOlivier Houchard  * producer will never forget about a counter update. If the last
193*74e9b5f2SOlivier Houchard  * update is still queued, the new update will be based on the queued
194*74e9b5f2SOlivier Houchard  * value. Otherwise, the new update will be based on the value in
195*74e9b5f2SOlivier Houchard  * memory, which may or may not have had its flag flipped. In either
196*74e9b5f2SOlivier Houchard  * case, the value of the counter (modulo flag) is correct.
197*74e9b5f2SOlivier Houchard  *
198*74e9b5f2SOlivier Houchard  * When the producer forwards the counter's value from its store
199*74e9b5f2SOlivier Houchard  * queue, the new update might not preserve a flag flip. Any waiter
200*74e9b5f2SOlivier Houchard  * thus has to check from time to time to determine if it wasn't
201*74e9b5f2SOlivier Houchard  * woken up because the flag bit was silently cleared.
202*74e9b5f2SOlivier Houchard  *
203*74e9b5f2SOlivier Houchard  * In reality, the store queue in x86-TSO stands for in-flight
204*74e9b5f2SOlivier Houchard  * instructions in the chip's out-of-order backend. In the vast
205*74e9b5f2SOlivier Houchard  * majority of cases, instructions will only remain in flight for a
206*74e9b5f2SOlivier Houchard  * few hundred or thousand of cycles. That's why ck_ec_wait spins on
207*74e9b5f2SOlivier Houchard  * the `counter` word for ~100 iterations after flipping its flag bit:
208*74e9b5f2SOlivier Houchard  * if the counter hasn't changed after that many iterations, it is
209*74e9b5f2SOlivier Houchard  * very likely that the producer's next counter update will observe
210*74e9b5f2SOlivier Houchard  * the flag flip.
211*74e9b5f2SOlivier Houchard  *
212*74e9b5f2SOlivier Houchard  * That's still not a hard guarantee of correctness. Conservatively,
213*74e9b5f2SOlivier Houchard  * we can expect that no instruction will remain in flight for more
214*74e9b5f2SOlivier Houchard  * than 1 second... if only because some interrupt will have forced
215*74e9b5f2SOlivier Houchard  * the chip to store its architectural state in memory, at which point
216*74e9b5f2SOlivier Houchard  * an instruction is either fully retired or rolled back. Interrupts,
217*74e9b5f2SOlivier Houchard  * particularly the pre-emption timer, are why single-producer updates
218*74e9b5f2SOlivier Houchard  * must happen in a single non-atomic read-modify-write instruction.
219*74e9b5f2SOlivier Houchard  * Having a single instruction as the critical section means we only
220*74e9b5f2SOlivier Houchard  * have to consider the worst-case execution time for that
221*74e9b5f2SOlivier Houchard  * instruction. That's easier than doing the same for a pair of
222*74e9b5f2SOlivier Houchard  * instructions, which an unlucky pre-emption could delay for
223*74e9b5f2SOlivier Houchard  * arbitrarily long.
224*74e9b5f2SOlivier Houchard  *
225*74e9b5f2SOlivier Houchard  * Thus, after a short spin loop, ck_ec_wait enters an exponential
226*74e9b5f2SOlivier Houchard  * backoff loop, where each "sleep" is instead a futex_wait.  The
227*74e9b5f2SOlivier Houchard  * backoff is only necessary to handle rare cases where the flag flip
228*74e9b5f2SOlivier Houchard  * was overwritten after the spin loop. Eventually, more than one
229*74e9b5f2SOlivier Houchard  * second will have elapsed since the flag flip, and the sleep timeout
230*74e9b5f2SOlivier Houchard  * becomes infinite: since the flag bit has been set for much longer
231*74e9b5f2SOlivier Houchard  * than the time for which an instruction may remain in flight, the
232*74e9b5f2SOlivier Houchard  * flag will definitely be observed at the next counter update.
233*74e9b5f2SOlivier Houchard  *
234*74e9b5f2SOlivier Houchard  * The 64 bit ck_ec_wait pulls another trick: futexes only handle 32
235*74e9b5f2SOlivier Houchard  * bit ints, so we must treat the 64 bit counter's low 32 bits as an
236*74e9b5f2SOlivier Houchard  * int in futex_wait. That's a bit dodgy, but fine in practice, given
237*74e9b5f2SOlivier Houchard  * that the OS's futex code will always read whatever value is
238*74e9b5f2SOlivier Houchard  * currently in memory: even if the producer thread were to wait on
239*74e9b5f2SOlivier Houchard  * its own event count, the syscall and ring transition would empty
240*74e9b5f2SOlivier Houchard  * the store queue (the out-of-order execution backend).
241*74e9b5f2SOlivier Houchard  *
242*74e9b5f2SOlivier Houchard  * Finally, what happens when the producer is migrated to another core
243*74e9b5f2SOlivier Houchard  * or otherwise pre-empted? Migration must already incur a barrier, so
244*74e9b5f2SOlivier Houchard  * that thread always sees its own writes, so that's safe. As for
245*74e9b5f2SOlivier Houchard  * pre-emption, that requires storing the architectural state, which
246*74e9b5f2SOlivier Houchard  * means every instruction must either be executed fully or not at
247*74e9b5f2SOlivier Houchard  * all when pre-emption happens.
248*74e9b5f2SOlivier Houchard  */
249*74e9b5f2SOlivier Houchard 
250*74e9b5f2SOlivier Houchard #ifndef CK_EC_H
251*74e9b5f2SOlivier Houchard #define CK_EC_H
252*74e9b5f2SOlivier Houchard #include <ck_cc.h>
253*74e9b5f2SOlivier Houchard #include <ck_pr.h>
254*74e9b5f2SOlivier Houchard #include <ck_stdbool.h>
255*74e9b5f2SOlivier Houchard #include <ck_stdint.h>
256*74e9b5f2SOlivier Houchard #include <ck_stddef.h>
257*74e9b5f2SOlivier Houchard #include <sys/time.h>
258*74e9b5f2SOlivier Houchard 
259*74e9b5f2SOlivier Houchard /*
260*74e9b5f2SOlivier Houchard  * If we have ck_pr_faa_64 (and, presumably, ck_pr_load_64), we
261*74e9b5f2SOlivier Houchard  * support 63 bit counters.
262*74e9b5f2SOlivier Houchard  */
263*74e9b5f2SOlivier Houchard #ifdef CK_F_PR_FAA_64
264*74e9b5f2SOlivier Houchard #define CK_F_EC64
265*74e9b5f2SOlivier Houchard #endif /* CK_F_PR_FAA_64 */
266*74e9b5f2SOlivier Houchard 
267*74e9b5f2SOlivier Houchard /*
268*74e9b5f2SOlivier Houchard  * GCC inline assembly lets us exploit non-atomic read-modify-write
269*74e9b5f2SOlivier Houchard  * instructions on x86/x86_64 for a fast single-producer mode.
270*74e9b5f2SOlivier Houchard  *
 271*74e9b5f2SOlivier Houchard  * If CK_F_EC_SP is not defined, CK_EC always uses the slower
272*74e9b5f2SOlivier Houchard  * multiple producer code.
273*74e9b5f2SOlivier Houchard  */
274*74e9b5f2SOlivier Houchard #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
275*74e9b5f2SOlivier Houchard #define CK_F_EC_SP
276*74e9b5f2SOlivier Houchard #endif /* GNUC && (__i386__ || __x86_64__) */
277*74e9b5f2SOlivier Houchard 
278*74e9b5f2SOlivier Houchard struct ck_ec_ops;
279*74e9b5f2SOlivier Houchard 
/*
 * Per-wait context passed to the ops->wait32/wait64 callbacks and to
 * ck_ec_wait_pred predicates (see struct ck_ec_ops below).
 */
struct ck_ec_wait_state {
	struct timespec start;	/* Time when we entered ck_ec_wait. */
	struct timespec now;  /* Time now. */
	const struct ck_ec_ops *ops;	/* Ops table in use for this wait. */
	void *data;  /* Opaque pointer for the predicate's internal state. */

};
287*74e9b5f2SOlivier Houchard 
288*74e9b5f2SOlivier Houchard /*
289*74e9b5f2SOlivier Houchard  * ck_ec_ops define system-specific functions to get the current time,
290*74e9b5f2SOlivier Houchard  * atomically wait on an address if it still has some expected value,
291*74e9b5f2SOlivier Houchard  * and to wake all threads waiting on an address.
292*74e9b5f2SOlivier Houchard  *
 293*74e9b5f2SOlivier Houchard  * Each platform is expected to define a few (typically one) opaque
 294*74e9b5f2SOlivier Houchard  * const ops structs, and reuse them for all ck_ec_mode structs.
295*74e9b5f2SOlivier Houchard  */
struct ck_ec_ops {
	/* Populates out with the current time. Returns non-zero on failure. */
	int (*gettime)(const struct ck_ec_ops *, struct timespec *out);

	/*
	 * Waits on address if its value is still `expected`.  If
	 * deadline is non-NULL, stops waiting once that deadline is
	 * reached. May return early for any reason.
	 */
	void (*wait32)(const struct ck_ec_wait_state *, const uint32_t *,
		       uint32_t expected, const struct timespec *deadline);

	/*
	 * Same as wait32, but for a 64 bit counter. Only used if
	 * CK_F_EC64 is defined.
	 *
	 * If the underlying blocking primitive only supports 32 bit
	 * control words, it should be safe to block on the least
	 * significant half of the 64 bit address.
	 */
	void (*wait64)(const struct ck_ec_wait_state *, const uint64_t *,
		       uint64_t expected, const struct timespec *deadline);

	/* Wakes up all threads waiting on address. */
	void (*wake32)(const struct ck_ec_ops *, const uint32_t *address);

	/*
	 * Same as wake32, but for a 64 bit counter. Only used if
	 * CK_F_EC64 is defined.
	 *
	 * When wait64 truncates the control word at address to only
	 * consider its least significant half, wake64 should perform
	 * any necessary fixup (e.g., on big endian platforms).
	 */
	void (*wake64)(const struct ck_ec_ops *, const uint64_t *address);

	/*
	 * Number of iterations for the initial busy wait. 0 defaults
	 * to 100 (not ABI stable).
	 */
	uint32_t busy_loop_iter;

	/*
	 * Delay in nanoseconds for the first iteration of the
	 * exponential backoff. 0 defaults to 2 ms (not ABI stable).
	 */
	uint32_t initial_wait_ns;

	/*
	 * Scale factor for the exponential backoff. 0 defaults to 8x
	 * (not ABI stable).
	 */
	uint32_t wait_scale_factor;

	/*
	 * Right shift count for the exponential backoff. The update
	 * after each iteration is
	 *     wait_ns = (wait_ns * wait_scale_factor) >> wait_shift_count,
	 * until one second has elapsed. After that, the deadline goes
	 * to infinity.
	 */
	uint32_t wait_shift_count;
};
359*74e9b5f2SOlivier Houchard 
360*74e9b5f2SOlivier Houchard /*
361*74e9b5f2SOlivier Houchard  * ck_ec_mode wraps the ops table, and informs the fast path whether
362*74e9b5f2SOlivier Houchard  * it should attempt to specialize for single producer mode.
363*74e9b5f2SOlivier Houchard  *
364*74e9b5f2SOlivier Houchard  * mode structs are expected to be exposed by value, e.g.,
365*74e9b5f2SOlivier Houchard  *
366*74e9b5f2SOlivier Houchard  *    extern const struct ck_ec_ops system_ec_ops;
367*74e9b5f2SOlivier Houchard  *
368*74e9b5f2SOlivier Houchard  *    static const struct ck_ec_mode ec_sp = {
369*74e9b5f2SOlivier Houchard  *	  .ops = &system_ec_ops,
370*74e9b5f2SOlivier Houchard  *	  .single_producer = true
371*74e9b5f2SOlivier Houchard  *    };
372*74e9b5f2SOlivier Houchard  *
373*74e9b5f2SOlivier Houchard  *    static const struct ck_ec_mode ec_mp = {
374*74e9b5f2SOlivier Houchard  *	  .ops = &system_ec_ops,
375*74e9b5f2SOlivier Houchard  *	  .single_producer = false
376*74e9b5f2SOlivier Houchard  *    };
377*74e9b5f2SOlivier Houchard  *
378*74e9b5f2SOlivier Houchard  * ck_ec_mode structs are only passed to inline functions defined in
379*74e9b5f2SOlivier Houchard  * this header, and never escape to their slow paths, so they should
380*74e9b5f2SOlivier Houchard  * not result in any object file size increase.
381*74e9b5f2SOlivier Houchard  */
struct ck_ec_mode {
	const struct ck_ec_ops *ops;	/* Platform ops table (see above). */
	/*
	 * If single_producer is true, the event count has a unique
	 * incrementer. The implementation will specialize ck_ec_inc
	 * and ck_ec_add if possible (if CK_F_EC_SP is defined).
	 */
	bool single_producer;
};
391*74e9b5f2SOlivier Houchard 
/* 32 bit event count: 31 bit counter plus one wakeup-needed flag bit. */
struct ck_ec32 {
	/* Flag is "sign" bit, value in bits 0:30. */
	uint32_t counter;
};

typedef struct ck_ec32 ck_ec32_t;
398*74e9b5f2SOlivier Houchard 
#ifdef CK_F_EC64
/* 64 bit event count: 63 bit counter plus one wakeup-needed flag bit. */
struct ck_ec64 {
	/*
	 * Flag is bottom bit, value in bits 1:63. Eventcount only
	 * works on x86-64 (i.e., little endian), so the futex int
	 * lies in the first 4 (bottom) bytes.
	 */
	uint64_t counter;
};

typedef struct ck_ec64 ck_ec64_t;
#endif /* CK_F_EC64 */
411*74e9b5f2SOlivier Houchard 
/* Static initializer; works for both ck_ec32 and ck_ec64. */
#define CK_EC_INITIALIZER { .counter = 0 }

/*
 * Initializes the event count to `value`. The value must not
 * exceed INT32_MAX.
 */
static void ck_ec32_init(struct ck_ec32 *ec, uint32_t value);

#ifndef CK_F_EC64
#define ck_ec_init ck_ec32_init
#else
/*
 * Initializes the event count to `value`. The value must not
 * exceed INT64_MAX.
 */
static void ck_ec64_init(struct ck_ec64 *ec, uint64_t value);

#if __STDC_VERSION__ >= 201112L
/* Type-generic wrapper (C11): dispatches on the event count's type. */
#define ck_ec_init(EC, VALUE)				\
	(_Generic(*(EC),				\
		  struct ck_ec32 : ck_ec32_init,	\
		  struct ck_ec64 : ck_ec64_init)((EC), (VALUE)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */
436*74e9b5f2SOlivier Houchard 
/*
 * Returns the counter value in the event count. The value is at most
 * INT32_MAX.
 */
static uint32_t ck_ec32_value(const struct ck_ec32* ec);

#ifndef CK_F_EC64
#define ck_ec_value ck_ec32_value
#else
/*
 * Returns the counter value in the event count. The value is at most
 * INT64_MAX.
 */
static uint64_t ck_ec64_value(const struct ck_ec64* ec);

#if __STDC_VERSION__ >= 201112L
/* Type-generic wrapper (C11): dispatches on the event count's type. */
#define ck_ec_value(EC)					\
	(_Generic(*(EC),				\
		  struct ck_ec32 : ck_ec32_value,	\
		struct ck_ec64 : ck_ec64_value)((EC)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */
459*74e9b5f2SOlivier Houchard 
/*
 * Returns whether there may be slow pathed waiters that need an
 * explicit OS wakeup for this event count.
 */
static bool ck_ec32_has_waiters(const struct ck_ec32 *ec);

#ifndef CK_F_EC64
#define ck_ec_has_waiters ck_ec32_has_waiters
#else
/* Same as ck_ec32_has_waiters, for 64 bit event counts. */
static bool ck_ec64_has_waiters(const struct ck_ec64 *ec);

#if __STDC_VERSION__ >= 201112L
/* Type-generic wrapper (C11): dispatches on the event count's type. */
#define ck_ec_has_waiters(EC)				      \
	(_Generic(*(EC),				      \
		  struct ck_ec32 : ck_ec32_has_waiters,	      \
		  struct ck_ec64 : ck_ec64_has_waiters)((EC)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */
478*74e9b5f2SOlivier Houchard 
479*74e9b5f2SOlivier Houchard /*
480*74e9b5f2SOlivier Houchard  * Increments the counter value in the event count by one, and wakes
481*74e9b5f2SOlivier Houchard  * up any waiter.
482*74e9b5f2SOlivier Houchard  */
483*74e9b5f2SOlivier Houchard static void ck_ec32_inc(struct ck_ec32 *ec, const struct ck_ec_mode *mode);
484*74e9b5f2SOlivier Houchard 
485*74e9b5f2SOlivier Houchard #ifndef CK_F_EC64
486*74e9b5f2SOlivier Houchard #define ck_ec_inc ck_ec32_inc
487*74e9b5f2SOlivier Houchard #else
488*74e9b5f2SOlivier Houchard static void ck_ec64_inc(struct ck_ec64 *ec, const struct ck_ec_mode *mode);
489*74e9b5f2SOlivier Houchard 
490*74e9b5f2SOlivier Houchard #if __STDC_VERSION__ >= 201112L
491*74e9b5f2SOlivier Houchard #define ck_ec_inc(EC, MODE)					\
492*74e9b5f2SOlivier Houchard 	(_Generic(*(EC),					\
493*74e9b5f2SOlivier Houchard 		  struct ck_ec32 : ck_ec32_inc,			\
494*74e9b5f2SOlivier Houchard 		  struct ck_ec64 : ck_ec64_inc)((EC), (MODE)))
495*74e9b5f2SOlivier Houchard #endif /* __STDC_VERSION__ */
496*74e9b5f2SOlivier Houchard #endif /* CK_F_EC64 */
497*74e9b5f2SOlivier Houchard 
498*74e9b5f2SOlivier Houchard /*
499*74e9b5f2SOlivier Houchard  * Increments the counter value in the event count by delta, wakes
500*74e9b5f2SOlivier Houchard  * up any waiter, and returns the previous counter value.
501*74e9b5f2SOlivier Houchard  */
502*74e9b5f2SOlivier Houchard static uint32_t ck_ec32_add(struct ck_ec32 *ec,
503*74e9b5f2SOlivier Houchard 			    const struct ck_ec_mode *mode,
504*74e9b5f2SOlivier Houchard 			    uint32_t delta);
505*74e9b5f2SOlivier Houchard 
506*74e9b5f2SOlivier Houchard #ifndef CK_F_EC64
507*74e9b5f2SOlivier Houchard #define ck_ec_add ck_ec32_add
508*74e9b5f2SOlivier Houchard #else
509*74e9b5f2SOlivier Houchard static uint64_t ck_ec64_add(struct ck_ec64 *ec,
510*74e9b5f2SOlivier Houchard 			    const struct ck_ec_mode *mode,
511*74e9b5f2SOlivier Houchard 			    uint64_t delta);
512*74e9b5f2SOlivier Houchard 
513*74e9b5f2SOlivier Houchard #if __STDC_VERSION__ >= 201112L
514*74e9b5f2SOlivier Houchard #define ck_ec_add(EC, MODE, DELTA)					\
515*74e9b5f2SOlivier Houchard 	(_Generic(*(EC),						\
516*74e9b5f2SOlivier Houchard 		  struct ck_ec32 : ck_ec32_add,				\
517*74e9b5f2SOlivier Houchard 		  struct ck_ec64 : ck_ec64_add)((EC), (MODE), (DELTA)))
518*74e9b5f2SOlivier Houchard #endif /* __STDC_VERSION__ */
519*74e9b5f2SOlivier Houchard #endif /* CK_F_EC64 */
520*74e9b5f2SOlivier Houchard 
521*74e9b5f2SOlivier Houchard /*
522*74e9b5f2SOlivier Houchard  * Populates `new_deadline` with a deadline `timeout` in the future.
523*74e9b5f2SOlivier Houchard  * Returns 0 on success, and -1 if clock_gettime failed, in which
524*74e9b5f2SOlivier Houchard  * case errno is left as is.
525*74e9b5f2SOlivier Houchard  */
526*74e9b5f2SOlivier Houchard static int ck_ec_deadline(struct timespec *new_deadline,
527*74e9b5f2SOlivier Houchard 			  const struct ck_ec_mode *mode,
528*74e9b5f2SOlivier Houchard 			  const struct timespec *timeout);
529*74e9b5f2SOlivier Houchard 
530*74e9b5f2SOlivier Houchard /*
531*74e9b5f2SOlivier Houchard  * Waits until the counter value in the event count differs from
532*74e9b5f2SOlivier Houchard  * old_value, or, if deadline is non-NULL, until CLOCK_MONOTONIC is
533*74e9b5f2SOlivier Houchard  * past the deadline.
534*74e9b5f2SOlivier Houchard  *
535*74e9b5f2SOlivier Houchard  * Returns 0 on success, and -1 on timeout.
536*74e9b5f2SOlivier Houchard  */
537*74e9b5f2SOlivier Houchard static int ck_ec32_wait(struct ck_ec32 *ec,
538*74e9b5f2SOlivier Houchard 			const struct ck_ec_mode *mode,
539*74e9b5f2SOlivier Houchard 			uint32_t old_value,
540*74e9b5f2SOlivier Houchard 			const struct timespec *deadline);
541*74e9b5f2SOlivier Houchard 
542*74e9b5f2SOlivier Houchard #ifndef CK_F_EC64
543*74e9b5f2SOlivier Houchard #define ck_ec_wait ck_ec32_wait
544*74e9b5f2SOlivier Houchard #else
545*74e9b5f2SOlivier Houchard static int ck_ec64_wait(struct ck_ec64 *ec,
546*74e9b5f2SOlivier Houchard 			const struct ck_ec_mode *mode,
547*74e9b5f2SOlivier Houchard 			uint64_t old_value,
548*74e9b5f2SOlivier Houchard 			const struct timespec *deadline);
549*74e9b5f2SOlivier Houchard 
550*74e9b5f2SOlivier Houchard #if __STDC_VERSION__ >= 201112L
551*74e9b5f2SOlivier Houchard #define ck_ec_wait(EC, MODE, OLD_VALUE, DEADLINE)			\
552*74e9b5f2SOlivier Houchard 	(_Generic(*(EC),						\
553*74e9b5f2SOlivier Houchard 		  struct ck_ec32 : ck_ec32_wait,			\
554*74e9b5f2SOlivier Houchard 		  struct ck_ec64 : ck_ec64_wait)((EC), (MODE),		\
555*74e9b5f2SOlivier Houchard 						 (OLD_VALUE), (DEADLINE)))
556*74e9b5f2SOlivier Houchard 
557*74e9b5f2SOlivier Houchard #endif /* __STDC_VERSION__ */
558*74e9b5f2SOlivier Houchard #endif /* CK_F_EC64 */
559*74e9b5f2SOlivier Houchard 
560*74e9b5f2SOlivier Houchard /*
561*74e9b5f2SOlivier Houchard  * Waits until the counter value in the event count differs from
562*74e9b5f2SOlivier Houchard  * old_value, pred returns non-zero, or, if deadline is non-NULL,
563*74e9b5f2SOlivier Houchard  * until CLOCK_MONOTONIC is past the deadline.
564*74e9b5f2SOlivier Houchard  *
565*74e9b5f2SOlivier Houchard  * Returns 0 on success, -1 on timeout, and the return value of pred
566*74e9b5f2SOlivier Houchard  * if it returns non-zero.
567*74e9b5f2SOlivier Houchard  *
568*74e9b5f2SOlivier Houchard  * A NULL pred represents a function that always returns 0.
569*74e9b5f2SOlivier Houchard  */
570*74e9b5f2SOlivier Houchard static int ck_ec32_wait_pred(struct ck_ec32 *ec,
571*74e9b5f2SOlivier Houchard 			     const struct ck_ec_mode *mode,
572*74e9b5f2SOlivier Houchard 			     uint32_t old_value,
573*74e9b5f2SOlivier Houchard 			     int (*pred)(const struct ck_ec_wait_state *,
574*74e9b5f2SOlivier Houchard 					 struct timespec *deadline),
575*74e9b5f2SOlivier Houchard 			     void *data,
576*74e9b5f2SOlivier Houchard 			     const struct timespec *deadline);
577*74e9b5f2SOlivier Houchard 
578*74e9b5f2SOlivier Houchard #ifndef CK_F_EC64
579*74e9b5f2SOlivier Houchard #define ck_ec_wait_pred ck_ec32_wait_pred
580*74e9b5f2SOlivier Houchard #else
581*74e9b5f2SOlivier Houchard static int ck_ec64_wait_pred(struct ck_ec64 *ec,
582*74e9b5f2SOlivier Houchard 			     const struct ck_ec_mode *mode,
583*74e9b5f2SOlivier Houchard 			     uint64_t old_value,
584*74e9b5f2SOlivier Houchard 			     int (*pred)(const struct ck_ec_wait_state *,
585*74e9b5f2SOlivier Houchard 					 struct timespec *deadline),
586*74e9b5f2SOlivier Houchard 			     void *data,
587*74e9b5f2SOlivier Houchard 			     const struct timespec *deadline);
588*74e9b5f2SOlivier Houchard 
589*74e9b5f2SOlivier Houchard #if __STDC_VERSION__ >= 201112L
590*74e9b5f2SOlivier Houchard #define ck_ec_wait_pred(EC, MODE, OLD_VALUE, PRED, DATA, DEADLINE)	\
591*74e9b5f2SOlivier Houchard 	(_Generic(*(EC),						\
592*74e9b5f2SOlivier Houchard 		  struct ck_ec32 : ck_ec32_wait_pred,			\
593*74e9b5f2SOlivier Houchard 		  struct ck_ec64 : ck_ec64_wait_pred)			\
594*74e9b5f2SOlivier Houchard 	 ((EC), (MODE), (OLD_VALUE), (PRED), (DATA), (DEADLINE)))
595*74e9b5f2SOlivier Houchard #endif /* __STDC_VERSION__ */
596*74e9b5f2SOlivier Houchard #endif /* CK_F_EC64 */
597*74e9b5f2SOlivier Houchard 
598*74e9b5f2SOlivier Houchard /*
599*74e9b5f2SOlivier Houchard  * Inline implementation details. 32 bit first, then 64 bit
600*74e9b5f2SOlivier Houchard  * conditionally.
601*74e9b5f2SOlivier Houchard  */
ck_ec32_init(struct ck_ec32 * ec,uint32_t value)602*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE void ck_ec32_init(struct ck_ec32 *ec, uint32_t value)
603*74e9b5f2SOlivier Houchard {
604*74e9b5f2SOlivier Houchard 	ec->counter = value & ~(1UL << 31);
605*74e9b5f2SOlivier Houchard 	return;
606*74e9b5f2SOlivier Houchard }
607*74e9b5f2SOlivier Houchard 
ck_ec32_value(const struct ck_ec32 * ec)608*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE uint32_t ck_ec32_value(const struct ck_ec32 *ec)
609*74e9b5f2SOlivier Houchard {
610*74e9b5f2SOlivier Houchard 	uint32_t ret = ck_pr_load_32(&ec->counter) & ~(1UL << 31);
611*74e9b5f2SOlivier Houchard 
612*74e9b5f2SOlivier Houchard 	ck_pr_fence_acquire();
613*74e9b5f2SOlivier Houchard 	return ret;
614*74e9b5f2SOlivier Houchard }
615*74e9b5f2SOlivier Houchard 
ck_ec32_has_waiters(const struct ck_ec32 * ec)616*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE bool ck_ec32_has_waiters(const struct ck_ec32 *ec)
617*74e9b5f2SOlivier Houchard {
618*74e9b5f2SOlivier Houchard 	return ck_pr_load_32(&ec->counter) & (1UL << 31);
619*74e9b5f2SOlivier Houchard }
620*74e9b5f2SOlivier Houchard 
621*74e9b5f2SOlivier Houchard /* Slow path for ck_ec{32,64}_{inc,add} */
622*74e9b5f2SOlivier Houchard void ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops);
623*74e9b5f2SOlivier Houchard 
ck_ec32_inc(struct ck_ec32 * ec,const struct ck_ec_mode * mode)624*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE void ck_ec32_inc(struct ck_ec32 *ec,
625*74e9b5f2SOlivier Houchard 				    const struct ck_ec_mode *mode)
626*74e9b5f2SOlivier Houchard {
627*74e9b5f2SOlivier Houchard #if !defined(CK_F_EC_SP)
628*74e9b5f2SOlivier Houchard 	/* Nothing to specialize if we don't have EC_SP. */
629*74e9b5f2SOlivier Houchard 	ck_ec32_add(ec, mode, 1);
630*74e9b5f2SOlivier Houchard 	return;
631*74e9b5f2SOlivier Houchard #else
632*74e9b5f2SOlivier Houchard 	char flagged;
633*74e9b5f2SOlivier Houchard 
634*74e9b5f2SOlivier Houchard #if __GNUC__ >= 6
635*74e9b5f2SOlivier Houchard 	/*
636*74e9b5f2SOlivier Houchard 	 * We don't want to wake if the sign bit is 0. We do want to
637*74e9b5f2SOlivier Houchard 	 * wake if the sign bit just flipped from 1 to 0. We don't
638*74e9b5f2SOlivier Houchard 	 * care what happens when our increment caused the sign bit to
639*74e9b5f2SOlivier Houchard 	 * flip from 0 to 1 (that's once per 2^31 increment).
640*74e9b5f2SOlivier Houchard 	 *
641*74e9b5f2SOlivier Houchard 	 * This leaves us with four cases:
642*74e9b5f2SOlivier Houchard 	 *
643*74e9b5f2SOlivier Houchard 	 *  old sign bit | new sign bit | SF | OF | ZF
644*74e9b5f2SOlivier Houchard 	 *  -------------------------------------------
645*74e9b5f2SOlivier Houchard 	 *	       0 |	      0 |  0 |	0 | ?
646*74e9b5f2SOlivier Houchard 	 *	       0 |	      1 |  1 |	0 | ?
647*74e9b5f2SOlivier Houchard 	 *	       1 |	      1 |  1 |	0 | ?
648*74e9b5f2SOlivier Houchard 	 *	       1 |	      0 |  0 |	0 | 1
649*74e9b5f2SOlivier Houchard 	 *
650*74e9b5f2SOlivier Houchard 	 * In the first case, we don't want to hit ck_ec32_wake. In
651*74e9b5f2SOlivier Houchard 	 * the last two cases, we do want to call ck_ec32_wake. In the
652*74e9b5f2SOlivier Houchard 	 * second case, we don't care, so we arbitrarily choose to
653*74e9b5f2SOlivier Houchard 	 * call ck_ec32_wake.
654*74e9b5f2SOlivier Houchard 	 *
655*74e9b5f2SOlivier Houchard 	 * The "le" condition checks if SF != OF, or ZF == 1, which
656*74e9b5f2SOlivier Houchard 	 * meets our requirements.
657*74e9b5f2SOlivier Houchard 	 */
658*74e9b5f2SOlivier Houchard #define CK_EC32_INC_ASM(PREFIX)					\
659*74e9b5f2SOlivier Houchard 	__asm__ volatile(PREFIX " incl %0"		    \
660*74e9b5f2SOlivier Houchard 			 : "+m"(ec->counter), "=@ccle"(flagged)	 \
661*74e9b5f2SOlivier Houchard 			 :: "cc", "memory")
662*74e9b5f2SOlivier Houchard #else
663*74e9b5f2SOlivier Houchard #define CK_EC32_INC_ASM(PREFIX)						\
664*74e9b5f2SOlivier Houchard 	__asm__ volatile(PREFIX " incl %0; setle %1"			\
665*74e9b5f2SOlivier Houchard 			 : "+m"(ec->counter), "=r"(flagged)		\
666*74e9b5f2SOlivier Houchard 			 :: "cc", "memory")
667*74e9b5f2SOlivier Houchard #endif /* __GNUC__ */
668*74e9b5f2SOlivier Houchard 
669*74e9b5f2SOlivier Houchard 	if (mode->single_producer == true) {
670*74e9b5f2SOlivier Houchard 		ck_pr_fence_store();
671*74e9b5f2SOlivier Houchard 		CK_EC32_INC_ASM("");
672*74e9b5f2SOlivier Houchard 	} else {
673*74e9b5f2SOlivier Houchard 		ck_pr_fence_store_atomic();
674*74e9b5f2SOlivier Houchard 		CK_EC32_INC_ASM("lock");
675*74e9b5f2SOlivier Houchard 	}
676*74e9b5f2SOlivier Houchard #undef CK_EC32_INC_ASM
677*74e9b5f2SOlivier Houchard 
678*74e9b5f2SOlivier Houchard 	if (CK_CC_UNLIKELY(flagged)) {
679*74e9b5f2SOlivier Houchard 		ck_ec32_wake(ec, mode->ops);
680*74e9b5f2SOlivier Houchard 	}
681*74e9b5f2SOlivier Houchard 
682*74e9b5f2SOlivier Houchard 	return;
683*74e9b5f2SOlivier Houchard #endif /* CK_F_EC_SP */
684*74e9b5f2SOlivier Houchard }
685*74e9b5f2SOlivier Houchard 
ck_ec32_add_epilogue(struct ck_ec32 * ec,const struct ck_ec_mode * mode,uint32_t old)686*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE uint32_t ck_ec32_add_epilogue(struct ck_ec32 *ec,
687*74e9b5f2SOlivier Houchard 						 const struct ck_ec_mode *mode,
688*74e9b5f2SOlivier Houchard 						 uint32_t old)
689*74e9b5f2SOlivier Houchard {
690*74e9b5f2SOlivier Houchard 	const uint32_t flag_mask = 1U << 31;
691*74e9b5f2SOlivier Houchard 	uint32_t ret;
692*74e9b5f2SOlivier Houchard 
693*74e9b5f2SOlivier Houchard 	ret = old & ~flag_mask;
694*74e9b5f2SOlivier Houchard 	/* These two only differ if the flag bit is set. */
695*74e9b5f2SOlivier Houchard 	if (CK_CC_UNLIKELY(old != ret)) {
696*74e9b5f2SOlivier Houchard 		ck_ec32_wake(ec, mode->ops);
697*74e9b5f2SOlivier Houchard 	}
698*74e9b5f2SOlivier Houchard 
699*74e9b5f2SOlivier Houchard 	return ret;
700*74e9b5f2SOlivier Houchard }
701*74e9b5f2SOlivier Houchard 
ck_ec32_add_mp(struct ck_ec32 * ec,const struct ck_ec_mode * mode,uint32_t delta)702*74e9b5f2SOlivier Houchard static CK_CC_INLINE uint32_t ck_ec32_add_mp(struct ck_ec32 *ec,
703*74e9b5f2SOlivier Houchard 					    const struct ck_ec_mode *mode,
704*74e9b5f2SOlivier Houchard 					    uint32_t delta)
705*74e9b5f2SOlivier Houchard {
706*74e9b5f2SOlivier Houchard 	uint32_t old;
707*74e9b5f2SOlivier Houchard 
708*74e9b5f2SOlivier Houchard 	ck_pr_fence_store_atomic();
709*74e9b5f2SOlivier Houchard 	old = ck_pr_faa_32(&ec->counter, delta);
710*74e9b5f2SOlivier Houchard 	return ck_ec32_add_epilogue(ec, mode, old);
711*74e9b5f2SOlivier Houchard }
712*74e9b5f2SOlivier Houchard 
713*74e9b5f2SOlivier Houchard #ifdef CK_F_EC_SP
ck_ec32_add_sp(struct ck_ec32 * ec,const struct ck_ec_mode * mode,uint32_t delta)714*74e9b5f2SOlivier Houchard static CK_CC_INLINE uint32_t ck_ec32_add_sp(struct ck_ec32 *ec,
715*74e9b5f2SOlivier Houchard 					    const struct ck_ec_mode *mode,
716*74e9b5f2SOlivier Houchard 					    uint32_t delta)
717*74e9b5f2SOlivier Houchard {
718*74e9b5f2SOlivier Houchard 	uint32_t old;
719*74e9b5f2SOlivier Houchard 
720*74e9b5f2SOlivier Houchard 	/*
721*74e9b5f2SOlivier Houchard 	 * Correctness of this racy write depends on actually
722*74e9b5f2SOlivier Houchard 	 * having an update to write. Exit here if the update
723*74e9b5f2SOlivier Houchard 	 * is a no-op.
724*74e9b5f2SOlivier Houchard 	 */
725*74e9b5f2SOlivier Houchard 	if (CK_CC_UNLIKELY(delta == 0)) {
726*74e9b5f2SOlivier Houchard 		return ck_ec32_value(ec);
727*74e9b5f2SOlivier Houchard 	}
728*74e9b5f2SOlivier Houchard 
729*74e9b5f2SOlivier Houchard 	ck_pr_fence_store();
730*74e9b5f2SOlivier Houchard 	old = delta;
731*74e9b5f2SOlivier Houchard 	__asm__ volatile("xaddl %1, %0"
732*74e9b5f2SOlivier Houchard 			 : "+m"(ec->counter), "+r"(old)
733*74e9b5f2SOlivier Houchard 			 :: "cc", "memory");
734*74e9b5f2SOlivier Houchard 	return ck_ec32_add_epilogue(ec, mode, old);
735*74e9b5f2SOlivier Houchard }
736*74e9b5f2SOlivier Houchard #endif /* CK_F_EC_SP */
737*74e9b5f2SOlivier Houchard 
ck_ec32_add(struct ck_ec32 * ec,const struct ck_ec_mode * mode,uint32_t delta)738*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE uint32_t ck_ec32_add(struct ck_ec32 *ec,
739*74e9b5f2SOlivier Houchard 					const struct ck_ec_mode *mode,
740*74e9b5f2SOlivier Houchard 					uint32_t delta)
741*74e9b5f2SOlivier Houchard {
742*74e9b5f2SOlivier Houchard #ifdef CK_F_EC_SP
743*74e9b5f2SOlivier Houchard 	if (mode->single_producer == true) {
744*74e9b5f2SOlivier Houchard 		return ck_ec32_add_sp(ec, mode, delta);
745*74e9b5f2SOlivier Houchard 	}
746*74e9b5f2SOlivier Houchard #endif
747*74e9b5f2SOlivier Houchard 
748*74e9b5f2SOlivier Houchard 	return ck_ec32_add_mp(ec, mode, delta);
749*74e9b5f2SOlivier Houchard }
750*74e9b5f2SOlivier Houchard 
751*74e9b5f2SOlivier Houchard int ck_ec_deadline_impl(struct timespec *new_deadline,
752*74e9b5f2SOlivier Houchard 			const struct ck_ec_ops *ops,
753*74e9b5f2SOlivier Houchard 			const struct timespec *timeout);
754*74e9b5f2SOlivier Houchard 
ck_ec_deadline(struct timespec * new_deadline,const struct ck_ec_mode * mode,const struct timespec * timeout)755*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE int ck_ec_deadline(struct timespec *new_deadline,
756*74e9b5f2SOlivier Houchard 				      const struct ck_ec_mode *mode,
757*74e9b5f2SOlivier Houchard 				      const struct timespec *timeout)
758*74e9b5f2SOlivier Houchard {
759*74e9b5f2SOlivier Houchard 	return ck_ec_deadline_impl(new_deadline, mode->ops, timeout);
760*74e9b5f2SOlivier Houchard }
761*74e9b5f2SOlivier Houchard 
762*74e9b5f2SOlivier Houchard 
763*74e9b5f2SOlivier Houchard int ck_ec32_wait_slow(struct ck_ec32 *ec,
764*74e9b5f2SOlivier Houchard 		      const struct ck_ec_ops *ops,
765*74e9b5f2SOlivier Houchard 		      uint32_t old_value,
766*74e9b5f2SOlivier Houchard 		      const struct timespec *deadline);
767*74e9b5f2SOlivier Houchard 
ck_ec32_wait(struct ck_ec32 * ec,const struct ck_ec_mode * mode,uint32_t old_value,const struct timespec * deadline)768*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE int ck_ec32_wait(struct ck_ec32 *ec,
769*74e9b5f2SOlivier Houchard 				    const struct ck_ec_mode *mode,
770*74e9b5f2SOlivier Houchard 				    uint32_t old_value,
771*74e9b5f2SOlivier Houchard 				    const struct timespec *deadline)
772*74e9b5f2SOlivier Houchard {
773*74e9b5f2SOlivier Houchard 	if (ck_ec32_value(ec) != old_value) {
774*74e9b5f2SOlivier Houchard 		return 0;
775*74e9b5f2SOlivier Houchard 	}
776*74e9b5f2SOlivier Houchard 
777*74e9b5f2SOlivier Houchard 	return ck_ec32_wait_slow(ec, mode->ops, old_value, deadline);
778*74e9b5f2SOlivier Houchard }
779*74e9b5f2SOlivier Houchard 
780*74e9b5f2SOlivier Houchard int ck_ec32_wait_pred_slow(struct ck_ec32 *ec,
781*74e9b5f2SOlivier Houchard 			   const struct ck_ec_ops *ops,
782*74e9b5f2SOlivier Houchard 			   uint32_t old_value,
783*74e9b5f2SOlivier Houchard 			   int (*pred)(const struct ck_ec_wait_state *state,
784*74e9b5f2SOlivier Houchard 				       struct timespec *deadline),
785*74e9b5f2SOlivier Houchard 			   void *data,
786*74e9b5f2SOlivier Houchard 			   const struct timespec *deadline);
787*74e9b5f2SOlivier Houchard 
788*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE int
ck_ec32_wait_pred(struct ck_ec32 * ec,const struct ck_ec_mode * mode,uint32_t old_value,int (* pred)(const struct ck_ec_wait_state * state,struct timespec * deadline),void * data,const struct timespec * deadline)789*74e9b5f2SOlivier Houchard ck_ec32_wait_pred(struct ck_ec32 *ec,
790*74e9b5f2SOlivier Houchard 		  const struct ck_ec_mode *mode,
791*74e9b5f2SOlivier Houchard 		  uint32_t old_value,
792*74e9b5f2SOlivier Houchard 		  int (*pred)(const struct ck_ec_wait_state *state,
793*74e9b5f2SOlivier Houchard 			      struct timespec *deadline),
794*74e9b5f2SOlivier Houchard 		  void *data,
795*74e9b5f2SOlivier Houchard 		  const struct timespec *deadline)
796*74e9b5f2SOlivier Houchard {
797*74e9b5f2SOlivier Houchard 	if (ck_ec32_value(ec) != old_value) {
798*74e9b5f2SOlivier Houchard 		return 0;
799*74e9b5f2SOlivier Houchard 	}
800*74e9b5f2SOlivier Houchard 
801*74e9b5f2SOlivier Houchard 	return ck_ec32_wait_pred_slow(ec, mode->ops, old_value,
802*74e9b5f2SOlivier Houchard 				      pred, data, deadline);
803*74e9b5f2SOlivier Houchard }
804*74e9b5f2SOlivier Houchard 
805*74e9b5f2SOlivier Houchard #ifdef CK_F_EC64
ck_ec64_init(struct ck_ec64 * ec,uint64_t value)806*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE void ck_ec64_init(struct ck_ec64 *ec, uint64_t value)
807*74e9b5f2SOlivier Houchard {
808*74e9b5f2SOlivier Houchard 	ec->counter = value << 1;
809*74e9b5f2SOlivier Houchard 	return;
810*74e9b5f2SOlivier Houchard }
811*74e9b5f2SOlivier Houchard 
ck_ec64_value(const struct ck_ec64 * ec)812*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE uint64_t ck_ec64_value(const struct ck_ec64 *ec)
813*74e9b5f2SOlivier Houchard {
814*74e9b5f2SOlivier Houchard 	uint64_t ret = ck_pr_load_64(&ec->counter) >> 1;
815*74e9b5f2SOlivier Houchard 
816*74e9b5f2SOlivier Houchard 	ck_pr_fence_acquire();
817*74e9b5f2SOlivier Houchard 	return ret;
818*74e9b5f2SOlivier Houchard }
819*74e9b5f2SOlivier Houchard 
ck_ec64_has_waiters(const struct ck_ec64 * ec)820*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE bool ck_ec64_has_waiters(const struct ck_ec64 *ec)
821*74e9b5f2SOlivier Houchard {
822*74e9b5f2SOlivier Houchard 	return ck_pr_load_64(&ec->counter) & 1;
823*74e9b5f2SOlivier Houchard }
824*74e9b5f2SOlivier Houchard 
825*74e9b5f2SOlivier Houchard void ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops);
826*74e9b5f2SOlivier Houchard 
ck_ec64_inc(struct ck_ec64 * ec,const struct ck_ec_mode * mode)827*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE void ck_ec64_inc(struct ck_ec64 *ec,
828*74e9b5f2SOlivier Houchard 				    const struct ck_ec_mode *mode)
829*74e9b5f2SOlivier Houchard {
830*74e9b5f2SOlivier Houchard 	/* We always xadd, so there's no special optimization here. */
831*74e9b5f2SOlivier Houchard 	(void)ck_ec64_add(ec, mode, 1);
832*74e9b5f2SOlivier Houchard 	return;
833*74e9b5f2SOlivier Houchard }
834*74e9b5f2SOlivier Houchard 
ck_ec_add64_epilogue(struct ck_ec64 * ec,const struct ck_ec_mode * mode,uint64_t old)835*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE uint64_t ck_ec_add64_epilogue(struct ck_ec64 *ec,
836*74e9b5f2SOlivier Houchard 					       const struct ck_ec_mode *mode,
837*74e9b5f2SOlivier Houchard 					       uint64_t old)
838*74e9b5f2SOlivier Houchard {
839*74e9b5f2SOlivier Houchard 	uint64_t ret = old >> 1;
840*74e9b5f2SOlivier Houchard 
841*74e9b5f2SOlivier Houchard 	if (CK_CC_UNLIKELY(old & 1)) {
842*74e9b5f2SOlivier Houchard 		ck_ec64_wake(ec, mode->ops);
843*74e9b5f2SOlivier Houchard 	}
844*74e9b5f2SOlivier Houchard 
845*74e9b5f2SOlivier Houchard 	return ret;
846*74e9b5f2SOlivier Houchard }
847*74e9b5f2SOlivier Houchard 
ck_ec64_add_mp(struct ck_ec64 * ec,const struct ck_ec_mode * mode,uint64_t delta)848*74e9b5f2SOlivier Houchard static CK_CC_INLINE uint64_t ck_ec64_add_mp(struct ck_ec64 *ec,
849*74e9b5f2SOlivier Houchard 					    const struct ck_ec_mode *mode,
850*74e9b5f2SOlivier Houchard 					    uint64_t delta)
851*74e9b5f2SOlivier Houchard {
852*74e9b5f2SOlivier Houchard 	uint64_t inc = 2 * delta;  /* The low bit is the flag bit. */
853*74e9b5f2SOlivier Houchard 
854*74e9b5f2SOlivier Houchard 	ck_pr_fence_store_atomic();
855*74e9b5f2SOlivier Houchard 	return ck_ec_add64_epilogue(ec, mode, ck_pr_faa_64(&ec->counter, inc));
856*74e9b5f2SOlivier Houchard }
857*74e9b5f2SOlivier Houchard 
858*74e9b5f2SOlivier Houchard #ifdef CK_F_EC_SP
859*74e9b5f2SOlivier Houchard /* Single-producer specialisation. */
ck_ec64_add_sp(struct ck_ec64 * ec,const struct ck_ec_mode * mode,uint64_t delta)860*74e9b5f2SOlivier Houchard static CK_CC_INLINE uint64_t ck_ec64_add_sp(struct ck_ec64 *ec,
861*74e9b5f2SOlivier Houchard 					    const struct ck_ec_mode *mode,
862*74e9b5f2SOlivier Houchard 					    uint64_t delta)
863*74e9b5f2SOlivier Houchard {
864*74e9b5f2SOlivier Houchard 	uint64_t old;
865*74e9b5f2SOlivier Houchard 
866*74e9b5f2SOlivier Houchard 	/*
867*74e9b5f2SOlivier Houchard 	 * Correctness of this racy write depends on actually
868*74e9b5f2SOlivier Houchard 	 * having an update to write. Exit here if the update
869*74e9b5f2SOlivier Houchard 	 * is a no-op.
870*74e9b5f2SOlivier Houchard 	 */
871*74e9b5f2SOlivier Houchard 	if (CK_CC_UNLIKELY(delta == 0)) {
872*74e9b5f2SOlivier Houchard 		return ck_ec64_value(ec);
873*74e9b5f2SOlivier Houchard 	}
874*74e9b5f2SOlivier Houchard 
875*74e9b5f2SOlivier Houchard 	ck_pr_fence_store();
876*74e9b5f2SOlivier Houchard 	old = 2 * delta;  /* The low bit is the flag bit. */
877*74e9b5f2SOlivier Houchard 	__asm__ volatile("xaddq %1, %0"
878*74e9b5f2SOlivier Houchard 			 : "+m"(ec->counter), "+r"(old)
879*74e9b5f2SOlivier Houchard 			 :: "cc", "memory");
880*74e9b5f2SOlivier Houchard 	return ck_ec_add64_epilogue(ec, mode, old);
881*74e9b5f2SOlivier Houchard }
882*74e9b5f2SOlivier Houchard #endif /* CK_F_EC_SP */
883*74e9b5f2SOlivier Houchard 
884*74e9b5f2SOlivier Houchard /*
885*74e9b5f2SOlivier Houchard  * Dispatch on mode->single_producer in this FORCE_INLINE function:
886*74e9b5f2SOlivier Houchard  * the end result is always small, but not all compilers have enough
887*74e9b5f2SOlivier Houchard  * foresight to inline and get the reduction.
888*74e9b5f2SOlivier Houchard  */
ck_ec64_add(struct ck_ec64 * ec,const struct ck_ec_mode * mode,uint64_t delta)889*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE uint64_t ck_ec64_add(struct ck_ec64 *ec,
890*74e9b5f2SOlivier Houchard 					const struct ck_ec_mode *mode,
891*74e9b5f2SOlivier Houchard 					uint64_t delta)
892*74e9b5f2SOlivier Houchard {
893*74e9b5f2SOlivier Houchard #ifdef CK_F_EC_SP
894*74e9b5f2SOlivier Houchard 	if (mode->single_producer == true) {
895*74e9b5f2SOlivier Houchard 		return ck_ec64_add_sp(ec, mode, delta);
896*74e9b5f2SOlivier Houchard 	}
897*74e9b5f2SOlivier Houchard #endif
898*74e9b5f2SOlivier Houchard 
899*74e9b5f2SOlivier Houchard 	return ck_ec64_add_mp(ec, mode, delta);
900*74e9b5f2SOlivier Houchard }
901*74e9b5f2SOlivier Houchard 
902*74e9b5f2SOlivier Houchard int ck_ec64_wait_slow(struct ck_ec64 *ec,
903*74e9b5f2SOlivier Houchard 		      const struct ck_ec_ops *ops,
904*74e9b5f2SOlivier Houchard 		      uint64_t old_value,
905*74e9b5f2SOlivier Houchard 		      const struct timespec *deadline);
906*74e9b5f2SOlivier Houchard 
ck_ec64_wait(struct ck_ec64 * ec,const struct ck_ec_mode * mode,uint64_t old_value,const struct timespec * deadline)907*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE int ck_ec64_wait(struct ck_ec64 *ec,
908*74e9b5f2SOlivier Houchard 				    const struct ck_ec_mode *mode,
909*74e9b5f2SOlivier Houchard 				    uint64_t old_value,
910*74e9b5f2SOlivier Houchard 				    const struct timespec *deadline)
911*74e9b5f2SOlivier Houchard {
912*74e9b5f2SOlivier Houchard 	if (ck_ec64_value(ec) != old_value) {
913*74e9b5f2SOlivier Houchard 		return 0;
914*74e9b5f2SOlivier Houchard 	}
915*74e9b5f2SOlivier Houchard 
916*74e9b5f2SOlivier Houchard 	return ck_ec64_wait_slow(ec, mode->ops, old_value, deadline);
917*74e9b5f2SOlivier Houchard }
918*74e9b5f2SOlivier Houchard 
919*74e9b5f2SOlivier Houchard int ck_ec64_wait_pred_slow(struct ck_ec64 *ec,
920*74e9b5f2SOlivier Houchard 			   const struct ck_ec_ops *ops,
921*74e9b5f2SOlivier Houchard 			   uint64_t old_value,
922*74e9b5f2SOlivier Houchard 			   int (*pred)(const struct ck_ec_wait_state *state,
923*74e9b5f2SOlivier Houchard 				       struct timespec *deadline),
924*74e9b5f2SOlivier Houchard 			   void *data,
925*74e9b5f2SOlivier Houchard 			   const struct timespec *deadline);
926*74e9b5f2SOlivier Houchard 
927*74e9b5f2SOlivier Houchard 
928*74e9b5f2SOlivier Houchard CK_CC_FORCE_INLINE int
ck_ec64_wait_pred(struct ck_ec64 * ec,const struct ck_ec_mode * mode,uint64_t old_value,int (* pred)(const struct ck_ec_wait_state * state,struct timespec * deadline),void * data,const struct timespec * deadline)929*74e9b5f2SOlivier Houchard ck_ec64_wait_pred(struct ck_ec64 *ec,
930*74e9b5f2SOlivier Houchard 		  const struct ck_ec_mode *mode,
931*74e9b5f2SOlivier Houchard 		  uint64_t old_value,
932*74e9b5f2SOlivier Houchard 		  int (*pred)(const struct ck_ec_wait_state *state,
933*74e9b5f2SOlivier Houchard 			      struct timespec *deadline),
934*74e9b5f2SOlivier Houchard 		  void *data,
935*74e9b5f2SOlivier Houchard 		  const struct timespec *deadline)
936*74e9b5f2SOlivier Houchard {
937*74e9b5f2SOlivier Houchard 	if (ck_ec64_value(ec) != old_value) {
938*74e9b5f2SOlivier Houchard 		return 0;
939*74e9b5f2SOlivier Houchard 	}
940*74e9b5f2SOlivier Houchard 
941*74e9b5f2SOlivier Houchard 	return ck_ec64_wait_pred_slow(ec, mode->ops, old_value,
942*74e9b5f2SOlivier Houchard 				      pred, data, deadline);
943*74e9b5f2SOlivier Houchard }
944*74e9b5f2SOlivier Houchard #endif /* CK_F_EC64 */
945*74e9b5f2SOlivier Houchard #endif /* !CK_EC_H */
946