/*-
 * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
 * Copyright (c) 2005 Matthew Dillon <dillon@backplane.com>
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <assert.h>
#include <errno.h>
#include <unistd.h>
#include <sys/time.h>

#include "thr_private.h"

#define cpu_ccfence()	__asm __volatile("" : : : "memory")

/*
 * This function is used to acquire a contested lock.
 *
 * There is a performance trade-off between spinning and sleeping.  In
 * a heavily multi-threaded program, heavily contested locks that
 * sleep and wake up generate a large IPI load on the system; qemu
 * with many CPUs configured, for example.  It winds up being much
 * faster to spin instead.
 *
 * So the first optimization here is to hard loop in-scale with the
 * number of threads.
 *
 * The second optimization is to wake up just one waiter at a time.
 * This is fraught with issues because waiters can abort, and races
 * can result in nobody being woken up to acquire the released lock,
 * so to smooth things over sleeps are limited to 1ms before we retry.
 */
int
__thr_umtx_lock(volatile umtx_t *mtx, int id, int timo)
{
	int v;
	int errval;
	int ret = 0;
	int retry = _thread_active_threads * 200 + 10;

	v = *mtx;
	cpu_ccfence();
	id &= 0x3FFFFFFF;

	for (;;) {
		cpu_pause();
		if (v == 0) {
			if (atomic_fcmpset_int(mtx, &v, id))
				break;
			continue;
		}
		if (--retry) {
			v = *mtx;
			continue;
		}

		/*
		 * Set the waiting bit.  If the fcmpset fails v is loaded
		 * with the current content of the mutex, and if the waiting
		 * bit is already set, we can also sleep.
		 */
		if (atomic_fcmpset_int(mtx, &v, v|0x40000000) ||
		    (v & 0x40000000)) {
			if (timo == 0) {
				_umtx_sleep_err(mtx, v|0x40000000, 1000);
			} else if (timo > 1500) {
				/*
				 * Short sleep and retry.  Because umtx
				 * ops can timeout and abort, wakeup1()
				 * races can cause a wakeup to be missed.
				 */
				_umtx_sleep_err(mtx, v|0x40000000, 1000);
				timo -= 1000;
			} else {
				/*
				 * Final sleep, do one last attempt to get
				 * the lock before giving up.
				 */
				errval = _umtx_sleep_err(mtx, v|0x40000000,
							 timo);
				if (__predict_false(errval == EAGAIN)) {
					if (atomic_cmpset_acq_int(mtx, 0, id))
						ret = 0;
					else
						ret = ETIMEDOUT;
					break;
				}
			}
		}
		retry = _thread_active_threads * 200 + 10;
	}
	return (ret);
}
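
/*
 * Illustrative sketch (not compiled): how the contested path above is
 * expected to be reached.  This assumes the usual split between an
 * inline uncontested 0 -> id compare-and-set fast path and the
 * spin/sleep fallback above; the real inline wrapper lives in the
 * library headers and may differ.
 */
#if 0
static __inline int
example_umtx_lock(volatile umtx_t *mtx, int id)
{
	/* Fast path: uncontested acquire, 0 -> id, acquire semantics. */
	if (atomic_cmpset_acq_int(mtx, 0, id))
		return (0);
	/* Slow path: spin, then sleep, with no timeout. */
	return (__thr_umtx_lock(mtx, id, 0));
}
#endif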

/*
 * Inline followup when releasing a mutex.  The mutex has been released
 * but 'v' either doesn't match id or needs a wakeup.
 */
void
__thr_umtx_unlock(volatile umtx_t *mtx, int v, int id)
{
	if (v & 0x40000000) {
		_umtx_wakeup_err(mtx, 1);
		v &= 0x3FFFFFFF;
	}
	THR_ASSERT(v == id, "thr_umtx_unlock: wrong owner");
}
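
/*
 * Illustrative sketch (not compiled): a release fast path matching the
 * followup above.  Assumption: the inline caller atomically swaps the
 * lock word to 0 and only calls __thr_umtx_unlock() with the old value
 * when that value is not simply its own id.
 */
#if 0
static __inline void
example_umtx_unlock(volatile umtx_t *mtx, int id)
{
	int v = *mtx;

	/* Release: swap the lock word to 0, capturing the old value. */
	while (!atomic_fcmpset_int(mtx, &v, 0))
		;	/* fcmpset reloads v on failure */
	if (v != id)
		__thr_umtx_unlock(mtx, v, id);	/* wakeup and/or assert */
}
#endif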

/*
 * Low level timed umtx lock.  This function must never return
 * EINTR.
 */
int
__thr_umtx_timedlock(volatile umtx_t *mtx, int id,
    const struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	int timo, ret;

	if ((timeout->tv_sec < 0) ||
	    (timeout->tv_sec == 0 && timeout->tv_nsec <= 0)) {
		return (ETIMEDOUT);
	}
	/* XXX this should use a monotonic clock! */
	clock_gettime(CLOCK_REALTIME, &ts);
	timespecadd(&ts, timeout, &ts);
	ts2 = *timeout;

	id &= 0x3FFFFFFF;

	for (;;) {
		if (ts2.tv_nsec) {
			timo = (int)(ts2.tv_nsec / 1000);
			if (timo == 0)
				timo = 1;
		} else {
			timo = 1000000;
		}
		ret = __thr_umtx_lock(mtx, id, timo);
		if (ret != EINTR && ret != ETIMEDOUT)
			break;
		clock_gettime(CLOCK_REALTIME, &ts3);
		timespecsub(&ts, &ts3, &ts2);
		if (ts2.tv_sec < 0 ||
		    (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
			ret = ETIMEDOUT;
			break;
		}
	}
	return (ret);
}
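
/*
 * Usage sketch (not compiled): acquiring with a 50ms relative timeout.
 * The timeout argument is relative, and expiry is reported as
 * ETIMEDOUT rather than EINTR, so a caller only has to distinguish
 * success from timeout.  The variables here are hypothetical.
 */
#if 0
	struct timespec ts = { 0, 50 * 1000 * 1000 };	/* 50ms */

	if (__thr_umtx_timedlock(mtx, id, &ts) == ETIMEDOUT) {
		/* the lock could not be acquired within 50ms */
	}
#endif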

/*
 * Regular umtx wait that cannot return EINTR
 */
int
_thr_umtx_wait(volatile umtx_t *mtx, int exp, const struct timespec *timeout,
    int clockid)
{
	struct timespec ts, ts2, ts3;
	int timo, errval, ret = 0;

	cpu_ccfence();
	if (*mtx != exp)
		return (0);

	if (timeout == NULL) {
		/*
		 * NOTE: If no timeout, EINTR cannot be returned.  Ignore
		 *	 EINTR.
		 */
		while ((errval = _umtx_sleep_err(mtx, exp, 10000000)) > 0) {
			if (errval == EBUSY)
				break;
#if 0
			if (errval == ETIMEDOUT || errval == EWOULDBLOCK) {
				if (*mtx != exp) {
					fprintf(stderr,
					    "thr_umtx_wait: FAULT VALUE CHANGE "
					    "%d -> %d oncond %p\n",
					    exp, *mtx, mtx);
				}
			}
#endif
			if (*mtx != exp)
				return (0);
		}
		return (ret);
	}

	/*
	 * Timed waits can return EINTR
	 */
	if ((timeout->tv_sec < 0) ||
	    (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))
		return (ETIMEDOUT);

	clock_gettime(clockid, &ts);
	timespecadd(&ts, timeout, &ts);
	ts2 = *timeout;

	for (;;) {
		if (ts2.tv_nsec) {
			timo = (int)(ts2.tv_nsec / 1000);
			if (timo == 0)
				timo = 1;
		} else {
			timo = 1000000;
		}

		if ((errval = _umtx_sleep_err(mtx, exp, timo)) > 0) {
			if (errval == EBUSY) {
				ret = 0;
				break;
			}
			if (errval == EINTR) {
				ret = EINTR;
				break;
			}
		}

		clock_gettime(clockid, &ts3);
		timespecsub(&ts, &ts3, &ts2);
		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
			ret = ETIMEDOUT;
			break;
		}
	}
	return (ret);
}
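
/*
 * Usage sketch (not compiled): a one-shot flag built on the wait/wake
 * pair in this file.  'flagp' is hypothetical.  The waiter only blocks
 * while the word still holds the sampled value, and the poster stores
 * the new value before issuing the wakeup.
 */
#if 0
	/* waiter: no timeout, so EINTR is not returned */
	while (*flagp == 0)
		_thr_umtx_wait(flagp, 0, NULL, 0);

	/* poster: publish the new value, then wake one waiter */
	*flagp = 1;
	_thr_umtx_wake(flagp, 1);
#endif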

/*
 * Simple version without a timeout which can also return EINTR
 */
int
_thr_umtx_wait_intr(volatile umtx_t *mtx, int exp)
{
	int ret = 0;
	int errval;

	cpu_ccfence();
	for (;;) {
		if (*mtx != exp)
			return (0);
		errval = _umtx_sleep_err(mtx, exp, 10000000);
		if (errval == 0)
			break;
		if (errval == EBUSY)
			break;
		if (errval == EINTR) {
			ret = errval;
			break;
		}
		cpu_ccfence();
	}
	return (ret);
}

void
_thr_umtx_wake(volatile umtx_t *mtx, int count)
{
	_umtx_wakeup_err(mtx, count);
}