xref: /netbsd-src/sys/kern/kern_mutex.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: kern_mutex.c,v 1.73 2018/02/25 18:54:29 chs Exp $	*/
2 
3 /*-
4  * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Kernel mutex implementation, modeled after those found in Solaris,
34  * a description of which can be found in:
35  *
36  *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
37  *	    Richard McDougall.
38  */
39 
40 #define	__MUTEX_PRIVATE
41 
42 #include <sys/cdefs.h>
43 __KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.73 2018/02/25 18:54:29 chs Exp $");
44 
45 #include <sys/param.h>
46 #include <sys/atomic.h>
47 #include <sys/proc.h>
48 #include <sys/mutex.h>
49 #include <sys/sched.h>
50 #include <sys/sleepq.h>
51 #include <sys/systm.h>
52 #include <sys/lockdebug.h>
53 #include <sys/kernel.h>
54 #include <sys/intr.h>
55 #include <sys/lock.h>
56 #include <sys/types.h>
57 #include <sys/cpu.h>
58 
59 #include <dev/lockstat.h>
60 
61 #include <machine/lock.h>
62 
63 #define MUTEX_PANIC_SKIP_SPIN 1
64 #define MUTEX_PANIC_SKIP_ADAPTIVE 1
65 
66 /*
67  * When not running a debug kernel, spin mutexes are not much
68  * more than an splraiseipl() and splx() pair.
69  */
70 
71 #if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
72 #define	FULL
73 #endif
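
/*
 * To illustrate the point above (a sketch of the fast path, not the
 * actual MD stubs): with FULL undefined, entering and exiting a spin
 * mutex reduces to the SPL bookkeeping done by MUTEX_SPIN_SPLRAISE()
 * and MUTEX_SPIN_SPLRESTORE() below, roughly:
 *
 *	s = splraiseipl(MUTEX_SPIN_IPL(mtx));
 *	... critical section ...
 *	splx(s);		(only once the outermost spin mutex
 *				 held by this CPU is released)
 *
 * In this file the spin-lock bit itself is only manipulated under
 * #ifdef FULL.
 */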
74 
75 /*
76  * Debugging support.
77  */
78 
79 #define	MUTEX_WANTLOCK(mtx)					\
80     LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
81         (uintptr_t)__builtin_return_address(0), 0)
82 #define	MUTEX_TESTLOCK(mtx)					\
83     LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
84         (uintptr_t)__builtin_return_address(0), -1)
85 #define	MUTEX_LOCKED(mtx)					\
86     LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx), NULL,		\
87         (uintptr_t)__builtin_return_address(0), 0)
88 #define	MUTEX_UNLOCKED(mtx)					\
89     LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx),		\
90         (uintptr_t)__builtin_return_address(0), 0)
91 #define	MUTEX_ABORT(mtx, msg)					\
92     mutex_abort(__func__, __LINE__, mtx, msg)
93 
94 #if defined(LOCKDEBUG)
95 
96 #define	MUTEX_DASSERT(mtx, cond)				\
97 do {								\
98 	if (!(cond))						\
99 		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
100 } while (/* CONSTCOND */ 0)
101 
102 #else	/* LOCKDEBUG */
103 
104 #define	MUTEX_DASSERT(mtx, cond)	/* nothing */
105 
106 #endif /* LOCKDEBUG */
107 
108 #if defined(DIAGNOSTIC)
109 
110 #define	MUTEX_ASSERT(mtx, cond)					\
111 do {								\
112 	if (!(cond))						\
113 		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
114 } while (/* CONSTCOND */ 0)
115 
116 #else	/* DIAGNOSTIC */
117 
118 #define	MUTEX_ASSERT(mtx, cond)	/* nothing */
119 
120 #endif	/* DIAGNOSTIC */
121 
122 /*
123  * Some architectures can't use __cpu_simple_lock as-is, so allow a way
124  * for them to use an alternate definition.
125  */
126 #ifndef MUTEX_SPINBIT_LOCK_INIT
127 #define MUTEX_SPINBIT_LOCK_INIT(mtx)	__cpu_simple_lock_init(&(mtx)->mtx_lock)
128 #endif
129 #ifndef MUTEX_SPINBIT_LOCKED_P
130 #define MUTEX_SPINBIT_LOCKED_P(mtx)	__SIMPLELOCK_LOCKED_P(&(mtx)->mtx_lock)
131 #endif
132 #ifndef MUTEX_SPINBIT_LOCK_TRY
133 #define MUTEX_SPINBIT_LOCK_TRY(mtx)	__cpu_simple_lock_try(&(mtx)->mtx_lock)
134 #endif
135 #ifndef MUTEX_SPINBIT_LOCK_UNLOCK
136 #define MUTEX_SPINBIT_LOCK_UNLOCK(mtx)	__cpu_simple_unlock(&(mtx)->mtx_lock)
137 #endif
138 
139 #ifndef MUTEX_INITIALIZE_SPIN_IPL
140 #define MUTEX_INITIALIZE_SPIN_IPL(mtx, ipl) \
141 					((mtx)->mtx_ipl = makeiplcookie((ipl)))
142 #endif
143 
144 /*
145  * Spin mutex SPL save / restore.
146  */
147 
148 #define	MUTEX_SPIN_SPLRAISE(mtx)					\
149 do {									\
150 	struct cpu_info *x__ci;						\
151 	int x__cnt, s;							\
152 	s = splraiseipl(MUTEX_SPIN_IPL(mtx));				\
153 	x__ci = curcpu();						\
154 	x__cnt = x__ci->ci_mtx_count--;					\
155 	__insn_barrier();						\
156 	if (x__cnt == 0)						\
157 		x__ci->ci_mtx_oldspl = (s);				\
158 } while (/* CONSTCOND */ 0)
159 
160 #define	MUTEX_SPIN_SPLRESTORE(mtx)					\
161 do {									\
162 	struct cpu_info *x__ci = curcpu();				\
163 	int s = x__ci->ci_mtx_oldspl;					\
164 	__insn_barrier();						\
165 	if (++(x__ci->ci_mtx_count) == 0)				\
166 		splx(s);						\
167 } while (/* CONSTCOND */ 0)
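
/*
 * The ci_mtx_count bookkeeping above counts the spin mutexes held by
 * this CPU: the counter is decremented by each SPLRAISE and incremented
 * by each SPLRESTORE, and only the outermost acquisition saves (and the
 * matching outermost release restores) the original SPL.  For example,
 * with two hypothetical spin mutexes a (IPL_VM) and b (IPL_SCHED):
 *
 *	mutex_spin_enter(&a);	old SPL saved, now at IPL_VM
 *	mutex_spin_enter(&b);	now at IPL_SCHED, nothing saved
 *	mutex_spin_exit(&b);	SPL stays raised
 *	mutex_spin_exit(&a);	saved SPL restored via splx()
 */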
168 
169 /*
170  * Architectures that provide 'simple' mutexes supply a CAS function
171  * that is either MP-safe, or that does not need to be MP-safe.
172  * Adaptive mutexes on these architectures do not require an
173  * additional interlock.
174  */
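
/*
 * MUTEX_CAS() itself comes from the MD/MI mutex headers.  As an
 * illustration only (assuming atomic_cas_ulong() from <sys/atomic.h>
 * is usable on the port in question), a definition might look like:
 *
 *	#define	MUTEX_CAS(p, o, n)					\
 *	    (atomic_cas_ulong((volatile unsigned long *)(p),		\
 *	    (unsigned long)(o), (unsigned long)(n)) == (unsigned long)(o))
 */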
175 
176 #ifdef __HAVE_SIMPLE_MUTEXES
177 
178 #define	MUTEX_OWNER(owner)						\
179 	(owner & MUTEX_THREAD)
180 #define	MUTEX_HAS_WAITERS(mtx)						\
181 	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)
182 
183 #define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug)				\
184 do {									\
185 	if (!dodebug)							\
186 		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
187 } while (/* CONSTCOND */ 0)
188 
189 #define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl)			\
190 do {									\
191 	(mtx)->mtx_owner = MUTEX_BIT_SPIN;				\
192 	if (!dodebug)							\
193 		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
194 	MUTEX_INITIALIZE_SPIN_IPL((mtx), (ipl));			\
195 	MUTEX_SPINBIT_LOCK_INIT((mtx));					\
196 } while (/* CONSTCOND */ 0)
197 
198 #define	MUTEX_DESTROY(mtx)						\
199 do {									\
200 	(mtx)->mtx_owner = MUTEX_THREAD;				\
201 } while (/* CONSTCOND */ 0)
202 
203 #define	MUTEX_SPIN_P(mtx)		\
204     (((mtx)->mtx_owner & MUTEX_BIT_SPIN) != 0)
205 #define	MUTEX_ADAPTIVE_P(mtx)		\
206     (((mtx)->mtx_owner & MUTEX_BIT_SPIN) == 0)
207 
208 #define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_NODEBUG) == 0)
209 #if defined(LOCKDEBUG)
210 #define	MUTEX_OWNED(owner)		(((owner) & ~MUTEX_BIT_NODEBUG) != 0)
211 #define	MUTEX_INHERITDEBUG(n, o)	(n) |= (o) & MUTEX_BIT_NODEBUG
212 #else /* defined(LOCKDEBUG) */
213 #define	MUTEX_OWNED(owner)		((owner) != 0)
214 #define	MUTEX_INHERITDEBUG(n, o)	/* nothing */
215 #endif /* defined(LOCKDEBUG) */
216 
217 static inline int
218 MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
219 {
220 	int rv;
221 	uintptr_t oldown = 0;
222 	uintptr_t newown = curthread;
223 
224 	MUTEX_INHERITDEBUG(oldown, mtx->mtx_owner);
225 	MUTEX_INHERITDEBUG(newown, oldown);
226 	rv = MUTEX_CAS(&mtx->mtx_owner, oldown, newown);
227 	MUTEX_RECEIVE(mtx);
228 	return rv;
229 }
230 
231 static inline int
232 MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
233 {
234 	int rv;
235 	rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
236 	MUTEX_RECEIVE(mtx);
237 	return rv;
238 }
239 
240 static inline void
241 MUTEX_RELEASE(kmutex_t *mtx)
242 {
243 	uintptr_t newown;
244 
245 	MUTEX_GIVE(mtx);
246 	newown = 0;
247 	MUTEX_INHERITDEBUG(newown, mtx->mtx_owner);
248 	mtx->mtx_owner = newown;
249 }
250 #endif	/* __HAVE_SIMPLE_MUTEXES */
251 
252 /*
253  * Patch in stubs via strong alias where they are not available.
254  */
255 
256 #if defined(LOCKDEBUG)
257 #undef	__HAVE_MUTEX_STUBS
258 #undef	__HAVE_SPIN_MUTEX_STUBS
259 #endif
260 
261 #ifndef __HAVE_MUTEX_STUBS
262 __strong_alias(mutex_enter,mutex_vector_enter);
263 __strong_alias(mutex_exit,mutex_vector_exit);
264 #endif
265 
266 #ifndef __HAVE_SPIN_MUTEX_STUBS
267 __strong_alias(mutex_spin_enter,mutex_vector_enter);
268 __strong_alias(mutex_spin_exit,mutex_vector_exit);
269 #endif
270 
271 static void	mutex_abort(const char *, size_t, const kmutex_t *,
272     const char *);
273 static void	mutex_dump(const volatile void *);
274 
275 lockops_t mutex_spin_lockops = {
276 	.lo_name = "Mutex",
277 	.lo_type = LOCKOPS_SPIN,
278 	.lo_dump = mutex_dump,
279 };
280 
281 lockops_t mutex_adaptive_lockops = {
282 	.lo_name = "Mutex",
283 	.lo_type = LOCKOPS_SLEEP,
284 	.lo_dump = mutex_dump,
285 };
286 
287 syncobj_t mutex_syncobj = {
288 	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
289 	.sobj_unsleep	= turnstile_unsleep,
290 	.sobj_changepri	= turnstile_changepri,
291 	.sobj_lendpri	= sleepq_lendpri,
292 	.sobj_owner	= (void *)mutex_owner,
293 };
294 
295 /*
296  * mutex_dump:
297  *
298  *	Dump the contents of a mutex structure.
299  */
300 void
301 mutex_dump(const volatile void *cookie)
302 {
303 	const volatile kmutex_t *mtx = cookie;
304 
305 	printf_nolog("owner field  : %#018lx wait/spin: %16d/%d\n",
306 	    (long)MUTEX_OWNER(mtx->mtx_owner), MUTEX_HAS_WAITERS(mtx),
307 	    MUTEX_SPIN_P(mtx));
308 }
309 
310 /*
311  * mutex_abort:
312  *
313  *	Dump information about an error and panic the system.  This
314  *	generates a lot of machine code in the DIAGNOSTIC case, so
315  *	we ask the compiler to not inline it.
316  */
317 void __noinline
318 mutex_abort(const char *func, size_t line, const kmutex_t *mtx, const char *msg)
319 {
320 
321 	LOCKDEBUG_ABORT(func, line, mtx, (MUTEX_SPIN_P(mtx) ?
322 	    &mutex_spin_lockops : &mutex_adaptive_lockops), msg);
323 }
324 
325 /*
326  * mutex_init:
327  *
328  *	Initialize a mutex for use.  Note that adaptive mutexes are in
329  *	essence spin mutexes that can sleep to avoid deadlock and wasting
330  *	CPU time.  We can't easily provide a type of mutex that always
331  *	sleeps - see comments in mutex_vector_enter() about releasing
332  *	mutexes unlocked.
333  */
334 void _mutex_init(kmutex_t *, kmutex_type_t, int, uintptr_t);
335 void
336 _mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl,
337     uintptr_t return_address)
338 {
339 	bool dodebug;
340 
341 	memset(mtx, 0, sizeof(*mtx));
342 
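	/*
	 * Note: MUTEX_DEFAULT/MUTEX_DRIVER mutexes are resolved by IPL
	 * here: IPL_NONE and the software interrupt levels may block,
	 * so they become adaptive; anything else (a hardware interrupt
	 * level) forces a spin mutex.
	 */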
343 	switch (type) {
344 	case MUTEX_ADAPTIVE:
345 		KASSERT(ipl == IPL_NONE);
346 		break;
347 	case MUTEX_DEFAULT:
348 	case MUTEX_DRIVER:
349 		if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
350 		    ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
351 		    ipl == IPL_SOFTSERIAL) {
352 			type = MUTEX_ADAPTIVE;
353 		} else {
354 			type = MUTEX_SPIN;
355 		}
356 		break;
357 	default:
358 		break;
359 	}
360 
361 	switch (type) {
362 	case MUTEX_NODEBUG:
363 		dodebug = LOCKDEBUG_ALLOC(mtx, NULL, return_address);
364 		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
365 		break;
366 	case MUTEX_ADAPTIVE:
367 		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_adaptive_lockops,
368 		    return_address);
369 		MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
370 		break;
371 	case MUTEX_SPIN:
372 		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_spin_lockops,
373 		    return_address);
374 		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
375 		break;
376 	default:
377 		panic("mutex_init: impossible type");
378 		break;
379 	}
380 }
381 
382 void
383 mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
384 {
385 
386 	_mutex_init(mtx, type, ipl, (uintptr_t)__builtin_return_address(0));
387 }
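
/*
 * Typical usage, for illustration only (the lock, its home structure
 * and the IPL are caller-specific):
 *
 *	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
 *	...
 *	mutex_enter(&sc->sc_lock);
 *	... manipulate the data covered by the lock ...
 *	mutex_exit(&sc->sc_lock);
 *	...
 *	mutex_destroy(&sc->sc_lock);
 *
 * See mutex(9) for the full API contract.
 */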
388 
389 /*
390  * mutex_destroy:
391  *
392  *	Tear down a mutex.
393  */
394 void
395 mutex_destroy(kmutex_t *mtx)
396 {
397 
398 	if (MUTEX_ADAPTIVE_P(mtx)) {
399 		MUTEX_ASSERT(mtx, !MUTEX_OWNED(mtx->mtx_owner) &&
400 		    !MUTEX_HAS_WAITERS(mtx));
401 	} else {
402 		MUTEX_ASSERT(mtx, !MUTEX_SPINBIT_LOCKED_P(mtx));
403 	}
404 
405 	LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
406 	MUTEX_DESTROY(mtx);
407 }
408 
409 #ifdef MULTIPROCESSOR
410 /*
411  * mutex_oncpu:
412  *
413  *	Return true if an adaptive mutex owner is running on a CPU in the
414  *	system.  If the target is waiting on the kernel big lock, then we
415  *	must release it.  This is necessary to avoid deadlock.
416  */
417 static bool
418 mutex_oncpu(uintptr_t owner)
419 {
420 	struct cpu_info *ci;
421 	lwp_t *l;
422 
423 	KASSERT(kpreempt_disabled());
424 
425 	if (!MUTEX_OWNED(owner)) {
426 		return false;
427 	}
428 
429 	/*
430 	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
431 	 * We must have kernel preemption disabled for that.
432 	 */
433 	l = (lwp_t *)MUTEX_OWNER(owner);
434 	ci = l->l_cpu;
435 
436 	if (ci && ci->ci_curlwp == l) {
437 		/* Target is running; do we need to block? */
438 		return (ci->ci_biglock_wanted != l);
439 	}
440 
441 	/* Not running.  It may be safe to block now. */
442 	return false;
443 }
444 #endif	/* MULTIPROCESSOR */
445 
446 /*
447  * mutex_vector_enter:
448  *
449  *	Support routine for mutex_enter() that must handle all cases.  In
450  *	the LOCKDEBUG case, mutex_enter() is always aliased here, even if
451  *	fast-path stubs are available.  If a mutex_spin_enter() stub is
452  *	not available, then it is also aliased directly here.
453  */
454 void
455 mutex_vector_enter(kmutex_t *mtx)
456 {
457 	uintptr_t owner, curthread;
458 	turnstile_t *ts;
459 #ifdef MULTIPROCESSOR
460 	u_int count;
461 #endif
462 	LOCKSTAT_COUNTER(spincnt);
463 	LOCKSTAT_COUNTER(slpcnt);
464 	LOCKSTAT_TIMER(spintime);
465 	LOCKSTAT_TIMER(slptime);
466 	LOCKSTAT_FLAG(lsflag);
467 
468 	/*
469 	 * Handle spin mutexes.
470 	 */
471 	if (MUTEX_SPIN_P(mtx)) {
472 #if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
473 		u_int spins = 0;
474 #endif
475 		MUTEX_SPIN_SPLRAISE(mtx);
476 		MUTEX_WANTLOCK(mtx);
477 #ifdef FULL
478 		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
479 			MUTEX_LOCKED(mtx);
480 			return;
481 		}
482 #if !defined(MULTIPROCESSOR)
483 		MUTEX_ABORT(mtx, "locking against myself");
484 #else /* !MULTIPROCESSOR */
485 
486 		LOCKSTAT_ENTER(lsflag);
487 		LOCKSTAT_START_TIMER(lsflag, spintime);
488 		count = SPINLOCK_BACKOFF_MIN;
489 
490 		/*
491 		 * Spin, testing the lock word and doing exponential backoff
492 		 * to reduce cache line ping-ponging between CPUs.
493 		 */
494 		do {
495 #if MUTEX_PANIC_SKIP_SPIN
496 			if (panicstr != NULL)
497 				break;
498 #endif
499 			while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
500 				SPINLOCK_BACKOFF(count);
501 #ifdef LOCKDEBUG
502 				if (SPINLOCK_SPINOUT(spins))
503 					MUTEX_ABORT(mtx, "spinout");
504 #endif	/* LOCKDEBUG */
505 			}
506 		} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));
507 
508 		if (count != SPINLOCK_BACKOFF_MIN) {
509 			LOCKSTAT_STOP_TIMER(lsflag, spintime);
510 			LOCKSTAT_EVENT(lsflag, mtx,
511 			    LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
512 		}
513 		LOCKSTAT_EXIT(lsflag);
514 #endif	/* !MULTIPROCESSOR */
515 #endif	/* FULL */
516 		MUTEX_LOCKED(mtx);
517 		return;
518 	}
519 
520 	curthread = (uintptr_t)curlwp;
521 
522 	MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
523 	MUTEX_ASSERT(mtx, curthread != 0);
524 	MUTEX_ASSERT(mtx, !cpu_intr_p());
525 	MUTEX_WANTLOCK(mtx);
526 
527 	if (panicstr == NULL) {
528 		LOCKDEBUG_BARRIER(&kernel_lock, 1);
529 	}
530 
531 	LOCKSTAT_ENTER(lsflag);
532 
533 	/*
534 	 * Adaptive mutex; spin trying to acquire the mutex.  If we
535 	 * determine that the owner is not running on a processor,
536 	 * then we stop spinning, and sleep instead.
537 	 */
538 	KPREEMPT_DISABLE(curlwp);
539 	for (owner = mtx->mtx_owner;;) {
540 		if (!MUTEX_OWNED(owner)) {
541 			/*
542 			 * Mutex owner clear could mean two things:
543 			 *
544 			 *	* The mutex has been released.
545 			 *	* The owner field hasn't been set yet.
546 			 *
547 			 * Try to acquire it again.  If that fails,
548 			 * we'll just loop again.
549 			 */
550 			if (MUTEX_ACQUIRE(mtx, curthread))
551 				break;
552 			owner = mtx->mtx_owner;
553 			continue;
554 		}
555 #if MUTEX_PANIC_SKIP_ADAPTIVE
556 		if (__predict_false(panicstr != NULL)) {
557 			KPREEMPT_ENABLE(curlwp);
558 			return;
559 		}
560 #endif
561 		if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
562 			MUTEX_ABORT(mtx, "locking against myself");
563 		}
564 #ifdef MULTIPROCESSOR
565 		/*
566 		 * Check to see if the owner is running on a processor.
567 		 * If so, then we should just spin, as the owner will
568 		 * likely release the lock very soon.
569 		 */
570 		if (mutex_oncpu(owner)) {
571 			LOCKSTAT_START_TIMER(lsflag, spintime);
572 			count = SPINLOCK_BACKOFF_MIN;
573 			do {
574 				KPREEMPT_ENABLE(curlwp);
575 				SPINLOCK_BACKOFF(count);
576 				KPREEMPT_DISABLE(curlwp);
577 				owner = mtx->mtx_owner;
578 			} while (mutex_oncpu(owner));
579 			LOCKSTAT_STOP_TIMER(lsflag, spintime);
580 			LOCKSTAT_COUNT(spincnt, 1);
581 			if (!MUTEX_OWNED(owner))
582 				continue;
583 		}
584 #endif
585 
586 		ts = turnstile_lookup(mtx);
587 
588 		/*
589 		 * Once we have the turnstile chain interlock, mark the
590 		 * mutex as having waiters.  If that fails, spin again:
591 		 * chances are that the mutex has been released.
592 		 */
593 		if (!MUTEX_SET_WAITERS(mtx, owner)) {
594 			turnstile_exit(mtx);
595 			owner = mtx->mtx_owner;
596 			continue;
597 		}
598 
599 #ifdef MULTIPROCESSOR
600 		/*
601 		 * mutex_exit() is permitted to release the mutex without
602 		 * any interlocking instructions, and the following can
603 		 * occur as a result:
604 		 *
605 		 *  CPU 1: MUTEX_SET_WAITERS()      CPU2: mutex_exit()
606 		 * ---------------------------- ----------------------------
607 		 *		..		    acquire cache line
608 		 *		..                   test for waiters
609 		 *	acquire cache line    <-      lose cache line
610 		 *	 lock cache line	           ..
611 		 *     verify mutex is held                ..
612 		 *	    set waiters  	           ..
613 		 *	 unlock cache line		   ..
614 		 *	  lose cache line     ->    acquire cache line
615 		 *		..	          clear lock word, waiters
616 		 *	  return success
617 		 *
618 		 * There is another race that can occur: a third CPU could
619 		 * acquire the mutex as soon as it is released.  Since
620 		 * adaptive mutexes are primarily spin mutexes, this is not
621 		 * something that we need to worry about too much.  What we
622 		 * do need to ensure is that the waiters bit gets set.
623 		 *
624 		 * To allow the unlocked release, we need to make some
625 		 * assumptions here:
626 		 *
627 		 * o Release is the only non-atomic/unlocked operation
628 		 *   that can be performed on the mutex.  (It must still
629 		 *   be atomic on the local CPU, e.g. in case interrupted
630 		 *   or preempted).
631 		 *
632 		 * o At any given time, MUTEX_SET_WAITERS() can only ever
633 		 *   be in progress on one CPU in the system - guaranteed
634 		 *   by the turnstile chain lock.
635 		 *
636 		 * o No other operations other than MUTEX_SET_WAITERS()
637 		 *   and release can modify a mutex with a non-zero
638 		 *   owner field.
639 		 *
640 		 * o The result of a successful MUTEX_SET_WAITERS() call
641 		 *   is an unbuffered write that is immediately visible
642 		 *   to all other processors in the system.
643 		 *
644 		 * o If the holding LWP switches away, it posts a store
645 		 *   fence before changing curlwp, ensuring that any
646 		 *   overwrite of the mutex waiters flag by mutex_exit()
647 		 *   completes before the modification of curlwp becomes
648 		 *   visible to this CPU.
649 		 *
650 		 * o mi_switch() posts a store fence before setting curlwp
651 		 *   and before resuming execution of an LWP.
652 		 *
653 		 * o _kernel_lock() posts a store fence before setting
654 		 *   curcpu()->ci_biglock_wanted, and after clearing it.
655 		 *   This ensures that any overwrite of the mutex waiters
656 		 *   flag by mutex_exit() completes before the modification
657 		 *   of ci_biglock_wanted becomes visible.
658 		 *
659 		 * We now post a read memory barrier (after setting the
660 		 * waiters field) and check the lock holder's status again.
661 		 * Some of the possible outcomes (not an exhaustive list):
662 		 *
663 		 * 1. The on-CPU check returns true: the holding LWP is
664 		 *    running again.  The lock may be released soon and
665 		 *    we should spin.  Importantly, we can't trust the
666 		 *    value of the waiters flag.
667 		 *
668 		 * 2. The on-CPU check returns false: the holding LWP is
669 		 *    not running.  We now have the opportunity to check
670 		 *    if mutex_exit() has blatted the modifications made
671 		 *    by MUTEX_SET_WAITERS().
672 		 *
673 		 * 3. The on-CPU check returns false: the holding LWP may
674 		 *    or may not be running.  It has context switched at
675 		 *    some point during our check.  Again, we have the
676 		 *    chance to see if the waiters bit is still set or
677 		 *    has been overwritten.
678 		 *
679 		 * 4. The on-CPU check returns false: the holding LWP is
680 		 *    running on a CPU, but wants the big lock.  It's OK
681 		 *    to check the waiters field in this case.
682 		 *
683 		 * 5. The has-waiters check fails: the mutex has been
684 		 *    released, the waiters flag cleared and another LWP
685 		 *    now owns the mutex.
686 		 *
687 		 * 6. The has-waiters check fails: the mutex has been
688 		 *    released.
689 		 *
690 		 * If the waiters bit is not set, it's unsafe to go to sleep,
691 		 * as we might never be awoken.
692 		 */
693 		if ((membar_consumer(), mutex_oncpu(owner)) ||
694 		    (membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
695 			turnstile_exit(mtx);
696 			owner = mtx->mtx_owner;
697 			continue;
698 		}
699 #endif	/* MULTIPROCESSOR */
700 
701 		LOCKSTAT_START_TIMER(lsflag, slptime);
702 
703 		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);
704 
705 		LOCKSTAT_STOP_TIMER(lsflag, slptime);
706 		LOCKSTAT_COUNT(slpcnt, 1);
707 
708 		owner = mtx->mtx_owner;
709 	}
710 	KPREEMPT_ENABLE(curlwp);
711 
712 	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
713 	    slpcnt, slptime);
714 	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
715 	    spincnt, spintime);
716 	LOCKSTAT_EXIT(lsflag);
717 
718 	MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
719 	MUTEX_LOCKED(mtx);
720 }
721 
722 /*
723  * mutex_vector_exit:
724  *
725  *	Support routine for mutex_exit() that handles all cases.
726  */
727 void
728 mutex_vector_exit(kmutex_t *mtx)
729 {
730 	turnstile_t *ts;
731 	uintptr_t curthread;
732 
733 	if (MUTEX_SPIN_P(mtx)) {
734 #ifdef FULL
735 		if (__predict_false(!MUTEX_SPINBIT_LOCKED_P(mtx))) {
736 #if MUTEX_PANIC_SKIP_SPIN
737 			if (panicstr != NULL)
738 				return;
739 #endif
740 			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
741 		}
742 		MUTEX_UNLOCKED(mtx);
743 		MUTEX_SPINBIT_LOCK_UNLOCK(mtx);
744 #endif
745 		MUTEX_SPIN_SPLRESTORE(mtx);
746 		return;
747 	}
748 
749 #if MUTEX_PANIC_SKIP_ADAPTIVE
750 	if (__predict_false((uintptr_t)panicstr | cold)) {
751 		MUTEX_UNLOCKED(mtx);
752 		MUTEX_RELEASE(mtx);
753 		return;
754 	}
755 #endif
756 
757 	curthread = (uintptr_t)curlwp;
758 	MUTEX_DASSERT(mtx, curthread != 0);
759 	MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
760 	MUTEX_UNLOCKED(mtx);
761 #if !defined(LOCKDEBUG)
762 	__USE(curthread);
763 #endif
764 
765 #ifdef LOCKDEBUG
766 	/*
767 	 * Avoid having to take the turnstile chain lock every time
768 	 * around.  Raise the priority level to splhigh() in order
769 	 * to disable preemption and so make the following atomic.
770 	 */
771 	{
772 		int s = splhigh();
773 		if (!MUTEX_HAS_WAITERS(mtx)) {
774 			MUTEX_RELEASE(mtx);
775 			splx(s);
776 			return;
777 		}
778 		splx(s);
779 	}
780 #endif
781 
782 	/*
783 	 * Get this lock's turnstile.  This gets the interlock on
784 	 * the sleep queue.  Once we have that, we can clear the
785 	 * lock.  If there was no turnstile for the lock, there
786 	 * were no waiters remaining.
787 	 */
788 	ts = turnstile_lookup(mtx);
789 
790 	if (ts == NULL) {
791 		MUTEX_RELEASE(mtx);
792 		turnstile_exit(mtx);
793 	} else {
794 		MUTEX_RELEASE(mtx);
795 		turnstile_wakeup(ts, TS_WRITER_Q,
796 		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
797 	}
798 }
799 
800 #ifndef __HAVE_SIMPLE_MUTEXES
801 /*
802  * mutex_wakeup:
803  *
804  *	Support routine for mutex_exit() that wakes up all waiters.
805  *	We assume that the mutex has been released, but it need not
806  *	be.
807  */
808 void
809 mutex_wakeup(kmutex_t *mtx)
810 {
811 	turnstile_t *ts;
812 
813 	ts = turnstile_lookup(mtx);
814 	if (ts == NULL) {
815 		turnstile_exit(mtx);
816 		return;
817 	}
818 	MUTEX_CLEAR_WAITERS(mtx);
819 	turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
820 }
821 #endif	/* !__HAVE_SIMPLE_MUTEXES */
822 
823 /*
824  * mutex_owned:
825  *
826  *	Return true if the current LWP (adaptive) or CPU (spin)
827  *	holds the mutex.
828  */
829 int
830 mutex_owned(const kmutex_t *mtx)
831 {
832 
833 	if (mtx == NULL)
834 		return 0;
835 	if (MUTEX_ADAPTIVE_P(mtx))
836 		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
837 #ifdef FULL
838 	return MUTEX_SPINBIT_LOCKED_P(mtx);
839 #else
840 	return 1;
841 #endif
842 }
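
/*
 * As a usage note: mutex_owned() is intended for diagnostic assertions
 * rather than for run-time locking decisions, e.g. (illustrative):
 *
 *	KASSERT(mutex_owned(&sc->sc_lock));
 */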
843 
844 /*
845  * mutex_owner:
846  *
847  *	Return the current owner of an adaptive mutex.  Used for
848  *	priority inheritance.
849  */
850 lwp_t *
851 mutex_owner(const kmutex_t *mtx)
852 {
853 
854 	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
855 	return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
856 }
857 
858 /*
859  * mutex_ownable:
860  *
861  *	When compiled with DEBUG and LOCKDEBUG defined, ensure that
862  *	the mutex is available.  We cannot use !mutex_owned() since
863  *	that won't work correctly for spin mutexes.
864  */
865 int
866 mutex_ownable(const kmutex_t *mtx)
867 {
868 
869 #ifdef LOCKDEBUG
870 	MUTEX_TESTLOCK(mtx);
871 #endif
872 	return 1;
873 }
874 
875 /*
876  * mutex_tryenter:
877  *
878  *	Try to acquire the mutex; return non-zero if we did.
879  */
880 int
881 mutex_tryenter(kmutex_t *mtx)
882 {
883 	uintptr_t curthread;
884 
885 	/*
886 	 * Handle spin mutexes.
887 	 */
888 	if (MUTEX_SPIN_P(mtx)) {
889 		MUTEX_SPIN_SPLRAISE(mtx);
890 #ifdef FULL
891 		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
892 			MUTEX_WANTLOCK(mtx);
893 			MUTEX_LOCKED(mtx);
894 			return 1;
895 		}
896 		MUTEX_SPIN_SPLRESTORE(mtx);
897 #else
898 		MUTEX_WANTLOCK(mtx);
899 		MUTEX_LOCKED(mtx);
900 		return 1;
901 #endif
902 	} else {
903 		curthread = (uintptr_t)curlwp;
904 		MUTEX_ASSERT(mtx, curthread != 0);
905 		if (MUTEX_ACQUIRE(mtx, curthread)) {
906 			MUTEX_WANTLOCK(mtx);
907 			MUTEX_LOCKED(mtx);
908 			MUTEX_DASSERT(mtx,
909 			    MUTEX_OWNER(mtx->mtx_owner) == curthread);
910 			return 1;
911 		}
912 	}
913 
914 	return 0;
915 }
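
/*
 * For illustration, a successful mutex_tryenter() is paired with
 * mutex_exit() by the caller:
 *
 *	if (mutex_tryenter(&sc->sc_lock)) {
 *		... got the lock without blocking ...
 *		mutex_exit(&sc->sc_lock);
 *	}
 */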
916 
917 #if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
918 /*
919  * mutex_spin_retry:
920  *
921  *	Support routine for mutex_spin_enter().  Assumes that the caller
922  *	has already raised the SPL, and adjusted counters.
923  */
924 void
925 mutex_spin_retry(kmutex_t *mtx)
926 {
927 #ifdef MULTIPROCESSOR
928 	u_int count;
929 	LOCKSTAT_TIMER(spintime);
930 	LOCKSTAT_FLAG(lsflag);
931 #ifdef LOCKDEBUG
932 	u_int spins = 0;
933 #endif	/* LOCKDEBUG */
934 
935 	MUTEX_WANTLOCK(mtx);
936 
937 	LOCKSTAT_ENTER(lsflag);
938 	LOCKSTAT_START_TIMER(lsflag, spintime);
939 	count = SPINLOCK_BACKOFF_MIN;
940 
941 	/*
942 	 * Spin, testing the lock word and doing exponential backoff
943 	 * to reduce cache line ping-ponging between CPUs.
944 	 */
945 	do {
946 #if MUTEX_PANIC_SKIP_SPIN
947 		if (panicstr != NULL)
948 			break;
949 #endif
950 		while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
951 			SPINLOCK_BACKOFF(count);
952 #ifdef LOCKDEBUG
953 			if (SPINLOCK_SPINOUT(spins))
954 				MUTEX_ABORT(mtx, "spinout");
955 #endif	/* LOCKDEBUG */
956 		}
957 	} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));
958 
959 	LOCKSTAT_STOP_TIMER(lsflag, spintime);
960 	LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
961 	LOCKSTAT_EXIT(lsflag);
962 
963 	MUTEX_LOCKED(mtx);
964 #else	/* MULTIPROCESSOR */
965 	MUTEX_ABORT(mtx, "locking against myself");
966 #endif	/* MULTIPROCESSOR */
967 }
968 #endif	/* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */
969