xref: /netbsd-src/sys/kern/kern_rwlock.c (revision 4b71a66d0f279143147d63ebfcfd8a59499a3684)
1 /*	$NetBSD: kern_rwlock.c,v 1.24 2008/05/19 17:06:02 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Kernel reader/writer lock implementation, modeled after those
34  * found in Solaris, a description of which can be found in:
35  *
36  *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
37  *	    Richard McDougall.
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.24 2008/05/19 17:06:02 ad Exp $");
42 
43 #define	__RWLOCK_PRIVATE
44 
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/rwlock.h>
48 #include <sys/sched.h>
49 #include <sys/sleepq.h>
50 #include <sys/systm.h>
51 #include <sys/lockdebug.h>
52 #include <sys/cpu.h>
53 #include <sys/atomic.h>
54 #include <sys/lock.h>
55 
56 #include <dev/lockstat.h>
57 
58 /*
59  * LOCKDEBUG
60  */
61 
62 #if defined(LOCKDEBUG)
63 
/*
 * LOCKDEBUG hooks.  Each macro expands to a single statement WITHOUT a
 * trailing semicolon; the caller supplies it.  This keeps them usable as
 * the body of an unbraced if/else.
 *
 *	RW_WANTLOCK	about to try for the lock (t: this is a try-enter)
 *	RW_LOCKED	the lock has been acquired
 *	RW_UNLOCKED	the lock is being released
 *	RW_DASSERT	call rw_abort() if cond is false
 *
 * For the first three, the "shared" argument handed to lockdebug is
 * (op == RW_READER) and the recorded call site is our return address.
 */
#define	RW_WANTLOCK(rw, op, t)						\
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw),			\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER, (t))
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw),			\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		rw_abort(rw, __func__, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)
78 
79 #else	/* LOCKDEBUG */
80 
81 #define	RW_WANTLOCK(rw, op, t)	/* nothing */
82 #define	RW_LOCKED(rw, op)	/* nothing */
83 #define	RW_UNLOCKED(rw, op)	/* nothing */
84 #define	RW_DASSERT(rw, cond)	/* nothing */
85 
86 #endif	/* LOCKDEBUG */
87 
88 /*
89  * DIAGNOSTIC
90  */
91 
92 #if defined(DIAGNOSTIC)
93 
/*
 * RW_ASSERT: DIAGNOSTIC-only sanity check.  If cond does not hold,
 * rw_abort() is called with the current function name and the text of
 * the failed condition.
 */
#define	RW_ASSERT(rw, cond)						\
do {									\
	if (cond) {							\
		/* assertion holds */					\
	} else {							\
		rw_abort(rw, __func__, "assertion failed: " #cond);	\
	}								\
} while (/* CONSTCOND */ 0)
99 
100 #else
101 
102 #define	RW_ASSERT(rw, cond)	/* nothing */
103 
104 #endif	/* DIAGNOSTIC */
105 
/*
 * Helpers for the RW_DEBUG bit kept in rw_owner.
 *
 *	RW_DEBUG_P	true if the bit is set on this lock
 *	RW_SETDEBUG	OR the bit in when "on" is true (never clears it)
 *	RW_INHERITDEBUG	copy the bit from an old owner word into a new
 *			one; only does anything under LOCKDEBUG
 */
#define	RW_DEBUG_P(rw)			(((rw)->rw_owner & RW_DEBUG) != 0)
#define	RW_SETDEBUG(rw, on)						\
	((rw)->rw_owner |= ((on) ? RW_DEBUG : 0))
#ifdef LOCKDEBUG
#define	RW_INHERITDEBUG(new, old)	((new) |= ((old) & RW_DEBUG))
#else	/* LOCKDEBUG */
#define	RW_INHERITDEBUG(new, old)	/* nothing */
#endif	/* LOCKDEBUG */
113 
114 static void	rw_abort(krwlock_t *, const char *, const char *);
115 static void	rw_dump(volatile void *);
116 static lwp_t	*rw_owner(wchan_t);
117 
118 static inline uintptr_t
119 rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
120 {
121 
122 	RW_INHERITDEBUG(n, o);
123 	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
124 	    (void *)o, (void *)n);
125 }
126 
127 static inline void
128 rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
129 {
130 
131 	RW_INHERITDEBUG(n, o);
132 	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
133 	    (void *)n);
134 	RW_DASSERT(rw, n == o);
135 }
136 
137 /*
138  * For platforms that do not provide stubs, or for the LOCKDEBUG case.
139  */
140 #ifdef LOCKDEBUG
141 #undef	__HAVE_RW_STUBS
142 #endif
143 
144 #ifndef __HAVE_RW_STUBS
145 __strong_alias(rw_enter,rw_vector_enter);
146 __strong_alias(rw_exit,rw_vector_exit);
147 __strong_alias(rw_tryenter,rw_vector_tryenter);
148 #endif
149 
/*
 * Lock operations descriptor for rwlocks.  It is handed to the lockdebug
 * hooks: LOCKDEBUG_ALLOC() in rw_init() and LOCKDEBUG_ABORT() in
 * rw_abort().
 */
lockops_t rwlock_lockops = {
	"Reader / writer lock",	/* human-readable lock type name */
	1,			/* NOTE(review): presumably the "sleep lock"
				   flag -- confirm against lockops_t */
	rw_dump			/* routine that prints the lock's state */
};
155 
/*
 * Synchronisation object descriptor for rwlocks; rw_vector_enter()
 * passes it to turnstile_block() when it goes to sleep on a lock.
 * NOTE(review): the initializer is positional -- the per-slot meanings
 * come from syncobj_t, which is not visible in this file.
 */
syncobj_t rw_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	turnstile_unsleep,
	turnstile_changepri,
	sleepq_lendpri,
	rw_owner,		/* returns the write holder, or NULL */
};
163 
164 /*
165  * rw_dump:
166  *
167  *	Dump the contents of a rwlock structure.
168  */
169 static void
170 rw_dump(volatile void *cookie)
171 {
172 	volatile krwlock_t *rw = cookie;
173 
174 	printf_nolog("owner/count  : %#018lx flags    : %#018x\n",
175 	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
176 }
177 
178 /*
179  * rw_abort:
180  *
181  *	Dump information about an error and panic the system.  This
182  *	generates a lot of machine code in the DIAGNOSTIC case, so
183  *	we ask the compiler to not inline it.
184  */
185 #if __GNUC_PREREQ__(3, 0)
186 __attribute ((noinline))
187 #endif
188 static void
189 rw_abort(krwlock_t *rw, const char *func, const char *msg)
190 {
191 
192 	if (panicstr != NULL)
193 		return;
194 
195 	LOCKDEBUG_ABORT(rw, &rwlock_lockops, func, msg);
196 }
197 
198 /*
199  * rw_init:
200  *
201  *	Initialize a rwlock for use.
202  */
203 void
204 rw_init(krwlock_t *rw)
205 {
206 	bool dodebug;
207 
208 	memset(rw, 0, sizeof(*rw));
209 
210 	dodebug = LOCKDEBUG_ALLOC(rw, &rwlock_lockops,
211 	    (uintptr_t)__builtin_return_address(0));
212 	RW_SETDEBUG(rw, dodebug);
213 }
214 
/*
 * rw_destroy:
 *
 *	Tear down a rwlock.  The lock must be completely idle: under
 *	DIAGNOSTIC we assert that nothing but the RW_DEBUG bit is set in
 *	the owner word (no holder, no read count, no waiter flags).  The
 *	lock is then unregistered from lockdebug.
 */
void
rw_destroy(krwlock_t *rw)
{

	/* Anything other than the debug bit means the lock is in use. */
	RW_ASSERT(rw, (rw->rw_owner & ~RW_DEBUG) == 0);
	LOCKDEBUG_FREE(RW_DEBUG_P(rw), rw);
}
227 
228 /*
229  * rw_onproc:
230  *
231  *	Return true if an rwlock owner is running on a CPU in the system.
232  *	If the target is waiting on the kernel big lock, then we must
233  *	release it.  This is necessary to avoid deadlock.
234  *
235  *	Note that we can't use the rwlock owner field as an LWP pointer.  We
236  *	don't have full control over the timing of our execution, and so the
237  *	pointer could be completely invalid by the time we dereference it.
238  */
239 static int
240 rw_onproc(uintptr_t owner, struct cpu_info **cip)
241 {
242 #ifdef MULTIPROCESSOR
243 	CPU_INFO_ITERATOR cii;
244 	struct cpu_info *ci;
245 	lwp_t *l;
246 
247 	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED)
248 		return 0;
249 	l = (lwp_t *)(owner & RW_THREAD);
250 
251 	/* See if the target is running on a CPU somewhere. */
252 	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
253 		goto run;
254 	for (CPU_INFO_FOREACH(cii, ci))
255 		if (ci->ci_curlwp == l)
256 			goto run;
257 
258 	/* No: it may be safe to block now. */
259 	*cip = NULL;
260 	return 0;
261 
262  run:
263  	/* Target is running; do we need to block? */
264  	*cip = ci;
265 	return ci->ci_biglock_wanted != l;
266 #else
267 	return 0;
268 #endif	/* MULTIPROCESSOR */
269 }
270 
271 /*
272  * rw_vector_enter:
273  *
274  *	Acquire a rwlock.
275  */
276 void
277 rw_vector_enter(krwlock_t *rw, const krw_t op)
278 {
279 	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
280 	struct cpu_info *ci;
281 	turnstile_t *ts;
282 	int queue;
283 	lwp_t *l;
284 	LOCKSTAT_TIMER(slptime);
285 	LOCKSTAT_TIMER(slpcnt);
286 	LOCKSTAT_TIMER(spintime);
287 	LOCKSTAT_COUNTER(spincnt);
288 	LOCKSTAT_FLAG(lsflag);
289 
290 	l = curlwp;
291 	curthread = (uintptr_t)l;
292 
293 	RW_ASSERT(rw, !cpu_intr_p());
294 	RW_ASSERT(rw, curthread != 0);
295 	RW_WANTLOCK(rw, op, false);
296 
297 	if (panicstr == NULL) {
298 		LOCKDEBUG_BARRIER(&kernel_lock, 1);
299 	}
300 
301 	/*
302 	 * We play a slight trick here.  If we're a reader, we want
303 	 * increment the read count.  If we're a writer, we want to
304 	 * set the owner field and whe WRITE_LOCKED bit.
305 	 *
306 	 * In the latter case, we expect those bits to be zero,
307 	 * therefore we can use an add operation to set them, which
308 	 * means an add operation for both cases.
309 	 */
310 	if (__predict_true(op == RW_READER)) {
311 		incr = RW_READ_INCR;
312 		set_wait = RW_HAS_WAITERS;
313 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
314 		queue = TS_READER_Q;
315 	} else {
316 		RW_DASSERT(rw, op == RW_WRITER);
317 		incr = curthread | RW_WRITE_LOCKED;
318 		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
319 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
320 		queue = TS_WRITER_Q;
321 	}
322 
323 	LOCKSTAT_ENTER(lsflag);
324 
325 	for (ci = NULL, owner = rw->rw_owner;;) {
326 		/*
327 		 * Read the lock owner field.  If the need-to-wait
328 		 * indicator is clear, then try to acquire the lock.
329 		 */
330 		if ((owner & need_wait) == 0) {
331 			next = rw_cas(rw, owner, (owner + incr) &
332 			    ~RW_WRITE_WANTED);
333 			if (__predict_true(next == owner)) {
334 				/* Got it! */
335 #ifndef __HAVE_ATOMIC_AS_MEMBAR
336 				membar_enter();
337 #endif
338 				break;
339 			}
340 
341 			/*
342 			 * Didn't get it -- spin around again (we'll
343 			 * probably sleep on the next iteration).
344 			 */
345 			owner = next;
346 			continue;
347 		}
348 
349 		if (__predict_false(panicstr != NULL))
350 			return;
351 		if (__predict_false(RW_OWNER(rw) == curthread))
352 			rw_abort(rw, __func__, "locking against myself");
353 
354 		/*
355 		 * If the lock owner is running on another CPU, and
356 		 * there are no existing waiters, then spin.
357 		 */
358 		if (rw_onproc(owner, &ci)) {
359 			LOCKSTAT_START_TIMER(lsflag, spintime);
360 			u_int count = SPINLOCK_BACKOFF_MIN;
361 			do {
362 				SPINLOCK_BACKOFF(count);
363 				owner = rw->rw_owner;
364 			} while (rw_onproc(owner, &ci));
365 			LOCKSTAT_STOP_TIMER(lsflag, spintime);
366 			LOCKSTAT_COUNT(spincnt, 1);
367 			if ((owner & need_wait) == 0)
368 				continue;
369 		}
370 
371 		/*
372 		 * Grab the turnstile chain lock.  Once we have that, we
373 		 * can adjust the waiter bits and sleep queue.
374 		 */
375 		ts = turnstile_lookup(rw);
376 
377 		/*
378 		 * Mark the rwlock as having waiters.  If the set fails,
379 		 * then we may not need to sleep and should spin again.
380 		 * Reload rw_owner because turnstile_lookup() may have
381 		 * spun on the turnstile chain lock.
382 		 */
383 		owner = rw->rw_owner;
384 		if ((owner & need_wait) == 0 || rw_onproc(owner, &ci)) {
385 			turnstile_exit(rw);
386 			continue;
387 		}
388 		next = rw_cas(rw, owner, owner | set_wait);
389 		if (__predict_false(next != owner)) {
390 			turnstile_exit(rw);
391 			owner = next;
392 			continue;
393 		}
394 
395 		LOCKSTAT_START_TIMER(lsflag, slptime);
396 		turnstile_block(ts, queue, rw, &rw_syncobj);
397 		LOCKSTAT_STOP_TIMER(lsflag, slptime);
398 		LOCKSTAT_COUNT(slpcnt, 1);
399 
400 		/*
401 		 * No need for a memory barrier because of context switch.
402 		 * If not handed the lock, then spin again.
403 		 */
404 		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
405 			break;
406 	}
407 
408 	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK |
409 	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime);
410 	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime);
411 	LOCKSTAT_EXIT(lsflag);
412 
413 	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
414 	    (op == RW_READER && RW_COUNT(rw) != 0));
415 	RW_LOCKED(rw, op);
416 }
417 
418 /*
419  * rw_vector_exit:
420  *
421  *	Release a rwlock.
422  */
423 void
424 rw_vector_exit(krwlock_t *rw)
425 {
426 	uintptr_t curthread, owner, decr, new, next;
427 	turnstile_t *ts;
428 	int rcnt, wcnt;
429 	lwp_t *l;
430 
431 	curthread = (uintptr_t)curlwp;
432 	RW_ASSERT(rw, curthread != 0);
433 
434 	if (__predict_false(panicstr != NULL))
435 		return;
436 
437 	/*
438 	 * Again, we use a trick.  Since we used an add operation to
439 	 * set the required lock bits, we can use a subtract to clear
440 	 * them, which makes the read-release and write-release path
441 	 * the same.
442 	 */
443 	owner = rw->rw_owner;
444 	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
445 		RW_UNLOCKED(rw, RW_WRITER);
446 		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
447 		decr = curthread | RW_WRITE_LOCKED;
448 	} else {
449 		RW_UNLOCKED(rw, RW_READER);
450 		RW_ASSERT(rw, RW_COUNT(rw) != 0);
451 		decr = RW_READ_INCR;
452 	}
453 
454 	/*
455 	 * Compute what we expect the new value of the lock to be. Only
456 	 * proceed to do direct handoff if there are waiters, and if the
457 	 * lock would become unowned.
458 	 */
459 #ifndef __HAVE_ATOMIC_AS_MEMBAR
460 	membar_exit();
461 #endif
462 	for (;;) {
463 		new = (owner - decr);
464 		if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
465 			break;
466 		next = rw_cas(rw, owner, new);
467 		if (__predict_true(next == owner))
468 			return;
469 		owner = next;
470 	}
471 
472 	/*
473 	 * Grab the turnstile chain lock.  This gets the interlock
474 	 * on the sleep queue.  Once we have that, we can adjust the
475 	 * waiter bits.
476 	 */
477 	ts = turnstile_lookup(rw);
478 	owner = rw->rw_owner;
479 	RW_DASSERT(rw, ts != NULL);
480 	RW_DASSERT(rw, (owner & RW_HAS_WAITERS) != 0);
481 
482 	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
483 	rcnt = TS_WAITERS(ts, TS_READER_Q);
484 
485 	/*
486 	 * Give the lock away.
487 	 *
488 	 * If we are releasing a write lock, then prefer to wake all
489 	 * outstanding readers.  Otherwise, wake one writer if there
490 	 * are outstanding readers, or all writers if there are no
491 	 * pending readers.  If waking one specific writer, the writer
492 	 * is handed the lock here.  If waking multiple writers, we
493 	 * set WRITE_WANTED to block out new readers, and let them
494 	 * do the work of acquring the lock in rw_vector_enter().
495 	 */
496 	if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) {
497 		RW_DASSERT(rw, wcnt != 0);
498 		RW_DASSERT(rw, (owner & RW_WRITE_WANTED) != 0);
499 
500 		if (rcnt != 0) {
501 			/* Give the lock to the longest waiting writer. */
502 			l = TS_FIRST(ts, TS_WRITER_Q);
503 			new = (uintptr_t)l | RW_WRITE_LOCKED | RW_HAS_WAITERS;
504 			if (wcnt != 0)
505 				new |= RW_WRITE_WANTED;
506 			rw_swap(rw, owner, new);
507 			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
508 		} else {
509 			/* Wake all writers and let them fight it out. */
510 			rw_swap(rw, owner, RW_WRITE_WANTED);
511 			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
512 		}
513 	} else {
514 		RW_DASSERT(rw, rcnt != 0);
515 
516 		/*
517 		 * Give the lock to all blocked readers.  If there
518 		 * is a writer waiting, new readers that arrive
519 		 * after the release will be blocked out.
520 		 */
521 		new = rcnt << RW_READ_COUNT_SHIFT;
522 		if (wcnt != 0)
523 			new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
524 
525 		/* Wake up all sleeping readers. */
526 		rw_swap(rw, owner, new);
527 		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
528 	}
529 }
530 
531 /*
532  * rw_vector_tryenter:
533  *
534  *	Try to acquire a rwlock.
535  */
536 int
537 rw_vector_tryenter(krwlock_t *rw, const krw_t op)
538 {
539 	uintptr_t curthread, owner, incr, need_wait, next;
540 
541 	curthread = (uintptr_t)curlwp;
542 
543 	RW_ASSERT(rw, curthread != 0);
544 
545 	if (op == RW_READER) {
546 		incr = RW_READ_INCR;
547 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
548 	} else {
549 		RW_DASSERT(rw, op == RW_WRITER);
550 		incr = curthread | RW_WRITE_LOCKED;
551 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
552 	}
553 
554 	for (owner = rw->rw_owner;; owner = next) {
555 		owner = rw->rw_owner;
556 		if (__predict_false((owner & need_wait) != 0))
557 			return 0;
558 		next = rw_cas(rw, owner, owner + incr);
559 		if (__predict_true(next == owner)) {
560 			/* Got it! */
561 			break;
562 		}
563 	}
564 
565 #ifndef __HAVE_ATOMIC_AS_MEMBAR
566 	membar_enter();
567 #endif
568 	RW_WANTLOCK(rw, op, true);
569 	RW_LOCKED(rw, op);
570 	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
571 	    (op == RW_READER && RW_COUNT(rw) != 0));
572 
573 	return 1;
574 }
575 
576 /*
577  * rw_downgrade:
578  *
579  *	Downgrade a write lock to a read lock.
580  */
581 void
582 rw_downgrade(krwlock_t *rw)
583 {
584 	uintptr_t owner, curthread, new, next;
585 	turnstile_t *ts;
586 	int rcnt, wcnt;
587 
588 	curthread = (uintptr_t)curlwp;
589 	RW_ASSERT(rw, curthread != 0);
590 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
591 	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
592 	RW_UNLOCKED(rw, RW_WRITER);
593 
594 #ifndef __HAVE_ATOMIC_AS_MEMBAR
595 	membar_producer();
596 #endif
597 
598 	owner = rw->rw_owner;
599 	if ((owner & RW_HAS_WAITERS) == 0) {
600 		/*
601 		 * There are no waiters, so we can do this the easy way.
602 		 * Try swapping us down to one read hold.  If it fails, the
603 		 * lock condition has changed and we most likely now have
604 		 * waiters.
605 		 */
606 		next = rw_cas(rw, owner, RW_READ_INCR);
607 		if (__predict_true(next == owner)) {
608 			RW_LOCKED(rw, RW_READER);
609 			RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
610 			RW_DASSERT(rw, RW_COUNT(rw) != 0);
611 			return;
612 		}
613 		owner = next;
614 	}
615 
616 	/*
617 	 * Grab the turnstile chain lock.  This gets the interlock
618 	 * on the sleep queue.  Once we have that, we can adjust the
619 	 * waiter bits.
620 	 */
621 	for (;; owner = next) {
622 		ts = turnstile_lookup(rw);
623 		RW_DASSERT(rw, ts != NULL);
624 
625 		rcnt = TS_WAITERS(ts, TS_READER_Q);
626 		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
627 
628 		/*
629 		 * If there are no readers, just preserve the waiters
630 		 * bits, swap us down to one read hold and return.
631 		 */
632 		if (rcnt == 0) {
633 			RW_DASSERT(rw, wcnt != 0);
634 			RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
635 			RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
636 
637 			new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
638 			next = rw_cas(rw, owner, new);
639 			turnstile_exit(ts);
640 			if (__predict_true(next == owner))
641 				break;
642 		} else {
643 			/*
644 			 * Give the lock to all blocked readers.  We may
645 			 * retain one read hold if downgrading.  If there
646 			 * is a writer waiting, new readers will be blocked
647 			 * out.
648 			 */
649 			new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
650 			if (wcnt != 0)
651 				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
652 
653 			next = rw_cas(rw, owner, new);
654 			if (__predict_true(next == owner)) {
655 				/* Wake up all sleeping readers. */
656 				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
657 				break;
658 			}
659 			turnstile_exit(ts);
660 		}
661 	}
662 
663 	RW_LOCKED(rw, RW_READER);
664 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
665 	RW_DASSERT(rw, RW_COUNT(rw) != 0);
666 }
667 
668 /*
669  * rw_tryupgrade:
670  *
671  *	Try to upgrade a read lock to a write lock.  We must be the
672  *	only reader.
673  */
674 int
675 rw_tryupgrade(krwlock_t *rw)
676 {
677 	uintptr_t owner, curthread, new, next;
678 
679 	curthread = (uintptr_t)curlwp;
680 	RW_ASSERT(rw, curthread != 0);
681 	RW_WANTLOCK(rw, RW_WRITER, true);
682 
683 	for (owner = rw->rw_owner;; owner = next) {
684 		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
685 		if (__predict_false((owner & RW_THREAD) != RW_READ_INCR)) {
686 			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
687 			return 0;
688 		}
689 		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
690 		next = rw_cas(rw, owner, new);
691 		if (__predict_true(next == owner))
692 			break;
693 	}
694 
695 	RW_UNLOCKED(rw, RW_READER);
696 	RW_LOCKED(rw, RW_WRITER);
697 	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
698 	RW_DASSERT(rw, RW_OWNER(rw) == curthread);
699 
700 #ifndef __HAVE_ATOMIC_AS_MEMBAR
701 	membar_producer();
702 #endif
703 
704 	return 1;
705 }
706 
707 /*
708  * rw_read_held:
709  *
710  *	Returns true if the rwlock is held for reading.  Must only be
711  *	used for diagnostic assertions, and never be used to make
712  * 	decisions about how to use a rwlock.
713  */
714 int
715 rw_read_held(krwlock_t *rw)
716 {
717 	uintptr_t owner;
718 
719 	if (panicstr != NULL)
720 		return 1;
721 	if (rw == NULL)
722 		return 0;
723 	owner = rw->rw_owner;
724 	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
725 }
726 
727 /*
728  * rw_write_held:
729  *
730  *	Returns true if the rwlock is held for writing.  Must only be
731  *	used for diagnostic assertions, and never be used to make
732  *	decisions about how to use a rwlock.
733  */
734 int
735 rw_write_held(krwlock_t *rw)
736 {
737 
738 	if (panicstr != NULL)
739 		return 1;
740 	if (rw == NULL)
741 		return 0;
742 	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
743 	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
744 }
745 
746 /*
747  * rw_lock_held:
748  *
749  *	Returns true if the rwlock is held for reading or writing.  Must
750  *	only be used for diagnostic assertions, and never be used to make
751  *	decisions about how to use a rwlock.
752  */
753 int
754 rw_lock_held(krwlock_t *rw)
755 {
756 
757 	if (panicstr != NULL)
758 		return 1;
759 	if (rw == NULL)
760 		return 0;
761 	return (rw->rw_owner & RW_THREAD) != 0;
762 }
763 
764 /*
765  * rw_owner:
766  *
767  *	Return the current owner of an RW lock, but only if it is write
768  *	held.  Used for priority inheritance.
769  */
770 static lwp_t *
771 rw_owner(wchan_t obj)
772 {
773 	krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
774 	uintptr_t owner = rw->rw_owner;
775 
776 	if ((owner & RW_WRITE_LOCKED) == 0)
777 		return NULL;
778 
779 	return (void *)(owner & RW_THREAD);
780 }
781