1 /* $NetBSD: kern_rwlock.c,v 1.76 2023/10/15 10:28:48 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020, 2023
5 * The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe and Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Kernel reader/writer lock implementation, modeled after those
35 * found in Solaris, a description of which can be found in:
36 *
37 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
38 * Richard McDougall.
39 *
40 * The NetBSD implementation differs from that described in the book, in
41 * that the locks are partially adaptive. Lock waiters spin wait while a
42 * lock is write held and the holder is still running on a CPU. The method
43 * of choosing which threads to awaken when a lock is released also differs,
44 * mainly to take account of the partially adaptive behaviour.
45 */
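
/*
 * A rough sketch of how the single lock word (rw_owner) is used below;
 * the authoritative bit definitions live in sys/rwlock.h.  The low bits,
 * which are always clear in a suitably aligned LWP pointer, hold the
 * flags used here (RW_WRITE_LOCKED, RW_HAS_WAITERS, RW_WRITE_WANTED,
 * RW_NODEBUG), and RW_THREAD masks everything above them:
 *
 *	write held:	(uintptr_t)owning_lwp | RW_WRITE_LOCKED | flags
 *	read held:	(nreaders * RW_READ_INCR) | flags
 *	unowned:	flags only, RW_THREAD bits all zero
 */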
46
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.76 2023/10/15 10:28:48 riastradh Exp $");
49
50 #include "opt_lockdebug.h"
51
52 #define __RWLOCK_PRIVATE
53
54 #include <sys/param.h>
55
56 #include <sys/atomic.h>
57 #include <sys/cpu.h>
58 #include <sys/lock.h>
59 #include <sys/lockdebug.h>
60 #include <sys/proc.h>
61 #include <sys/pserialize.h>
62 #include <sys/rwlock.h>
63 #include <sys/sched.h>
64 #include <sys/sleepq.h>
65 #include <sys/syncobj.h>
66 #include <sys/systm.h>
67
68 #include <dev/lockstat.h>
69
70 #include <machine/rwlock.h>
71
72 /*
73 * LOCKDEBUG
74 */
75
76 #define RW_DEBUG_P(rw) (((rw)->rw_owner & RW_NODEBUG) == 0)
77
78 #define RW_WANTLOCK(rw, op) \
79 LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
80 (uintptr_t)__builtin_return_address(0), op == RW_READER);
81 #define RW_LOCKED(rw, op) \
82 LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
83 (uintptr_t)__builtin_return_address(0), op == RW_READER);
84 #define RW_UNLOCKED(rw, op) \
85 LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
86 (uintptr_t)__builtin_return_address(0), op == RW_READER);
87
88 /*
89 * DIAGNOSTIC
90 */
91
92 #if defined(DIAGNOSTIC)
93 #define RW_ASSERT(rw, cond) \
94 do { \
95 if (__predict_false(!(cond))) \
96 rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
97 } while (/* CONSTCOND */ 0)
98 #else
99 #define RW_ASSERT(rw, cond) /* nothing */
100 #endif /* DIAGNOSTIC */
101
102 /*
103 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
104 */
105 #ifdef LOCKDEBUG
106 #undef __HAVE_RW_STUBS
107 #endif
108
109 #ifndef __HAVE_RW_STUBS
110 __strong_alias(rw_enter,rw_vector_enter);
111 __strong_alias(rw_exit,rw_vector_exit);
112 __strong_alias(rw_tryenter,rw_vector_tryenter);
113 #endif
114
115 static void rw_abort(const char *, size_t, krwlock_t *, const char *);
116 static void rw_dump(const volatile void *, lockop_printer_t);
117 static lwp_t *rw_owner(wchan_t);
118
119 lockops_t rwlock_lockops = {
120 .lo_name = "Reader / writer lock",
121 .lo_type = LOCKOPS_SLEEP,
122 .lo_dump = rw_dump,
123 };
124
125 /*
126 * Give rwlock holders an extra-high priority boost when blocking, due to
127 * direct handoff. XXX To be revisited.
128 */
129 syncobj_t rw_syncobj = {
130 .sobj_name = "rwlock",
131 .sobj_flag = SOBJ_SLEEPQ_SORTED,
132 .sobj_boostpri = PRI_KTHREAD,
133 .sobj_unsleep = turnstile_unsleep,
134 .sobj_changepri = turnstile_changepri,
135 .sobj_lendpri = sleepq_lendpri,
136 .sobj_owner = rw_owner,
137 };
138
139 /*
140 * rw_cas:
141 *
142 * Do an atomic compare-and-swap on the lock word.
143 */
144 static inline uintptr_t
145 rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
146 {
147
148 return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
149 (void *)o, (void *)n);
150 }
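
/*
 * The routines below use rw_cas() in a common retry idiom: read the
 * lock word, compute the desired new value from it, and retry until
 * the CAS observes the value that was read.  A sketch, modelled on
 * rw_vector_tryenter() further down:
 *
 *	for (owner = rw->rw_owner;; owner = next) {
 *		next = rw_cas(rw, owner, <new value derived from owner>);
 *		if (next == owner)
 *			break;	(the update was applied atomically)
 *	}
 */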
151
152 /*
153 * rw_swap:
154 *
155 * Do an atomic swap of the lock word. This is used only when it's
156 * known that the lock word is set up such that it can't be changed
157 * behind us (assert this), so there's no point considering the result.
158 */
159 static inline void
160 rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
161 {
162
163 n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
164 (void *)n);
165
166 RW_ASSERT(rw, n == o);
167 RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
168 }
169
170 /*
171 * rw_dump:
172 *
173 * Dump the contents of a rwlock structure.
174 */
175 static void
176 rw_dump(const volatile void *cookie, lockop_printer_t pr)
177 {
178 const volatile krwlock_t *rw = cookie;
179
180 pr("owner/count : %#018lx flags : %#018x\n",
181 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
182 }
183
184 /*
185 * rw_abort:
186 *
187 * Dump information about an error and panic the system. This
188 * generates a lot of machine code in the DIAGNOSTIC case, so
189 * we ask the compiler to not inline it.
190 */
191 static void __noinline
192 rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
193 {
194
195 if (__predict_false(panicstr != NULL))
196 return;
197
198 LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
199 }
200
201 /*
202 * rw_init:
203 *
204 * Initialize a rwlock for use.
205 */
206 void
207 _rw_init(krwlock_t *rw, uintptr_t return_address)
208 {
209
210 #ifdef LOCKDEBUG
211 /* XXX only because the assembly stubs can't handle RW_NODEBUG */
212 if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
213 rw->rw_owner = 0;
214 else
215 rw->rw_owner = RW_NODEBUG;
216 #else
217 rw->rw_owner = 0;
218 #endif
219 }
220
221 void
222 rw_init(krwlock_t *rw)
223 {
224
225 _rw_init(rw, (uintptr_t)__builtin_return_address(0));
226 }
227
228 /*
229 * rw_destroy:
230 *
231 * Tear down a rwlock.
232 */
233 void
234 rw_destroy(krwlock_t *rw)
235 {
236
237 RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
238 LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
239 }
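
/*
 * A minimal caller-side sketch of the life cycle of a lock built on
 * the routines in this file (the structure and field names here are
 * hypothetical):
 *
 *	struct frobber {
 *		krwlock_t	f_lock;
 *		int		f_count;
 *	} frob;
 *
 *	rw_init(&frob.f_lock);
 *
 *	rw_enter(&frob.f_lock, RW_READER);	read f_count
 *	rw_exit(&frob.f_lock);
 *
 *	rw_enter(&frob.f_lock, RW_WRITER);	modify f_count
 *	rw_exit(&frob.f_lock);
 *
 *	rw_destroy(&frob.f_lock);
 */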
240
241 /*
242 * rw_oncpu:
243 *
244 * Return true if an rwlock owner is running on a CPU in the system.
245 * If the target is waiting on the kernel big lock, then we must
246 * release it. This is necessary to avoid deadlock.
247 */
248 static bool
249 rw_oncpu(uintptr_t owner)
250 {
251 #ifdef MULTIPROCESSOR
252 struct cpu_info *ci;
253 lwp_t *l;
254
255 KASSERT(kpreempt_disabled());
256
257 if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
258 return false;
259 }
260
261 /*
262 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
263 * We must have kernel preemption disabled for that.
264 */
265 l = (lwp_t *)(owner & RW_THREAD);
266 ci = l->l_cpu;
267
268 if (ci && ci->ci_curlwp == l) {
269 /* Target is running; do we need to block? */
270 return (ci->ci_biglock_wanted != l);
271 }
272 #endif
273 /* Not running. It may be safe to block now. */
274 return false;
275 }
276
277 /*
278 * rw_vector_enter:
279 *
280 * Acquire a rwlock.
281 */
282 void
283 rw_vector_enter(krwlock_t *rw, const krw_t op)
284 {
285 uintptr_t owner, incr, need_wait, set_wait, curthread, next;
286 turnstile_t *ts;
287 int queue;
288 lwp_t *l;
289 LOCKSTAT_TIMER(slptime);
290 LOCKSTAT_TIMER(slpcnt);
291 LOCKSTAT_TIMER(spintime);
292 LOCKSTAT_COUNTER(spincnt);
293 LOCKSTAT_FLAG(lsflag);
294
295 l = curlwp;
296 curthread = (uintptr_t)l;
297
298 RW_ASSERT(rw, !cpu_intr_p());
299 RW_ASSERT(rw, curthread != 0);
300 RW_WANTLOCK(rw, op);
301
302 if (__predict_true(panicstr == NULL)) {
303 KDASSERT(pserialize_not_in_read_section());
304 LOCKDEBUG_BARRIER(&kernel_lock, 1);
305 }
306
307 /*
308 * We play a slight trick here. If we're a reader, we want
309 * to increment the read count. If we're a writer, we want to
310 * set the owner field and the WRITE_LOCKED bit.
311 *
312 * In the latter case, we expect those bits to be zero,
313 * therefore we can use an add operation to set them, which
314 * means an add operation for both cases.
315 */
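/*
 * Concretely: a reader adds RW_READ_INCR, bumping the read count held
 * above the flag bits; a writer adds (curthread | RW_WRITE_LOCKED),
 * which behaves like an OR because the owner field and WRITE_LOCKED
 * bit are expected to be zero.  Either way the acquire below is a
 * single CAS from owner to (owner + incr), with RW_WRITE_WANTED
 * cleared at the same time.
 */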
316 if (__predict_true(op == RW_READER)) {
317 incr = RW_READ_INCR;
318 set_wait = RW_HAS_WAITERS;
319 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
320 queue = TS_READER_Q;
321 } else {
322 RW_ASSERT(rw, op == RW_WRITER);
323 incr = curthread | RW_WRITE_LOCKED;
324 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
325 need_wait = RW_WRITE_LOCKED | RW_THREAD;
326 queue = TS_WRITER_Q;
327 }
328
329 LOCKSTAT_ENTER(lsflag);
330
331 KPREEMPT_DISABLE(curlwp);
332 for (owner = rw->rw_owner;;) {
333 /*
334 * Read the lock owner field. If the need-to-wait
335 * indicator is clear, then try to acquire the lock.
336 */
337 if ((owner & need_wait) == 0) {
338 next = rw_cas(rw, owner, (owner + incr) &
339 ~RW_WRITE_WANTED);
340 if (__predict_true(next == owner)) {
341 /* Got it! */
342 membar_acquire();
343 break;
344 }
345
346 /*
347 * Didn't get it -- spin around again (we'll
348 * probably sleep on the next iteration).
349 */
350 owner = next;
351 continue;
352 }
353 if (__predict_false(RW_OWNER(rw) == curthread)) {
354 rw_abort(__func__, __LINE__, rw,
355 "locking against myself");
356 }
357 /*
358 * If the lock owner is running on another CPU, and
359 * there are no existing waiters, then spin.
360 */
361 if (rw_oncpu(owner)) {
362 LOCKSTAT_START_TIMER(lsflag, spintime);
363 u_int count = SPINLOCK_BACKOFF_MIN;
364 do {
365 KPREEMPT_ENABLE(curlwp);
366 SPINLOCK_BACKOFF(count);
367 KPREEMPT_DISABLE(curlwp);
368 owner = rw->rw_owner;
369 } while (rw_oncpu(owner));
370 LOCKSTAT_STOP_TIMER(lsflag, spintime);
371 LOCKSTAT_COUNT(spincnt, 1);
372 if ((owner & need_wait) == 0)
373 continue;
374 }
375
376 /*
377 * Grab the turnstile chain lock. Once we have that, we
378 * can adjust the waiter bits and sleep queue.
379 */
380 ts = turnstile_lookup(rw);
381
382 /*
383 * Mark the rwlock as having waiters. If the set fails,
384 * then we may not need to sleep and should spin again.
385 * Reload rw_owner because turnstile_lookup() may have
386 * spun on the turnstile chain lock.
387 */
388 owner = rw->rw_owner;
389 if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
390 turnstile_exit(rw);
391 continue;
392 }
393 next = rw_cas(rw, owner, owner | set_wait);
394 /* XXX membar? */
395 if (__predict_false(next != owner)) {
396 turnstile_exit(rw);
397 owner = next;
398 continue;
399 }
400
401 LOCKSTAT_START_TIMER(lsflag, slptime);
402 turnstile_block(ts, queue, rw, &rw_syncobj);
403 LOCKSTAT_STOP_TIMER(lsflag, slptime);
404 LOCKSTAT_COUNT(slpcnt, 1);
405
406 /*
407 * No need for a memory barrier because of context switch.
408 * If not handed the lock, then spin again.
409 */
410 if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
411 break;
412
413 owner = rw->rw_owner;
414 }
415 KPREEMPT_ENABLE(curlwp);
416
417 LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
418 (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
419 (l->l_rwcallsite != 0 ? l->l_rwcallsite :
420 (uintptr_t)__builtin_return_address(0)));
421 LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
422 (l->l_rwcallsite != 0 ? l->l_rwcallsite :
423 (uintptr_t)__builtin_return_address(0)));
424 LOCKSTAT_EXIT(lsflag);
425
426 RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
427 (op == RW_READER && RW_COUNT(rw) != 0));
428 RW_LOCKED(rw, op);
429 }
430
431 /*
432 * rw_vector_exit:
433 *
434 * Release a rwlock.
435 */
436 void
437 rw_vector_exit(krwlock_t *rw)
438 {
439 uintptr_t curthread, owner, decr, newown, next;
440 turnstile_t *ts;
441 int rcnt, wcnt;
442 lwp_t *l;
443
444 l = curlwp;
445 curthread = (uintptr_t)l;
446 RW_ASSERT(rw, curthread != 0);
447
448 /*
449 * Again, we use a trick. Since we used an add operation to
450 * set the required lock bits, we can use a subtract to clear
451 * them, which makes the read-release and write-release paths
452 * the same.
453 */
454 owner = rw->rw_owner;
455 if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
456 RW_UNLOCKED(rw, RW_WRITER);
457 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
458 decr = curthread | RW_WRITE_LOCKED;
459 } else {
460 RW_UNLOCKED(rw, RW_READER);
461 RW_ASSERT(rw, RW_COUNT(rw) != 0);
462 decr = RW_READ_INCR;
463 }
464
465 /*
466 * Compute what we expect the new value of the lock to be. Only
467 * proceed to do direct handoff if there are waiters, and if the
468 * lock would become unowned.
469 */
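/*
 * A sketch of the test below: RW_THREAD covers both the owner pointer
 * and the read count, so (newown & (RW_THREAD | RW_HAS_WAITERS)) ==
 * RW_HAS_WAITERS means "no owner and no readers would remain, but
 * somebody is asleep on the turnstile" -- the one case where we must
 * fall through and hand the lock off rather than just store the new
 * value.
 */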
470 membar_release();
471 for (;;) {
472 newown = (owner - decr);
473 if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
474 break;
475 next = rw_cas(rw, owner, newown);
476 if (__predict_true(next == owner))
477 return;
478 owner = next;
479 }
480
481 /*
482 * Grab the turnstile chain lock. This gets the interlock
483 * on the sleep queue. Once we have that, we can adjust the
484 * waiter bits.
485 */
486 ts = turnstile_lookup(rw);
487 owner = rw->rw_owner;
488 RW_ASSERT(rw, ts != NULL);
489 RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);
490
491 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
492 rcnt = TS_WAITERS(ts, TS_READER_Q);
493
494 /*
495 * Give the lock away.
496 *
497 * If we are releasing a write lock, then prefer to wake all
498 * outstanding readers. Otherwise, wake one writer if there
499 * are outstanding readers, or all writers if there are no
500 * pending readers. If waking one specific writer, the writer
501 * is handed the lock here. If waking multiple writers, we
502 * set WRITE_WANTED to block out new readers, and let them
503 * do the work of acquiring the lock in rw_vector_enter().
504 */
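/*
 * Restating the policy in terms of the variables below (rcnt and wcnt
 * are the reader and writer waiter counts found on the turnstile):
 *
 *	released write lock, rcnt > 0	wake every waiting reader; the
 *					lock word becomes a read count
 *	released read lock, rcnt > 0	hand off to the longest waiting
 *					writer
 *	rcnt == 0			wake all waiting writers and let
 *					them re-contend in rw_vector_enter()
 */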
505 if (rcnt == 0 || decr == RW_READ_INCR) {
506 RW_ASSERT(rw, wcnt != 0);
507 RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);
508
509 if (rcnt != 0) {
510 /* Give the lock to the longest waiting writer. */
511 l = TS_FIRST(ts, TS_WRITER_Q);
512 newown = (uintptr_t)l | (owner & RW_NODEBUG);
513 newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
514 if (wcnt > 1)
515 newown |= RW_WRITE_WANTED;
516 rw_swap(rw, owner, newown);
517 turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
518 } else {
519 /* Wake all writers and let them fight it out. */
520 newown = owner & RW_NODEBUG;
521 newown |= RW_WRITE_WANTED;
522 rw_swap(rw, owner, newown);
523 turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
524 }
525 } else {
526 RW_ASSERT(rw, rcnt != 0);
527
528 /*
529 * Give the lock to all blocked readers. If there
530 * is a writer waiting, new readers that arrive
531 * after the release will be blocked out.
532 */
533 newown = owner & RW_NODEBUG;
534 newown += rcnt << RW_READ_COUNT_SHIFT;
535 if (wcnt != 0)
536 newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;
537
538 /* Wake up all sleeping readers. */
539 rw_swap(rw, owner, newown);
540 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
541 }
542 }
543
544 /*
545 * rw_vector_tryenter:
546 *
547 * Try to acquire a rwlock.
548 */
549 int
550 rw_vector_tryenter(krwlock_t *rw, const krw_t op)
551 {
552 uintptr_t curthread, owner, incr, need_wait, next;
553 lwp_t *l;
554
555 l = curlwp;
556 curthread = (uintptr_t)l;
557
558 RW_ASSERT(rw, curthread != 0);
559
560 if (op == RW_READER) {
561 incr = RW_READ_INCR;
562 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
563 } else {
564 RW_ASSERT(rw, op == RW_WRITER);
565 incr = curthread | RW_WRITE_LOCKED;
566 need_wait = RW_WRITE_LOCKED | RW_THREAD;
567 }
568
569 for (owner = rw->rw_owner;; owner = next) {
570 if (__predict_false((owner & need_wait) != 0))
571 return 0;
572 next = rw_cas(rw, owner, owner + incr);
573 if (__predict_true(next == owner)) {
574 /* Got it! */
575 break;
576 }
577 }
578
579 RW_WANTLOCK(rw, op);
580 RW_LOCKED(rw, op);
581 RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
582 (op == RW_READER && RW_COUNT(rw) != 0));
583
584 membar_acquire();
585 return 1;
586 }
587
588 /*
589 * rw_downgrade:
590 *
591 * Downgrade a write lock to a read lock.
592 */
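
/*
 * A minimal caller-side sketch (hypothetical names, continuing the
 * frobber example above): take the lock for writing, perform the
 * update, then downgrade so that other readers may proceed while we
 * keep reading the protected data:
 *
 *	rw_enter(&frob.f_lock, RW_WRITER);
 *	frob.f_count++;
 *	rw_downgrade(&frob.f_lock);
 *	... other readers may now enter; we still hold a read lock ...
 *	rw_exit(&frob.f_lock);
 */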
593 void
594 rw_downgrade(krwlock_t *rw)
595 {
596 uintptr_t owner, newown, next, curthread __diagused;
597 turnstile_t *ts;
598 int rcnt, wcnt;
599 lwp_t *l;
600
601 l = curlwp;
602 curthread = (uintptr_t)l;
603 RW_ASSERT(rw, curthread != 0);
604 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
605 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
606 RW_UNLOCKED(rw, RW_WRITER);
607
608 membar_release();
609 for (owner = rw->rw_owner;; owner = next) {
610 /*
611 * If there are no waiters we can do this the easy way. Try
612 * swapping us down to one read hold. If it fails, the lock
613 * condition has changed and we most likely now have
614 * waiters.
615 */
616 if ((owner & RW_HAS_WAITERS) == 0) {
617 newown = (owner & RW_NODEBUG);
618 next = rw_cas(rw, owner, newown + RW_READ_INCR);
619 if (__predict_true(next == owner)) {
620 RW_LOCKED(rw, RW_READER);
621 RW_ASSERT(rw,
622 (rw->rw_owner & RW_WRITE_LOCKED) == 0);
623 RW_ASSERT(rw, RW_COUNT(rw) != 0);
624 return;
625 }
626 continue;
627 }
628
629 /*
630 * Grab the turnstile chain lock. This gets the interlock
631 * on the sleep queue. Once we have that, we can adjust the
632 * waiter bits.
633 */
634 ts = turnstile_lookup(rw);
635 RW_ASSERT(rw, ts != NULL);
636
637 rcnt = TS_WAITERS(ts, TS_READER_Q);
638 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
639
640 if (rcnt == 0) {
641 /*
642 * If there are no readers, just preserve the
643 * waiters bits, swap us down to one read hold and
644 * return.
645 */
646 RW_ASSERT(rw, wcnt != 0);
647 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
648 RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
649
650 newown = owner & RW_NODEBUG;
651 newown |= RW_READ_INCR | RW_HAS_WAITERS |
652 RW_WRITE_WANTED;
653 next = rw_cas(rw, owner, newown);
654 turnstile_exit(rw);
655 if (__predict_true(next == owner))
656 break;
657 } else {
658 /*
659 * Give the lock to all blocked readers, retaining
660 * one read hold for ourselves. If there is
661 * a writer waiting, new readers will be blocked
662 * out.
663 */
664 newown = owner & RW_NODEBUG;
665 newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
666 if (wcnt != 0)
667 newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;
668
669 next = rw_cas(rw, owner, newown);
670 if (__predict_true(next == owner)) {
671 /* Wake up all sleeping readers. */
672 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
673 break;
674 }
675 turnstile_exit(rw);
676 }
677 }
678
679 RW_WANTLOCK(rw, RW_READER);
680 RW_LOCKED(rw, RW_READER);
681 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
682 RW_ASSERT(rw, RW_COUNT(rw) != 0);
683 }
684
685 /*
686 * rw_tryupgrade:
687 *
688 * Try to upgrade a read lock to a write lock. We must be the only
689 * reader.
690 */
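
/*
 * Upgrading fails if any other reader also holds the lock, so callers
 * need a fallback path.  A minimal sketch (hypothetical names):
 *
 *	rw_enter(&frob.f_lock, RW_READER);
 *	if (!rw_tryupgrade(&frob.f_lock)) {
 *		rw_exit(&frob.f_lock);
 *		rw_enter(&frob.f_lock, RW_WRITER);
 *		... re-validate anything read under the read hold ...
 *	}
 *	frob.f_count++;
 *	rw_exit(&frob.f_lock);
 */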
691 int
692 rw_tryupgrade(krwlock_t *rw)
693 {
694 uintptr_t owner, curthread, newown, next;
695 struct lwp *l;
696
697 l = curlwp;
698 curthread = (uintptr_t)l;
699 RW_ASSERT(rw, curthread != 0);
700 RW_ASSERT(rw, rw_read_held(rw));
701
702 for (owner = RW_READ_INCR;; owner = next) {
703 newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
704 next = rw_cas(rw, owner, newown);
705 if (__predict_true(next == owner)) {
706 membar_acquire();
707 break;
708 }
709 RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
710 if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
711 RW_ASSERT(rw, (next & RW_THREAD) != 0);
712 return 0;
713 }
714 }
715
716 RW_UNLOCKED(rw, RW_READER);
717 RW_WANTLOCK(rw, RW_WRITER);
718 RW_LOCKED(rw, RW_WRITER);
719 RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
720 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
721
722 return 1;
723 }
724
725 /*
726 * rw_read_held:
727 *
728 * Returns true if the rwlock is held for reading. Must only be
729 * used for diagnostic assertions, and never be used to make
730 * decisions about how to use a rwlock.
731 */
732 int
733 rw_read_held(krwlock_t *rw)
734 {
735 uintptr_t owner;
736
737 if (rw == NULL)
738 return 0;
739 owner = rw->rw_owner;
740 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
741 }
742
743 /*
744 * rw_write_held:
745 *
746 * Returns true if the rwlock is held for writing. Must only be
747 * used for diagnostic assertions, and never be used to make
748 * decisions about how to use a rwlock.
749 */
750 int
751 rw_write_held(krwlock_t *rw)
752 {
753
754 if (rw == NULL)
755 return 0;
756 return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
757 (RW_WRITE_LOCKED | (uintptr_t)curlwp);
758 }
759
760 /*
761 * rw_lock_held:
762 *
763 * Returns true if the rwlock is held for reading or writing. Must
764 * only be used for diagnostic assertions, and never be used to make
765 * decisions about how to use a rwlock.
766 */
767 int
768 rw_lock_held(krwlock_t *rw)
769 {
770
771 if (rw == NULL)
772 return 0;
773 return (rw->rw_owner & RW_THREAD) != 0;
774 }
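
/*
 * The predicates above are intended for assertions only, for example:
 *
 *	KASSERT(rw_write_held(&frob.f_lock));
 *	frob.f_count++;
 */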
775
776 /*
777 * rw_lock_op:
778 *
779 * For a rwlock that is known to be held by the caller, return
780 * RW_READER or RW_WRITER to describe the hold type.
781 */
782 krw_t
783 rw_lock_op(krwlock_t *rw)
784 {
785
786 RW_ASSERT(rw, rw_lock_held(rw));
787
788 return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
789 }
790
791 /*
792 * rw_owner:
793 *
794 * Return the current owner of an RW lock, but only if it is write
795 * held. Used for priority inheritance.
796 */
797 static lwp_t *
798 rw_owner(wchan_t obj)
799 {
800 krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
801 uintptr_t owner = rw->rw_owner;
802
803 if ((owner & RW_WRITE_LOCKED) == 0)
804 return NULL;
805
806 return (void *)(owner & RW_THREAD);
807 }
808