/*	$NetBSD: kern_rwlock.c,v 1.76 2023/10/15 10:28:48 riastradh Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive.  Lock waiters spin wait while a
 * lock is write held and the holder is still running on a CPU.  The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */
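
/*
 * Typical use, as a minimal sketch.  The sc_lock name below is a
 * hypothetical caller-side lock used only for illustration:
 *
 *	krwlock_t sc_lock;
 *
 *	rw_init(&sc_lock);
 *
 *	rw_enter(&sc_lock, RW_READER);		(shared hold; may sleep or spin)
 *	...read shared state...
 *	rw_exit(&sc_lock);
 *
 *	rw_enter(&sc_lock, RW_WRITER);		(exclusive hold)
 *	...modify shared state...
 *	rw_exit(&sc_lock);
 *
 *	rw_destroy(&sc_lock);
 */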

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.76 2023/10/15 10:28:48 riastradh Exp $");

#include "opt_lockdebug.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>

#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/lock.h>
#include <sys/lockdebug.h>
#include <sys/proc.h>
#include <sys/pserialize.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/syncobj.h>
#include <sys/systm.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define	RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw),			\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL,			\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw),			\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define	RW_ASSERT(rw, cond)						\
do {									\
	if (__predict_false(!(cond)))					\
		rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define	RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	.lo_name = "Reader / writer lock",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = rw_dump,
};

/*
 * Give rwlock holders an extra-high priority boost when blocking due to
 * direct handoff.  XXX To be revisited.
 */
syncobj_t rw_syncobj = {
	.sobj_name	= "rwlock",
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_boostpri	= PRI_KTHREAD,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= rw_owner,
};

/*
 * rw_cas:
 *
 *	Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}

/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
 *	known that the lock word is set up such that it can't be changed
 *	behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);

	RW_ASSERT(rw, n == o);
	RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}
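
/*
 * Every update to the lock word in this file follows the same
 * compare-and-swap retry idiom.  A minimal sketch (where "modify()"
 * stands for whichever transition the caller wants; it is not a real
 * function in this file):
 *
 *	for (owner = rw->rw_owner;;) {
 *		next = rw_cas(rw, owner, modify(owner));
 *		if (next == owner)
 *			break;		(our update was applied)
 *		owner = next;		(lost a race; re-evaluate)
 *	}
 *
 * The single rw_owner word packs the owner LWP (when write held) or the
 * reader count (when read held) into the RW_THREAD bits, alongside the
 * RW_WRITE_LOCKED, RW_WRITE_WANTED, RW_HAS_WAITERS and RW_NODEBUG flag
 * bits.
 */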

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile krwlock_t *rw = cookie;

	pr("owner/count : %#018lx flags : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

	if (__predict_false(panicstr != NULL))
		return;

	LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
	/* XXX only because the assembly stubs can't handle RW_NODEBUG */
	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
		rw->rw_owner = 0;
	else
		rw->rw_owner = RW_NODEBUG;
#else
	rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

	_rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
		return false;
	}

	/*
	 * See lwp_dtor() for why it is safe to dereference the LWP
	 * pointer here.  We must have kernel preemption disabled for
	 * that.
	 */
	l = (lwp_t *)(owner & RW_THREAD);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}
#endif
	/* Not running.  It may be safe to block now. */
	return false;
}
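
/*
 * In other words, rw_oncpu() reports that spinning is worthwhile only
 * when the lock is write held, no waiters have been recorded yet, and
 * the owning LWP is currently running on a CPU and is not itself
 * waiting for the kernel big lock.  rw_vector_enter() below spins for
 * as long as this holds and otherwise sleeps on the turnstile.
 */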

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (__predict_true(panicstr == NULL)) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				membar_acquire();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(__func__, __LINE__, rw,
			    "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		/* XXX membar? */
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EXIT(lsflag);

	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}
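
/*
 * A worked example of the add/subtract trick, starting from an unowned
 * lock (rw_owner == 0); the values are illustrative only:
 *
 *	reader enter:	0 + RW_READ_INCR                (read count of one)
 *	writer enter:	0 + (curthread | RW_WRITE_LOCKED)
 *							(owner LWP + write bit)
 *
 * The matching subtraction in rw_vector_exit() undoes either transition,
 * which is why the read-release and write-release paths share code.
 */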

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be.  Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
	membar_release();
	for (;;) {
		newown = (owner - decr);
		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner))
			return;
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_ASSERT(rw, ts != NULL);
	RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
	if (rcnt == 0 || decr == RW_READ_INCR) {
		RW_ASSERT(rw, wcnt != 0);
		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			newown = (uintptr_t)l | (owner & RW_NODEBUG);
			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			newown = owner & RW_NODEBUG;
			newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_ASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		newown = owner & RW_NODEBUG;
		newown += rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, newown);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
}
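
/*
 * Informal summary of the handoff policy above, where rcnt/wcnt are the
 * numbers of sleeping readers/writers found at release time:
 *
 *	releasing a write hold, rcnt > 0  -> wake all readers
 *	releasing a read hold,  rcnt > 0  -> hand off to the first writer
 *	any release,            rcnt == 0 -> wake all writers, leave
 *					     RW_WRITE_WANTED set so that they
 *					     compete in rw_vector_enter()
 */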

/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		if (__predict_false((owner & need_wait) != 0))
			return 0;
		next = rw_cas(rw, owner, owner + incr);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

	RW_WANTLOCK(rw, op);
	RW_LOCKED(rw, op);
	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	membar_acquire();
	return 1;
}

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, newown, next, curthread __diagused;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);

	membar_release();
	for (owner = rw->rw_owner;; owner = next) {
		/*
		 * If there are no waiters we can do this the easy way.  Try
		 * swapping us down to one read hold.  If it fails, the lock
		 * condition has changed and we most likely now have
		 * waiters.
		 */
		if ((owner & RW_HAS_WAITERS) == 0) {
			newown = (owner & RW_NODEBUG);
			next = rw_cas(rw, owner, newown + RW_READ_INCR);
			if (__predict_true(next == owner)) {
				RW_LOCKED(rw, RW_READER);
				RW_ASSERT(rw,
				    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
				RW_ASSERT(rw, RW_COUNT(rw) != 0);
				return;
			}
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_ASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		if (rcnt == 0) {
			/*
			 * If there are no readers, just preserve the
			 * waiters bits, swap us down to one read hold and
			 * return.
			 */
			RW_ASSERT(rw, wcnt != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			newown = owner & RW_NODEBUG;
			newown |= RW_READ_INCR | RW_HAS_WAITERS |
			    RW_WRITE_WANTED;
			next = rw_cas(rw, owner, newown);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers.  We retain
			 * one read hold for ourselves.  If there is a
			 * writer waiting, new readers will be blocked
			 * out.
			 */
			newown = owner & RW_NODEBUG;
			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, newown);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_WANTLOCK(rw, RW_READER);
	RW_LOCKED(rw, RW_READER);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_ASSERT(rw, RW_COUNT(rw) != 0);
}
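
/*
 * Illustrative upgrade pattern (a sketch; sc_lock is a hypothetical
 * caller-owned lock).  rw_tryupgrade() below fails unless we are the
 * only reader, so callers must be prepared to drop the read hold and
 * re-acquire as a writer:
 *
 *	rw_enter(&sc_lock, RW_READER);
 *	...
 *	if (!rw_tryupgrade(&sc_lock)) {
 *		rw_exit(&sc_lock);
 *		rw_enter(&sc_lock, RW_WRITER);
 *		(re-validate: state may have changed while unlocked)
 *	}
 *	...modify, then rw_downgrade() or rw_exit()...
 */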

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, rw_read_held(rw));

	for (owner = RW_READ_INCR;; owner = next) {
		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			membar_acquire();
			break;
		}
		RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
		if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (next & RW_THREAD) != 0);
			return 0;
		}
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_WANTLOCK(rw, RW_WRITER);
	RW_LOCKED(rw, RW_WRITER);
	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_lock_op:
 *
 *	For a rwlock that is known to be held by the caller, return
 *	RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

	RW_ASSERT(rw, rw_lock_held(rw));

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj;	/* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}
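
/*
 * As noted above, the held predicates are intended only for assertions,
 * for example in a hypothetical caller verifying its locking protocol:
 *
 *	KASSERT(rw_write_held(&sc->sc_lock));
 *	KASSERT(rw_lock_op(&sc->sc_lock) == RW_WRITER);
 */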