/*	$NetBSD: kern_rwlock.c,v 1.70 2023/02/24 11:11:10 riastradh Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive.  Lock waiters spin wait while a
 * lock is write held and the holder is still running on a CPU.  The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */
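
/*
 * As a quick orientation for the slow paths implemented below, typical
 * consumer usage looks like this (an illustrative sketch only; see
 * rwlock(9) for the definitive interface contract):
 *
 *	krwlock_t lock;
 *
 *	rw_init(&lock);
 *
 *	rw_enter(&lock, RW_READER);	.. read shared state ..
 *	rw_exit(&lock);
 *
 *	rw_enter(&lock, RW_WRITER);	.. modify shared state ..
 *	rw_exit(&lock);
 *
 *	rw_destroy(&lock);
 */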

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.70 2023/02/24 11:11:10 riastradh Exp $");

#include "opt_lockdebug.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define	RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

#define	RW_WANTLOCK(rw, op) \
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op) \
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op) \
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define	RW_ASSERT(rw, cond) \
do { \
	if (__predict_false(!(cond))) \
		rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define	RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	.lo_name = "Reader / writer lock",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= rw_owner,
};

/*
 * rw_cas:
 *
 *	Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}

/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
 *	known that the lock word is set up such that it can't be changed
 *	behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);

	RW_ASSERT(rw, n == o);
	RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}
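
/*
 * All lock state is packed into the single lock word, rw_owner, which
 * the atomics above operate on.  As a sketch of the layout (the
 * authoritative definitions live in sys/rwlock.h):
 *
 *	- Write held: the upper bits (RW_THREAD) hold the owning LWP
 *	  pointer and RW_WRITE_LOCKED is set.
 *	- Read held: the upper bits hold the reader count, advanced in
 *	  units of RW_READ_INCR.
 *	- RW_HAS_WAITERS and RW_WRITE_WANTED record sleeping waiters,
 *	  and RW_NODEBUG disables LOCKDEBUG tracking.
 *
 * Every slow path below follows the same pattern: read rw_owner,
 * compute the desired new value, and attempt rw_cas(), retrying with
 * the freshly observed value on failure.
 */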

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile krwlock_t *rw = cookie;

	pr("owner/count  : %#018lx flags    : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

	if (__predict_false(panicstr != NULL))
		return;

	LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
	/* XXX only because the assembly stubs can't handle RW_NODEBUG */
	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
		rw->rw_owner = 0;
	else
		rw->rw_owner = RW_NODEBUG;
#else
	rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

	_rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
		return false;
	}

	/*
	 * See lwp_dtor() for why it is safe to dereference the LWP
	 * pointer here.  We must have kernel preemption disabled for
	 * that to hold.
	 */
	l = (lwp_t *)(owner & RW_THREAD);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}
#endif
	/* Not running.  It may be safe to block now. */
	return false;
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (__predict_true(panicstr == NULL)) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
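	/*
	 * For example (an illustrative sketch; the exact bit positions
	 * are defined in sys/rwlock.h): a reader acquires a free lock
	 * by adding RW_READ_INCR, so three readers leave a count of
	 * 3 * RW_READ_INCR in the upper bits; a writer acquires it by
	 * adding (curthread | RW_WRITE_LOCKED) to a lock word whose
	 * owner bits are zero.  The release path in rw_vector_exit()
	 * subtracts exactly the quantity that was added here.
	 */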
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				membar_acquire();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(__func__, __LINE__, rw,
			    "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		/* XXX membar? */
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EXIT(lsflag);

	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}
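
/*
 * The __HAVE_RW_STUBS machinery above lets machine-dependent code
 * provide an uncontended fast path for rw_enter() that falls back to
 * rw_vector_enter() only on contention.  A minimal C sketch of such a
 * stub (illustrative only -- the real stubs are written in assembly,
 * and this sketch handles just the reader case):
 *
 *	void
 *	rw_enter(krwlock_t *rw, krw_t op)
 *	{
 *		uintptr_t owner = rw->rw_owner;
 *
 *		if (op == RW_READER &&
 *		    (owner & (RW_WRITE_LOCKED | RW_WRITE_WANTED)) == 0 &&
 *		    rw_cas(rw, owner, owner + RW_READ_INCR) == owner) {
 *			membar_acquire();
 *			return;
 *		}
 *		rw_vector_enter(rw, op);
 *	}
 */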

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be. Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
	membar_release();
	for (;;) {
		newown = (owner - decr);
		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner))
			return;
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_ASSERT(rw, ts != NULL);
	RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
	if (rcnt == 0 || decr == RW_READ_INCR) {
		RW_ASSERT(rw, wcnt != 0);
		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			newown = (uintptr_t)l | (owner & RW_NODEBUG);
			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			newown = owner & RW_NODEBUG;
			newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_ASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		newown = owner & RW_NODEBUG;
		newown += rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, newown);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
}
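
/*
 * To make the handoff rules above concrete, two worked examples
 * (illustrative; the masks come from sys/rwlock.h):
 *
 *	- A writer releases while two readers and one writer sleep on
 *	  the turnstile: rcnt == 2 and decr != RW_READ_INCR, so all
 *	  readers are woken and the lock word becomes 2 * RW_READ_INCR
 *	  with RW_HAS_WAITERS | RW_WRITE_WANTED still set for the
 *	  sleeping writer.
 *
 *	- The last reader releases while one reader and one writer
 *	  sleep: decr == RW_READ_INCR and rcnt != 0, so the longest
 *	  waiting writer is handed the lock directly, with its LWP
 *	  pointer, RW_WRITE_LOCKED and RW_HAS_WAITERS installed in a
 *	  single rw_swap().
 */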

/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		if (__predict_false((owner & need_wait) != 0))
			return 0;
		next = rw_cas(rw, owner, owner + incr);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

	RW_WANTLOCK(rw, op);
	RW_LOCKED(rw, op);
	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	membar_acquire();
	return 1;
}
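
/*
 * A typical caller reaches this through rw_tryenter() where sleeping
 * is not an option, for example while already holding another lock
 * that must not be held across a sleep.  An illustrative sketch of
 * the pattern:
 *
 *	if (!rw_tryenter(&lock, RW_WRITER)) {
 *		.. back out, or defer the work ..
 *		return;
 *	}
 *	.. update shared state ..
 *	rw_exit(&lock);
 */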

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
	__USE(curthread);
#endif

	membar_release();
	for (owner = rw->rw_owner;; owner = next) {
		/*
		 * If there are no waiters we can do this the easy way.  Try
		 * swapping us down to one read hold.  If it fails, the lock
		 * condition has changed and we most likely now have
		 * waiters.
		 */
		if ((owner & RW_HAS_WAITERS) == 0) {
			newown = (owner & RW_NODEBUG);
			next = rw_cas(rw, owner, newown + RW_READ_INCR);
			if (__predict_true(next == owner)) {
				RW_LOCKED(rw, RW_READER);
				RW_ASSERT(rw,
				    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
				RW_ASSERT(rw, RW_COUNT(rw) != 0);
				return;
			}
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_ASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		if (rcnt == 0) {
			/*
			 * If there are no readers, just preserve the
			 * waiters bits, swap us down to one read hold and
			 * return.
			 */
			RW_ASSERT(rw, wcnt != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			newown = owner & RW_NODEBUG;
			newown |= RW_READ_INCR | RW_HAS_WAITERS |
			    RW_WRITE_WANTED;
			next = rw_cas(rw, owner, newown);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there is
			 * a writer waiting, new readers will be blocked
			 * out.
			 */
			newown = owner & RW_NODEBUG;
			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, newown);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_WANTLOCK(rw, RW_READER);
	RW_LOCKED(rw, RW_READER);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_ASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, rw_read_held(rw));

	for (owner = RW_READ_INCR;; owner = next) {
		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			membar_acquire();
			break;
		}
		RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
		if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (next & RW_THREAD) != 0);
			return 0;
		}
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_WANTLOCK(rw, RW_WRITER);
	RW_LOCKED(rw, RW_WRITER);
	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_lock_op:
 *
 *	For a rwlock that is known to be held by the caller, return
 *	RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

	RW_ASSERT(rw, rw_lock_held(rw));

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}

/*
 * rw_owner_running:
 *
 *	Return true if a RW lock is unheld, or write held and the owner is
 *	running on a CPU.  For the pagedaemon.
 */
bool
rw_owner_running(const krwlock_t *rw)
{
#ifdef MULTIPROCESSOR
	uintptr_t owner;
	bool rv;

	kpreempt_disable();
	owner = rw->rw_owner;
	rv = (owner & RW_THREAD) == 0 || rw_oncpu(owner);
	kpreempt_enable();
	return rv;
#else
	return rw_owner(rw) == curlwp;
#endif
}
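
/*
 * Because rw_tryupgrade() above fails whenever any other reader holds
 * the lock, callers typically fall back to dropping and re-acquiring,
 * revalidating any state examined while the lock was dropped.  An
 * illustrative sketch of the common pattern:
 *
 *	rw_enter(&lock, RW_READER);
 *	...
 *	if (!rw_tryupgrade(&lock)) {
 *		rw_exit(&lock);
 *		rw_enter(&lock, RW_WRITER);
 *		.. state may have changed; re-check it ..
 *	}
 */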