/*	$NetBSD: kern_rwlock.c,v 1.66 2022/04/09 23:46:19 riastradh Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive.  Lock waiters spin wait while a
 * lock is write held and the holder is still running on a CPU.  The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */
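
/*
 * A minimal usage sketch of the interface implemented below ("example_lock"
 * and "example_data" are illustrative names only, not symbols from this
 * file; see rwlock(9) for the authoritative contract):
 *
 *	static krwlock_t example_lock;
 *	static int example_data;
 *
 *	rw_init(&example_lock);
 *
 *	rw_enter(&example_lock, RW_READER);	(any number of readers)
 *	(read example_data)
 *	rw_exit(&example_lock);
 *
 *	rw_enter(&example_lock, RW_WRITER);	(writers are exclusive)
 *	example_data = 1;
 *	rw_exit(&example_lock);
 *
 *	rw_destroy(&example_lock);
 */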

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.66 2022/04/09 23:46:19 riastradh Exp $");

#include "opt_lockdebug.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define	RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

#define	RW_WANTLOCK(rw, op) \
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op) \
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op) \
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define	RW_ASSERT(rw, cond) \
do { \
	if (__predict_false(!(cond))) \
		rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define	RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * Memory barriers.
 */
#ifdef	__HAVE_ATOMIC_AS_MEMBAR
#define	RW_MEMBAR_ACQUIRE()
#define	RW_MEMBAR_RELEASE()
#define	RW_MEMBAR_PRODUCER()
#else
#define	RW_MEMBAR_ACQUIRE()	membar_acquire()
#define	RW_MEMBAR_RELEASE()	membar_release()
#define	RW_MEMBAR_PRODUCER()	membar_producer()
#endif

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	.lo_name = "Reader / writer lock",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= rw_owner,
};

/*
 * rw_cas:
 *
 *	Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}

/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
 *	known that the lock word is set up such that it can't be changed
 *	behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);

	RW_ASSERT(rw, n == o);
	RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile krwlock_t *rw = cookie;

	pr("owner/count  : %#018lx flags        : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

	if (panicstr != NULL)
		return;

	LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
	/* XXX only because the assembly stubs can't handle RW_NODEBUG */
	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
		rw->rw_owner = 0;
	else
		rw->rw_owner = RW_NODEBUG;
#else
	rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

	_rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
		return false;
	}

	/*
	 * See lwp_dtor() why dereference of the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)(owner & RW_THREAD);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}
#endif
	/* Not running.  It may be safe to block now. */
	return false;
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_TIMER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (panicstr == NULL) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
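	 *
	 * As an illustration of the resulting lock word (symbolic only;
	 * it follows directly from the increments chosen below):
	 *
	 *	idle lock	rw_owner == 0
	 *	two readers	rw_owner == 2 * RW_READ_INCR
	 *	one writer	rw_owner == curthread | RW_WRITE_LOCKED
	 *
	 * rw_vector_exit() subtracts the same quantity again to undo
	 * either kind of acquisition.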
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				RW_MEMBAR_ACQUIRE();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(__func__, __LINE__, rw,
			    "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		/* XXX membar? */
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EXIT(lsflag);

	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be. Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
	RW_MEMBAR_RELEASE();
	for (;;) {
		newown = (owner - decr);
		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner))
			return;
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_ASSERT(rw, ts != NULL);
	RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
	if (rcnt == 0 || decr == RW_READ_INCR) {
		RW_ASSERT(rw, wcnt != 0);
		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			newown = (uintptr_t)l | (owner & RW_NODEBUG);
			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			newown = owner & RW_NODEBUG;
			newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_ASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		newown = owner & RW_NODEBUG;
		newown += rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, newown);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
}

/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
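 *
 *	Non-blocking: the return value is nonzero on success and zero if
 *	the lock cannot be taken immediately.  A caller sketch through the
 *	rw_tryenter() entry point ("example_lock" is an illustrative name,
 *	not a symbol from this file):
 *
 *		if (rw_tryenter(&example_lock, RW_WRITER)) {
 *			(update the protected data)
 *			rw_exit(&example_lock);
 *		} else {
 *			(take a slow path, or block with rw_enter())
 *		}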
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		if (__predict_false((owner & need_wait) != 0))
			return 0;
		next = rw_cas(rw, owner, owner + incr);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

	RW_WANTLOCK(rw, op);
	RW_LOCKED(rw, op);
	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	RW_MEMBAR_ACQUIRE();
	return 1;
}

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
	__USE(curthread);
#endif

	RW_MEMBAR_PRODUCER();

	for (owner = rw->rw_owner;; owner = next) {
		/*
		 * If there are no waiters we can do this the easy way.  Try
		 * swapping us down to one read hold.  If it fails, the lock
		 * condition has changed and we most likely now have
		 * waiters.
		 */
		if ((owner & RW_HAS_WAITERS) == 0) {
			newown = (owner & RW_NODEBUG);
			next = rw_cas(rw, owner, newown + RW_READ_INCR);
			if (__predict_true(next == owner)) {
				RW_LOCKED(rw, RW_READER);
				RW_ASSERT(rw,
				    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
				RW_ASSERT(rw, RW_COUNT(rw) != 0);
				return;
			}
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_ASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		if (rcnt == 0) {
			/*
			 * If there are no readers, just preserve the
			 * waiters bits, swap us down to one read hold and
			 * return.
			 */
			RW_ASSERT(rw, wcnt != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			newown = owner & RW_NODEBUG;
			newown |= RW_READ_INCR | RW_HAS_WAITERS |
			    RW_WRITE_WANTED;
			next = rw_cas(rw, owner, newown);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there is
			 * a writer waiting, new readers will be blocked
			 * out.
			 */
			newown = owner & RW_NODEBUG;
			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, newown);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_WANTLOCK(rw, RW_READER);
	RW_LOCKED(rw, RW_READER);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_ASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, rw_read_held(rw));

	for (owner = RW_READ_INCR;; owner = next) {
		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			RW_MEMBAR_PRODUCER();
			break;
		}
		RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
		if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (next & RW_THREAD) != 0);
			return 0;
		}
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_WANTLOCK(rw, RW_WRITER);
	RW_LOCKED(rw, RW_WRITER);
	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_lock_op:
 *
 *	For a rwlock that is known to be held by the caller, return
 *	RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

	RW_ASSERT(rw, rw_lock_held(rw));

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}

/*
 * rw_owner_running:
 *
 *	Return true if a RW lock is unheld, or write held and the owner is
 *	running on a CPU.  For the pagedaemon.
 */
bool
rw_owner_running(const krwlock_t *rw)
{
#ifdef MULTIPROCESSOR
	uintptr_t owner;
	bool rv;

	kpreempt_disable();
	owner = rw->rw_owner;
	rv = (owner & RW_THREAD) == 0 || rw_oncpu(owner);
	kpreempt_enable();
	return rv;
#else
	return rw_owner(rw) == curlwp;
#endif
}