/*	$NetBSD: kern_rwlock.c,v 1.71 2023/07/17 12:54:29 riastradh Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive.  Lock waiters spin wait while a
 * lock is write held and the holder is still running on a CPU.  The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */

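/*
 * Illustrative usage sketch (not part of this implementation): a typical
 * consumer of the rwlock(9) API implemented below, protecting a
 * hypothetical "frobtable".  The lock and data names are invented for the
 * example; only the rw_*() calls are real.
 *
 *	static krwlock_t frob_lock;
 *	static struct frob *frobtable;
 *
 *	rw_init(&frob_lock);
 *
 *	rw_enter(&frob_lock, RW_READER);	// shared: many readers
 *	... look up an entry in frobtable ...
 *	rw_exit(&frob_lock);
 *
 *	rw_enter(&frob_lock, RW_WRITER);	// exclusive: one writer
 *	... modify frobtable ...
 *	rw_exit(&frob_lock);
 *
 *	rw_destroy(&frob_lock);
 */
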
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.71 2023/07/17 12:54:29 riastradh Exp $");

#include "opt_lockdebug.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define	RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

#define	RW_WANTLOCK(rw, op) \
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op) \
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op) \
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define	RW_ASSERT(rw, cond) \
do { \
	if (__predict_false(!(cond))) \
		rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define	RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	.lo_name = "Reader / writer lock",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
	.sobj_name	= "rw",
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= rw_owner,
};

/*
 * rw_cas:
 *
 *	Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}

/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
 *	known that the lock word is set up such that it can't be changed
 *	behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);

	RW_ASSERT(rw, n == o);
	RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile krwlock_t *rw = cookie;

	pr("owner/count : %#018lx flags : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

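/*
 * Note (illustrative, not part of the original file): all updates to the
 * lock word below follow the same optimistic compare-and-swap pattern.
 * Stripped of waiter handling and spinning, the shape of every such loop
 * is roughly:
 *
 *	for (owner = rw->rw_owner;; owner = next) {
 *		newown = <value computed from owner>;
 *		next = rw_cas(rw, owner, newown);
 *		if (next == owner)
 *			break;	// our update was applied atomically
 *		// otherwise the word changed underneath us; recompute
 *	}
 */
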
/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

	if (__predict_false(panicstr != NULL))
		return;

	LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
	/* XXX only because the assembly stubs can't handle RW_NODEBUG */
	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
		rw->rw_owner = 0;
	else
		rw->rw_owner = RW_NODEBUG;
#else
	rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

	_rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
		return false;
	}

	/*
	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)(owner & RW_THREAD);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}
#endif
	/* Not running.  It may be safe to block now. */
	return false;
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (__predict_true(panicstr == NULL)) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
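	/*
	 * Worked example (illustrative; assumes the lock-word layout from
	 * sys/rwlock.h): starting from an unowned word of 0, a reader adds
	 * RW_READ_INCR and the word becomes a read count of one, while a
	 * writer adds (curthread | RW_WRITE_LOCKED) and the word becomes
	 * the owning LWP's address with the write bit set.  The same add,
	 * different increment.
	 */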
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				membar_acquire();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(__func__, __LINE__, rw,
			    "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		/* XXX membar? */
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EXIT(lsflag);

	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

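/*
 * Informal summary of the slow-path acquire above (a restatement for the
 * reader, not additional behaviour):
 *
 *	1. If the lock is free for our purpose, CAS it in and return.
 *	2. If a writer holds it and is running on another CPU, spin with
 *	   exponential backoff and retry.
 *	3. Otherwise take the turnstile chain lock, set the waiter bits,
 *	   and sleep; on wakeup a reader has been granted the lock, while
 *	   a writer checks for direct handoff and otherwise retries.
 */
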
/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release paths
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be. Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
	membar_release();
	for (;;) {
		newown = (owner - decr);
		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner))
			return;
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_ASSERT(rw, ts != NULL);
	RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
	if (rcnt == 0 || decr == RW_READ_INCR) {
		RW_ASSERT(rw, wcnt != 0);
		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			newown = (uintptr_t)l | (owner & RW_NODEBUG);
			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			newown = owner & RW_NODEBUG;
			newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_ASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		newown = owner & RW_NODEBUG;
		newown += rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, newown);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
}

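/*
 * Worked example of the release policy above (illustrative numbers only):
 * if a write lock is released while three readers and one writer sleep on
 * the turnstile, all three readers are woken and handed the lock, so the
 * new lock word carries a read count of three plus RW_HAS_WAITERS and
 * RW_WRITE_WANTED for the writer still queued.  If the hold being released
 * is the last read hold and both readers and writers are queued, the
 * longest-waiting writer alone is handed the lock directly; if no readers
 * are queued at all, every waiting writer is woken with RW_WRITE_WANTED
 * set and they compete for the lock in rw_vector_enter().
 */
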
/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		if (__predict_false((owner & need_wait) != 0))
			return 0;
		next = rw_cas(rw, owner, owner + incr);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

	RW_WANTLOCK(rw, op);
	RW_LOCKED(rw, op);
	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	membar_acquire();
	return 1;
}

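/*
 * Illustrative use of the non-blocking entry point above (sketch only;
 * "frob_lock" and the fallback are invented for the example):
 *
 *	if (rw_tryenter(&frob_lock, RW_WRITER)) {
 *		... update, guaranteed not to have slept ...
 *		rw_exit(&frob_lock);
 *	} else {
 *		... defer or skip the work rather than block ...
 *	}
 */
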
/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
	__USE(curthread);
#endif

	membar_release();
	for (owner = rw->rw_owner;; owner = next) {
		/*
		 * If there are no waiters we can do this the easy way.  Try
		 * swapping us down to one read hold.  If it fails, the lock
		 * condition has changed and we most likely now have
		 * waiters.
		 */
		if ((owner & RW_HAS_WAITERS) == 0) {
			newown = (owner & RW_NODEBUG);
			next = rw_cas(rw, owner, newown + RW_READ_INCR);
			if (__predict_true(next == owner)) {
				RW_LOCKED(rw, RW_READER);
				RW_ASSERT(rw,
				    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
				RW_ASSERT(rw, RW_COUNT(rw) != 0);
				return;
			}
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_ASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		if (rcnt == 0) {
			/*
			 * If there are no readers, just preserve the
			 * waiters bits, swap us down to one read hold and
			 * return.
			 */
			RW_ASSERT(rw, wcnt != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			newown = owner & RW_NODEBUG;
			newown |= RW_READ_INCR | RW_HAS_WAITERS |
			    RW_WRITE_WANTED;
			next = rw_cas(rw, owner, newown);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers, retaining
			 * one read hold for ourselves since this is a
			 * downgrade.  If there is a writer waiting, new
			 * readers will be blocked out.
			 */
			newown = owner & RW_NODEBUG;
			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, newown);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_WANTLOCK(rw, RW_READER);
	RW_LOCKED(rw, RW_READER);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_ASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, rw_read_held(rw));

	for (owner = RW_READ_INCR;; owner = next) {
		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			membar_acquire();
			break;
		}
		RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
		if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (next & RW_THREAD) != 0);
			return 0;
		}
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_WANTLOCK(rw, RW_WRITER);
	RW_LOCKED(rw, RW_WRITER);
	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_lock_op:
 *
 *	For a rwlock that is known to be held by the caller, return
 *	RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

	RW_ASSERT(rw, rw_lock_held(rw));

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}

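/*
 * Illustrative sketch (names invented, not part of this file) of the
 * upgrade/downgrade pattern served by rw_tryupgrade() and rw_downgrade()
 * above:
 *
 *	rw_enter(&frob_lock, RW_READER);
 *	if (needs_update && rw_tryupgrade(&frob_lock)) {
 *		... we now hold the write lock; modify ...
 *		rw_downgrade(&frob_lock);	// back to a read hold
 *	}
 *	... continue reading; exit releases whichever hold remains ...
 *	rw_exit(&frob_lock);
 */
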
/*
 * rw_owner_running:
 *
 *	Return true if a RW lock is unheld, or write held and the owner is
 *	running on a CPU.  For the pagedaemon.
 */
bool
rw_owner_running(const krwlock_t *rw)
{
#ifdef MULTIPROCESSOR
	uintptr_t owner;
	bool rv;

	kpreempt_disable();
	owner = rw->rw_owner;
	rv = (owner & RW_THREAD) == 0 || rw_oncpu(owner);
	kpreempt_enable();
	return rv;
#else
	return rw_owner(rw) == curlwp;
#endif
}
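
/*
 * Note (illustrative restatement of the comment above): rw_owner_running()
 * roughly answers "is it worth briefly retrying against this lock rather
 * than sleeping?" -- true when the lock is unheld or its write owner is on
 * a CPU, false once the lock is read held or the owner has gone to sleep
 * or been preempted.
 */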