/*	$NetBSD: kern_rwlock.c,v 1.65 2020/02/22 21:24:45 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive.  Lock waiters spin wait while a
 * lock is write held and the holder is still running on a CPU.  The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.65 2020/02/22 21:24:45 ad Exp $");

#include "opt_lockdebug.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define	RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

#define	RW_WANTLOCK(rw, op) \
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op) \
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op) \
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define	RW_ASSERT(rw, cond) \
do { \
	if (__predict_false(!(cond))) \
		rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define	RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * Memory barriers.
 */
#ifdef __HAVE_ATOMIC_AS_MEMBAR
#define	RW_MEMBAR_ENTER()
#define	RW_MEMBAR_EXIT()
#define	RW_MEMBAR_PRODUCER()
#else
#define	RW_MEMBAR_ENTER()	membar_enter()
#define	RW_MEMBAR_EXIT()	membar_exit()
#define	RW_MEMBAR_PRODUCER()	membar_producer()
#endif

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	.lo_name = "Reader / writer lock",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= rw_owner,
};

/*
 * rw_cas:
 *
 *	Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}

/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
 *	known that the lock word is set up such that it can't be changed
 *	behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);

	RW_ASSERT(rw, n == o);
	RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

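/*
 * [Editorial sketch, not part of the original file.]  The routines below
 * all follow the same optimistic pattern built on rw_cas(): read the lock
 * word, compute the desired new value, and retry if another CPU changed
 * the word in the meantime.  A minimal sketch of that idiom, assuming a
 * hypothetical transform function compute_new(); it is kept under #if 0
 * so it is never compiled.
 */
#if 0
static inline void
rw_cas_loop_sketch(krwlock_t *rw)
{
	uintptr_t owner, newown, next;

	for (owner = rw->rw_owner;; owner = next) {
		/* Compute the value we would like to install. */
		newown = compute_new(owner);

		/* Try to install it; rw_cas() returns the value it saw. */
		next = rw_cas(rw, owner, newown);
		if (next == owner)
			break;		/* Our update won the race. */
		/* Lost the race: "next" is the fresh value, try again. */
	}
}
#endif
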
/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile krwlock_t *rw = cookie;

	pr("owner/count  : %#018lx flags    : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

	if (panicstr != NULL)
		return;

	LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
	/* XXX only because the assembly stubs can't handle RW_NODEBUG */
	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
		rw->rw_owner = 0;
	else
		rw->rw_owner = RW_NODEBUG;
#else
	rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

	_rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
		return false;
	}

	/*
	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)(owner & RW_THREAD);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}
#endif
	/* Not running.  It may be safe to block now. */
	return false;
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (panicstr == NULL) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				RW_MEMBAR_ENTER();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(__func__, __LINE__, rw,
			    "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EXIT(lsflag);

	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

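/*
 * [Editorial sketch, not part of the original file.]  To illustrate the
 * "add works for both cases" trick used above: a reader adds RW_READ_INCR
 * to bump the hold count, while a writer adds its own LWP address together
 * with RW_WRITE_LOCKED into a lock word whose thread bits are known to be
 * zero, so the release path can subtract the same quantity.  A sketch
 * assuming the lock starts out free; kept under #if 0 so it is never
 * compiled.
 */
#if 0
static void
rw_lockword_sketch(lwp_t *l)
{
	uintptr_t owner = 0;		/* free lock: no owner, no flags */

	/* Reader acquisition: count goes from 0 to 1. */
	owner += RW_READ_INCR;
	/* Reader release: subtract the same increment, word is 0 again. */
	owner -= RW_READ_INCR;

	/* Writer acquisition: store the LWP pointer plus the write bit... */
	owner += (uintptr_t)l | RW_WRITE_LOCKED;
	/* ...and the release path subtracts exactly the same value. */
	owner -= (uintptr_t)l | RW_WRITE_LOCKED;
}
#endif
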
/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be.  Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
	RW_MEMBAR_EXIT();
	for (;;) {
		newown = (owner - decr);
		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner))
			return;
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_ASSERT(rw, ts != NULL);
	RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
	if (rcnt == 0 || decr == RW_READ_INCR) {
		RW_ASSERT(rw, wcnt != 0);
		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			newown = (uintptr_t)l | (owner & RW_NODEBUG);
			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			newown = owner & RW_NODEBUG;
			newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_ASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		newown = owner & RW_NODEBUG;
		newown += rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, newown);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
}

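/*
 * [Editorial sketch, not part of the original file.]  The release fast
 * path above only falls through to the turnstile when the subtraction
 * would leave the lock unowned while RW_HAS_WAITERS is still set.  A
 * minimal sketch of that test in isolation; kept under #if 0 so it is
 * never compiled.
 */
#if 0
static bool
rw_release_needs_handoff_sketch(uintptr_t owner, uintptr_t decr)
{
	uintptr_t newown = owner - decr;

	/*
	 * RW_THREAD covers both the reader count and the writer's LWP
	 * address, so "no thread bits left, waiters bit still set" means
	 * the last hold is being dropped while somebody sleeps on the
	 * turnstile: hand the lock off instead of simply storing newown.
	 */
	return (newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS;
}
#endif
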
/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		if (__predict_false((owner & need_wait) != 0))
			return 0;
		next = rw_cas(rw, owner, owner + incr);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

	RW_WANTLOCK(rw, op);
	RW_LOCKED(rw, op);
	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	RW_MEMBAR_ENTER();
	return 1;
}

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
	__USE(curthread);
#endif

	RW_MEMBAR_PRODUCER();

	for (owner = rw->rw_owner;; owner = next) {
		/*
		 * If there are no waiters we can do this the easy way.  Try
		 * swapping us down to one read hold.  If it fails, the lock
		 * condition has changed and we most likely now have
		 * waiters.
		 */
		if ((owner & RW_HAS_WAITERS) == 0) {
			newown = (owner & RW_NODEBUG);
			next = rw_cas(rw, owner, newown + RW_READ_INCR);
			if (__predict_true(next == owner)) {
				RW_LOCKED(rw, RW_READER);
				RW_ASSERT(rw,
				    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
				RW_ASSERT(rw, RW_COUNT(rw) != 0);
				return;
			}
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_ASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		if (rcnt == 0) {
			/*
			 * If there are no readers, just preserve the
			 * waiters bits, swap us down to one read hold and
			 * return.
			 */
			RW_ASSERT(rw, wcnt != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			newown = owner & RW_NODEBUG;
			newown |= RW_READ_INCR | RW_HAS_WAITERS |
			    RW_WRITE_WANTED;
			next = rw_cas(rw, owner, newown);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there is
			 * a writer waiting, new readers will be blocked
			 * out.
			 */
			newown = owner & RW_NODEBUG;
			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, newown);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_WANTLOCK(rw, RW_READER);
	RW_LOCKED(rw, RW_READER);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_ASSERT(rw, RW_COUNT(rw) != 0);
}

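/*
 * [Editorial sketch, not part of the original file.]  rw_downgrade() is
 * typically used when a thread needs write access only to set something
 * up, but wants to keep readers out until that setup is complete.  A
 * minimal usage sketch, assuming a hypothetical "struct foo" protected by
 * a member foo_lock; kept under #if 0 so it is never compiled.
 */
#if 0
static void
foo_setup_then_read_sketch(struct foo *foo)
{

	rw_enter(&foo->foo_lock, RW_WRITER);
	/* ... modify foo while holding exclusive access ... */
	rw_downgrade(&foo->foo_lock);
	/* Still locked, now shared: other readers may enter, writers wait. */
	/* ... read foo ... */
	rw_exit(&foo->foo_lock);
}
#endif
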
/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, rw_read_held(rw));

	for (owner = RW_READ_INCR;; owner = next) {
		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			RW_MEMBAR_PRODUCER();
			break;
		}
		RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
		if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (next & RW_THREAD) != 0);
			return 0;
		}
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_WANTLOCK(rw, RW_WRITER);
	RW_LOCKED(rw, RW_WRITER);
	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_lock_op:
 *
 *	For a rwlock that is known to be held by the caller, return
 *	RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

	RW_ASSERT(rw, rw_lock_held(rw));

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}

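/*
 * [Editorial sketch, not part of the original file.]  As the comments
 * above stress, the *_held() predicates are intended only for assertions.
 * Typical use, again with a hypothetical "struct foo" protected by a
 * member foo_lock; kept under #if 0 so it is never compiled.
 */
#if 0
static void
foo_modify_sketch(struct foo *foo)
{

	/* The caller is required to hold the lock exclusively. */
	KASSERT(rw_write_held(&foo->foo_lock));
	/* ... modify foo ... */
}
#endif
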
/*
 * rw_owner_running:
 *
 *	Return true if a RW lock is unheld, or write held and the owner is
 *	running on a CPU.  For the pagedaemon.
 */
bool
rw_owner_running(const krwlock_t *rw)
{
#ifdef MULTIPROCESSOR
	uintptr_t owner;
	bool rv;

	kpreempt_disable();
	owner = rw->rw_owner;
	rv = (owner & RW_THREAD) == 0 || rw_oncpu(owner);
	kpreempt_enable();
	return rv;
#else
	return rw_owner(rw) == curlwp;
#endif
}
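
/*
 * [Editorial sketch, not part of the original file.]  For reference, the
 * typical life cycle of a krwlock as seen by a consumer of this API,
 * using a hypothetical "struct foo" with a member foo_lock; kept under
 * #if 0 so it is never compiled.
 */
#if 0
static void
foo_lifecycle_sketch(struct foo *foo)
{

	rw_init(&foo->foo_lock);

	/* Shared access: many readers may hold the lock concurrently. */
	rw_enter(&foo->foo_lock, RW_READER);
	/* ... read foo ... */
	rw_exit(&foo->foo_lock);

	/* Exclusive access: a single writer excludes everyone else. */
	rw_enter(&foo->foo_lock, RW_WRITER);
	/* ... modify foo ... */
	rw_exit(&foo->foo_lock);

	/* The non-blocking form fails instead of sleeping. */
	if (rw_tryenter(&foo->foo_lock, RW_WRITER)) {
		/* ... modify foo ... */
		rw_exit(&foo->foo_lock);
	}

	rw_destroy(&foo->foo_lock);
}
#endif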