/*	$NetBSD: kern_rwlock.c,v 1.29 2009/04/19 08:36:04 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.29 2009/04/19 08:36:04 ad Exp $");

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>

#include <dev/lockstat.h>

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

#define	RW_WANTLOCK(rw, op, t)						\
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw),			\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER, t);
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL,			\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw),			\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		rw_abort(rw, __func__, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0);

#else	/* LOCKDEBUG */

#define	RW_WANTLOCK(rw, op, t)	/* nothing */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		rw_abort(rw, __func__, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

#define	RW_SETDEBUG(rw, on)		((rw)->rw_owner |= (on) ? RW_DEBUG : 0)
#define	RW_DEBUG_P(rw)			(((rw)->rw_owner & RW_DEBUG) != 0)
#if defined(LOCKDEBUG)
#define	RW_INHERITDEBUG(new, old)	(new) |= (old) & RW_DEBUG
#else /* defined(LOCKDEBUG) */
#define	RW_INHERITDEBUG(new, old)	/* nothing */
#endif /* defined(LOCKDEBUG) */

static void	rw_abort(krwlock_t *, const char *, const char *);
static void	rw_dump(volatile void *);
static lwp_t	*rw_owner(wchan_t);

static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	RW_INHERITDEBUG(n, o);
	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}

static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	RW_INHERITDEBUG(n, o);
	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);
	RW_DASSERT(rw, n == o);
}

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

lockops_t rwlock_lockops = {
	"Reader / writer lock",
	LOCKOPS_SLEEP,
	rw_dump
};

syncobj_t rw_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	turnstile_unsleep,
	turnstile_changepri,
	sleepq_lendpri,
	rw_owner,
};

/* rwlock object cache */
#define	RW_OBJ_MAGIC	0x85d3c85d
struct krwobj {
	krwlock_t	ro_lock;
	u_int		ro_magic;
	u_int		ro_refcnt;
};

static int	rw_obj_ctor(void *, void *, int);

static pool_cache_t	rw_obj_cache;
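
/*
 * The routines below all update rw_owner with the same lock-free pattern:
 * read the word, compute the desired new value, and retry rw_cas() until
 * it returns the value that was read.  Illustrative sketch only (not
 * compiled here; compute_new_value() stands in for the per-routine logic):
 *
 *	for (owner = rw->rw_owner;; owner = next) {
 *		new = compute_new_value(owner);
 *		next = rw_cas(rw, owner, new);
 *		if (next == owner)
 *			break;
 *	}
 *
 * rw_cas() returns the previous value of rw_owner, so a mismatch means
 * another thread changed the word first and the loop retries with the
 * freshly observed value.
 */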

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(volatile void *cookie)
{
	volatile krwlock_t *rw = cookie;

	printf_nolog("owner/count : %#018lx flags : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(krwlock_t *rw, const char *func, const char *msg)
{

	if (panicstr != NULL)
		return;

	LOCKDEBUG_ABORT(rw, &rwlock_lockops, func, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
rw_init(krwlock_t *rw)
{
	bool dodebug;

	memset(rw, 0, sizeof(*rw));

	dodebug = LOCKDEBUG_ALLOC(rw, &rwlock_lockops,
	    (uintptr_t)__builtin_return_address(0));
	RW_SETDEBUG(rw, dodebug);
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~RW_DEBUG) == 0);
	LOCKDEBUG_FREE(RW_DEBUG_P(rw), rw);
}

/*
 * rw_onproc:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 *
 *	Note that we can't use the rwlock owner field as an LWP pointer.  We
 *	don't have full control over the timing of our execution, and so the
 *	pointer could be completely invalid by the time we dereference it.
 */
static int
rw_onproc(uintptr_t owner, struct cpu_info **cip)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	lwp_t *l;

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED)
		return 0;
	l = (lwp_t *)(owner & RW_THREAD);

	/* See if the target is running on a CPU somewhere. */
	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
		goto run;
	for (CPU_INFO_FOREACH(cii, ci))
		if (ci->ci_curlwp == l)
			goto run;

	/* No: it may be safe to block now. */
	*cip = NULL;
	return 0;

 run:
 	/* Target is running; do we need to block? */
 	*cip = ci;
	return ci->ci_biglock_wanted != l;
#else
	return 0;
#endif	/* MULTIPROCESSOR */
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	struct cpu_info *ci;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_TIMER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op, false);

	if (panicstr == NULL) {
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
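	 *
	 * For illustration (schematic values only): with the lock free
	 * (rw_owner == 0), a writer's add of (curthread | RW_WRITE_LOCKED)
	 * stores its LWP address and the write bit in a single step, while
	 * each reader's add of RW_READ_INCR simply bumps the hold count
	 * field.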
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	for (ci = NULL, owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
				membar_enter();
#endif
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}

		if (__predict_false(panicstr != NULL))
			return;
		if (__predict_false(RW_OWNER(rw) == curthread))
			rw_abort(rw, __func__, "locking against myself");

		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_onproc(owner, &ci)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				SPINLOCK_BACKOFF(count);
				owner = rw->rw_owner;
			} while (rw_onproc(owner, &ci));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_onproc(owner, &ci)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;
	}

	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}
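
/*
 * Example (illustrative only; "foo_softc" and its fields are invented
 * names, not part of this file): callers reach the vector routines above
 * through the rw_enter()/rw_exit() stubs.
 *
 *	struct foo_softc {
 *		krwlock_t	sc_lock;
 *		int		sc_state;
 *	};
 *
 *	Shared (read) access:
 *
 *		rw_enter(&sc->sc_lock, RW_READER);
 *		(void)sc->sc_state;
 *		rw_exit(&sc->sc_lock);
 *
 *	Exclusive (write) access:
 *
 *		rw_enter(&sc->sc_lock, RW_WRITER);
 *		sc->sc_state = 1;
 *		rw_exit(&sc->sc_lock);
 */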

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, new, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	if (__predict_false(panicstr != NULL))
		return;

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be.  Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_exit();
#endif
	for (;;) {
		new = (owner - decr);
		if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		next = rw_cas(rw, owner, new);
		if (__predict_true(next == owner))
			return;
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_DASSERT(rw, ts != NULL);
	RW_DASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
	if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) {
		RW_DASSERT(rw, wcnt != 0);
		RW_DASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			new = (uintptr_t)l | RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				new |= RW_WRITE_WANTED;
			rw_swap(rw, owner, new);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			rw_swap(rw, owner, RW_WRITE_WANTED);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_DASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		new = rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			new |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, new);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
}
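
/*
 * To make the policy above concrete (illustrative scenarios derived from
 * the code, not from any trace):
 *
 *	- A writer releasing while readers sleep on the turnstile grants
 *	  every sleeping reader one read hold at once, even if writers
 *	  are also queued.
 *
 *	- A reader dropping the last read hold while both readers and
 *	  writers are queued hands the lock directly to the longest
 *	  waiting writer.
 *
 *	- A release that finds only writers queued sets RW_WRITE_WANTED,
 *	  wakes them all, and lets rw_vector_enter() decide the winner.
 */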

/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next;

	curthread = (uintptr_t)curlwp;

	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		owner = rw->rw_owner;
		if (__predict_false((owner & need_wait) != 0))
			return 0;
		next = rw_cas(rw, owner, owner + incr);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_enter();
#endif
	RW_WANTLOCK(rw, op, true);
	RW_LOCKED(rw, op);
	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	return 1;
}
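
/*
 * Example (illustrative only; "sc" and its fields are invented for the
 * sketch): the try-variant is useful where sleeping is not an option,
 * so on failure the caller backs off instead of blocking:
 *
 *	if (rw_tryenter(&sc->sc_lock, RW_WRITER) == 0)
 *		return EBUSY;
 *	sc->sc_state = 1;
 *	rw_exit(&sc->sc_lock);
 */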

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, new, next;
	turnstile_t *ts;
	int rcnt, wcnt;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);

#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_producer();
#endif

	owner = rw->rw_owner;
	if ((owner & RW_HAS_WAITERS) == 0) {
		/*
		 * There are no waiters, so we can do this the easy way.
		 * Try swapping us down to one read hold.  If it fails, the
		 * lock condition has changed and we most likely now have
		 * waiters.
		 */
		next = rw_cas(rw, owner, RW_READ_INCR);
		if (__predict_true(next == owner)) {
			RW_LOCKED(rw, RW_READER);
			RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
			RW_DASSERT(rw, RW_COUNT(rw) != 0);
			return;
		}
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	for (;; owner = next) {
		ts = turnstile_lookup(rw);
		RW_DASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		/*
		 * If there are no readers, just preserve the waiters
		 * bits, swap us down to one read hold and return.
		 */
		if (rcnt == 0) {
			RW_DASSERT(rw, wcnt != 0);
			RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
			next = rw_cas(rw, owner, new);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there
			 * is a writer waiting, new readers will be blocked
			 * out.
			 */
			new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, new);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_LOCKED(rw, RW_READER);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_DASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the
 *	only reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, new, next;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, RW_WRITER, true);

	for (owner = rw->rw_owner;; owner = next) {
		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
		if (__predict_false((owner & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
			return 0;
		}
		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, new);
		if (__predict_true(next == owner))
			break;
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_LOCKED(rw, RW_WRITER);
	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_DASSERT(rw, RW_OWNER(rw) == curthread);

#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_producer();
#endif

	return 1;
}
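
/*
 * Example (illustrative only; tree_lock, lookup() and modify() are
 * invented names): a common pattern is to search under a read hold,
 * attempt an upgrade to modify, and fall back to re-taking the lock
 * as a writer when the upgrade fails because other readers are present:
 *
 *	rw_enter(&tree_lock, RW_READER);
 *	node = lookup(key);
 *	if (!rw_tryupgrade(&tree_lock)) {
 *		rw_exit(&tree_lock);
 *		rw_enter(&tree_lock, RW_WRITER);
 *		node = lookup(key);	(revalidate after reacquiring)
 *	}
 *	modify(node);
 *	rw_downgrade(&tree_lock);	(keep reading, let other readers in)
 *	...
 *	rw_exit(&tree_lock);
 */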

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (panicstr != NULL)
		return 1;
	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;
	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;
	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}

/*
 * rw_obj_init:
 *
 *	Initialize the rw object store.
 */
void
rw_obj_init(void)
{

	rw_obj_cache = pool_cache_init(sizeof(struct krwobj),
	    coherency_unit, 0, 0, "rwlock", NULL, IPL_NONE, rw_obj_ctor,
	    NULL, NULL);
}

/*
 * rw_obj_ctor:
 *
 *	Initialize a new lock for the cache.
 */
static int
rw_obj_ctor(void *arg, void *obj, int flags)
{
	struct krwobj * ro = obj;

	ro->ro_magic = RW_OBJ_MAGIC;

	return 0;
}

/*
 * rw_obj_alloc:
 *
 *	Allocate a single lock object.
 */
krwlock_t *
rw_obj_alloc(void)
{
	struct krwobj *ro;

	ro = pool_cache_get(rw_obj_cache, PR_WAITOK);
	rw_init(&ro->ro_lock);
	ro->ro_refcnt = 1;

	return (krwlock_t *)ro;
}

/*
 * rw_obj_hold:
 *
 *	Add a single reference to a lock object.  A reference to the object
 *	must already be held, and must be held across this call.
 */
void
rw_obj_hold(krwlock_t *lock)
{
	struct krwobj *ro = (struct krwobj *)lock;

	KASSERT(ro->ro_magic == RW_OBJ_MAGIC);
	KASSERT(ro->ro_refcnt > 0);

	atomic_inc_uint(&ro->ro_refcnt);
}

/*
 * rw_obj_free:
 *
 *	Drop a reference from a lock object.  If the last reference is being
 *	dropped, free the object and return true.  Otherwise, return false.
 */
bool
rw_obj_free(krwlock_t *lock)
{
	struct krwobj *ro = (struct krwobj *)lock;

	KASSERT(ro->ro_magic == RW_OBJ_MAGIC);
	KASSERT(ro->ro_refcnt > 0);

	if (atomic_dec_uint_nv(&ro->ro_refcnt) > 0) {
		return false;
	}
	rw_destroy(&ro->ro_lock);
	pool_cache_put(rw_obj_cache, ro);
	return true;
}
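
/*
 * Example (illustrative only; the surrounding structures are invented):
 * lock objects allocated above are shared by reference count, so several
 * consumers can point at one lock and the last rw_obj_free() call tears
 * it down:
 *
 *	krwlock_t *lock = rw_obj_alloc();	(reference count is now 1)
 *	a->a_lock = lock;
 *
 *	rw_obj_hold(lock);			(a second consumer, count 2)
 *	b->b_lock = lock;
 *
 *	(void)rw_obj_free(b->b_lock);		(count drops to 1, returns false)
 *	(void)rw_obj_free(a->a_lock);		(last reference: lock destroyed,
 *						 object returned to the cache)
 */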