/*	$OpenBSD: kern_rwlock.c,v 1.48 2022/05/10 16:56:16 bluhm Exp $	*/

/*
 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
#include <sys/witness.h>

void	rw_do_exit(struct rwlock *, unsigned long);

/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)

/*
 * Other OSes implement more sophisticated mechanisms to determine how long
 * the process attempting to acquire the lock should keep spinning. We start
 * with the simplest approach: we make at most RW_SPINS attempts before
 * giving up and putting the process on the sleep queue.
 */
#define RW_SPINS	1000

#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	if (*p != o)
		return (1);
	*p = n;

	return (0);
}
#endif

/*
 * Magic wand for lock operations. Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty. We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
 *		with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 */
static const struct rwlock_op {
	unsigned long inc;
	unsigned long check;
	unsigned long wait_set;
	long proc_mult;
	int wait_prio;
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK | RWLOCK_WRWANT,
		RWLOCK_WAIT,
		0,
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,
		0,
		-1,
		PLOCK
	},
};
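
/*
 * A rough sketch of how rwl_owner evolves under these operations; this is
 * an illustrative summary derived from the table above and the code below,
 * not an exhaustive list of states:
 *
 *	unlocked			0
 *	write-locked by proc p		RW_PROC(p) | RWLOCK_WRLOCK
 *	read-locked by two readers	2 * RWLOCK_READ_INCR
 *	read-locked, writer waiting	RWLOCK_READ_INCR | RWLOCK_WAIT |
 *					    RWLOCK_WRWANT
 *
 * RW_DOWNGRADE adds RWLOCK_READ_INCR - RWLOCK_WRLOCK - RW_PROC(p) to a
 * write-locked word, which leaves exactly RWLOCK_READ_INCR: the holder
 * becomes a single reader without ever releasing the lock.
 */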

void
rw_enter_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (__predict_false((owner & (RWLOCK_WRLOCK | RWLOCK_WRWANT)) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		rw_enter(rwl, RW_READ);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
	}
}

void
rw_enter_write(struct rwlock *rwl)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		rw_enter(rwl, RW_WRITE);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
	}
}

void
rw_exit_read(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);

	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		rw_do_exit(rwl, 0);
}

void
rw_exit_write(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_wrlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);

	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		rw_do_exit(rwl, RWLOCK_WRLOCK);
}

#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}

#else
#define rw_enter_diag(r, f)
#endif

static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    const struct lock_type *type)
{
	rwl->rwl_owner = 0;
	rwl->rwl_name = name;

#ifdef WITNESS
	rwl->rwl_lock_obj.lo_flags = lo_flags;
	rwl->rwl_lock_obj.lo_name = name;
	rwl->rwl_lock_obj.lo_type = type;
	WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
	(void)type;
	(void)lo_flags;
#endif
}

void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}
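
/*
 * Minimal usage sketch for the fast-path entry points above.  This is
 * illustrative only: the RWLOCK_EXAMPLE guard, the example_lock object and
 * the "examplelk" name are hypothetical and not part of the kernel;
 * rw_init() is the usual initializer macro from <sys/rwlock.h>.
 */
#ifdef RWLOCK_EXAMPLE
static struct rwlock example_lock;

static void
example_rwlock_usage(void)
{
	rw_init(&example_lock, "examplelk");

	rw_enter_write(&example_lock);		/* exclusive hold */
	/* ... modify shared state ... */
	rw_exit_write(&example_lock);

	rw_enter_read(&example_lock);		/* shared hold */
	/* ... read shared state ... */
	rw_exit_read(&example_lock);
}
#endif /* RWLOCK_EXAMPLE */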

int
rw_enter(struct rwlock *rwl, int flags)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
#ifdef MULTIPROCESSOR
	/*
	 * If the process holds the kernel lock, we want to give up the CPU
	 * as soon as possible so other processes waiting for the kernel lock
	 * can make progress.  Hence no spinning if we hold the kernel lock.
	 */
	unsigned int spin = (_kernel_lock_held()) ? 0 : RW_SPINS;
#endif
	int error, prio;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

#ifdef MULTIPROCESSOR
		/*
		 * It only makes sense to spin while the lock is held
		 * by a writer.
		 */
		if ((o & RWLOCK_WRLOCK) && (spin != 0)) {
			spin--;
			CPU_BUSY_CYCLE();
			continue;
		}
#endif

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		prio = op->wait_prio;
		if (flags & RW_INTR)
			prio |= PCATCH;
		sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0);

		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		error = sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) &&
		    (error != 0))
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If the old lock value had both RWLOCK_WRLOCK and RWLOCK_WAIT set,
	 * we just downgraded a write lock that may have had read waiters;
	 * wake them up so they can retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

	return (0);
}
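
/*
 * Sketch of the slow-path interface above: rw_enter() doubles as a
 * try-lock (RW_NOSLEEP) and as the downgrade path (RW_DOWNGRADE).
 * Illustrative only; the guard and function name are hypothetical.
 */
#ifdef RWLOCK_EXAMPLE
static int
example_try_then_downgrade(struct rwlock *rwl)
{
	/* Fail with EBUSY instead of sleeping if the lock is taken. */
	if (rw_enter(rwl, RW_WRITE | RW_NOSLEEP) != 0)
		return (EBUSY);

	/* ... set up shared state exclusively ... */

	/* Keep a read hold without an unlocked window in between. */
	rw_enter(rwl, RW_DOWNGRADE);

	/* ... read shared state ... */
	rw_exit_read(rwl);

	return (0);
}
#endif /* RWLOCK_EXAMPLE */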

void
rw_exit(struct rwlock *rwl)
{
	unsigned long wrlock;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	wrlock = rwl->rwl_owner & RWLOCK_WRLOCK;
	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);

	membar_exit_before_atomic();
	rw_do_exit(rwl, wrlock);
}

/* membar_exit_before_atomic() has to precede the call of this function. */
void
rw_do_exit(struct rwlock *rwl, unsigned long wrlock)
{
	unsigned long owner, set;

	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			set = (owner - RWLOCK_READ_INCR) &
			    ~(RWLOCK_WAIT|RWLOCK_WRWANT);
		/*
		 * Potential MP race here.  If the owner had WRWANT set, we
		 * cleared it and a reader can sneak in before a writer.
		 */
	} while (__predict_false(rw_cas(&rwl->rwl_owner, owner, set)));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}

int
rw_status(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (owner & RWLOCK_WRLOCK) {
		if (RW_PROC(curproc) == RW_PROC(owner))
			return RW_WRITE;
		else
			return RW_WRITE_OTHER;
	}
	if (owner)
		return RW_READ;
	return (0);
}

#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_XLOCKED);
#else
	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
		panic("%s: lock not held by this process", rwl->rwl_name);
#endif
}

void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_SLOCKED);
#else
	if (!RW_PROC(rwl->rwl_owner) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
#endif
}

void
rw_assert_anylock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_LOCKED);
#else
	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
#endif
}

void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_UNLOCKED);
#else
	if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
		panic("%s: lock held", rwl->rwl_name);
#endif
}
#endif

/* Recursive rwlocks. */
void
_rrw_init_flags(struct rrwlock *rrwl, const char *name, int flags,
    const struct lock_type *type)
{
	memset(rrwl, 0, sizeof(struct rrwlock));
	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
	    type);
}

int
rrw_enter(struct rrwlock *rrwl, int flags)
{
	int	rv;

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		if (flags & RW_RECURSEFAIL)
			return (EDEADLK);
		else {
			rrwl->rrwl_wcnt++;
			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return (0);
		}
	}

	rv = rw_enter(&rrwl->rrwl_lock, flags);
	if (rv == 0)
		rrwl->rrwl_wcnt = 1;

	return (rv);
}

void
rrw_exit(struct rrwlock *rrwl)
{

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		KASSERT(rrwl->rrwl_wcnt > 0);
		rrwl->rrwl_wcnt--;
		if (rrwl->rrwl_wcnt != 0) {
			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return;
		}
	}

	rw_exit(&rrwl->rrwl_lock);
}

int
rrw_status(struct rrwlock *rrwl)
{
	return (rw_status(&rrwl->rrwl_lock));
}
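
/*
 * Recursion sketch: unlike a plain rwlock, the owning process may re-enter
 * an rrwlock; only the final rrw_exit() releases it.  Illustrative only;
 * the guard and names are hypothetical, and rrw_init() is assumed to be
 * the usual wrapper around _rrw_init_flags() from <sys/rwlock.h>.
 */
#ifdef RWLOCK_EXAMPLE
static struct rrwlock example_rrwlock;

static void
example_rrwlock_usage(void)
{
	rrw_init(&example_rrwlock, "examplerrwlk");

	rrw_enter(&example_rrwlock, RW_WRITE);
	rrw_enter(&example_rrwlock, RW_WRITE);	/* recursion, rrwl_wcnt == 2 */
	rrw_exit(&example_rrwlock);		/* still held */
	rrw_exit(&example_rrwlock);		/* released */
}
#endif /* RWLOCK_EXAMPLE */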

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#define RWLOCK_OBJ_MAGIC	0x5aa3c85d
struct rwlock_obj {
	struct rwlock	ro_lock;
	u_int		ro_magic;
	u_int		ro_refcnt;
};

struct pool rwlock_obj_pool;

/*
 * rw_obj_init:
 *
 *	Initialize the rwlock object store.
 */
void
rw_obj_init(void)
{
	pool_init(&rwlock_obj_pool, sizeof(struct rwlock_obj), 0, IPL_MPFLOOR,
	    PR_WAITOK, "rwobjpl", NULL);
}

/*
 * rw_obj_alloc:
 *
 *	Allocate a single lock object.
 */
void
_rw_obj_alloc_flags(struct rwlock **lock, const char *name, int flags,
    struct lock_type *type)
{
	struct rwlock_obj *mo;

	mo = pool_get(&rwlock_obj_pool, PR_WAITOK);
	mo->ro_magic = RWLOCK_OBJ_MAGIC;
	_rw_init_flags(&mo->ro_lock, name, flags, type);
	mo->ro_refcnt = 1;

	*lock = &mo->ro_lock;
}

/*
 * rw_obj_hold:
 *
 *	Add a single reference to a lock object.  A reference to the object
 *	must already be held, and must be held across this call.
 */
void
rw_obj_hold(struct rwlock *lock)
{
	struct rwlock_obj *mo = (struct rwlock_obj *)lock;

	KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC,
	    "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)",
	    __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC);
	KASSERTMSG(mo->ro_refcnt > 0,
	    "%s: lock %p: mo->ro_refcnt (%#x) == 0",
	    __func__, mo, mo->ro_refcnt);

	atomic_inc_int(&mo->ro_refcnt);
}

/*
 * rw_obj_free:
 *
 *	Drop a reference from a lock object.  If the last reference is being
 *	dropped, free the object and return true.  Otherwise, return false.
 */
int
rw_obj_free(struct rwlock *lock)
{
	struct rwlock_obj *mo = (struct rwlock_obj *)lock;

	KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC,
	    "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)",
	    __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC);
	KASSERTMSG(mo->ro_refcnt > 0,
	    "%s: lock %p: mo->ro_refcnt (%#x) == 0",
	    __func__, mo, mo->ro_refcnt);

	if (atomic_dec_int_nv(&mo->ro_refcnt) > 0) {
		return false;
	}
#if notyet
	WITNESS_DESTROY(&mo->ro_lock);
#endif
	pool_put(&rwlock_obj_pool, mo);
	return true;
}
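
/*
 * Lifecycle sketch for the reference-counted lock objects above.  The guard
 * and names are hypothetical; rw_obj_alloc() is assumed to be the usual
 * wrapper around _rw_obj_alloc_flags() from <sys/rwlock.h>.
 */
#ifdef RWLOCK_EXAMPLE
static void
example_rw_obj_usage(void)
{
	struct rwlock *shared;

	rw_obj_alloc(&shared, "examplelk");	/* refcnt == 1 */
	rw_obj_hold(shared);			/* refcnt == 2 */

	rw_enter_read(shared);
	/* ... read shared state ... */
	rw_exit_read(shared);

	rw_obj_free(shared);			/* refcnt drops to 1 */
	rw_obj_free(shared);			/* last reference, freed */
}
#endif /* RWLOCK_EXAMPLE */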