/*	$OpenBSD: kern_rwlock.c,v 1.50 2023/07/14 07:07:08 claudio Exp $	*/

/*
 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
#include <sys/witness.h>

void	rw_do_exit(struct rwlock *, unsigned long);

/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)

/*
 * Other OSes implement more sophisticated mechanisms to determine how long
 * the process attempting to acquire the lock should spin. We start with the
 * simplest approach: we make at most RW_SPINS attempts before giving up and
 * putting the process on the sleep queue.
 */
#define RW_SPINS	1000

#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	if (*p != o)
		return (1);
	*p = n;

	return (0);
}
#endif

/*
 * Magic wand for lock operations. Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty. We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
 *		with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 */
static const struct rwlock_op {
	unsigned long inc;
	unsigned long check;
	unsigned long wait_set;
	long proc_mult;
	int wait_prio;
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK | RWLOCK_WRWANT,
		RWLOCK_WAIT,
		0,
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,
		0,
		-1,
		PLOCK
	},
};
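
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * how the table above drives the owner word on an uncontended acquire,
 * using the symbolic constants from <sys/rwlock.h> and ignoring waiter
 * bits. "p" stands for the acquiring proc.
 *
 *	rw_enter(rwl, RW_WRITE)		owner: 0
 *					    -> RW_PROC(p) | RWLOCK_WRLOCK
 *	rw_enter(rwl, RW_READ)		owner: n * RWLOCK_READ_INCR
 *					    -> (n + 1) * RWLOCK_READ_INCR
 *	rw_enter(rwl, RW_DOWNGRADE)	owner: RW_PROC(p) | RWLOCK_WRLOCK
 *					    -> RWLOCK_READ_INCR
 *
 * proc_mult folds RW_PROC(curproc) into the increment: 1 for RW_WRITE
 * (record the owner), 0 for RW_READ (readers stay anonymous) and -1 for
 * RW_DOWNGRADE (drop the recorded owner again).
 */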

void
rw_enter_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (__predict_false((owner & (RWLOCK_WRLOCK | RWLOCK_WRWANT)) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		rw_enter(rwl, RW_READ);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
	}
}

void
rw_enter_write(struct rwlock *rwl)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		rw_enter(rwl, RW_WRITE);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
	}
}

void
rw_exit_read(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);

	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		rw_do_exit(rwl, 0);
}

void
rw_exit_write(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_wrlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);

	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		rw_do_exit(rwl, RWLOCK_WRLOCK);
}
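
/*
 * Usage sketch (editor's addition): the fast paths above back the basic
 * rwlock API. The lock and its name below are made up for illustration;
 * the rw_init() wrapper is assumed to come from <sys/rwlock.h>.
 *
 *	struct rwlock ex_lock;
 *
 *	rw_init(&ex_lock, "exlock");
 *
 *	rw_enter_read(&ex_lock);		(shared section)
 *	...
 *	rw_exit_read(&ex_lock);
 *
 *	rw_enter_write(&ex_lock);		(exclusive section)
 *	...
 *	rw_exit_write(&ex_lock);
 */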

#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}

#else
#define rw_enter_diag(r, f)
#endif

static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    const struct lock_type *type)
{
	rwl->rwl_owner = 0;
	rwl->rwl_name = name;

#ifdef WITNESS
	rwl->rwl_lock_obj.lo_flags = lo_flags;
	rwl->rwl_lock_obj.lo_name = name;
	rwl->rwl_lock_obj.lo_type = type;
	WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
	(void)type;
	(void)lo_flags;
#endif
}

void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}

int
rw_enter(struct rwlock *rwl, int flags)
{
	const struct rwlock_op *op;
	unsigned long inc, o;
#ifdef MULTIPROCESSOR
	/*
	 * If the process holds the kernel lock, we want to give up the CPU
	 * as soon as possible so other processes waiting for the kernel
	 * lock can progress. Hence no spinning if we hold the kernel lock.
	 */
	unsigned int spin = (_kernel_lock_held()) ? 0 : RW_SPINS;
#endif
	int error, prio;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

#ifdef MULTIPROCESSOR
		/*
		 * It makes sense to spin for a while in case the lock
		 * is held by a writer.
		 */
		if ((o & RWLOCK_WRLOCK) && (spin != 0)) {
			spin--;
			CPU_BUSY_CYCLE();
			continue;
		}
#endif

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		prio = op->wait_prio;
		if (flags & RW_INTR)
			prio |= PCATCH;
		sleep_setup(rwl, prio, rwl->rwl_name);

		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		error = sleep_finish(0, do_sleep);
		if ((flags & RW_INTR) &&
		    (error != 0))
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If the old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, we just
	 * downgraded a write lock and there may be read waiters; wake them
	 * so they can retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

	return (0);
}
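
/*
 * Illustrative sketch (editor's addition): how callers typically use the
 * optional flags handled above. "ex_lock" is a made-up lock.
 *
 *	if (rw_enter(&ex_lock, RW_WRITE | RW_NOSLEEP) != 0)
 *		return (EBUSY);		(would have had to sleep)
 *
 *	error = rw_enter(&ex_lock, RW_WRITE | RW_INTR);
 *	if (error != 0)
 *		return (error);		(interrupted while sleeping)
 *	...
 *	rw_exit(&ex_lock);
 */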

void
rw_exit(struct rwlock *rwl)
{
	unsigned long wrlock;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	wrlock = rwl->rwl_owner & RWLOCK_WRLOCK;
	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);

	membar_exit_before_atomic();
	rw_do_exit(rwl, wrlock);
}

/* membar_exit_before_atomic() has to precede the call of this function. */
void
rw_do_exit(struct rwlock *rwl, unsigned long wrlock)
{
	unsigned long owner, set;

	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			set = (owner - RWLOCK_READ_INCR) &
			    ~(RWLOCK_WAIT|RWLOCK_WRWANT);
		/*
		 * Potential MP race here. If the owner had WRWANT set, we
		 * cleared it, so a reader can sneak in before a writer.
		 */
	} while (__predict_false(rw_cas(&rwl->rwl_owner, owner, set)));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}

int
rw_status(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (owner & RWLOCK_WRLOCK) {
		if (RW_PROC(curproc) == RW_PROC(owner))
			return RW_WRITE;
		else
			return RW_WRITE_OTHER;
	}
	if (owner)
		return RW_READ;
	return (0);
}

#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_XLOCKED);
#else
	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
		panic("%s: lock not held by this process", rwl->rwl_name);
#endif
}

void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_SLOCKED);
#else
	if (!RW_PROC(rwl->rwl_owner) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
#endif
}

void
rw_assert_anylock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_LOCKED);
#else
	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
#endif
}

void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_UNLOCKED);
#else
	if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
		panic("%s: lock held", rwl->rwl_name);
#endif
}
#endif

/* Recursive rwlocks. */
void
_rrw_init_flags(struct rrwlock *rrwl, const char *name, int flags,
    const struct lock_type *type)
{
	memset(rrwl, 0, sizeof(struct rrwlock));
	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
	    type);
}

int
rrw_enter(struct rrwlock *rrwl, int flags)
{
	int	rv;

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		if (flags & RW_RECURSEFAIL)
			return (EDEADLK);
		else {
			rrwl->rrwl_wcnt++;
			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return (0);
		}
	}

	rv = rw_enter(&rrwl->rrwl_lock, flags);
	if (rv == 0)
		rrwl->rrwl_wcnt = 1;

	return (rv);
}

void
rrw_exit(struct rrwlock *rrwl)
{

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		KASSERT(rrwl->rrwl_wcnt > 0);
		rrwl->rrwl_wcnt--;
		if (rrwl->rrwl_wcnt != 0) {
			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return;
		}
	}

	rw_exit(&rrwl->rrwl_lock);
}

int
rrw_status(struct rrwlock *rrwl)
{
	return (rw_status(&rrwl->rrwl_lock));
}
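
/*
 * Illustrative sketch (editor's addition): recursive acquisition as
 * implemented above. rrwl_wcnt counts the nesting depth and only the
 * final rrw_exit() releases the underlying rwlock. "ex_rrwl" is made up;
 * the rrw_init() wrapper is assumed to come from <sys/rwlock.h>.
 *
 *	struct rrwlock ex_rrwl;
 *
 *	rrw_init(&ex_rrwl, "exrrwl");
 *	rrw_enter(&ex_rrwl, RW_WRITE);		(wcnt 1, lock acquired)
 *	rrw_enter(&ex_rrwl, RW_WRITE);		(wcnt 2, same owner recurses)
 *	rrw_exit(&ex_rrwl);			(wcnt 1, still held)
 *	rrw_exit(&ex_rrwl);			(wcnt 0, rw_exit() for real)
 *
 * With RW_RECURSEFAIL the nested rrw_enter() fails with EDEADLK instead
 * of recursing.
 */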

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#define	RWLOCK_OBJ_MAGIC	0x5aa3c85d
struct rwlock_obj {
	struct rwlock	ro_lock;
	u_int		ro_magic;
	u_int		ro_refcnt;
};

struct pool rwlock_obj_pool;

/*
 * rw_obj_init:
 *
 *	Initialize the rwlock object store.
 */
void
rw_obj_init(void)
{
	pool_init(&rwlock_obj_pool, sizeof(struct rwlock_obj), 0, IPL_MPFLOOR,
	    PR_WAITOK, "rwobjpl", NULL);
}

/*
 * rw_obj_alloc:
 *
 *	Allocate a single lock object.
 */
void
_rw_obj_alloc_flags(struct rwlock **lock, const char *name, int flags,
    struct lock_type *type)
{
	struct rwlock_obj *mo;

	mo = pool_get(&rwlock_obj_pool, PR_WAITOK);
	mo->ro_magic = RWLOCK_OBJ_MAGIC;
	_rw_init_flags(&mo->ro_lock, name, flags, type);
	mo->ro_refcnt = 1;

	*lock = &mo->ro_lock;
}

/*
 * rw_obj_hold:
 *
 *	Add a single reference to a lock object.  A reference to the object
 *	must already be held, and must be held across this call.
 */
void
rw_obj_hold(struct rwlock *lock)
{
	struct rwlock_obj *mo = (struct rwlock_obj *)lock;

	KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC,
	    "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)",
	    __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC);
	KASSERTMSG(mo->ro_refcnt > 0,
	    "%s: lock %p: mo->ro_refcnt (%#x) == 0",
	    __func__, mo, mo->ro_refcnt);

	atomic_inc_int(&mo->ro_refcnt);
}

/*
 * rw_obj_free:
 *
 *	Drop a reference from a lock object.  If the last reference is being
 *	dropped, free the object and return true.  Otherwise, return false.
 */
int
rw_obj_free(struct rwlock *lock)
{
	struct rwlock_obj *mo = (struct rwlock_obj *)lock;

	KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC,
	    "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)",
	    __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC);
	KASSERTMSG(mo->ro_refcnt > 0,
	    "%s: lock %p: mo->ro_refcnt (%#x) == 0",
	    __func__, mo, mo->ro_refcnt);

	if (atomic_dec_int_nv(&mo->ro_refcnt) > 0) {
		return false;
	}
#if notyet
	WITNESS_DESTROY(&mo->ro_lock);
#endif
	pool_put(&rwlock_obj_pool, mo);
	return true;
}
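
/*
 * Illustrative sketch (editor's addition): life cycle of a reference
 * counted lock object as implemented above. The rw_obj_alloc() wrapper
 * is assumed to come from <sys/rwlock.h>; "lock" is made up.
 *
 *	struct rwlock *lock;
 *
 *	rw_obj_alloc(&lock, "objlock");		(refcnt 1)
 *	rw_obj_hold(lock);			(refcnt 2)
 *	...
 *	rw_obj_free(lock);			(refcnt 1, object kept)
 *	rw_obj_free(lock);			(refcnt 0, returned to pool)
 */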