/* $OpenBSD: kern_rwlock.c,v 1.45 2020/03/02 17:07:49 visa Exp $ */

/*
 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
#include <sys/witness.h>

void rw_do_exit(struct rwlock *, unsigned long);

/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)

/*
 * Other OSes implement more sophisticated mechanisms to determine how long
 * the process attempting to acquire the lock should spin. We start with the
 * simplest approach: we make at most RW_SPINS attempts before giving up and
 * putting the process on the sleep queue.
 */
#define RW_SPINS 1000

#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n) (atomic_cas_ulong(p, o, n) != o)
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
        if (*p != o)
                return (1);
        *p = n;

        return (0);
}
#endif

/*
 * Magic wand for lock operations. Every operation checks whether certain
 * flags are set; if they aren't, it increments the lock with some value
 * (which might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE     The lock must be completely empty. We increment it with
 *              RWLOCK_WRLOCK and the proc pointer of the holder.
 *              Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ      RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
 *              with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 */
static const struct rwlock_op {
        unsigned long inc;
        unsigned long check;
        unsigned long wait_set;
        long proc_mult;
        int wait_prio;
} rw_ops[] = {
        {       /* RW_WRITE */
                RWLOCK_WRLOCK,
                ULONG_MAX,
                RWLOCK_WAIT | RWLOCK_WRWANT,
                1,
                PLOCK - 4
        },
        {       /* RW_READ */
                RWLOCK_READ_INCR,
                RWLOCK_WRLOCK,
                RWLOCK_WAIT,
                0,
                PLOCK
        },
        {       /* Sparse Entry. */
                0,
        },
        {       /* RW_DOWNGRADE */
                RWLOCK_READ_INCR - RWLOCK_WRLOCK,
                0,
                0,
                -1,
                PLOCK
        },
};
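
/*
 * Illustrative walk-through of the table above: an uncontended
 * rw_enter(rwl, RW_READ) finds (rwl_owner & RWLOCK_WRLOCK) == 0 and
 * advances rwl_owner by RWLOCK_READ_INCR; an uncontended
 * rw_enter(rwl, RW_WRITE) requires rwl_owner == 0 (its check mask is
 * ULONG_MAX, so any set bit blocks) and installs
 * RW_PROC(curproc) | RWLOCK_WRLOCK via inc and proc_mult; RW_DOWNGRADE
 * subtracts both RWLOCK_WRLOCK and the proc pointer and adds a single
 * read increment, leaving the caller as a reader.
 */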

void
rw_enter_read(struct rwlock *rwl)
{
        unsigned long owner = rwl->rwl_owner;

        if (__predict_false((owner & RWLOCK_WRLOCK) ||
            rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
                rw_enter(rwl, RW_READ);
        else {
                membar_enter_after_atomic();
                WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
                WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
        }
}

void
rw_enter_write(struct rwlock *rwl)
{
        struct proc *p = curproc;

        if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
            RW_PROC(p) | RWLOCK_WRLOCK)))
                rw_enter(rwl, RW_WRITE);
        else {
                membar_enter_after_atomic();
                WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
                    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
                WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
        }
}

void
rw_exit_read(struct rwlock *rwl)
{
        unsigned long owner;

        rw_assert_rdlock(rwl);
        WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);

        membar_exit_before_atomic();
        owner = rwl->rwl_owner;
        if (__predict_false((owner & RWLOCK_WAIT) ||
            rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
                rw_do_exit(rwl, 0);
}

void
rw_exit_write(struct rwlock *rwl)
{
        unsigned long owner;

        rw_assert_wrlock(rwl);
        WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);

        membar_exit_before_atomic();
        owner = rwl->rwl_owner;
        if (__predict_false((owner & RWLOCK_WAIT) ||
            rw_cas(&rwl->rwl_owner, owner, 0)))
                rw_do_exit(rwl, RWLOCK_WRLOCK);
}

#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
        switch (flags & RW_OPMASK) {
        case RW_WRITE:
        case RW_READ:
                if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
                        panic("rw_enter: %s locking against myself",
                            rwl->rwl_name);
                break;
        case RW_DOWNGRADE:
                /*
                 * If we're downgrading, we must hold the write lock.
                 */
                if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
                        panic("rw_enter: %s downgrade of non-write lock",
                            rwl->rwl_name);
                if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
                        panic("rw_enter: %s downgrade, not holder",
                            rwl->rwl_name);
                break;

        default:
                panic("rw_enter: unknown op 0x%x", flags);
        }
}

#else
#define rw_enter_diag(r, f)
#endif

static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    const struct lock_type *type)
{
        rwl->rwl_owner = 0;
        rwl->rwl_name = name;

#ifdef WITNESS
        rwl->rwl_lock_obj.lo_flags = lo_flags;
        rwl->rwl_lock_obj.lo_name = name;
        rwl->rwl_lock_obj.lo_type = type;
        WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
        (void)type;
        (void)lo_flags;
#endif
}

void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
        _rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}
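
/*
 * A minimal usage sketch of the fast-path interface above; the lock and
 * string names are purely illustrative, and initialization assumes the
 * usual rw_init() wrapper from <sys/rwlock.h>:
 *
 *      struct rwlock example_lock;
 *
 *      rw_init(&example_lock, "example");
 *
 *      rw_enter_write(&example_lock);
 *      ... modify the protected data ...
 *      rw_exit_write(&example_lock);
 *
 *      rw_enter_read(&example_lock);
 *      ... read the protected data ...
 *      rw_exit_read(&example_lock);
 */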

int
rw_enter(struct rwlock *rwl, int flags)
{
        const struct rwlock_op *op;
        struct sleep_state sls;
        unsigned long inc, o;
#ifdef MULTIPROCESSOR
        /*
         * If the process holds the kernel lock, we want to give up the CPU
         * as soon as possible so other processes waiting for the kernel lock
         * can progress. Hence no spinning if we hold the kernel lock.
         */
        unsigned int spin = (_kernel_lock_held()) ? 0 : RW_SPINS;
#endif
        int error, prio;
#ifdef WITNESS
        int lop_flags;

        lop_flags = LOP_NEWORDER;
        if (flags & RW_WRITE)
                lop_flags |= LOP_EXCLUSIVE;
        if (flags & RW_DUPOK)
                lop_flags |= LOP_DUPOK;
        if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
                WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

        op = &rw_ops[(flags & RW_OPMASK) - 1];

        inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
        while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
                unsigned long set = o | op->wait_set;
                int do_sleep;

                /* Avoid deadlocks after panic or in DDB */
                if (panicstr || db_active)
                        return (0);

#ifdef MULTIPROCESSOR
                /*
                 * It only makes sense to spin when the lock is held by a
                 * writer.
                 */
                if ((o & RWLOCK_WRLOCK) && (spin != 0)) {
                        spin--;
                        CPU_BUSY_CYCLE();
                        continue;
                }
#endif

                rw_enter_diag(rwl, flags);

                if (flags & RW_NOSLEEP)
                        return (EBUSY);

                prio = op->wait_prio;
                if (flags & RW_INTR)
                        prio |= PCATCH;
                sleep_setup(&sls, rwl, prio, rwl->rwl_name);
                if (flags & RW_INTR)
                        sleep_setup_signal(&sls);

                do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

                sleep_finish(&sls, do_sleep);
                if ((flags & RW_INTR) &&
                    (error = sleep_finish_signal(&sls)) != 0)
                        return (error);
                if (flags & RW_SLEEPFAIL)
                        return (EAGAIN);
        }

        if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
                goto retry;
        membar_enter_after_atomic();

        /*
         * If the old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, we
         * downgraded a write lock and there may be read waiters; wake them
         * so they can retry the lock.
         */
        if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
            (RWLOCK_WRLOCK|RWLOCK_WAIT)))
                wakeup(rwl);

        if (flags & RW_DOWNGRADE)
                WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
        else
                WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

        return (0);
}

void
rw_exit(struct rwlock *rwl)
{
        unsigned long wrlock;

        /* Avoid deadlocks after panic or in DDB */
        if (panicstr || db_active)
                return;

        wrlock = rwl->rwl_owner & RWLOCK_WRLOCK;
        if (wrlock)
                rw_assert_wrlock(rwl);
        else
                rw_assert_rdlock(rwl);
        WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);

        membar_exit_before_atomic();
        rw_do_exit(rwl, wrlock);
}
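
/*
 * Illustrative example of the flag-based interface above (the lock name
 * is a placeholder):
 *
 *      if (rw_enter(&example_lock, RW_WRITE | RW_NOSLEEP) != 0)
 *              ... lock was busy, EBUSY was returned ...
 *
 *      rw_enter(&example_lock, RW_WRITE);
 *      ... exclusive section ...
 *      rw_enter(&example_lock, RW_DOWNGRADE);  ... continue as a reader ...
 *      rw_exit_read(&example_lock);
 */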

/* membar_exit_before_atomic() has to precede the call of this function. */
void
rw_do_exit(struct rwlock *rwl, unsigned long wrlock)
{
        unsigned long owner, set;

        do {
                owner = rwl->rwl_owner;
                if (wrlock)
                        set = 0;
                else
                        set = (owner - RWLOCK_READ_INCR) &
                            ~(RWLOCK_WAIT|RWLOCK_WRWANT);
        } while (__predict_false(rw_cas(&rwl->rwl_owner, owner, set)));

        if (owner & RWLOCK_WAIT)
                wakeup(rwl);
}

int
rw_status(struct rwlock *rwl)
{
        unsigned long owner = rwl->rwl_owner;

        if (owner & RWLOCK_WRLOCK) {
                if (RW_PROC(curproc) == RW_PROC(owner))
                        return RW_WRITE;
                else
                        return RW_WRITE_OTHER;
        }
        if (owner)
                return RW_READ;
        return (0);
}

#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
        if (panicstr || db_active)
                return;

#ifdef WITNESS
        witness_assert(&rwl->rwl_lock_obj, LA_XLOCKED);
#else
        if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
                panic("%s: lock not held", rwl->rwl_name);

        if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
                panic("%s: lock not held by this process", rwl->rwl_name);
#endif
}

void
rw_assert_rdlock(struct rwlock *rwl)
{
        if (panicstr || db_active)
                return;

#ifdef WITNESS
        witness_assert(&rwl->rwl_lock_obj, LA_SLOCKED);
#else
        if (!RW_PROC(rwl->rwl_owner) || (rwl->rwl_owner & RWLOCK_WRLOCK))
                panic("%s: lock not shared", rwl->rwl_name);
#endif
}

void
rw_assert_anylock(struct rwlock *rwl)
{
        if (panicstr || db_active)
                return;

#ifdef WITNESS
        witness_assert(&rwl->rwl_lock_obj, LA_LOCKED);
#else
        switch (rw_status(rwl)) {
        case RW_WRITE_OTHER:
                panic("%s: lock held by different process", rwl->rwl_name);
        case 0:
                panic("%s: lock not held", rwl->rwl_name);
        }
#endif
}

void
rw_assert_unlocked(struct rwlock *rwl)
{
        if (panicstr || db_active)
                return;

#ifdef WITNESS
        witness_assert(&rwl->rwl_lock_obj, LA_UNLOCKED);
#else
        if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
                panic("%s: lock held", rwl->rwl_name);
#endif
}
#endif

/* Recursive rwlocks. */
void
_rrw_init_flags(struct rrwlock *rrwl, const char *name, int flags,
    const struct lock_type *type)
{
        memset(rrwl, 0, sizeof(struct rrwlock));
        _rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
            type);
}

int
rrw_enter(struct rrwlock *rrwl, int flags)
{
        int rv;

        if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
                if (flags & RW_RECURSEFAIL)
                        return (EDEADLK);
                else {
                        rrwl->rrwl_wcnt++;
                        WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
                            LOP_EXCLUSIVE);
                        return (0);
                }
        }

        rv = rw_enter(&rrwl->rrwl_lock, flags);
        if (rv == 0)
                rrwl->rrwl_wcnt = 1;

        return (rv);
}

void
rrw_exit(struct rrwlock *rrwl)
{

        if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
                KASSERT(rrwl->rrwl_wcnt > 0);
                rrwl->rrwl_wcnt--;
                if (rrwl->rrwl_wcnt != 0) {
                        WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
                            LOP_EXCLUSIVE);
                        return;
                }
        }

        rw_exit(&rrwl->rrwl_lock);
}

int
rrw_status(struct rrwlock *rrwl)
{
        return (rw_status(&rrwl->rrwl_lock));
}
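
/*
 * Illustrative example of the recursive interface; the names are
 * placeholders and initialization assumes the usual rrw_init() wrapper
 * from <sys/rwlock.h>:
 *
 *      struct rrwlock example_rrwl;
 *
 *      rrw_init(&example_rrwl, "example");
 *      rrw_enter(&example_rrwl, RW_WRITE);
 *      rrw_enter(&example_rrwl, RW_WRITE);     ... rrwl_wcnt is now 2 ...
 *      rrw_exit(&example_rrwl);
 *      rrw_exit(&example_rrwl);                ... lock fully released ...
 */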