/*	$OpenBSD: kern_rwlock.c,v 1.39 2019/05/11 17:45:59 sashan Exp $	*/

/*
 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
#include <sys/witness.h>

/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p)	(((long)p) & ~RWLOCK_MASK)

/*
 * Other OSes implement more sophisticated mechanisms to determine how long
 * a process attempting to acquire the lock should spin.  We start with the
 * simplest approach: we make at most RW_SPINS attempts before giving up and
 * putting the process on the sleep queue.
 */
#define RW_SPINS	1000

#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	if (*p != o)
		return (1);
	*p = n;

	return (0);
}
#endif

/*
 * Magic wand for lock operations.  Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases).  If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty.  We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set.  We increment
 *		with RWLOCK_READ_INCR.  RWLOCK_WAIT while waiting.
 */
static const struct rwlock_op {
	unsigned long inc;
	unsigned long check;
	unsigned long wait_set;
	long proc_mult;
	int wait_prio;
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK,
		RWLOCK_WAIT,
		0,
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,
		0,
		-1,
		PLOCK
	},
};
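/*
 * Example (illustrative sketch, not part of the original file): the table
 * above drives rw_enter(), but most callers use the wrappers defined below.
 * The "frob" lock and functions are hypothetical names made up for the
 * example; see rw_init(9) for the documented interface.
 */
#if 0
struct rwlock frob_lock = RWLOCK_INITIALIZER("froblock");

void
frob_read(void)
{
	/* Shared access: several readers may hold the lock at once. */
	rw_enter_read(&frob_lock);
	/* ... read shared frob state ... */
	rw_exit_read(&frob_lock);
}

int
frob_try_update(void)
{
	/* Exclusive access, failing with EBUSY instead of sleeping. */
	if (rw_enter(&frob_lock, RW_WRITE | RW_NOSLEEP) != 0)
		return (EBUSY);
	/* ... modify shared frob state ... */
	rw_exit_write(&frob_lock);
	return (0);
}
#endif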
void
rw_enter_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (__predict_false((owner & RWLOCK_WRLOCK) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		rw_enter(rwl, RW_READ);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
	}
}

void
rw_enter_write(struct rwlock *rwl)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		rw_enter(rwl, RW_WRITE);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
	}
}

void
rw_exit_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_rdlock(rwl);

	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		rw_exit(rwl);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
}

void
rw_exit_write(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_wrlock(rwl);

	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		rw_exit(rwl);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
}

#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}

#else
#define rw_enter_diag(r, f)
#endif

static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    const struct lock_type *type)
{
	rwl->rwl_owner = 0;
	rwl->rwl_name = name;

#ifdef WITNESS
	rwl->rwl_lock_obj.lo_flags = lo_flags;
	rwl->rwl_lock_obj.lo_name = name;
	rwl->rwl_lock_obj.lo_type = type;
	WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
	(void)type;
	(void)lo_flags;
#endif
}

void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}

int
rw_enter(struct rwlock *rwl, int flags)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
#ifdef MULTIPROCESSOR
	/*
	 * If the process holds the kernel lock, we want to give up the CPU
	 * as soon as possible so other processes waiting for the kernel lock
	 * can make progress.  Hence no spinning if we hold the kernel lock.
	 */
	unsigned int spin = (_kernel_lock_held()) ? 0 : RW_SPINS;
#endif
	int error;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

#ifdef MULTIPROCESSOR
		/*
		 * It makes sense to try to spin just in case the lock
		 * is held by a writer.
		 */
		if ((o & RWLOCK_WRLOCK) && (spin != 0)) {
			spin--;
			CPU_BUSY_CYCLE();
			continue;
		}
#endif

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		sleep_setup(&sls, rwl, op->wait_prio, rwl->rwl_name);
		if (flags & RW_INTR)
			sleep_setup_signal(&sls, op->wait_prio | PCATCH);

		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) &&
		    (error = sleep_finish_signal(&sls)) != 0)
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If the old lock value had RWLOCK_WAIT and RWLOCK_WRLOCK set, we
	 * downgraded a write lock and may have read waiters; wake them so
	 * they can retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

	return (0);
}
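/*
 * Example (illustrative sketch, not part of the original file): flags such
 * as RW_INTR and RW_DOWNGRADE are handled by rw_enter() above.  This
 * continues the hypothetical "frob" example from earlier in the file.
 */
#if 0
int
frob_update_then_read(void)
{
	int error;

	/* Interruptible acquisition: give up if a signal arrives. */
	if ((error = rw_enter(&frob_lock, RW_WRITE | RW_INTR)) != 0)
		return (error);
	/* ... modify shared frob state exclusively ... */

	/* Let readers in again without releasing the lock. */
	rw_enter(&frob_lock, RW_DOWNGRADE);
	/* ... read back the state we just wrote ... */
	rw_exit_read(&frob_lock);
	return (0);
}
#endif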
void
rw_exit(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;
	int wrlock = owner & RWLOCK_WRLOCK;
	unsigned long set;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);

	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ?
	    LOP_EXCLUSIVE : 0);

	membar_exit_before_atomic();
	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			set = (owner - RWLOCK_READ_INCR) &
			    ~(RWLOCK_WAIT|RWLOCK_WRWANT);
	} while (rw_cas(&rwl->rwl_owner, owner, set));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}

int
rw_status(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (owner & RWLOCK_WRLOCK) {
		if (RW_PROC(curproc) == RW_PROC(owner))
			return RW_WRITE;
		else
			return RW_WRITE_OTHER;
	}
	if (owner)
		return RW_READ;
	return (0);
}

#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RWLOCK_OWNER(rwl) != (struct proc *)RW_PROC(curproc))
		panic("%s: lock not held by this process", rwl->rwl_name);
}

void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (!RWLOCK_OWNER(rwl) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
}

void
rw_assert_anylock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
}

void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (rwl->rwl_owner != 0L)
		panic("%s: lock held", rwl->rwl_name);
}
#endif

/* recursive rwlocks; */
void
_rrw_init_flags(struct rrwlock *rrwl, char *name, int flags,
    const struct lock_type *type)
{
	memset(rrwl, 0, sizeof(struct rrwlock));
	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
	    type);
}

int
rrw_enter(struct rrwlock *rrwl, int flags)
{
	int	rv;

	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
	    (struct proc *)RW_PROC(curproc)) {
		if (flags & RW_RECURSEFAIL)
			return (EDEADLK);
		else {
			rrwl->rrwl_wcnt++;
			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return (0);
		}
	}

	rv = rw_enter(&rrwl->rrwl_lock, flags);
	if (rv == 0)
		rrwl->rrwl_wcnt = 1;

	return (rv);
}

void
rrw_exit(struct rrwlock *rrwl)
{

	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
	    (struct proc *)RW_PROC(curproc)) {
		KASSERT(rrwl->rrwl_wcnt > 0);
		rrwl->rrwl_wcnt--;
		if (rrwl->rrwl_wcnt != 0) {
			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return;
		}
	}

	rw_exit(&rrwl->rrwl_lock);
}

int
rrw_status(struct rrwlock *rrwl)
{
	return (rw_status(&rrwl->rrwl_lock));
}
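/*
 * Example (illustrative sketch, not part of the original file): a write
 * lock taken through rrw_enter() may be re-entered by the same process,
 * e.g. when a code path calls back into itself, and is only dropped once
 * the recursion count returns to zero.  The function name is hypothetical
 * and the lock is assumed to have been set up with rrw_init().
 */
#if 0
void
frob_recurse(struct rrwlock *lock, int depth)
{
	rrw_enter(lock, RW_WRITE);	/* rrwl_wcnt: 1, 2, 3, ... */
	if (depth > 0)
		frob_recurse(lock, depth - 1);
	rrw_exit(lock);			/* released when the count reaches 0 */
}
#endif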