/*	$OpenBSD: kern_rwlock.c,v 1.38 2019/04/23 13:35:12 visa Exp $	*/

/*
 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
#include <sys/witness.h>

/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)

#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	if (*p != o)
		return (1);
	*p = n;

	return (0);
}
#endif

/*
 * Magic wand for lock operations. Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty. We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
 *		with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 */
static const struct rwlock_op {
	unsigned long inc;
	unsigned long check;
	unsigned long wait_set;
	long proc_mult;
	int wait_prio;
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK,
		RWLOCK_WAIT,
		0,
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,
		0,
		-1,
		PLOCK
	},
};
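
/*
 * Worked example of how rw_enter() below consumes this table: for
 * RW_WRITE, check == ULONG_MAX, so any non-zero owner word means the
 * lock is busy; on success the lock word goes from 0 to
 * RW_PROC(curproc) | RWLOCK_WRLOCK, since proc_mult == 1 folds the
 * holder's proc pointer into the RWLOCK_WRLOCK increment.  For
 * RW_DOWNGRADE, proc_mult == -1 strips that pointer again while the
 * increment swaps RWLOCK_WRLOCK for a single RWLOCK_READ_INCR, turning
 * the exclusive hold into one read reference.
 */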

void
rw_enter_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (__predict_false((owner & RWLOCK_WRLOCK) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		rw_enter(rwl, RW_READ);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
	}
}

void
rw_enter_write(struct rwlock *rwl)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		rw_enter(rwl, RW_WRITE);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
	}
}

void
rw_exit_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_rdlock(rwl);

	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		rw_exit(rwl);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
}

void
rw_exit_write(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_wrlock(rwl);

	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		rw_exit(rwl);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
}
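
/*
 * The four functions above are the uncontended fast paths: a single
 * compare-and-swap takes or releases the lock, and anything unusual
 * (a write holder, pending waiters) falls through to the rw_enter()
 * and rw_exit() slow paths below.  A minimal usage sketch, where
 * "foo_lock" and the data it protects are hypothetical names that do
 * not appear in this file:
 *
 *	struct rwlock foo_lock = RWLOCK_INITIALIZER("foolck");
 *
 *	rw_enter_read(&foo_lock);
 *	(read the protected data)
 *	rw_exit_read(&foo_lock);
 *
 *	rw_enter_write(&foo_lock);
 *	(modify the protected data)
 *	rw_exit_write(&foo_lock);
 */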

#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}

#else
#define rw_enter_diag(r, f)
#endif

static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    const struct lock_type *type)
{
	rwl->rwl_owner = 0;
	rwl->rwl_name = name;

#ifdef WITNESS
	rwl->rwl_lock_obj.lo_flags = lo_flags;
	rwl->rwl_lock_obj.lo_name = name;
	rwl->rwl_lock_obj.lo_type = type;
	WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
	(void)type;
	(void)lo_flags;
#endif
}

void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}

int
rw_enter(struct rwlock *rwl, int flags)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
	int error;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		sleep_setup(&sls, rwl, op->wait_prio, rwl->rwl_name);
		if (flags & RW_INTR)
			sleep_setup_signal(&sls, op->wait_prio | PCATCH);

		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) &&
		    (error = sleep_finish_signal(&sls)) != 0)
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we
	 * downgraded a write lock and had possible read waiter, wake them
	 * to let them retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

	return (0);
}
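
/*
 * Sketch of the downgrade path rw_enter() handles above, again with a
 * hypothetical "foo_lock": the exclusive hold is converted in place to
 * a single read hold, so readers blocked behind the writer can get in
 * while the caller keeps read access.
 *
 *	rw_enter_write(&foo_lock);
 *	(modify the protected data)
 *	rw_enter(&foo_lock, RW_DOWNGRADE);
 *	(continue with read-only access)
 *	rw_exit_read(&foo_lock);
 */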

void
rw_exit(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;
	int wrlock = owner & RWLOCK_WRLOCK;
	unsigned long set;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);

	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);

	membar_exit_before_atomic();
	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			set = (owner - RWLOCK_READ_INCR) &
			    ~(RWLOCK_WAIT|RWLOCK_WRWANT);
	} while (rw_cas(&rwl->rwl_owner, owner, set));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}

int
rw_status(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (owner & RWLOCK_WRLOCK) {
		if (RW_PROC(curproc) == RW_PROC(owner))
			return RW_WRITE;
		else
			return RW_WRITE_OTHER;
	}
	if (owner)
		return RW_READ;
	return (0);
}

#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RWLOCK_OWNER(rwl) != (struct proc *)RW_PROC(curproc))
		panic("%s: lock not held by this process", rwl->rwl_name);
}

void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (!RWLOCK_OWNER(rwl) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
}

void
rw_assert_anylock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
}

void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (rwl->rwl_owner != 0L)
		panic("%s: lock held", rwl->rwl_name);
}
#endif

/* recursive rwlocks; */
void
_rrw_init_flags(struct rrwlock *rrwl, char *name, int flags,
    const struct lock_type *type)
{
	memset(rrwl, 0, sizeof(struct rrwlock));
	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
	    type);
}

int
rrw_enter(struct rrwlock *rrwl, int flags)
{
	int rv;

	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
	    (struct proc *)RW_PROC(curproc)) {
		if (flags & RW_RECURSEFAIL)
			return (EDEADLK);
		else {
			rrwl->rrwl_wcnt++;
			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return (0);
		}
	}

	rv = rw_enter(&rrwl->rrwl_lock, flags);
	if (rv == 0)
		rrwl->rrwl_wcnt = 1;

	return (rv);
}

void
rrw_exit(struct rrwlock *rrwl)
{

	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
	    (struct proc *)RW_PROC(curproc)) {
		KASSERT(rrwl->rrwl_wcnt > 0);
		rrwl->rrwl_wcnt--;
		if (rrwl->rrwl_wcnt != 0) {
			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return;
		}
	}

	rw_exit(&rrwl->rrwl_lock);
}

int
rrw_status(struct rrwlock *rrwl)
{
	return (rw_status(&rrwl->rrwl_lock));
}
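
/*
 * Recursion sketch for the rrwlock functions above, with "foo_rrwl"
 * being a hypothetical lock: a repeated rrw_enter() by the exclusive
 * owner only bumps rrwl_wcnt instead of deadlocking, and the underlying
 * rwlock is released once the count drops back to zero.
 *
 *	rrw_enter(&foo_rrwl, RW_WRITE);		wcnt becomes 1
 *	rrw_enter(&foo_rrwl, RW_WRITE);		wcnt becomes 2
 *	rrw_exit(&foo_rrwl);			wcnt back to 1, still held
 *	rrw_exit(&foo_rrwl);			wcnt 0, rw_exit() releases it
 */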