1 /* $OpenBSD: kern_rwlock.c,v 1.33 2017/12/18 10:05:43 mpi Exp $ */ 2 3 /* 4 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org> 5 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/proc.h> 23 #include <sys/rwlock.h> 24 #include <sys/limits.h> 25 #include <sys/atomic.h> 26 #include <sys/witness.h> 27 28 /* XXX - temporary measure until proc0 is properly aligned */ 29 #define RW_PROC(p) (((long)p) & ~RWLOCK_MASK) 30 31 #ifdef MULTIPROCESSOR 32 #define rw_cas(p, o, n) (atomic_cas_ulong(p, o, n) != o) 33 #else 34 static inline int 35 rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n) 36 { 37 if (*p != o) 38 return (1); 39 *p = n; 40 41 return (0); 42 } 43 #endif 44 45 /* 46 * Magic wand for lock operations. Every operation checks if certain 47 * flags are set and if they aren't, it increments the lock with some 48 * value (that might need some computing in a few cases). If the operation 49 * fails, we need to set certain flags while waiting for the lock. 50 * 51 * RW_WRITE The lock must be completely empty. We increment it with 52 * RWLOCK_WRLOCK and the proc pointer of the holder. 53 * Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting. 54 * RW_READ RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment 55 * with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting. 56 */ 57 static const struct rwlock_op { 58 unsigned long inc; 59 unsigned long check; 60 unsigned long wait_set; 61 long proc_mult; 62 int wait_prio; 63 } rw_ops[] = { 64 { /* RW_WRITE */ 65 RWLOCK_WRLOCK, 66 ULONG_MAX, 67 RWLOCK_WAIT | RWLOCK_WRWANT, 68 1, 69 PLOCK - 4 70 }, 71 { /* RW_READ */ 72 RWLOCK_READ_INCR, 73 RWLOCK_WRLOCK, 74 RWLOCK_WAIT, 75 0, 76 PLOCK 77 }, 78 { /* Sparse Entry. */ 79 0, 80 }, 81 { /* RW_DOWNGRADE */ 82 RWLOCK_READ_INCR - RWLOCK_WRLOCK, 83 0, 84 0, 85 -1, 86 PLOCK 87 }, 88 }; 89 90 void 91 _rw_enter_read(struct rwlock *rwl LOCK_FL_VARS) 92 { 93 unsigned long owner = rwl->rwl_owner; 94 95 if (__predict_false((owner & RWLOCK_WRLOCK) || 96 rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR))) 97 _rw_enter(rwl, RW_READ LOCK_FL_ARGS); 98 else { 99 membar_enter_after_atomic(); 100 WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, file, line, 101 NULL); 102 WITNESS_LOCK(&rwl->rwl_lock_obj, 0, file, line); 103 } 104 } 105 106 void 107 _rw_enter_write(struct rwlock *rwl LOCK_FL_VARS) 108 { 109 struct proc *p = curproc; 110 111 if (__predict_false(rw_cas(&rwl->rwl_owner, 0, 112 RW_PROC(p) | RWLOCK_WRLOCK))) 113 _rw_enter(rwl, RW_WRITE LOCK_FL_ARGS); 114 else { 115 membar_enter_after_atomic(); 116 WITNESS_CHECKORDER(&rwl->rwl_lock_obj, 117 LOP_EXCLUSIVE | LOP_NEWORDER, file, line, NULL); 118 WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line); 119 } 120 } 121 122 void 123 _rw_exit_read(struct rwlock *rwl LOCK_FL_VARS) 124 { 125 unsigned long owner = rwl->rwl_owner; 126 127 rw_assert_rdlock(rwl); 128 129 membar_exit_before_atomic(); 130 if (__predict_false((owner & RWLOCK_WAIT) || 131 rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR))) 132 _rw_exit(rwl LOCK_FL_ARGS); 133 else 134 WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0, file, line); 135 } 136 137 void 138 _rw_exit_write(struct rwlock *rwl LOCK_FL_VARS) 139 { 140 unsigned long owner = rwl->rwl_owner; 141 142 rw_assert_wrlock(rwl); 143 144 membar_exit_before_atomic(); 145 if (__predict_false((owner & RWLOCK_WAIT) || 146 rw_cas(&rwl->rwl_owner, owner, 0))) 147 _rw_exit(rwl LOCK_FL_ARGS); 148 else 149 WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line); 150 } 151 152 #ifdef DIAGNOSTIC 153 /* 154 * Put the diagnostic functions here to keep the main code free 155 * from ifdef clutter. 156 */ 157 static void 158 rw_enter_diag(struct rwlock *rwl, int flags) 159 { 160 switch (flags & RW_OPMASK) { 161 case RW_WRITE: 162 case RW_READ: 163 if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner)) 164 panic("rw_enter: %s locking against myself", 165 rwl->rwl_name); 166 break; 167 case RW_DOWNGRADE: 168 /* 169 * If we're downgrading, we must hold the write lock. 170 */ 171 if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0) 172 panic("rw_enter: %s downgrade of non-write lock", 173 rwl->rwl_name); 174 if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner)) 175 panic("rw_enter: %s downgrade, not holder", 176 rwl->rwl_name); 177 break; 178 179 default: 180 panic("rw_enter: unknown op 0x%x", flags); 181 } 182 } 183 184 #else 185 #define rw_enter_diag(r, f) 186 #endif 187 188 static void 189 _rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags, 190 struct lock_type *type) 191 { 192 rwl->rwl_owner = 0; 193 rwl->rwl_name = name; 194 195 #ifdef WITNESS 196 rwl->rwl_lock_obj.lo_flags = lo_flags; 197 rwl->rwl_lock_obj.lo_name = name; 198 rwl->rwl_lock_obj.lo_type = type; 199 WITNESS_INIT(&rwl->rwl_lock_obj, type); 200 #else 201 (void)type; 202 (void)lo_flags; 203 #endif 204 } 205 206 void 207 _rw_init_flags(struct rwlock *rwl, const char *name, int flags, 208 struct lock_type *type) 209 { 210 _rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type); 211 } 212 213 int 214 _rw_enter(struct rwlock *rwl, int flags LOCK_FL_VARS) 215 { 216 const struct rwlock_op *op; 217 struct sleep_state sls; 218 unsigned long inc, o; 219 int error; 220 #ifdef WITNESS 221 int lop_flags; 222 223 lop_flags = LOP_NEWORDER; 224 if (flags & RW_WRITE) 225 lop_flags |= LOP_EXCLUSIVE; 226 if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0) 227 WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, file, line, 228 NULL); 229 #endif 230 231 op = &rw_ops[(flags & RW_OPMASK) - 1]; 232 233 inc = op->inc + RW_PROC(curproc) * op->proc_mult; 234 retry: 235 while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) { 236 unsigned long set = o | op->wait_set; 237 int do_sleep; 238 239 /* Avoid deadlocks after panic */ 240 if (panicstr) 241 return (0); 242 243 rw_enter_diag(rwl, flags); 244 245 if (flags & RW_NOSLEEP) 246 return (EBUSY); 247 248 sleep_setup(&sls, rwl, op->wait_prio, rwl->rwl_name); 249 if (flags & RW_INTR) 250 sleep_setup_signal(&sls, op->wait_prio | PCATCH); 251 252 do_sleep = !rw_cas(&rwl->rwl_owner, o, set); 253 254 sleep_finish(&sls, do_sleep); 255 if ((flags & RW_INTR) && 256 (error = sleep_finish_signal(&sls)) != 0) 257 return (error); 258 if (flags & RW_SLEEPFAIL) 259 return (EAGAIN); 260 } 261 262 if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc))) 263 goto retry; 264 membar_enter_after_atomic(); 265 266 /* 267 * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we 268 * downgraded a write lock and had possible read waiter, wake them 269 * to let them retry the lock. 270 */ 271 if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) == 272 (RWLOCK_WRLOCK|RWLOCK_WAIT))) 273 wakeup(rwl); 274 275 if (flags & RW_DOWNGRADE) 276 WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags, file, line); 277 else 278 WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags, file, line); 279 280 return (0); 281 } 282 283 void 284 _rw_exit(struct rwlock *rwl LOCK_FL_VARS) 285 { 286 unsigned long owner = rwl->rwl_owner; 287 int wrlock = owner & RWLOCK_WRLOCK; 288 unsigned long set; 289 290 /* Avoid deadlocks after panic */ 291 if (panicstr) 292 return; 293 294 if (wrlock) 295 rw_assert_wrlock(rwl); 296 else 297 rw_assert_rdlock(rwl); 298 299 WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0, 300 file, line); 301 302 membar_exit_before_atomic(); 303 do { 304 owner = rwl->rwl_owner; 305 if (wrlock) 306 set = 0; 307 else 308 set = (owner - RWLOCK_READ_INCR) & 309 ~(RWLOCK_WAIT|RWLOCK_WRWANT); 310 } while (rw_cas(&rwl->rwl_owner, owner, set)); 311 312 if (owner & RWLOCK_WAIT) 313 wakeup(rwl); 314 } 315 316 int 317 rw_status(struct rwlock *rwl) 318 { 319 unsigned long owner = rwl->rwl_owner; 320 321 if (owner & RWLOCK_WRLOCK) { 322 if (RW_PROC(curproc) == RW_PROC(owner)) 323 return RW_WRITE; 324 else 325 return RW_WRITE_OTHER; 326 } 327 if (owner) 328 return RW_READ; 329 return (0); 330 } 331 332 #ifdef DIAGNOSTIC 333 void 334 rw_assert_wrlock(struct rwlock *rwl) 335 { 336 if (!(rwl->rwl_owner & RWLOCK_WRLOCK)) 337 panic("%s: lock not held", rwl->rwl_name); 338 339 if (RWLOCK_OWNER(rwl) != (struct proc *)RW_PROC(curproc)) 340 panic("%s: lock not held by this process", rwl->rwl_name); 341 } 342 343 void 344 rw_assert_rdlock(struct rwlock *rwl) 345 { 346 if (!RWLOCK_OWNER(rwl) || (rwl->rwl_owner & RWLOCK_WRLOCK)) 347 panic("%s: lock not shared", rwl->rwl_name); 348 } 349 350 void 351 rw_assert_anylock(struct rwlock *rwl) 352 { 353 switch (rw_status(rwl)) { 354 case RW_WRITE_OTHER: 355 panic("%s: lock held by different process", rwl->rwl_name); 356 case 0: 357 panic("%s: lock not held", rwl->rwl_name); 358 } 359 } 360 361 void 362 rw_assert_unlocked(struct rwlock *rwl) 363 { 364 if (rwl->rwl_owner != 0L) 365 panic("%s: lock held", rwl->rwl_name); 366 } 367 #endif 368 369 /* recursive rwlocks; */ 370 void 371 _rrw_init_flags(struct rrwlock *rrwl, char *name, int flags, 372 struct lock_type *type) 373 { 374 memset(rrwl, 0, sizeof(struct rrwlock)); 375 _rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags), 376 type); 377 } 378 379 int 380 _rrw_enter(struct rrwlock *rrwl, int flags LOCK_FL_VARS) 381 { 382 int rv; 383 384 if (RWLOCK_OWNER(&rrwl->rrwl_lock) == 385 (struct proc *)RW_PROC(curproc)) { 386 if (flags & RW_RECURSEFAIL) 387 return (EDEADLK); 388 else { 389 rrwl->rrwl_wcnt++; 390 WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj, 391 LOP_EXCLUSIVE, file, line); 392 return (0); 393 } 394 } 395 396 rv = _rw_enter(&rrwl->rrwl_lock, flags LOCK_FL_ARGS); 397 if (rv == 0) 398 rrwl->rrwl_wcnt = 1; 399 400 return (rv); 401 } 402 403 void 404 _rrw_exit(struct rrwlock *rrwl LOCK_FL_VARS) 405 { 406 407 if (RWLOCK_OWNER(&rrwl->rrwl_lock) == 408 (struct proc *)RW_PROC(curproc)) { 409 KASSERT(rrwl->rrwl_wcnt > 0); 410 rrwl->rrwl_wcnt--; 411 if (rrwl->rrwl_wcnt != 0) { 412 WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj, 413 LOP_EXCLUSIVE, file, line); 414 return; 415 } 416 } 417 418 _rw_exit(&rrwl->rrwl_lock LOCK_FL_ARGS); 419 } 420 421 int 422 rrw_status(struct rrwlock *rrwl) 423 { 424 return (rw_status(&rrwl->rrwl_lock)); 425 } 426