/*	$OpenBSD: kern_rwlock.c,v 1.45 2020/03/02 17:07:49 visa Exp $	*/

/*
 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
#include <sys/witness.h>

void	rw_do_exit(struct rwlock *, unsigned long);

/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)

/*
 * Other OSes implement more sophisticated mechanisms to determine how long
 * a process attempting to acquire the lock should spin. We start with the
 * simplest approach: we make at most RW_SPINS attempts before giving up
 * and putting the process on the sleep queue.
 */
#define RW_SPINS	1000

#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	if (*p != o)
		return (1);
	*p = n;

	return (0);
}
#endif

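/*
 * Clarifying note (not part of the original comments): rw_cas() returns
 * zero when the compare-and-swap succeeded and nonzero when it failed,
 * i.e. the opposite sense of atomic_cas_ulong(), which returns the value
 * previously found in *p.  Callers therefore retry on a nonzero return,
 * roughly like this sketch:
 *
 *	do {
 *		o = rwl->rwl_owner;
 *	} while (rw_cas(&rwl->rwl_owner, o, o + RWLOCK_READ_INCR));
 */
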
/*
 * Magic wand for lock operations. Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty. We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
 *		with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 */
static const struct rwlock_op {
	unsigned long inc;
	unsigned long check;
	unsigned long wait_set;
	long proc_mult;
	int wait_prio;
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK,
		RWLOCK_WAIT,
		0,
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,
		0,
		-1,
		PLOCK
	},
};

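/*
 * Clarifying note: rw_enter() below indexes this table with
 * (flags & RW_OPMASK) - 1, so RW_WRITE maps to rw_ops[0], RW_READ to
 * rw_ops[1] and RW_DOWNGRADE to rw_ops[3]; the sparse entry pads the
 * unused flag value in between.  The exact flag values are defined in
 * <sys/rwlock.h>.
 */
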
void
rw_enter_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (__predict_false((owner & RWLOCK_WRLOCK) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		rw_enter(rwl, RW_READ);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
	}
}

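/*
 * Illustrative read-side usage (a sketch, not code from this file;
 * "sc" stands for some hypothetical driver softc):
 *
 *	rw_enter_read(&sc->sc_lock);
 *	... read shared state ...
 *	rw_exit_read(&sc->sc_lock);
 *
 * rw_enter_read() is the uncontended fast path; on contention it falls
 * back to the general rw_enter(rwl, RW_READ) slow path below.
 */
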
void
rw_enter_write(struct rwlock *rwl)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		rw_enter(rwl, RW_WRITE);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
	}
}

void
rw_exit_read(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);

	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		rw_do_exit(rwl, 0);
}

void
rw_exit_write(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_wrlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);

	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		rw_do_exit(rwl, RWLOCK_WRLOCK);
}

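/*
 * Illustrative write-side usage (a sketch; "sc" is again a hypothetical
 * softc, not something defined in this file):
 *
 *	rw_enter_write(&sc->sc_lock);
 *	... modify shared state ...
 *	rw_exit_write(&sc->sc_lock);
 */
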
#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}

#else
#define rw_enter_diag(r, f)
#endif

static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    const struct lock_type *type)
{
	rwl->rwl_owner = 0;
	rwl->rwl_name = name;

#ifdef WITNESS
	rwl->rwl_lock_obj.lo_flags = lo_flags;
	rwl->rwl_lock_obj.lo_name = name;
	rwl->rwl_lock_obj.lo_type = type;
	WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
	(void)type;
	(void)lo_flags;
#endif
}

void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}

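/*
 * Typical initialization goes through the rw_init() macro from
 * <sys/rwlock.h>, which ends up here; an illustrative sketch:
 *
 *	struct rwlock lk;
 *
 *	rw_init(&lk, "examplelk");
 */
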
int
rw_enter(struct rwlock *rwl, int flags)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
#ifdef MULTIPROCESSOR
	/*
	 * If the process holds the kernel lock, we want to give up the CPU
	 * as soon as possible so other processes waiting for the kernel lock
	 * can progress. Hence no spinning if we hold the kernel lock.
	 */
	unsigned int spin = (_kernel_lock_held()) ? 0 : RW_SPINS;
#endif
	int error, prio;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

#ifdef MULTIPROCESSOR
		/*
		 * It makes sense to spin for a while in case the lock
		 * is held by a writer.
		 */
		if ((o & RWLOCK_WRLOCK) && (spin != 0)) {
			spin--;
			CPU_BUSY_CYCLE();
			continue;
		}
#endif

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		prio = op->wait_prio;
		if (flags & RW_INTR)
			prio |= PCATCH;
		sleep_setup(&sls, rwl, prio, rwl->rwl_name);
		if (flags & RW_INTR)
			sleep_setup_signal(&sls);

		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) &&
		    (error = sleep_finish_signal(&sls)) != 0)
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If the old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, we just
	 * downgraded a write lock and there may be read waiters; wake them
	 * up so they can retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

	return (0);
}

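/*
 * Illustrative use of the flag bits (a sketch based on the rw_enter(9)
 * interface, not code from this file; "lk" is a hypothetical lock):
 *
 *	error = rw_enter(&lk, RW_READ | RW_NOSLEEP);
 *	if (error == EBUSY)
 *		... the lock was busy and we could not sleep ...
 *
 *	error = rw_enter(&lk, RW_WRITE | RW_INTR);
 *	if (error != 0)
 *		... the sleep was interrupted by a signal ...
 */
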
void
rw_exit(struct rwlock *rwl)
{
	unsigned long wrlock;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	wrlock = rwl->rwl_owner & RWLOCK_WRLOCK;
	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);

	membar_exit_before_atomic();
	rw_do_exit(rwl, wrlock);
}

/* The caller must issue membar_exit_before_atomic() before calling this. */
void
rw_do_exit(struct rwlock *rwl, unsigned long wrlock)
{
	unsigned long owner, set;

	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			set = (owner - RWLOCK_READ_INCR) &
				~(RWLOCK_WAIT|RWLOCK_WRWANT);
	} while (__predict_false(rw_cas(&rwl->rwl_owner, owner, set)));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}

int
rw_status(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (owner & RWLOCK_WRLOCK) {
		if (RW_PROC(curproc) == RW_PROC(owner))
			return RW_WRITE;
		else
			return RW_WRITE_OTHER;
	}
	if (owner)
		return RW_READ;
	return (0);
}

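/*
 * Illustrative interpretation of rw_status() results (a sketch; "lk" is
 * a hypothetical lock):
 *
 *	switch (rw_status(&lk)) {
 *	case RW_WRITE:		... write-locked by the current process ...
 *	case RW_WRITE_OTHER:	... write-locked by another process ...
 *	case RW_READ:		... read-locked ...
 *	case 0:			... not locked ...
 *	}
 */
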
#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_XLOCKED);
#else
	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
		panic("%s: lock not held by this process", rwl->rwl_name);
#endif
}

void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_SLOCKED);
#else
	if (!RW_PROC(rwl->rwl_owner) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
#endif
}

void
rw_assert_anylock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_LOCKED);
#else
	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
#endif
}

void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_UNLOCKED);
#else
	if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
		panic("%s: lock held", rwl->rwl_name);
#endif
}
#endif

/* Recursive rwlocks. */
void
_rrw_init_flags(struct rrwlock *rrwl, const char *name, int flags,
    const struct lock_type *type)
{
	memset(rrwl, 0, sizeof(struct rrwlock));
	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
	    type);
}

int
rrw_enter(struct rrwlock *rrwl, int flags)
{
	int	rv;

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		if (flags & RW_RECURSEFAIL)
			return (EDEADLK);
		else {
			rrwl->rrwl_wcnt++;
			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return (0);
		}
	}

	rv = rw_enter(&rrwl->rrwl_lock, flags);
	if (rv == 0)
		rrwl->rrwl_wcnt = 1;

	return (rv);
}

void
rrw_exit(struct rrwlock *rrwl)
{

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		KASSERT(rrwl->rrwl_wcnt > 0);
		rrwl->rrwl_wcnt--;
		if (rrwl->rrwl_wcnt != 0) {
			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return;
		}
	}

	rw_exit(&rrwl->rrwl_lock);
}

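/*
 * Illustrative recursive usage (a sketch; "rrwl" is a hypothetical,
 * already initialized rrwlock): the holding process may re-enter the
 * lock, and only the final rrw_exit() releases it.  With RW_RECURSEFAIL,
 * re-entry fails with EDEADLK instead.
 *
 *	rrw_enter(&rrwl, RW_WRITE);	... rrwl_wcnt becomes 1 ...
 *	rrw_enter(&rrwl, RW_WRITE);	... rrwl_wcnt becomes 2 ...
 *	rrw_exit(&rrwl);		... still held, rrwl_wcnt 1 ...
 *	rrw_exit(&rrwl);		... released ...
 */
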
int
rrw_status(struct rrwlock *rrwl)
{
	return (rw_status(&rrwl->rrwl_lock));
}