/*	$OpenBSD: kern_rwlock.c,v 1.39 2019/05/11 17:45:59 sashan Exp $	*/

/*
 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
#include <sys/witness.h>

/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)

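/*
 * Illustrative note (added; not in the original source): all lock state
 * lives in the single word rwl_owner.  The low RWLOCK_MASK bits hold the
 * flag bits (RWLOCK_WRLOCK, RWLOCK_WAIT, RWLOCK_WRWANT); the remaining
 * bits hold either the owning proc pointer (write-locked, hence the
 * alignment concern above) or the reader count in units of
 * RWLOCK_READ_INCR.  For example, a write lock held by proc p with
 * waiters looks like RW_PROC(p) | RWLOCK_WRLOCK | RWLOCK_WAIT, while
 * two readers and no waiters look like 2 * RWLOCK_READ_INCR.
 */
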
/*
 * Other OSes implement more sophisticated mechanisms to determine how long
 * a process attempting to acquire the lock should spin. We start with the
 * simplest approach: we make at most RW_SPINS attempts before giving up
 * and putting the process on the sleep queue.
 */
#define RW_SPINS	1000

#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	if (*p != o)
		return (1);
	*p = n;

	return (0);
}
#endif
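
/*
 * Sketch (added for illustration): rw_cas() follows the "0 on success"
 * convention, so the lock paths below all have this shape:
 *
 *	unsigned long o = rwl->rwl_owner;
 *	if (rw_cas(&rwl->rwl_owner, o, o + RWLOCK_READ_INCR) == 0) {
 *		... the swap happened: we hold a read reference ...
 *	}
 *
 * On MULTIPROCESSOR kernels this is a real atomic compare-and-swap; the
 * uniprocessor fallback above is a plain test-and-store, which suffices
 * when there is no other CPU to race against.
 */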

/*
 * Magic wand for lock operations. Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty. We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
 *		with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 */
static const struct rwlock_op {
	unsigned long inc;
	unsigned long check;
	unsigned long wait_set;
	long proc_mult;
	int wait_prio;
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK,
		RWLOCK_WAIT,
		0,
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,
		0,
		-1,
		PLOCK
	},
};
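
/*
 * Example (added for illustration): rw_enter() below indexes this table
 * with (flags & RW_OPMASK) - 1 and computes its increment as
 *
 *	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
 *
 * so for RW_WRITE the owner word becomes RW_PROC(curproc) | RWLOCK_WRLOCK,
 * for RW_READ it simply grows by RWLOCK_READ_INCR, and for RW_DOWNGRADE
 * the proc pointer and RWLOCK_WRLOCK are subtracted away again, leaving
 * a single-reader count.
 */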

void
rw_enter_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (__predict_false((owner & RWLOCK_WRLOCK) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		rw_enter(rwl, RW_READ);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
	}
}

void
rw_enter_write(struct rwlock *rwl)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		rw_enter(rwl, RW_WRITE);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
	}
}

void
rw_exit_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_rdlock(rwl);

	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		rw_exit(rwl);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
}

void
rw_exit_write(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_wrlock(rwl);

	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		rw_exit(rwl);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
}
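
/*
 * Usage sketch (added for illustration; sc_lock is a hypothetical lock):
 * the four functions above are the uncontended fast paths.  A typical
 * reader does
 *
 *	rw_enter_read(&sc_lock);
 *	... read the shared state ...
 *	rw_exit_read(&sc_lock);
 *
 * and each path falls back to the generic rw_enter()/rw_exit() slow path
 * as soon as its single compare-and-swap cannot be applied, i.e. on
 * contention or when waiters must be woken on exit.
 */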

#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}

#else
#define rw_enter_diag(r, f)
#endif

static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    const struct lock_type *type)
{
	rwl->rwl_owner = 0;
	rwl->rwl_name = name;

#ifdef WITNESS
	rwl->rwl_lock_obj.lo_flags = lo_flags;
	rwl->rwl_lock_obj.lo_name = name;
	rwl->rwl_lock_obj.lo_type = type;
	WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
	(void)type;
	(void)lo_flags;
#endif
}

void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}
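
/*
 * Sketch (added; assumes the usual rw_init() wrapper from <sys/rwlock.h>
 * rather than a direct call to _rw_init_flags()):
 *
 *	struct rwlock sc_lock;
 *	rw_init(&sc_lock, "sclk");
 *
 * The flags argument only affects the witness(4) lock-order checking set
 * up by _rw_init_flags_witness() above; without WITNESS it is discarded.
 */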

int
rw_enter(struct rwlock *rwl, int flags)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
#ifdef MULTIPROCESSOR
	/*
	 * If the process holds the kernel lock, we want to give up the
	 * CPU as soon as possible so other processes waiting for the
	 * kernel lock can make progress. Hence no spinning if we hold
	 * the kernel lock.
	 */
	unsigned int spin = (_kernel_lock_held()) ? 0 : RW_SPINS;
#endif
	int error;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

#ifdef MULTIPROCESSOR
		/*
		 * It makes sense to spin for a while in case the lock
		 * is held by a writer.
		 */
		if ((o & RWLOCK_WRLOCK) && (spin != 0)) {
			spin--;
			CPU_BUSY_CYCLE();
			continue;
		}
#endif

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		sleep_setup(&sls, rwl, op->wait_prio, rwl->rwl_name);
		if (flags & RW_INTR)
			sleep_setup_signal(&sls, op->wait_prio | PCATCH);

		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) &&
		    (error = sleep_finish_signal(&sls)) != 0)
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If the old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means
	 * we downgraded a write lock and may have had read waiters; wake
	 * them so they can retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

	return (0);
}
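
/*
 * Usage sketch (added for illustration; sc_lock is hypothetical): a
 * writer that may sleep but wants to back out on signals would do
 *
 *	error = rw_enter(&sc_lock, RW_WRITE | RW_INTR);
 *	if (error != 0)
 *		return (error);
 *
 * RW_NOSLEEP turns the sleep into an immediate EBUSY, RW_SLEEPFAIL turns
 * a completed sleep into EAGAIN without taking the lock, and
 * RW_DOWNGRADE converts a held write lock into a read lock.
 */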

void
rw_exit(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;
	int wrlock = owner & RWLOCK_WRLOCK;
	unsigned long set;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);

	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);

	membar_exit_before_atomic();
	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			set = (owner - RWLOCK_READ_INCR) &
				~(RWLOCK_WAIT|RWLOCK_WRWANT);
	} while (rw_cas(&rwl->rwl_owner, owner, set));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}

int
rw_status(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (owner & RWLOCK_WRLOCK) {
		if (RW_PROC(curproc) == RW_PROC(owner))
			return RW_WRITE;
		else
			return RW_WRITE_OTHER;
	}
	if (owner)
		return RW_READ;
	return (0);
}

#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RWLOCK_OWNER(rwl) != (struct proc *)RW_PROC(curproc))
		panic("%s: lock not held by this process", rwl->rwl_name);
}

void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (!RWLOCK_OWNER(rwl) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
}

void
rw_assert_anylock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
}

void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (rwl->rwl_owner != 0L)
		panic("%s: lock held", rwl->rwl_name);
}
#endif

/* Recursive rwlocks. */
void
_rrw_init_flags(struct rrwlock *rrwl, char *name, int flags,
    const struct lock_type *type)
{
	memset(rrwl, 0, sizeof(struct rrwlock));
	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
	    type);
}

int
rrw_enter(struct rrwlock *rrwl, int flags)
{
	int	rv;

	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
	    (struct proc *)RW_PROC(curproc)) {
		if (flags & RW_RECURSEFAIL)
			return (EDEADLK);
		else {
			rrwl->rrwl_wcnt++;
			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return (0);
		}
	}

	rv = rw_enter(&rrwl->rrwl_lock, flags);
	if (rv == 0)
		rrwl->rrwl_wcnt = 1;

	return (rv);
}

void
rrw_exit(struct rrwlock *rrwl)
{

	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
	    (struct proc *)RW_PROC(curproc)) {
		KASSERT(rrwl->rrwl_wcnt > 0);
		rrwl->rrwl_wcnt--;
		if (rrwl->rrwl_wcnt != 0) {
			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return;
		}
	}

	rw_exit(&rrwl->rrwl_lock);
}
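
/*
 * Sketch (added for illustration): the recursive variant lets the write
 * holder re-enter, so the following is legal where a plain rwlock would
 * deadlock (or panic under DIAGNOSTIC):
 *
 *	rrw_enter(&rrwl, RW_WRITE);
 *	rrw_enter(&rrwl, RW_WRITE);	rrwl_wcnt is now 2
 *	rrw_exit(&rrwl);		still write-locked
 *	rrw_exit(&rrwl);		released
 *
 * Passing RW_RECURSEFAIL makes the nested rrw_enter() fail with EDEADLK
 * instead of recursing.
 */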

int
rrw_status(struct rrwlock *rrwl)
{
	return (rw_status(&rrwl->rrwl_lock));
}
465