xref: /openbsd-src/sys/kern/kern_rwlock.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: kern_rwlock.c,v 1.38 2019/04/23 13:35:12 visa Exp $	*/
2 
3 /*
4  * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
5  * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/proc.h>
23 #include <sys/rwlock.h>
24 #include <sys/limits.h>
25 #include <sys/atomic.h>
26 #include <sys/witness.h>
27 
/*
 * Extract the owning proc pointer from an owner word (or from curproc)
 * by masking off the low RWLOCK_MASK flag bits.
 * XXX - temporary measure until proc0 is properly aligned
 */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)
30 
#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
/*
 * Uniprocessor compare-and-swap: no interlocked instruction required.
 * Returns 0 when *p matched o and was replaced with n, 1 otherwise —
 * the same convention as the MULTIPROCESSOR macro above.
 */
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	int mismatch = (*p != o);

	if (!mismatch)
		*p = n;

	return (mismatch);
}
#endif
44 
/*
 * Magic wand for lock operations. Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty. We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
 *		with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 *
 * NOTE(review): the RW_READ entry below checks only RWLOCK_WRLOCK,
 * not RWLOCK_WRWANT, so new readers are admitted even while a writer
 * is waiting — confirm whether the prose above or the table is the
 * current intent.
 *
 * The table is indexed by (flags & RW_OPMASK) - 1 in rw_enter(), hence
 * the sparse entry between RW_READ and RW_DOWNGRADE.
 */
static const struct rwlock_op {
	unsigned long inc;	/* value added to rwl_owner on success */
	unsigned long check;	/* owner bits that must be clear to proceed */
	unsigned long wait_set;	/* bits advertised while sleeping */
	long proc_mult;		/* multiplier for RW_PROC(curproc) in inc */
	int wait_prio;		/* sleep priority while waiting */
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,	/* every bit: lock must be entirely free */
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,		/* record holder's proc pointer */
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK,
		RWLOCK_WAIT,
		0,		/* readers are anonymous */
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,		/* never blocks: we already hold the lock */
		0,
		-1,		/* remove our proc pointer from the word */
		PLOCK
	},
};
89 
/*
 * Acquire a shared (read) lock.  Fast path: one CAS that bumps the
 * reader count; everything else is punted to the rw_enter() slow path.
 */
void
rw_enter_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	/*
	 * Fall back to the slow path when a writer holds the lock or
	 * when the CAS loses a race (owner changed under us).
	 */
	if (__predict_false((owner & RWLOCK_WRLOCK) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		rw_enter(rwl, RW_READ);
	else {
		/* Acquire barrier: critical-section loads must not be
		 * reordered before the atomic that took the lock. */
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
	}
}
104 
/*
 * Acquire an exclusive (write) lock.  Fast path: a single CAS that
 * installs our proc pointer plus RWLOCK_WRLOCK into a completely
 * free (zero) owner word; contention goes through rw_enter().
 */
void
rw_enter_write(struct rwlock *rwl)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		rw_enter(rwl, RW_WRITE);
	else {
		/* Acquire barrier, as in rw_enter_read(). */
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
	}
}
120 
/*
 * Release a shared (read) lock.  Fast path: one CAS that drops a
 * single reader; if anyone is waiting, or the CAS loses a race, the
 * generic rw_exit() handles the release and the wakeup.
 */
void
rw_exit_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_rdlock(rwl);

	/* Release barrier: critical-section accesses complete before
	 * the owner word is updated. */
	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		rw_exit(rwl);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
}
135 
/*
 * Release an exclusive (write) lock.  Fast path: one CAS that clears
 * the owner word; waiters (or a lost race) divert to rw_exit().
 */
void
rw_exit_write(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_wrlock(rwl);

	/* Release barrier, as in rw_exit_read(). */
	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		rw_exit(rwl);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
}
150 
#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 *
 * Panic when rw_enter() is about to do something nonsensical for
 * the requested operation.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	int op = flags & RW_OPMASK;

	if (op == RW_WRITE || op == RW_READ) {
		/* Recursing on a plain rwlock would deadlock. */
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
	} else if (op == RW_DOWNGRADE) {
		/* Only the current write holder may downgrade. */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
	} else
		panic("rw_enter: unknown op 0x%x", flags);
}

#else
#define rw_enter_diag(r, f)
#endif
186 
187 static void
188 _rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
189     const struct lock_type *type)
190 {
191 	rwl->rwl_owner = 0;
192 	rwl->rwl_name = name;
193 
194 #ifdef WITNESS
195 	rwl->rwl_lock_obj.lo_flags = lo_flags;
196 	rwl->rwl_lock_obj.lo_name = name;
197 	rwl->rwl_lock_obj.lo_type = type;
198 	WITNESS_INIT(&rwl->rwl_lock_obj, type);
199 #else
200 	(void)type;
201 	(void)lo_flags;
202 #endif
203 }
204 
/*
 * Public initializer: translate the caller's RW_* flags into
 * lock-object flags and defer to the common setup routine.
 */
void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}
211 
/*
 * Generic (slow-path) acquisition and downgrade.  The rw_ops entry
 * selected by `flags' drives the operation: which owner bits veto it
 * (check), what to add to the owner word on success (inc), and which
 * bits to advertise while sleeping (wait_set).
 *
 * Returns 0 on success, EBUSY for RW_NOSLEEP contention, EAGAIN for
 * RW_SLEEPFAIL after sleeping, or an errno from an interrupted sleep
 * when RW_INTR is set.
 */
int
rw_enter(struct rwlock *rwl, int flags)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
	int error;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

	/* rw_ops[] is indexed by operation code minus one. */
	op = &rw_ops[(flags & RW_OPMASK) - 1];

	/*
	 * inc adds the proc pointer for RW_WRITE (proc_mult 1), nothing
	 * for RW_READ (0), and removes it for RW_DOWNGRADE (-1).
	 */
	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		sleep_setup(&sls, rwl, op->wait_prio, rwl->rwl_name);
		if (flags & RW_INTR)
			sleep_setup_signal(&sls, op->wait_prio | PCATCH);

		/*
		 * Advertise that we are waiting by setting the wait
		 * bits.  If the CAS fails the owner word changed under
		 * us, so skip the sleep and re-examine the lock.
		 */
		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) &&
		    (error = sleep_finish_signal(&sls)) != 0)
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	/*
	 * The owner word looked acceptable; claim the lock with one
	 * CAS.  On failure it changed under us — start over.
	 */
	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we
	 * downgraded a write lock and had possible read waiter, wake them
	 * to let them retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

	return (0);
}
282 
/*
 * Generic (slow-path) release for both read and write locks; also
 * reached from the fast paths when waiters are present or their
 * single CAS failed.  Wakes up sleepers after dropping the lock.
 */
void
rw_exit(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;
	int wrlock = owner & RWLOCK_WRLOCK;
	unsigned long set;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);

	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);

	/* Release barrier before the owner word is updated. */
	membar_exit_before_atomic();
	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			/* Write lock: drop ownership entirely. */
			set = 0;
		else
			/*
			 * Read lock: drop one reader and clear the wait
			 * bits; woken waiters set them again if they
			 * have to go back to sleep.
			 */
			set = (owner - RWLOCK_READ_INCR) &
				~(RWLOCK_WAIT|RWLOCK_WRWANT);
	} while (rw_cas(&rwl->rwl_owner, owner, set));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}
314 
315 int
316 rw_status(struct rwlock *rwl)
317 {
318 	unsigned long owner = rwl->rwl_owner;
319 
320 	if (owner & RWLOCK_WRLOCK) {
321 		if (RW_PROC(curproc) == RW_PROC(owner))
322 			return RW_WRITE;
323 		else
324 			return RW_WRITE_OTHER;
325 	}
326 	if (owner)
327 		return RW_READ;
328 	return (0);
329 }
330 
#ifdef DIAGNOSTIC
/*
 * Assertion helpers: panic when the lock is not in the state the
 * caller claims.  All of them bail out silently after a panic or
 * while DDB is active so the debugger can inspect a wedged system.
 */

/* Assert that the calling process holds the write lock. */
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RWLOCK_OWNER(rwl) != (struct proc *)RW_PROC(curproc))
		panic("%s: lock not held by this process", rwl->rwl_name);
}

/* Assert that the lock is held shared (by some reader). */
void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (!RWLOCK_OWNER(rwl) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
}

/*
 * Assert that the lock is held at all by this process: shared, or
 * exclusively by us.  Held exclusively by another process counts as
 * a failure.
 */
void
rw_assert_anylock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
}

/* Assert that the lock is completely free. */
void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

	if (rwl->rwl_owner != 0L)
		panic("%s: lock held", rwl->rwl_name);
}
#endif
379 
380 /* recursive rwlocks; */
381 void
382 _rrw_init_flags(struct rrwlock *rrwl, char *name, int flags,
383     const struct lock_type *type)
384 {
385 	memset(rrwl, 0, sizeof(struct rrwlock));
386 	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
387 	    type);
388 }
389 
390 int
391 rrw_enter(struct rrwlock *rrwl, int flags)
392 {
393 	int	rv;
394 
395 	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
396 	    (struct proc *)RW_PROC(curproc)) {
397 		if (flags & RW_RECURSEFAIL)
398 			return (EDEADLK);
399 		else {
400 			rrwl->rrwl_wcnt++;
401 			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
402 			    LOP_EXCLUSIVE);
403 			return (0);
404 		}
405 	}
406 
407 	rv = rw_enter(&rrwl->rrwl_lock, flags);
408 	if (rv == 0)
409 		rrwl->rrwl_wcnt = 1;
410 
411 	return (rv);
412 }
413 
414 void
415 rrw_exit(struct rrwlock *rrwl)
416 {
417 
418 	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
419 	    (struct proc *)RW_PROC(curproc)) {
420 		KASSERT(rrwl->rrwl_wcnt > 0);
421 		rrwl->rrwl_wcnt--;
422 		if (rrwl->rrwl_wcnt != 0) {
423 			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
424 			    LOP_EXCLUSIVE);
425 			return;
426 		}
427 	}
428 
429 	rw_exit(&rrwl->rrwl_lock);
430 }
431 
432 int
433 rrw_status(struct rrwlock *rrwl)
434 {
435 	return (rw_status(&rrwl->rrwl_lock));
436 }
437