xref: /openbsd-src/sys/kern/kern_rwlock.c (revision c90a81c56dcebd6a1b73fe4aff9b03385b8e63b3)
1 /*	$OpenBSD: kern_rwlock.c,v 1.37 2018/06/08 15:38:15 guenther Exp $	*/
2 
3 /*
4  * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
5  * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/proc.h>
23 #include <sys/rwlock.h>
24 #include <sys/limits.h>
25 #include <sys/atomic.h>
26 #include <sys/witness.h>
27 
/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)

#ifdef MULTIPROCESSOR
/*
 * rw_cas(p, o, n): atomically replace *p with n iff *p still equals o.
 * Returns 0 on success, non-zero if the compare failed.  Note the
 * inverted sense compared to the usual compare-and-swap return value.
 */
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
/*
 * Uniprocessor variant: no atomicity required, a plain compare and
 * store is enough.  Same contract: 0 on success, non-zero on failure.
 */
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	if (*p != o)
		return (1);
	*p = n;

	return (0);
}
#endif
44 
/*
 * Magic wand for lock operations. Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty. We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK may not be set. We increment with
 *		RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 */
static const struct rwlock_op {
	unsigned long inc;	/* value added to the owner word on success */
	unsigned long check;	/* lock is busy while any of these are set */
	unsigned long wait_set;	/* flags to set on the lock while waiting */
	long proc_mult;		/* multiplier for RW_PROC(curproc) in inc */
	int wait_prio;		/* sleep priority while waiting */
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,	/* any bit set means we can't write-lock */
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,		/* record our proc pointer as owner */
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK,
		RWLOCK_WAIT,
		0,
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,		/* never blocks: we already hold the lock */
		0,
		-1,		/* remove our proc pointer from the owner */
		PLOCK
	},
};
89 
void
_rw_enter_read(struct rwlock *rwl LOCK_FL_VARS)
{
	unsigned long owner = rwl->rwl_owner;

	/*
	 * Fast path: if no writer holds the lock, try a single CAS to
	 * bump the reader count.  If the lock is write-held or the CAS
	 * loses a race, fall back to the generic slow path.
	 */
	if (__predict_false((owner & RWLOCK_WRLOCK) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		_rw_enter(rwl, RW_READ LOCK_FL_ARGS);
	else {
		/* Acquired: order the CAS before the critical section. */
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, file, line,
		    NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0, file, line);
	}
}
105 
void
_rw_enter_write(struct rwlock *rwl LOCK_FL_VARS)
{
	struct proc *p = curproc;

	/*
	 * Fast path: one CAS from completely unowned (0) to
	 * write-locked by us.  Any contention takes the slow path.
	 */
	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		_rw_enter(rwl, RW_WRITE LOCK_FL_ARGS);
	else {
		/* Acquired: order the CAS before the critical section. */
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, file, line, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line);
	}
}
121 
void
_rw_exit_read(struct rwlock *rwl LOCK_FL_VARS)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_rdlock(rwl);

	/*
	 * Fast path: drop one reader with a single CAS.  If someone is
	 * waiting on the lock (RWLOCK_WAIT) or the CAS loses a race,
	 * go through _rw_exit() which also issues the wakeup.
	 */
	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		_rw_exit(rwl LOCK_FL_ARGS);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0, file, line);
}
136 
void
_rw_exit_write(struct rwlock *rwl LOCK_FL_VARS)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_wrlock(rwl);

	/*
	 * Fast path: release the write lock by storing 0.  If there are
	 * waiters (RWLOCK_WAIT) or the CAS loses a race, go through
	 * _rw_exit() which also issues the wakeup.
	 */
	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		_rw_exit(rwl LOCK_FL_ARGS);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line);
}
151 
#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		/* Catch attempts to lock against ourselves. */
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}

#else
#define rw_enter_diag(r, f)
#endif
187 
188 static void
189 _rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
190     const struct lock_type *type)
191 {
192 	rwl->rwl_owner = 0;
193 	rwl->rwl_name = name;
194 
195 #ifdef WITNESS
196 	rwl->rwl_lock_obj.lo_flags = lo_flags;
197 	rwl->rwl_lock_obj.lo_name = name;
198 	rwl->rwl_lock_obj.lo_type = type;
199 	WITNESS_INIT(&rwl->rwl_lock_obj, type);
200 #else
201 	(void)type;
202 	(void)lo_flags;
203 #endif
204 }
205 
/*
 * Public rwlock initializer: translate the API flags into lock-object
 * flags and hand off to the common initialization routine.
 */
void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	int lo_flags = RWLOCK_LO_FLAGS(flags);

	_rw_init_flags_witness(rwl, name, lo_flags, type);
}
212 
/*
 * Generic (slow path) lock operation.  The low bits of `flags' select
 * RW_WRITE, RW_READ or RW_DOWNGRADE (indexing rw_ops above), possibly
 * or'ed with the RW_NOSLEEP, RW_INTR, RW_SLEEPFAIL and RW_DUPOK
 * modifiers.  Returns 0 on success, EBUSY for a failed RW_NOSLEEP
 * attempt, EAGAIN for RW_SLEEPFAIL after a sleep, or the error from an
 * interrupted sleep when RW_INTR is set.
 */
int
_rw_enter(struct rwlock *rwl, int flags LOCK_FL_VARS)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
	int error;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	/* No-sleep attempts and downgrades are not order-checked. */
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, file, line,
		    NULL);
#endif

	/* The op flags are 1-based indices into rw_ops. */
	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		/*
		 * Mark the lock as having waiters, then sleep.  If the
		 * CAS fails the owner word changed under us, so skip
		 * the sleep and re-evaluate the lock state.
		 */
		sleep_setup(&sls, rwl, op->wait_prio, rwl->rwl_name);
		if (flags & RW_INTR)
			sleep_setup_signal(&sls, op->wait_prio | PCATCH);

		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) &&
		    (error = sleep_finish_signal(&sls)) != 0)
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	/* The lock looked free; try to take it, racing other CPUs. */
	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we
	 * downgraded a write lock and had possible read waiter, wake them
	 * to let them retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags, file, line);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags, file, line);

	return (0);
}
284 
/*
 * Generic (slow path) release.  Works for both read and write locks:
 * the RWLOCK_WRLOCK bit sampled from the owner word tells us which
 * kind of lock we are releasing.  Wakes up any waiters.
 */
void
_rw_exit(struct rwlock *rwl LOCK_FL_VARS)
{
	unsigned long owner = rwl->rwl_owner;
	int wrlock = owner & RWLOCK_WRLOCK;
	unsigned long set;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);

	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0,
	    file, line);

	/* Order the critical section before the releasing CAS. */
	membar_exit_before_atomic();
	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			/* Drop one reader and clear the waiter bits. */
			set = (owner - RWLOCK_READ_INCR) &
				~(RWLOCK_WAIT|RWLOCK_WRWANT);
	} while (rw_cas(&rwl->rwl_owner, owner, set));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}
317 
318 int
319 rw_status(struct rwlock *rwl)
320 {
321 	unsigned long owner = rwl->rwl_owner;
322 
323 	if (owner & RWLOCK_WRLOCK) {
324 		if (RW_PROC(curproc) == RW_PROC(owner))
325 			return RW_WRITE;
326 		else
327 			return RW_WRITE_OTHER;
328 	}
329 	if (owner)
330 		return RW_READ;
331 	return (0);
332 }
333 
334 #ifdef DIAGNOSTIC
335 void
336 rw_assert_wrlock(struct rwlock *rwl)
337 {
338 	if (panicstr || db_active)
339 		return;
340 
341 	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
342 		panic("%s: lock not held", rwl->rwl_name);
343 
344 	if (RWLOCK_OWNER(rwl) != (struct proc *)RW_PROC(curproc))
345 		panic("%s: lock not held by this process", rwl->rwl_name);
346 }
347 
348 void
349 rw_assert_rdlock(struct rwlock *rwl)
350 {
351 	if (panicstr || db_active)
352 		return;
353 
354 	if (!RWLOCK_OWNER(rwl) || (rwl->rwl_owner & RWLOCK_WRLOCK))
355 		panic("%s: lock not shared", rwl->rwl_name);
356 }
357 
358 void
359 rw_assert_anylock(struct rwlock *rwl)
360 {
361 	if (panicstr || db_active)
362 		return;
363 
364 	switch (rw_status(rwl)) {
365 	case RW_WRITE_OTHER:
366 		panic("%s: lock held by different process", rwl->rwl_name);
367 	case 0:
368 		panic("%s: lock not held", rwl->rwl_name);
369 	}
370 }
371 
372 void
373 rw_assert_unlocked(struct rwlock *rwl)
374 {
375 	if (panicstr || db_active)
376 		return;
377 
378 	if (rwl->rwl_owner != 0L)
379 		panic("%s: lock held", rwl->rwl_name);
380 }
381 #endif
382 
/* Recursive rwlocks. */
384 void
385 _rrw_init_flags(struct rrwlock *rrwl, char *name, int flags,
386     const struct lock_type *type)
387 {
388 	memset(rrwl, 0, sizeof(struct rrwlock));
389 	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
390 	    type);
391 }
392 
393 int
394 _rrw_enter(struct rrwlock *rrwl, int flags LOCK_FL_VARS)
395 {
396 	int	rv;
397 
398 	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
399 	    (struct proc *)RW_PROC(curproc)) {
400 		if (flags & RW_RECURSEFAIL)
401 			return (EDEADLK);
402 		else {
403 			rrwl->rrwl_wcnt++;
404 			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
405 			    LOP_EXCLUSIVE, file, line);
406 			return (0);
407 		}
408 	}
409 
410 	rv = _rw_enter(&rrwl->rrwl_lock, flags LOCK_FL_ARGS);
411 	if (rv == 0)
412 		rrwl->rrwl_wcnt = 1;
413 
414 	return (rv);
415 }
416 
417 void
418 _rrw_exit(struct rrwlock *rrwl LOCK_FL_VARS)
419 {
420 
421 	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
422 	    (struct proc *)RW_PROC(curproc)) {
423 		KASSERT(rrwl->rrwl_wcnt > 0);
424 		rrwl->rrwl_wcnt--;
425 		if (rrwl->rrwl_wcnt != 0) {
426 			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
427 			    LOP_EXCLUSIVE, file, line);
428 			return;
429 		}
430 	}
431 
432 	_rw_exit(&rrwl->rrwl_lock LOCK_FL_ARGS);
433 }
434 
435 int
436 rrw_status(struct rrwlock *rrwl)
437 {
438 	return (rw_status(&rrwl->rrwl_lock));
439 }
440