/*	$OpenBSD: kern_rwlock.c,v 1.33 2017/12/18 10:05:43 mpi Exp $	*/

/*
 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
#include <sys/witness.h>

/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)

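/*
 * rw_cas() tries to change the lock word from 'o' to 'n' and returns
 * zero on success, non-zero if the lock word no longer matched 'o'.
 */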
#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	if (*p != o)
		return (1);
	*p = n;

	return (0);
}
#endif

/*
 * Magic wand for lock operations. Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty. We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
 *		with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 */
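/*
 * rw_ops is indexed by (flags & RW_OPMASK) - 1; the sparse entry below
 * only exists to keep the RW_DOWNGRADE entry at its expected slot.
 */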
static const struct rwlock_op {
	unsigned long inc;
	unsigned long check;
	unsigned long wait_set;
	long proc_mult;
	int wait_prio;
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK,
		RWLOCK_WAIT,
		0,
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,
		0,
		-1,
		PLOCK
	},
};

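/*
 * Fast path for grabbing a read lock: a single CAS that bumps the read
 * count.  A write-locked owner field or a failed CAS sends us to the
 * slow path in _rw_enter().
 */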
void
_rw_enter_read(struct rwlock *rwl LOCK_FL_VARS)
{
	unsigned long owner = rwl->rwl_owner;

	if (__predict_false((owner & RWLOCK_WRLOCK) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		_rw_enter(rwl, RW_READ LOCK_FL_ARGS);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, file, line,
		    NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0, file, line);
	}
}

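/*
 * Fast path for grabbing the write lock: CAS the owner field from
 * completely unowned (0) to curproc with RWLOCK_WRLOCK set.  Any
 * contention sends us to the slow path in _rw_enter().
 */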
void
_rw_enter_write(struct rwlock *rwl LOCK_FL_VARS)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		_rw_enter(rwl, RW_WRITE LOCK_FL_ARGS);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, file, line, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line);
	}
}

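/*
 * Fast path for dropping a read lock: a single CAS that removes one
 * reader.  If waiters are recorded or the CAS fails, let the slow path
 * in _rw_exit() handle the wakeup.
 */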
void
_rw_exit_read(struct rwlock *rwl LOCK_FL_VARS)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_rdlock(rwl);

	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		_rw_exit(rwl LOCK_FL_ARGS);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0, file, line);
}

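/*
 * Fast path for dropping the write lock: CAS the owner field back to 0.
 * If waiters are recorded or the CAS fails, let the slow path in
 * _rw_exit() handle the wakeup.
 */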
void
_rw_exit_write(struct rwlock *rwl LOCK_FL_VARS)
{
	unsigned long owner = rwl->rwl_owner;

	rw_assert_wrlock(rwl);

	membar_exit_before_atomic();
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		_rw_exit(rwl LOCK_FL_ARGS);
	else
		WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line);
}

#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}

#else
#define rw_enter_diag(r, f)
#endif

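/*
 * Common initialization for plain and recursive rwlocks; also registers
 * the lock with WITNESS when the kernel is built with it.
 */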
static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    struct lock_type *type)
{
	rwl->rwl_owner = 0;
	rwl->rwl_name = name;

#ifdef WITNESS
	rwl->rwl_lock_obj.lo_flags = lo_flags;
	rwl->rwl_lock_obj.lo_name = name;
	rwl->rwl_lock_obj.lo_type = type;
	WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
	(void)type;
	(void)lo_flags;
#endif
}

void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}

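/*
 * Slow path shared by all lock operations.  Pick the rw_ops entry for
 * the requested operation, sleep for as long as any of the bits in
 * op->check are set, then retry the CAS until the increment takes.
 */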
int
_rw_enter(struct rwlock *rwl, int flags LOCK_FL_VARS)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
	int error;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, file, line,
		    NULL);
#endif

	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic */
		if (panicstr)
			return (0);

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		sleep_setup(&sls, rwl, op->wait_prio, rwl->rwl_name);
		if (flags & RW_INTR)
			sleep_setup_signal(&sls, op->wait_prio | PCATCH);

		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) &&
		    (error = sleep_finish_signal(&sls)) != 0)
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If the old lock word had both RWLOCK_WRLOCK and RWLOCK_WAIT set,
	 * we just downgraded a write lock and there may be read waiters;
	 * wake them up so they can retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags, file, line);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags, file, line);

	return (0);
}

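/*
 * Slow path unlock shared by read and write locks.  Clear the owner
 * field (write) or drop one reader and the wait bits (read), then wake
 * anybody recorded in RWLOCK_WAIT.
 */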
void
_rw_exit(struct rwlock *rwl LOCK_FL_VARS)
{
	unsigned long owner = rwl->rwl_owner;
	int wrlock = owner & RWLOCK_WRLOCK;
	unsigned long set;

	/* Avoid deadlocks after panic */
	if (panicstr)
		return;

	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);

	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0,
	    file, line);

	membar_exit_before_atomic();
	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			set = (owner - RWLOCK_READ_INCR) &
				~(RWLOCK_WAIT|RWLOCK_WRWANT);
	} while (rw_cas(&rwl->rwl_owner, owner, set));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}

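/*
 * Report how the lock is held: write-locked by us, write-locked by
 * somebody else, read-locked, or not held at all.
 */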
int
rw_status(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (owner & RWLOCK_WRLOCK) {
		if (RW_PROC(curproc) == RW_PROC(owner))
			return RW_WRITE;
		else
			return RW_WRITE_OTHER;
	}
	if (owner)
		return RW_READ;
	return (0);
}

#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RWLOCK_OWNER(rwl) != (struct proc *)RW_PROC(curproc))
		panic("%s: lock not held by this process", rwl->rwl_name);
}

void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (!RWLOCK_OWNER(rwl) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
}

void
rw_assert_anylock(struct rwlock *rwl)
{
	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
}

void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (rwl->rwl_owner != 0L)
		panic("%s: lock held", rwl->rwl_name);
}
#endif

/* Recursive rwlocks; the holder may re-enter the write lock. */
void
_rrw_init_flags(struct rrwlock *rrwl, char *name, int flags,
    struct lock_type *type)
{
	memset(rrwl, 0, sizeof(struct rrwlock));
	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
	    type);
}

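/*
 * Enter a recursive rwlock.  If curproc already holds the write lock,
 * just bump the recursion count (unless RW_RECURSEFAIL was given);
 * otherwise take the underlying rwlock.
 */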
int
_rrw_enter(struct rrwlock *rrwl, int flags LOCK_FL_VARS)
{
	int	rv;

	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
	    (struct proc *)RW_PROC(curproc)) {
		if (flags & RW_RECURSEFAIL)
			return (EDEADLK);
		else {
			rrwl->rrwl_wcnt++;
			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE, file, line);
			return (0);
		}
	}

	rv = _rw_enter(&rrwl->rrwl_lock, flags LOCK_FL_ARGS);
	if (rv == 0)
		rrwl->rrwl_wcnt = 1;

	return (rv);
}

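/*
 * Exit a recursive rwlock.  A nested write hold only drops the recursion
 * count; the last exit releases the underlying rwlock.
 */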
void
_rrw_exit(struct rrwlock *rrwl LOCK_FL_VARS)
{

	if (RWLOCK_OWNER(&rrwl->rrwl_lock) ==
	    (struct proc *)RW_PROC(curproc)) {
		KASSERT(rrwl->rrwl_wcnt > 0);
		rrwl->rrwl_wcnt--;
		if (rrwl->rrwl_wcnt != 0) {
			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE, file, line);
			return;
		}
	}

	_rw_exit(&rrwl->rrwl_lock LOCK_FL_ARGS);
}

int
rrw_status(struct rrwlock *rrwl)
{
	return (rw_status(&rrwl->rrwl_lock));
}