xref: /netbsd-src/sys/kern/kern_lock.c (revision 0df165c04d0a9ca1adde9ed2b890344c937954a6)
1 /*	$NetBSD: kern_lock.c,v 1.127 2007/11/21 10:19:09 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, and by Andrew Doran.
10  *
11  * This code is derived from software contributed to The NetBSD Foundation
12  * by Ross Harvey.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the NetBSD
25  *	Foundation, Inc. and its contributors.
26  * 4. Neither the name of The NetBSD Foundation nor the names of its
27  *    contributors may be used to endorse or promote products derived
28  *    from this software without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
31  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
34  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40  * POSSIBILITY OF SUCH DAMAGE.
41  */
42 
43 /*
44  * Copyright (c) 1995
45  *	The Regents of the University of California.  All rights reserved.
46  *
47  * This code contains ideas from software contributed to Berkeley by
48  * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
49  * System project at Carnegie-Mellon University.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions
53  * are met:
54  * 1. Redistributions of source code must retain the above copyright
55  *    notice, this list of conditions and the following disclaimer.
56  * 2. Redistributions in binary form must reproduce the above copyright
57  *    notice, this list of conditions and the following disclaimer in the
58  *    documentation and/or other materials provided with the distribution.
59  * 3. Neither the name of the University nor the names of its contributors
60  *    may be used to endorse or promote products derived from this software
61  *    without specific prior written permission.
62  *
63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73  * SUCH DAMAGE.
74  *
75  *	@(#)kern_lock.c	8.18 (Berkeley) 5/21/95
76  */
77 
78 #include <sys/cdefs.h>
79 __KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.127 2007/11/21 10:19:09 yamt Exp $");
80 
81 #include "opt_multiprocessor.h"
82 
83 #include <sys/param.h>
84 #include <sys/proc.h>
85 #include <sys/lock.h>
86 #include <sys/systm.h>
87 #include <sys/kernel.h>
88 #include <sys/lockdebug.h>
89 #include <sys/cpu.h>
90 #include <sys/syslog.h>
91 
92 #include <machine/stdarg.h>
93 
94 #include <dev/lockstat.h>
95 
96 /*
97  * Note that stdarg.h and the ANSI-style va_start macro are used for both
98  * ANSI and traditional C compiles.
99  * XXX: this requires that stdarg.h define: va_alist and va_dcl
100  */
101 void	lock_printf(const char *fmt, ...)
102     __attribute__((__format__(__printf__,1,2)));
103 
104 static int acquire(struct lock **, int *, int, int, int, uintptr_t);
105 
106 int	lock_debug_syslog = 0;	/* defaults to printf, but can be patched */
107 bool	kernel_lock_dodebug;
108 __cpu_simple_lock_t kernel_lock;
109 
110 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC) /* { */
111 #define	COUNT(lkp, l, cpu_id, x)	(l)->l_locks += (x)
112 #else
113 #define COUNT(lkp, p, cpu_id, x)
114 #endif /* LOCKDEBUG || DIAGNOSTIC */ /* } */
115 
116 #define	RETURN_ADDRESS		((uintptr_t)__builtin_return_address(0))
117 
118 /*
119  * Acquire a resource: sleep until none of the 'wanted' flags remain set.
120  */
121 static int
122 acquire(struct lock **lkpp, int *s, int extflags,
123 	int drain, int wanted, uintptr_t ra)
124 {
125 	int error;
126 	struct lock *lkp = *lkpp;
127 	LOCKSTAT_TIMER(slptime);
128 	LOCKSTAT_FLAG(lsflag);
129 
130 	KASSERT(drain || (wanted & LK_WAIT_NONZERO) == 0);
131 
132 	LOCKSTAT_ENTER(lsflag);
133 
134 	for (error = 0; (lkp->lk_flags & wanted) != 0; ) {
135 		if (drain)
136 			lkp->lk_flags |= LK_WAITDRAIN;
137 		else {
138 			lkp->lk_waitcount++;
139 			lkp->lk_flags |= LK_WAIT_NONZERO;
140 		}
141 		LOCKSTAT_START_TIMER(lsflag, slptime);
142 		error = ltsleep(drain ? (void *)&lkp->lk_flags : (void *)lkp,
143 		    lkp->lk_prio, lkp->lk_wmesg, lkp->lk_timo,
144 		    &lkp->lk_interlock);
145 		LOCKSTAT_STOP_TIMER(lsflag, slptime);
146 		LOCKSTAT_EVENT_RA(lsflag, (void *)(uintptr_t)lkp,
147 		    LB_LOCKMGR | LB_SLEEP1, 1, slptime, ra);
148 		if (!drain) {
149 			lkp->lk_waitcount--;
150 			if (lkp->lk_waitcount == 0)
151 				lkp->lk_flags &= ~LK_WAIT_NONZERO;
152 		}
153 		if (error)
154 			break;
155 		if (extflags & LK_SLEEPFAIL) {
156 			error = ENOLCK;
157 			break;
158 		}
159 	}
160 
161 	LOCKSTAT_EXIT(lsflag);
162 
163 	return error;
164 }
165 
166 #define	SETHOLDER(lkp, pid, lid, cpu_id)				\
167 do {									\
168 	(lkp)->lk_lockholder = pid;					\
169 	(lkp)->lk_locklwp = lid;					\
170 } while (/*CONSTCOND*/0)
171 
172 #define	WEHOLDIT(lkp, pid, lid, cpu_id)					\
173 	 ((lkp)->lk_lockholder == (pid) && (lkp)->lk_locklwp == (lid))
174 
175 #define	WAKEUP_WAITER(lkp)						\
176 do {									\
177 	if (((lkp)->lk_flags & LK_WAIT_NONZERO) != 0) {			\
178 		wakeup((lkp));						\
179 	}								\
180 } while (/*CONSTCOND*/0)
181 
182 #if defined(LOCKDEBUG)
183 /*
184  * Lock debug printing routine; can be configured to print to console
185  * or log to syslog.
186  */
187 void
188 lock_printf(const char *fmt, ...)
189 {
190 	char b[150];
191 	va_list ap;
192 
193 	va_start(ap, fmt);
194 	if (lock_debug_syslog)
195 		vlog(LOG_DEBUG, fmt, ap);
196 	else {
197 		vsnprintf(b, sizeof(b), fmt, ap);
198 		printf_nolog("%s", b);
199 	}
200 	va_end(ap);
201 }
202 #endif /* LOCKDEBUG */
203 
204 static void
205 lockpanic(struct lock *lkp, const char *fmt, ...)
206 {
207 	char s[150], b[150];
208 	static const char *locktype[] = {
209 	    "*0*", "shared", "exclusive", "upgrade", "exclupgrade",
210 	    "downgrade", "release", "drain", "exclother", "*9*",
211 	    "*10*", "*11*", "*12*", "*13*", "*14*", "*15*"
212 	};
213 	va_list ap;
214 	va_start(ap, fmt);
215 	vsnprintf(s, sizeof(s), fmt, ap);
216 	va_end(ap);
217 	bitmask_snprintf(lkp->lk_flags, __LK_FLAG_BITS, b, sizeof(b));
218 	panic("%s ("
219 	    "type %s flags %s, sharecount %d, exclusivecount %d, "
220 	    "recurselevel %d, waitcount %d, wmesg %s"
221 	    ", lock_addr %p, unlock_addr %p"
222 	    ")\n",
223 	    s, locktype[lkp->lk_flags & LK_TYPE_MASK],
224 	    b, lkp->lk_sharecount, lkp->lk_exclusivecount,
225 	    lkp->lk_recurselevel, lkp->lk_waitcount, lkp->lk_wmesg,
226 	    (void *)lkp->lk_lock_addr, (void *)lkp->lk_unlock_addr
227 	);
228 }
229 
230 /*
231  * Initialize a lock; required before use.
232  */
233 void
234 lockinit(struct lock *lkp, pri_t prio, const char *wmesg, int timo, int flags)
235 {
236 
237 	memset(lkp, 0, sizeof(struct lock));
238 	lkp->lk_flags = flags & LK_EXTFLG_MASK;
239 	simple_lock_init(&lkp->lk_interlock);
240 	lkp->lk_lockholder = LK_NOPROC;
241 	lkp->lk_prio = prio;
242 	lkp->lk_timo = timo;
243 	lkp->lk_wmesg = wmesg;
244 	lkp->lk_lock_addr = 0;
245 	lkp->lk_unlock_addr = 0;
246 }
247 
248 void
249 lockdestroy(struct lock *lkp)
250 {
251 
252 	/* nothing yet */
253 }
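
/*
 * Illustrative sketch (editor's addition, not original code): a typical
 * init/teardown lifecycle for a lockmgr lock.  The "example_softc"
 * structure, its "sc_lock" member and the function names are
 * hypothetical; PRIBIO and the wait message are likewise only plausible
 * choices.  The block is guarded out and serves purely as an example.
 */
#if 0
struct example_softc {
	struct lock	sc_lock;	/* hypothetical embedded lock */
};

static void
example_attach(struct example_softc *sc)
{

	/* Initialize before first use; sleep at PRIBIO, no timeout. */
	lockinit(&sc->sc_lock, PRIBIO, "exlock", 0, 0);
}

static void
example_detach(struct example_softc *sc)
{

	/*
	 * One possible teardown pattern: drain all holds and waiters,
	 * then destroy the lock.
	 */
	(void)lockmgr(&sc->sc_lock, LK_DRAIN, NULL);
	lockdestroy(&sc->sc_lock);
}
#endif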
254 
255 /*
256  * Determine the status of a lock.
257  */
258 int
259 lockstatus(struct lock *lkp)
260 {
261 	int lock_type = 0;
262 	struct lwp *l = curlwp; /* XXX */
263 	pid_t pid;
264 	lwpid_t lid;
265 	cpuid_t cpu_num;
266 
267 	if (l == NULL) {
268 		cpu_num = cpu_number();
269 		pid = LK_KERNPROC;
270 		lid = 0;
271 	} else {
272 		cpu_num = LK_NOCPU;
273 		pid = l->l_proc->p_pid;
274 		lid = l->l_lid;
275 	}
276 
277 	simple_lock(&lkp->lk_interlock);
278 	if (lkp->lk_exclusivecount != 0) {
279 		if (WEHOLDIT(lkp, pid, lid, cpu_num))
280 			lock_type = LK_EXCLUSIVE;
281 		else
282 			lock_type = LK_EXCLOTHER;
283 	} else if (lkp->lk_sharecount != 0)
284 		lock_type = LK_SHARED;
285 	else if (lkp->lk_flags & (LK_WANT_EXCL | LK_WANT_UPGRADE))
286 		lock_type = LK_EXCLOTHER;
287 	simple_unlock(&lkp->lk_interlock);
288 	return (lock_type);
289 }
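
/*
 * Illustrative sketch (editor's addition): lockstatus() is convenient
 * in diagnostic assertions, e.g. to verify that the caller holds a lock
 * exclusively before touching the data it protects.  The softc and the
 * function name continue the hypothetical example above.
 */
#if 0
static void
example_modify(struct example_softc *sc)
{

	/* Caller rule (hypothetical): sc_lock must be held exclusively. */
	KASSERT(lockstatus(&sc->sc_lock) == LK_EXCLUSIVE);
	/* ... modify data protected by sc_lock ... */
}
#endif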
290 
291 /*
292  * XXX XXX kludge around another kludge..
293  *
294  * vfs_shutdown() may be called from interrupt context, either as a result
295  * of a panic, or from the debugger.   It proceeds to call
296  * sys_sync(&proc0, ...), pretending its running on behalf of proc0
297  *
298  * We would like to make an attempt to sync the filesystems in this case, so
299  * if this happens, we treat attempts to acquire locks specially.
300  * All locks are acquired on behalf of proc0.
301  *
302  * If we've already panicked, we don't block waiting for locks, but
303  * just barge right ahead since we're already going down in flames.
304  */
305 
306 /*
307  * Set, change, or release a lock.
308  *
309  * Shared requests increment the shared count. Exclusive requests set the
310  * LK_WANT_EXCL flag (preventing further shared locks), and wait for already
311  * accepted shared locks and shared-to-exclusive upgrades to go away.
312  */
313 int
314 lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp)
315 {
316 	int error;
317 	pid_t pid;
318 	lwpid_t lid;
319 	int extflags;
320 	cpuid_t cpu_num;
321 	struct lwp *l = curlwp;
322 	int lock_shutdown_noblock = 0;
323 	int s = 0;
324 
325 	error = 0;
326 
327 	/* LK_RETRY is for vn_lock, not for lockmgr. */
328 	KASSERT((flags & LK_RETRY) == 0);
329 	KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL);
330 
331 	simple_lock(&lkp->lk_interlock);
332 	if (flags & LK_INTERLOCK)
333 		simple_unlock(interlkp);
334 	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
335 
336 	if (l == NULL) {
337 		if (!doing_shutdown) {
338 			panic("lockmgr: no context");
339 		} else {
340 			l = &lwp0;
341 			if (panicstr && (!(flags & LK_NOWAIT))) {
342 				flags |= LK_NOWAIT;
343 				lock_shutdown_noblock = 1;
344 			}
345 		}
346 	}
347 	lid = l->l_lid;
348 	pid = l->l_proc->p_pid;
349 	cpu_num = cpu_number();
350 
351 	/*
352 	 * Once a lock has drained, the LK_DRAINING flag is set and an
353 	 * exclusive lock is returned. The only valid operation thereafter
354 	 * is a single release of that exclusive lock. This final release
355 	 * clears the LK_DRAINING flag and sets the LK_DRAINED flag. Any
356 	 * further requests of any sort will result in a panic. The bits
357 	 * selected for these two flags are chosen so that they will be set
358 	 * in memory that is freed (freed memory is filled with 0xdeadbeef).
359 	 * The final release is permitted to give a new lease on life to
360 	 * the lock by specifying LK_REENABLE.
361 	 */
362 	if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) {
363 #ifdef DIAGNOSTIC /* { */
364 		if (lkp->lk_flags & LK_DRAINED)
365 			lockpanic(lkp, "lockmgr: using decommissioned lock");
366 		if ((flags & LK_TYPE_MASK) != LK_RELEASE ||
367 		    WEHOLDIT(lkp, pid, lid, cpu_num) == 0)
368 			lockpanic(lkp, "lockmgr: non-release on draining lock: %d",
369 			    flags & LK_TYPE_MASK);
370 #endif /* DIAGNOSTIC */ /* } */
371 		lkp->lk_flags &= ~LK_DRAINING;
372 		if ((flags & LK_REENABLE) == 0)
373 			lkp->lk_flags |= LK_DRAINED;
374 	}
375 
376 	switch (flags & LK_TYPE_MASK) {
377 
378 	case LK_SHARED:
379 		if (WEHOLDIT(lkp, pid, lid, cpu_num) == 0) {
380 			/*
381 			 * If just polling, check to see if we will block.
382 			 */
383 			if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
384 			    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE))) {
385 				error = EBUSY;
386 				break;
387 			}
388 			/*
389 			 * Wait for exclusive locks and upgrades to clear.
390 			 */
391 			error = acquire(&lkp, &s, extflags, 0,
392 			    LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE,
393 			    RETURN_ADDRESS);
394 			if (error)
395 				break;
396 			lkp->lk_sharecount++;
397 			lkp->lk_flags |= LK_SHARE_NONZERO;
398 			COUNT(lkp, l, cpu_num, 1);
399 			break;
400 		}
401 		/*
402 		 * We hold an exclusive lock, so downgrade it to shared.
403 		 * An alternative would be to fail with EDEADLK.
404 		 */
405 		lkp->lk_sharecount++;
406 		lkp->lk_flags |= LK_SHARE_NONZERO;
407 		COUNT(lkp, l, cpu_num, 1);
408 		/* fall into downgrade */
409 
410 	case LK_DOWNGRADE:
411 		if (WEHOLDIT(lkp, pid, lid, cpu_num) == 0 ||
412 		    lkp->lk_exclusivecount == 0)
413 			lockpanic(lkp, "lockmgr: not holding exclusive lock");
414 		lkp->lk_sharecount += lkp->lk_exclusivecount;
415 		lkp->lk_flags |= LK_SHARE_NONZERO;
416 		lkp->lk_exclusivecount = 0;
417 		lkp->lk_recurselevel = 0;
418 		lkp->lk_flags &= ~LK_HAVE_EXCL;
419 		SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
420 #if defined(LOCKDEBUG)
421 		lkp->lk_unlock_addr = RETURN_ADDRESS;
422 #endif
423 		WAKEUP_WAITER(lkp);
424 		break;
425 
426 	case LK_EXCLUPGRADE:
427 		/*
428 		 * If another process is ahead of us to get an upgrade,
429 		 * then we want to fail rather than have an intervening
430 		 * exclusive access.
431 		 */
432 		if (lkp->lk_flags & LK_WANT_UPGRADE) {
433 			lkp->lk_sharecount--;
434 			if (lkp->lk_sharecount == 0)
435 				lkp->lk_flags &= ~LK_SHARE_NONZERO;
436 			COUNT(lkp, l, cpu_num, -1);
437 			error = EBUSY;
438 			break;
439 		}
440 		/* fall into normal upgrade */
441 
442 	case LK_UPGRADE:
443 		/*
444 		 * Upgrade a shared lock to an exclusive one. If another
445 		 * shared lock has already requested an upgrade to an
446 		 * exclusive lock, our shared lock is released and an
447 		 * exclusive lock is requested (which will be granted
448 		 * after the upgrade). If we return an error, the file
449 		 * will always be unlocked.
450 		 */
451 		if (WEHOLDIT(lkp, pid, lid, cpu_num) || lkp->lk_sharecount <= 0)
452 			lockpanic(lkp, "lockmgr: upgrade exclusive lock");
453 		lkp->lk_sharecount--;
454 		if (lkp->lk_sharecount == 0)
455 			lkp->lk_flags &= ~LK_SHARE_NONZERO;
456 		COUNT(lkp, l, cpu_num, -1);
457 		/*
458 		 * If we are just polling, check to see if we will block.
459 		 */
460 		if ((extflags & LK_NOWAIT) &&
461 		    ((lkp->lk_flags & LK_WANT_UPGRADE) ||
462 		     lkp->lk_sharecount > 1)) {
463 			error = EBUSY;
464 			break;
465 		}
466 		if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) {
467 			/*
468 			 * We are the first shared lock to request an upgrade, so
469 			 * request upgrade and wait for the shared count to
470 			 * drop to zero, then take exclusive lock.
471 			 */
472 			lkp->lk_flags |= LK_WANT_UPGRADE;
473 			error = acquire(&lkp, &s, extflags, 0, LK_SHARE_NONZERO,
474 			    RETURN_ADDRESS);
475 			lkp->lk_flags &= ~LK_WANT_UPGRADE;
476 			if (error) {
477 				WAKEUP_WAITER(lkp);
478 				break;
479 			}
480 			lkp->lk_flags |= LK_HAVE_EXCL;
481 			SETHOLDER(lkp, pid, lid, cpu_num);
482 #if defined(LOCKDEBUG)
483 			lkp->lk_lock_addr = RETURN_ADDRESS;
484 #endif
485 			if (lkp->lk_exclusivecount != 0)
486 				lockpanic(lkp, "lockmgr: non-zero exclusive count");
487 			lkp->lk_exclusivecount = 1;
488 			if (extflags & LK_SETRECURSE)
489 				lkp->lk_recurselevel = 1;
490 			COUNT(lkp, l, cpu_num, 1);
491 			break;
492 		}
493 		/*
494 		 * Someone else has requested an upgrade. Release our shared
495 		 * lock, awaken the upgrade requestor if we are the last shared
496 		 * lock, then request an exclusive lock.
497 		 */
498 		if (lkp->lk_sharecount == 0)
499 			WAKEUP_WAITER(lkp);
500 		/* fall into exclusive request */
501 
502 	case LK_EXCLUSIVE:
503 		if (WEHOLDIT(lkp, pid, lid, cpu_num)) {
504 			/*
505 			 * Recursive lock.
506 			 */
507 			if ((extflags & LK_CANRECURSE) == 0 &&
508 			     lkp->lk_recurselevel == 0) {
509 				if (extflags & LK_RECURSEFAIL) {
510 					error = EDEADLK;
511 					break;
512 				} else
513 					lockpanic(lkp, "lockmgr: locking against myself");
514 			}
515 			lkp->lk_exclusivecount++;
516 			if (extflags & LK_SETRECURSE &&
517 			    lkp->lk_recurselevel == 0)
518 				lkp->lk_recurselevel = lkp->lk_exclusivecount;
519 			COUNT(lkp, l, cpu_num, 1);
520 			break;
521 		}
522 		/*
523 		 * If we are just polling, check to see if we will sleep.
524 		 */
525 		if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
526 		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE |
527 		     LK_SHARE_NONZERO))) {
528 			error = EBUSY;
529 			break;
530 		}
531 		/*
532 		 * Try to acquire the want_exclusive flag.
533 		 */
534 		error = acquire(&lkp, &s, extflags, 0,
535 		    LK_HAVE_EXCL | LK_WANT_EXCL, RETURN_ADDRESS);
536 		if (error)
537 			break;
538 		lkp->lk_flags |= LK_WANT_EXCL;
539 		/*
540 		 * Wait for shared locks and upgrades to finish.
541 		 */
542 		error = acquire(&lkp, &s, extflags, 0,
543 		    LK_HAVE_EXCL | LK_WANT_UPGRADE | LK_SHARE_NONZERO,
544 		    RETURN_ADDRESS);
545 		lkp->lk_flags &= ~LK_WANT_EXCL;
546 		if (error) {
547 			WAKEUP_WAITER(lkp);
548 			break;
549 		}
550 		lkp->lk_flags |= LK_HAVE_EXCL;
551 		SETHOLDER(lkp, pid, lid, cpu_num);
552 #if defined(LOCKDEBUG)
553 		lkp->lk_lock_addr = RETURN_ADDRESS;
554 #endif
555 		if (lkp->lk_exclusivecount != 0)
556 			lockpanic(lkp, "lockmgr: non-zero exclusive count");
557 		lkp->lk_exclusivecount = 1;
558 		if (extflags & LK_SETRECURSE)
559 			lkp->lk_recurselevel = 1;
560 		COUNT(lkp, l, cpu_num, 1);
561 		break;
562 
563 	case LK_RELEASE:
564 		if (lkp->lk_exclusivecount != 0) {
565 			if (WEHOLDIT(lkp, pid, lid, cpu_num) == 0) {
566 				lockpanic(lkp, "lockmgr: pid %d.%d, not "
567 				    "exclusive lock holder %d.%d "
568 				    "unlocking", pid, lid,
569 				    lkp->lk_lockholder,
570 				    lkp->lk_locklwp);
571 			}
572 			if (lkp->lk_exclusivecount == lkp->lk_recurselevel)
573 				lkp->lk_recurselevel = 0;
574 			lkp->lk_exclusivecount--;
575 			COUNT(lkp, l, cpu_num, -1);
576 			if (lkp->lk_exclusivecount == 0) {
577 				lkp->lk_flags &= ~LK_HAVE_EXCL;
578 				SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
579 #if defined(LOCKDEBUG)
580 				lkp->lk_unlock_addr = RETURN_ADDRESS;
581 #endif
582 			}
583 		} else if (lkp->lk_sharecount != 0) {
584 			lkp->lk_sharecount--;
585 			if (lkp->lk_sharecount == 0)
586 				lkp->lk_flags &= ~LK_SHARE_NONZERO;
587 			COUNT(lkp, l, cpu_num, -1);
588 		}
589 #ifdef DIAGNOSTIC
590 		else
591 			lockpanic(lkp, "lockmgr: release of unlocked lock!");
592 #endif
593 		WAKEUP_WAITER(lkp);
594 		break;
595 
596 	case LK_DRAIN:
597 		/*
598 		 * Check that we do not already hold the lock, as it can
599 		 * never drain if we do. Unfortunately, we have no way to
600 		 * check for holding a shared lock, but at least we can
601 		 * check for an exclusive one.
602 		 */
603 		if (WEHOLDIT(lkp, pid, lid, cpu_num))
604 			lockpanic(lkp, "lockmgr: draining against myself");
605 		/*
606 		 * If we are just polling, check to see if we will sleep.
607 		 */
608 		if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
609 		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE |
610 		     LK_SHARE_NONZERO | LK_WAIT_NONZERO))) {
611 			error = EBUSY;
612 			break;
613 		}
614 		error = acquire(&lkp, &s, extflags, 1,
615 		    LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE |
616 		    LK_SHARE_NONZERO | LK_WAIT_NONZERO,
617 		    RETURN_ADDRESS);
618 		if (error)
619 			break;
620 		lkp->lk_flags |= LK_HAVE_EXCL;
621 		if ((extflags & LK_RESURRECT) == 0)
622 			lkp->lk_flags |= LK_DRAINING;
623 		SETHOLDER(lkp, pid, lid, cpu_num);
624 #if defined(LOCKDEBUG)
625 		lkp->lk_lock_addr = RETURN_ADDRESS;
626 #endif
627 		lkp->lk_exclusivecount = 1;
628 		/* XXX unlikely that we'd want this */
629 		if (extflags & LK_SETRECURSE)
630 			lkp->lk_recurselevel = 1;
631 		COUNT(lkp, l, cpu_num, 1);
632 		break;
633 
634 	default:
635 		simple_unlock(&lkp->lk_interlock);
636 		lockpanic(lkp, "lockmgr: unknown locktype request %d",
637 		    flags & LK_TYPE_MASK);
638 		/* NOTREACHED */
639 	}
640 	if ((lkp->lk_flags & LK_WAITDRAIN) != 0 &&
641 	    ((lkp->lk_flags &
642 	      (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE |
643 	      LK_SHARE_NONZERO | LK_WAIT_NONZERO)) == 0)) {
644 		lkp->lk_flags &= ~LK_WAITDRAIN;
645 		wakeup(&lkp->lk_flags);
646 	}
647 	/*
648 	 * Note that this panic will be a recursive panic, since
649 	 * we only set lock_shutdown_noblock above if panicstr != NULL.
650 	 */
651 	if (error && lock_shutdown_noblock)
652 		lockpanic(lkp, "lockmgr: deadlock (see previous panic)");
653 
654 	simple_unlock(&lkp->lk_interlock);
655 	return (error);
656 }
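
/*
 * Illustrative sketch (editor's addition): common lockmgr() call
 * patterns, using the hypothetical example_softc from above.  The error
 * handling is one plausible reaction by a caller, not something
 * lockmgr() itself requires.
 */
#if 0
static int
example_read(struct example_softc *sc)
{
	int error;

	/* Take a shared (reader) hold; this may sleep. */
	error = lockmgr(&sc->sc_lock, LK_SHARED, NULL);
	if (error)
		return (error);
	/* ... read data protected by sc_lock ... */
	(void)lockmgr(&sc->sc_lock, LK_RELEASE, NULL);
	return (0);
}

static int
example_try_write(struct example_softc *sc)
{
	int error;

	/* Poll for an exclusive hold; fail with EBUSY instead of sleeping. */
	error = lockmgr(&sc->sc_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL);
	if (error)
		return (error);
	/* ... modify data protected by sc_lock ... */
	(void)lockmgr(&sc->sc_lock, LK_RELEASE, NULL);
	return (0);
}
#endif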
657 
658 /*
659  * Print out information about the state of a lock. Used by VOP_PRINT
660  * routines to display status about contained locks.
661  */
662 void
663 lockmgr_printinfo(struct lock *lkp)
664 {
665 
666 	if (lkp->lk_sharecount)
667 		printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
668 		    lkp->lk_sharecount);
669 	else if (lkp->lk_flags & LK_HAVE_EXCL) {
670 		printf(" lock type %s: EXCL (count %d) by ",
671 		    lkp->lk_wmesg, lkp->lk_exclusivecount);
672 		printf("pid %d.%d", lkp->lk_lockholder,
673 		    lkp->lk_locklwp);
674 	} else
675 		printf(" not locked");
676 	if (lkp->lk_waitcount > 0)
677 		printf(" with %d pending", lkp->lk_waitcount);
678 }
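
/*
 * Illustrative sketch (editor's addition): how a print routine might
 * use lockmgr_printinfo().  The output above begins with a leading
 * space and omits the newline, so the caller finishes the line.  Names
 * are again hypothetical.
 */
#if 0
static void
example_print(struct example_softc *sc)
{

	printf("example_softc at %p:", sc);
	lockmgr_printinfo(&sc->sc_lock);
	printf("\n");
}
#endif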
679 
680 #if defined(LOCKDEBUG)
681 void
682 assert_sleepable(struct simplelock *interlock, const char *msg)
683 {
684 
685 	if (panicstr != NULL)
686 		return;
687 	LOCKDEBUG_BARRIER(&kernel_lock, 1);
688 	if (CURCPU_IDLE_P() && !cold) {
689 		panic("assert_sleepable: idle");
690 	}
691 }
692 #endif
693 
694 /*
695  * rump doesn't need the kernel lock, so force it out.  We cannot
696  * easily include it for compilation at the moment because of
697  * a) SPINLOCK_* and b) mb_write().  They are defined in different
698  * places and ways for each arch, so simply do not bother to
699  * fight a lot for no gain (i.e. pain but still no gain).
700  */
701 #ifndef _RUMPKERNEL
702 /*
703  * Functions for manipulating the kernel_lock.  We put them here
704  * so that they show up in profiles.
705  */
706 
707 #define	_KERNEL_LOCK_ABORT(msg)						\
708     LOCKDEBUG_ABORT(&kernel_lock, &_kernel_lock_ops, __func__, msg)
709 
710 #ifdef LOCKDEBUG
711 #define	_KERNEL_LOCK_ASSERT(cond)					\
712 do {									\
713 	if (!(cond))							\
714 		_KERNEL_LOCK_ABORT("assertion failed: " #cond);		\
715 } while (/* CONSTCOND */ 0)
716 #else
717 #define	_KERNEL_LOCK_ASSERT(cond)	/* nothing */
718 #endif
719 
720 void	_kernel_lock_dump(volatile void *);
721 
722 lockops_t _kernel_lock_ops = {
723 	"Kernel lock",
724 	0,
725 	_kernel_lock_dump
726 };
727 
728 /*
729  * Initialize the kernel lock.
730  */
731 void
732 kernel_lock_init(void)
733 {
734 
735 	__cpu_simple_lock_init(&kernel_lock);
736 	kernel_lock_dodebug = LOCKDEBUG_ALLOC(&kernel_lock, &_kernel_lock_ops,
737 	    RETURN_ADDRESS);
738 }
739 
740 /*
741  * Print debugging information about the kernel lock.
742  */
743 void
744 _kernel_lock_dump(volatile void *junk)
745 {
746 	struct cpu_info *ci = curcpu();
747 
748 	(void)junk;
749 
750 	printf_nolog("curcpu holds : %18d wanted by: %#018lx\n",
751 	    ci->ci_biglock_count, (long)ci->ci_biglock_wanted);
752 }
753 
754 /*
755  * Acquire 'nlocks' holds on the kernel lock.  If 'l' is non-null, the
756  * acquisition is from process context.
757  */
758 void
759 _kernel_lock(int nlocks, struct lwp *l)
760 {
761 	struct cpu_info *ci = curcpu();
762 	LOCKSTAT_TIMER(spintime);
763 	LOCKSTAT_FLAG(lsflag);
764 	struct lwp *owant;
765 #ifdef LOCKDEBUG
766 	u_int spins;
767 #endif
768 	int s;
769 
770 	if (nlocks == 0)
771 		return;
772 	_KERNEL_LOCK_ASSERT(nlocks > 0);
773 
774 	l = curlwp;
775 
776 	if (ci->ci_biglock_count != 0) {
777 		_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(&kernel_lock));
778 		ci->ci_biglock_count += nlocks;
779 		l->l_blcnt += nlocks;
780 		return;
781 	}
782 
783 	_KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
784 	LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, &kernel_lock, RETURN_ADDRESS,
785 	    0);
786 
787 	s = splvm();
788 	if (__cpu_simple_lock_try(&kernel_lock)) {
789 		ci->ci_biglock_count = nlocks;
790 		l->l_blcnt = nlocks;
791 		LOCKDEBUG_LOCKED(kernel_lock_dodebug, &kernel_lock,
792 		    RETURN_ADDRESS, 0);
793 		splx(s);
794 		return;
795 	}
796 
797 	LOCKSTAT_ENTER(lsflag);
798 	LOCKSTAT_START_TIMER(lsflag, spintime);
799 
800 	/*
801 	 * Before setting ci_biglock_wanted we must post a store
802 	 * fence (see kern_mutex.c).  This is accomplished by the
803 	 * __cpu_simple_lock_try() above.
804 	 */
805 	owant = ci->ci_biglock_wanted;
806 	ci->ci_biglock_wanted = curlwp;	/* XXXAD */
807 
808 #ifdef LOCKDEBUG
809 	spins = 0;
810 #endif
811 
812 	do {
813 		splx(s);
814 		while (__SIMPLELOCK_LOCKED_P(&kernel_lock)) {
815 #ifdef LOCKDEBUG
816 			if (SPINLOCK_SPINOUT(spins))
817 				_KERNEL_LOCK_ABORT("spinout");
818 #endif
819 			SPINLOCK_BACKOFF_HOOK;
820 			SPINLOCK_SPIN_HOOK;
821 		}
822 		(void)splvm();
823 	} while (!__cpu_simple_lock_try(&kernel_lock));
824 
825 	ci->ci_biglock_wanted = owant;
826 	ci->ci_biglock_count = nlocks;
827 	l->l_blcnt = nlocks;
828 	LOCKSTAT_STOP_TIMER(lsflag, spintime);
829 	LOCKDEBUG_LOCKED(kernel_lock_dodebug, &kernel_lock, RETURN_ADDRESS, 0);
830 	splx(s);
831 
832 	/*
833 	 * Again, another store fence is required (see kern_mutex.c).
834 	 */
835 	mb_write();
836 	if (owant == NULL) {
837 		LOCKSTAT_EVENT(lsflag, &kernel_lock, LB_KERNEL_LOCK | LB_SPIN,
838 		    1, spintime);
839 	}
840 	LOCKSTAT_EXIT(lsflag);
841 }
842 
843 /*
844  * Release 'nlocks' holds on the kernel lock.  If 'nlocks' is zero, release
845  * all holds.  If 'l' is non-null, the release is from process context.
846  */
847 void
848 _kernel_unlock(int nlocks, struct lwp *l, int *countp)
849 {
850 	struct cpu_info *ci = curcpu();
851 	u_int olocks;
852 	int s;
853 
854 	l = curlwp;
855 
856 	_KERNEL_LOCK_ASSERT(nlocks < 2);
857 
858 	olocks = l->l_blcnt;
859 
860 	if (olocks == 0) {
861 		_KERNEL_LOCK_ASSERT(nlocks <= 0);
862 		if (countp != NULL)
863 			*countp = 0;
864 		return;
865 	}
866 
867 	_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(&kernel_lock));
868 
869 	if (nlocks == 0)
870 		nlocks = olocks;
871 	else if (nlocks == -1) {
872 		nlocks = 1;
873 		_KERNEL_LOCK_ASSERT(olocks == 1);
874 	}
875 
876 	_KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt);
877 
878 	l->l_blcnt -= nlocks;
879 	if (ci->ci_biglock_count == nlocks) {
880 		s = splvm();
881 		LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, &kernel_lock,
882 		    RETURN_ADDRESS, 0);
883 		ci->ci_biglock_count = 0;
884 		__cpu_simple_unlock(&kernel_lock);
885 		splx(s);
886 	} else
887 		ci->ci_biglock_count -= nlocks;
888 
889 	if (countp != NULL)
890 		*countp = olocks;
891 }
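
/*
 * Illustrative sketch (editor's addition): taking and releasing a
 * single hold on the big kernel lock around code that is not yet
 * MP-safe.  Callers normally go through wrapper macros (e.g. the
 * KERNEL_LOCK()/KERNEL_UNLOCK_ONE() macros) rather than calling these
 * functions directly; the direct calls are shown only to match the
 * interfaces defined in this file.
 */
#if 0
static void
example_not_mpsafe(void)
{

	_kernel_lock(1, curlwp);
	/* ... code that still relies on the big lock ... */
	_kernel_unlock(1, curlwp, NULL);
}
#endif
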
892 #endif /* !_RUMPKERNEL */
893