/*	$NetBSD: kern_lock.c,v 1.129 2007/12/06 17:05:08 ad Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Ross Harvey.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code contains ideas from software contributed to Berkeley by
 * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
 * System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_lock.c	8.18 (Berkeley) 5/21/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.129 2007/12/06 17:05:08 ad Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/syslog.h>
#include <sys/atomic.h>

#include <machine/stdarg.h>

#include <dev/lockstat.h>
/*
 * Note that stdarg.h and the ANSI style va_start macro are used for both
 * ANSI and traditional C compiles.
 * XXX: this requires that stdarg.h define: va_alist and va_dcl
 */
void	lock_printf(const char *fmt, ...)
    __attribute__((__format__(__printf__,1,2)));

static int acquire(struct lock **, int *, int, int, int, uintptr_t);

int	lock_debug_syslog = 0;	/* defaults to printf, but can be patched */
bool	kernel_lock_dodebug;
__cpu_simple_lock_t kernel_lock;

#if defined(LOCKDEBUG) || defined(DIAGNOSTIC) /* { */
#define	COUNT(lkp, l, cpu_id, x)	(l)->l_locks += (x)
#else
#define COUNT(lkp, p, cpu_id, x)
#endif /* LOCKDEBUG || DIAGNOSTIC */ /* } */

#define	RETURN_ADDRESS		((uintptr_t)__builtin_return_address(0))
/*
 * Acquire a resource: sleep until none of the flags in 'wanted' remain
 * set on the lock.  Called, and returns, with the lock's interlock held.
 */
static int
acquire(struct lock **lkpp, int *s, int extflags,
	int drain, int wanted, uintptr_t ra)
{
	int error;
	struct lock *lkp = *lkpp;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	KASSERT(drain || (wanted & LK_WAIT_NONZERO) == 0);

	LOCKSTAT_ENTER(lsflag);

	for (error = 0; (lkp->lk_flags & wanted) != 0; ) {
		if (drain)
			lkp->lk_flags |= LK_WAITDRAIN;
		else {
			lkp->lk_waitcount++;
			lkp->lk_flags |= LK_WAIT_NONZERO;
		}
		LOCKSTAT_START_TIMER(lsflag, slptime);
		error = ltsleep(drain ? (void *)&lkp->lk_flags : (void *)lkp,
		    lkp->lk_prio, lkp->lk_wmesg, lkp->lk_timo,
		    &lkp->lk_interlock);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_EVENT_RA(lsflag, (void *)(uintptr_t)lkp,
		    LB_LOCKMGR | LB_SLEEP1, 1, slptime, ra);
		if (!drain) {
			lkp->lk_waitcount--;
			if (lkp->lk_waitcount == 0)
				lkp->lk_flags &= ~LK_WAIT_NONZERO;
		}
		if (error)
			break;
		if (extflags & LK_SLEEPFAIL) {
			error = ENOLCK;
			break;
		}
	}

	LOCKSTAT_EXIT(lsflag);

	return error;
}

#define	SETHOLDER(lkp, pid, lid, cpu_id)				\
do {									\
	(lkp)->lk_lockholder = pid;					\
	(lkp)->lk_locklwp = lid;					\
} while (/*CONSTCOND*/0)

#define	WEHOLDIT(lkp, pid, lid, cpu_id)					\
	 ((lkp)->lk_lockholder == (pid) && (lkp)->lk_locklwp == (lid))

#define	WAKEUP_WAITER(lkp)						\
do {									\
	if (((lkp)->lk_flags & LK_WAIT_NONZERO) != 0) {			\
		wakeup((lkp));						\
	}								\
} while (/*CONSTCOND*/0)

#if defined(LOCKDEBUG)
/*
 * Lock debug printing routine; can be configured to print to console
 * or log to syslog.
 */
void
lock_printf(const char *fmt, ...)
{
	char b[150];
	va_list ap;

	va_start(ap, fmt);
	if (lock_debug_syslog)
		vlog(LOG_DEBUG, fmt, ap);
	else {
		vsnprintf(b, sizeof(b), fmt, ap);
		printf_nolog("%s", b);
	}
	va_end(ap);
}
#endif /* LOCKDEBUG */
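
/*
 * Example (sketch, not part of the original source): on LOCKDEBUG
 * kernels, lock debug output can be redirected from the console to
 * syslog by patching the flag at run time (e.g. with a kernel
 * debugger):
 *
 *	lock_debug_syslog = 1;
 *
 * after which lock_printf() routes through vlog(LOG_DEBUG, ...).
 */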

static void
lockpanic(struct lock *lkp, const char *fmt, ...)
{
	char s[150], b[150];
	static const char *locktype[] = {
	    "*0*", "shared", "exclusive", "*3*", "*4*", "downgrade",
	    "*release*", "drain", "exclother", "*9*", "*10*",
	    "*11*", "*12*", "*13*", "*14*", "*15*"
	};
	va_list ap;
	va_start(ap, fmt);
	vsnprintf(s, sizeof(s), fmt, ap);
	va_end(ap);
	bitmask_snprintf(lkp->lk_flags, __LK_FLAG_BITS, b, sizeof(b));
	panic("%s ("
	    "type %s flags %s, sharecount %d, exclusivecount %d, "
	    "recurselevel %d, waitcount %d, wmesg %s"
	    ", lock_addr %p, unlock_addr %p"
	    ")\n",
	    s, locktype[lkp->lk_flags & LK_TYPE_MASK],
	    b, lkp->lk_sharecount, lkp->lk_exclusivecount,
	    lkp->lk_recurselevel, lkp->lk_waitcount, lkp->lk_wmesg,
	    (void *)lkp->lk_lock_addr, (void *)lkp->lk_unlock_addr
	);
}

/*
 * Initialize a lock; required before use.
 */
void
lockinit(struct lock *lkp, pri_t prio, const char *wmesg, int timo, int flags)
{

	memset(lkp, 0, sizeof(struct lock));
	lkp->lk_flags = flags & LK_EXTFLG_MASK;
	simple_lock_init(&lkp->lk_interlock);
	lkp->lk_lockholder = LK_NOPROC;
	lkp->lk_prio = prio;
	lkp->lk_timo = timo;
	lkp->lk_wmesg = wmesg;
	lkp->lk_lock_addr = 0;
	lkp->lk_unlock_addr = 0;
}
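
/*
 * Example (illustrative sketch, not part of the original source): a
 * typical consumer embeds a struct lock in its own data structure and
 * initializes it once before first use.  The structure name and wait
 * message below are hypothetical.
 *
 *	struct frobdev {
 *		struct lock	fd_lock;
 *		...
 *	};
 *
 *	void
 *	frobdev_attach(struct frobdev *fd)
 *	{
 *		lockinit(&fd->fd_lock, PRIBIO, "frobdev", 0, 0);
 *	}
 */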

void
lockdestroy(struct lock *lkp)
{

	/* nothing yet */
}

/*
 * Determine the status of a lock.
 */
int
lockstatus(struct lock *lkp)
{
	int lock_type = 0;
	struct lwp *l = curlwp; /* XXX */
	pid_t pid;
	lwpid_t lid;
	cpuid_t cpu_num;

	if (l == NULL) {
		cpu_num = cpu_number();
		pid = LK_KERNPROC;
		lid = 0;
	} else {
		cpu_num = LK_NOCPU;
		pid = l->l_proc->p_pid;
		lid = l->l_lid;
	}

	simple_lock(&lkp->lk_interlock);
	if (lkp->lk_exclusivecount != 0) {
		if (WEHOLDIT(lkp, pid, lid, cpu_num))
			lock_type = LK_EXCLUSIVE;
		else
			lock_type = LK_EXCLOTHER;
	} else if (lkp->lk_sharecount != 0)
		lock_type = LK_SHARED;
	else if (lkp->lk_flags & LK_WANT_EXCL)
		lock_type = LK_EXCLOTHER;
	simple_unlock(&lkp->lk_interlock);
	return (lock_type);
}
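
/*
 * Example (sketch; the caller and its lock are hypothetical):
 * lockstatus() is typically used in assertions to verify that the
 * current LWP holds a lock exclusively before touching the data it
 * protects:
 *
 *	KASSERT(lockstatus(&fd->fd_lock) == LK_EXCLUSIVE);
 */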

/*
 * XXX XXX kludge around another kludge..
 *
 * vfs_shutdown() may be called from interrupt context, either as a result
 * of a panic, or from the debugger.   It proceeds to call
 * sys_sync(&proc0, ...), pretending it is running on behalf of proc0.
 *
 * We would like to make an attempt to sync the filesystems in this case, so
 * if this happens, we treat attempts to acquire locks specially.
 * All locks are acquired on behalf of proc0.
 *
 * If we've already panicked, we don't block waiting for locks, but
 * just barge right ahead since we're already going down in flames.
 */

/*
 * Set, change, or release a lock.
 *
 * Shared requests increment the shared count. Exclusive requests set the
 * LK_WANT_EXCL flag (preventing further shared locks), and wait for already
 * accepted shared locks to go away.
 */
int
lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp)
{
	int error;
	pid_t pid;
	lwpid_t lid;
	int extflags;
	cpuid_t cpu_num;
	struct lwp *l = curlwp;
	int lock_shutdown_noblock = 0;
	int s = 0;

	error = 0;

	/* LK_RETRY is for vn_lock, not for lockmgr. */
	KASSERT((flags & LK_RETRY) == 0);
	KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL);

	simple_lock(&lkp->lk_interlock);
	if (flags & LK_INTERLOCK)
		simple_unlock(interlkp);
	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;

	if (l == NULL) {
		if (!doing_shutdown) {
			panic("lockmgr: no context");
		} else {
			l = &lwp0;
			if (panicstr && (!(flags & LK_NOWAIT))) {
				flags |= LK_NOWAIT;
				lock_shutdown_noblock = 1;
			}
		}
	}
	lid = l->l_lid;
	pid = l->l_proc->p_pid;
	cpu_num = cpu_number();

	/*
	 * Once a lock has drained, the LK_DRAINING flag is set and an
	 * exclusive lock is returned. The only valid operation thereafter
	 * is a single release of that exclusive lock. This final release
	 * clears the LK_DRAINING flag and sets the LK_DRAINED flag. Any
	 * further requests of any sort will result in a panic. The bits
	 * selected for these two flags are chosen so that they will be set
	 * in memory that is freed (freed memory is filled with 0xdeadbeef).
	 * The final release is permitted to give a new lease on life to
	 * the lock by specifying LK_REENABLE.
	 */
	if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) {
#ifdef DIAGNOSTIC /* { */
		if (lkp->lk_flags & LK_DRAINED)
			lockpanic(lkp, "lockmgr: using decommissioned lock");
		if ((flags & LK_TYPE_MASK) != LK_RELEASE ||
		    WEHOLDIT(lkp, pid, lid, cpu_num) == 0)
			lockpanic(lkp, "lockmgr: non-release on draining lock: %d",
			    flags & LK_TYPE_MASK);
#endif /* DIAGNOSTIC */ /* } */
		lkp->lk_flags &= ~LK_DRAINING;
		if ((flags & LK_REENABLE) == 0)
			lkp->lk_flags |= LK_DRAINED;
	}

	switch (flags & LK_TYPE_MASK) {

	case LK_SHARED:
		if (WEHOLDIT(lkp, pid, lid, cpu_num) == 0) {
			/*
			 * If just polling, check to see if we will block.
			 */
			if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
			    (LK_HAVE_EXCL | LK_WANT_EXCL))) {
				error = EBUSY;
				break;
			}
			/*
			 * Wait for exclusive locks to clear.
			 */
			error = acquire(&lkp, &s, extflags, 0,
			    LK_HAVE_EXCL | LK_WANT_EXCL,
			    RETURN_ADDRESS);
			if (error)
				break;
			lkp->lk_sharecount++;
			lkp->lk_flags |= LK_SHARE_NONZERO;
			COUNT(lkp, l, cpu_num, 1);
			break;
		}
		/*
		 * We hold an exclusive lock, so downgrade it to shared.
		 * An alternative would be to fail with EDEADLK.
		 */
		lkp->lk_sharecount++;
		lkp->lk_flags |= LK_SHARE_NONZERO;
		COUNT(lkp, l, cpu_num, 1);
		/* fall into downgrade */

	case LK_DOWNGRADE:
		if (WEHOLDIT(lkp, pid, lid, cpu_num) == 0 ||
		    lkp->lk_exclusivecount == 0)
			lockpanic(lkp, "lockmgr: not holding exclusive lock");
		lkp->lk_sharecount += lkp->lk_exclusivecount;
		lkp->lk_flags |= LK_SHARE_NONZERO;
		lkp->lk_exclusivecount = 0;
		lkp->lk_recurselevel = 0;
		lkp->lk_flags &= ~LK_HAVE_EXCL;
		SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
#if defined(LOCKDEBUG)
		lkp->lk_unlock_addr = RETURN_ADDRESS;
#endif
		WAKEUP_WAITER(lkp);
		break;

	case LK_EXCLUSIVE:
		if (WEHOLDIT(lkp, pid, lid, cpu_num)) {
			/*
			 * Recursive lock.
			 */
			if ((extflags & LK_CANRECURSE) == 0 &&
			     lkp->lk_recurselevel == 0) {
				if (extflags & LK_RECURSEFAIL) {
					error = EDEADLK;
					break;
				} else
					lockpanic(lkp, "lockmgr: locking against myself");
			}
			lkp->lk_exclusivecount++;
			if (extflags & LK_SETRECURSE &&
			    lkp->lk_recurselevel == 0)
				lkp->lk_recurselevel = lkp->lk_exclusivecount;
			COUNT(lkp, l, cpu_num, 1);
			break;
		}
		/*
		 * If we are just polling, check to see if we will sleep.
		 */
		if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_SHARE_NONZERO))) {
			error = EBUSY;
			break;
		}
		/*
		 * Try to acquire the want_exclusive flag.
		 */
		error = acquire(&lkp, &s, extflags, 0,
		    LK_HAVE_EXCL | LK_WANT_EXCL, RETURN_ADDRESS);
		if (error)
			break;
		lkp->lk_flags |= LK_WANT_EXCL;
		/*
		 * Wait for shared locks to finish.
		 */
		error = acquire(&lkp, &s, extflags, 0,
		    LK_HAVE_EXCL | LK_SHARE_NONZERO,
		    RETURN_ADDRESS);
		lkp->lk_flags &= ~LK_WANT_EXCL;
		if (error) {
			WAKEUP_WAITER(lkp);
			break;
		}
		lkp->lk_flags |= LK_HAVE_EXCL;
		SETHOLDER(lkp, pid, lid, cpu_num);
#if defined(LOCKDEBUG)
		lkp->lk_lock_addr = RETURN_ADDRESS;
#endif
		if (lkp->lk_exclusivecount != 0)
			lockpanic(lkp, "lockmgr: non-zero exclusive count");
		lkp->lk_exclusivecount = 1;
		if (extflags & LK_SETRECURSE)
			lkp->lk_recurselevel = 1;
		COUNT(lkp, l, cpu_num, 1);
		break;

	case LK_RELEASE:
		if (lkp->lk_exclusivecount != 0) {
			if (WEHOLDIT(lkp, pid, lid, cpu_num) == 0) {
				lockpanic(lkp, "lockmgr: pid %d.%d, not "
				    "exclusive lock holder %d.%d "
				    "unlocking", pid, lid,
				    lkp->lk_lockholder,
				    lkp->lk_locklwp);
			}
			if (lkp->lk_exclusivecount == lkp->lk_recurselevel)
				lkp->lk_recurselevel = 0;
			lkp->lk_exclusivecount--;
			COUNT(lkp, l, cpu_num, -1);
			if (lkp->lk_exclusivecount == 0) {
				lkp->lk_flags &= ~LK_HAVE_EXCL;
				SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
#if defined(LOCKDEBUG)
				lkp->lk_unlock_addr = RETURN_ADDRESS;
#endif
			}
		} else if (lkp->lk_sharecount != 0) {
			lkp->lk_sharecount--;
			if (lkp->lk_sharecount == 0)
				lkp->lk_flags &= ~LK_SHARE_NONZERO;
			COUNT(lkp, l, cpu_num, -1);
		}
#ifdef DIAGNOSTIC
		else
			lockpanic(lkp, "lockmgr: release of unlocked lock!");
#endif
		WAKEUP_WAITER(lkp);
		break;

	case LK_DRAIN:
		/*
		 * Check that we do not already hold the lock, as it can
		 * never drain if we do. Unfortunately, we have no way to
		 * check for holding a shared lock, but at least we can
		 * check for an exclusive one.
		 */
		if (WEHOLDIT(lkp, pid, lid, cpu_num))
			lockpanic(lkp, "lockmgr: draining against myself");
		/*
		 * If we are just polling, check to see if we will sleep.
		 */
		if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
		     (LK_HAVE_EXCL | LK_WANT_EXCL |
		     LK_SHARE_NONZERO | LK_WAIT_NONZERO))) {
			error = EBUSY;
			break;
		}
		error = acquire(&lkp, &s, extflags, 1,
		    LK_HAVE_EXCL | LK_WANT_EXCL |
		    LK_SHARE_NONZERO | LK_WAIT_NONZERO,
		    RETURN_ADDRESS);
		if (error)
			break;
		lkp->lk_flags |= LK_HAVE_EXCL;
		if ((extflags & LK_RESURRECT) == 0)
			lkp->lk_flags |= LK_DRAINING;
		SETHOLDER(lkp, pid, lid, cpu_num);
#if defined(LOCKDEBUG)
		lkp->lk_lock_addr = RETURN_ADDRESS;
#endif
		lkp->lk_exclusivecount = 1;
		/* XXX unlikely that we'd want this */
		if (extflags & LK_SETRECURSE)
			lkp->lk_recurselevel = 1;
		COUNT(lkp, l, cpu_num, 1);
		break;

	default:
		simple_unlock(&lkp->lk_interlock);
		lockpanic(lkp, "lockmgr: unknown locktype request %d",
		    flags & LK_TYPE_MASK);
		/* NOTREACHED */
	}
	if ((lkp->lk_flags & LK_WAITDRAIN) != 0 &&
	    ((lkp->lk_flags &
	      (LK_HAVE_EXCL | LK_WANT_EXCL |
	      LK_SHARE_NONZERO | LK_WAIT_NONZERO)) == 0)) {
		lkp->lk_flags &= ~LK_WAITDRAIN;
		wakeup(&lkp->lk_flags);
	}
	/*
	 * Note that this panic will be a recursive panic, since
	 * we only set lock_shutdown_noblock above if panicstr != NULL.
	 */
	if (error && lock_shutdown_noblock)
		lockpanic(lkp, "lockmgr: deadlock (see previous panic)");

	simple_unlock(&lkp->lk_interlock);
	return (error);
}
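
/*
 * Example (illustrative sketch only; 'fd' and its lock are hypothetical):
 * a reader/writer pair using lockmgr().  Shared holds may coexist; an
 * exclusive hold excludes all others.
 *
 * Read side, sleeping until any exclusive holder is gone:
 *
 *	error = lockmgr(&fd->fd_lock, LK_SHARED, NULL);
 *	if (error == 0) {
 *		... examine fd state ...
 *		lockmgr(&fd->fd_lock, LK_RELEASE, NULL);
 *	}
 *
 * Write side, failing with EBUSY instead of sleeping:
 *
 *	error = lockmgr(&fd->fd_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL);
 *	if (error == 0) {
 *		... modify fd state ...
 *		lockmgr(&fd->fd_lock, LK_RELEASE, NULL);
 *	}
 */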

/*
 * Print out information about the state of a lock. Used by VOP_PRINT
 * routines to display status of contained locks.
 */
void
lockmgr_printinfo(struct lock *lkp)
{

	if (lkp->lk_sharecount)
		printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
		    lkp->lk_sharecount);
	else if (lkp->lk_flags & LK_HAVE_EXCL) {
		printf(" lock type %s: EXCL (count %d) by ",
		    lkp->lk_wmesg, lkp->lk_exclusivecount);
		printf("pid %d.%d", lkp->lk_lockholder,
		    lkp->lk_locklwp);
	} else
		printf(" not locked");
	if (lkp->lk_waitcount > 0)
		printf(" with %d pending", lkp->lk_waitcount);
}

#if defined(LOCKDEBUG)
void
assert_sleepable(struct simplelock *interlock, const char *msg)
{

	if (panicstr != NULL)
		return;
	LOCKDEBUG_BARRIER(&kernel_lock, 1);
	if (CURCPU_IDLE_P() && !cold) {
		panic("assert_sleepable: idle");
	}
}
#endif
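
/*
 * Example (hypothetical caller): a code path that may sleep can assert
 * early that sleeping is currently legal, e.g. at the head of an
 * allocation routine:
 *
 *	assert_sleepable(NULL, "frob_alloc");
 */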

/*
 * rump doesn't need the kernel lock, so force it out.  We cannot
 * currently easily include it for compilation because of
 * a) SPINLOCK_* and b) membar_producer().  They are defined in
 * different places / ways for each arch, so don't bother to fight
 * hard for no gain (i.e. pain but still no gain).
 */
#ifndef _RUMPKERNEL
/*
 * Functions for manipulating the kernel_lock.  We put them here
 * so that they show up in profiles.
 */

#define	_KERNEL_LOCK_ABORT(msg)						\
    LOCKDEBUG_ABORT(&kernel_lock, &_kernel_lock_ops, __func__, msg)

#ifdef LOCKDEBUG
#define	_KERNEL_LOCK_ASSERT(cond)					\
do {									\
	if (!(cond))							\
		_KERNEL_LOCK_ABORT("assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)
#else
#define	_KERNEL_LOCK_ASSERT(cond)	/* nothing */
#endif

void	_kernel_lock_dump(volatile void *);

lockops_t _kernel_lock_ops = {
	"Kernel lock",
	0,
	_kernel_lock_dump
};

/*
 * Initialize the kernel lock.
 */
void
kernel_lock_init(void)
{

	__cpu_simple_lock_init(&kernel_lock);
	kernel_lock_dodebug = LOCKDEBUG_ALLOC(&kernel_lock, &_kernel_lock_ops,
	    RETURN_ADDRESS);
}

/*
 * Print debugging information about the kernel lock.
 */
void
_kernel_lock_dump(volatile void *junk)
{
	struct cpu_info *ci = curcpu();

	(void)junk;

	printf_nolog("curcpu holds : %18d wanted by: %#018lx\n",
	    ci->ci_biglock_count, (long)ci->ci_biglock_wanted);
}

/*
 * Acquire 'nlocks' holds on the kernel lock.  The 'l' argument is
 * ignored; the hold is always charged to the current LWP (curlwp).
 */
void
_kernel_lock(int nlocks, struct lwp *l)
{
	struct cpu_info *ci = curcpu();
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
	struct lwp *owant;
#ifdef LOCKDEBUG
	u_int spins;
#endif
	int s;

	if (nlocks == 0)
		return;
	_KERNEL_LOCK_ASSERT(nlocks > 0);

	l = curlwp;

	if (ci->ci_biglock_count != 0) {
		_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(&kernel_lock));
		ci->ci_biglock_count += nlocks;
		l->l_blcnt += nlocks;
		return;
	}

	_KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
	LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, &kernel_lock, RETURN_ADDRESS,
	    0);

	s = splvm();
	if (__cpu_simple_lock_try(&kernel_lock)) {
		ci->ci_biglock_count = nlocks;
		l->l_blcnt = nlocks;
		LOCKDEBUG_LOCKED(kernel_lock_dodebug, &kernel_lock,
		    RETURN_ADDRESS, 0);
		splx(s);
		return;
	}

	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);

	/*
	 * Before setting ci_biglock_wanted we must post a store
	 * fence (see kern_mutex.c).  This is accomplished by the
	 * __cpu_simple_lock_try() above.
	 */
	owant = ci->ci_biglock_wanted;
	ci->ci_biglock_wanted = curlwp;	/* XXXAD */

#ifdef LOCKDEBUG
	spins = 0;
#endif

	do {
		splx(s);
		while (__SIMPLELOCK_LOCKED_P(&kernel_lock)) {
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins))
				_KERNEL_LOCK_ABORT("spinout");
#endif
			SPINLOCK_BACKOFF_HOOK;
			SPINLOCK_SPIN_HOOK;
		}
		(void)splvm();
	} while (!__cpu_simple_lock_try(&kernel_lock));

	ci->ci_biglock_wanted = owant;
	ci->ci_biglock_count = nlocks;
	l->l_blcnt = nlocks;
	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKDEBUG_LOCKED(kernel_lock_dodebug, &kernel_lock, RETURN_ADDRESS, 0);
	splx(s);

	/*
	 * Again, another store fence is required (see kern_mutex.c).
	 */
	membar_producer();
	if (owant == NULL) {
		LOCKSTAT_EVENT(lsflag, &kernel_lock, LB_KERNEL_LOCK | LB_SPIN,
		    1, spintime);
	}
	LOCKSTAT_EXIT(lsflag);
}

/*
 * Release 'nlocks' holds on the kernel lock.  If 'nlocks' is zero,
 * release all holds.  As with _kernel_lock(), the 'l' argument is
 * ignored; the current LWP's holds are released.
 */
void
_kernel_unlock(int nlocks, struct lwp *l, int *countp)
{
	struct cpu_info *ci = curcpu();
	u_int olocks;
	int s;

	l = curlwp;

	_KERNEL_LOCK_ASSERT(nlocks < 2);

	olocks = l->l_blcnt;

	if (olocks == 0) {
		_KERNEL_LOCK_ASSERT(nlocks <= 0);
		if (countp != NULL)
			*countp = 0;
		return;
	}

	_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(&kernel_lock));

	if (nlocks == 0)
		nlocks = olocks;
	else if (nlocks == -1) {
		nlocks = 1;
		_KERNEL_LOCK_ASSERT(olocks == 1);
	}

	_KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt);

	l->l_blcnt -= nlocks;
	if (ci->ci_biglock_count == nlocks) {
		s = splvm();
		LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, &kernel_lock,
		    RETURN_ADDRESS, 0);
		ci->ci_biglock_count = 0;
		__cpu_simple_unlock(&kernel_lock);
		splx(s);
	} else
		ci->ci_biglock_count -= nlocks;

	if (countp != NULL)
		*countp = olocks;
}
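
/*
 * Example (sketch; assumes the KERNEL_LOCK()/KERNEL_UNLOCK_ONE()
 * wrapper macros from <sys/systm.h>): code that must run under the big
 * lock takes one hold around the critical region.  Holds recurse, so a
 * caller already holding the lock simply has its per-CPU count bumped.
 *
 *	KERNEL_LOCK(1, NULL);
 *	... code that requires the big lock ...
 *	KERNEL_UNLOCK_ONE(NULL);
 */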
#endif /* !_RUMPKERNEL */