xref: /netbsd-src/sys/kern/kern_subr.c (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1 /*	$NetBSD: kern_subr.c,v 1.114 2004/10/24 17:06:24 cube Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, and by Luke Mewburn.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1982, 1986, 1991, 1993
42  *	The Regents of the University of California.  All rights reserved.
43  * (c) UNIX System Laboratories, Inc.
44  * All or some portions of this file are derived from material licensed
45  * to the University of California by American Telephone and Telegraph
46  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47  * the permission of UNIX System Laboratories, Inc.
48  *
49  * Copyright (c) 1992, 1993
50  *	The Regents of the University of California.  All rights reserved.
51  *
52  * This software was developed by the Computer Systems Engineering group
53  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
54  * contributed to Berkeley.
55  *
56  * All advertising materials mentioning features or use of this software
57  * must display the following acknowledgement:
58  *	This product includes software developed by the University of
59  *	California, Lawrence Berkeley Laboratory.
60  *
61  * Redistribution and use in source and binary forms, with or without
62  * modification, are permitted provided that the following conditions
63  * are met:
64  * 1. Redistributions of source code must retain the above copyright
65  *    notice, this list of conditions and the following disclaimer.
66  * 2. Redistributions in binary form must reproduce the above copyright
67  *    notice, this list of conditions and the following disclaimer in the
68  *    documentation and/or other materials provided with the distribution.
69  * 3. Neither the name of the University nor the names of its contributors
70  *    may be used to endorse or promote products derived from this software
71  *    without specific prior written permission.
72  *
73  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
74  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
77  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83  * SUCH DAMAGE.
84  *
85  *	@(#)kern_subr.c	8.4 (Berkeley) 2/14/95
86  */
87 
88 #include <sys/cdefs.h>
89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.114 2004/10/24 17:06:24 cube Exp $");
90 
91 #include "opt_ddb.h"
92 #include "opt_md.h"
93 #include "opt_syscall_debug.h"
94 #include "opt_ktrace.h"
95 #include "opt_systrace.h"
96 
97 #include <sys/param.h>
98 #include <sys/systm.h>
99 #include <sys/proc.h>
100 #include <sys/malloc.h>
101 #include <sys/mount.h>
102 #include <sys/device.h>
103 #include <sys/reboot.h>
104 #include <sys/conf.h>
105 #include <sys/disklabel.h>
106 #include <sys/queue.h>
107 #include <sys/systrace.h>
108 #include <sys/ktrace.h>
109 
110 #include <uvm/uvm_extern.h>
111 
112 #include <dev/cons.h>
113 
114 #include <net/if.h>
115 
/*
 * Forward declarations for the root-device lookup helpers defined later
 * in this file.
 * XXX these should eventually move to subr_autoconf.c
 */
static struct device *finddevice(const char *);
static struct device *getdisk(char *, int, int, dev_t *, int);
static struct device *parsedisk(char *, int, int, dev_t *);
120 
/*
 * A generic linear hook.
 *
 * One of these is allocated per registered hook by hook_establish() and
 * linked at the head of the caller's hook list.
 */
struct hook_desc {
	LIST_ENTRY(hook_desc) hk_list;	/* linkage on the owning hook list */
	void	(*hk_fn)(void *);	/* callback; users with a different
					   signature cast it back on call */
	void	*hk_arg;		/* opaque argument stored with hk_fn */
};
typedef LIST_HEAD(, hook_desc) hook_list_t;	/* head of a hook list */
130 
/* Generic hook-list helpers shared by the hook families in this file. */
static void *hook_establish(hook_list_t *, void (*)(void *), void *);
static void hook_disestablish(hook_list_t *, void *);
static void hook_destroy(hook_list_t *);
static void hook_proc_run(hook_list_t *, struct proc *);

/* Malloc type "iov"; it is not used in the part of the file shown here. */
MALLOC_DEFINE(M_IOV, "iov", "large iov's");
137 
138 int
139 uiomove(buf, n, uio)
140 	void *buf;
141 	size_t n;
142 	struct uio *uio;
143 {
144 	struct iovec *iov;
145 	u_int cnt;
146 	int error = 0;
147 	char *cp = buf;
148 	struct proc *p = uio->uio_procp;
149 	int hold_count;
150 
151 	hold_count = KERNEL_LOCK_RELEASE_ALL();
152 
153 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
154 	spinlock_switchcheck();
155 #endif
156 #ifdef LOCKDEBUG
157 	simple_lock_only_held(NULL, "uiomove");
158 #endif
159 
160 #ifdef DIAGNOSTIC
161 	if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
162 		panic("uiomove: mode");
163 #endif
164 	while (n > 0 && uio->uio_resid) {
165 		iov = uio->uio_iov;
166 		cnt = iov->iov_len;
167 		if (cnt == 0) {
168 			KASSERT(uio->uio_iovcnt > 0);
169 			uio->uio_iov++;
170 			uio->uio_iovcnt--;
171 			continue;
172 		}
173 		if (cnt > n)
174 			cnt = n;
175 		switch (uio->uio_segflg) {
176 
177 		case UIO_USERSPACE:
178 			if (curcpu()->ci_schedstate.spc_flags &
179 			    SPCF_SHOULDYIELD)
180 				preempt(1);
181 			if (__predict_true(p == curproc)) {
182 				if (uio->uio_rw == UIO_READ)
183 					error = copyout(cp, iov->iov_base, cnt);
184 				else
185 					error = copyin(iov->iov_base, cp, cnt);
186 			} else {
187 				if (uio->uio_rw == UIO_READ)
188 					error = copyout_proc(p, cp,
189 					    iov->iov_base, cnt);
190 				else
191 					error = copyin_proc(p, iov->iov_base,
192 					    cp, cnt);
193 			}
194 			if (error)
195 				goto out;
196 			break;
197 
198 		case UIO_SYSSPACE:
199 			if (uio->uio_rw == UIO_READ)
200 				error = kcopy(cp, iov->iov_base, cnt);
201 			else
202 				error = kcopy(iov->iov_base, cp, cnt);
203 			if (error)
204 				goto out;
205 			break;
206 		}
207 		iov->iov_base = (caddr_t)iov->iov_base + cnt;
208 		iov->iov_len -= cnt;
209 		uio->uio_resid -= cnt;
210 		uio->uio_offset += cnt;
211 		cp += cnt;
212 		KDASSERT(cnt <= n);
213 		n -= cnt;
214 	}
215 out:
216 	KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
217 	return (error);
218 }
219 
220 /*
221  * Wrapper for uiomove() that validates the arguments against a known-good
222  * kernel buffer.
223  */
224 int
225 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio)
226 {
227 	size_t offset;
228 
229 	if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
230 	    (offset = uio->uio_offset) != uio->uio_offset)
231 		return (EINVAL);
232 	if (offset >= buflen)
233 		return (0);
234 	return (uiomove((char *)buf + offset, buflen - offset, uio));
235 }
236 
237 /*
238  * Give next character to user as result of read.
239  */
240 int
241 ureadc(c, uio)
242 	int c;
243 	struct uio *uio;
244 {
245 	struct iovec *iov;
246 
247 	if (uio->uio_resid <= 0)
248 		panic("ureadc: non-positive resid");
249 again:
250 	if (uio->uio_iovcnt <= 0)
251 		panic("ureadc: non-positive iovcnt");
252 	iov = uio->uio_iov;
253 	if (iov->iov_len <= 0) {
254 		uio->uio_iovcnt--;
255 		uio->uio_iov++;
256 		goto again;
257 	}
258 	switch (uio->uio_segflg) {
259 
260 	case UIO_USERSPACE:
261 		if (subyte(iov->iov_base, c) < 0)
262 			return (EFAULT);
263 		break;
264 
265 	case UIO_SYSSPACE:
266 		*(char *)iov->iov_base = c;
267 		break;
268 	}
269 	iov->iov_base = (caddr_t)iov->iov_base + 1;
270 	iov->iov_len--;
271 	uio->uio_resid--;
272 	uio->uio_offset++;
273 	return (0);
274 }
275 
276 /*
277  * Like copyin(), but operates on an arbitrary process.
278  */
279 int
280 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len)
281 {
282 	struct iovec iov;
283 	struct uio uio;
284 	int error;
285 
286 	if (len == 0)
287 		return (0);
288 
289 	iov.iov_base = kaddr;
290 	iov.iov_len = len;
291 	uio.uio_iov = &iov;
292 	uio.uio_iovcnt = 1;
293 	uio.uio_offset = (off_t)(intptr_t)uaddr;
294 	uio.uio_resid = len;
295 	uio.uio_segflg = UIO_SYSSPACE;
296 	uio.uio_rw = UIO_READ;
297 	uio.uio_procp = NULL;
298 
299 	/* XXXCDC: how should locking work here? */
300 	if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
301 		return (EFAULT);
302 	p->p_vmspace->vm_refcnt++;	/* XXX */
303 	error = uvm_io(&p->p_vmspace->vm_map, &uio);
304 	uvmspace_free(p->p_vmspace);
305 
306 	return (error);
307 }
308 
309 /*
310  * Like copyout(), but operates on an arbitrary process.
311  */
312 int
313 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len)
314 {
315 	struct iovec iov;
316 	struct uio uio;
317 	int error;
318 
319 	if (len == 0)
320 		return (0);
321 
322 	iov.iov_base = (void *) kaddr;	/* XXX cast away const */
323 	iov.iov_len = len;
324 	uio.uio_iov = &iov;
325 	uio.uio_iovcnt = 1;
326 	uio.uio_offset = (off_t)(intptr_t)uaddr;
327 	uio.uio_resid = len;
328 	uio.uio_segflg = UIO_SYSSPACE;
329 	uio.uio_rw = UIO_WRITE;
330 	uio.uio_procp = NULL;
331 
332 	/* XXXCDC: how should locking work here? */
333 	if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
334 		return (EFAULT);
335 	p->p_vmspace->vm_refcnt++;	/* XXX */
336 	error = uvm_io(&p->p_vmspace->vm_map, &uio);
337 	uvmspace_free(p->p_vmspace);
338 
339 	return (error);
340 }
341 
342 /*
343  * General routine to allocate a hash table.
344  * Allocate enough memory to hold at least `elements' list-head pointers.
345  * Return a pointer to the allocated space and set *hashmask to a pattern
346  * suitable for masking a value to use as an index into the returned array.
347  */
348 void *
349 hashinit(elements, htype, mtype, mflags, hashmask)
350 	u_int elements;
351 	enum hashtype htype;
352 	struct malloc_type *mtype;
353 	int mflags;
354 	u_long *hashmask;
355 {
356 	u_long hashsize, i;
357 	LIST_HEAD(, generic) *hashtbl_list;
358 	TAILQ_HEAD(, generic) *hashtbl_tailq;
359 	size_t esize;
360 	void *p;
361 
362 	if (elements == 0)
363 		panic("hashinit: bad cnt");
364 	for (hashsize = 1; hashsize < elements; hashsize <<= 1)
365 		continue;
366 
367 	switch (htype) {
368 	case HASH_LIST:
369 		esize = sizeof(*hashtbl_list);
370 		break;
371 	case HASH_TAILQ:
372 		esize = sizeof(*hashtbl_tailq);
373 		break;
374 	default:
375 #ifdef DIAGNOSTIC
376 		panic("hashinit: invalid table type");
377 #else
378 		return NULL;
379 #endif
380 	}
381 
382 	if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL)
383 		return (NULL);
384 
385 	switch (htype) {
386 	case HASH_LIST:
387 		hashtbl_list = p;
388 		for (i = 0; i < hashsize; i++)
389 			LIST_INIT(&hashtbl_list[i]);
390 		break;
391 	case HASH_TAILQ:
392 		hashtbl_tailq = p;
393 		for (i = 0; i < hashsize; i++)
394 			TAILQ_INIT(&hashtbl_tailq[i]);
395 		break;
396 	}
397 	*hashmask = hashsize - 1;
398 	return (p);
399 }
400 
401 /*
402  * Free memory from hash table previosly allocated via hashinit().
403  */
404 void
405 hashdone(hashtbl, mtype)
406 	void *hashtbl;
407 	struct malloc_type *mtype;
408 {
409 
410 	free(hashtbl, mtype);
411 }
412 
413 
414 static void *
415 hook_establish(list, fn, arg)
416 	hook_list_t *list;
417 	void (*fn)(void *);
418 	void *arg;
419 {
420 	struct hook_desc *hd;
421 
422 	hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT);
423 	if (hd == NULL)
424 		return (NULL);
425 
426 	hd->hk_fn = fn;
427 	hd->hk_arg = arg;
428 	LIST_INSERT_HEAD(list, hd, hk_list);
429 
430 	return (hd);
431 }
432 
433 static void
434 hook_disestablish(list, vhook)
435 	hook_list_t *list;
436 	void *vhook;
437 {
438 #ifdef DIAGNOSTIC
439 	struct hook_desc *hd;
440 
441 	LIST_FOREACH(hd, list, hk_list) {
442                 if (hd == vhook)
443 			break;
444 	}
445 
446 	if (hd == NULL)
447 		panic("hook_disestablish: hook %p not established", vhook);
448 #endif
449 	LIST_REMOVE((struct hook_desc *)vhook, hk_list);
450 	free(vhook, M_DEVBUF);
451 }
452 
453 static void
454 hook_destroy(list)
455 	hook_list_t *list;
456 {
457 	struct hook_desc *hd;
458 
459 	while ((hd = LIST_FIRST(list)) != NULL) {
460 		LIST_REMOVE(hd, hk_list);
461 		free(hd, M_DEVBUF);
462 	}
463 }
464 
465 static void
466 hook_proc_run(list, p)
467 	hook_list_t *list;
468 	struct proc *p;
469 {
470 	struct hook_desc *hd;
471 
472 	for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) {
473 		((void (*)(struct proc *, void *))*hd->hk_fn)(p,
474 		    hd->hk_arg);
475 	}
476 }
477 
/*
 * "Shutdown hook" types, functions, and variables.
 *
 * Should be invoked immediately before the
 * system is halted or rebooted, i.e. after file systems unmounted,
 * after crash dump done, etc.
 *
 * Each shutdown hook is removed from the list before it's run, so that
 * it won't be run again.
 */

/* Hooks added by shutdownhook_establish(); drained by doshutdownhooks(). */
hook_list_t shutdownhook_list;
490 
491 void *
492 shutdownhook_establish(fn, arg)
493 	void (*fn)(void *);
494 	void *arg;
495 {
496 	return hook_establish(&shutdownhook_list, fn, arg);
497 }
498 
499 void
500 shutdownhook_disestablish(vhook)
501 	void *vhook;
502 {
503 	hook_disestablish(&shutdownhook_list, vhook);
504 }
505 
506 /*
507  * Run shutdown hooks.  Should be invoked immediately before the
508  * system is halted or rebooted, i.e. after file systems unmounted,
509  * after crash dump done, etc.
510  *
511  * Each shutdown hook is removed from the list before it's run, so that
512  * it won't be run again.
513  */
514 void
515 doshutdownhooks()
516 {
517 	struct hook_desc *dp;
518 
519 	while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) {
520 		LIST_REMOVE(dp, hk_list);
521 		(*dp->hk_fn)(dp->hk_arg);
522 #if 0
523 		/*
524 		 * Don't bother freeing the hook structure,, since we may
525 		 * be rebooting because of a memory corruption problem,
526 		 * and this might only make things worse.  It doesn't
527 		 * matter, anyway, since the system is just about to
528 		 * reboot.
529 		 */
530 		free(dp, M_DEVBUF);
531 #endif
532 	}
533 }
534 
/*
 * "Mountroot hook" types, functions, and variables.
 */

/* Hooks added by mountroothook_establish(); hk_arg holds the device. */
hook_list_t mountroothook_list;
540 
541 void *
542 mountroothook_establish(fn, dev)
543 	void (*fn)(struct device *);
544 	struct device *dev;
545 {
546 	return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev);
547 }
548 
549 void
550 mountroothook_disestablish(vhook)
551 	void *vhook;
552 {
553 	hook_disestablish(&mountroothook_list, vhook);
554 }
555 
556 void
557 mountroothook_destroy()
558 {
559 	hook_destroy(&mountroothook_list);
560 }
561 
562 void
563 domountroothook()
564 {
565 	struct hook_desc *hd;
566 
567 	LIST_FOREACH(hd, &mountroothook_list, hk_list) {
568 		if (hd->hk_arg == (void *)root_device) {
569 			(*hd->hk_fn)(hd->hk_arg);
570 			return;
571 		}
572 	}
573 }
574 
/* Hooks added by exechook_establish(); run by doexechooks(). */
hook_list_t exechook_list;
576 
577 void *
578 exechook_establish(fn, arg)
579 	void (*fn)(struct proc *, void *);
580 	void *arg;
581 {
582 	return hook_establish(&exechook_list, (void (*)(void *))fn, arg);
583 }
584 
585 void
586 exechook_disestablish(vhook)
587 	void *vhook;
588 {
589 	hook_disestablish(&exechook_list, vhook);
590 }
591 
592 /*
593  * Run exec hooks.
594  */
595 void
596 doexechooks(p)
597 	struct proc *p;
598 {
599 	hook_proc_run(&exechook_list, p);
600 }
601 
/* Hooks added by exithook_establish(); run by doexithooks(). */
hook_list_t exithook_list;
603 
604 void *
605 exithook_establish(fn, arg)
606 	void (*fn)(struct proc *, void *);
607 	void *arg;
608 {
609 	return hook_establish(&exithook_list, (void (*)(void *))fn, arg);
610 }
611 
612 void
613 exithook_disestablish(vhook)
614 	void *vhook;
615 {
616 	hook_disestablish(&exithook_list, vhook);
617 }
618 
619 /*
620  * Run exit hooks.
621  */
622 void
623 doexithooks(p)
624 	struct proc *p;
625 {
626 	hook_proc_run(&exithook_list, p);
627 }
628 
/* Hooks added by forkhook_establish(); run by doforkhooks(). */
hook_list_t forkhook_list;
630 
631 void *
632 forkhook_establish(fn)
633 	void (*fn)(struct proc *, struct proc *);
634 {
635 	return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL);
636 }
637 
638 void
639 forkhook_disestablish(vhook)
640 	void *vhook;
641 {
642 	hook_disestablish(&forkhook_list, vhook);
643 }
644 
645 /*
646  * Run fork hooks.
647  */
648 void
649 doforkhooks(p2, p1)
650 	struct proc *p2, *p1;
651 {
652 	struct hook_desc *hd;
653 
654 	LIST_FOREACH(hd, &forkhook_list, hk_list) {
655 		((void (*)(struct proc *, struct proc *))*hd->hk_fn)
656 		    (p2, p1);
657 	}
658 }
659 
/*
 * "Power hook" types, functions, and variables.
 * The list of power hooks is kept ordered with the last registered hook
 * first.
 * When running the hooks on power down the hooks are called in reverse
 * registration order, when powering up in registration order.
 */
struct powerhook_desc {
	CIRCLEQ_ENTRY(powerhook_desc) sfd_list;	/* linkage on powerhook_list */
	void	(*sfd_fn)(int, void *);		/* called as fn(why, arg) */
	void	*sfd_arg;			/* opaque argument for sfd_fn */
};
672 
/* All registered power hooks; powerhook_establish() inserts at the head. */
CIRCLEQ_HEAD(, powerhook_desc) powerhook_list =
	CIRCLEQ_HEAD_INITIALIZER(powerhook_list);
675 
676 void *
677 powerhook_establish(fn, arg)
678 	void (*fn)(int, void *);
679 	void *arg;
680 {
681 	struct powerhook_desc *ndp;
682 
683 	ndp = (struct powerhook_desc *)
684 	    malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT);
685 	if (ndp == NULL)
686 		return (NULL);
687 
688 	ndp->sfd_fn = fn;
689 	ndp->sfd_arg = arg;
690 	CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list);
691 
692 	return (ndp);
693 }
694 
695 void
696 powerhook_disestablish(vhook)
697 	void *vhook;
698 {
699 #ifdef DIAGNOSTIC
700 	struct powerhook_desc *dp;
701 
702 	CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list)
703                 if (dp == vhook)
704 			goto found;
705 	panic("powerhook_disestablish: hook %p not established", vhook);
706  found:
707 #endif
708 
709 	CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook,
710 	    sfd_list);
711 	free(vhook, M_DEVBUF);
712 }
713 
714 /*
715  * Run power hooks.
716  */
717 void
718 dopowerhooks(why)
719 	int why;
720 {
721 	struct powerhook_desc *dp;
722 
723 	if (why == PWR_RESUME || why == PWR_SOFTRESUME) {
724 		CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) {
725 			(*dp->sfd_fn)(why, dp->sfd_arg);
726 		}
727 	} else {
728 		CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) {
729 			(*dp->sfd_fn)(why, dp->sfd_arg);
730 		}
731 	}
732 }
733 
/*
 * Determine the root device and, if instructed to, the root file system.
 */

/* Memory-disk hooks are disabled when NMD is 0. */
#include "md.h"
#if NMD == 0
#undef MEMORY_DISK_HOOKS
#endif

#ifdef MEMORY_DISK_HOOKS
/* Stand-in device entries for the memory disks; filled in by setroot(). */
static struct device fakemdrootdev[NMD];
#endif

/* A memory-disk root makes finddevice() search fakemdrootdev[] as well. */
#ifdef MEMORY_DISK_IS_ROOT
#define BOOT_FROM_MEMORY_HOOKS 1
#endif

/* RAID root hooks are enabled only when NRAID is exactly 1. */
#include "raid.h"
#if NRAID == 1
#define BOOT_FROM_RAID_HOOKS 1
#endif

#ifdef BOOT_FROM_RAID_HOOKS
/* Defined elsewhere; raidrootdev is indexed 0..numraid-1 in this file. */
extern int numraid;
extern struct device *raidrootdev;
#endif
760 
/*
 * The device and wedge that we booted from.  If booted_wedge is NULL,
 * then we might consult booted_partition.
 */
struct device *booted_device;
struct device *booted_wedge;
int booted_partition;
768 
/*
 * Use partition letters if it's a disk class but not a wedge.
 * XXX Check for wedge is kinda gross.
 *
 * Evaluates to non-zero when `dv' is of class DV_DISK and either has no
 * cfdata or its cfdata name is not "dk".  `dv' is evaluated more than
 * once, so the argument must not have side effects.
 */
#define	DEV_USES_PARTITIONS(dv)						\
	((dv)->dv_class == DV_DISK &&					\
	((dv)->dv_cfdata == NULL ||					\
	 strcmp((dv)->dv_cfdata->cf_name, "dk") != 0))
777 
/*
 * Select the root device and the dump device, and on the interactive
 * path also the root file system type.  Results are recorded in the
 * globals root_device, rootdev, dumpdev and (interactive path only)
 * mountroot.
 *
 * bootdv/bootpartition describe the device the system was booted from;
 * bootdv may be NULL.  One of three paths is taken:
 *
 *	- RB_ASKNAME is set in boothowto: prompt on the console for the
 *	  root device, the dump device and the file system type;
 *	- rootspec is NULL: use bootdv and bootpartition;
 *	- otherwise: use rootspec if it names a network interface, else
 *	  look up the device that the current rootdev refers to.
 *
 * When a non-interactive path cannot resolve a usable device, the code
 * sets RB_ASKNAME and restarts at `top' to ask the operator.
 */
void
setroot(bootdv, bootpartition)
	struct device *bootdv;
	int bootpartition;
{
	struct device *dv;
	int len;
#ifdef MEMORY_DISK_HOOKS
	int i;
#endif
	dev_t nrootdev;
	dev_t ndumpdev = NODEV;
	char buf[128];
	const char *rootdevname;
	const char *dumpdevname;
	struct device *rootdv = NULL;		/* XXX gcc -Wuninitialized */
	struct device *dumpdv = NULL;
	struct ifnet *ifp;
	const char *deffsname;
	struct vfsops *vops;

#ifdef MEMORY_DISK_HOOKS
	/* Fill in the stand-in device entries "md0", "md1", ... */
	for (i = 0; i < NMD; i++) {
		fakemdrootdev[i].dv_class  = DV_DISK;
		fakemdrootdev[i].dv_cfdata = NULL;
		fakemdrootdev[i].dv_unit   = i;
		fakemdrootdev[i].dv_parent = NULL;
		snprintf(fakemdrootdev[i].dv_xname,
		    sizeof(fakemdrootdev[i].dv_xname), "md%d", i);
	}
#endif /* MEMORY_DISK_HOOKS */

#ifdef MEMORY_DISK_IS_ROOT
	/* Forced memory-disk root: override whatever the caller passed. */
	bootdv = &fakemdrootdev[0];
	bootpartition = 0;
#endif

	/*
	 * If NFS is specified as the file system, and we found
	 * a DV_DISK boot device (or no boot device at all), then
	 * find a reasonable network interface for "rootspec".
	 *
	 * NOTE(review): the test below is "not DV_IFNET", which is wider
	 * than the "DV_DISK" wording above.
	 */
	vops = vfs_getopsbyname("nfs");
	if (vops != NULL && vops->vfs_mountroot == mountroot &&
	    rootspec == NULL &&
	    (bootdv == NULL || bootdv->dv_class != DV_IFNET)) {
		/* First interface that is neither loopback nor p2p. */
		TAILQ_FOREACH(ifp, &ifnet, if_list) {
			if ((ifp->if_flags &
			     (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
				break;
		}
		if (ifp == NULL) {
			/*
			 * Can't find a suitable interface; ask the
			 * user.
			 */
			boothowto |= RB_ASKNAME;
		} else {
			/*
			 * Have a suitable interface; behave as if
			 * the user specified this interface.
			 */
			rootspec = (const char *)ifp->if_xname;
		}
	}

	/*
	 * If wildcarded root and the boot device wasn't determined,
	 * ask the user.
	 */
	if (rootspec == NULL && bootdv == NULL)
		boothowto |= RB_ASKNAME;

 top:
	if (boothowto & RB_ASKNAME) {
		struct device *defdumpdv;

		/* Prompt until getdisk() accepts a root device. */
		for (;;) {
			printf("root device");
			if (bootdv != NULL) {
				printf(" (default %s", bootdv->dv_xname);
				if (DEV_USES_PARTITIONS(bootdv))
					printf("%c", bootpartition + 'a');
				printf(")");
			}
			printf(": ");
			len = cngetsn(buf, sizeof(buf));
			/* Empty input selects the default, if there is one. */
			if (len == 0 && bootdv != NULL) {
				strlcpy(buf, bootdv->dv_xname, sizeof(buf));
				len = strlen(buf);
			}
			/*
			 * A trailing '*' is stripped and the name is first
			 * tried with default partition 1.
			 */
			if (len > 0 && buf[len - 1] == '*') {
				buf[--len] = '\0';
				dv = getdisk(buf, len, 1, &nrootdev, 0);
				if (dv != NULL) {
					rootdv = dv;
					break;
				}
			}
			dv = getdisk(buf, len, bootpartition, &nrootdev, 0);
			if (dv != NULL) {
				rootdv = dv;
				break;
			}
		}

		/*
		 * Set up the default dump device.  If root is on
		 * a network device, there is no default dump
		 * device, since we don't support dumps to the
		 * network.
		 */
		if (DEV_USES_PARTITIONS(rootdv) == 0)
			defdumpdv = NULL;
		else
			defdumpdv = rootdv;

		/* Prompt until a dump device (or "none") is accepted. */
		for (;;) {
			printf("dump device");
			if (defdumpdv != NULL) {
				/*
				 * Note, we know it's a disk if we get here.
				 */
				printf(" (default %sb)", defdumpdv->dv_xname);
			}
			printf(": ");
			len = cngetsn(buf, sizeof(buf));
			if (len == 0) {
				/* Default: partition 1 of the root disk. */
				if (defdumpdv != NULL) {
					ndumpdev = MAKEDISKDEV(major(nrootdev),
					    DISKUNIT(nrootdev), 1);
				}
				dumpdv = defdumpdv;
				break;
			}
			if (len == 4 && strcmp(buf, "none") == 0) {
				dumpdv = NULL;
				break;
			}
			dv = getdisk(buf, len, 1, &ndumpdev, 1);
			if (dv != NULL) {
				dumpdv = dv;
				break;
			}
		}

		rootdev = nrootdev;
		dumpdev = ndumpdev;

		/*
		 * Find the file system whose mountroot routine is the
		 * currently configured one; it is offered as the default.
		 */
		for (vops = LIST_FIRST(&vfs_list); vops != NULL;
		     vops = LIST_NEXT(vops, vfs_list)) {
			if (vops->vfs_mountroot != NULL &&
			    vops->vfs_mountroot == mountroot)
			break;
		}

		if (vops == NULL) {
			mountroot = NULL;
			deffsname = "generic";
		} else
			deffsname = vops->vfs_name;

		/* Prompt until a file system type is accepted. */
		for (;;) {
			printf("file system (default %s): ", deffsname);
			len = cngetsn(buf, sizeof(buf));
			if (len == 0)
				break;
			if (len == 4 && strcmp(buf, "halt") == 0)
				cpu_reboot(RB_HALT, NULL);
			else if (len == 6 && strcmp(buf, "reboot") == 0)
				cpu_reboot(0, NULL);
#if defined(DDB)
			else if (len == 3 && strcmp(buf, "ddb") == 0) {
				console_debugger();
			}
#endif
			else if (len == 7 && strcmp(buf, "generic") == 0) {
				mountroot = NULL;
				break;
			}
			/*
			 * Anything that did not break out above is looked
			 * up as a file system name.
			 */
			vops = vfs_getopsbyname(buf);
			if (vops == NULL || vops->vfs_mountroot == NULL) {
				printf("use one of: generic");
				for (vops = LIST_FIRST(&vfs_list);
				     vops != NULL;
				     vops = LIST_NEXT(vops, vfs_list)) {
					if (vops->vfs_mountroot != NULL)
						printf(" %s", vops->vfs_name);
				}
#if defined(DDB)
				printf(" ddb");
#endif
				printf(" halt reboot\n");
			} else {
				mountroot = vops->vfs_mountroot;
				break;
			}
		}

	} else if (rootspec == NULL) {
		int majdev;

		/*
		 * Wildcarded root; use the boot device.
		 * (bootdv is non-NULL here: the NULL case set RB_ASKNAME
		 * above.)
		 */
		rootdv = bootdv;

		majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0);
		if (majdev >= 0) {
			/*
			 * Root is on a disk.  `bootpartition' is root,
			 * unless the device does not use partitions.
			 */
			if (DEV_USES_PARTITIONS(bootdv))
				rootdev = MAKEDISKDEV(majdev, bootdv->dv_unit,
				    bootpartition);
			else
				rootdev = makedev(majdev, bootdv->dv_unit);
		}
	} else {

		/*
		 * `root on <dev> ...'
		 */

		/*
		 * If it's a network interface, we can bail out
		 * early.
		 */
		dv = finddevice(rootspec);
		if (dv != NULL && dv->dv_class == DV_IFNET) {
			rootdv = dv;
			goto haveroot;
		}

		/* Otherwise derive the device name from rootdev. */
		rootdevname = devsw_blk2name(major(rootdev));
		if (rootdevname == NULL) {
			printf("unknown device major 0x%x\n", rootdev);
			boothowto |= RB_ASKNAME;
			goto top;
		}
		memset(buf, 0, sizeof(buf));
		snprintf(buf, sizeof(buf), "%s%d", rootdevname,
		    DISKUNIT(rootdev));

		rootdv = finddevice(buf);
		if (rootdv == NULL) {
			printf("device %s (0x%x) not configured\n",
			    buf, rootdev);
			boothowto |= RB_ASKNAME;
			goto top;
		}
	}

 haveroot:

	root_device = rootdv;

	switch (rootdv->dv_class) {
	case DV_IFNET:
		aprint_normal("root on %s", rootdv->dv_xname);
		break;

	case DV_DISK:
		aprint_normal("root on %s%c", rootdv->dv_xname,
		    DISKPART(rootdev) + 'a');
		break;

	default:
		/* Neither a network interface nor a disk: ask instead. */
		printf("can't determine root device\n");
		boothowto |= RB_ASKNAME;
		goto top;
	}

	/*
	 * Now configure the dump device.
	 *
	 * If we haven't figured out the dump device, do so, with
	 * the following rules:
	 *
	 *	(a) We already know dumpdv in the RB_ASKNAME case.
	 *
	 *	(b) If dumpspec is set, try to use it.  If the device
	 *	    is not available, punt.
	 *
	 *	(c) If dumpspec is not set, the dump device is
	 *	    wildcarded or unspecified.  If the root device
	 *	    is DV_IFNET, punt.  Otherwise, use partition b
	 *	    of the root device.
	 */

	if (boothowto & RB_ASKNAME) {		/* (a) */
		if (dumpdv == NULL)
			goto nodumpdev;
	} else if (dumpspec != NULL) {		/* (b) */
		if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) {
			/*
			 * Operator doesn't want a dump device.
			 * Or looks like they tried to pick a network
			 * device.  Oops.
			 */
			goto nodumpdev;
		}

		dumpdevname = devsw_blk2name(major(dumpdev));
		if (dumpdevname == NULL)
			goto nodumpdev;
		memset(buf, 0, sizeof(buf));
		snprintf(buf, sizeof(buf), "%s%d", dumpdevname,
		    DISKUNIT(dumpdev));

		dumpdv = finddevice(buf);
		if (dumpdv == NULL) {
			/*
			 * Device not configured.
			 */
			goto nodumpdev;
		}
	} else {				/* (c) */
		if (DEV_USES_PARTITIONS(rootdv) == 0)
			goto nodumpdev;
		else {
			dumpdv = rootdv;
			dumpdev = MAKEDISKDEV(major(rootdev),
			    dumpdv->dv_unit, 1);
		}
	}

	aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname,
	    DISKPART(dumpdev) + 'a');
	return;

 nodumpdev:
	/* No dump device: record that and finish the "root on" line. */
	dumpdev = NODEV;
	aprint_normal("\n");
}
1114 
1115 static struct device *
1116 finddevice(name)
1117 	const char *name;
1118 {
1119 	struct device *dv;
1120 #if defined(BOOT_FROM_RAID_HOOKS) || defined(BOOT_FROM_MEMORY_HOOKS)
1121 	int j;
1122 #endif /* BOOT_FROM_RAID_HOOKS || BOOT_FROM_MEMORY_HOOKS */
1123 
1124 #ifdef BOOT_FROM_RAID_HOOKS
1125 	for (j = 0; j < numraid; j++) {
1126 		if (strcmp(name, raidrootdev[j].dv_xname) == 0) {
1127 			dv = &raidrootdev[j];
1128 			return (dv);
1129 		}
1130 	}
1131 #endif /* BOOT_FROM_RAID_HOOKS */
1132 
1133 #ifdef BOOT_FROM_MEMORY_HOOKS
1134 	for (j = 0; j < NMD; j++) {
1135 		if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) {
1136 			dv = &fakemdrootdev[j];
1137 			return (dv);
1138 		}
1139 	}
1140 #endif /* BOOT_FROM_MEMORY_HOOKS */
1141 
1142 	for (dv = TAILQ_FIRST(&alldevs); dv != NULL;
1143 	    dv = TAILQ_NEXT(dv, dv_list))
1144 		if (strcmp(dv->dv_xname, name) == 0)
1145 			break;
1146 	return (dv);
1147 }
1148 
1149 static struct device *
1150 getdisk(str, len, defpart, devp, isdump)
1151 	char *str;
1152 	int len, defpart;
1153 	dev_t *devp;
1154 	int isdump;
1155 {
1156 	struct device	*dv;
1157 #ifdef MEMORY_DISK_HOOKS
1158 	int		i;
1159 #endif
1160 #ifdef BOOT_FROM_RAID_HOOKS
1161 	int 		j;
1162 #endif
1163 
1164 	if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
1165 		printf("use one of:");
1166 #ifdef MEMORY_DISK_HOOKS
1167 		if (isdump == 0)
1168 			for (i = 0; i < NMD; i++)
1169 				printf(" %s[a-%c]", fakemdrootdev[i].dv_xname,
1170 				    'a' + MAXPARTITIONS - 1);
1171 #endif
1172 #ifdef BOOT_FROM_RAID_HOOKS
1173 		if (isdump == 0)
1174 			for (j = 0; j < numraid; j++)
1175 				printf(" %s[a-%c]", raidrootdev[j].dv_xname,
1176 				    'a' + MAXPARTITIONS - 1);
1177 #endif
1178 		TAILQ_FOREACH(dv, &alldevs, dv_list) {
1179 			if (DEV_USES_PARTITIONS(dv))
1180 				printf(" %s[a-%c]", dv->dv_xname,
1181 				    'a' + MAXPARTITIONS - 1);
1182 			else if (dv->dv_class == DV_DISK)
1183 				printf(" %s", dv->dv_xname);
1184 			if (isdump == 0 && dv->dv_class == DV_IFNET)
1185 				printf(" %s", dv->dv_xname);
1186 		}
1187 		if (isdump)
1188 			printf(" none");
1189 #if defined(DDB)
1190 		printf(" ddb");
1191 #endif
1192 		printf(" halt reboot\n");
1193 	}
1194 	return (dv);
1195 }
1196 
1197 static struct device *
1198 parsedisk(str, len, defpart, devp)
1199 	char *str;
1200 	int len, defpart;
1201 	dev_t *devp;
1202 {
1203 	struct device *dv;
1204 	char *cp, c;
1205 	int majdev, part;
1206 #ifdef MEMORY_DISK_HOOKS
1207 	int i;
1208 #endif
1209 	if (len == 0)
1210 		return (NULL);
1211 
1212 	if (len == 4 && strcmp(str, "halt") == 0)
1213 		cpu_reboot(RB_HALT, NULL);
1214 	else if (len == 6 && strcmp(str, "reboot") == 0)
1215 		cpu_reboot(0, NULL);
1216 #if defined(DDB)
1217 	else if (len == 3 && strcmp(str, "ddb") == 0)
1218 		console_debugger();
1219 #endif
1220 
1221 	cp = str + len - 1;
1222 	c = *cp;
1223 	if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) {
1224 		part = c - 'a';
1225 		*cp = '\0';
1226 	} else
1227 		part = defpart;
1228 
1229 #ifdef MEMORY_DISK_HOOKS
1230 	for (i = 0; i < NMD; i++)
1231 		if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) {
1232 			dv = &fakemdrootdev[i];
1233 			goto gotdisk;
1234 		}
1235 #endif
1236 
1237 	dv = finddevice(str);
1238 	if (dv != NULL) {
1239 		if (dv->dv_class == DV_DISK) {
1240 #ifdef MEMORY_DISK_HOOKS
1241  gotdisk:
1242 #endif
1243 			majdev = devsw_name2blk(dv->dv_xname, NULL, 0);
1244 			if (majdev < 0)
1245 				panic("parsedisk");
1246 			if (DEV_USES_PARTITIONS(dv))
1247 				*devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
1248 			else
1249 				*devp = makedev(majdev, dv->dv_unit);
1250 		}
1251 
1252 		if (dv->dv_class == DV_IFNET)
1253 			*devp = NODEV;
1254 	}
1255 
1256 	*cp = c;
1257 	return (dv);
1258 }
1259 
/*
 * humanize_number:
 *	snprintf() `bytes' into `buf', dividing it by `divisor' as often as
 *	needed so that the number, plus a possible ` x' prefix and the
 *	suffix, fits into `len' bytes (including the terminating NUL).
 *
 *	buf	destination buffer; set to the empty string on failure
 *		whenever len > 0
 *	len	size of buf; must be at least 4 + strlen(suffix)
 *	bytes	value to format
 *	suffix	unit string appended after the prefix letter
 *	divisor	scaling step; 1024 selects the prefixes " KMGTPE", any
 *		other value " kMGTPE".  Must be positive.
 *
 *	Returns the value returned by snprintf(), or -1 if buf or suffix is
 *	NULL, divisor is not positive, or len is too small.
 *
 *	E.g, given a len of 9, a suffix of `B' and a divisor of 1024:
 *	bytes		result
 *	-----		------
 *	99999		`99999 B'
 *	100000		`97 KB'
 *	66715648	`65152 KB'
 *	252215296	`240 MB'
 */
int
humanize_number(char *buf, size_t len, u_int64_t bytes, const char *suffix,
    int divisor)
{
	/* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */
	const char	*prefixes;
	u_int64_t	 limit;
	size_t		 digits, i, suffixlen;
	int		 unbounded;

	if (buf == NULL || suffix == NULL)
		return (-1);
	if (len > 0)
		buf[0] = '\0';
	/* A zero divisor would trap below; a negative one is meaningless. */
	if (divisor <= 0)
		return (-1);
	suffixlen = strlen(suffix);
	/* check if enough room for `x y' + suffix + `\0' */
	if (len < 4 + suffixlen)
		return (-1);

	if (divisor == 1024) {
		/*
		 * binary multiplies
		 * XXX IEC 60027-2 recommends Ki, Mi, Gi...
		 */
		prefixes = " KMGTPE";
	} else
		prefixes = " kMGTPE"; /* SI for decimal multiplies */

	/*
	 * limit = 10^digits is the smallest value that no longer fits.
	 * Once that power of ten cannot be represented in 64 bits every
	 * possible value fits, so stop instead of letting the product wrap
	 * around (which used to force needless, or even total, scaling for
	 * large buffers).
	 */
	digits = len - suffixlen - 3;
	limit = 1;
	unbounded = 0;
	for (i = 0; i < digits; i++) {
		if (limit > ~(u_int64_t)0 / 10) {
			unbounded = 1;
			break;
		}
		limit *= 10;
	}

	/* Scale down one prefix at a time until the value fits. */
	i = 0;
	if (!unbounded) {
		while (bytes >= limit && prefixes[i + 1] != '\0') {
			bytes /= divisor;
			i++;
		}
	}

	return (snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes,
	    i == 0 ? "" : " ", prefixes[i], suffix));
}
1316 
/*
 * format_bytes:
 *	Format `bytes' into `buf' (of size `len') using humanize_number()
 *	with a `B' suffix and a divisor of 1024.  When the value was not
 *	scaled, so that the text ends in ` B', that ending is removed.
 *
 *	Returns whatever humanize_number() returned; the value is not
 *	adjusted when the ` B' ending is removed.
 */
int
format_bytes(buf, len, bytes)
	char		*buf;
	size_t		 len;
	u_int64_t	 bytes;
{
	char	*tail;
	int	 result;

	result = humanize_number(buf, len, bytes, "B", 1024);
	if (result == -1)
		return (result);

	/* Look at the last two characters and drop them if they are ` B'. */
	tail = &buf[strlen(buf) - 2];
	if (strcmp(tail, " B") == 0)
		*tail = '\0';

	return (result);
}
1335 
1336 /*
1337  * Start trace of particular system call. If process is being traced,
1338  * this routine is called by MD syscall dispatch code just before
1339  * a system call is actually executed.
1340  * MD caller guarantees the passed 'code' is within the supported
1341  * system call number range for emulation the process runs under.
1342  */
1343 int
1344 trace_enter(struct lwp *l, register_t code,
1345 	register_t realcode, const struct sysent *callp, void *args)
1346 {
1347 #if defined(KTRACE) || defined(SYSTRACE)
1348 	struct proc *p = l->l_proc;
1349 #endif
1350 
1351 #ifdef SYSCALL_DEBUG
1352 	scdebug_call(l, code, args);
1353 #endif /* SYSCALL_DEBUG */
1354 
1355 #ifdef KTRACE
1356 	if (KTRPOINT(p, KTR_SYSCALL))
1357 		ktrsyscall(p, code, realcode, callp, args);
1358 #endif /* KTRACE */
1359 
1360 #ifdef SYSTRACE
1361 	if (ISSET(p->p_flag, P_SYSTRACE))
1362 		return systrace_enter(p, code, args);
1363 #endif
1364 	return 0;
1365 }
1366 
1367 /*
1368  * End trace of particular system call. If process is being traced,
1369  * this routine is called by MD syscall dispatch code just after
1370  * a system call finishes.
1371  * MD caller guarantees the passed 'code' is within the supported
1372  * system call number range for emulation the process runs under.
1373  */
1374 void
1375 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[],
1376     int error)
1377 {
1378 #if defined(KTRACE) || defined(SYSTRACE)
1379 	struct proc *p = l->l_proc;
1380 #endif
1381 
1382 #ifdef SYSCALL_DEBUG
1383 	scdebug_ret(l, code, error, rval);
1384 #endif /* SYSCALL_DEBUG */
1385 
1386 #ifdef KTRACE
1387 	if (KTRPOINT(p, KTR_SYSRET)) {
1388 		KERNEL_PROC_LOCK(l);
1389 		ktrsysret(p, code, error, rval);
1390 		KERNEL_PROC_UNLOCK(l);
1391 	}
1392 #endif /* KTRACE */
1393 
1394 #ifdef SYSTRACE
1395 	if (ISSET(p->p_flag, P_SYSTRACE))
1396 		systrace_exit(p, code, args, rval, error);
1397 #endif
1398 }
1399