xref: /netbsd-src/sys/kern/kern_subr.c (revision 001c68bd94f75ce9270b69227c4199fbf34ee396)
1 /*	$NetBSD: kern_subr.c,v 1.102 2003/06/29 22:31:22 fvdl Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, and by Luke Mewburn.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1982, 1986, 1991, 1993
42  *	The Regents of the University of California.  All rights reserved.
43  * (c) UNIX System Laboratories, Inc.
44  * All or some portions of this file are derived from material licensed
45  * to the University of California by American Telephone and Telegraph
46  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47  * the permission of UNIX System Laboratories, Inc.
48  *
49  * Copyright (c) 1992, 1993
50  *	The Regents of the University of California.  All rights reserved.
51  *
52  * This software was developed by the Computer Systems Engineering group
53  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
54  * contributed to Berkeley.
55  *
56  * All advertising materials mentioning features or use of this software
57  * must display the following acknowledgement:
58  *	This product includes software developed by the University of
59  *	California, Lawrence Berkeley Laboratory.
60  *
61  * Redistribution and use in source and binary forms, with or without
62  * modification, are permitted provided that the following conditions
63  * are met:
64  * 1. Redistributions of source code must retain the above copyright
65  *    notice, this list of conditions and the following disclaimer.
66  * 2. Redistributions in binary form must reproduce the above copyright
67  *    notice, this list of conditions and the following disclaimer in the
68  *    documentation and/or other materials provided with the distribution.
69  * 3. All advertising materials mentioning features or use of this software
70  *    must display the following acknowledgement:
71  *	This product includes software developed by the University of
72  *	California, Berkeley and its contributors.
73  * 4. Neither the name of the University nor the names of its contributors
74  *    may be used to endorse or promote products derived from this software
75  *    without specific prior written permission.
76  *
77  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
78  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
79  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
80  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
81  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
82  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
83  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
84  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
85  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
86  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
87  * SUCH DAMAGE.
88  *
89  *	@(#)kern_subr.c	8.4 (Berkeley) 2/14/95
90  */
91 
92 #include <sys/cdefs.h>
93 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.102 2003/06/29 22:31:22 fvdl Exp $");
94 
95 #include "opt_ddb.h"
96 #include "opt_md.h"
97 #include "opt_syscall_debug.h"
98 #include "opt_ktrace.h"
99 #include "opt_systrace.h"
100 
101 #include <sys/param.h>
102 #include <sys/systm.h>
103 #include <sys/proc.h>
104 #include <sys/malloc.h>
105 #include <sys/mount.h>
106 #include <sys/device.h>
107 #include <sys/reboot.h>
108 #include <sys/conf.h>
109 #include <sys/disklabel.h>
110 #include <sys/queue.h>
111 #include <sys/systrace.h>
112 #include <sys/ktrace.h>
113 
114 #include <uvm/uvm_extern.h>
115 
116 #include <dev/cons.h>
117 
118 #include <net/if.h>
119 
/*
 * Forward declarations of the file-local device lookup helpers that are
 * used while choosing the root and dump devices.
 * XXX these should eventually move to subr_autoconf.c
 */
static struct device *finddevice __P((const char *));
static struct device *getdisk __P((char *, int, int, dev_t *, int));
static struct device *parsedisk __P((char *, int, int, dev_t *));
124 
/*
 * A generic linear hook.
 *
 * One descriptor is allocated for each registered callback and linked onto
 * a hook_list_t.  The shutdown, mountroot, exec, exit and fork hook lists in
 * this file all use this layout; callbacks whose real signature differs are
 * stored cast to the generic type and cast back by the code that runs them.
 */
struct hook_desc {
	LIST_ENTRY(hook_desc) hk_list;	/* linkage on the owning hook list */
	void	(*hk_fn) __P((void *));	/* callback, stored as generic type */
	void	*hk_arg;		/* argument supplied at registration */
};
typedef LIST_HEAD(, hook_desc) hook_list_t;	/* head of a list of hooks */
134 
/*
 * Forward declarations of the list-generic hook helpers: register a hook,
 * remove one hook, free every hook on a list, and run every hook on a list
 * with a process argument.
 */
static void *hook_establish __P((hook_list_t *, void (*)(void *), void *));
static void hook_disestablish __P((hook_list_t *, void *));
static void hook_destroy __P((hook_list_t *));
static void hook_proc_run __P((hook_list_t *, struct proc *));
139 
/* Malloc type M_IOV: short name "iov", long description "large iov's". */
MALLOC_DEFINE(M_IOV, "iov", "large iov's");
141 
142 int
143 uiomove(buf, n, uio)
144 	void *buf;
145 	size_t n;
146 	struct uio *uio;
147 {
148 	struct iovec *iov;
149 	u_int cnt;
150 	int error = 0;
151 	char *cp = buf;
152 	struct proc *p = uio->uio_procp;
153 
154 #ifdef DIAGNOSTIC
155 	if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
156 		panic("uiomove: mode");
157 #endif
158 	while (n > 0 && uio->uio_resid) {
159 		iov = uio->uio_iov;
160 		cnt = iov->iov_len;
161 		if (cnt == 0) {
162 			uio->uio_iov++;
163 			uio->uio_iovcnt--;
164 			continue;
165 		}
166 		if (cnt > n)
167 			cnt = n;
168 		switch (uio->uio_segflg) {
169 
170 		case UIO_USERSPACE:
171 			if (curlwp->l_cpu->ci_schedstate.spc_flags &
172 			    SPCF_SHOULDYIELD)
173 				preempt(1);
174 			if (__predict_true(p == curproc)) {
175 				if (uio->uio_rw == UIO_READ)
176 					error = copyout(cp, iov->iov_base, cnt);
177 				else
178 					error = copyin(iov->iov_base, cp, cnt);
179 			} else {
180 				if (uio->uio_rw == UIO_READ)
181 					error = copyout_proc(p, cp,
182 					    iov->iov_base, cnt);
183 				else
184 					error = copyin_proc(p, iov->iov_base,
185 					    cp, cnt);
186 			}
187 			if (error)
188 				return (error);
189 			break;
190 
191 		case UIO_SYSSPACE:
192 			if (uio->uio_rw == UIO_READ)
193 				error = kcopy(cp, iov->iov_base, cnt);
194 			else
195 				error = kcopy(iov->iov_base, cp, cnt);
196 			if (error)
197 				return (error);
198 			break;
199 		}
200 		iov->iov_base = (caddr_t)iov->iov_base + cnt;
201 		iov->iov_len -= cnt;
202 		uio->uio_resid -= cnt;
203 		uio->uio_offset += cnt;
204 		cp += cnt;
205 		KDASSERT(cnt <= n);
206 		n -= cnt;
207 	}
208 	return (error);
209 }
210 
211 /*
212  * Give next character to user as result of read.
213  */
214 int
215 ureadc(c, uio)
216 	int c;
217 	struct uio *uio;
218 {
219 	struct iovec *iov;
220 
221 	if (uio->uio_resid <= 0)
222 		panic("ureadc: non-positive resid");
223 again:
224 	if (uio->uio_iovcnt <= 0)
225 		panic("ureadc: non-positive iovcnt");
226 	iov = uio->uio_iov;
227 	if (iov->iov_len <= 0) {
228 		uio->uio_iovcnt--;
229 		uio->uio_iov++;
230 		goto again;
231 	}
232 	switch (uio->uio_segflg) {
233 
234 	case UIO_USERSPACE:
235 		if (subyte(iov->iov_base, c) < 0)
236 			return (EFAULT);
237 		break;
238 
239 	case UIO_SYSSPACE:
240 		*(char *)iov->iov_base = c;
241 		break;
242 	}
243 	iov->iov_base = (caddr_t)iov->iov_base + 1;
244 	iov->iov_len--;
245 	uio->uio_resid--;
246 	uio->uio_offset++;
247 	return (0);
248 }
249 
250 /*
251  * Like copyin(), but operates on an arbitrary process.
252  */
253 int
254 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len)
255 {
256 	struct iovec iov;
257 	struct uio uio;
258 	int error;
259 
260 	if (len == 0)
261 		return (0);
262 
263 	iov.iov_base = kaddr;
264 	iov.iov_len = len;
265 	uio.uio_iov = &iov;
266 	uio.uio_iovcnt = 1;
267 	uio.uio_offset = (off_t)(intptr_t)uaddr;
268 	uio.uio_resid = len;
269 	uio.uio_segflg = UIO_SYSSPACE;
270 	uio.uio_rw = UIO_READ;
271 	uio.uio_procp = NULL;
272 
273 	/* XXXCDC: how should locking work here? */
274 	if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
275 		return (EFAULT);
276 	p->p_vmspace->vm_refcnt++;	/* XXX */
277 	error = uvm_io(&p->p_vmspace->vm_map, &uio);
278 	uvmspace_free(p->p_vmspace);
279 
280 	return (error);
281 }
282 
283 /*
284  * Like copyout(), but operates on an arbitrary process.
285  */
286 int
287 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len)
288 {
289 	struct iovec iov;
290 	struct uio uio;
291 	int error;
292 
293 	if (len == 0)
294 		return (0);
295 
296 	iov.iov_base = (void *) kaddr;	/* XXX cast away const */
297 	iov.iov_len = len;
298 	uio.uio_iov = &iov;
299 	uio.uio_iovcnt = 1;
300 	uio.uio_offset = (off_t)(intptr_t)uaddr;
301 	uio.uio_resid = len;
302 	uio.uio_segflg = UIO_SYSSPACE;
303 	uio.uio_rw = UIO_WRITE;
304 	uio.uio_procp = NULL;
305 
306 	/* XXXCDC: how should locking work here? */
307 	if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
308 		return (EFAULT);
309 	p->p_vmspace->vm_refcnt++;	/* XXX */
310 	error = uvm_io(&p->p_vmspace->vm_map, &uio);
311 	uvmspace_free(p->p_vmspace);
312 
313 	return (error);
314 }
315 
316 /*
317  * General routine to allocate a hash table.
318  * Allocate enough memory to hold at least `elements' list-head pointers.
319  * Return a pointer to the allocated space and set *hashmask to a pattern
320  * suitable for masking a value to use as an index into the returned array.
321  */
322 void *
323 hashinit(elements, htype, mtype, mflags, hashmask)
324 	u_int elements;
325 	enum hashtype htype;
326 	struct malloc_type *mtype;
327 	int mflags;
328 	u_long *hashmask;
329 {
330 	u_long hashsize, i;
331 	LIST_HEAD(, generic) *hashtbl_list;
332 	TAILQ_HEAD(, generic) *hashtbl_tailq;
333 	size_t esize;
334 	void *p;
335 
336 	if (elements == 0)
337 		panic("hashinit: bad cnt");
338 	for (hashsize = 1; hashsize < elements; hashsize <<= 1)
339 		continue;
340 
341 	switch (htype) {
342 	case HASH_LIST:
343 		esize = sizeof(*hashtbl_list);
344 		break;
345 	case HASH_TAILQ:
346 		esize = sizeof(*hashtbl_tailq);
347 		break;
348 #ifdef DIAGNOSTIC
349 	default:
350 		panic("hashinit: invalid table type");
351 #endif
352 	}
353 
354 	if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL)
355 		return (NULL);
356 
357 	switch (htype) {
358 	case HASH_LIST:
359 		hashtbl_list = p;
360 		for (i = 0; i < hashsize; i++)
361 			LIST_INIT(&hashtbl_list[i]);
362 		break;
363 	case HASH_TAILQ:
364 		hashtbl_tailq = p;
365 		for (i = 0; i < hashsize; i++)
366 			TAILQ_INIT(&hashtbl_tailq[i]);
367 		break;
368 	}
369 	*hashmask = hashsize - 1;
370 	return (p);
371 }
372 
373 /*
374  * Free memory from hash table previosly allocated via hashinit().
375  */
376 void
377 hashdone(hashtbl, mtype)
378 	void *hashtbl;
379 	struct malloc_type *mtype;
380 {
381 
382 	free(hashtbl, mtype);
383 }
384 
385 
386 static void *
387 hook_establish(list, fn, arg)
388 	hook_list_t *list;
389 	void (*fn) __P((void *));
390 	void *arg;
391 {
392 	struct hook_desc *hd;
393 
394 	hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT);
395 	if (hd == NULL)
396 		return (NULL);
397 
398 	hd->hk_fn = fn;
399 	hd->hk_arg = arg;
400 	LIST_INSERT_HEAD(list, hd, hk_list);
401 
402 	return (hd);
403 }
404 
405 static void
406 hook_disestablish(list, vhook)
407 	hook_list_t *list;
408 	void *vhook;
409 {
410 #ifdef DIAGNOSTIC
411 	struct hook_desc *hd;
412 
413 	LIST_FOREACH(hd, list, hk_list) {
414                 if (hd == vhook)
415 			break;
416 	}
417 
418 	if (hd == NULL)
419 		panic("hook_disestablish: hook %p not established", vhook);
420 #endif
421 	LIST_REMOVE((struct hook_desc *)vhook, hk_list);
422 	free(vhook, M_DEVBUF);
423 }
424 
425 static void
426 hook_destroy(list)
427 	hook_list_t *list;
428 {
429 	struct hook_desc *hd;
430 
431 	while ((hd = LIST_FIRST(list)) != NULL) {
432 		LIST_REMOVE(hd, hk_list);
433 		free(hd, M_DEVBUF);
434 	}
435 }
436 
437 static void
438 hook_proc_run(list, p)
439 	hook_list_t *list;
440 	struct proc *p;
441 {
442 	struct hook_desc *hd;
443 
444 	for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) {
445 		((void (*) __P((struct proc *, void *)))*hd->hk_fn)(p,
446 		    hd->hk_arg);
447 	}
448 }
449 
/*
 * "Shutdown hook" types, functions, and variables.
 *
 * The hooks should be run (see doshutdownhooks()) immediately before the
 * system is halted or rebooted, i.e. after file systems unmounted,
 * after crash dump done, etc.
 *
 * Each shutdown hook is removed from the list before it's run, so that
 * it won't be run again.
 */

/* List of registered shutdown hooks. */
hook_list_t shutdownhook_list;
462 
463 void *
464 shutdownhook_establish(fn, arg)
465 	void (*fn) __P((void *));
466 	void *arg;
467 {
468 	return hook_establish(&shutdownhook_list, fn, arg);
469 }
470 
471 void
472 shutdownhook_disestablish(vhook)
473 	void *vhook;
474 {
475 	hook_disestablish(&shutdownhook_list, vhook);
476 }
477 
478 /*
479  * Run shutdown hooks.  Should be invoked immediately before the
480  * system is halted or rebooted, i.e. after file systems unmounted,
481  * after crash dump done, etc.
482  *
483  * Each shutdown hook is removed from the list before it's run, so that
484  * it won't be run again.
485  */
486 void
487 doshutdownhooks()
488 {
489 	struct hook_desc *dp;
490 
491 	while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) {
492 		LIST_REMOVE(dp, hk_list);
493 		(*dp->hk_fn)(dp->hk_arg);
494 #if 0
495 		/*
496 		 * Don't bother freeing the hook structure,, since we may
497 		 * be rebooting because of a memory corruption problem,
498 		 * and this might only make things worse.  It doesn't
499 		 * matter, anyway, since the system is just about to
500 		 * reboot.
501 		 */
502 		free(dp, M_DEVBUF);
503 #endif
504 	}
505 }
506 
/*
 * "Mountroot hook" types, functions, and variables.
 *
 * Each hook is registered together with a device; domountroothook() runs
 * the first hook whose device is the chosen root device.
 */

/* List of registered mountroot hooks. */
hook_list_t mountroothook_list;
512 
513 void *
514 mountroothook_establish(fn, dev)
515 	void (*fn) __P((struct device *));
516 	struct device *dev;
517 {
518 	return hook_establish(&mountroothook_list, (void (*)__P((void *)))fn,
519 	    dev);
520 }
521 
522 void
523 mountroothook_disestablish(vhook)
524 	void *vhook;
525 {
526 	hook_disestablish(&mountroothook_list, vhook);
527 }
528 
529 void
530 mountroothook_destroy()
531 {
532 	hook_destroy(&mountroothook_list);
533 }
534 
535 void
536 domountroothook()
537 {
538 	struct hook_desc *hd;
539 
540 	LIST_FOREACH(hd, &mountroothook_list, hk_list) {
541 		if (hd->hk_arg == (void *)root_device) {
542 			(*hd->hk_fn)(hd->hk_arg);
543 			return;
544 		}
545 	}
546 }
547 
/* List of registered exec hooks; run by doexechooks(). */
hook_list_t exechook_list;
549 
550 void *
551 exechook_establish(fn, arg)
552 	void (*fn) __P((struct proc *, void *));
553 	void *arg;
554 {
555 	return hook_establish(&exechook_list, (void (*) __P((void *)))fn, arg);
556 }
557 
558 void
559 exechook_disestablish(vhook)
560 	void *vhook;
561 {
562 	hook_disestablish(&exechook_list, vhook);
563 }
564 
565 /*
566  * Run exec hooks.
567  */
568 void
569 doexechooks(p)
570 	struct proc *p;
571 {
572 	hook_proc_run(&exechook_list, p);
573 }
574 
/* List of registered exit hooks; run by doexithooks(). */
hook_list_t exithook_list;
576 
577 void *
578 exithook_establish(fn, arg)
579 	void (*fn) __P((struct proc *, void *));
580 	void *arg;
581 {
582 	return hook_establish(&exithook_list, (void (*) __P((void *)))fn, arg);
583 }
584 
585 void
586 exithook_disestablish(vhook)
587 	void *vhook;
588 {
589 	hook_disestablish(&exithook_list, vhook);
590 }
591 
592 /*
593  * Run exit hooks.
594  */
595 void
596 doexithooks(p)
597 	struct proc *p;
598 {
599 	hook_proc_run(&exithook_list, p);
600 }
601 
/* List of registered fork hooks; run by doforkhooks(). */
hook_list_t forkhook_list;
603 
604 void *
605 forkhook_establish(fn)
606 	void (*fn) __P((struct proc *, struct proc *));
607 {
608 	return hook_establish(&forkhook_list, (void (*) __P((void *)))fn, NULL);
609 }
610 
611 void
612 forkhook_disestablish(vhook)
613 	void *vhook;
614 {
615 	hook_disestablish(&forkhook_list, vhook);
616 }
617 
618 /*
619  * Run fork hooks.
620  */
621 void
622 doforkhooks(p2, p1)
623 	struct proc *p2, *p1;
624 {
625 	struct hook_desc *hd;
626 
627 	LIST_FOREACH(hd, &forkhook_list, hk_list) {
628 		((void (*) __P((struct proc *, struct proc *)))*hd->hk_fn)
629 		    (p2, p1);
630 	}
631 }
632 
/*
 * "Power hook" types, functions, and variables.
 * The list of power hooks is kept ordered with the last registered hook
 * first.
 * When running the hooks on power down the hooks are called in reverse
 * registration order, when powering up in registration order.
 */
struct powerhook_desc {
	CIRCLEQ_ENTRY(powerhook_desc) sfd_list;	/* linkage on powerhook_list */
	void	(*sfd_fn) __P((int, void *));	/* callback: (why, sfd_arg) */
	void	*sfd_arg;			/* argument given at registration */
};

/* All registered power hooks; new hooks are inserted at the head. */
CIRCLEQ_HEAD(, powerhook_desc) powerhook_list =
	CIRCLEQ_HEAD_INITIALIZER(powerhook_list);
648 
649 void *
650 powerhook_establish(fn, arg)
651 	void (*fn) __P((int, void *));
652 	void *arg;
653 {
654 	struct powerhook_desc *ndp;
655 
656 	ndp = (struct powerhook_desc *)
657 	    malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT);
658 	if (ndp == NULL)
659 		return (NULL);
660 
661 	ndp->sfd_fn = fn;
662 	ndp->sfd_arg = arg;
663 	CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list);
664 
665 	return (ndp);
666 }
667 
668 void
669 powerhook_disestablish(vhook)
670 	void *vhook;
671 {
672 #ifdef DIAGNOSTIC
673 	struct powerhook_desc *dp;
674 
675 	CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list)
676                 if (dp == vhook)
677 			goto found;
678 	panic("powerhook_disestablish: hook %p not established", vhook);
679  found:
680 #endif
681 
682 	CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook,
683 	    sfd_list);
684 	free(vhook, M_DEVBUF);
685 }
686 
687 /*
688  * Run power hooks.
689  */
690 void
691 dopowerhooks(why)
692 	int why;
693 {
694 	struct powerhook_desc *dp;
695 
696 	if (why == PWR_RESUME || why == PWR_SOFTRESUME) {
697 		CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) {
698 			(*dp->sfd_fn)(why, dp->sfd_arg);
699 		}
700 	} else {
701 		CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) {
702 			(*dp->sfd_fn)(why, dp->sfd_arg);
703 		}
704 	}
705 }
706 
/*
 * Determine the root device and, if instructed to, the root file system.
 */

/* Memory-disk hooks are disabled when no md units are configured. */
#include "md.h"
#if NMD == 0
#undef MEMORY_DISK_HOOKS
#endif

#ifdef MEMORY_DISK_HOOKS
/* Stand-in device records, one per md unit; filled in by setroot(). */
static struct device fakemdrootdev[NMD];
#endif

/*
 * RAID boot hooks are enabled when NRAID is 1.
 * NOTE(review): presumably NRAID is a 0/1 configuration flag rather than
 * a unit count -- confirm against raid.h.
 */
#include "raid.h"
#if NRAID == 1
#define BOOT_FROM_RAID_HOOKS 1
#endif

#ifdef BOOT_FROM_RAID_HOOKS
/* Defined elsewhere: number of entries in, and base of, raidrootdev[]. */
extern int numraid;
extern struct device *raidrootdev;
#endif
729 
730 void
731 setroot(bootdv, bootpartition)
732 	struct device *bootdv;
733 	int bootpartition;
734 {
735 	struct device *dv;
736 	int len;
737 #ifdef MEMORY_DISK_HOOKS
738 	int i;
739 #endif
740 	dev_t nrootdev;
741 	dev_t ndumpdev = NODEV;
742 	char buf[128];
743 	const char *rootdevname;
744 	const char *dumpdevname;
745 	struct device *rootdv = NULL;		/* XXX gcc -Wuninitialized */
746 	struct device *dumpdv = NULL;
747 	struct ifnet *ifp;
748 	const char *deffsname;
749 	struct vfsops *vops;
750 
751 #ifdef MEMORY_DISK_HOOKS
752 	for (i = 0; i < NMD; i++) {
753 		fakemdrootdev[i].dv_class  = DV_DISK;
754 		fakemdrootdev[i].dv_cfdata = NULL;
755 		fakemdrootdev[i].dv_unit   = i;
756 		fakemdrootdev[i].dv_parent = NULL;
757 		sprintf(fakemdrootdev[i].dv_xname, "md%d", i);
758 	}
759 #endif /* MEMORY_DISK_HOOKS */
760 
761 #ifdef MEMORY_DISK_IS_ROOT
762 	bootdv = &fakemdrootdev[0];
763 	bootpartition = 0;
764 #endif
765 
766 	/*
767 	 * If NFS is specified as the file system, and we found
768 	 * a DV_DISK boot device (or no boot device at all), then
769 	 * find a reasonable network interface for "rootspec".
770 	 */
771 	vops = vfs_getopsbyname("nfs");
772 	if (vops != NULL && vops->vfs_mountroot == mountroot &&
773 	    rootspec == NULL &&
774 	    (bootdv == NULL || bootdv->dv_class != DV_IFNET)) {
775 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
776 			if ((ifp->if_flags &
777 			     (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
778 				break;
779 		}
780 		if (ifp == NULL) {
781 			/*
782 			 * Can't find a suitable interface; ask the
783 			 * user.
784 			 */
785 			boothowto |= RB_ASKNAME;
786 		} else {
787 			/*
788 			 * Have a suitable interface; behave as if
789 			 * the user specified this interface.
790 			 */
791 			rootspec = (const char *)ifp->if_xname;
792 		}
793 	}
794 
795 	/*
796 	 * If wildcarded root and we the boot device wasn't determined,
797 	 * ask the user.
798 	 */
799 	if (rootspec == NULL && bootdv == NULL)
800 		boothowto |= RB_ASKNAME;
801 
802  top:
803 	if (boothowto & RB_ASKNAME) {
804 		struct device *defdumpdv;
805 
806 		for (;;) {
807 			printf("root device");
808 			if (bootdv != NULL) {
809 				printf(" (default %s", bootdv->dv_xname);
810 				if (bootdv->dv_class == DV_DISK)
811 					printf("%c", bootpartition + 'a');
812 				printf(")");
813 			}
814 			printf(": ");
815 			len = cngetsn(buf, sizeof(buf));
816 			if (len == 0 && bootdv != NULL) {
817 				strlcpy(buf, bootdv->dv_xname, sizeof(buf));
818 				len = strlen(buf);
819 			}
820 			if (len > 0 && buf[len - 1] == '*') {
821 				buf[--len] = '\0';
822 				dv = getdisk(buf, len, 1, &nrootdev, 0);
823 				if (dv != NULL) {
824 					rootdv = dv;
825 					break;
826 				}
827 			}
828 			dv = getdisk(buf, len, bootpartition, &nrootdev, 0);
829 			if (dv != NULL) {
830 				rootdv = dv;
831 				break;
832 			}
833 		}
834 
835 		/*
836 		 * Set up the default dump device.  If root is on
837 		 * a network device, there is no default dump
838 		 * device, since we don't support dumps to the
839 		 * network.
840 		 */
841 		if (rootdv->dv_class == DV_IFNET)
842 			defdumpdv = NULL;
843 		else
844 			defdumpdv = rootdv;
845 
846 		for (;;) {
847 			printf("dump device");
848 			if (defdumpdv != NULL) {
849 				/*
850 				 * Note, we know it's a disk if we get here.
851 				 */
852 				printf(" (default %sb)", defdumpdv->dv_xname);
853 			}
854 			printf(": ");
855 			len = cngetsn(buf, sizeof(buf));
856 			if (len == 0) {
857 				if (defdumpdv != NULL) {
858 					ndumpdev = MAKEDISKDEV(major(nrootdev),
859 					    DISKUNIT(nrootdev), 1);
860 				}
861 				dumpdv = defdumpdv;
862 				break;
863 			}
864 			if (len == 4 && strcmp(buf, "none") == 0) {
865 				dumpdv = NULL;
866 				break;
867 			}
868 			dv = getdisk(buf, len, 1, &ndumpdev, 1);
869 			if (dv != NULL) {
870 				dumpdv = dv;
871 				break;
872 			}
873 		}
874 
875 		rootdev = nrootdev;
876 		dumpdev = ndumpdev;
877 
878 		for (vops = LIST_FIRST(&vfs_list); vops != NULL;
879 		     vops = LIST_NEXT(vops, vfs_list)) {
880 			if (vops->vfs_mountroot != NULL &&
881 			    vops->vfs_mountroot == mountroot)
882 			break;
883 		}
884 
885 		if (vops == NULL) {
886 			mountroot = NULL;
887 			deffsname = "generic";
888 		} else
889 			deffsname = vops->vfs_name;
890 
891 		for (;;) {
892 			printf("file system (default %s): ", deffsname);
893 			len = cngetsn(buf, sizeof(buf));
894 			if (len == 0)
895 				break;
896 			if (len == 4 && strcmp(buf, "halt") == 0)
897 				cpu_reboot(RB_HALT, NULL);
898 			else if (len == 6 && strcmp(buf, "reboot") == 0)
899 				cpu_reboot(0, NULL);
900 #if defined(DDB)
901 			else if (len == 3 && strcmp(buf, "ddb") == 0) {
902 				console_debugger();
903 			}
904 #endif
905 			else if (len == 7 && strcmp(buf, "generic") == 0) {
906 				mountroot = NULL;
907 				break;
908 			}
909 			vops = vfs_getopsbyname(buf);
910 			if (vops == NULL || vops->vfs_mountroot == NULL) {
911 				printf("use one of: generic");
912 				for (vops = LIST_FIRST(&vfs_list);
913 				     vops != NULL;
914 				     vops = LIST_NEXT(vops, vfs_list)) {
915 					if (vops->vfs_mountroot != NULL)
916 						printf(" %s", vops->vfs_name);
917 				}
918 #if defined(DDB)
919 				printf(" ddb");
920 #endif
921 				printf(" halt reboot\n");
922 			} else {
923 				mountroot = vops->vfs_mountroot;
924 				break;
925 			}
926 		}
927 
928 	} else if (rootspec == NULL) {
929 		int majdev;
930 
931 		/*
932 		 * Wildcarded root; use the boot device.
933 		 */
934 		rootdv = bootdv;
935 
936 		majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0);
937 		if (majdev >= 0) {
938 			/*
939 			 * Root is on a disk.  `bootpartition' is root.
940 			 */
941 			rootdev = MAKEDISKDEV(majdev, bootdv->dv_unit,
942 			    bootpartition);
943 		}
944 	} else {
945 
946 		/*
947 		 * `root on <dev> ...'
948 		 */
949 
950 		/*
951 		 * If it's a network interface, we can bail out
952 		 * early.
953 		 */
954 		dv = finddevice(rootspec);
955 		if (dv != NULL && dv->dv_class == DV_IFNET) {
956 			rootdv = dv;
957 			goto haveroot;
958 		}
959 
960 		rootdevname = devsw_blk2name(major(rootdev));
961 		if (rootdevname == NULL) {
962 			printf("unknown device major 0x%x\n", rootdev);
963 			boothowto |= RB_ASKNAME;
964 			goto top;
965 		}
966 		memset(buf, 0, sizeof(buf));
967 		sprintf(buf, "%s%d", rootdevname, DISKUNIT(rootdev));
968 
969 		rootdv = finddevice(buf);
970 		if (rootdv == NULL) {
971 			printf("device %s (0x%x) not configured\n",
972 			    buf, rootdev);
973 			boothowto |= RB_ASKNAME;
974 			goto top;
975 		}
976 	}
977 
978  haveroot:
979 
980 	root_device = rootdv;
981 
982 	switch (rootdv->dv_class) {
983 	case DV_IFNET:
984 		aprint_normal("root on %s", rootdv->dv_xname);
985 		break;
986 
987 	case DV_DISK:
988 		aprint_normal("root on %s%c", rootdv->dv_xname,
989 		    DISKPART(rootdev) + 'a');
990 		break;
991 
992 	default:
993 		printf("can't determine root device\n");
994 		boothowto |= RB_ASKNAME;
995 		goto top;
996 	}
997 
998 	/*
999 	 * Now configure the dump device.
1000 	 *
1001 	 * If we haven't figured out the dump device, do so, with
1002 	 * the following rules:
1003 	 *
1004 	 *	(a) We already know dumpdv in the RB_ASKNAME case.
1005 	 *
1006 	 *	(b) If dumpspec is set, try to use it.  If the device
1007 	 *	    is not available, punt.
1008 	 *
1009 	 *	(c) If dumpspec is not set, the dump device is
1010 	 *	    wildcarded or unspecified.  If the root device
1011 	 *	    is DV_IFNET, punt.  Otherwise, use partition b
1012 	 *	    of the root device.
1013 	 */
1014 
1015 	if (boothowto & RB_ASKNAME) {		/* (a) */
1016 		if (dumpdv == NULL)
1017 			goto nodumpdev;
1018 	} else if (dumpspec != NULL) {		/* (b) */
1019 		if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) {
1020 			/*
1021 			 * Operator doesn't want a dump device.
1022 			 * Or looks like they tried to pick a network
1023 			 * device.  Oops.
1024 			 */
1025 			goto nodumpdev;
1026 		}
1027 
1028 		dumpdevname = devsw_blk2name(major(dumpdev));
1029 		if (dumpdevname == NULL)
1030 			goto nodumpdev;
1031 		memset(buf, 0, sizeof(buf));
1032 		sprintf(buf, "%s%d", dumpdevname, DISKUNIT(dumpdev));
1033 
1034 		dumpdv = finddevice(buf);
1035 		if (dumpdv == NULL) {
1036 			/*
1037 			 * Device not configured.
1038 			 */
1039 			goto nodumpdev;
1040 		}
1041 	} else {				/* (c) */
1042 		if (rootdv->dv_class == DV_IFNET)
1043 			goto nodumpdev;
1044 		else {
1045 			dumpdv = rootdv;
1046 			dumpdev = MAKEDISKDEV(major(rootdev),
1047 			    dumpdv->dv_unit, 1);
1048 		}
1049 	}
1050 
1051 	aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname,
1052 	    DISKPART(dumpdev) + 'a');
1053 	return;
1054 
1055  nodumpdev:
1056 	dumpdev = NODEV;
1057 	aprint_normal("\n");
1058 }
1059 
1060 static struct device *
1061 finddevice(name)
1062 	const char *name;
1063 {
1064 	struct device *dv;
1065 #ifdef BOOT_FROM_RAID_HOOKS
1066 	int j;
1067 
1068 	for (j = 0; j < numraid; j++) {
1069 		if (strcmp(name, raidrootdev[j].dv_xname) == 0) {
1070 			dv = &raidrootdev[j];
1071 			return (dv);
1072 		}
1073 	}
1074 #endif
1075 
1076 	for (dv = TAILQ_FIRST(&alldevs); dv != NULL;
1077 	    dv = TAILQ_NEXT(dv, dv_list))
1078 		if (strcmp(dv->dv_xname, name) == 0)
1079 			break;
1080 	return (dv);
1081 }
1082 
1083 static struct device *
1084 getdisk(str, len, defpart, devp, isdump)
1085 	char *str;
1086 	int len, defpart;
1087 	dev_t *devp;
1088 	int isdump;
1089 {
1090 	struct device	*dv;
1091 #ifdef MEMORY_DISK_HOOKS
1092 	int		i;
1093 #endif
1094 #ifdef BOOT_FROM_RAID_HOOKS
1095 	int 		j;
1096 #endif
1097 
1098 	if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
1099 		printf("use one of:");
1100 #ifdef MEMORY_DISK_HOOKS
1101 		if (isdump == 0)
1102 			for (i = 0; i < NMD; i++)
1103 				printf(" %s[a-%c]", fakemdrootdev[i].dv_xname,
1104 				    'a' + MAXPARTITIONS - 1);
1105 #endif
1106 #ifdef BOOT_FROM_RAID_HOOKS
1107 		if (isdump == 0)
1108 			for (j = 0; j < numraid; j++)
1109 				printf(" %s[a-%c]", raidrootdev[j].dv_xname,
1110 				    'a' + MAXPARTITIONS - 1);
1111 #endif
1112 		TAILQ_FOREACH(dv, &alldevs, dv_list) {
1113 			if (dv->dv_class == DV_DISK)
1114 				printf(" %s[a-%c]", dv->dv_xname,
1115 				    'a' + MAXPARTITIONS - 1);
1116 			if (isdump == 0 && dv->dv_class == DV_IFNET)
1117 				printf(" %s", dv->dv_xname);
1118 		}
1119 		if (isdump)
1120 			printf(" none");
1121 #if defined(DDB)
1122 		printf(" ddb");
1123 #endif
1124 		printf(" halt reboot\n");
1125 	}
1126 	return (dv);
1127 }
1128 
1129 static struct device *
1130 parsedisk(str, len, defpart, devp)
1131 	char *str;
1132 	int len, defpart;
1133 	dev_t *devp;
1134 {
1135 	struct device *dv;
1136 	char *cp, c;
1137 	int majdev, part;
1138 #ifdef MEMORY_DISK_HOOKS
1139 	int i;
1140 #endif
1141 	if (len == 0)
1142 		return (NULL);
1143 
1144 	if (len == 4 && strcmp(str, "halt") == 0)
1145 		cpu_reboot(RB_HALT, NULL);
1146 	else if (len == 6 && strcmp(str, "reboot") == 0)
1147 		cpu_reboot(0, NULL);
1148 #if defined(DDB)
1149 	else if (len == 3 && strcmp(str, "ddb") == 0)
1150 		console_debugger();
1151 #endif
1152 
1153 	cp = str + len - 1;
1154 	c = *cp;
1155 	if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) {
1156 		part = c - 'a';
1157 		*cp = '\0';
1158 	} else
1159 		part = defpart;
1160 
1161 #ifdef MEMORY_DISK_HOOKS
1162 	for (i = 0; i < NMD; i++)
1163 		if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) {
1164 			dv = &fakemdrootdev[i];
1165 			goto gotdisk;
1166 		}
1167 #endif
1168 
1169 	dv = finddevice(str);
1170 	if (dv != NULL) {
1171 		if (dv->dv_class == DV_DISK) {
1172 #ifdef MEMORY_DISK_HOOKS
1173  gotdisk:
1174 #endif
1175 			majdev = devsw_name2blk(dv->dv_xname, NULL, 0);
1176 			if (majdev < 0)
1177 				panic("parsedisk");
1178 			*devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
1179 		}
1180 
1181 		if (dv->dv_class == DV_IFNET)
1182 			*devp = NODEV;
1183 	}
1184 
1185 	*cp = c;
1186 	return (dv);
1187 }
1188 
/*
 * snprintf() `bytes' into `buf', dividing it by `divisor' as often as
 * needed so that the number (plus a possible ` x' scale prefix and the
 * suffix) fits into `len' bytes, including the terminating NUL.
 *
 * A divisor of 1024 selects the binary prefixes " KMGTPE"; any other
 * divisor selects the SI prefixes " kMGTPE".
 *
 * Returns the value returned by snprintf() (the length of the formatted
 * string), or -1 if there was a problem: buf or suffix is NULL, divisor
 * is not positive, or len is too small for `x y' + suffix + NUL.  When
 * buf and suffix are both non-NULL and len > 0, buf holds the empty
 * string after such a failure.
 *
 * E.g., given a len of 9, a suffix of `B' and a divisor of 1024:
 *	bytes		result
 *	-----		------
 *	99999		`99999 B'
 *	100000		`97 KB'
 *	66715648	`65152 KB'
 *	252215296	`240 MB'
 */
int
humanize_number(char *buf, size_t len, u_int64_t bytes, const char *suffix,
    int divisor)
{
	/* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */
	const char	*prefixes;
	u_int64_t	 max;
	size_t		 i, ndigits, suffixlen;
	int		 fits;

	if (buf == NULL || suffix == NULL)
		return (-1);
	if (len > 0)
		buf[0] = '\0';
	/* a zero divisor would trap below; a negative one is meaningless */
	if (divisor <= 0)
		return (-1);
	suffixlen = strlen(suffix);
	/* check if enough room for `x y' + suffix + `\0' */
	if (len < 4 + suffixlen)
		return (-1);

	if (divisor == 1024) {
		/*
		 * binary multiplies
		 * XXX IEC 60027-2 recommends Ki, Mi, Gi...
		 */
		prefixes = " KMGTPE";
	} else
		prefixes = " kMGTPE"; /* SI for decimal multiplies */

	/*
	 * Compute max = 10^ndigits, the smallest value that no longer fits
	 * in the digits available.  If that power of ten cannot be
	 * represented in 64 bits, every possible value of `bytes' fits and
	 * no scaling is needed; multiplying regardless would wrap `max'
	 * and scale small numbers for no reason.
	 */
	ndigits = len - suffixlen - 3;
	max = 1;
	fits = 0;
	for (i = 0; i < ndigits; i++) {
		if (max > ~(u_int64_t)0 / 10) {
			fits = 1;
			break;
		}
		max *= 10;
	}

	/* scale down until the number fits or the prefixes run out */
	i = 0;
	if (!fits) {
		while (bytes >= max && prefixes[i + 1] != '\0') {
			bytes /= (u_int64_t)divisor;
			i++;
		}
	}

	return (snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes,
	    i == 0 ? "" : " ", prefixes[i], suffix));
}
1245 
/*
 * Format `bytes' into `buf' (of size `len') by way of humanize_number()
 * with a suffix of `B' and a divisor of 1024.  If the resulting string
 * ends in ` B', i.e. no scale prefix was applied, that trailing ` B' is
 * cut off so only the bare number remains.
 *
 * Returns whatever humanize_number() returned, unchanged; in particular
 * the value is not adjusted when the trailing ` B' is removed.
 */
int
format_bytes(buf, len, bytes)
	char		*buf;
	size_t		 len;
	u_int64_t	 bytes;
{
	size_t	cut;
	int	result;

	result = humanize_number(buf, len, bytes, "B", 1024);
	if (result == -1)
		return (result);

	/* look at the last two characters; drop them if they are ` B' */
	cut = strlen(buf) - 2;
	if (strcmp(buf + cut, " B") == 0)
		buf[cut] = '\0';

	return (result);
}
1264 
1265 /*
1266  * Start trace of particular system call. If process is being traced,
1267  * this routine is called by MD syscall dispatch code just before
1268  * a system call is actually executed.
1269  * MD caller guarantees the passed 'code' is within the supported
1270  * system call number range for emulation the process runs under.
1271  */
1272 int
1273 trace_enter(struct lwp *l, register_t code,
1274 	register_t realcode, const struct sysent *callp, void *args,
1275 	register_t rval[])
1276 {
1277 #if defined(KTRACE) || defined(SYSTRACE)
1278 	struct proc *p = l->l_proc;
1279 #endif
1280 
1281 #ifdef SYSCALL_DEBUG
1282 	scdebug_call(l, code, args);
1283 #endif /* SYSCALL_DEBUG */
1284 
1285 #ifdef KTRACE
1286 	if (KTRPOINT(p, KTR_SYSCALL))
1287 		ktrsyscall(p, code, realcode, callp, args);
1288 #endif /* KTRACE */
1289 
1290 #ifdef SYSTRACE
1291 	if (ISSET(p->p_flag, P_SYSTRACE))
1292 		return systrace_enter(p, code, args, rval);
1293 #endif
1294 	return 0;
1295 }
1296 
1297 /*
1298  * End trace of particular system call. If process is being traced,
1299  * this routine is called by MD syscall dispatch code just after
1300  * a system call finishes.
1301  * MD caller guarantees the passed 'code' is within the supported
1302  * system call number range for emulation the process runs under.
1303  */
1304 void
1305 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[],
1306     int error)
1307 {
1308 #if defined(KTRACE) || defined(SYSTRACE)
1309 	struct proc *p = l->l_proc;
1310 #endif
1311 
1312 #ifdef SYSCALL_DEBUG
1313 	scdebug_ret(l, code, error, rval);
1314 #endif /* SYSCALL_DEBUG */
1315 
1316 #ifdef KTRACE
1317 	if (KTRPOINT(p, KTR_SYSRET)) {
1318 		KERNEL_PROC_LOCK(l);
1319 		ktrsysret(p, code, error, rval);
1320 		KERNEL_PROC_UNLOCK(l);
1321 	}
1322 #endif /* KTRACE */
1323 
1324 #ifdef SYSTRACE
1325 	if (ISSET(p->p_flag, P_SYSTRACE))
1326 		systrace_exit(p, code, args, rval, error);
1327 #endif
1328 }
1329