xref: /netbsd-src/sys/kern/kern_subr.c (revision 96230fab84e26a6435963032070e916a951a8b2e)
1 /*	$NetBSD: kern_subr.c,v 1.192 2008/10/14 14:17:49 pooka Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 1999, 2002, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, and by Luke Mewburn.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  * (c) UNIX System Laboratories, Inc.
37  * All or some portions of this file are derived from material licensed
38  * to the University of California by American Telephone and Telegraph
39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40  * the permission of UNIX System Laboratories, Inc.
41  *
42  * Copyright (c) 1992, 1993
43  *	The Regents of the University of California.  All rights reserved.
44  *
45  * This software was developed by the Computer Systems Engineering group
46  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
47  * contributed to Berkeley.
48  *
49  * All advertising materials mentioning features or use of this software
50  * must display the following acknowledgement:
51  *	This product includes software developed by the University of
52  *	California, Lawrence Berkeley Laboratory.
53  *
54  * Redistribution and use in source and binary forms, with or without
55  * modification, are permitted provided that the following conditions
56  * are met:
57  * 1. Redistributions of source code must retain the above copyright
58  *    notice, this list of conditions and the following disclaimer.
59  * 2. Redistributions in binary form must reproduce the above copyright
60  *    notice, this list of conditions and the following disclaimer in the
61  *    documentation and/or other materials provided with the distribution.
62  * 3. Neither the name of the University nor the names of its contributors
63  *    may be used to endorse or promote products derived from this software
64  *    without specific prior written permission.
65  *
66  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
67  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
68  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
69  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
70  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
71  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
72  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
73  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
74  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
75  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
76  * SUCH DAMAGE.
77  *
78  *	@(#)kern_subr.c	8.4 (Berkeley) 2/14/95
79  */
80 
81 #include <sys/cdefs.h>
82 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.192 2008/10/14 14:17:49 pooka Exp $");
83 
84 #include "opt_ddb.h"
85 #include "opt_md.h"
86 #include "opt_syscall_debug.h"
87 #include "opt_ktrace.h"
88 #include "opt_ptrace.h"
89 #include "opt_powerhook.h"
90 #include "opt_tftproot.h"
91 
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/proc.h>
95 #include <sys/malloc.h>
96 #include <sys/mount.h>
97 #include <sys/device.h>
98 #include <sys/reboot.h>
99 #include <sys/conf.h>
100 #include <sys/disk.h>
101 #include <sys/disklabel.h>
102 #include <sys/queue.h>
103 #include <sys/ktrace.h>
104 #include <sys/ptrace.h>
105 #include <sys/fcntl.h>
106 #include <sys/kauth.h>
107 #include <sys/vnode.h>
108 #include <sys/pmf.h>
109 
110 #include <uvm/uvm_extern.h>
111 
112 #include <dev/cons.h>
113 
114 #include <net/if.h>
115 
116 /* XXX these should eventually move to subr_autoconf.c */
117 static struct device *finddevice(const char *);
118 static struct device *getdisk(char *, int, int, dev_t *, int);
119 static struct device *parsedisk(char *, int, int, dev_t *);
120 static const char *getwedgename(const char *, int);
121 
122 /*
123  * A generic linear hook.
124  */
/* One registered callback, linked into a hook_list_t. */
struct hook_desc {
	LIST_ENTRY(hook_desc) hk_list;	/* linkage within the owning list */
	/*
	 * Callback.  Stored as void (*)(void *); the runners in this file
	 * cast it back to the signature it was established with.
	 */
	void	(*hk_fn)(void *);
	void	*hk_arg;		/* argument saved at establish time */
};
/* Head of a list of hook_desc entries. */
typedef LIST_HEAD(, hook_desc) hook_list_t;
131 
132 #ifdef TFTPROOT
133 int tftproot_dhcpboot(struct device *);
134 #endif
135 
136 dev_t	dumpcdev;	/* for savecore */
137 
138 void
139 uio_setup_sysspace(struct uio *uio)
140 {
141 
142 	uio->uio_vmspace = vmspace_kernel();
143 }
144 
145 int
146 uiomove(void *buf, size_t n, struct uio *uio)
147 {
148 	struct vmspace *vm = uio->uio_vmspace;
149 	struct iovec *iov;
150 	size_t cnt;
151 	int error = 0;
152 	char *cp = buf;
153 
154 	ASSERT_SLEEPABLE();
155 
156 #ifdef DIAGNOSTIC
157 	if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
158 		panic("uiomove: mode");
159 #endif
160 	while (n > 0 && uio->uio_resid) {
161 		iov = uio->uio_iov;
162 		cnt = iov->iov_len;
163 		if (cnt == 0) {
164 			KASSERT(uio->uio_iovcnt > 0);
165 			uio->uio_iov++;
166 			uio->uio_iovcnt--;
167 			continue;
168 		}
169 		if (cnt > n)
170 			cnt = n;
171 		if (!VMSPACE_IS_KERNEL_P(vm)) {
172 			if (curcpu()->ci_schedstate.spc_flags &
173 			    SPCF_SHOULDYIELD)
174 				preempt();
175 		}
176 
177 		if (uio->uio_rw == UIO_READ) {
178 			error = copyout_vmspace(vm, cp, iov->iov_base,
179 			    cnt);
180 		} else {
181 			error = copyin_vmspace(vm, iov->iov_base, cp,
182 			    cnt);
183 		}
184 		if (error) {
185 			break;
186 		}
187 		iov->iov_base = (char *)iov->iov_base + cnt;
188 		iov->iov_len -= cnt;
189 		uio->uio_resid -= cnt;
190 		uio->uio_offset += cnt;
191 		cp += cnt;
192 		KDASSERT(cnt <= n);
193 		n -= cnt;
194 	}
195 
196 	return (error);
197 }
198 
199 /*
200  * Wrapper for uiomove() that validates the arguments against a known-good
201  * kernel buffer.
202  */
203 int
204 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio)
205 {
206 	size_t offset;
207 
208 	if (uio->uio_offset < 0 || /* uio->uio_resid < 0 || */
209 	    (offset = uio->uio_offset) != uio->uio_offset)
210 		return (EINVAL);
211 	if (offset >= buflen)
212 		return (0);
213 	return (uiomove((char *)buf + offset, buflen - offset, uio));
214 }
215 
216 /*
217  * Give next character to user as result of read.
218  */
219 int
220 ureadc(int c, struct uio *uio)
221 {
222 	struct iovec *iov;
223 
224 	if (uio->uio_resid <= 0)
225 		panic("ureadc: non-positive resid");
226 again:
227 	if (uio->uio_iovcnt <= 0)
228 		panic("ureadc: non-positive iovcnt");
229 	iov = uio->uio_iov;
230 	if (iov->iov_len <= 0) {
231 		uio->uio_iovcnt--;
232 		uio->uio_iov++;
233 		goto again;
234 	}
235 	if (!VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) {
236 		if (subyte(iov->iov_base, c) < 0)
237 			return (EFAULT);
238 	} else {
239 		*(char *)iov->iov_base = c;
240 	}
241 	iov->iov_base = (char *)iov->iov_base + 1;
242 	iov->iov_len--;
243 	uio->uio_resid--;
244 	uio->uio_offset++;
245 	return (0);
246 }
247 
248 /*
249  * Like copyin(), but operates on an arbitrary vmspace.
250  */
251 int
252 copyin_vmspace(struct vmspace *vm, const void *uaddr, void *kaddr, size_t len)
253 {
254 	struct iovec iov;
255 	struct uio uio;
256 	int error;
257 
258 	if (len == 0)
259 		return (0);
260 
261 	if (VMSPACE_IS_KERNEL_P(vm)) {
262 		return kcopy(uaddr, kaddr, len);
263 	}
264 	if (__predict_true(vm == curproc->p_vmspace)) {
265 		return copyin(uaddr, kaddr, len);
266 	}
267 
268 	iov.iov_base = kaddr;
269 	iov.iov_len = len;
270 	uio.uio_iov = &iov;
271 	uio.uio_iovcnt = 1;
272 	uio.uio_offset = (off_t)(uintptr_t)uaddr;
273 	uio.uio_resid = len;
274 	uio.uio_rw = UIO_READ;
275 	UIO_SETUP_SYSSPACE(&uio);
276 	error = uvm_io(&vm->vm_map, &uio);
277 
278 	return (error);
279 }
280 
281 /*
282  * Like copyout(), but operates on an arbitrary vmspace.
283  */
284 int
285 copyout_vmspace(struct vmspace *vm, const void *kaddr, void *uaddr, size_t len)
286 {
287 	struct iovec iov;
288 	struct uio uio;
289 	int error;
290 
291 	if (len == 0)
292 		return (0);
293 
294 	if (VMSPACE_IS_KERNEL_P(vm)) {
295 		return kcopy(kaddr, uaddr, len);
296 	}
297 	if (__predict_true(vm == curproc->p_vmspace)) {
298 		return copyout(kaddr, uaddr, len);
299 	}
300 
301 	iov.iov_base = __UNCONST(kaddr); /* XXXUNCONST cast away const */
302 	iov.iov_len = len;
303 	uio.uio_iov = &iov;
304 	uio.uio_iovcnt = 1;
305 	uio.uio_offset = (off_t)(uintptr_t)uaddr;
306 	uio.uio_resid = len;
307 	uio.uio_rw = UIO_WRITE;
308 	UIO_SETUP_SYSSPACE(&uio);
309 	error = uvm_io(&vm->vm_map, &uio);
310 
311 	return (error);
312 }
313 
314 /*
315  * Like copyin(), but operates on an arbitrary process.
316  */
317 int
318 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len)
319 {
320 	struct vmspace *vm;
321 	int error;
322 
323 	error = proc_vmspace_getref(p, &vm);
324 	if (error) {
325 		return error;
326 	}
327 	error = copyin_vmspace(vm, uaddr, kaddr, len);
328 	uvmspace_free(vm);
329 
330 	return error;
331 }
332 
333 /*
334  * Like copyout(), but operates on an arbitrary process.
335  */
336 int
337 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len)
338 {
339 	struct vmspace *vm;
340 	int error;
341 
342 	error = proc_vmspace_getref(p, &vm);
343 	if (error) {
344 		return error;
345 	}
346 	error = copyout_vmspace(vm, kaddr, uaddr, len);
347 	uvmspace_free(vm);
348 
349 	return error;
350 }
351 
352 /*
353  * Like copyin(), except it operates on kernel addresses when the FKIOCTL
354  * flag is passed in `ioctlflags' from the ioctl call.
355  */
356 int
357 ioctl_copyin(int ioctlflags, const void *src, void *dst, size_t len)
358 {
359 	if (ioctlflags & FKIOCTL)
360 		return kcopy(src, dst, len);
361 	return copyin(src, dst, len);
362 }
363 
364 /*
365  * Like copyout(), except it operates on kernel addresses when the FKIOCTL
366  * flag is passed in `ioctlflags' from the ioctl call.
367  */
368 int
369 ioctl_copyout(int ioctlflags, const void *src, void *dst, size_t len)
370 {
371 	if (ioctlflags & FKIOCTL)
372 		return kcopy(src, dst, len);
373 	return copyout(src, dst, len);
374 }
375 
376 static void *
377 hook_establish(hook_list_t *list, void (*fn)(void *), void *arg)
378 {
379 	struct hook_desc *hd;
380 
381 	hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT);
382 	if (hd == NULL)
383 		return (NULL);
384 
385 	hd->hk_fn = fn;
386 	hd->hk_arg = arg;
387 	LIST_INSERT_HEAD(list, hd, hk_list);
388 
389 	return (hd);
390 }
391 
392 static void
393 hook_disestablish(hook_list_t *list, void *vhook)
394 {
395 #ifdef DIAGNOSTIC
396 	struct hook_desc *hd;
397 
398 	LIST_FOREACH(hd, list, hk_list) {
399                 if (hd == vhook)
400 			break;
401 	}
402 
403 	if (hd == NULL)
404 		panic("hook_disestablish: hook %p not established", vhook);
405 #endif
406 	LIST_REMOVE((struct hook_desc *)vhook, hk_list);
407 	free(vhook, M_DEVBUF);
408 }
409 
410 static void
411 hook_destroy(hook_list_t *list)
412 {
413 	struct hook_desc *hd;
414 
415 	while ((hd = LIST_FIRST(list)) != NULL) {
416 		LIST_REMOVE(hd, hk_list);
417 		free(hd, M_DEVBUF);
418 	}
419 }
420 
421 static void
422 hook_proc_run(hook_list_t *list, struct proc *p)
423 {
424 	struct hook_desc *hd;
425 
426 	LIST_FOREACH(hd, list, hk_list)
427 		((void (*)(struct proc *, void *))*hd->hk_fn)(p, hd->hk_arg);
428 }
429 
430 /*
431  * "Shutdown hook" types, functions, and variables.
432  *
433  * Should be invoked immediately before the
434  * system is halted or rebooted, i.e. after file systems unmounted,
435  * after crash dump done, etc.
436  *
437  * Each shutdown hook is removed from the list before it's run, so that
438  * it won't be run again.
439  */
440 
441 static hook_list_t shutdownhook_list;
442 
443 void *
444 shutdownhook_establish(void (*fn)(void *), void *arg)
445 {
446 	return hook_establish(&shutdownhook_list, fn, arg);
447 }
448 
449 void
450 shutdownhook_disestablish(void *vhook)
451 {
452 	hook_disestablish(&shutdownhook_list, vhook);
453 }
454 
455 /*
456  * Run shutdown hooks.  Should be invoked immediately before the
457  * system is halted or rebooted, i.e. after file systems unmounted,
458  * after crash dump done, etc.
459  *
460  * Each shutdown hook is removed from the list before it's run, so that
461  * it won't be run again.
462  */
463 void
464 doshutdownhooks(void)
465 {
466 	struct hook_desc *dp;
467 
468 	while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) {
469 		LIST_REMOVE(dp, hk_list);
470 		(*dp->hk_fn)(dp->hk_arg);
471 #if 0
472 		/*
473 		 * Don't bother freeing the hook structure,, since we may
474 		 * be rebooting because of a memory corruption problem,
475 		 * and this might only make things worse.  It doesn't
476 		 * matter, anyway, since the system is just about to
477 		 * reboot.
478 		 */
479 		free(dp, M_DEVBUF);
480 #endif
481 	}
482 
483 	pmf_system_shutdown(boothowto);
484 }
485 
486 /*
487  * "Mountroot hook" types, functions, and variables.
488  */
489 
490 static hook_list_t mountroothook_list;
491 
492 void *
493 mountroothook_establish(void (*fn)(struct device *), struct device *dev)
494 {
495 	return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev);
496 }
497 
498 void
499 mountroothook_disestablish(void *vhook)
500 {
501 	hook_disestablish(&mountroothook_list, vhook);
502 }
503 
504 void
505 mountroothook_destroy(void)
506 {
507 	hook_destroy(&mountroothook_list);
508 }
509 
510 void
511 domountroothook(void)
512 {
513 	struct hook_desc *hd;
514 
515 	LIST_FOREACH(hd, &mountroothook_list, hk_list) {
516 		if (hd->hk_arg == (void *)root_device) {
517 			(*hd->hk_fn)(hd->hk_arg);
518 			return;
519 		}
520 	}
521 }
522 
523 static hook_list_t exechook_list;
524 
525 void *
526 exechook_establish(void (*fn)(struct proc *, void *), void *arg)
527 {
528 	return hook_establish(&exechook_list, (void (*)(void *))fn, arg);
529 }
530 
531 void
532 exechook_disestablish(void *vhook)
533 {
534 	hook_disestablish(&exechook_list, vhook);
535 }
536 
537 /*
538  * Run exec hooks.
539  */
540 void
541 doexechooks(struct proc *p)
542 {
543 	hook_proc_run(&exechook_list, p);
544 }
545 
546 static hook_list_t exithook_list;
547 
548 void *
549 exithook_establish(void (*fn)(struct proc *, void *), void *arg)
550 {
551 	return hook_establish(&exithook_list, (void (*)(void *))fn, arg);
552 }
553 
554 void
555 exithook_disestablish(void *vhook)
556 {
557 	hook_disestablish(&exithook_list, vhook);
558 }
559 
560 /*
561  * Run exit hooks.
562  */
563 void
564 doexithooks(struct proc *p)
565 {
566 	hook_proc_run(&exithook_list, p);
567 }
568 
569 static hook_list_t forkhook_list;
570 
571 void *
572 forkhook_establish(void (*fn)(struct proc *, struct proc *))
573 {
574 	return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL);
575 }
576 
577 void
578 forkhook_disestablish(void *vhook)
579 {
580 	hook_disestablish(&forkhook_list, vhook);
581 }
582 
583 /*
584  * Run fork hooks.
585  */
586 void
587 doforkhooks(struct proc *p2, struct proc *p1)
588 {
589 	struct hook_desc *hd;
590 
591 	LIST_FOREACH(hd, &forkhook_list, hk_list) {
592 		((void (*)(struct proc *, struct proc *))*hd->hk_fn)
593 		    (p2, p1);
594 	}
595 }
596 
597 /*
598  * "Power hook" types, functions, and variables.
599  * The list of power hooks is kept ordered with the last registered hook
600  * first.
601  * When running the hooks on power down the hooks are called in reverse
602  * registration order, when powering up in registration order.
603  */
/* One registered power hook, linked into powerhook_list. */
struct powerhook_desc {
	CIRCLEQ_ENTRY(powerhook_desc) sfd_list;	/* linkage in powerhook_list */
	void	(*sfd_fn)(int, void *);	/* callback, called as fn(why, arg) */
	void	*sfd_arg;		/* argument saved at establish time */
	char	sfd_name[16];		/* name copied (with truncation) at establish time */
};

/* All established power hooks; new hooks are inserted at the head. */
static CIRCLEQ_HEAD(, powerhook_desc) powerhook_list =
    CIRCLEQ_HEAD_INITIALIZER(powerhook_list);
613 
614 void *
615 powerhook_establish(const char *name, void (*fn)(int, void *), void *arg)
616 {
617 	struct powerhook_desc *ndp;
618 
619 	ndp = (struct powerhook_desc *)
620 	    malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT);
621 	if (ndp == NULL)
622 		return (NULL);
623 
624 	ndp->sfd_fn = fn;
625 	ndp->sfd_arg = arg;
626 	strlcpy(ndp->sfd_name, name, sizeof(ndp->sfd_name));
627 	CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list);
628 
629 	aprint_error("%s: WARNING: powerhook_establish is deprecated\n", name);
630 	return (ndp);
631 }
632 
633 void
634 powerhook_disestablish(void *vhook)
635 {
636 #ifdef DIAGNOSTIC
637 	struct powerhook_desc *dp;
638 
639 	CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list)
640                 if (dp == vhook)
641 			goto found;
642 	panic("powerhook_disestablish: hook %p not established", vhook);
643  found:
644 #endif
645 
646 	CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook,
647 	    sfd_list);
648 	free(vhook, M_DEVBUF);
649 }
650 
651 /*
652  * Run power hooks.
653  */
654 void
655 dopowerhooks(int why)
656 {
657 	struct powerhook_desc *dp;
658 
659 #ifdef POWERHOOK_DEBUG
660 	const char *why_name;
661 	static const char * pwr_names[] = {PWR_NAMES};
662 	why_name = why < __arraycount(pwr_names) ? pwr_names[why] : "???";
663 #endif
664 
665 	if (why == PWR_RESUME || why == PWR_SOFTRESUME) {
666 		CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) {
667 #ifdef POWERHOOK_DEBUG
668 			printf("dopowerhooks %s: %s (%p)\n", why_name, dp->sfd_name, dp);
669 #endif
670 			(*dp->sfd_fn)(why, dp->sfd_arg);
671 		}
672 	} else {
673 		CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) {
674 #ifdef POWERHOOK_DEBUG
675 			printf("dopowerhooks %s: %s (%p)\n", why_name, dp->sfd_name, dp);
676 #endif
677 			(*dp->sfd_fn)(why, dp->sfd_arg);
678 		}
679 	}
680 
681 #ifdef POWERHOOK_DEBUG
682 	printf("dopowerhooks: %s done\n", why_name);
683 #endif
684 }
685 
686 static int
687 isswap(struct device *dv)
688 {
689 	struct dkwedge_info wi;
690 	struct vnode *vn;
691 	int error;
692 
693 	if (device_class(dv) != DV_DISK || !device_is_a(dv, "dk"))
694 		return 0;
695 
696 	if ((vn = opendisk(dv)) == NULL)
697 		return 0;
698 
699 	error = VOP_IOCTL(vn, DIOCGWEDGEINFO, &wi, FREAD, NOCRED);
700 	VOP_CLOSE(vn, FREAD, NOCRED);
701 	vput(vn);
702 	if (error) {
703 #ifdef DEBUG_WEDGE
704 		printf("%s: Get wedge info returned %d\n", device_xname(dv), error);
705 #endif
706 		return 0;
707 	}
708 	return strcmp(wi.dkw_ptype, DKW_PTYPE_SWAP) == 0;
709 }
710 
711 /*
712  * Determine the root device and, if instructed to, the root file system.
713  */
714 
715 #include "md.h"
716 
717 #if NMD > 0
718 extern struct cfdriver md_cd;
719 #ifdef MEMORY_DISK_IS_ROOT
720 int md_is_root = 1;
721 #else
722 int md_is_root = 0;
723 #endif
724 #endif
725 
726 /*
 * The device and wedge that we booted from.  If booted_wedge is NULL,
 * then we might consult booted_partition.
729  */
730 struct device *booted_device;
731 struct device *booted_wedge;
732 int booted_partition;
733 
734 /*
735  * Use partition letters if it's a disk class but not a wedge.
736  * XXX Check for wedge is kinda gross.
737  */
738 #define	DEV_USES_PARTITIONS(dv)						\
739 	(device_class((dv)) == DV_DISK &&				\
740 	 !device_is_a((dv), "dk"))
741 
742 void
743 setroot(struct device *bootdv, int bootpartition)
744 {
745 	struct device *dv;
746 	int len, majdev;
747 	dev_t nrootdev;
748 	dev_t ndumpdev = NODEV;
749 	char buf[128];
750 	const char *rootdevname;
751 	const char *dumpdevname;
752 	struct device *rootdv = NULL;		/* XXX gcc -Wuninitialized */
753 	struct device *dumpdv = NULL;
754 	struct ifnet *ifp;
755 	const char *deffsname;
756 	struct vfsops *vops;
757 
758 #ifdef TFTPROOT
759 	if (tftproot_dhcpboot(bootdv) != 0)
760 		boothowto |= RB_ASKNAME;
761 #endif
762 
763 #if NMD > 0
764 	if (md_is_root) {
765 		/*
766 		 * XXX there should be "root on md0" in the config file,
767 		 * but it isn't always
768 		 */
769 		bootdv = md_cd.cd_devs[0];
770 		bootpartition = 0;
771 	}
772 #endif
773 
774 	/*
775 	 * If NFS is specified as the file system, and we found
776 	 * a DV_DISK boot device (or no boot device at all), then
777 	 * find a reasonable network interface for "rootspec".
778 	 */
779 	vops = vfs_getopsbyname("nfs");
780 	if (vops != NULL && vops->vfs_mountroot == mountroot &&
781 	    rootspec == NULL &&
782 	    (bootdv == NULL || device_class(bootdv) != DV_IFNET)) {
783 		IFNET_FOREACH(ifp) {
784 			if ((ifp->if_flags &
785 			     (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
786 				break;
787 		}
788 		if (ifp == NULL) {
789 			/*
790 			 * Can't find a suitable interface; ask the
791 			 * user.
792 			 */
793 			boothowto |= RB_ASKNAME;
794 		} else {
795 			/*
796 			 * Have a suitable interface; behave as if
797 			 * the user specified this interface.
798 			 */
799 			rootspec = (const char *)ifp->if_xname;
800 		}
801 	}
802 	if (vops != NULL)
803 		vfs_delref(vops);
804 
805 	/*
806 	 * If wildcarded root and we the boot device wasn't determined,
807 	 * ask the user.
808 	 */
809 	if (rootspec == NULL && bootdv == NULL)
810 		boothowto |= RB_ASKNAME;
811 
812  top:
813 	if (boothowto & RB_ASKNAME) {
814 		struct device *defdumpdv;
815 
816 		for (;;) {
817 			printf("root device");
818 			if (bootdv != NULL) {
819 				printf(" (default %s", device_xname(bootdv));
820 				if (DEV_USES_PARTITIONS(bootdv))
821 					printf("%c", bootpartition + 'a');
822 				printf(")");
823 			}
824 			printf(": ");
825 			len = cngetsn(buf, sizeof(buf));
826 			if (len == 0 && bootdv != NULL) {
827 				strlcpy(buf, device_xname(bootdv), sizeof(buf));
828 				len = strlen(buf);
829 			}
830 			if (len > 0 && buf[len - 1] == '*') {
831 				buf[--len] = '\0';
832 				dv = getdisk(buf, len, 1, &nrootdev, 0);
833 				if (dv != NULL) {
834 					rootdv = dv;
835 					break;
836 				}
837 			}
838 			dv = getdisk(buf, len, bootpartition, &nrootdev, 0);
839 			if (dv != NULL) {
840 				rootdv = dv;
841 				break;
842 			}
843 		}
844 
845 		/*
846 		 * Set up the default dump device.  If root is on
847 		 * a network device, there is no default dump
848 		 * device, since we don't support dumps to the
849 		 * network.
850 		 */
851 		if (DEV_USES_PARTITIONS(rootdv) == 0)
852 			defdumpdv = NULL;
853 		else
854 			defdumpdv = rootdv;
855 
856 		for (;;) {
857 			printf("dump device");
858 			if (defdumpdv != NULL) {
859 				/*
860 				 * Note, we know it's a disk if we get here.
861 				 */
862 				printf(" (default %sb)", device_xname(defdumpdv));
863 			}
864 			printf(": ");
865 			len = cngetsn(buf, sizeof(buf));
866 			if (len == 0) {
867 				if (defdumpdv != NULL) {
868 					ndumpdev = MAKEDISKDEV(major(nrootdev),
869 					    DISKUNIT(nrootdev), 1);
870 				}
871 				dumpdv = defdumpdv;
872 				break;
873 			}
874 			if (len == 4 && strcmp(buf, "none") == 0) {
875 				dumpdv = NULL;
876 				break;
877 			}
878 			dv = getdisk(buf, len, 1, &ndumpdev, 1);
879 			if (dv != NULL) {
880 				dumpdv = dv;
881 				break;
882 			}
883 		}
884 
885 		rootdev = nrootdev;
886 		dumpdev = ndumpdev;
887 
888 		for (vops = LIST_FIRST(&vfs_list); vops != NULL;
889 		     vops = LIST_NEXT(vops, vfs_list)) {
890 			if (vops->vfs_mountroot != NULL &&
891 			    vops->vfs_mountroot == mountroot)
892 			break;
893 		}
894 
895 		if (vops == NULL) {
896 			mountroot = NULL;
897 			deffsname = "generic";
898 		} else
899 			deffsname = vops->vfs_name;
900 
901 		for (;;) {
902 			printf("file system (default %s): ", deffsname);
903 			len = cngetsn(buf, sizeof(buf));
904 			if (len == 0)
905 				break;
906 			if (len == 4 && strcmp(buf, "halt") == 0)
907 				cpu_reboot(RB_HALT, NULL);
908 			else if (len == 6 && strcmp(buf, "reboot") == 0)
909 				cpu_reboot(0, NULL);
910 #if defined(DDB)
911 			else if (len == 3 && strcmp(buf, "ddb") == 0) {
912 				console_debugger();
913 			}
914 #endif
915 			else if (len == 7 && strcmp(buf, "generic") == 0) {
916 				mountroot = NULL;
917 				break;
918 			}
919 			vops = vfs_getopsbyname(buf);
920 			if (vops == NULL || vops->vfs_mountroot == NULL) {
921 				printf("use one of: generic");
922 				for (vops = LIST_FIRST(&vfs_list);
923 				     vops != NULL;
924 				     vops = LIST_NEXT(vops, vfs_list)) {
925 					if (vops->vfs_mountroot != NULL)
926 						printf(" %s", vops->vfs_name);
927 				}
928 #if defined(DDB)
929 				printf(" ddb");
930 #endif
931 				printf(" halt reboot\n");
932 			} else {
933 				mountroot = vops->vfs_mountroot;
934 				vfs_delref(vops);
935 				break;
936 			}
937 		}
938 
939 	} else if (rootspec == NULL) {
940 		/*
941 		 * Wildcarded root; use the boot device.
942 		 */
943 		rootdv = bootdv;
944 
945 		if (bootdv)
946 			majdev = devsw_name2blk(device_xname(bootdv), NULL, 0);
947 		else
948 			majdev = -1;
949 		if (majdev >= 0) {
950 			/*
951 			 * Root is on a disk.  `bootpartition' is root,
952 			 * unless the device does not use partitions.
953 			 */
954 			if (DEV_USES_PARTITIONS(bootdv))
955 				rootdev = MAKEDISKDEV(majdev,
956 						      device_unit(bootdv),
957 						      bootpartition);
958 			else
959 				rootdev = makedev(majdev, device_unit(bootdv));
960 		}
961 	} else {
962 
963 		/*
964 		 * `root on <dev> ...'
965 		 */
966 
967 		/*
968 		 * If it's a network interface, we can bail out
969 		 * early.
970 		 */
971 		dv = finddevice(rootspec);
972 		if (dv != NULL && device_class(dv) == DV_IFNET) {
973 			rootdv = dv;
974 			goto haveroot;
975 		}
976 
977 		if (rootdev == NODEV &&
978 		    device_class(dv) == DV_DISK && device_is_a(dv, "dk") &&
979 		    (majdev = devsw_name2blk(device_xname(dv), NULL, 0)) >= 0)
980 			rootdev = makedev(majdev, device_unit(dv));
981 
982 		rootdevname = devsw_blk2name(major(rootdev));
983 		if (rootdevname == NULL) {
984 			printf("unknown device major 0x%x\n", rootdev);
985 			boothowto |= RB_ASKNAME;
986 			goto top;
987 		}
988 		memset(buf, 0, sizeof(buf));
989 		snprintf(buf, sizeof(buf), "%s%d", rootdevname,
990 		    DISKUNIT(rootdev));
991 
992 		rootdv = finddevice(buf);
993 		if (rootdv == NULL) {
994 			printf("device %s (0x%x) not configured\n",
995 			    buf, rootdev);
996 			boothowto |= RB_ASKNAME;
997 			goto top;
998 		}
999 	}
1000 
1001  haveroot:
1002 
1003 	root_device = rootdv;
1004 
1005 	switch (device_class(rootdv)) {
1006 	case DV_IFNET:
1007 	case DV_DISK:
1008 		aprint_normal("root on %s", device_xname(rootdv));
1009 		if (DEV_USES_PARTITIONS(rootdv))
1010 			aprint_normal("%c", DISKPART(rootdev) + 'a');
1011 		break;
1012 
1013 	default:
1014 		printf("can't determine root device\n");
1015 		boothowto |= RB_ASKNAME;
1016 		goto top;
1017 	}
1018 
1019 	/*
1020 	 * Now configure the dump device.
1021 	 *
1022 	 * If we haven't figured out the dump device, do so, with
1023 	 * the following rules:
1024 	 *
1025 	 *	(a) We already know dumpdv in the RB_ASKNAME case.
1026 	 *
1027 	 *	(b) If dumpspec is set, try to use it.  If the device
1028 	 *	    is not available, punt.
1029 	 *
1030 	 *	(c) If dumpspec is not set, the dump device is
1031 	 *	    wildcarded or unspecified.  If the root device
1032 	 *	    is DV_IFNET, punt.  Otherwise, use partition b
1033 	 *	    of the root device.
1034 	 */
1035 
1036 	if (boothowto & RB_ASKNAME) {		/* (a) */
1037 		if (dumpdv == NULL)
1038 			goto nodumpdev;
1039 	} else if (dumpspec != NULL) {		/* (b) */
1040 		if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) {
1041 			/*
1042 			 * Operator doesn't want a dump device.
1043 			 * Or looks like they tried to pick a network
1044 			 * device.  Oops.
1045 			 */
1046 			goto nodumpdev;
1047 		}
1048 
1049 		dumpdevname = devsw_blk2name(major(dumpdev));
1050 		if (dumpdevname == NULL)
1051 			goto nodumpdev;
1052 		memset(buf, 0, sizeof(buf));
1053 		snprintf(buf, sizeof(buf), "%s%d", dumpdevname,
1054 		    DISKUNIT(dumpdev));
1055 
1056 		dumpdv = finddevice(buf);
1057 		if (dumpdv == NULL) {
1058 			/*
1059 			 * Device not configured.
1060 			 */
1061 			goto nodumpdev;
1062 		}
1063 	} else {				/* (c) */
1064 		if (DEV_USES_PARTITIONS(rootdv) == 0) {
1065 			for (dv = TAILQ_FIRST(&alldevs); dv != NULL;
1066 			    dv = TAILQ_NEXT(dv, dv_list))
1067 				if (isswap(dv))
1068 					break;
1069 			if (dv == NULL)
1070 				goto nodumpdev;
1071 
1072 			majdev = devsw_name2blk(device_xname(dv), NULL, 0);
1073 			if (majdev < 0)
1074 				goto nodumpdev;
1075 			dumpdv = dv;
1076 			dumpdev = makedev(majdev, device_unit(dumpdv));
1077 		} else {
1078 			dumpdv = rootdv;
1079 			dumpdev = MAKEDISKDEV(major(rootdev),
1080 			    device_unit(dumpdv), 1);
1081 		}
1082 	}
1083 
1084 	dumpcdev = devsw_blk2chr(dumpdev);
1085 	aprint_normal(" dumps on %s", device_xname(dumpdv));
1086 	if (DEV_USES_PARTITIONS(dumpdv))
1087 		aprint_normal("%c", DISKPART(dumpdev) + 'a');
1088 	aprint_normal("\n");
1089 	return;
1090 
1091  nodumpdev:
1092 	dumpdev = NODEV;
1093 	dumpcdev = NODEV;
1094 	aprint_normal("\n");
1095 }
1096 
/*
 * Look a device up by name.  A name carrying the wedge prefix
 * recognised by getwedgename() is resolved with
 * dkwedge_find_by_wname(); any other name with device_find_by_xname().
 */
static struct device *
finddevice(const char *name)
{
	const char *wname = getwedgename(name, strlen(name));

	if (wname == NULL)
		return device_find_by_xname(name);

	return dkwedge_find_by_wname(wname);
}
1107 
1108 static struct device *
1109 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump)
1110 {
1111 	struct device	*dv;
1112 
1113 	if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
1114 		printf("use one of:");
1115 		TAILQ_FOREACH(dv, &alldevs, dv_list) {
1116 			if (DEV_USES_PARTITIONS(dv))
1117 				printf(" %s[a-%c]", device_xname(dv),
1118 				    'a' + MAXPARTITIONS - 1);
1119 			else if (device_class(dv) == DV_DISK)
1120 				printf(" %s", device_xname(dv));
1121 			if (isdump == 0 && device_class(dv) == DV_IFNET)
1122 				printf(" %s", device_xname(dv));
1123 		}
1124 		dkwedge_print_wnames();
1125 		if (isdump)
1126 			printf(" none");
1127 #if defined(DDB)
1128 		printf(" ddb");
1129 #endif
1130 		printf(" halt reboot\n");
1131 	}
1132 	return dv;
1133 }
1134 
/*
 * If the first `namelen' bytes of `name' start with the prefix
 * "wedge:", return a pointer to the text following the prefix (inside
 * `name' itself).  Otherwise return NULL.
 */
static const char *
getwedgename(const char *name, int namelen)
{
	static const char prefix[] = "wedge:";
	const int prefixlen = sizeof(prefix) - 1;

	if (namelen >= prefixlen && strncmp(name, prefix, prefixlen) == 0)
		return name + prefixlen;

	return NULL;
}
1146 
1147 static struct device *
1148 parsedisk(char *str, int len, int defpart, dev_t *devp)
1149 {
1150 	struct device *dv;
1151 	const char *wname;
1152 	char *cp, c;
1153 	int majdev, part;
1154 	if (len == 0)
1155 		return (NULL);
1156 
1157 	if (len == 4 && strcmp(str, "halt") == 0)
1158 		cpu_reboot(RB_HALT, NULL);
1159 	else if (len == 6 && strcmp(str, "reboot") == 0)
1160 		cpu_reboot(0, NULL);
1161 #if defined(DDB)
1162 	else if (len == 3 && strcmp(str, "ddb") == 0)
1163 		console_debugger();
1164 #endif
1165 
1166 	cp = str + len - 1;
1167 	c = *cp;
1168 
1169 	if ((wname = getwedgename(str, len)) != NULL) {
1170 		if ((dv = dkwedge_find_by_wname(wname)) == NULL)
1171 			return NULL;
1172 		part = defpart;
1173 		goto gotdisk;
1174 	} else if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) {
1175 		part = c - 'a';
1176 		*cp = '\0';
1177 	} else
1178 		part = defpart;
1179 
1180 	dv = finddevice(str);
1181 	if (dv != NULL) {
1182 		if (device_class(dv) == DV_DISK) {
1183  gotdisk:
1184 			majdev = devsw_name2blk(device_xname(dv), NULL, 0);
1185 			if (majdev < 0)
1186 				panic("parsedisk");
1187 			if (DEV_USES_PARTITIONS(dv))
1188 				*devp = MAKEDISKDEV(majdev, device_unit(dv),
1189 						    part);
1190 			else
1191 				*devp = makedev(majdev, device_unit(dv));
1192 		}
1193 
1194 		if (device_class(dv) == DV_IFNET)
1195 			*devp = NODEV;
1196 	}
1197 
1198 	*cp = c;
1199 	return (dv);
1200 }
1201 
/*
 * snprintf() `bytes' into `buf', scaling it down by `divisor' as often
 * as needed so that the number (plus a possible ` ' + prefix + suffix
 * extension) fits into len bytes (including the terminating NUL).
 *
 * `divisor' selects the prefix set: 1024 uses " KMGTPE", anything else
 * uses the SI set " kMGTPE".  It must be at least 2.
 *
 * Returns the value returned by snprintf(), or -1 if there was a problem
 * (NULL `buf' or `suffix', `len' smaller than 4 + strlen(suffix), or an
 * unusable `divisor').  When -1 is returned and len > 0, buf holds the
 * empty string.
 *
 * E.g, given a len of 9, a suffix of `B' and a divisor of 1024:
 *	bytes		result
 *	-----		------
 *	99999		`99999 B'
 *	100000		`97 KB'
 *	66715648	`65152 KB'
 *	252215296	`240 MB'
 */
int
humanize_number(char *buf, size_t len, uint64_t bytes, const char *suffix,
    int divisor)
{
	/* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */
	const char *prefixes;
	/* largest value that can still be multiplied by 10 without wrapping */
	const uint64_t	mulmax = ~(uint64_t)0 / 10;
	int		r, unlimited;
	uint64_t	umax;
	size_t		i, digits, suffixlen;

	if (buf == NULL || suffix == NULL)
		return (-1);
	if (len > 0)
		buf[0] = '\0';
	suffixlen = strlen(suffix);
	/* check if enough room for `x y' + suffix + `\0' */
	if (len < 4 + suffixlen)
		return (-1);
	/* a divisor of 0 would trap; 1 or a negative value cannot scale */
	if (divisor < 2)
		return (-1);

	if (divisor == 1024) {
		/*
		 * binary multiplies
		 * XXX IEC 60027-2 recommends Ki, Mi, Gi...
		 */
		prefixes = " KMGTPE";
	} else
		prefixes = " kMGTPE"; /* SI for decimal multiplies */

	/*
	 * umax = 10^digits is the first value that no longer fits.  If
	 * that power of ten exceeds the range of uint64_t, every possible
	 * value fits and no scaling is needed; computing it anyway would
	 * wrap around and yield a bogus (possibly zero) limit.
	 */
	digits = len - suffixlen - 3;
	unlimited = 0;
	umax = 1;
	for (i = 0; i < digits; i++) {
		if (umax > mulmax) {
			unlimited = 1;
			break;
		}
		umax *= 10;
	}

	i = 0;
	if (!unlimited) {
		for (; bytes >= umax && prefixes[i + 1]; i++)
			bytes /= (uint64_t)divisor;
	}

	/* unscaled values get prefixes[0] (a blank) as their separator */
	r = snprintf(buf, len, "%llu%s%c%s", (unsigned long long)bytes,
	    i == 0 ? "" : " ", prefixes[i], suffix);

	return (r);
}
1254 
/*
 * Format `bytes' into `buf' using humanize_number() with a suffix of
 * "B" and a divisor of 1024.  If the resulting string ends in " B"
 * (i.e. no prefix was applied), those two characters are cut off.
 * Returns humanize_number()'s return value unchanged.
 */
int
format_bytes(char *buf, size_t len, uint64_t bytes)
{
	int	ret;
	char	*tail;

	ret = humanize_number(buf, len, bytes, "B", 1024);
	if (ret == -1)
		return (ret);

	/* nuke the trailing ` B' if it exists */
	tail = buf + (strlen(buf) - 2);
	if (strcmp(tail, " B") == 0)
		*tail = '\0';

	return (ret);
}
1270 
/*
 * Report whether system call tracing is enabled for process `p'.
 *
 * The answer is true when the kernel is built with SYSCALL_DEBUG, when
 * (with KTRACE) either KTRFAC_SYSCALL or KTRFAC_SYSRET is set in
 * p->p_traceflag, or when (with PTRACE) PSL_SYSCALL is set in
 * p->p_slflag.  Otherwise it is false.
 */
bool
trace_is_enabled(struct proc *p)
{
	bool	tracing = false;

#if defined(SYSCALL_DEBUG)
	tracing = true;
#endif
#if defined(KTRACE)
	if (!tracing &&
	    ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET)))
		tracing = true;
#endif
#if defined(PTRACE)
	if (!tracing && ISSET(p->p_slflag, PSL_SYSCALL))
		tracing = true;
#endif

	return (tracing);
}
1291 
1292 /*
1293  * Start trace of particular system call. If process is being traced,
1294  * this routine is called by MD syscall dispatch code just before
1295  * a system call is actually executed.
1296  */
1297 int
1298 trace_enter(register_t code, const register_t *args, int narg)
1299 {
1300 #ifdef SYSCALL_DEBUG
1301 	scdebug_call(code, args);
1302 #endif /* SYSCALL_DEBUG */
1303 
1304 	ktrsyscall(code, args, narg);
1305 
1306 #ifdef PTRACE
1307 	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
1308 	    (PSL_SYSCALL|PSL_TRACED))
1309 		process_stoptrace();
1310 #endif
1311 	return 0;
1312 }
1313 
1314 /*
1315  * End trace of particular system call. If process is being traced,
1316  * this routine is called by MD syscall dispatch code just after
1317  * a system call finishes.
1318  * MD caller guarantees the passed 'code' is within the supported
1319  * system call number range for emulation the process runs under.
1320  */
1321 void
1322 trace_exit(register_t code, register_t rval[], int error)
1323 {
1324 #ifdef SYSCALL_DEBUG
1325 	scdebug_ret(code, error, rval);
1326 #endif /* SYSCALL_DEBUG */
1327 
1328 	ktrsysret(code, error, rval);
1329 
1330 #ifdef PTRACE
1331 	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
1332 	    (PSL_SYSCALL|PSL_TRACED))
1333 		process_stoptrace();
1334 #endif
1335 }
1336