xref: /netbsd-src/sys/kern/kern_subr.c (revision 62a8debe1dc62962e18a1c918def78666141273b)
1 /*	$NetBSD: kern_subr.c,v 1.206 2010/01/31 01:38:48 pooka Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 1999, 2002, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, and by Luke Mewburn.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  * (c) UNIX System Laboratories, Inc.
37  * All or some portions of this file are derived from material licensed
38  * to the University of California by American Telephone and Telegraph
39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40  * the permission of UNIX System Laboratories, Inc.
41  *
42  * Copyright (c) 1992, 1993
43  *	The Regents of the University of California.  All rights reserved.
44  *
45  * This software was developed by the Computer Systems Engineering group
46  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
47  * contributed to Berkeley.
48  *
49  * All advertising materials mentioning features or use of this software
50  * must display the following acknowledgement:
51  *	This product includes software developed by the University of
52  *	California, Lawrence Berkeley Laboratory.
53  *
54  * Redistribution and use in source and binary forms, with or without
55  * modification, are permitted provided that the following conditions
56  * are met:
57  * 1. Redistributions of source code must retain the above copyright
58  *    notice, this list of conditions and the following disclaimer.
59  * 2. Redistributions in binary form must reproduce the above copyright
60  *    notice, this list of conditions and the following disclaimer in the
61  *    documentation and/or other materials provided with the distribution.
62  * 3. Neither the name of the University nor the names of its contributors
63  *    may be used to endorse or promote products derived from this software
64  *    without specific prior written permission.
65  *
66  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
67  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
68  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
69  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
70  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
71  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
72  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
73  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
74  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
75  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
76  * SUCH DAMAGE.
77  *
78  *	@(#)kern_subr.c	8.4 (Berkeley) 2/14/95
79  */
80 
81 #include <sys/cdefs.h>
82 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.206 2010/01/31 01:38:48 pooka Exp $");
83 
84 #include "opt_ddb.h"
85 #include "opt_md.h"
86 #include "opt_syscall_debug.h"
87 #include "opt_ktrace.h"
88 #include "opt_ptrace.h"
89 #include "opt_tftproot.h"
90 
91 #include <sys/param.h>
92 #include <sys/systm.h>
93 #include <sys/proc.h>
94 #include <sys/mount.h>
95 #include <sys/device.h>
96 #include <sys/reboot.h>
97 #include <sys/conf.h>
98 #include <sys/disk.h>
99 #include <sys/disklabel.h>
100 #include <sys/queue.h>
101 #include <sys/ktrace.h>
102 #include <sys/ptrace.h>
103 #include <sys/fcntl.h>
104 #include <sys/kauth.h>
105 #include <sys/vnode.h>
106 #include <sys/syscallvar.h>
107 #include <sys/xcall.h>
108 #include <sys/module.h>
109 
110 #include <uvm/uvm_extern.h>
111 
112 #include <dev/cons.h>
113 
114 #include <net/if.h>
115 
116 /* XXX these should eventually move to subr_autoconf.c */
117 static device_t finddevice(const char *);
118 static device_t getdisk(char *, int, int, dev_t *, int);
119 static device_t parsedisk(char *, int, int, dev_t *);
120 static const char *getwedgename(const char *, int);
121 
122 #ifdef TFTPROOT
123 int tftproot_dhcpboot(device_t);
124 #endif
125 
126 dev_t	dumpcdev;	/* for savecore */
127 
128 static int
129 isswap(device_t dv)
130 {
131 	struct dkwedge_info wi;
132 	struct vnode *vn;
133 	int error;
134 
135 	if (device_class(dv) != DV_DISK || !device_is_a(dv, "dk"))
136 		return 0;
137 
138 	if ((vn = opendisk(dv)) == NULL)
139 		return 0;
140 
141 	error = VOP_IOCTL(vn, DIOCGWEDGEINFO, &wi, FREAD, NOCRED);
142 	VOP_CLOSE(vn, FREAD, NOCRED);
143 	vput(vn);
144 	if (error) {
145 #ifdef DEBUG_WEDGE
146 		printf("%s: Get wedge info returned %d\n", device_xname(dv), error);
147 #endif
148 		return 0;
149 	}
150 	return strcmp(wi.dkw_ptype, DKW_PTYPE_SWAP) == 0;
151 }
152 
153 /*
154  * Determine the root device and, if instructed to, the root file system.
155  */
156 
157 #include "md.h"
158 
159 #if NMD > 0
160 extern struct cfdriver md_cd;
161 #ifdef MEMORY_DISK_IS_ROOT
162 int md_is_root = 1;
163 #else
164 int md_is_root = 0;
165 #endif
166 #endif
167 
168 /*
169  * The device and wedge that we booted from.  If booted_wedge is NULL,
170  * then we might consult booted_partition.
171  */
172 device_t booted_device;
173 device_t booted_wedge;
174 int booted_partition;
175 
/*
 * Non-zero when `dv' is named with a trailing partition letter, i.e. it
 * is of class DV_DISK and is not a "dk" wedge.  The driver-name test is
 * only performed for disk-class devices.
 * XXX Identifying wedges by driver name is kinda gross.
 */
#define	DEV_USES_PARTITIONS(dv)						\
	((device_class((dv)) == DV_DISK) ?				\
	    !device_is_a((dv), "dk") : 0)
183 
184 void
185 setroot(device_t bootdv, int bootpartition)
186 {
187 	device_t dv;
188 	deviter_t di;
189 	int len, majdev;
190 	dev_t nrootdev;
191 	dev_t ndumpdev = NODEV;
192 	char buf[128];
193 	const char *rootdevname;
194 	const char *dumpdevname;
195 	device_t rootdv = NULL;		/* XXX gcc -Wuninitialized */
196 	device_t dumpdv = NULL;
197 	struct ifnet *ifp;
198 	const char *deffsname;
199 	struct vfsops *vops;
200 
201 #ifdef TFTPROOT
202 	if (tftproot_dhcpboot(bootdv) != 0)
203 		boothowto |= RB_ASKNAME;
204 #endif
205 
206 #if NMD > 0
207 	if (md_is_root) {
208 		/*
209 		 * XXX there should be "root on md0" in the config file,
210 		 * but it isn't always
211 		 */
212 		bootdv = md_cd.cd_devs[0];
213 		bootpartition = 0;
214 	}
215 #endif
216 
217 	/*
218 	 * If NFS is specified as the file system, and we found
219 	 * a DV_DISK boot device (or no boot device at all), then
220 	 * find a reasonable network interface for "rootspec".
221 	 */
222 	vops = vfs_getopsbyname(MOUNT_NFS);
223 	if (vops != NULL && strcmp(rootfstype, MOUNT_NFS) == 0 &&
224 	    rootspec == NULL &&
225 	    (bootdv == NULL || device_class(bootdv) != DV_IFNET)) {
226 		IFNET_FOREACH(ifp) {
227 			if ((ifp->if_flags &
228 			     (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
229 				break;
230 		}
231 		if (ifp == NULL) {
232 			/*
233 			 * Can't find a suitable interface; ask the
234 			 * user.
235 			 */
236 			boothowto |= RB_ASKNAME;
237 		} else {
238 			/*
239 			 * Have a suitable interface; behave as if
240 			 * the user specified this interface.
241 			 */
242 			rootspec = (const char *)ifp->if_xname;
243 		}
244 	}
245 	if (vops != NULL)
246 		vfs_delref(vops);
247 
248 	/*
249 	 * If wildcarded root and we the boot device wasn't determined,
250 	 * ask the user.
251 	 */
252 	if (rootspec == NULL && bootdv == NULL)
253 		boothowto |= RB_ASKNAME;
254 
255  top:
256 	if (boothowto & RB_ASKNAME) {
257 		device_t defdumpdv;
258 
259 		for (;;) {
260 			printf("root device");
261 			if (bootdv != NULL) {
262 				printf(" (default %s", device_xname(bootdv));
263 				if (DEV_USES_PARTITIONS(bootdv))
264 					printf("%c", bootpartition + 'a');
265 				printf(")");
266 			}
267 			printf(": ");
268 			len = cngetsn(buf, sizeof(buf));
269 			if (len == 0 && bootdv != NULL) {
270 				strlcpy(buf, device_xname(bootdv), sizeof(buf));
271 				len = strlen(buf);
272 			}
273 			if (len > 0 && buf[len - 1] == '*') {
274 				buf[--len] = '\0';
275 				dv = getdisk(buf, len, 1, &nrootdev, 0);
276 				if (dv != NULL) {
277 					rootdv = dv;
278 					break;
279 				}
280 			}
281 			dv = getdisk(buf, len, bootpartition, &nrootdev, 0);
282 			if (dv != NULL) {
283 				rootdv = dv;
284 				break;
285 			}
286 		}
287 
288 		/*
289 		 * Set up the default dump device.  If root is on
290 		 * a network device, there is no default dump
291 		 * device, since we don't support dumps to the
292 		 * network.
293 		 */
294 		if (DEV_USES_PARTITIONS(rootdv) == 0)
295 			defdumpdv = NULL;
296 		else
297 			defdumpdv = rootdv;
298 
299 		for (;;) {
300 			printf("dump device");
301 			if (defdumpdv != NULL) {
302 				/*
303 				 * Note, we know it's a disk if we get here.
304 				 */
305 				printf(" (default %sb)", device_xname(defdumpdv));
306 			}
307 			printf(": ");
308 			len = cngetsn(buf, sizeof(buf));
309 			if (len == 0) {
310 				if (defdumpdv != NULL) {
311 					ndumpdev = MAKEDISKDEV(major(nrootdev),
312 					    DISKUNIT(nrootdev), 1);
313 				}
314 				dumpdv = defdumpdv;
315 				break;
316 			}
317 			if (len == 4 && strcmp(buf, "none") == 0) {
318 				dumpdv = NULL;
319 				break;
320 			}
321 			dv = getdisk(buf, len, 1, &ndumpdev, 1);
322 			if (dv != NULL) {
323 				dumpdv = dv;
324 				break;
325 			}
326 		}
327 
328 		rootdev = nrootdev;
329 		dumpdev = ndumpdev;
330 
331 		for (vops = LIST_FIRST(&vfs_list); vops != NULL;
332 		     vops = LIST_NEXT(vops, vfs_list)) {
333 			if (vops->vfs_mountroot != NULL &&
334 			    strcmp(rootfstype, vops->vfs_name) == 0)
335 			break;
336 		}
337 
338 		if (vops == NULL) {
339 			deffsname = "generic";
340 		} else
341 			deffsname = vops->vfs_name;
342 
343 		for (;;) {
344 			printf("file system (default %s): ", deffsname);
345 			len = cngetsn(buf, sizeof(buf));
346 			if (len == 0) {
347 				if (strcmp(deffsname, "generic") == 0)
348 					rootfstype = ROOT_FSTYPE_ANY;
349 				break;
350 			}
351 			if (len == 4 && strcmp(buf, "halt") == 0)
352 				cpu_reboot(RB_HALT, NULL);
353 			else if (len == 6 && strcmp(buf, "reboot") == 0)
354 				cpu_reboot(0, NULL);
355 #if defined(DDB)
356 			else if (len == 3 && strcmp(buf, "ddb") == 0) {
357 				console_debugger();
358 			}
359 #endif
360 			else if (len == 7 && strcmp(buf, "generic") == 0) {
361 				rootfstype = ROOT_FSTYPE_ANY;
362 				break;
363 			}
364 			vops = vfs_getopsbyname(buf);
365 			if (vops == NULL || vops->vfs_mountroot == NULL) {
366 				printf("use one of: generic");
367 				for (vops = LIST_FIRST(&vfs_list);
368 				     vops != NULL;
369 				     vops = LIST_NEXT(vops, vfs_list)) {
370 					if (vops->vfs_mountroot != NULL)
371 						printf(" %s", vops->vfs_name);
372 				}
373 				if (vops != NULL)
374 					vfs_delref(vops);
375 #if defined(DDB)
376 				printf(" ddb");
377 #endif
378 				printf(" halt reboot\n");
379 			} else {
380 				/*
381 				 * XXX If *vops gets freed between here and
382 				 * the call to mountroot(), rootfstype will
383 				 * point to something unexpected.  But in
384 				 * this case the system will fail anyway.
385 				 */
386 				rootfstype = vops->vfs_name;
387 				vfs_delref(vops);
388 				break;
389 			}
390 		}
391 
392 	} else if (rootspec == NULL) {
393 		/*
394 		 * Wildcarded root; use the boot device.
395 		 */
396 		rootdv = bootdv;
397 
398 		if (bootdv)
399 			majdev = devsw_name2blk(device_xname(bootdv), NULL, 0);
400 		else
401 			majdev = -1;
402 		if (majdev >= 0) {
403 			/*
404 			 * Root is on a disk.  `bootpartition' is root,
405 			 * unless the device does not use partitions.
406 			 */
407 			if (DEV_USES_PARTITIONS(bootdv))
408 				rootdev = MAKEDISKDEV(majdev,
409 						      device_unit(bootdv),
410 						      bootpartition);
411 			else
412 				rootdev = makedev(majdev, device_unit(bootdv));
413 		}
414 	} else {
415 
416 		/*
417 		 * `root on <dev> ...'
418 		 */
419 
420 		/*
421 		 * If it's a network interface, we can bail out
422 		 * early.
423 		 */
424 		dv = finddevice(rootspec);
425 		if (dv != NULL && device_class(dv) == DV_IFNET) {
426 			rootdv = dv;
427 			goto haveroot;
428 		}
429 
430 		if (rootdev == NODEV &&
431 		    device_class(dv) == DV_DISK && device_is_a(dv, "dk") &&
432 		    (majdev = devsw_name2blk(device_xname(dv), NULL, 0)) >= 0)
433 			rootdev = makedev(majdev, device_unit(dv));
434 
435 		rootdevname = devsw_blk2name(major(rootdev));
436 		if (rootdevname == NULL) {
437 			printf("unknown device major 0x%llx\n",
438 			    (unsigned long long)rootdev);
439 			boothowto |= RB_ASKNAME;
440 			goto top;
441 		}
442 		memset(buf, 0, sizeof(buf));
443 		snprintf(buf, sizeof(buf), "%s%llu", rootdevname,
444 		    (unsigned long long)DISKUNIT(rootdev));
445 
446 		rootdv = finddevice(buf);
447 		if (rootdv == NULL) {
448 			printf("device %s (0x%llx) not configured\n",
449 			    buf, (unsigned long long)rootdev);
450 			boothowto |= RB_ASKNAME;
451 			goto top;
452 		}
453 	}
454 
455  haveroot:
456 
457 	root_device = rootdv;
458 
459 	switch (device_class(rootdv)) {
460 	case DV_IFNET:
461 	case DV_DISK:
462 		aprint_normal("root on %s", device_xname(rootdv));
463 		if (DEV_USES_PARTITIONS(rootdv))
464 			aprint_normal("%c", (int)DISKPART(rootdev) + 'a');
465 		break;
466 
467 	default:
468 		printf("can't determine root device\n");
469 		boothowto |= RB_ASKNAME;
470 		goto top;
471 	}
472 
473 	/*
474 	 * Now configure the dump device.
475 	 *
476 	 * If we haven't figured out the dump device, do so, with
477 	 * the following rules:
478 	 *
479 	 *	(a) We already know dumpdv in the RB_ASKNAME case.
480 	 *
481 	 *	(b) If dumpspec is set, try to use it.  If the device
482 	 *	    is not available, punt.
483 	 *
484 	 *	(c) If dumpspec is not set, the dump device is
485 	 *	    wildcarded or unspecified.  If the root device
486 	 *	    is DV_IFNET, punt.  Otherwise, use partition b
487 	 *	    of the root device.
488 	 */
489 
490 	if (boothowto & RB_ASKNAME) {		/* (a) */
491 		if (dumpdv == NULL)
492 			goto nodumpdev;
493 	} else if (dumpspec != NULL) {		/* (b) */
494 		if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) {
495 			/*
496 			 * Operator doesn't want a dump device.
497 			 * Or looks like they tried to pick a network
498 			 * device.  Oops.
499 			 */
500 			goto nodumpdev;
501 		}
502 
503 		dumpdevname = devsw_blk2name(major(dumpdev));
504 		if (dumpdevname == NULL)
505 			goto nodumpdev;
506 		memset(buf, 0, sizeof(buf));
507 		snprintf(buf, sizeof(buf), "%s%llu", dumpdevname,
508 		    (unsigned long long)DISKUNIT(dumpdev));
509 
510 		dumpdv = finddevice(buf);
511 		if (dumpdv == NULL) {
512 			/*
513 			 * Device not configured.
514 			 */
515 			goto nodumpdev;
516 		}
517 	} else {				/* (c) */
518 		if (DEV_USES_PARTITIONS(rootdv) == 0) {
519 			for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST);
520 			     dv != NULL;
521 			     dv = deviter_next(&di))
522 				if (isswap(dv))
523 					break;
524 			deviter_release(&di);
525 			if (dv == NULL)
526 				goto nodumpdev;
527 
528 			majdev = devsw_name2blk(device_xname(dv), NULL, 0);
529 			if (majdev < 0)
530 				goto nodumpdev;
531 			dumpdv = dv;
532 			dumpdev = makedev(majdev, device_unit(dumpdv));
533 		} else {
534 			dumpdv = rootdv;
535 			dumpdev = MAKEDISKDEV(major(rootdev),
536 			    device_unit(dumpdv), 1);
537 		}
538 	}
539 
540 	dumpcdev = devsw_blk2chr(dumpdev);
541 	aprint_normal(" dumps on %s", device_xname(dumpdv));
542 	if (DEV_USES_PARTITIONS(dumpdv))
543 		aprint_normal("%c", (int)DISKPART(dumpdev) + 'a');
544 	aprint_normal("\n");
545 	return;
546 
547  nodumpdev:
548 	dumpdev = NODEV;
549 	dumpcdev = NODEV;
550 	aprint_normal("\n");
551 }
552 
553 static device_t
554 finddevice(const char *name)
555 {
556 	const char *wname;
557 
558 	if ((wname = getwedgename(name, strlen(name))) != NULL)
559 		return dkwedge_find_by_wname(wname);
560 
561 	return device_find_by_xname(name);
562 }
563 
564 static device_t
565 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump)
566 {
567 	device_t dv;
568 	deviter_t di;
569 
570 	if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
571 		printf("use one of:");
572 		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
573 		     dv = deviter_next(&di)) {
574 			if (DEV_USES_PARTITIONS(dv))
575 				printf(" %s[a-%c]", device_xname(dv),
576 				    'a' + MAXPARTITIONS - 1);
577 			else if (device_class(dv) == DV_DISK)
578 				printf(" %s", device_xname(dv));
579 			if (isdump == 0 && device_class(dv) == DV_IFNET)
580 				printf(" %s", device_xname(dv));
581 		}
582 		deviter_release(&di);
583 		dkwedge_print_wnames();
584 		if (isdump)
585 			printf(" none");
586 #if defined(DDB)
587 		printf(" ddb");
588 #endif
589 		printf(" halt reboot\n");
590 	}
591 	return dv;
592 }
593 
/*
 * getwedgename:
 *
 *	If the first `namelen' characters of `name' begin with the prefix
 *	"wedge:", return a pointer to the text following the prefix (which
 *	points into `name' itself).  Otherwise return NULL.
 */
static const char *
getwedgename(const char *name, int namelen)
{
	static const char prefix[] = "wedge:";
	const int prefixlen = (int)(sizeof(prefix) - 1);

	if (namelen >= prefixlen && strncmp(name, prefix, prefixlen) == 0)
		return name + prefixlen;

	return NULL;
}
605 
606 static device_t
607 parsedisk(char *str, int len, int defpart, dev_t *devp)
608 {
609 	device_t dv;
610 	const char *wname;
611 	char *cp, c;
612 	int majdev, part;
613 	if (len == 0)
614 		return (NULL);
615 
616 	if (len == 4 && strcmp(str, "halt") == 0)
617 		cpu_reboot(RB_HALT, NULL);
618 	else if (len == 6 && strcmp(str, "reboot") == 0)
619 		cpu_reboot(0, NULL);
620 #if defined(DDB)
621 	else if (len == 3 && strcmp(str, "ddb") == 0)
622 		console_debugger();
623 #endif
624 
625 	cp = str + len - 1;
626 	c = *cp;
627 
628 	if ((wname = getwedgename(str, len)) != NULL) {
629 		if ((dv = dkwedge_find_by_wname(wname)) == NULL)
630 			return NULL;
631 		part = defpart;
632 		goto gotdisk;
633 	} else if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) {
634 		part = c - 'a';
635 		*cp = '\0';
636 	} else
637 		part = defpart;
638 
639 	dv = finddevice(str);
640 	if (dv != NULL) {
641 		if (device_class(dv) == DV_DISK) {
642  gotdisk:
643 			majdev = devsw_name2blk(device_xname(dv), NULL, 0);
644 			if (majdev < 0)
645 				panic("parsedisk");
646 			if (DEV_USES_PARTITIONS(dv))
647 				*devp = MAKEDISKDEV(majdev, device_unit(dv),
648 						    part);
649 			else
650 				*devp = makedev(majdev, device_unit(dv));
651 		}
652 
653 		if (device_class(dv) == DV_IFNET)
654 			*devp = NODEV;
655 	}
656 
657 	*cp = c;
658 	return (dv);
659 }
660 
/*
 * trace_is_enabled:
 *
 *	Return true if system call tracing is enabled for process `p':
 *	always when the kernel is built with SYSCALL_DEBUG; with KTRACE,
 *	when KTRFAC_SYSCALL or KTRFAC_SYSRET is set in p_traceflag; with
 *	PTRACE, when PSL_SYSCALL is set in p_slflag.
 */
bool
trace_is_enabled(struct proc *p)
{
	bool enabled = false;

#ifdef SYSCALL_DEBUG
	enabled = true;
#endif
#ifdef KTRACE
	if (!enabled &&
	    ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) {
		enabled = true;
	}
#endif
#ifdef PTRACE
	if (!enabled && ISSET(p->p_slflag, PSL_SYSCALL)) {
		enabled = true;
	}
#endif

	return enabled;
}
681 
682 /*
683  * Start trace of particular system call. If process is being traced,
684  * this routine is called by MD syscall dispatch code just before
685  * a system call is actually executed.
686  */
687 int
688 trace_enter(register_t code, const register_t *args, int narg)
689 {
690 #ifdef SYSCALL_DEBUG
691 	scdebug_call(code, args);
692 #endif /* SYSCALL_DEBUG */
693 
694 	ktrsyscall(code, args, narg);
695 
696 #ifdef PTRACE
697 	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
698 	    (PSL_SYSCALL|PSL_TRACED))
699 		process_stoptrace();
700 #endif
701 	return 0;
702 }
703 
704 /*
705  * End trace of particular system call. If process is being traced,
706  * this routine is called by MD syscall dispatch code just after
707  * a system call finishes.
708  * MD caller guarantees the passed 'code' is within the supported
709  * system call number range for emulation the process runs under.
710  */
711 void
712 trace_exit(register_t code, register_t rval[], int error)
713 {
714 #ifdef SYSCALL_DEBUG
715 	scdebug_ret(code, error, rval);
716 #endif /* SYSCALL_DEBUG */
717 
718 	ktrsysret(code, error, rval);
719 
720 #ifdef PTRACE
721 	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
722 	    (PSL_SYSCALL|PSL_TRACED))
723 		process_stoptrace();
724 #endif
725 }
726 
727 int
728 syscall_establish(const struct emul *em, const struct syscall_package *sp)
729 {
730 	struct sysent *sy;
731 	int i;
732 
733 	KASSERT(mutex_owned(&module_lock));
734 
735 	if (em == NULL) {
736 		em = &emul_netbsd;
737 	}
738 	sy = em->e_sysent;
739 
740 	/*
741 	 * Ensure that all preconditions are valid, since this is
742 	 * an all or nothing deal.  Once a system call is entered,
743 	 * it can become busy and we could be unable to remove it
744 	 * on error.
745 	 */
746 	for (i = 0; sp[i].sp_call != NULL; i++) {
747 		if (sy[sp[i].sp_code].sy_call != sys_nomodule) {
748 #ifdef DIAGNOSTIC
749 			printf("syscall %d is busy\n", sp[i].sp_code);
750 #endif
751 			return EBUSY;
752 		}
753 	}
754 	/* Everything looks good, patch them in. */
755 	for (i = 0; sp[i].sp_call != NULL; i++) {
756 		sy[sp[i].sp_code].sy_call = sp[i].sp_call;
757 	}
758 
759 	return 0;
760 }
761 
762 int
763 syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
764 {
765 	struct sysent *sy;
766 	uint64_t where;
767 	lwp_t *l;
768 	int i;
769 
770 	KASSERT(mutex_owned(&module_lock));
771 
772 	if (em == NULL) {
773 		em = &emul_netbsd;
774 	}
775 	sy = em->e_sysent;
776 
777 	/*
778 	 * First, patch the system calls to sys_nomodule to gate further
779 	 * activity.
780 	 */
781 	for (i = 0; sp[i].sp_call != NULL; i++) {
782 		KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
783 		sy[sp[i].sp_code].sy_call = sys_nomodule;
784 	}
785 
786 	/*
787 	 * Run a cross call to cycle through all CPUs.  This does two
788 	 * things: lock activity provides a barrier and makes our update
789 	 * of sy_call visible to all CPUs, and upon return we can be sure
790 	 * that we see pertinent values of l_sysent posted by remote CPUs.
791 	 */
792 	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
793 	xc_wait(where);
794 
795 	/*
796 	 * Now it's safe to check l_sysent.  Run through all LWPs and see
797 	 * if anyone is still using the system call.
798 	 */
799 	for (i = 0; sp[i].sp_call != NULL; i++) {
800 		mutex_enter(proc_lock);
801 		LIST_FOREACH(l, &alllwp, l_list) {
802 			if (l->l_sysent == &sy[sp[i].sp_code]) {
803 				break;
804 			}
805 		}
806 		mutex_exit(proc_lock);
807 		if (l == NULL) {
808 			continue;
809 		}
810 		/*
811 		 * We lose: one or more calls are still in use.  Put back
812 		 * the old entrypoints and act like nothing happened.
813 		 * When we drop module_lock, any system calls held in
814 		 * sys_nomodule() will be restarted.
815 		 */
816 		for (i = 0; sp[i].sp_call != NULL; i++) {
817 			sy[sp[i].sp_code].sy_call = sp[i].sp_call;
818 		}
819 		return EBUSY;
820 	}
821 
822 	return 0;
823 }
824