1 /* $NetBSD: kern_subr.c,v 1.192 2008/10/14 14:17:49 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 1999, 2002, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Luke Mewburn. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Copyright (c) 1992, 1993 43 * The Regents of the University of California. All rights reserved. 44 * 45 * This software was developed by the Computer Systems Engineering group 46 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 47 * contributed to Berkeley. 48 * 49 * All advertising materials mentioning features or use of this software 50 * must display the following acknowledgement: 51 * This product includes software developed by the University of 52 * California, Lawrence Berkeley Laboratory. 53 * 54 * Redistribution and use in source and binary forms, with or without 55 * modification, are permitted provided that the following conditions 56 * are met: 57 * 1. Redistributions of source code must retain the above copyright 58 * notice, this list of conditions and the following disclaimer. 59 * 2. Redistributions in binary form must reproduce the above copyright 60 * notice, this list of conditions and the following disclaimer in the 61 * documentation and/or other materials provided with the distribution. 62 * 3. Neither the name of the University nor the names of its contributors 63 * may be used to endorse or promote products derived from this software 64 * without specific prior written permission. 65 * 66 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 67 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 68 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 69 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 70 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 71 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 72 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 73 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 74 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 75 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 76 * SUCH DAMAGE. 77 * 78 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95 79 */ 80 81 #include <sys/cdefs.h> 82 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.192 2008/10/14 14:17:49 pooka Exp $"); 83 84 #include "opt_ddb.h" 85 #include "opt_md.h" 86 #include "opt_syscall_debug.h" 87 #include "opt_ktrace.h" 88 #include "opt_ptrace.h" 89 #include "opt_powerhook.h" 90 #include "opt_tftproot.h" 91 92 #include <sys/param.h> 93 #include <sys/systm.h> 94 #include <sys/proc.h> 95 #include <sys/malloc.h> 96 #include <sys/mount.h> 97 #include <sys/device.h> 98 #include <sys/reboot.h> 99 #include <sys/conf.h> 100 #include <sys/disk.h> 101 #include <sys/disklabel.h> 102 #include <sys/queue.h> 103 #include <sys/ktrace.h> 104 #include <sys/ptrace.h> 105 #include <sys/fcntl.h> 106 #include <sys/kauth.h> 107 #include <sys/vnode.h> 108 #include <sys/pmf.h> 109 110 #include <uvm/uvm_extern.h> 111 112 #include <dev/cons.h> 113 114 #include <net/if.h> 115 116 /* XXX these should eventually move to subr_autoconf.c */ 117 static struct device *finddevice(const char *); 118 static struct device *getdisk(char *, int, int, dev_t *, int); 119 static struct device *parsedisk(char *, int, int, dev_t *); 120 static const char *getwedgename(const char *, int); 121 122 /* 123 * A generic linear hook. 124 */ 125 struct hook_desc { 126 LIST_ENTRY(hook_desc) hk_list; 127 void (*hk_fn)(void *); 128 void *hk_arg; 129 }; 130 typedef LIST_HEAD(, hook_desc) hook_list_t; 131 132 #ifdef TFTPROOT 133 int tftproot_dhcpboot(struct device *); 134 #endif 135 136 dev_t dumpcdev; /* for savecore */ 137 138 void 139 uio_setup_sysspace(struct uio *uio) 140 { 141 142 uio->uio_vmspace = vmspace_kernel(); 143 } 144 145 int 146 uiomove(void *buf, size_t n, struct uio *uio) 147 { 148 struct vmspace *vm = uio->uio_vmspace; 149 struct iovec *iov; 150 size_t cnt; 151 int error = 0; 152 char *cp = buf; 153 154 ASSERT_SLEEPABLE(); 155 156 #ifdef DIAGNOSTIC 157 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) 158 panic("uiomove: mode"); 159 #endif 160 while (n > 0 && uio->uio_resid) { 161 iov = uio->uio_iov; 162 cnt = iov->iov_len; 163 if (cnt == 0) { 164 KASSERT(uio->uio_iovcnt > 0); 165 uio->uio_iov++; 166 uio->uio_iovcnt--; 167 continue; 168 } 169 if (cnt > n) 170 cnt = n; 171 if (!VMSPACE_IS_KERNEL_P(vm)) { 172 if (curcpu()->ci_schedstate.spc_flags & 173 SPCF_SHOULDYIELD) 174 preempt(); 175 } 176 177 if (uio->uio_rw == UIO_READ) { 178 error = copyout_vmspace(vm, cp, iov->iov_base, 179 cnt); 180 } else { 181 error = copyin_vmspace(vm, iov->iov_base, cp, 182 cnt); 183 } 184 if (error) { 185 break; 186 } 187 iov->iov_base = (char *)iov->iov_base + cnt; 188 iov->iov_len -= cnt; 189 uio->uio_resid -= cnt; 190 uio->uio_offset += cnt; 191 cp += cnt; 192 KDASSERT(cnt <= n); 193 n -= cnt; 194 } 195 196 return (error); 197 } 198 199 /* 200 * Wrapper for uiomove() that validates the arguments against a known-good 201 * kernel buffer. 202 */ 203 int 204 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio) 205 { 206 size_t offset; 207 208 if (uio->uio_offset < 0 || /* uio->uio_resid < 0 || */ 209 (offset = uio->uio_offset) != uio->uio_offset) 210 return (EINVAL); 211 if (offset >= buflen) 212 return (0); 213 return (uiomove((char *)buf + offset, buflen - offset, uio)); 214 } 215 216 /* 217 * Give next character to user as result of read. 218 */ 219 int 220 ureadc(int c, struct uio *uio) 221 { 222 struct iovec *iov; 223 224 if (uio->uio_resid <= 0) 225 panic("ureadc: non-positive resid"); 226 again: 227 if (uio->uio_iovcnt <= 0) 228 panic("ureadc: non-positive iovcnt"); 229 iov = uio->uio_iov; 230 if (iov->iov_len <= 0) { 231 uio->uio_iovcnt--; 232 uio->uio_iov++; 233 goto again; 234 } 235 if (!VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) { 236 if (subyte(iov->iov_base, c) < 0) 237 return (EFAULT); 238 } else { 239 *(char *)iov->iov_base = c; 240 } 241 iov->iov_base = (char *)iov->iov_base + 1; 242 iov->iov_len--; 243 uio->uio_resid--; 244 uio->uio_offset++; 245 return (0); 246 } 247 248 /* 249 * Like copyin(), but operates on an arbitrary vmspace. 250 */ 251 int 252 copyin_vmspace(struct vmspace *vm, const void *uaddr, void *kaddr, size_t len) 253 { 254 struct iovec iov; 255 struct uio uio; 256 int error; 257 258 if (len == 0) 259 return (0); 260 261 if (VMSPACE_IS_KERNEL_P(vm)) { 262 return kcopy(uaddr, kaddr, len); 263 } 264 if (__predict_true(vm == curproc->p_vmspace)) { 265 return copyin(uaddr, kaddr, len); 266 } 267 268 iov.iov_base = kaddr; 269 iov.iov_len = len; 270 uio.uio_iov = &iov; 271 uio.uio_iovcnt = 1; 272 uio.uio_offset = (off_t)(uintptr_t)uaddr; 273 uio.uio_resid = len; 274 uio.uio_rw = UIO_READ; 275 UIO_SETUP_SYSSPACE(&uio); 276 error = uvm_io(&vm->vm_map, &uio); 277 278 return (error); 279 } 280 281 /* 282 * Like copyout(), but operates on an arbitrary vmspace. 283 */ 284 int 285 copyout_vmspace(struct vmspace *vm, const void *kaddr, void *uaddr, size_t len) 286 { 287 struct iovec iov; 288 struct uio uio; 289 int error; 290 291 if (len == 0) 292 return (0); 293 294 if (VMSPACE_IS_KERNEL_P(vm)) { 295 return kcopy(kaddr, uaddr, len); 296 } 297 if (__predict_true(vm == curproc->p_vmspace)) { 298 return copyout(kaddr, uaddr, len); 299 } 300 301 iov.iov_base = __UNCONST(kaddr); /* XXXUNCONST cast away const */ 302 iov.iov_len = len; 303 uio.uio_iov = &iov; 304 uio.uio_iovcnt = 1; 305 uio.uio_offset = (off_t)(uintptr_t)uaddr; 306 uio.uio_resid = len; 307 uio.uio_rw = UIO_WRITE; 308 UIO_SETUP_SYSSPACE(&uio); 309 error = uvm_io(&vm->vm_map, &uio); 310 311 return (error); 312 } 313 314 /* 315 * Like copyin(), but operates on an arbitrary process. 316 */ 317 int 318 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len) 319 { 320 struct vmspace *vm; 321 int error; 322 323 error = proc_vmspace_getref(p, &vm); 324 if (error) { 325 return error; 326 } 327 error = copyin_vmspace(vm, uaddr, kaddr, len); 328 uvmspace_free(vm); 329 330 return error; 331 } 332 333 /* 334 * Like copyout(), but operates on an arbitrary process. 335 */ 336 int 337 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len) 338 { 339 struct vmspace *vm; 340 int error; 341 342 error = proc_vmspace_getref(p, &vm); 343 if (error) { 344 return error; 345 } 346 error = copyout_vmspace(vm, kaddr, uaddr, len); 347 uvmspace_free(vm); 348 349 return error; 350 } 351 352 /* 353 * Like copyin(), except it operates on kernel addresses when the FKIOCTL 354 * flag is passed in `ioctlflags' from the ioctl call. 355 */ 356 int 357 ioctl_copyin(int ioctlflags, const void *src, void *dst, size_t len) 358 { 359 if (ioctlflags & FKIOCTL) 360 return kcopy(src, dst, len); 361 return copyin(src, dst, len); 362 } 363 364 /* 365 * Like copyout(), except it operates on kernel addresses when the FKIOCTL 366 * flag is passed in `ioctlflags' from the ioctl call. 367 */ 368 int 369 ioctl_copyout(int ioctlflags, const void *src, void *dst, size_t len) 370 { 371 if (ioctlflags & FKIOCTL) 372 return kcopy(src, dst, len); 373 return copyout(src, dst, len); 374 } 375 376 static void * 377 hook_establish(hook_list_t *list, void (*fn)(void *), void *arg) 378 { 379 struct hook_desc *hd; 380 381 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT); 382 if (hd == NULL) 383 return (NULL); 384 385 hd->hk_fn = fn; 386 hd->hk_arg = arg; 387 LIST_INSERT_HEAD(list, hd, hk_list); 388 389 return (hd); 390 } 391 392 static void 393 hook_disestablish(hook_list_t *list, void *vhook) 394 { 395 #ifdef DIAGNOSTIC 396 struct hook_desc *hd; 397 398 LIST_FOREACH(hd, list, hk_list) { 399 if (hd == vhook) 400 break; 401 } 402 403 if (hd == NULL) 404 panic("hook_disestablish: hook %p not established", vhook); 405 #endif 406 LIST_REMOVE((struct hook_desc *)vhook, hk_list); 407 free(vhook, M_DEVBUF); 408 } 409 410 static void 411 hook_destroy(hook_list_t *list) 412 { 413 struct hook_desc *hd; 414 415 while ((hd = LIST_FIRST(list)) != NULL) { 416 LIST_REMOVE(hd, hk_list); 417 free(hd, M_DEVBUF); 418 } 419 } 420 421 static void 422 hook_proc_run(hook_list_t *list, struct proc *p) 423 { 424 struct hook_desc *hd; 425 426 LIST_FOREACH(hd, list, hk_list) 427 ((void (*)(struct proc *, void *))*hd->hk_fn)(p, hd->hk_arg); 428 } 429 430 /* 431 * "Shutdown hook" types, functions, and variables. 432 * 433 * Should be invoked immediately before the 434 * system is halted or rebooted, i.e. after file systems unmounted, 435 * after crash dump done, etc. 436 * 437 * Each shutdown hook is removed from the list before it's run, so that 438 * it won't be run again. 439 */ 440 441 static hook_list_t shutdownhook_list; 442 443 void * 444 shutdownhook_establish(void (*fn)(void *), void *arg) 445 { 446 return hook_establish(&shutdownhook_list, fn, arg); 447 } 448 449 void 450 shutdownhook_disestablish(void *vhook) 451 { 452 hook_disestablish(&shutdownhook_list, vhook); 453 } 454 455 /* 456 * Run shutdown hooks. Should be invoked immediately before the 457 * system is halted or rebooted, i.e. after file systems unmounted, 458 * after crash dump done, etc. 459 * 460 * Each shutdown hook is removed from the list before it's run, so that 461 * it won't be run again. 462 */ 463 void 464 doshutdownhooks(void) 465 { 466 struct hook_desc *dp; 467 468 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) { 469 LIST_REMOVE(dp, hk_list); 470 (*dp->hk_fn)(dp->hk_arg); 471 #if 0 472 /* 473 * Don't bother freeing the hook structure,, since we may 474 * be rebooting because of a memory corruption problem, 475 * and this might only make things worse. It doesn't 476 * matter, anyway, since the system is just about to 477 * reboot. 478 */ 479 free(dp, M_DEVBUF); 480 #endif 481 } 482 483 pmf_system_shutdown(boothowto); 484 } 485 486 /* 487 * "Mountroot hook" types, functions, and variables. 488 */ 489 490 static hook_list_t mountroothook_list; 491 492 void * 493 mountroothook_establish(void (*fn)(struct device *), struct device *dev) 494 { 495 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev); 496 } 497 498 void 499 mountroothook_disestablish(void *vhook) 500 { 501 hook_disestablish(&mountroothook_list, vhook); 502 } 503 504 void 505 mountroothook_destroy(void) 506 { 507 hook_destroy(&mountroothook_list); 508 } 509 510 void 511 domountroothook(void) 512 { 513 struct hook_desc *hd; 514 515 LIST_FOREACH(hd, &mountroothook_list, hk_list) { 516 if (hd->hk_arg == (void *)root_device) { 517 (*hd->hk_fn)(hd->hk_arg); 518 return; 519 } 520 } 521 } 522 523 static hook_list_t exechook_list; 524 525 void * 526 exechook_establish(void (*fn)(struct proc *, void *), void *arg) 527 { 528 return hook_establish(&exechook_list, (void (*)(void *))fn, arg); 529 } 530 531 void 532 exechook_disestablish(void *vhook) 533 { 534 hook_disestablish(&exechook_list, vhook); 535 } 536 537 /* 538 * Run exec hooks. 539 */ 540 void 541 doexechooks(struct proc *p) 542 { 543 hook_proc_run(&exechook_list, p); 544 } 545 546 static hook_list_t exithook_list; 547 548 void * 549 exithook_establish(void (*fn)(struct proc *, void *), void *arg) 550 { 551 return hook_establish(&exithook_list, (void (*)(void *))fn, arg); 552 } 553 554 void 555 exithook_disestablish(void *vhook) 556 { 557 hook_disestablish(&exithook_list, vhook); 558 } 559 560 /* 561 * Run exit hooks. 562 */ 563 void 564 doexithooks(struct proc *p) 565 { 566 hook_proc_run(&exithook_list, p); 567 } 568 569 static hook_list_t forkhook_list; 570 571 void * 572 forkhook_establish(void (*fn)(struct proc *, struct proc *)) 573 { 574 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL); 575 } 576 577 void 578 forkhook_disestablish(void *vhook) 579 { 580 hook_disestablish(&forkhook_list, vhook); 581 } 582 583 /* 584 * Run fork hooks. 585 */ 586 void 587 doforkhooks(struct proc *p2, struct proc *p1) 588 { 589 struct hook_desc *hd; 590 591 LIST_FOREACH(hd, &forkhook_list, hk_list) { 592 ((void (*)(struct proc *, struct proc *))*hd->hk_fn) 593 (p2, p1); 594 } 595 } 596 597 /* 598 * "Power hook" types, functions, and variables. 599 * The list of power hooks is kept ordered with the last registered hook 600 * first. 601 * When running the hooks on power down the hooks are called in reverse 602 * registration order, when powering up in registration order. 603 */ 604 struct powerhook_desc { 605 CIRCLEQ_ENTRY(powerhook_desc) sfd_list; 606 void (*sfd_fn)(int, void *); 607 void *sfd_arg; 608 char sfd_name[16]; 609 }; 610 611 static CIRCLEQ_HEAD(, powerhook_desc) powerhook_list = 612 CIRCLEQ_HEAD_INITIALIZER(powerhook_list); 613 614 void * 615 powerhook_establish(const char *name, void (*fn)(int, void *), void *arg) 616 { 617 struct powerhook_desc *ndp; 618 619 ndp = (struct powerhook_desc *) 620 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT); 621 if (ndp == NULL) 622 return (NULL); 623 624 ndp->sfd_fn = fn; 625 ndp->sfd_arg = arg; 626 strlcpy(ndp->sfd_name, name, sizeof(ndp->sfd_name)); 627 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list); 628 629 aprint_error("%s: WARNING: powerhook_establish is deprecated\n", name); 630 return (ndp); 631 } 632 633 void 634 powerhook_disestablish(void *vhook) 635 { 636 #ifdef DIAGNOSTIC 637 struct powerhook_desc *dp; 638 639 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) 640 if (dp == vhook) 641 goto found; 642 panic("powerhook_disestablish: hook %p not established", vhook); 643 found: 644 #endif 645 646 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook, 647 sfd_list); 648 free(vhook, M_DEVBUF); 649 } 650 651 /* 652 * Run power hooks. 653 */ 654 void 655 dopowerhooks(int why) 656 { 657 struct powerhook_desc *dp; 658 659 #ifdef POWERHOOK_DEBUG 660 const char *why_name; 661 static const char * pwr_names[] = {PWR_NAMES}; 662 why_name = why < __arraycount(pwr_names) ? pwr_names[why] : "???"; 663 #endif 664 665 if (why == PWR_RESUME || why == PWR_SOFTRESUME) { 666 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) { 667 #ifdef POWERHOOK_DEBUG 668 printf("dopowerhooks %s: %s (%p)\n", why_name, dp->sfd_name, dp); 669 #endif 670 (*dp->sfd_fn)(why, dp->sfd_arg); 671 } 672 } else { 673 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) { 674 #ifdef POWERHOOK_DEBUG 675 printf("dopowerhooks %s: %s (%p)\n", why_name, dp->sfd_name, dp); 676 #endif 677 (*dp->sfd_fn)(why, dp->sfd_arg); 678 } 679 } 680 681 #ifdef POWERHOOK_DEBUG 682 printf("dopowerhooks: %s done\n", why_name); 683 #endif 684 } 685 686 static int 687 isswap(struct device *dv) 688 { 689 struct dkwedge_info wi; 690 struct vnode *vn; 691 int error; 692 693 if (device_class(dv) != DV_DISK || !device_is_a(dv, "dk")) 694 return 0; 695 696 if ((vn = opendisk(dv)) == NULL) 697 return 0; 698 699 error = VOP_IOCTL(vn, DIOCGWEDGEINFO, &wi, FREAD, NOCRED); 700 VOP_CLOSE(vn, FREAD, NOCRED); 701 vput(vn); 702 if (error) { 703 #ifdef DEBUG_WEDGE 704 printf("%s: Get wedge info returned %d\n", device_xname(dv), error); 705 #endif 706 return 0; 707 } 708 return strcmp(wi.dkw_ptype, DKW_PTYPE_SWAP) == 0; 709 } 710 711 /* 712 * Determine the root device and, if instructed to, the root file system. 713 */ 714 715 #include "md.h" 716 717 #if NMD > 0 718 extern struct cfdriver md_cd; 719 #ifdef MEMORY_DISK_IS_ROOT 720 int md_is_root = 1; 721 #else 722 int md_is_root = 0; 723 #endif 724 #endif 725 726 /* 727 * The device and wedge that we booted from. If booted_wedge is NULL, 728 * the we might consult booted_partition. 729 */ 730 struct device *booted_device; 731 struct device *booted_wedge; 732 int booted_partition; 733 734 /* 735 * Use partition letters if it's a disk class but not a wedge. 736 * XXX Check for wedge is kinda gross. 737 */ 738 #define DEV_USES_PARTITIONS(dv) \ 739 (device_class((dv)) == DV_DISK && \ 740 !device_is_a((dv), "dk")) 741 742 void 743 setroot(struct device *bootdv, int bootpartition) 744 { 745 struct device *dv; 746 int len, majdev; 747 dev_t nrootdev; 748 dev_t ndumpdev = NODEV; 749 char buf[128]; 750 const char *rootdevname; 751 const char *dumpdevname; 752 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */ 753 struct device *dumpdv = NULL; 754 struct ifnet *ifp; 755 const char *deffsname; 756 struct vfsops *vops; 757 758 #ifdef TFTPROOT 759 if (tftproot_dhcpboot(bootdv) != 0) 760 boothowto |= RB_ASKNAME; 761 #endif 762 763 #if NMD > 0 764 if (md_is_root) { 765 /* 766 * XXX there should be "root on md0" in the config file, 767 * but it isn't always 768 */ 769 bootdv = md_cd.cd_devs[0]; 770 bootpartition = 0; 771 } 772 #endif 773 774 /* 775 * If NFS is specified as the file system, and we found 776 * a DV_DISK boot device (or no boot device at all), then 777 * find a reasonable network interface for "rootspec". 778 */ 779 vops = vfs_getopsbyname("nfs"); 780 if (vops != NULL && vops->vfs_mountroot == mountroot && 781 rootspec == NULL && 782 (bootdv == NULL || device_class(bootdv) != DV_IFNET)) { 783 IFNET_FOREACH(ifp) { 784 if ((ifp->if_flags & 785 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) 786 break; 787 } 788 if (ifp == NULL) { 789 /* 790 * Can't find a suitable interface; ask the 791 * user. 792 */ 793 boothowto |= RB_ASKNAME; 794 } else { 795 /* 796 * Have a suitable interface; behave as if 797 * the user specified this interface. 798 */ 799 rootspec = (const char *)ifp->if_xname; 800 } 801 } 802 if (vops != NULL) 803 vfs_delref(vops); 804 805 /* 806 * If wildcarded root and we the boot device wasn't determined, 807 * ask the user. 808 */ 809 if (rootspec == NULL && bootdv == NULL) 810 boothowto |= RB_ASKNAME; 811 812 top: 813 if (boothowto & RB_ASKNAME) { 814 struct device *defdumpdv; 815 816 for (;;) { 817 printf("root device"); 818 if (bootdv != NULL) { 819 printf(" (default %s", device_xname(bootdv)); 820 if (DEV_USES_PARTITIONS(bootdv)) 821 printf("%c", bootpartition + 'a'); 822 printf(")"); 823 } 824 printf(": "); 825 len = cngetsn(buf, sizeof(buf)); 826 if (len == 0 && bootdv != NULL) { 827 strlcpy(buf, device_xname(bootdv), sizeof(buf)); 828 len = strlen(buf); 829 } 830 if (len > 0 && buf[len - 1] == '*') { 831 buf[--len] = '\0'; 832 dv = getdisk(buf, len, 1, &nrootdev, 0); 833 if (dv != NULL) { 834 rootdv = dv; 835 break; 836 } 837 } 838 dv = getdisk(buf, len, bootpartition, &nrootdev, 0); 839 if (dv != NULL) { 840 rootdv = dv; 841 break; 842 } 843 } 844 845 /* 846 * Set up the default dump device. If root is on 847 * a network device, there is no default dump 848 * device, since we don't support dumps to the 849 * network. 850 */ 851 if (DEV_USES_PARTITIONS(rootdv) == 0) 852 defdumpdv = NULL; 853 else 854 defdumpdv = rootdv; 855 856 for (;;) { 857 printf("dump device"); 858 if (defdumpdv != NULL) { 859 /* 860 * Note, we know it's a disk if we get here. 861 */ 862 printf(" (default %sb)", device_xname(defdumpdv)); 863 } 864 printf(": "); 865 len = cngetsn(buf, sizeof(buf)); 866 if (len == 0) { 867 if (defdumpdv != NULL) { 868 ndumpdev = MAKEDISKDEV(major(nrootdev), 869 DISKUNIT(nrootdev), 1); 870 } 871 dumpdv = defdumpdv; 872 break; 873 } 874 if (len == 4 && strcmp(buf, "none") == 0) { 875 dumpdv = NULL; 876 break; 877 } 878 dv = getdisk(buf, len, 1, &ndumpdev, 1); 879 if (dv != NULL) { 880 dumpdv = dv; 881 break; 882 } 883 } 884 885 rootdev = nrootdev; 886 dumpdev = ndumpdev; 887 888 for (vops = LIST_FIRST(&vfs_list); vops != NULL; 889 vops = LIST_NEXT(vops, vfs_list)) { 890 if (vops->vfs_mountroot != NULL && 891 vops->vfs_mountroot == mountroot) 892 break; 893 } 894 895 if (vops == NULL) { 896 mountroot = NULL; 897 deffsname = "generic"; 898 } else 899 deffsname = vops->vfs_name; 900 901 for (;;) { 902 printf("file system (default %s): ", deffsname); 903 len = cngetsn(buf, sizeof(buf)); 904 if (len == 0) 905 break; 906 if (len == 4 && strcmp(buf, "halt") == 0) 907 cpu_reboot(RB_HALT, NULL); 908 else if (len == 6 && strcmp(buf, "reboot") == 0) 909 cpu_reboot(0, NULL); 910 #if defined(DDB) 911 else if (len == 3 && strcmp(buf, "ddb") == 0) { 912 console_debugger(); 913 } 914 #endif 915 else if (len == 7 && strcmp(buf, "generic") == 0) { 916 mountroot = NULL; 917 break; 918 } 919 vops = vfs_getopsbyname(buf); 920 if (vops == NULL || vops->vfs_mountroot == NULL) { 921 printf("use one of: generic"); 922 for (vops = LIST_FIRST(&vfs_list); 923 vops != NULL; 924 vops = LIST_NEXT(vops, vfs_list)) { 925 if (vops->vfs_mountroot != NULL) 926 printf(" %s", vops->vfs_name); 927 } 928 #if defined(DDB) 929 printf(" ddb"); 930 #endif 931 printf(" halt reboot\n"); 932 } else { 933 mountroot = vops->vfs_mountroot; 934 vfs_delref(vops); 935 break; 936 } 937 } 938 939 } else if (rootspec == NULL) { 940 /* 941 * Wildcarded root; use the boot device. 942 */ 943 rootdv = bootdv; 944 945 if (bootdv) 946 majdev = devsw_name2blk(device_xname(bootdv), NULL, 0); 947 else 948 majdev = -1; 949 if (majdev >= 0) { 950 /* 951 * Root is on a disk. `bootpartition' is root, 952 * unless the device does not use partitions. 953 */ 954 if (DEV_USES_PARTITIONS(bootdv)) 955 rootdev = MAKEDISKDEV(majdev, 956 device_unit(bootdv), 957 bootpartition); 958 else 959 rootdev = makedev(majdev, device_unit(bootdv)); 960 } 961 } else { 962 963 /* 964 * `root on <dev> ...' 965 */ 966 967 /* 968 * If it's a network interface, we can bail out 969 * early. 970 */ 971 dv = finddevice(rootspec); 972 if (dv != NULL && device_class(dv) == DV_IFNET) { 973 rootdv = dv; 974 goto haveroot; 975 } 976 977 if (rootdev == NODEV && 978 device_class(dv) == DV_DISK && device_is_a(dv, "dk") && 979 (majdev = devsw_name2blk(device_xname(dv), NULL, 0)) >= 0) 980 rootdev = makedev(majdev, device_unit(dv)); 981 982 rootdevname = devsw_blk2name(major(rootdev)); 983 if (rootdevname == NULL) { 984 printf("unknown device major 0x%x\n", rootdev); 985 boothowto |= RB_ASKNAME; 986 goto top; 987 } 988 memset(buf, 0, sizeof(buf)); 989 snprintf(buf, sizeof(buf), "%s%d", rootdevname, 990 DISKUNIT(rootdev)); 991 992 rootdv = finddevice(buf); 993 if (rootdv == NULL) { 994 printf("device %s (0x%x) not configured\n", 995 buf, rootdev); 996 boothowto |= RB_ASKNAME; 997 goto top; 998 } 999 } 1000 1001 haveroot: 1002 1003 root_device = rootdv; 1004 1005 switch (device_class(rootdv)) { 1006 case DV_IFNET: 1007 case DV_DISK: 1008 aprint_normal("root on %s", device_xname(rootdv)); 1009 if (DEV_USES_PARTITIONS(rootdv)) 1010 aprint_normal("%c", DISKPART(rootdev) + 'a'); 1011 break; 1012 1013 default: 1014 printf("can't determine root device\n"); 1015 boothowto |= RB_ASKNAME; 1016 goto top; 1017 } 1018 1019 /* 1020 * Now configure the dump device. 1021 * 1022 * If we haven't figured out the dump device, do so, with 1023 * the following rules: 1024 * 1025 * (a) We already know dumpdv in the RB_ASKNAME case. 1026 * 1027 * (b) If dumpspec is set, try to use it. If the device 1028 * is not available, punt. 1029 * 1030 * (c) If dumpspec is not set, the dump device is 1031 * wildcarded or unspecified. If the root device 1032 * is DV_IFNET, punt. Otherwise, use partition b 1033 * of the root device. 1034 */ 1035 1036 if (boothowto & RB_ASKNAME) { /* (a) */ 1037 if (dumpdv == NULL) 1038 goto nodumpdev; 1039 } else if (dumpspec != NULL) { /* (b) */ 1040 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) { 1041 /* 1042 * Operator doesn't want a dump device. 1043 * Or looks like they tried to pick a network 1044 * device. Oops. 1045 */ 1046 goto nodumpdev; 1047 } 1048 1049 dumpdevname = devsw_blk2name(major(dumpdev)); 1050 if (dumpdevname == NULL) 1051 goto nodumpdev; 1052 memset(buf, 0, sizeof(buf)); 1053 snprintf(buf, sizeof(buf), "%s%d", dumpdevname, 1054 DISKUNIT(dumpdev)); 1055 1056 dumpdv = finddevice(buf); 1057 if (dumpdv == NULL) { 1058 /* 1059 * Device not configured. 1060 */ 1061 goto nodumpdev; 1062 } 1063 } else { /* (c) */ 1064 if (DEV_USES_PARTITIONS(rootdv) == 0) { 1065 for (dv = TAILQ_FIRST(&alldevs); dv != NULL; 1066 dv = TAILQ_NEXT(dv, dv_list)) 1067 if (isswap(dv)) 1068 break; 1069 if (dv == NULL) 1070 goto nodumpdev; 1071 1072 majdev = devsw_name2blk(device_xname(dv), NULL, 0); 1073 if (majdev < 0) 1074 goto nodumpdev; 1075 dumpdv = dv; 1076 dumpdev = makedev(majdev, device_unit(dumpdv)); 1077 } else { 1078 dumpdv = rootdv; 1079 dumpdev = MAKEDISKDEV(major(rootdev), 1080 device_unit(dumpdv), 1); 1081 } 1082 } 1083 1084 dumpcdev = devsw_blk2chr(dumpdev); 1085 aprint_normal(" dumps on %s", device_xname(dumpdv)); 1086 if (DEV_USES_PARTITIONS(dumpdv)) 1087 aprint_normal("%c", DISKPART(dumpdev) + 'a'); 1088 aprint_normal("\n"); 1089 return; 1090 1091 nodumpdev: 1092 dumpdev = NODEV; 1093 dumpcdev = NODEV; 1094 aprint_normal("\n"); 1095 } 1096 1097 static struct device * 1098 finddevice(const char *name) 1099 { 1100 const char *wname; 1101 1102 if ((wname = getwedgename(name, strlen(name))) != NULL) 1103 return dkwedge_find_by_wname(wname); 1104 1105 return device_find_by_xname(name); 1106 } 1107 1108 static struct device * 1109 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump) 1110 { 1111 struct device *dv; 1112 1113 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1114 printf("use one of:"); 1115 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1116 if (DEV_USES_PARTITIONS(dv)) 1117 printf(" %s[a-%c]", device_xname(dv), 1118 'a' + MAXPARTITIONS - 1); 1119 else if (device_class(dv) == DV_DISK) 1120 printf(" %s", device_xname(dv)); 1121 if (isdump == 0 && device_class(dv) == DV_IFNET) 1122 printf(" %s", device_xname(dv)); 1123 } 1124 dkwedge_print_wnames(); 1125 if (isdump) 1126 printf(" none"); 1127 #if defined(DDB) 1128 printf(" ddb"); 1129 #endif 1130 printf(" halt reboot\n"); 1131 } 1132 return dv; 1133 } 1134 1135 static const char * 1136 getwedgename(const char *name, int namelen) 1137 { 1138 const char *wpfx = "wedge:"; 1139 const int wpfxlen = strlen(wpfx); 1140 1141 if (namelen < wpfxlen || strncmp(name, wpfx, wpfxlen) != 0) 1142 return NULL; 1143 1144 return name + wpfxlen; 1145 } 1146 1147 static struct device * 1148 parsedisk(char *str, int len, int defpart, dev_t *devp) 1149 { 1150 struct device *dv; 1151 const char *wname; 1152 char *cp, c; 1153 int majdev, part; 1154 if (len == 0) 1155 return (NULL); 1156 1157 if (len == 4 && strcmp(str, "halt") == 0) 1158 cpu_reboot(RB_HALT, NULL); 1159 else if (len == 6 && strcmp(str, "reboot") == 0) 1160 cpu_reboot(0, NULL); 1161 #if defined(DDB) 1162 else if (len == 3 && strcmp(str, "ddb") == 0) 1163 console_debugger(); 1164 #endif 1165 1166 cp = str + len - 1; 1167 c = *cp; 1168 1169 if ((wname = getwedgename(str, len)) != NULL) { 1170 if ((dv = dkwedge_find_by_wname(wname)) == NULL) 1171 return NULL; 1172 part = defpart; 1173 goto gotdisk; 1174 } else if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) { 1175 part = c - 'a'; 1176 *cp = '\0'; 1177 } else 1178 part = defpart; 1179 1180 dv = finddevice(str); 1181 if (dv != NULL) { 1182 if (device_class(dv) == DV_DISK) { 1183 gotdisk: 1184 majdev = devsw_name2blk(device_xname(dv), NULL, 0); 1185 if (majdev < 0) 1186 panic("parsedisk"); 1187 if (DEV_USES_PARTITIONS(dv)) 1188 *devp = MAKEDISKDEV(majdev, device_unit(dv), 1189 part); 1190 else 1191 *devp = makedev(majdev, device_unit(dv)); 1192 } 1193 1194 if (device_class(dv) == DV_IFNET) 1195 *devp = NODEV; 1196 } 1197 1198 *cp = c; 1199 return (dv); 1200 } 1201 1202 /* 1203 * snprintf() `bytes' into `buf', reformatting it so that the number, 1204 * plus a possible `x' + suffix extension) fits into len bytes (including 1205 * the terminating NUL). 1206 * Returns the number of bytes stored in buf, or -1 if there was a problem. 1207 * E.g, given a len of 9 and a suffix of `B': 1208 * bytes result 1209 * ----- ------ 1210 * 99999 `99999 B' 1211 * 100000 `97 kB' 1212 * 66715648 `65152 kB' 1213 * 252215296 `240 MB' 1214 */ 1215 int 1216 humanize_number(char *buf, size_t len, uint64_t bytes, const char *suffix, 1217 int divisor) 1218 { 1219 /* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */ 1220 const char *prefixes; 1221 int r; 1222 uint64_t umax; 1223 size_t i, suffixlen; 1224 1225 if (buf == NULL || suffix == NULL) 1226 return (-1); 1227 if (len > 0) 1228 buf[0] = '\0'; 1229 suffixlen = strlen(suffix); 1230 /* check if enough room for `x y' + suffix + `\0' */ 1231 if (len < 4 + suffixlen) 1232 return (-1); 1233 1234 if (divisor == 1024) { 1235 /* 1236 * binary multiplies 1237 * XXX IEC 60027-2 recommends Ki, Mi, Gi... 1238 */ 1239 prefixes = " KMGTPE"; 1240 } else 1241 prefixes = " kMGTPE"; /* SI for decimal multiplies */ 1242 1243 umax = 1; 1244 for (i = 0; i < len - suffixlen - 3; i++) 1245 umax *= 10; 1246 for (i = 0; bytes >= umax && prefixes[i + 1]; i++) 1247 bytes /= divisor; 1248 1249 r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes, 1250 i == 0 ? "" : " ", prefixes[i], suffix); 1251 1252 return (r); 1253 } 1254 1255 int 1256 format_bytes(char *buf, size_t len, uint64_t bytes) 1257 { 1258 int rv; 1259 size_t nlen; 1260 1261 rv = humanize_number(buf, len, bytes, "B", 1024); 1262 if (rv != -1) { 1263 /* nuke the trailing ` B' if it exists */ 1264 nlen = strlen(buf) - 2; 1265 if (strcmp(&buf[nlen], " B") == 0) 1266 buf[nlen] = '\0'; 1267 } 1268 return (rv); 1269 } 1270 1271 /* 1272 * Return true if system call tracing is enabled for the specified process. 1273 */ 1274 bool 1275 trace_is_enabled(struct proc *p) 1276 { 1277 #ifdef SYSCALL_DEBUG 1278 return (true); 1279 #endif 1280 #ifdef KTRACE 1281 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 1282 return (true); 1283 #endif 1284 #ifdef PTRACE 1285 if (ISSET(p->p_slflag, PSL_SYSCALL)) 1286 return (true); 1287 #endif 1288 1289 return (false); 1290 } 1291 1292 /* 1293 * Start trace of particular system call. If process is being traced, 1294 * this routine is called by MD syscall dispatch code just before 1295 * a system call is actually executed. 1296 */ 1297 int 1298 trace_enter(register_t code, const register_t *args, int narg) 1299 { 1300 #ifdef SYSCALL_DEBUG 1301 scdebug_call(code, args); 1302 #endif /* SYSCALL_DEBUG */ 1303 1304 ktrsyscall(code, args, narg); 1305 1306 #ifdef PTRACE 1307 if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 1308 (PSL_SYSCALL|PSL_TRACED)) 1309 process_stoptrace(); 1310 #endif 1311 return 0; 1312 } 1313 1314 /* 1315 * End trace of particular system call. If process is being traced, 1316 * this routine is called by MD syscall dispatch code just after 1317 * a system call finishes. 1318 * MD caller guarantees the passed 'code' is within the supported 1319 * system call number range for emulation the process runs under. 1320 */ 1321 void 1322 trace_exit(register_t code, register_t rval[], int error) 1323 { 1324 #ifdef SYSCALL_DEBUG 1325 scdebug_ret(code, error, rval); 1326 #endif /* SYSCALL_DEBUG */ 1327 1328 ktrsysret(code, error, rval); 1329 1330 #ifdef PTRACE 1331 if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 1332 (PSL_SYSCALL|PSL_TRACED)) 1333 process_stoptrace(); 1334 #endif 1335 } 1336