1 /* $NetBSD: kern_subr.c,v 1.164 2007/08/15 12:07:34 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Luke Mewburn. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Copyright (c) 1992, 1993 50 * The Regents of the University of California. All rights reserved. 51 * 52 * This software was developed by the Computer Systems Engineering group 53 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 54 * contributed to Berkeley. 55 * 56 * All advertising materials mentioning features or use of this software 57 * must display the following acknowledgement: 58 * This product includes software developed by the University of 59 * California, Lawrence Berkeley Laboratory. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 1. Redistributions of source code must retain the above copyright 65 * notice, this list of conditions and the following disclaimer. 66 * 2. 
Redistributions in binary form must reproduce the above copyright 67 * notice, this list of conditions and the following disclaimer in the 68 * documentation and/or other materials provided with the distribution. 69 * 3. Neither the name of the University nor the names of its contributors 70 * may be used to endorse or promote products derived from this software 71 * without specific prior written permission. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 
84 * 85 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95 86 */ 87 88 #include <sys/cdefs.h> 89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.164 2007/08/15 12:07:34 ad Exp $"); 90 91 #include "opt_ddb.h" 92 #include "opt_md.h" 93 #include "opt_syscall_debug.h" 94 #include "opt_ktrace.h" 95 #include "opt_ptrace.h" 96 #include "opt_systrace.h" 97 #include "opt_powerhook.h" 98 #include "opt_tftproot.h" 99 100 #include <sys/param.h> 101 #include <sys/systm.h> 102 #include <sys/proc.h> 103 #include <sys/malloc.h> 104 #include <sys/mount.h> 105 #include <sys/device.h> 106 #include <sys/reboot.h> 107 #include <sys/conf.h> 108 #include <sys/disk.h> 109 #include <sys/disklabel.h> 110 #include <sys/queue.h> 111 #include <sys/systrace.h> 112 #include <sys/ktrace.h> 113 #include <sys/ptrace.h> 114 #include <sys/fcntl.h> 115 #include <sys/kauth.h> 116 #include <sys/vnode.h> 117 118 #include <uvm/uvm_extern.h> 119 120 #include <dev/cons.h> 121 122 #include <net/if.h> 123 124 /* XXX these should eventually move to subr_autoconf.c */ 125 static struct device *finddevice(const char *); 126 static struct device *getdisk(char *, int, int, dev_t *, int); 127 static struct device *parsedisk(char *, int, int, dev_t *); 128 static const char *getwedgename(const char *, int); 129 130 /* 131 * A generic linear hook. 
132 */ 133 struct hook_desc { 134 LIST_ENTRY(hook_desc) hk_list; 135 void (*hk_fn)(void *); 136 void *hk_arg; 137 }; 138 typedef LIST_HEAD(, hook_desc) hook_list_t; 139 140 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 141 142 #ifdef TFTPROOT 143 int tftproot_dhcpboot(struct device *); 144 #endif 145 146 void 147 uio_setup_sysspace(struct uio *uio) 148 { 149 150 uio->uio_vmspace = vmspace_kernel(); 151 } 152 153 int 154 uiomove(void *buf, size_t n, struct uio *uio) 155 { 156 struct vmspace *vm = uio->uio_vmspace; 157 struct iovec *iov; 158 u_int cnt; 159 int error = 0; 160 char *cp = buf; 161 #ifdef MULTIPROCESSOR 162 int hold_count; 163 #endif 164 165 KERNEL_UNLOCK_ALL(NULL, &hold_count); 166 167 ASSERT_SLEEPABLE(NULL, "uiomove"); 168 169 #ifdef DIAGNOSTIC 170 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) 171 panic("uiomove: mode"); 172 #endif 173 while (n > 0 && uio->uio_resid) { 174 iov = uio->uio_iov; 175 cnt = iov->iov_len; 176 if (cnt == 0) { 177 KASSERT(uio->uio_iovcnt > 0); 178 uio->uio_iov++; 179 uio->uio_iovcnt--; 180 continue; 181 } 182 if (cnt > n) 183 cnt = n; 184 if (!VMSPACE_IS_KERNEL_P(vm)) { 185 if (curcpu()->ci_schedstate.spc_flags & 186 SPCF_SHOULDYIELD) 187 preempt(); 188 } 189 190 if (uio->uio_rw == UIO_READ) { 191 error = copyout_vmspace(vm, cp, iov->iov_base, 192 cnt); 193 } else { 194 error = copyin_vmspace(vm, iov->iov_base, cp, 195 cnt); 196 } 197 if (error) { 198 break; 199 } 200 iov->iov_base = (char *)iov->iov_base + cnt; 201 iov->iov_len -= cnt; 202 uio->uio_resid -= cnt; 203 uio->uio_offset += cnt; 204 cp += cnt; 205 KDASSERT(cnt <= n); 206 n -= cnt; 207 } 208 KERNEL_LOCK(hold_count, NULL); 209 return (error); 210 } 211 212 /* 213 * Wrapper for uiomove() that validates the arguments against a known-good 214 * kernel buffer. 
215 */ 216 int 217 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio) 218 { 219 size_t offset; 220 221 if (uio->uio_offset < 0 || /* uio->uio_resid < 0 || */ 222 (offset = uio->uio_offset) != uio->uio_offset) 223 return (EINVAL); 224 if (offset >= buflen) 225 return (0); 226 return (uiomove((char *)buf + offset, buflen - offset, uio)); 227 } 228 229 /* 230 * Give next character to user as result of read. 231 */ 232 int 233 ureadc(int c, struct uio *uio) 234 { 235 struct iovec *iov; 236 237 if (uio->uio_resid <= 0) 238 panic("ureadc: non-positive resid"); 239 again: 240 if (uio->uio_iovcnt <= 0) 241 panic("ureadc: non-positive iovcnt"); 242 iov = uio->uio_iov; 243 if (iov->iov_len <= 0) { 244 uio->uio_iovcnt--; 245 uio->uio_iov++; 246 goto again; 247 } 248 if (!VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) { 249 if (subyte(iov->iov_base, c) < 0) 250 return (EFAULT); 251 } else { 252 *(char *)iov->iov_base = c; 253 } 254 iov->iov_base = (char *)iov->iov_base + 1; 255 iov->iov_len--; 256 uio->uio_resid--; 257 uio->uio_offset++; 258 return (0); 259 } 260 261 /* 262 * Like copyin(), but operates on an arbitrary vmspace. 263 */ 264 int 265 copyin_vmspace(struct vmspace *vm, const void *uaddr, void *kaddr, size_t len) 266 { 267 struct iovec iov; 268 struct uio uio; 269 int error; 270 271 if (len == 0) 272 return (0); 273 274 if (VMSPACE_IS_KERNEL_P(vm)) { 275 return kcopy(uaddr, kaddr, len); 276 } 277 if (__predict_true(vm == curproc->p_vmspace)) { 278 return copyin(uaddr, kaddr, len); 279 } 280 281 iov.iov_base = kaddr; 282 iov.iov_len = len; 283 uio.uio_iov = &iov; 284 uio.uio_iovcnt = 1; 285 uio.uio_offset = (off_t)(intptr_t)uaddr; 286 uio.uio_resid = len; 287 uio.uio_rw = UIO_READ; 288 UIO_SETUP_SYSSPACE(&uio); 289 error = uvm_io(&vm->vm_map, &uio); 290 291 return (error); 292 } 293 294 /* 295 * Like copyout(), but operates on an arbitrary vmspace. 
296 */ 297 int 298 copyout_vmspace(struct vmspace *vm, const void *kaddr, void *uaddr, size_t len) 299 { 300 struct iovec iov; 301 struct uio uio; 302 int error; 303 304 if (len == 0) 305 return (0); 306 307 if (VMSPACE_IS_KERNEL_P(vm)) { 308 return kcopy(kaddr, uaddr, len); 309 } 310 if (__predict_true(vm == curproc->p_vmspace)) { 311 return copyout(kaddr, uaddr, len); 312 } 313 314 iov.iov_base = __UNCONST(kaddr); /* XXXUNCONST cast away const */ 315 iov.iov_len = len; 316 uio.uio_iov = &iov; 317 uio.uio_iovcnt = 1; 318 uio.uio_offset = (off_t)(intptr_t)uaddr; 319 uio.uio_resid = len; 320 uio.uio_rw = UIO_WRITE; 321 UIO_SETUP_SYSSPACE(&uio); 322 error = uvm_io(&vm->vm_map, &uio); 323 324 return (error); 325 } 326 327 /* 328 * Like copyin(), but operates on an arbitrary process. 329 */ 330 int 331 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len) 332 { 333 struct vmspace *vm; 334 int error; 335 336 error = proc_vmspace_getref(p, &vm); 337 if (error) { 338 return error; 339 } 340 error = copyin_vmspace(vm, uaddr, kaddr, len); 341 uvmspace_free(vm); 342 343 return error; 344 } 345 346 /* 347 * Like copyout(), but operates on an arbitrary process. 348 */ 349 int 350 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len) 351 { 352 struct vmspace *vm; 353 int error; 354 355 error = proc_vmspace_getref(p, &vm); 356 if (error) { 357 return error; 358 } 359 error = copyout_vmspace(vm, kaddr, uaddr, len); 360 uvmspace_free(vm); 361 362 return error; 363 } 364 365 /* 366 * Like copyin(), except it operates on kernel addresses when the FKIOCTL 367 * flag is passed in `ioctlflags' from the ioctl call. 
368 */ 369 int 370 ioctl_copyin(int ioctlflags, const void *src, void *dst, size_t len) 371 { 372 if (ioctlflags & FKIOCTL) 373 return kcopy(src, dst, len); 374 return copyin(src, dst, len); 375 } 376 377 /* 378 * Like copyout(), except it operates on kernel addresses when the FKIOCTL 379 * flag is passed in `ioctlflags' from the ioctl call. 380 */ 381 int 382 ioctl_copyout(int ioctlflags, const void *src, void *dst, size_t len) 383 { 384 if (ioctlflags & FKIOCTL) 385 return kcopy(src, dst, len); 386 return copyout(src, dst, len); 387 } 388 389 static void * 390 hook_establish(hook_list_t *list, void (*fn)(void *), void *arg) 391 { 392 struct hook_desc *hd; 393 394 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT); 395 if (hd == NULL) 396 return (NULL); 397 398 hd->hk_fn = fn; 399 hd->hk_arg = arg; 400 LIST_INSERT_HEAD(list, hd, hk_list); 401 402 return (hd); 403 } 404 405 static void 406 hook_disestablish(hook_list_t *list, void *vhook) 407 { 408 #ifdef DIAGNOSTIC 409 struct hook_desc *hd; 410 411 LIST_FOREACH(hd, list, hk_list) { 412 if (hd == vhook) 413 break; 414 } 415 416 if (hd == NULL) 417 panic("hook_disestablish: hook %p not established", vhook); 418 #endif 419 LIST_REMOVE((struct hook_desc *)vhook, hk_list); 420 free(vhook, M_DEVBUF); 421 } 422 423 static void 424 hook_destroy(hook_list_t *list) 425 { 426 struct hook_desc *hd; 427 428 while ((hd = LIST_FIRST(list)) != NULL) { 429 LIST_REMOVE(hd, hk_list); 430 free(hd, M_DEVBUF); 431 } 432 } 433 434 static void 435 hook_proc_run(hook_list_t *list, struct proc *p) 436 { 437 struct hook_desc *hd; 438 439 for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) { 440 ((void (*)(struct proc *, void *))*hd->hk_fn)(p, 441 hd->hk_arg); 442 } 443 } 444 445 /* 446 * "Shutdown hook" types, functions, and variables. 447 * 448 * Should be invoked immediately before the 449 * system is halted or rebooted, i.e. after file systems unmounted, 450 * after crash dump done, etc. 
451 * 452 * Each shutdown hook is removed from the list before it's run, so that 453 * it won't be run again. 454 */ 455 456 static hook_list_t shutdownhook_list; 457 458 void * 459 shutdownhook_establish(void (*fn)(void *), void *arg) 460 { 461 return hook_establish(&shutdownhook_list, fn, arg); 462 } 463 464 void 465 shutdownhook_disestablish(void *vhook) 466 { 467 hook_disestablish(&shutdownhook_list, vhook); 468 } 469 470 /* 471 * Run shutdown hooks. Should be invoked immediately before the 472 * system is halted or rebooted, i.e. after file systems unmounted, 473 * after crash dump done, etc. 474 * 475 * Each shutdown hook is removed from the list before it's run, so that 476 * it won't be run again. 477 */ 478 void 479 doshutdownhooks(void) 480 { 481 struct hook_desc *dp; 482 483 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) { 484 LIST_REMOVE(dp, hk_list); 485 (*dp->hk_fn)(dp->hk_arg); 486 #if 0 487 /* 488 * Don't bother freeing the hook structure,, since we may 489 * be rebooting because of a memory corruption problem, 490 * and this might only make things worse. It doesn't 491 * matter, anyway, since the system is just about to 492 * reboot. 493 */ 494 free(dp, M_DEVBUF); 495 #endif 496 } 497 } 498 499 /* 500 * "Mountroot hook" types, functions, and variables. 
501 */ 502 503 static hook_list_t mountroothook_list; 504 505 void * 506 mountroothook_establish(void (*fn)(struct device *), struct device *dev) 507 { 508 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev); 509 } 510 511 void 512 mountroothook_disestablish(void *vhook) 513 { 514 hook_disestablish(&mountroothook_list, vhook); 515 } 516 517 void 518 mountroothook_destroy(void) 519 { 520 hook_destroy(&mountroothook_list); 521 } 522 523 void 524 domountroothook(void) 525 { 526 struct hook_desc *hd; 527 528 LIST_FOREACH(hd, &mountroothook_list, hk_list) { 529 if (hd->hk_arg == (void *)root_device) { 530 (*hd->hk_fn)(hd->hk_arg); 531 return; 532 } 533 } 534 } 535 536 static hook_list_t exechook_list; 537 538 void * 539 exechook_establish(void (*fn)(struct proc *, void *), void *arg) 540 { 541 return hook_establish(&exechook_list, (void (*)(void *))fn, arg); 542 } 543 544 void 545 exechook_disestablish(void *vhook) 546 { 547 hook_disestablish(&exechook_list, vhook); 548 } 549 550 /* 551 * Run exec hooks. 552 */ 553 void 554 doexechooks(struct proc *p) 555 { 556 hook_proc_run(&exechook_list, p); 557 } 558 559 static hook_list_t exithook_list; 560 561 void * 562 exithook_establish(void (*fn)(struct proc *, void *), void *arg) 563 { 564 return hook_establish(&exithook_list, (void (*)(void *))fn, arg); 565 } 566 567 void 568 exithook_disestablish(void *vhook) 569 { 570 hook_disestablish(&exithook_list, vhook); 571 } 572 573 /* 574 * Run exit hooks. 575 */ 576 void 577 doexithooks(struct proc *p) 578 { 579 hook_proc_run(&exithook_list, p); 580 } 581 582 static hook_list_t forkhook_list; 583 584 void * 585 forkhook_establish(void (*fn)(struct proc *, struct proc *)) 586 { 587 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL); 588 } 589 590 void 591 forkhook_disestablish(void *vhook) 592 { 593 hook_disestablish(&forkhook_list, vhook); 594 } 595 596 /* 597 * Run fork hooks. 
598 */ 599 void 600 doforkhooks(struct proc *p2, struct proc *p1) 601 { 602 struct hook_desc *hd; 603 604 LIST_FOREACH(hd, &forkhook_list, hk_list) { 605 ((void (*)(struct proc *, struct proc *))*hd->hk_fn) 606 (p2, p1); 607 } 608 } 609 610 /* 611 * "Power hook" types, functions, and variables. 612 * The list of power hooks is kept ordered with the last registered hook 613 * first. 614 * When running the hooks on power down the hooks are called in reverse 615 * registration order, when powering up in registration order. 616 */ 617 struct powerhook_desc { 618 CIRCLEQ_ENTRY(powerhook_desc) sfd_list; 619 void (*sfd_fn)(int, void *); 620 void *sfd_arg; 621 char sfd_name[16]; 622 }; 623 624 static CIRCLEQ_HEAD(, powerhook_desc) powerhook_list = 625 CIRCLEQ_HEAD_INITIALIZER(powerhook_list); 626 627 void * 628 powerhook_establish(const char *name, void (*fn)(int, void *), void *arg) 629 { 630 struct powerhook_desc *ndp; 631 632 ndp = (struct powerhook_desc *) 633 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT); 634 if (ndp == NULL) 635 return (NULL); 636 637 ndp->sfd_fn = fn; 638 ndp->sfd_arg = arg; 639 strlcpy(ndp->sfd_name, name, sizeof(ndp->sfd_name)); 640 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list); 641 642 return (ndp); 643 } 644 645 void 646 powerhook_disestablish(void *vhook) 647 { 648 #ifdef DIAGNOSTIC 649 struct powerhook_desc *dp; 650 651 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) 652 if (dp == vhook) 653 goto found; 654 panic("powerhook_disestablish: hook %p not established", vhook); 655 found: 656 #endif 657 658 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook, 659 sfd_list); 660 free(vhook, M_DEVBUF); 661 } 662 663 /* 664 * Run power hooks. 665 */ 666 void 667 dopowerhooks(int why) 668 { 669 struct powerhook_desc *dp; 670 671 #ifdef POWERHOOK_DEBUG 672 const char *why_name; 673 static const char * pwr_names[] = {PWR_NAMES}; 674 why_name = why < __arraycount(pwr_names) ? 
pwr_names[why] : "???"; 675 #endif 676 677 if (why == PWR_RESUME || why == PWR_SOFTRESUME) { 678 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) { 679 #ifdef POWERHOOK_DEBUG 680 printf("dopowerhooks %s: %s (%p)\n", why_name, dp->sfd_name, dp); 681 #endif 682 (*dp->sfd_fn)(why, dp->sfd_arg); 683 } 684 } else { 685 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) { 686 #ifdef POWERHOOK_DEBUG 687 printf("dopowerhooks %s: %s (%p)\n", why_name, dp->sfd_name, dp); 688 #endif 689 (*dp->sfd_fn)(why, dp->sfd_arg); 690 } 691 } 692 693 #ifdef POWERHOOK_DEBUG 694 printf("dopowerhooks: %s done\n", why_name); 695 #endif 696 } 697 698 static int 699 isswap(struct device *dv) 700 { 701 struct dkwedge_info wi; 702 struct vnode *vn; 703 int error; 704 705 if (device_class(dv) != DV_DISK || !device_is_a(dv, "dk")) 706 return 0; 707 708 if ((vn = opendisk(dv)) == NULL) 709 return 0; 710 711 error = VOP_IOCTL(vn, DIOCGWEDGEINFO, &wi, FREAD, NOCRED, 0); 712 VOP_CLOSE(vn, FREAD, NOCRED, 0); 713 vput(vn); 714 if (error) { 715 #ifdef DEBUG_WEDGE 716 printf("%s: Get wedge info returned %d\n", dv->dv_xname, error); 717 #endif 718 return 0; 719 } 720 return strcmp(wi.dkw_ptype, DKW_PTYPE_SWAP) == 0; 721 } 722 723 /* 724 * Determine the root device and, if instructed to, the root file system. 725 */ 726 727 #include "md.h" 728 #if NMD == 0 729 #undef MEMORY_DISK_HOOKS 730 #endif 731 732 #ifdef MEMORY_DISK_HOOKS 733 static struct device fakemdrootdev[NMD]; 734 extern struct cfdriver md_cd; 735 #endif 736 737 #ifdef MEMORY_DISK_IS_ROOT 738 #define BOOT_FROM_MEMORY_HOOKS 1 739 #endif 740 741 /* 742 * The device and wedge that we booted from. If booted_wedge is NULL, 743 * the we might consult booted_partition. 744 */ 745 struct device *booted_device; 746 struct device *booted_wedge; 747 int booted_partition; 748 749 /* 750 * Use partition letters if it's a disk class but not a wedge. 751 * XXX Check for wedge is kinda gross. 
752 */ 753 #define DEV_USES_PARTITIONS(dv) \ 754 (device_class((dv)) == DV_DISK && \ 755 !device_is_a((dv), "dk")) 756 757 void 758 setroot(struct device *bootdv, int bootpartition) 759 { 760 struct device *dv; 761 int len, majdev; 762 #ifdef MEMORY_DISK_HOOKS 763 int i; 764 #endif 765 dev_t nrootdev; 766 dev_t ndumpdev = NODEV; 767 char buf[128]; 768 const char *rootdevname; 769 const char *dumpdevname; 770 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */ 771 struct device *dumpdv = NULL; 772 struct ifnet *ifp; 773 const char *deffsname; 774 struct vfsops *vops; 775 776 #ifdef TFTPROOT 777 if (tftproot_dhcpboot(bootdv) != 0) 778 boothowto |= RB_ASKNAME; 779 #endif 780 781 #ifdef MEMORY_DISK_HOOKS 782 for (i = 0; i < NMD; i++) { 783 fakemdrootdev[i].dv_class = DV_DISK; 784 fakemdrootdev[i].dv_cfdata = NULL; 785 fakemdrootdev[i].dv_cfdriver = &md_cd; 786 fakemdrootdev[i].dv_unit = i; 787 fakemdrootdev[i].dv_parent = NULL; 788 snprintf(fakemdrootdev[i].dv_xname, 789 sizeof(fakemdrootdev[i].dv_xname), "md%d", i); 790 } 791 #endif /* MEMORY_DISK_HOOKS */ 792 793 #ifdef MEMORY_DISK_IS_ROOT 794 bootdv = &fakemdrootdev[0]; 795 bootpartition = 0; 796 #endif 797 798 /* 799 * If NFS is specified as the file system, and we found 800 * a DV_DISK boot device (or no boot device at all), then 801 * find a reasonable network interface for "rootspec". 802 */ 803 vops = vfs_getopsbyname("nfs"); 804 if (vops != NULL && vops->vfs_mountroot == mountroot && 805 rootspec == NULL && 806 (bootdv == NULL || device_class(bootdv) != DV_IFNET)) { 807 IFNET_FOREACH(ifp) { 808 if ((ifp->if_flags & 809 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) 810 break; 811 } 812 if (ifp == NULL) { 813 /* 814 * Can't find a suitable interface; ask the 815 * user. 816 */ 817 boothowto |= RB_ASKNAME; 818 } else { 819 /* 820 * Have a suitable interface; behave as if 821 * the user specified this interface. 
822 */ 823 rootspec = (const char *)ifp->if_xname; 824 } 825 } 826 827 /* 828 * If wildcarded root and we the boot device wasn't determined, 829 * ask the user. 830 */ 831 if (rootspec == NULL && bootdv == NULL) 832 boothowto |= RB_ASKNAME; 833 834 top: 835 if (boothowto & RB_ASKNAME) { 836 struct device *defdumpdv; 837 838 for (;;) { 839 printf("root device"); 840 if (bootdv != NULL) { 841 printf(" (default %s", bootdv->dv_xname); 842 if (DEV_USES_PARTITIONS(bootdv)) 843 printf("%c", bootpartition + 'a'); 844 printf(")"); 845 } 846 printf(": "); 847 len = cngetsn(buf, sizeof(buf)); 848 if (len == 0 && bootdv != NULL) { 849 strlcpy(buf, bootdv->dv_xname, sizeof(buf)); 850 len = strlen(buf); 851 } 852 if (len > 0 && buf[len - 1] == '*') { 853 buf[--len] = '\0'; 854 dv = getdisk(buf, len, 1, &nrootdev, 0); 855 if (dv != NULL) { 856 rootdv = dv; 857 break; 858 } 859 } 860 dv = getdisk(buf, len, bootpartition, &nrootdev, 0); 861 if (dv != NULL) { 862 rootdv = dv; 863 break; 864 } 865 } 866 867 /* 868 * Set up the default dump device. If root is on 869 * a network device, there is no default dump 870 * device, since we don't support dumps to the 871 * network. 872 */ 873 if (DEV_USES_PARTITIONS(rootdv) == 0) 874 defdumpdv = NULL; 875 else 876 defdumpdv = rootdv; 877 878 for (;;) { 879 printf("dump device"); 880 if (defdumpdv != NULL) { 881 /* 882 * Note, we know it's a disk if we get here. 
883 */ 884 printf(" (default %sb)", defdumpdv->dv_xname); 885 } 886 printf(": "); 887 len = cngetsn(buf, sizeof(buf)); 888 if (len == 0) { 889 if (defdumpdv != NULL) { 890 ndumpdev = MAKEDISKDEV(major(nrootdev), 891 DISKUNIT(nrootdev), 1); 892 } 893 dumpdv = defdumpdv; 894 break; 895 } 896 if (len == 4 && strcmp(buf, "none") == 0) { 897 dumpdv = NULL; 898 break; 899 } 900 dv = getdisk(buf, len, 1, &ndumpdev, 1); 901 if (dv != NULL) { 902 dumpdv = dv; 903 break; 904 } 905 } 906 907 rootdev = nrootdev; 908 dumpdev = ndumpdev; 909 910 for (vops = LIST_FIRST(&vfs_list); vops != NULL; 911 vops = LIST_NEXT(vops, vfs_list)) { 912 if (vops->vfs_mountroot != NULL && 913 vops->vfs_mountroot == mountroot) 914 break; 915 } 916 917 if (vops == NULL) { 918 mountroot = NULL; 919 deffsname = "generic"; 920 } else 921 deffsname = vops->vfs_name; 922 923 for (;;) { 924 printf("file system (default %s): ", deffsname); 925 len = cngetsn(buf, sizeof(buf)); 926 if (len == 0) 927 break; 928 if (len == 4 && strcmp(buf, "halt") == 0) 929 cpu_reboot(RB_HALT, NULL); 930 else if (len == 6 && strcmp(buf, "reboot") == 0) 931 cpu_reboot(0, NULL); 932 #if defined(DDB) 933 else if (len == 3 && strcmp(buf, "ddb") == 0) { 934 console_debugger(); 935 } 936 #endif 937 else if (len == 7 && strcmp(buf, "generic") == 0) { 938 mountroot = NULL; 939 break; 940 } 941 vops = vfs_getopsbyname(buf); 942 if (vops == NULL || vops->vfs_mountroot == NULL) { 943 printf("use one of: generic"); 944 for (vops = LIST_FIRST(&vfs_list); 945 vops != NULL; 946 vops = LIST_NEXT(vops, vfs_list)) { 947 if (vops->vfs_mountroot != NULL) 948 printf(" %s", vops->vfs_name); 949 } 950 #if defined(DDB) 951 printf(" ddb"); 952 #endif 953 printf(" halt reboot\n"); 954 } else { 955 mountroot = vops->vfs_mountroot; 956 break; 957 } 958 } 959 960 } else if (rootspec == NULL) { 961 /* 962 * Wildcarded root; use the boot device. 
963 */ 964 rootdv = bootdv; 965 966 majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0); 967 if (majdev >= 0) { 968 /* 969 * Root is on a disk. `bootpartition' is root, 970 * unless the device does not use partitions. 971 */ 972 if (DEV_USES_PARTITIONS(bootdv)) 973 rootdev = MAKEDISKDEV(majdev, 974 device_unit(bootdv), 975 bootpartition); 976 else 977 rootdev = makedev(majdev, device_unit(bootdv)); 978 } 979 } else { 980 981 /* 982 * `root on <dev> ...' 983 */ 984 985 /* 986 * If it's a network interface, we can bail out 987 * early. 988 */ 989 dv = finddevice(rootspec); 990 if (dv != NULL && device_class(dv) == DV_IFNET) { 991 rootdv = dv; 992 goto haveroot; 993 } 994 995 if (rootdev == NODEV && 996 device_class(dv) == DV_DISK && device_is_a(dv, "dk") && 997 (majdev = devsw_name2blk(dv->dv_xname, NULL, 0)) >= 0) 998 rootdev = makedev(majdev, device_unit(dv)); 999 1000 rootdevname = devsw_blk2name(major(rootdev)); 1001 if (rootdevname == NULL) { 1002 printf("unknown device major 0x%x\n", rootdev); 1003 boothowto |= RB_ASKNAME; 1004 goto top; 1005 } 1006 memset(buf, 0, sizeof(buf)); 1007 snprintf(buf, sizeof(buf), "%s%d", rootdevname, 1008 DISKUNIT(rootdev)); 1009 1010 rootdv = finddevice(buf); 1011 if (rootdv == NULL) { 1012 printf("device %s (0x%x) not configured\n", 1013 buf, rootdev); 1014 boothowto |= RB_ASKNAME; 1015 goto top; 1016 } 1017 } 1018 1019 haveroot: 1020 1021 root_device = rootdv; 1022 1023 switch (device_class(rootdv)) { 1024 case DV_IFNET: 1025 case DV_DISK: 1026 aprint_normal("root on %s", rootdv->dv_xname); 1027 if (DEV_USES_PARTITIONS(rootdv)) 1028 aprint_normal("%c", DISKPART(rootdev) + 'a'); 1029 break; 1030 1031 default: 1032 printf("can't determine root device\n"); 1033 boothowto |= RB_ASKNAME; 1034 goto top; 1035 } 1036 1037 /* 1038 * Now configure the dump device. 1039 * 1040 * If we haven't figured out the dump device, do so, with 1041 * the following rules: 1042 * 1043 * (a) We already know dumpdv in the RB_ASKNAME case. 
1044 * 1045 * (b) If dumpspec is set, try to use it. If the device 1046 * is not available, punt. 1047 * 1048 * (c) If dumpspec is not set, the dump device is 1049 * wildcarded or unspecified. If the root device 1050 * is DV_IFNET, punt. Otherwise, use partition b 1051 * of the root device. 1052 */ 1053 1054 if (boothowto & RB_ASKNAME) { /* (a) */ 1055 if (dumpdv == NULL) 1056 goto nodumpdev; 1057 } else if (dumpspec != NULL) { /* (b) */ 1058 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) { 1059 /* 1060 * Operator doesn't want a dump device. 1061 * Or looks like they tried to pick a network 1062 * device. Oops. 1063 */ 1064 goto nodumpdev; 1065 } 1066 1067 dumpdevname = devsw_blk2name(major(dumpdev)); 1068 if (dumpdevname == NULL) 1069 goto nodumpdev; 1070 memset(buf, 0, sizeof(buf)); 1071 snprintf(buf, sizeof(buf), "%s%d", dumpdevname, 1072 DISKUNIT(dumpdev)); 1073 1074 dumpdv = finddevice(buf); 1075 if (dumpdv == NULL) { 1076 /* 1077 * Device not configured. 1078 */ 1079 goto nodumpdev; 1080 } 1081 } else { /* (c) */ 1082 if (DEV_USES_PARTITIONS(rootdv) == 0) { 1083 for (dv = TAILQ_FIRST(&alldevs); dv != NULL; 1084 dv = TAILQ_NEXT(dv, dv_list)) 1085 if (isswap(dv)) 1086 break; 1087 if (dv == NULL) 1088 goto nodumpdev; 1089 1090 majdev = devsw_name2blk(dv->dv_xname, NULL, 0); 1091 if (majdev < 0) 1092 goto nodumpdev; 1093 dumpdv = dv; 1094 dumpdev = makedev(majdev, device_unit(dumpdv)); 1095 } else { 1096 dumpdv = rootdv; 1097 dumpdev = MAKEDISKDEV(major(rootdev), 1098 device_unit(dumpdv), 1); 1099 } 1100 } 1101 1102 aprint_normal(" dumps on %s", dumpdv->dv_xname); 1103 if (DEV_USES_PARTITIONS(dumpdv)) 1104 aprint_normal("%c", DISKPART(dumpdev) + 'a'); 1105 aprint_normal("\n"); 1106 return; 1107 1108 nodumpdev: 1109 dumpdev = NODEV; 1110 aprint_normal("\n"); 1111 } 1112 1113 static struct device * 1114 finddevice(const char *name) 1115 { 1116 const char *wname; 1117 struct device *dv; 1118 #if defined(BOOT_FROM_MEMORY_HOOKS) 1119 int j; 1120 #endif /* 
BOOT_FROM_MEMORY_HOOKS */ 1121 1122 if ((wname = getwedgename(name, strlen(name))) != NULL) 1123 return dkwedge_find_by_wname(wname); 1124 1125 #ifdef BOOT_FROM_MEMORY_HOOKS 1126 for (j = 0; j < NMD; j++) { 1127 if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) 1128 return &fakemdrootdev[j]; 1129 } 1130 #endif /* BOOT_FROM_MEMORY_HOOKS */ 1131 1132 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1133 if (strcmp(dv->dv_xname, name) == 0) 1134 break; 1135 } 1136 return dv; 1137 } 1138 1139 static struct device * 1140 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump) 1141 { 1142 struct device *dv; 1143 #ifdef MEMORY_DISK_HOOKS 1144 int i; 1145 #endif 1146 1147 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1148 printf("use one of:"); 1149 #ifdef MEMORY_DISK_HOOKS 1150 if (isdump == 0) 1151 for (i = 0; i < NMD; i++) 1152 printf(" %s[a-%c]", fakemdrootdev[i].dv_xname, 1153 'a' + MAXPARTITIONS - 1); 1154 #endif 1155 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1156 if (DEV_USES_PARTITIONS(dv)) 1157 printf(" %s[a-%c]", dv->dv_xname, 1158 'a' + MAXPARTITIONS - 1); 1159 else if (device_class(dv) == DV_DISK) 1160 printf(" %s", dv->dv_xname); 1161 if (isdump == 0 && device_class(dv) == DV_IFNET) 1162 printf(" %s", dv->dv_xname); 1163 } 1164 dkwedge_print_wnames(); 1165 if (isdump) 1166 printf(" none"); 1167 #if defined(DDB) 1168 printf(" ddb"); 1169 #endif 1170 printf(" halt reboot\n"); 1171 } 1172 return dv; 1173 } 1174 1175 static const char * 1176 getwedgename(const char *name, int namelen) 1177 { 1178 const char *wpfx = "wedge:"; 1179 const int wpfxlen = strlen(wpfx); 1180 1181 if (namelen < wpfxlen || strncmp(name, wpfx, wpfxlen) != 0) 1182 return NULL; 1183 1184 return name + wpfxlen; 1185 } 1186 1187 static struct device * 1188 parsedisk(char *str, int len, int defpart, dev_t *devp) 1189 { 1190 struct device *dv; 1191 const char *wname; 1192 char *cp, c; 1193 int majdev, part; 1194 #ifdef MEMORY_DISK_HOOKS 1195 int i; 1196 #endif 1197 if (len == 0) 1198 
return (NULL); 1199 1200 if (len == 4 && strcmp(str, "halt") == 0) 1201 cpu_reboot(RB_HALT, NULL); 1202 else if (len == 6 && strcmp(str, "reboot") == 0) 1203 cpu_reboot(0, NULL); 1204 #if defined(DDB) 1205 else if (len == 3 && strcmp(str, "ddb") == 0) 1206 console_debugger(); 1207 #endif 1208 1209 cp = str + len - 1; 1210 c = *cp; 1211 1212 if ((wname = getwedgename(str, len)) != NULL) { 1213 if ((dv = dkwedge_find_by_wname(wname)) == NULL) 1214 return NULL; 1215 part = defpart; 1216 goto gotdisk; 1217 } else if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) { 1218 part = c - 'a'; 1219 *cp = '\0'; 1220 } else 1221 part = defpart; 1222 1223 #ifdef MEMORY_DISK_HOOKS 1224 for (i = 0; i < NMD; i++) 1225 if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) { 1226 dv = &fakemdrootdev[i]; 1227 goto gotdisk; 1228 } 1229 #endif 1230 1231 dv = finddevice(str); 1232 if (dv != NULL) { 1233 if (device_class(dv) == DV_DISK) { 1234 gotdisk: 1235 majdev = devsw_name2blk(dv->dv_xname, NULL, 0); 1236 if (majdev < 0) 1237 panic("parsedisk"); 1238 if (DEV_USES_PARTITIONS(dv)) 1239 *devp = MAKEDISKDEV(majdev, device_unit(dv), 1240 part); 1241 else 1242 *devp = makedev(majdev, device_unit(dv)); 1243 } 1244 1245 if (device_class(dv) == DV_IFNET) 1246 *devp = NODEV; 1247 } 1248 1249 *cp = c; 1250 return (dv); 1251 } 1252 1253 /* 1254 * snprintf() `bytes' into `buf', reformatting it so that the number, 1255 * plus a possible `x' + suffix extension) fits into len bytes (including 1256 * the terminating NUL). 1257 * Returns the number of bytes stored in buf, or -1 if there was a problem. 
/*
 * snprintf() `bytes' into `buf', dividing it by `divisor' as often as is
 * needed so that the number (plus a possible ` x' prefix and the suffix)
 * fits into len bytes (including the terminating NUL).
 *
 * A divisor of 1024 selects the prefixes " KMGTPE"; any other divisor
 * selects the SI prefixes " kMGTPE".
 *
 * Returns the value returned by snprintf(), or -1 if there was a problem
 * (buf or suffix is NULL, divisor is not positive, or len is too small
 * to hold `x y' + suffix + NUL).  On failure buf is set to the empty
 * string whenever buf is non-NULL and len is non-zero.
 *
 * E.g., given a len of 9, a suffix of `B' and a divisor of 1024:
 *	bytes		result
 *	-----		------
 *	99999		`99999 B'
 *	100000		`97 KB'
 *	66715648	`65152 KB'
 *	252215296	`240 MB'
 */
int
humanize_number(char *buf, size_t len, uint64_t bytes, const char *suffix,
    int divisor)
{
	/* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */
	const char *prefixes;
	int r;
	int unbounded;
	uint64_t umax;
	size_t i, suffixlen;

	if (buf == NULL || suffix == NULL)
		return (-1);
	if (len > 0)
		buf[0] = '\0';
	/* Scaling divides by `divisor'; refuse anything that cannot scale. */
	if (divisor <= 0)
		return (-1);
	suffixlen = strlen(suffix);
	/* check if enough room for `x y' + suffix + `\0' */
	if (len < 4 + suffixlen)
		return (-1);

	if (divisor == 1024) {
		/*
		 * binary multiplies
		 * XXX IEC 60027-2 recommends Ki, Mi, Gi...
		 */
		prefixes = " KMGTPE";
	} else
		prefixes = " kMGTPE"; /* SI for decimal multiplies */

	/*
	 * umax is the smallest value that no longer fits in the digits
	 * available, i.e. 10^(len - suffixlen - 3).  Once that power of ten
	 * exceeds what a uint64_t can represent, every possible value of
	 * `bytes' fits and no scaling is required.  Without this check umax
	 * wraps around for large buffers and small numbers get scaled away.
	 */
	umax = 1;
	unbounded = 0;
	for (i = 0; i < len - suffixlen - 3; i++) {
		if (umax > ~(uint64_t)0 / 10) {
			unbounded = 1;
			break;
		}
		umax *= 10;
	}

	i = 0;
	if (!unbounded) {
		for (; bytes >= umax && prefixes[i + 1]; i++)
			bytes /= (uint64_t)divisor;
	}

	r = snprintf(buf, len, "%llu%s%c%s", (unsigned long long)bytes,
	    i == 0 ? "" : " ", prefixes[i], suffix);

	return (r);
}

/*
 * Format `bytes' into `buf' using humanize_number() with a suffix of `B'
 * and a divisor of 1024, then drop a trailing ` B' so that unscaled values
 * are printed as a bare number.
 *
 * Returns whatever humanize_number() returned.  Note that this value is
 * computed before the ` B' is removed, so for unscaled values it is two
 * larger than the length of the string left in buf.
 */
int
format_bytes(char *buf, size_t len, uint64_t bytes)
{
	int	rv;
	size_t	slen;

	rv = humanize_number(buf, len, bytes, "B", 1024);
	if (rv != -1) {
		/* nuke the trailing ` B' if it exists */
		slen = strlen(buf);
		if (slen >= 2 && strcmp(&buf[slen - 2], " B") == 0)
			buf[slen - 2] = '\0';
	}
	return (rv);
}
1324 */ 1325 bool 1326 trace_is_enabled(struct proc *p) 1327 { 1328 #ifdef SYSCALL_DEBUG 1329 return (true); 1330 #endif 1331 #ifdef KTRACE 1332 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 1333 return (true); 1334 #endif 1335 #ifdef SYSTRACE 1336 if (ISSET(p->p_flag, PK_SYSTRACE)) 1337 return (true); 1338 #endif 1339 #ifdef PTRACE 1340 if (ISSET(p->p_slflag, PSL_SYSCALL)) 1341 return (true); 1342 #endif 1343 1344 return (false); 1345 } 1346 1347 /* 1348 * Start trace of particular system call. If process is being traced, 1349 * this routine is called by MD syscall dispatch code just before 1350 * a system call is actually executed. 1351 * MD caller guarantees the passed 'code' is within the supported 1352 * system call number range for emulation the process runs under. 1353 */ 1354 int 1355 trace_enter(struct lwp *l, register_t code, 1356 register_t realcode, const struct sysent *callp, void *args) 1357 { 1358 #if defined(SYSCALL_DEBUG) || defined(KTRACE) || defined(PTRACE) || defined(SYSTRACE) 1359 struct proc *p = l->l_proc; 1360 1361 #ifdef SYSCALL_DEBUG 1362 scdebug_call(l, code, args); 1363 #endif /* SYSCALL_DEBUG */ 1364 1365 ktrsyscall(code, realcode, callp, args); 1366 1367 #ifdef PTRACE 1368 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 1369 (PSL_SYSCALL|PSL_TRACED)) 1370 process_stoptrace(l); 1371 #endif 1372 1373 #ifdef SYSTRACE 1374 if (ISSET(p->p_flag, PK_SYSTRACE)) { 1375 int error; 1376 KERNEL_LOCK(1, l); 1377 error = systrace_enter(l, code, args); 1378 KERNEL_UNLOCK_ONE(l); 1379 return error; 1380 } 1381 #endif 1382 #endif /* SYSCALL_DEBUG || {K,P,SYS}TRACE */ 1383 return 0; 1384 } 1385 1386 /* 1387 * End trace of particular system call. If process is being traced, 1388 * this routine is called by MD syscall dispatch code just after 1389 * a system call finishes. 1390 * MD caller guarantees the passed 'code' is within the supported 1391 * system call number range for emulation the process runs under. 
1392 */ 1393 void 1394 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[], 1395 int error) 1396 { 1397 #if defined(SYSCALL_DEBUG) || defined(KTRACE) || defined(PTRACE) || defined(SYSTRACE) 1398 struct proc *p = l->l_proc; 1399 1400 #ifdef SYSCALL_DEBUG 1401 scdebug_ret(l, code, error, rval); 1402 #endif /* SYSCALL_DEBUG */ 1403 1404 ktrsysret(code, error, rval); 1405 1406 #ifdef PTRACE 1407 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 1408 (PSL_SYSCALL|PSL_TRACED)) 1409 process_stoptrace(l); 1410 #endif 1411 1412 #ifdef SYSTRACE 1413 if (ISSET(p->p_flag, PK_SYSTRACE)) { 1414 KERNEL_LOCK(1, l); 1415 systrace_exit(l, code, args, rval, error); 1416 KERNEL_UNLOCK_ONE(l); 1417 } 1418 #endif 1419 #endif /* SYSCALL_DEBUG || {K,P,SYS}TRACE */ 1420 } 1421