1 /* $NetBSD: kern_subr.c,v 1.166 2007/11/26 19:02:02 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 1999, 2002, 2007, 2006 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Luke Mewburn. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Copyright (c) 1992, 1993 50 * The Regents of the University of California. All rights reserved. 51 * 52 * This software was developed by the Computer Systems Engineering group 53 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 54 * contributed to Berkeley. 55 * 56 * All advertising materials mentioning features or use of this software 57 * must display the following acknowledgement: 58 * This product includes software developed by the University of 59 * California, Lawrence Berkeley Laboratory. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 1. Redistributions of source code must retain the above copyright 65 * notice, this list of conditions and the following disclaimer. 66 * 2. Redistributions in binary form must reproduce the above copyright 67 * notice, this list of conditions and the following disclaimer in the 68 * documentation and/or other materials provided with the distribution. 69 * 3. Neither the name of the University nor the names of its contributors 70 * may be used to endorse or promote products derived from this software 71 * without specific prior written permission. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 84 * 85 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95 86 */ 87 88 #include <sys/cdefs.h> 89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.166 2007/11/26 19:02:02 pooka Exp $"); 90 91 #include "opt_ddb.h" 92 #include "opt_md.h" 93 #include "opt_syscall_debug.h" 94 #include "opt_ktrace.h" 95 #include "opt_ptrace.h" 96 #include "opt_systrace.h" 97 #include "opt_powerhook.h" 98 #include "opt_tftproot.h" 99 100 #include <sys/param.h> 101 #include <sys/systm.h> 102 #include <sys/proc.h> 103 #include <sys/malloc.h> 104 #include <sys/mount.h> 105 #include <sys/device.h> 106 #include <sys/reboot.h> 107 #include <sys/conf.h> 108 #include <sys/disk.h> 109 #include <sys/disklabel.h> 110 #include <sys/queue.h> 111 #include <sys/systrace.h> 112 #include <sys/ktrace.h> 113 #include <sys/ptrace.h> 114 #include <sys/fcntl.h> 115 #include <sys/kauth.h> 116 #include <sys/vnode.h> 117 118 #include <uvm/uvm_extern.h> 119 120 #include <dev/cons.h> 121 122 #include <net/if.h> 123 124 /* XXX these should eventually move to subr_autoconf.c */ 125 static struct device *finddevice(const char *); 126 static struct device *getdisk(char *, int, int, dev_t *, int); 127 static struct device *parsedisk(char *, int, int, dev_t *); 128 static const char *getwedgename(const char *, int); 129 130 /* 131 * A generic linear hook. 132 */ 133 struct hook_desc { 134 LIST_ENTRY(hook_desc) hk_list; 135 void (*hk_fn)(void *); 136 void *hk_arg; 137 }; 138 typedef LIST_HEAD(, hook_desc) hook_list_t; 139 140 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 141 142 #ifdef TFTPROOT 143 int tftproot_dhcpboot(struct device *); 144 #endif 145 146 void 147 uio_setup_sysspace(struct uio *uio) 148 { 149 150 uio->uio_vmspace = vmspace_kernel(); 151 } 152 153 int 154 uiomove(void *buf, size_t n, struct uio *uio) 155 { 156 struct vmspace *vm = uio->uio_vmspace; 157 struct iovec *iov; 158 u_int cnt; 159 int error = 0; 160 size_t on; 161 char *cp = buf; 162 #ifdef MULTIPROCESSOR 163 int hold_count; 164 #endif 165 166 if ((on = n) >= 1024) { 167 KERNEL_UNLOCK_ALL(NULL, &hold_count); 168 } 169 170 ASSERT_SLEEPABLE(NULL, "uiomove"); 171 172 #ifdef DIAGNOSTIC 173 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) 174 panic("uiomove: mode"); 175 #endif 176 while (n > 0 && uio->uio_resid) { 177 iov = uio->uio_iov; 178 cnt = iov->iov_len; 179 if (cnt == 0) { 180 KASSERT(uio->uio_iovcnt > 0); 181 uio->uio_iov++; 182 uio->uio_iovcnt--; 183 continue; 184 } 185 if (cnt > n) 186 cnt = n; 187 if (!VMSPACE_IS_KERNEL_P(vm)) { 188 if (curcpu()->ci_schedstate.spc_flags & 189 SPCF_SHOULDYIELD) 190 preempt(); 191 } 192 193 if (uio->uio_rw == UIO_READ) { 194 error = copyout_vmspace(vm, cp, iov->iov_base, 195 cnt); 196 } else { 197 error = copyin_vmspace(vm, iov->iov_base, cp, 198 cnt); 199 } 200 if (error) { 201 break; 202 } 203 iov->iov_base = (char *)iov->iov_base + cnt; 204 iov->iov_len -= cnt; 205 uio->uio_resid -= cnt; 206 uio->uio_offset += cnt; 207 cp += cnt; 208 KDASSERT(cnt <= n); 209 n -= cnt; 210 } 211 212 if (on >= 1024) { 213 KERNEL_LOCK(hold_count, NULL); 214 } 215 return (error); 216 } 217 218 /* 219 * Wrapper for uiomove() that validates the arguments against a known-good 220 * kernel buffer. 221 */ 222 int 223 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio) 224 { 225 size_t offset; 226 227 if (uio->uio_offset < 0 || /* uio->uio_resid < 0 || */ 228 (offset = uio->uio_offset) != uio->uio_offset) 229 return (EINVAL); 230 if (offset >= buflen) 231 return (0); 232 return (uiomove((char *)buf + offset, buflen - offset, uio)); 233 } 234 235 /* 236 * Give next character to user as result of read. 237 */ 238 int 239 ureadc(int c, struct uio *uio) 240 { 241 struct iovec *iov; 242 243 if (uio->uio_resid <= 0) 244 panic("ureadc: non-positive resid"); 245 again: 246 if (uio->uio_iovcnt <= 0) 247 panic("ureadc: non-positive iovcnt"); 248 iov = uio->uio_iov; 249 if (iov->iov_len <= 0) { 250 uio->uio_iovcnt--; 251 uio->uio_iov++; 252 goto again; 253 } 254 if (!VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) { 255 if (subyte(iov->iov_base, c) < 0) 256 return (EFAULT); 257 } else { 258 *(char *)iov->iov_base = c; 259 } 260 iov->iov_base = (char *)iov->iov_base + 1; 261 iov->iov_len--; 262 uio->uio_resid--; 263 uio->uio_offset++; 264 return (0); 265 } 266 267 /* 268 * Like copyin(), but operates on an arbitrary vmspace. 269 */ 270 int 271 copyin_vmspace(struct vmspace *vm, const void *uaddr, void *kaddr, size_t len) 272 { 273 struct iovec iov; 274 struct uio uio; 275 int error; 276 277 if (len == 0) 278 return (0); 279 280 if (VMSPACE_IS_KERNEL_P(vm)) { 281 return kcopy(uaddr, kaddr, len); 282 } 283 if (__predict_true(vm == curproc->p_vmspace)) { 284 return copyin(uaddr, kaddr, len); 285 } 286 287 iov.iov_base = kaddr; 288 iov.iov_len = len; 289 uio.uio_iov = &iov; 290 uio.uio_iovcnt = 1; 291 uio.uio_offset = (off_t)(intptr_t)uaddr; 292 uio.uio_resid = len; 293 uio.uio_rw = UIO_READ; 294 UIO_SETUP_SYSSPACE(&uio); 295 error = uvm_io(&vm->vm_map, &uio); 296 297 return (error); 298 } 299 300 /* 301 * Like copyout(), but operates on an arbitrary vmspace. 302 */ 303 int 304 copyout_vmspace(struct vmspace *vm, const void *kaddr, void *uaddr, size_t len) 305 { 306 struct iovec iov; 307 struct uio uio; 308 int error; 309 310 if (len == 0) 311 return (0); 312 313 if (VMSPACE_IS_KERNEL_P(vm)) { 314 return kcopy(kaddr, uaddr, len); 315 } 316 if (__predict_true(vm == curproc->p_vmspace)) { 317 return copyout(kaddr, uaddr, len); 318 } 319 320 iov.iov_base = __UNCONST(kaddr); /* XXXUNCONST cast away const */ 321 iov.iov_len = len; 322 uio.uio_iov = &iov; 323 uio.uio_iovcnt = 1; 324 uio.uio_offset = (off_t)(intptr_t)uaddr; 325 uio.uio_resid = len; 326 uio.uio_rw = UIO_WRITE; 327 UIO_SETUP_SYSSPACE(&uio); 328 error = uvm_io(&vm->vm_map, &uio); 329 330 return (error); 331 } 332 333 /* 334 * Like copyin(), but operates on an arbitrary process. 335 */ 336 int 337 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len) 338 { 339 struct vmspace *vm; 340 int error; 341 342 error = proc_vmspace_getref(p, &vm); 343 if (error) { 344 return error; 345 } 346 error = copyin_vmspace(vm, uaddr, kaddr, len); 347 uvmspace_free(vm); 348 349 return error; 350 } 351 352 /* 353 * Like copyout(), but operates on an arbitrary process. 354 */ 355 int 356 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len) 357 { 358 struct vmspace *vm; 359 int error; 360 361 error = proc_vmspace_getref(p, &vm); 362 if (error) { 363 return error; 364 } 365 error = copyout_vmspace(vm, kaddr, uaddr, len); 366 uvmspace_free(vm); 367 368 return error; 369 } 370 371 /* 372 * Like copyin(), except it operates on kernel addresses when the FKIOCTL 373 * flag is passed in `ioctlflags' from the ioctl call. 374 */ 375 int 376 ioctl_copyin(int ioctlflags, const void *src, void *dst, size_t len) 377 { 378 if (ioctlflags & FKIOCTL) 379 return kcopy(src, dst, len); 380 return copyin(src, dst, len); 381 } 382 383 /* 384 * Like copyout(), except it operates on kernel addresses when the FKIOCTL 385 * flag is passed in `ioctlflags' from the ioctl call. 386 */ 387 int 388 ioctl_copyout(int ioctlflags, const void *src, void *dst, size_t len) 389 { 390 if (ioctlflags & FKIOCTL) 391 return kcopy(src, dst, len); 392 return copyout(src, dst, len); 393 } 394 395 static void * 396 hook_establish(hook_list_t *list, void (*fn)(void *), void *arg) 397 { 398 struct hook_desc *hd; 399 400 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT); 401 if (hd == NULL) 402 return (NULL); 403 404 hd->hk_fn = fn; 405 hd->hk_arg = arg; 406 LIST_INSERT_HEAD(list, hd, hk_list); 407 408 return (hd); 409 } 410 411 static void 412 hook_disestablish(hook_list_t *list, void *vhook) 413 { 414 #ifdef DIAGNOSTIC 415 struct hook_desc *hd; 416 417 LIST_FOREACH(hd, list, hk_list) { 418 if (hd == vhook) 419 break; 420 } 421 422 if (hd == NULL) 423 panic("hook_disestablish: hook %p not established", vhook); 424 #endif 425 LIST_REMOVE((struct hook_desc *)vhook, hk_list); 426 free(vhook, M_DEVBUF); 427 } 428 429 static void 430 hook_destroy(hook_list_t *list) 431 { 432 struct hook_desc *hd; 433 434 while ((hd = LIST_FIRST(list)) != NULL) { 435 LIST_REMOVE(hd, hk_list); 436 free(hd, M_DEVBUF); 437 } 438 } 439 440 static void 441 hook_proc_run(hook_list_t *list, struct proc *p) 442 { 443 struct hook_desc *hd; 444 445 for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) { 446 ((void (*)(struct proc *, void *))*hd->hk_fn)(p, 447 hd->hk_arg); 448 } 449 } 450 451 /* 452 * "Shutdown hook" types, functions, and variables. 453 * 454 * Should be invoked immediately before the 455 * system is halted or rebooted, i.e. after file systems unmounted, 456 * after crash dump done, etc. 457 * 458 * Each shutdown hook is removed from the list before it's run, so that 459 * it won't be run again. 460 */ 461 462 static hook_list_t shutdownhook_list; 463 464 void * 465 shutdownhook_establish(void (*fn)(void *), void *arg) 466 { 467 return hook_establish(&shutdownhook_list, fn, arg); 468 } 469 470 void 471 shutdownhook_disestablish(void *vhook) 472 { 473 hook_disestablish(&shutdownhook_list, vhook); 474 } 475 476 /* 477 * Run shutdown hooks. Should be invoked immediately before the 478 * system is halted or rebooted, i.e. after file systems unmounted, 479 * after crash dump done, etc. 480 * 481 * Each shutdown hook is removed from the list before it's run, so that 482 * it won't be run again. 483 */ 484 void 485 doshutdownhooks(void) 486 { 487 struct hook_desc *dp; 488 489 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) { 490 LIST_REMOVE(dp, hk_list); 491 (*dp->hk_fn)(dp->hk_arg); 492 #if 0 493 /* 494 * Don't bother freeing the hook structure,, since we may 495 * be rebooting because of a memory corruption problem, 496 * and this might only make things worse. It doesn't 497 * matter, anyway, since the system is just about to 498 * reboot. 499 */ 500 free(dp, M_DEVBUF); 501 #endif 502 } 503 } 504 505 /* 506 * "Mountroot hook" types, functions, and variables. 507 */ 508 509 static hook_list_t mountroothook_list; 510 511 void * 512 mountroothook_establish(void (*fn)(struct device *), struct device *dev) 513 { 514 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev); 515 } 516 517 void 518 mountroothook_disestablish(void *vhook) 519 { 520 hook_disestablish(&mountroothook_list, vhook); 521 } 522 523 void 524 mountroothook_destroy(void) 525 { 526 hook_destroy(&mountroothook_list); 527 } 528 529 void 530 domountroothook(void) 531 { 532 struct hook_desc *hd; 533 534 LIST_FOREACH(hd, &mountroothook_list, hk_list) { 535 if (hd->hk_arg == (void *)root_device) { 536 (*hd->hk_fn)(hd->hk_arg); 537 return; 538 } 539 } 540 } 541 542 static hook_list_t exechook_list; 543 544 void * 545 exechook_establish(void (*fn)(struct proc *, void *), void *arg) 546 { 547 return hook_establish(&exechook_list, (void (*)(void *))fn, arg); 548 } 549 550 void 551 exechook_disestablish(void *vhook) 552 { 553 hook_disestablish(&exechook_list, vhook); 554 } 555 556 /* 557 * Run exec hooks. 558 */ 559 void 560 doexechooks(struct proc *p) 561 { 562 hook_proc_run(&exechook_list, p); 563 } 564 565 static hook_list_t exithook_list; 566 567 void * 568 exithook_establish(void (*fn)(struct proc *, void *), void *arg) 569 { 570 return hook_establish(&exithook_list, (void (*)(void *))fn, arg); 571 } 572 573 void 574 exithook_disestablish(void *vhook) 575 { 576 hook_disestablish(&exithook_list, vhook); 577 } 578 579 /* 580 * Run exit hooks. 581 */ 582 void 583 doexithooks(struct proc *p) 584 { 585 hook_proc_run(&exithook_list, p); 586 } 587 588 static hook_list_t forkhook_list; 589 590 void * 591 forkhook_establish(void (*fn)(struct proc *, struct proc *)) 592 { 593 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL); 594 } 595 596 void 597 forkhook_disestablish(void *vhook) 598 { 599 hook_disestablish(&forkhook_list, vhook); 600 } 601 602 /* 603 * Run fork hooks. 604 */ 605 void 606 doforkhooks(struct proc *p2, struct proc *p1) 607 { 608 struct hook_desc *hd; 609 610 LIST_FOREACH(hd, &forkhook_list, hk_list) { 611 ((void (*)(struct proc *, struct proc *))*hd->hk_fn) 612 (p2, p1); 613 } 614 } 615 616 /* 617 * "Power hook" types, functions, and variables. 618 * The list of power hooks is kept ordered with the last registered hook 619 * first. 620 * When running the hooks on power down the hooks are called in reverse 621 * registration order, when powering up in registration order. 622 */ 623 struct powerhook_desc { 624 CIRCLEQ_ENTRY(powerhook_desc) sfd_list; 625 void (*sfd_fn)(int, void *); 626 void *sfd_arg; 627 char sfd_name[16]; 628 }; 629 630 static CIRCLEQ_HEAD(, powerhook_desc) powerhook_list = 631 CIRCLEQ_HEAD_INITIALIZER(powerhook_list); 632 633 void * 634 powerhook_establish(const char *name, void (*fn)(int, void *), void *arg) 635 { 636 struct powerhook_desc *ndp; 637 638 ndp = (struct powerhook_desc *) 639 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT); 640 if (ndp == NULL) 641 return (NULL); 642 643 ndp->sfd_fn = fn; 644 ndp->sfd_arg = arg; 645 strlcpy(ndp->sfd_name, name, sizeof(ndp->sfd_name)); 646 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list); 647 648 return (ndp); 649 } 650 651 void 652 powerhook_disestablish(void *vhook) 653 { 654 #ifdef DIAGNOSTIC 655 struct powerhook_desc *dp; 656 657 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) 658 if (dp == vhook) 659 goto found; 660 panic("powerhook_disestablish: hook %p not established", vhook); 661 found: 662 #endif 663 664 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook, 665 sfd_list); 666 free(vhook, M_DEVBUF); 667 } 668 669 /* 670 * Run power hooks. 671 */ 672 void 673 dopowerhooks(int why) 674 { 675 struct powerhook_desc *dp; 676 677 #ifdef POWERHOOK_DEBUG 678 const char *why_name; 679 static const char * pwr_names[] = {PWR_NAMES}; 680 why_name = why < __arraycount(pwr_names) ? pwr_names[why] : "???"; 681 #endif 682 683 if (why == PWR_RESUME || why == PWR_SOFTRESUME) { 684 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) { 685 #ifdef POWERHOOK_DEBUG 686 printf("dopowerhooks %s: %s (%p)\n", why_name, dp->sfd_name, dp); 687 #endif 688 (*dp->sfd_fn)(why, dp->sfd_arg); 689 } 690 } else { 691 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) { 692 #ifdef POWERHOOK_DEBUG 693 printf("dopowerhooks %s: %s (%p)\n", why_name, dp->sfd_name, dp); 694 #endif 695 (*dp->sfd_fn)(why, dp->sfd_arg); 696 } 697 } 698 699 #ifdef POWERHOOK_DEBUG 700 printf("dopowerhooks: %s done\n", why_name); 701 #endif 702 } 703 704 static int 705 isswap(struct device *dv) 706 { 707 struct dkwedge_info wi; 708 struct vnode *vn; 709 int error; 710 711 if (device_class(dv) != DV_DISK || !device_is_a(dv, "dk")) 712 return 0; 713 714 if ((vn = opendisk(dv)) == NULL) 715 return 0; 716 717 error = VOP_IOCTL(vn, DIOCGWEDGEINFO, &wi, FREAD, NOCRED); 718 VOP_CLOSE(vn, FREAD, NOCRED); 719 vput(vn); 720 if (error) { 721 #ifdef DEBUG_WEDGE 722 printf("%s: Get wedge info returned %d\n", dv->dv_xname, error); 723 #endif 724 return 0; 725 } 726 return strcmp(wi.dkw_ptype, DKW_PTYPE_SWAP) == 0; 727 } 728 729 /* 730 * Determine the root device and, if instructed to, the root file system. 731 */ 732 733 #include "md.h" 734 #if NMD == 0 735 #undef MEMORY_DISK_HOOKS 736 #endif 737 738 #ifdef MEMORY_DISK_HOOKS 739 static struct device fakemdrootdev[NMD]; 740 extern struct cfdriver md_cd; 741 #endif 742 743 #ifdef MEMORY_DISK_IS_ROOT 744 #define BOOT_FROM_MEMORY_HOOKS 1 745 #endif 746 747 /* 748 * The device and wedge that we booted from. If booted_wedge is NULL, 749 * the we might consult booted_partition. 750 */ 751 struct device *booted_device; 752 struct device *booted_wedge; 753 int booted_partition; 754 755 /* 756 * Use partition letters if it's a disk class but not a wedge. 757 * XXX Check for wedge is kinda gross. 758 */ 759 #define DEV_USES_PARTITIONS(dv) \ 760 (device_class((dv)) == DV_DISK && \ 761 !device_is_a((dv), "dk")) 762 763 void 764 setroot(struct device *bootdv, int bootpartition) 765 { 766 struct device *dv; 767 int len, majdev; 768 #ifdef MEMORY_DISK_HOOKS 769 int i; 770 #endif 771 dev_t nrootdev; 772 dev_t ndumpdev = NODEV; 773 char buf[128]; 774 const char *rootdevname; 775 const char *dumpdevname; 776 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */ 777 struct device *dumpdv = NULL; 778 struct ifnet *ifp; 779 const char *deffsname; 780 struct vfsops *vops; 781 782 #ifdef TFTPROOT 783 if (tftproot_dhcpboot(bootdv) != 0) 784 boothowto |= RB_ASKNAME; 785 #endif 786 787 #ifdef MEMORY_DISK_HOOKS 788 for (i = 0; i < NMD; i++) { 789 fakemdrootdev[i].dv_class = DV_DISK; 790 fakemdrootdev[i].dv_cfdata = NULL; 791 fakemdrootdev[i].dv_cfdriver = &md_cd; 792 fakemdrootdev[i].dv_unit = i; 793 fakemdrootdev[i].dv_parent = NULL; 794 snprintf(fakemdrootdev[i].dv_xname, 795 sizeof(fakemdrootdev[i].dv_xname), "md%d", i); 796 } 797 #endif /* MEMORY_DISK_HOOKS */ 798 799 #ifdef MEMORY_DISK_IS_ROOT 800 bootdv = &fakemdrootdev[0]; 801 bootpartition = 0; 802 #endif 803 804 /* 805 * If NFS is specified as the file system, and we found 806 * a DV_DISK boot device (or no boot device at all), then 807 * find a reasonable network interface for "rootspec". 808 */ 809 vops = vfs_getopsbyname("nfs"); 810 if (vops != NULL && vops->vfs_mountroot == mountroot && 811 rootspec == NULL && 812 (bootdv == NULL || device_class(bootdv) != DV_IFNET)) { 813 IFNET_FOREACH(ifp) { 814 if ((ifp->if_flags & 815 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) 816 break; 817 } 818 if (ifp == NULL) { 819 /* 820 * Can't find a suitable interface; ask the 821 * user. 822 */ 823 boothowto |= RB_ASKNAME; 824 } else { 825 /* 826 * Have a suitable interface; behave as if 827 * the user specified this interface. 828 */ 829 rootspec = (const char *)ifp->if_xname; 830 } 831 } 832 if (vops != NULL) 833 vfs_delref(vops); 834 835 /* 836 * If wildcarded root and we the boot device wasn't determined, 837 * ask the user. 838 */ 839 if (rootspec == NULL && bootdv == NULL) 840 boothowto |= RB_ASKNAME; 841 842 top: 843 if (boothowto & RB_ASKNAME) { 844 struct device *defdumpdv; 845 846 for (;;) { 847 printf("root device"); 848 if (bootdv != NULL) { 849 printf(" (default %s", bootdv->dv_xname); 850 if (DEV_USES_PARTITIONS(bootdv)) 851 printf("%c", bootpartition + 'a'); 852 printf(")"); 853 } 854 printf(": "); 855 len = cngetsn(buf, sizeof(buf)); 856 if (len == 0 && bootdv != NULL) { 857 strlcpy(buf, bootdv->dv_xname, sizeof(buf)); 858 len = strlen(buf); 859 } 860 if (len > 0 && buf[len - 1] == '*') { 861 buf[--len] = '\0'; 862 dv = getdisk(buf, len, 1, &nrootdev, 0); 863 if (dv != NULL) { 864 rootdv = dv; 865 break; 866 } 867 } 868 dv = getdisk(buf, len, bootpartition, &nrootdev, 0); 869 if (dv != NULL) { 870 rootdv = dv; 871 break; 872 } 873 } 874 875 /* 876 * Set up the default dump device. If root is on 877 * a network device, there is no default dump 878 * device, since we don't support dumps to the 879 * network. 880 */ 881 if (DEV_USES_PARTITIONS(rootdv) == 0) 882 defdumpdv = NULL; 883 else 884 defdumpdv = rootdv; 885 886 for (;;) { 887 printf("dump device"); 888 if (defdumpdv != NULL) { 889 /* 890 * Note, we know it's a disk if we get here. 891 */ 892 printf(" (default %sb)", defdumpdv->dv_xname); 893 } 894 printf(": "); 895 len = cngetsn(buf, sizeof(buf)); 896 if (len == 0) { 897 if (defdumpdv != NULL) { 898 ndumpdev = MAKEDISKDEV(major(nrootdev), 899 DISKUNIT(nrootdev), 1); 900 } 901 dumpdv = defdumpdv; 902 break; 903 } 904 if (len == 4 && strcmp(buf, "none") == 0) { 905 dumpdv = NULL; 906 break; 907 } 908 dv = getdisk(buf, len, 1, &ndumpdev, 1); 909 if (dv != NULL) { 910 dumpdv = dv; 911 break; 912 } 913 } 914 915 rootdev = nrootdev; 916 dumpdev = ndumpdev; 917 918 for (vops = LIST_FIRST(&vfs_list); vops != NULL; 919 vops = LIST_NEXT(vops, vfs_list)) { 920 if (vops->vfs_mountroot != NULL && 921 vops->vfs_mountroot == mountroot) 922 break; 923 } 924 925 if (vops == NULL) { 926 mountroot = NULL; 927 deffsname = "generic"; 928 } else 929 deffsname = vops->vfs_name; 930 931 for (;;) { 932 printf("file system (default %s): ", deffsname); 933 len = cngetsn(buf, sizeof(buf)); 934 if (len == 0) 935 break; 936 if (len == 4 && strcmp(buf, "halt") == 0) 937 cpu_reboot(RB_HALT, NULL); 938 else if (len == 6 && strcmp(buf, "reboot") == 0) 939 cpu_reboot(0, NULL); 940 #if defined(DDB) 941 else if (len == 3 && strcmp(buf, "ddb") == 0) { 942 console_debugger(); 943 } 944 #endif 945 else if (len == 7 && strcmp(buf, "generic") == 0) { 946 mountroot = NULL; 947 break; 948 } 949 vops = vfs_getopsbyname(buf); 950 if (vops == NULL || vops->vfs_mountroot == NULL) { 951 printf("use one of: generic"); 952 for (vops = LIST_FIRST(&vfs_list); 953 vops != NULL; 954 vops = LIST_NEXT(vops, vfs_list)) { 955 if (vops->vfs_mountroot != NULL) 956 printf(" %s", vops->vfs_name); 957 } 958 #if defined(DDB) 959 printf(" ddb"); 960 #endif 961 printf(" halt reboot\n"); 962 } else { 963 mountroot = vops->vfs_mountroot; 964 vfs_delref(vops); 965 break; 966 } 967 } 968 969 } else if (rootspec == NULL) { 970 /* 971 * Wildcarded root; use the boot device. 972 */ 973 rootdv = bootdv; 974 975 majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0); 976 if (majdev >= 0) { 977 /* 978 * Root is on a disk. `bootpartition' is root, 979 * unless the device does not use partitions. 980 */ 981 if (DEV_USES_PARTITIONS(bootdv)) 982 rootdev = MAKEDISKDEV(majdev, 983 device_unit(bootdv), 984 bootpartition); 985 else 986 rootdev = makedev(majdev, device_unit(bootdv)); 987 } 988 } else { 989 990 /* 991 * `root on <dev> ...' 992 */ 993 994 /* 995 * If it's a network interface, we can bail out 996 * early. 997 */ 998 dv = finddevice(rootspec); 999 if (dv != NULL && device_class(dv) == DV_IFNET) { 1000 rootdv = dv; 1001 goto haveroot; 1002 } 1003 1004 if (rootdev == NODEV && 1005 device_class(dv) == DV_DISK && device_is_a(dv, "dk") && 1006 (majdev = devsw_name2blk(dv->dv_xname, NULL, 0)) >= 0) 1007 rootdev = makedev(majdev, device_unit(dv)); 1008 1009 rootdevname = devsw_blk2name(major(rootdev)); 1010 if (rootdevname == NULL) { 1011 printf("unknown device major 0x%x\n", rootdev); 1012 boothowto |= RB_ASKNAME; 1013 goto top; 1014 } 1015 memset(buf, 0, sizeof(buf)); 1016 snprintf(buf, sizeof(buf), "%s%d", rootdevname, 1017 DISKUNIT(rootdev)); 1018 1019 rootdv = finddevice(buf); 1020 if (rootdv == NULL) { 1021 printf("device %s (0x%x) not configured\n", 1022 buf, rootdev); 1023 boothowto |= RB_ASKNAME; 1024 goto top; 1025 } 1026 } 1027 1028 haveroot: 1029 1030 root_device = rootdv; 1031 1032 switch (device_class(rootdv)) { 1033 case DV_IFNET: 1034 case DV_DISK: 1035 aprint_normal("root on %s", rootdv->dv_xname); 1036 if (DEV_USES_PARTITIONS(rootdv)) 1037 aprint_normal("%c", DISKPART(rootdev) + 'a'); 1038 break; 1039 1040 default: 1041 printf("can't determine root device\n"); 1042 boothowto |= RB_ASKNAME; 1043 goto top; 1044 } 1045 1046 /* 1047 * Now configure the dump device. 1048 * 1049 * If we haven't figured out the dump device, do so, with 1050 * the following rules: 1051 * 1052 * (a) We already know dumpdv in the RB_ASKNAME case. 1053 * 1054 * (b) If dumpspec is set, try to use it. If the device 1055 * is not available, punt. 1056 * 1057 * (c) If dumpspec is not set, the dump device is 1058 * wildcarded or unspecified. If the root device 1059 * is DV_IFNET, punt. Otherwise, use partition b 1060 * of the root device. 1061 */ 1062 1063 if (boothowto & RB_ASKNAME) { /* (a) */ 1064 if (dumpdv == NULL) 1065 goto nodumpdev; 1066 } else if (dumpspec != NULL) { /* (b) */ 1067 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) { 1068 /* 1069 * Operator doesn't want a dump device. 1070 * Or looks like they tried to pick a network 1071 * device. Oops. 1072 */ 1073 goto nodumpdev; 1074 } 1075 1076 dumpdevname = devsw_blk2name(major(dumpdev)); 1077 if (dumpdevname == NULL) 1078 goto nodumpdev; 1079 memset(buf, 0, sizeof(buf)); 1080 snprintf(buf, sizeof(buf), "%s%d", dumpdevname, 1081 DISKUNIT(dumpdev)); 1082 1083 dumpdv = finddevice(buf); 1084 if (dumpdv == NULL) { 1085 /* 1086 * Device not configured. 1087 */ 1088 goto nodumpdev; 1089 } 1090 } else { /* (c) */ 1091 if (DEV_USES_PARTITIONS(rootdv) == 0) { 1092 for (dv = TAILQ_FIRST(&alldevs); dv != NULL; 1093 dv = TAILQ_NEXT(dv, dv_list)) 1094 if (isswap(dv)) 1095 break; 1096 if (dv == NULL) 1097 goto nodumpdev; 1098 1099 majdev = devsw_name2blk(dv->dv_xname, NULL, 0); 1100 if (majdev < 0) 1101 goto nodumpdev; 1102 dumpdv = dv; 1103 dumpdev = makedev(majdev, device_unit(dumpdv)); 1104 } else { 1105 dumpdv = rootdv; 1106 dumpdev = MAKEDISKDEV(major(rootdev), 1107 device_unit(dumpdv), 1); 1108 } 1109 } 1110 1111 aprint_normal(" dumps on %s", dumpdv->dv_xname); 1112 if (DEV_USES_PARTITIONS(dumpdv)) 1113 aprint_normal("%c", DISKPART(dumpdev) + 'a'); 1114 aprint_normal("\n"); 1115 return; 1116 1117 nodumpdev: 1118 dumpdev = NODEV; 1119 aprint_normal("\n"); 1120 } 1121 1122 static struct device * 1123 finddevice(const char *name) 1124 { 1125 const char *wname; 1126 struct device *dv; 1127 #if defined(BOOT_FROM_MEMORY_HOOKS) 1128 int j; 1129 #endif /* BOOT_FROM_MEMORY_HOOKS */ 1130 1131 if ((wname = getwedgename(name, strlen(name))) != NULL) 1132 return dkwedge_find_by_wname(wname); 1133 1134 #ifdef BOOT_FROM_MEMORY_HOOKS 1135 for (j = 0; j < NMD; j++) { 1136 if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) 1137 return &fakemdrootdev[j]; 1138 } 1139 #endif /* BOOT_FROM_MEMORY_HOOKS */ 1140 1141 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1142 if (strcmp(dv->dv_xname, name) == 0) 1143 break; 1144 } 1145 return dv; 1146 } 1147 1148 static struct device * 1149 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump) 1150 { 1151 struct device *dv; 1152 #ifdef MEMORY_DISK_HOOKS 1153 int i; 1154 #endif 1155 1156 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1157 printf("use one of:"); 1158 #ifdef MEMORY_DISK_HOOKS 1159 if (isdump == 0) 1160 for (i = 0; i < NMD; i++) 1161 printf(" %s[a-%c]", fakemdrootdev[i].dv_xname, 1162 'a' + MAXPARTITIONS - 1); 1163 #endif 1164 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1165 if (DEV_USES_PARTITIONS(dv)) 1166 printf(" %s[a-%c]", dv->dv_xname, 1167 'a' + MAXPARTITIONS - 1); 1168 else if (device_class(dv) == DV_DISK) 1169 printf(" %s", dv->dv_xname); 1170 if (isdump == 0 && device_class(dv) == DV_IFNET) 1171 printf(" %s", dv->dv_xname); 1172 } 1173 dkwedge_print_wnames(); 1174 if (isdump) 1175 printf(" none"); 1176 #if defined(DDB) 1177 printf(" ddb"); 1178 #endif 1179 printf(" halt reboot\n"); 1180 } 1181 return dv; 1182 } 1183 1184 static const char * 1185 getwedgename(const char *name, int namelen) 1186 { 1187 const char *wpfx = "wedge:"; 1188 const int wpfxlen = strlen(wpfx); 1189 1190 if (namelen < wpfxlen || strncmp(name, wpfx, wpfxlen) != 0) 1191 return NULL; 1192 1193 return name + wpfxlen; 1194 } 1195 1196 static struct device * 1197 parsedisk(char *str, int len, int defpart, dev_t *devp) 1198 { 1199 struct device *dv; 1200 const char *wname; 1201 char *cp, c; 1202 int majdev, part; 1203 #ifdef MEMORY_DISK_HOOKS 1204 int i; 1205 #endif 1206 if (len == 0) 1207 return (NULL); 1208 1209 if (len == 4 && strcmp(str, "halt") == 0) 1210 cpu_reboot(RB_HALT, NULL); 1211 else if (len == 6 && strcmp(str, "reboot") == 0) 1212 cpu_reboot(0, NULL); 1213 #if defined(DDB) 1214 else if (len == 3 && strcmp(str, "ddb") == 0) 1215 console_debugger(); 1216 #endif 1217 1218 cp = str + len - 1; 1219 c = *cp; 1220 1221 if ((wname = getwedgename(str, len)) != NULL) { 1222 if ((dv = dkwedge_find_by_wname(wname)) == NULL) 1223 return NULL; 1224 part = defpart; 1225 goto gotdisk; 1226 } else if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) { 1227 part = c - 'a'; 1228 *cp = '\0'; 1229 } else 1230 part = defpart; 1231 1232 #ifdef MEMORY_DISK_HOOKS 1233 for (i = 0; i < NMD; i++) 1234 if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) { 1235 dv = &fakemdrootdev[i]; 1236 goto gotdisk; 1237 } 1238 #endif 1239 1240 dv = finddevice(str); 1241 if (dv != NULL) { 1242 if (device_class(dv) == DV_DISK) { 1243 gotdisk: 1244 majdev = devsw_name2blk(dv->dv_xname, NULL, 0); 1245 if (majdev < 0) 1246 panic("parsedisk"); 1247 if (DEV_USES_PARTITIONS(dv)) 1248 *devp = MAKEDISKDEV(majdev, device_unit(dv), 1249 part); 1250 else 1251 *devp = makedev(majdev, device_unit(dv)); 1252 } 1253 1254 if (device_class(dv) == DV_IFNET) 1255 *devp = NODEV; 1256 } 1257 1258 *cp = c; 1259 return (dv); 1260 } 1261 1262 /* 1263 * snprintf() `bytes' into `buf', reformatting it so that the number, 1264 * plus a possible `x' + suffix extension) fits into len bytes (including 1265 * the terminating NUL). 1266 * Returns the number of bytes stored in buf, or -1 if there was a problem. 1267 * E.g, given a len of 9 and a suffix of `B': 1268 * bytes result 1269 * ----- ------ 1270 * 99999 `99999 B' 1271 * 100000 `97 kB' 1272 * 66715648 `65152 kB' 1273 * 252215296 `240 MB' 1274 */ 1275 int 1276 humanize_number(char *buf, size_t len, uint64_t bytes, const char *suffix, 1277 int divisor) 1278 { 1279 /* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */ 1280 const char *prefixes; 1281 int r; 1282 uint64_t umax; 1283 size_t i, suffixlen; 1284 1285 if (buf == NULL || suffix == NULL) 1286 return (-1); 1287 if (len > 0) 1288 buf[0] = '\0'; 1289 suffixlen = strlen(suffix); 1290 /* check if enough room for `x y' + suffix + `\0' */ 1291 if (len < 4 + suffixlen) 1292 return (-1); 1293 1294 if (divisor == 1024) { 1295 /* 1296 * binary multiplies 1297 * XXX IEC 60027-2 recommends Ki, Mi, Gi... 1298 */ 1299 prefixes = " KMGTPE"; 1300 } else 1301 prefixes = " kMGTPE"; /* SI for decimal multiplies */ 1302 1303 umax = 1; 1304 for (i = 0; i < len - suffixlen - 3; i++) 1305 umax *= 10; 1306 for (i = 0; bytes >= umax && prefixes[i + 1]; i++) 1307 bytes /= divisor; 1308 1309 r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes, 1310 i == 0 ? "" : " ", prefixes[i], suffix); 1311 1312 return (r); 1313 } 1314 1315 int 1316 format_bytes(char *buf, size_t len, uint64_t bytes) 1317 { 1318 int rv; 1319 size_t nlen; 1320 1321 rv = humanize_number(buf, len, bytes, "B", 1024); 1322 if (rv != -1) { 1323 /* nuke the trailing ` B' if it exists */ 1324 nlen = strlen(buf) - 2; 1325 if (strcmp(&buf[nlen], " B") == 0) 1326 buf[nlen] = '\0'; 1327 } 1328 return (rv); 1329 } 1330 1331 /* 1332 * Return true if system call tracing is enabled for the specified process. 1333 */ 1334 bool 1335 trace_is_enabled(struct proc *p) 1336 { 1337 #ifdef SYSCALL_DEBUG 1338 return (true); 1339 #endif 1340 #ifdef KTRACE 1341 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 1342 return (true); 1343 #endif 1344 #ifdef SYSTRACE 1345 if (ISSET(p->p_flag, PK_SYSTRACE)) 1346 return (true); 1347 #endif 1348 #ifdef PTRACE 1349 if (ISSET(p->p_slflag, PSL_SYSCALL)) 1350 return (true); 1351 #endif 1352 1353 return (false); 1354 } 1355 1356 /* 1357 * Start trace of particular system call. If process is being traced, 1358 * this routine is called by MD syscall dispatch code just before 1359 * a system call is actually executed. 1360 * MD caller guarantees the passed 'code' is within the supported 1361 * system call number range for emulation the process runs under. 1362 */ 1363 int 1364 trace_enter(struct lwp *l, register_t code, 1365 register_t realcode, const struct sysent *callp, void *args) 1366 { 1367 #if defined(SYSCALL_DEBUG) || defined(KTRACE) || defined(PTRACE) || defined(SYSTRACE) 1368 struct proc *p = l->l_proc; 1369 1370 #ifdef SYSCALL_DEBUG 1371 scdebug_call(l, code, args); 1372 #endif /* SYSCALL_DEBUG */ 1373 1374 ktrsyscall(code, realcode, callp, args); 1375 1376 #ifdef PTRACE 1377 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 1378 (PSL_SYSCALL|PSL_TRACED)) 1379 process_stoptrace(l); 1380 #endif 1381 1382 #ifdef SYSTRACE 1383 if (ISSET(p->p_flag, PK_SYSTRACE)) { 1384 int error; 1385 KERNEL_LOCK(1, l); 1386 error = systrace_enter(l, code, args); 1387 KERNEL_UNLOCK_ONE(l); 1388 return error; 1389 } 1390 #endif 1391 #endif /* SYSCALL_DEBUG || {K,P,SYS}TRACE */ 1392 return 0; 1393 } 1394 1395 /* 1396 * End trace of particular system call. If process is being traced, 1397 * this routine is called by MD syscall dispatch code just after 1398 * a system call finishes. 1399 * MD caller guarantees the passed 'code' is within the supported 1400 * system call number range for emulation the process runs under. 1401 */ 1402 void 1403 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[], 1404 int error) 1405 { 1406 #if defined(SYSCALL_DEBUG) || defined(KTRACE) || defined(PTRACE) || defined(SYSTRACE) 1407 struct proc *p = l->l_proc; 1408 1409 #ifdef SYSCALL_DEBUG 1410 scdebug_ret(l, code, error, rval); 1411 #endif /* SYSCALL_DEBUG */ 1412 1413 ktrsysret(code, error, rval); 1414 1415 #ifdef PTRACE 1416 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 1417 (PSL_SYSCALL|PSL_TRACED)) 1418 process_stoptrace(l); 1419 #endif 1420 1421 #ifdef SYSTRACE 1422 if (ISSET(p->p_flag, PK_SYSTRACE)) { 1423 KERNEL_LOCK(1, l); 1424 systrace_exit(l, code, args, rval, error); 1425 KERNEL_UNLOCK_ONE(l); 1426 } 1427 #endif 1428 #endif /* SYSCALL_DEBUG || {K,P,SYS}TRACE */ 1429 } 1430 1431 /* 1432 * Disable kernel preemption. 1433 */ 1434 void 1435 crit_enter(void) 1436 { 1437 /* nothing */ 1438 } 1439 1440 /* 1441 * Reenable kernel preemption. 1442 */ 1443 void 1444 crit_exit(void) 1445 { 1446 /* nothing */ 1447 } 1448