1 /* $NetBSD: kern_subr.c,v 1.134 2006/03/13 08:52:07 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Luke Mewburn. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Copyright (c) 1992, 1993 50 * The Regents of the University of California. All rights reserved. 51 * 52 * This software was developed by the Computer Systems Engineering group 53 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 54 * contributed to Berkeley. 55 * 56 * All advertising materials mentioning features or use of this software 57 * must display the following acknowledgement: 58 * This product includes software developed by the University of 59 * California, Lawrence Berkeley Laboratory. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 1. Redistributions of source code must retain the above copyright 65 * notice, this list of conditions and the following disclaimer. 66 * 2. Redistributions in binary form must reproduce the above copyright 67 * notice, this list of conditions and the following disclaimer in the 68 * documentation and/or other materials provided with the distribution. 69 * 3. Neither the name of the University nor the names of its contributors 70 * may be used to endorse or promote products derived from this software 71 * without specific prior written permission. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 84 * 85 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95 86 */ 87 88 #include <sys/cdefs.h> 89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.134 2006/03/13 08:52:07 yamt Exp $"); 90 91 #include "opt_ddb.h" 92 #include "opt_md.h" 93 #include "opt_syscall_debug.h" 94 #include "opt_ktrace.h" 95 #include "opt_systrace.h" 96 97 #include <sys/param.h> 98 #include <sys/systm.h> 99 #include <sys/proc.h> 100 #include <sys/malloc.h> 101 #include <sys/mount.h> 102 #include <sys/device.h> 103 #include <sys/reboot.h> 104 #include <sys/conf.h> 105 #include <sys/disklabel.h> 106 #include <sys/queue.h> 107 #include <sys/systrace.h> 108 #include <sys/ktrace.h> 109 #include <sys/ptrace.h> 110 #include <sys/fcntl.h> 111 112 #include <uvm/uvm_extern.h> 113 114 #include <dev/cons.h> 115 116 #include <net/if.h> 117 118 /* XXX these should eventually move to subr_autoconf.c */ 119 static struct device *finddevice(const char *); 120 static struct device *getdisk(char *, int, int, dev_t *, int); 121 static struct device *parsedisk(char *, int, int, dev_t *); 122 123 /* 124 * A generic linear hook. 125 */ 126 struct hook_desc { 127 LIST_ENTRY(hook_desc) hk_list; 128 void (*hk_fn)(void *); 129 void *hk_arg; 130 }; 131 typedef LIST_HEAD(, hook_desc) hook_list_t; 132 133 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 134 135 void 136 uio_setup_sysspace(struct uio *uio) 137 { 138 139 uio->uio_vmspace = vmspace_kernel(); 140 } 141 142 int 143 uiomove(void *buf, size_t n, struct uio *uio) 144 { 145 struct vmspace *vm = uio->uio_vmspace; 146 struct iovec *iov; 147 u_int cnt; 148 int error = 0; 149 char *cp = buf; 150 int hold_count; 151 152 hold_count = KERNEL_LOCK_RELEASE_ALL(); 153 154 #ifdef LOCKDEBUG 155 spinlock_switchcheck(); 156 simple_lock_only_held(NULL, "uiomove"); 157 #endif 158 159 #ifdef DIAGNOSTIC 160 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) 161 panic("uiomove: mode"); 162 #endif 163 while (n > 0 && uio->uio_resid) { 164 iov = uio->uio_iov; 165 cnt = iov->iov_len; 166 if (cnt == 0) { 167 KASSERT(uio->uio_iovcnt > 0); 168 uio->uio_iov++; 169 uio->uio_iovcnt--; 170 continue; 171 } 172 if (cnt > n) 173 cnt = n; 174 if (!VMSPACE_IS_KERNEL_P(vm)) { 175 if (curcpu()->ci_schedstate.spc_flags & 176 SPCF_SHOULDYIELD) 177 preempt(1); 178 } 179 180 if (uio->uio_rw == UIO_READ) { 181 error = copyout_vmspace(vm, cp, iov->iov_base, 182 cnt); 183 } else { 184 error = copyin_vmspace(vm, iov->iov_base, cp, 185 cnt); 186 } 187 if (error) { 188 break; 189 } 190 iov->iov_base = (caddr_t)iov->iov_base + cnt; 191 iov->iov_len -= cnt; 192 uio->uio_resid -= cnt; 193 uio->uio_offset += cnt; 194 cp += cnt; 195 KDASSERT(cnt <= n); 196 n -= cnt; 197 } 198 KERNEL_LOCK_ACQUIRE_COUNT(hold_count); 199 return (error); 200 } 201 202 /* 203 * Wrapper for uiomove() that validates the arguments against a known-good 204 * kernel buffer. 205 */ 206 int 207 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio) 208 { 209 size_t offset; 210 211 if (uio->uio_offset < 0 || uio->uio_resid < 0 || 212 (offset = uio->uio_offset) != uio->uio_offset) 213 return (EINVAL); 214 if (offset >= buflen) 215 return (0); 216 return (uiomove((char *)buf + offset, buflen - offset, uio)); 217 } 218 219 /* 220 * Give next character to user as result of read. 221 */ 222 int 223 ureadc(int c, struct uio *uio) 224 { 225 struct iovec *iov; 226 227 if (uio->uio_resid <= 0) 228 panic("ureadc: non-positive resid"); 229 again: 230 if (uio->uio_iovcnt <= 0) 231 panic("ureadc: non-positive iovcnt"); 232 iov = uio->uio_iov; 233 if (iov->iov_len <= 0) { 234 uio->uio_iovcnt--; 235 uio->uio_iov++; 236 goto again; 237 } 238 if (!VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) { 239 if (subyte(iov->iov_base, c) < 0) 240 return (EFAULT); 241 } else { 242 *(char *)iov->iov_base = c; 243 } 244 iov->iov_base = (caddr_t)iov->iov_base + 1; 245 iov->iov_len--; 246 uio->uio_resid--; 247 uio->uio_offset++; 248 return (0); 249 } 250 251 /* 252 * Like copyin(), but operates on an arbitrary vmspace. 253 */ 254 int 255 copyin_vmspace(struct vmspace *vm, const void *uaddr, void *kaddr, size_t len) 256 { 257 struct iovec iov; 258 struct uio uio; 259 int error; 260 261 if (len == 0) 262 return (0); 263 264 if (VMSPACE_IS_KERNEL_P(vm)) { 265 return kcopy(uaddr, kaddr, len); 266 } 267 if (__predict_true(vm == curproc->p_vmspace)) { 268 return copyin(uaddr, kaddr, len); 269 } 270 271 iov.iov_base = kaddr; 272 iov.iov_len = len; 273 uio.uio_iov = &iov; 274 uio.uio_iovcnt = 1; 275 uio.uio_offset = (off_t)(intptr_t)uaddr; 276 uio.uio_resid = len; 277 uio.uio_rw = UIO_READ; 278 UIO_SETUP_SYSSPACE(&uio); 279 error = uvm_io(&vm->vm_map, &uio); 280 281 return (error); 282 } 283 284 /* 285 * Like copyout(), but operates on an arbitrary vmspace. 286 */ 287 int 288 copyout_vmspace(struct vmspace *vm, const void *kaddr, void *uaddr, size_t len) 289 { 290 struct iovec iov; 291 struct uio uio; 292 int error; 293 294 if (len == 0) 295 return (0); 296 297 if (VMSPACE_IS_KERNEL_P(vm)) { 298 return kcopy(kaddr, uaddr, len); 299 } 300 if (__predict_true(vm == curproc->p_vmspace)) { 301 return copyout(kaddr, uaddr, len); 302 } 303 304 iov.iov_base = __UNCONST(kaddr); /* XXXUNCONST cast away const */ 305 iov.iov_len = len; 306 uio.uio_iov = &iov; 307 uio.uio_iovcnt = 1; 308 uio.uio_offset = (off_t)(intptr_t)uaddr; 309 uio.uio_resid = len; 310 uio.uio_rw = UIO_WRITE; 311 UIO_SETUP_SYSSPACE(&uio); 312 error = uvm_io(&vm->vm_map, &uio); 313 314 return (error); 315 } 316 317 /* 318 * Like copyin(), but operates on an arbitrary process. 319 */ 320 int 321 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len) 322 { 323 struct vmspace *vm; 324 int error; 325 326 error = proc_vmspace_getref(p, &vm); 327 if (error) { 328 return error; 329 } 330 error = copyin_vmspace(vm, uaddr, kaddr, len); 331 uvmspace_free(vm); 332 333 return error; 334 } 335 336 /* 337 * Like copyout(), but operates on an arbitrary process. 338 */ 339 int 340 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len) 341 { 342 struct vmspace *vm; 343 int error; 344 345 error = proc_vmspace_getref(p, &vm); 346 if (error) { 347 return error; 348 } 349 error = copyout_vmspace(vm, kaddr, uaddr, len); 350 uvmspace_free(vm); 351 352 return error; 353 } 354 355 /* 356 * Like copyin(), except it operates on kernel addresses when the FKIOCTL 357 * flag is passed in `ioctlflags' from the ioctl call. 358 */ 359 int 360 ioctl_copyin(int ioctlflags, const void *src, void *dst, size_t len) 361 { 362 if (ioctlflags & FKIOCTL) 363 return kcopy(src, dst, len); 364 return copyin(src, dst, len); 365 } 366 367 /* 368 * Like copyout(), except it operates on kernel addresses when the FKIOCTL 369 * flag is passed in `ioctlflags' from the ioctl call. 370 */ 371 int 372 ioctl_copyout(int ioctlflags, const void *src, void *dst, size_t len) 373 { 374 if (ioctlflags & FKIOCTL) 375 return kcopy(src, dst, len); 376 return copyout(src, dst, len); 377 } 378 379 /* 380 * General routine to allocate a hash table. 381 * Allocate enough memory to hold at least `elements' list-head pointers. 382 * Return a pointer to the allocated space and set *hashmask to a pattern 383 * suitable for masking a value to use as an index into the returned array. 384 */ 385 void * 386 hashinit(u_int elements, enum hashtype htype, struct malloc_type *mtype, 387 int mflags, u_long *hashmask) 388 { 389 u_long hashsize, i; 390 LIST_HEAD(, generic) *hashtbl_list; 391 TAILQ_HEAD(, generic) *hashtbl_tailq; 392 size_t esize; 393 void *p; 394 395 if (elements == 0) 396 panic("hashinit: bad cnt"); 397 for (hashsize = 1; hashsize < elements; hashsize <<= 1) 398 continue; 399 400 switch (htype) { 401 case HASH_LIST: 402 esize = sizeof(*hashtbl_list); 403 break; 404 case HASH_TAILQ: 405 esize = sizeof(*hashtbl_tailq); 406 break; 407 default: 408 #ifdef DIAGNOSTIC 409 panic("hashinit: invalid table type"); 410 #else 411 return NULL; 412 #endif 413 } 414 415 if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL) 416 return (NULL); 417 418 switch (htype) { 419 case HASH_LIST: 420 hashtbl_list = p; 421 for (i = 0; i < hashsize; i++) 422 LIST_INIT(&hashtbl_list[i]); 423 break; 424 case HASH_TAILQ: 425 hashtbl_tailq = p; 426 for (i = 0; i < hashsize; i++) 427 TAILQ_INIT(&hashtbl_tailq[i]); 428 break; 429 } 430 *hashmask = hashsize - 1; 431 return (p); 432 } 433 434 /* 435 * Free memory from hash table previosly allocated via hashinit(). 436 */ 437 void 438 hashdone(void *hashtbl, struct malloc_type *mtype) 439 { 440 441 free(hashtbl, mtype); 442 } 443 444 445 static void * 446 hook_establish(hook_list_t *list, void (*fn)(void *), void *arg) 447 { 448 struct hook_desc *hd; 449 450 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT); 451 if (hd == NULL) 452 return (NULL); 453 454 hd->hk_fn = fn; 455 hd->hk_arg = arg; 456 LIST_INSERT_HEAD(list, hd, hk_list); 457 458 return (hd); 459 } 460 461 static void 462 hook_disestablish(hook_list_t *list, void *vhook) 463 { 464 #ifdef DIAGNOSTIC 465 struct hook_desc *hd; 466 467 LIST_FOREACH(hd, list, hk_list) { 468 if (hd == vhook) 469 break; 470 } 471 472 if (hd == NULL) 473 panic("hook_disestablish: hook %p not established", vhook); 474 #endif 475 LIST_REMOVE((struct hook_desc *)vhook, hk_list); 476 free(vhook, M_DEVBUF); 477 } 478 479 static void 480 hook_destroy(hook_list_t *list) 481 { 482 struct hook_desc *hd; 483 484 while ((hd = LIST_FIRST(list)) != NULL) { 485 LIST_REMOVE(hd, hk_list); 486 free(hd, M_DEVBUF); 487 } 488 } 489 490 static void 491 hook_proc_run(hook_list_t *list, struct proc *p) 492 { 493 struct hook_desc *hd; 494 495 for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) { 496 ((void (*)(struct proc *, void *))*hd->hk_fn)(p, 497 hd->hk_arg); 498 } 499 } 500 501 /* 502 * "Shutdown hook" types, functions, and variables. 503 * 504 * Should be invoked immediately before the 505 * system is halted or rebooted, i.e. after file systems unmounted, 506 * after crash dump done, etc. 507 * 508 * Each shutdown hook is removed from the list before it's run, so that 509 * it won't be run again. 510 */ 511 512 static hook_list_t shutdownhook_list; 513 514 void * 515 shutdownhook_establish(void (*fn)(void *), void *arg) 516 { 517 return hook_establish(&shutdownhook_list, fn, arg); 518 } 519 520 void 521 shutdownhook_disestablish(void *vhook) 522 { 523 hook_disestablish(&shutdownhook_list, vhook); 524 } 525 526 /* 527 * Run shutdown hooks. Should be invoked immediately before the 528 * system is halted or rebooted, i.e. after file systems unmounted, 529 * after crash dump done, etc. 530 * 531 * Each shutdown hook is removed from the list before it's run, so that 532 * it won't be run again. 533 */ 534 void 535 doshutdownhooks(void) 536 { 537 struct hook_desc *dp; 538 539 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) { 540 LIST_REMOVE(dp, hk_list); 541 (*dp->hk_fn)(dp->hk_arg); 542 #if 0 543 /* 544 * Don't bother freeing the hook structure,, since we may 545 * be rebooting because of a memory corruption problem, 546 * and this might only make things worse. It doesn't 547 * matter, anyway, since the system is just about to 548 * reboot. 549 */ 550 free(dp, M_DEVBUF); 551 #endif 552 } 553 } 554 555 /* 556 * "Mountroot hook" types, functions, and variables. 557 */ 558 559 static hook_list_t mountroothook_list; 560 561 void * 562 mountroothook_establish(void (*fn)(struct device *), struct device *dev) 563 { 564 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev); 565 } 566 567 void 568 mountroothook_disestablish(void *vhook) 569 { 570 hook_disestablish(&mountroothook_list, vhook); 571 } 572 573 void 574 mountroothook_destroy(void) 575 { 576 hook_destroy(&mountroothook_list); 577 } 578 579 void 580 domountroothook(void) 581 { 582 struct hook_desc *hd; 583 584 LIST_FOREACH(hd, &mountroothook_list, hk_list) { 585 if (hd->hk_arg == (void *)root_device) { 586 (*hd->hk_fn)(hd->hk_arg); 587 return; 588 } 589 } 590 } 591 592 static hook_list_t exechook_list; 593 594 void * 595 exechook_establish(void (*fn)(struct proc *, void *), void *arg) 596 { 597 return hook_establish(&exechook_list, (void (*)(void *))fn, arg); 598 } 599 600 void 601 exechook_disestablish(void *vhook) 602 { 603 hook_disestablish(&exechook_list, vhook); 604 } 605 606 /* 607 * Run exec hooks. 608 */ 609 void 610 doexechooks(struct proc *p) 611 { 612 hook_proc_run(&exechook_list, p); 613 } 614 615 static hook_list_t exithook_list; 616 617 void * 618 exithook_establish(void (*fn)(struct proc *, void *), void *arg) 619 { 620 return hook_establish(&exithook_list, (void (*)(void *))fn, arg); 621 } 622 623 void 624 exithook_disestablish(void *vhook) 625 { 626 hook_disestablish(&exithook_list, vhook); 627 } 628 629 /* 630 * Run exit hooks. 631 */ 632 void 633 doexithooks(struct proc *p) 634 { 635 hook_proc_run(&exithook_list, p); 636 } 637 638 static hook_list_t forkhook_list; 639 640 void * 641 forkhook_establish(void (*fn)(struct proc *, struct proc *)) 642 { 643 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL); 644 } 645 646 void 647 forkhook_disestablish(void *vhook) 648 { 649 hook_disestablish(&forkhook_list, vhook); 650 } 651 652 /* 653 * Run fork hooks. 654 */ 655 void 656 doforkhooks(struct proc *p2, struct proc *p1) 657 { 658 struct hook_desc *hd; 659 660 LIST_FOREACH(hd, &forkhook_list, hk_list) { 661 ((void (*)(struct proc *, struct proc *))*hd->hk_fn) 662 (p2, p1); 663 } 664 } 665 666 /* 667 * "Power hook" types, functions, and variables. 668 * The list of power hooks is kept ordered with the last registered hook 669 * first. 670 * When running the hooks on power down the hooks are called in reverse 671 * registration order, when powering up in registration order. 672 */ 673 struct powerhook_desc { 674 CIRCLEQ_ENTRY(powerhook_desc) sfd_list; 675 void (*sfd_fn)(int, void *); 676 void *sfd_arg; 677 }; 678 679 static CIRCLEQ_HEAD(, powerhook_desc) powerhook_list = 680 CIRCLEQ_HEAD_INITIALIZER(powerhook_list); 681 682 void * 683 powerhook_establish(void (*fn)(int, void *), void *arg) 684 { 685 struct powerhook_desc *ndp; 686 687 ndp = (struct powerhook_desc *) 688 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT); 689 if (ndp == NULL) 690 return (NULL); 691 692 ndp->sfd_fn = fn; 693 ndp->sfd_arg = arg; 694 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list); 695 696 return (ndp); 697 } 698 699 void 700 powerhook_disestablish(void *vhook) 701 { 702 #ifdef DIAGNOSTIC 703 struct powerhook_desc *dp; 704 705 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) 706 if (dp == vhook) 707 goto found; 708 panic("powerhook_disestablish: hook %p not established", vhook); 709 found: 710 #endif 711 712 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook, 713 sfd_list); 714 free(vhook, M_DEVBUF); 715 } 716 717 /* 718 * Run power hooks. 719 */ 720 void 721 dopowerhooks(int why) 722 { 723 struct powerhook_desc *dp; 724 725 if (why == PWR_RESUME || why == PWR_SOFTRESUME) { 726 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) { 727 (*dp->sfd_fn)(why, dp->sfd_arg); 728 } 729 } else { 730 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) { 731 (*dp->sfd_fn)(why, dp->sfd_arg); 732 } 733 } 734 } 735 736 /* 737 * Determine the root device and, if instructed to, the root file system. 738 */ 739 740 #include "md.h" 741 #if NMD == 0 742 #undef MEMORY_DISK_HOOKS 743 #endif 744 745 #ifdef MEMORY_DISK_HOOKS 746 static struct device fakemdrootdev[NMD]; 747 extern struct cfdriver md_cd; 748 #endif 749 750 #ifdef MEMORY_DISK_IS_ROOT 751 #define BOOT_FROM_MEMORY_HOOKS 1 752 #endif 753 754 #include "raid.h" 755 #if NRAID == 1 756 #define BOOT_FROM_RAID_HOOKS 1 757 #endif 758 759 #ifdef BOOT_FROM_RAID_HOOKS 760 extern int numraid; 761 extern struct device *raidrootdev; 762 #endif 763 764 /* 765 * The device and wedge that we booted from. If booted_wedge is NULL, 766 * the we might consult booted_partition. 767 */ 768 struct device *booted_device; 769 struct device *booted_wedge; 770 int booted_partition; 771 772 /* 773 * Use partition letters if it's a disk class but not a wedge. 774 * XXX Check for wedge is kinda gross. 775 */ 776 #define DEV_USES_PARTITIONS(dv) \ 777 (device_class((dv)) == DV_DISK && \ 778 !device_is_a((dv), "dk")) 779 780 void 781 setroot(struct device *bootdv, int bootpartition) 782 { 783 struct device *dv; 784 int len; 785 #ifdef MEMORY_DISK_HOOKS 786 int i; 787 #endif 788 dev_t nrootdev; 789 dev_t ndumpdev = NODEV; 790 char buf[128]; 791 const char *rootdevname; 792 const char *dumpdevname; 793 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */ 794 struct device *dumpdv = NULL; 795 struct ifnet *ifp; 796 const char *deffsname; 797 struct vfsops *vops; 798 799 #ifdef MEMORY_DISK_HOOKS 800 for (i = 0; i < NMD; i++) { 801 fakemdrootdev[i].dv_class = DV_DISK; 802 fakemdrootdev[i].dv_cfdata = NULL; 803 fakemdrootdev[i].dv_cfdriver = &md_cd; 804 fakemdrootdev[i].dv_unit = i; 805 fakemdrootdev[i].dv_parent = NULL; 806 snprintf(fakemdrootdev[i].dv_xname, 807 sizeof(fakemdrootdev[i].dv_xname), "md%d", i); 808 } 809 #endif /* MEMORY_DISK_HOOKS */ 810 811 #ifdef MEMORY_DISK_IS_ROOT 812 bootdv = &fakemdrootdev[0]; 813 bootpartition = 0; 814 #endif 815 816 /* 817 * If NFS is specified as the file system, and we found 818 * a DV_DISK boot device (or no boot device at all), then 819 * find a reasonable network interface for "rootspec". 820 */ 821 vops = vfs_getopsbyname("nfs"); 822 if (vops != NULL && vops->vfs_mountroot == mountroot && 823 rootspec == NULL && 824 (bootdv == NULL || device_class(bootdv) != DV_IFNET)) { 825 IFNET_FOREACH(ifp) { 826 if ((ifp->if_flags & 827 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) 828 break; 829 } 830 if (ifp == NULL) { 831 /* 832 * Can't find a suitable interface; ask the 833 * user. 834 */ 835 boothowto |= RB_ASKNAME; 836 } else { 837 /* 838 * Have a suitable interface; behave as if 839 * the user specified this interface. 840 */ 841 rootspec = (const char *)ifp->if_xname; 842 } 843 } 844 845 /* 846 * If wildcarded root and we the boot device wasn't determined, 847 * ask the user. 848 */ 849 if (rootspec == NULL && bootdv == NULL) 850 boothowto |= RB_ASKNAME; 851 852 top: 853 if (boothowto & RB_ASKNAME) { 854 struct device *defdumpdv; 855 856 for (;;) { 857 printf("root device"); 858 if (bootdv != NULL) { 859 printf(" (default %s", bootdv->dv_xname); 860 if (DEV_USES_PARTITIONS(bootdv)) 861 printf("%c", bootpartition + 'a'); 862 printf(")"); 863 } 864 printf(": "); 865 len = cngetsn(buf, sizeof(buf)); 866 if (len == 0 && bootdv != NULL) { 867 strlcpy(buf, bootdv->dv_xname, sizeof(buf)); 868 len = strlen(buf); 869 } 870 if (len > 0 && buf[len - 1] == '*') { 871 buf[--len] = '\0'; 872 dv = getdisk(buf, len, 1, &nrootdev, 0); 873 if (dv != NULL) { 874 rootdv = dv; 875 break; 876 } 877 } 878 dv = getdisk(buf, len, bootpartition, &nrootdev, 0); 879 if (dv != NULL) { 880 rootdv = dv; 881 break; 882 } 883 } 884 885 /* 886 * Set up the default dump device. If root is on 887 * a network device, there is no default dump 888 * device, since we don't support dumps to the 889 * network. 890 */ 891 if (DEV_USES_PARTITIONS(rootdv) == 0) 892 defdumpdv = NULL; 893 else 894 defdumpdv = rootdv; 895 896 for (;;) { 897 printf("dump device"); 898 if (defdumpdv != NULL) { 899 /* 900 * Note, we know it's a disk if we get here. 901 */ 902 printf(" (default %sb)", defdumpdv->dv_xname); 903 } 904 printf(": "); 905 len = cngetsn(buf, sizeof(buf)); 906 if (len == 0) { 907 if (defdumpdv != NULL) { 908 ndumpdev = MAKEDISKDEV(major(nrootdev), 909 DISKUNIT(nrootdev), 1); 910 } 911 dumpdv = defdumpdv; 912 break; 913 } 914 if (len == 4 && strcmp(buf, "none") == 0) { 915 dumpdv = NULL; 916 break; 917 } 918 dv = getdisk(buf, len, 1, &ndumpdev, 1); 919 if (dv != NULL) { 920 dumpdv = dv; 921 break; 922 } 923 } 924 925 rootdev = nrootdev; 926 dumpdev = ndumpdev; 927 928 for (vops = LIST_FIRST(&vfs_list); vops != NULL; 929 vops = LIST_NEXT(vops, vfs_list)) { 930 if (vops->vfs_mountroot != NULL && 931 vops->vfs_mountroot == mountroot) 932 break; 933 } 934 935 if (vops == NULL) { 936 mountroot = NULL; 937 deffsname = "generic"; 938 } else 939 deffsname = vops->vfs_name; 940 941 for (;;) { 942 printf("file system (default %s): ", deffsname); 943 len = cngetsn(buf, sizeof(buf)); 944 if (len == 0) 945 break; 946 if (len == 4 && strcmp(buf, "halt") == 0) 947 cpu_reboot(RB_HALT, NULL); 948 else if (len == 6 && strcmp(buf, "reboot") == 0) 949 cpu_reboot(0, NULL); 950 #if defined(DDB) 951 else if (len == 3 && strcmp(buf, "ddb") == 0) { 952 console_debugger(); 953 } 954 #endif 955 else if (len == 7 && strcmp(buf, "generic") == 0) { 956 mountroot = NULL; 957 break; 958 } 959 vops = vfs_getopsbyname(buf); 960 if (vops == NULL || vops->vfs_mountroot == NULL) { 961 printf("use one of: generic"); 962 for (vops = LIST_FIRST(&vfs_list); 963 vops != NULL; 964 vops = LIST_NEXT(vops, vfs_list)) { 965 if (vops->vfs_mountroot != NULL) 966 printf(" %s", vops->vfs_name); 967 } 968 #if defined(DDB) 969 printf(" ddb"); 970 #endif 971 printf(" halt reboot\n"); 972 } else { 973 mountroot = vops->vfs_mountroot; 974 break; 975 } 976 } 977 978 } else if (rootspec == NULL) { 979 int majdev; 980 981 /* 982 * Wildcarded root; use the boot device. 983 */ 984 rootdv = bootdv; 985 986 majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0); 987 if (majdev >= 0) { 988 /* 989 * Root is on a disk. `bootpartition' is root, 990 * unless the device does not use partitions. 991 */ 992 if (DEV_USES_PARTITIONS(bootdv)) 993 rootdev = MAKEDISKDEV(majdev, bootdv->dv_unit, 994 bootpartition); 995 else 996 rootdev = makedev(majdev, bootdv->dv_unit); 997 } 998 } else { 999 1000 /* 1001 * `root on <dev> ...' 1002 */ 1003 1004 /* 1005 * If it's a network interface, we can bail out 1006 * early. 1007 */ 1008 dv = finddevice(rootspec); 1009 if (dv != NULL && device_class(dv) == DV_IFNET) { 1010 rootdv = dv; 1011 goto haveroot; 1012 } 1013 1014 rootdevname = devsw_blk2name(major(rootdev)); 1015 if (rootdevname == NULL) { 1016 printf("unknown device major 0x%x\n", rootdev); 1017 boothowto |= RB_ASKNAME; 1018 goto top; 1019 } 1020 memset(buf, 0, sizeof(buf)); 1021 snprintf(buf, sizeof(buf), "%s%d", rootdevname, 1022 DISKUNIT(rootdev)); 1023 1024 rootdv = finddevice(buf); 1025 if (rootdv == NULL) { 1026 printf("device %s (0x%x) not configured\n", 1027 buf, rootdev); 1028 boothowto |= RB_ASKNAME; 1029 goto top; 1030 } 1031 } 1032 1033 haveroot: 1034 1035 root_device = rootdv; 1036 1037 switch (device_class(rootdv)) { 1038 case DV_IFNET: 1039 aprint_normal("root on %s", rootdv->dv_xname); 1040 break; 1041 1042 case DV_DISK: 1043 aprint_normal("root on %s%c", rootdv->dv_xname, 1044 DISKPART(rootdev) + 'a'); 1045 break; 1046 1047 default: 1048 printf("can't determine root device\n"); 1049 boothowto |= RB_ASKNAME; 1050 goto top; 1051 } 1052 1053 /* 1054 * Now configure the dump device. 1055 * 1056 * If we haven't figured out the dump device, do so, with 1057 * the following rules: 1058 * 1059 * (a) We already know dumpdv in the RB_ASKNAME case. 1060 * 1061 * (b) If dumpspec is set, try to use it. If the device 1062 * is not available, punt. 1063 * 1064 * (c) If dumpspec is not set, the dump device is 1065 * wildcarded or unspecified. If the root device 1066 * is DV_IFNET, punt. Otherwise, use partition b 1067 * of the root device. 1068 */ 1069 1070 if (boothowto & RB_ASKNAME) { /* (a) */ 1071 if (dumpdv == NULL) 1072 goto nodumpdev; 1073 } else if (dumpspec != NULL) { /* (b) */ 1074 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) { 1075 /* 1076 * Operator doesn't want a dump device. 1077 * Or looks like they tried to pick a network 1078 * device. Oops. 1079 */ 1080 goto nodumpdev; 1081 } 1082 1083 dumpdevname = devsw_blk2name(major(dumpdev)); 1084 if (dumpdevname == NULL) 1085 goto nodumpdev; 1086 memset(buf, 0, sizeof(buf)); 1087 snprintf(buf, sizeof(buf), "%s%d", dumpdevname, 1088 DISKUNIT(dumpdev)); 1089 1090 dumpdv = finddevice(buf); 1091 if (dumpdv == NULL) { 1092 /* 1093 * Device not configured. 1094 */ 1095 goto nodumpdev; 1096 } 1097 } else { /* (c) */ 1098 if (DEV_USES_PARTITIONS(rootdv) == 0) 1099 goto nodumpdev; 1100 else { 1101 dumpdv = rootdv; 1102 dumpdev = MAKEDISKDEV(major(rootdev), 1103 dumpdv->dv_unit, 1); 1104 } 1105 } 1106 1107 aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname, 1108 DISKPART(dumpdev) + 'a'); 1109 return; 1110 1111 nodumpdev: 1112 dumpdev = NODEV; 1113 aprint_normal("\n"); 1114 } 1115 1116 static struct device * 1117 finddevice(const char *name) 1118 { 1119 struct device *dv; 1120 #if defined(BOOT_FROM_RAID_HOOKS) || defined(BOOT_FROM_MEMORY_HOOKS) 1121 int j; 1122 #endif /* BOOT_FROM_RAID_HOOKS || BOOT_FROM_MEMORY_HOOKS */ 1123 1124 #ifdef BOOT_FROM_RAID_HOOKS 1125 for (j = 0; j < numraid; j++) { 1126 if (strcmp(name, raidrootdev[j].dv_xname) == 0) { 1127 dv = &raidrootdev[j]; 1128 return (dv); 1129 } 1130 } 1131 #endif /* BOOT_FROM_RAID_HOOKS */ 1132 1133 #ifdef BOOT_FROM_MEMORY_HOOKS 1134 for (j = 0; j < NMD; j++) { 1135 if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) { 1136 dv = &fakemdrootdev[j]; 1137 return (dv); 1138 } 1139 } 1140 #endif /* BOOT_FROM_MEMORY_HOOKS */ 1141 1142 for (dv = TAILQ_FIRST(&alldevs); dv != NULL; 1143 dv = TAILQ_NEXT(dv, dv_list)) 1144 if (strcmp(dv->dv_xname, name) == 0) 1145 break; 1146 return (dv); 1147 } 1148 1149 static struct device * 1150 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump) 1151 { 1152 struct device *dv; 1153 #ifdef MEMORY_DISK_HOOKS 1154 int i; 1155 #endif 1156 #ifdef BOOT_FROM_RAID_HOOKS 1157 int j; 1158 #endif 1159 1160 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1161 printf("use one of:"); 1162 #ifdef MEMORY_DISK_HOOKS 1163 if (isdump == 0) 1164 for (i = 0; i < NMD; i++) 1165 printf(" %s[a-%c]", fakemdrootdev[i].dv_xname, 1166 'a' + MAXPARTITIONS - 1); 1167 #endif 1168 #ifdef BOOT_FROM_RAID_HOOKS 1169 if (isdump == 0) 1170 for (j = 0; j < numraid; j++) 1171 printf(" %s[a-%c]", raidrootdev[j].dv_xname, 1172 'a' + MAXPARTITIONS - 1); 1173 #endif 1174 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1175 if (DEV_USES_PARTITIONS(dv)) 1176 printf(" %s[a-%c]", dv->dv_xname, 1177 'a' + MAXPARTITIONS - 1); 1178 else if (device_class(dv) == DV_DISK) 1179 printf(" %s", dv->dv_xname); 1180 if (isdump == 0 && device_class(dv) == DV_IFNET) 1181 printf(" %s", dv->dv_xname); 1182 } 1183 if (isdump) 1184 printf(" none"); 1185 #if defined(DDB) 1186 printf(" ddb"); 1187 #endif 1188 printf(" halt reboot\n"); 1189 } 1190 return (dv); 1191 } 1192 1193 static struct device * 1194 parsedisk(char *str, int len, int defpart, dev_t *devp) 1195 { 1196 struct device *dv; 1197 char *cp, c; 1198 int majdev, part; 1199 #ifdef MEMORY_DISK_HOOKS 1200 int i; 1201 #endif 1202 if (len == 0) 1203 return (NULL); 1204 1205 if (len == 4 && strcmp(str, "halt") == 0) 1206 cpu_reboot(RB_HALT, NULL); 1207 else if (len == 6 && strcmp(str, "reboot") == 0) 1208 cpu_reboot(0, NULL); 1209 #if defined(DDB) 1210 else if (len == 3 && strcmp(str, "ddb") == 0) 1211 console_debugger(); 1212 #endif 1213 1214 cp = str + len - 1; 1215 c = *cp; 1216 if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) { 1217 part = c - 'a'; 1218 *cp = '\0'; 1219 } else 1220 part = defpart; 1221 1222 #ifdef MEMORY_DISK_HOOKS 1223 for (i = 0; i < NMD; i++) 1224 if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) { 1225 dv = &fakemdrootdev[i]; 1226 goto gotdisk; 1227 } 1228 #endif 1229 1230 dv = finddevice(str); 1231 if (dv != NULL) { 1232 if (device_class(dv) == DV_DISK) { 1233 #ifdef MEMORY_DISK_HOOKS 1234 gotdisk: 1235 #endif 1236 majdev = devsw_name2blk(dv->dv_xname, NULL, 0); 1237 if (majdev < 0) 1238 panic("parsedisk"); 1239 if (DEV_USES_PARTITIONS(dv)) 1240 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1241 else 1242 *devp = makedev(majdev, dv->dv_unit); 1243 } 1244 1245 if (device_class(dv) == DV_IFNET) 1246 *devp = NODEV; 1247 } 1248 1249 *cp = c; 1250 return (dv); 1251 } 1252 1253 /* 1254 * snprintf() `bytes' into `buf', reformatting it so that the number, 1255 * plus a possible `x' + suffix extension) fits into len bytes (including 1256 * the terminating NUL). 1257 * Returns the number of bytes stored in buf, or -1 if there was a problem. 1258 * E.g, given a len of 9 and a suffix of `B': 1259 * bytes result 1260 * ----- ------ 1261 * 99999 `99999 B' 1262 * 100000 `97 kB' 1263 * 66715648 `65152 kB' 1264 * 252215296 `240 MB' 1265 */ 1266 int 1267 humanize_number(char *buf, size_t len, uint64_t bytes, const char *suffix, 1268 int divisor) 1269 { 1270 /* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */ 1271 const char *prefixes; 1272 int r; 1273 uint64_t umax; 1274 size_t i, suffixlen; 1275 1276 if (buf == NULL || suffix == NULL) 1277 return (-1); 1278 if (len > 0) 1279 buf[0] = '\0'; 1280 suffixlen = strlen(suffix); 1281 /* check if enough room for `x y' + suffix + `\0' */ 1282 if (len < 4 + suffixlen) 1283 return (-1); 1284 1285 if (divisor == 1024) { 1286 /* 1287 * binary multiplies 1288 * XXX IEC 60027-2 recommends Ki, Mi, Gi... 1289 */ 1290 prefixes = " KMGTPE"; 1291 } else 1292 prefixes = " kMGTPE"; /* SI for decimal multiplies */ 1293 1294 umax = 1; 1295 for (i = 0; i < len - suffixlen - 3; i++) 1296 umax *= 10; 1297 for (i = 0; bytes >= umax && prefixes[i + 1]; i++) 1298 bytes /= divisor; 1299 1300 r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes, 1301 i == 0 ? "" : " ", prefixes[i], suffix); 1302 1303 return (r); 1304 } 1305 1306 int 1307 format_bytes(char *buf, size_t len, uint64_t bytes) 1308 { 1309 int rv; 1310 size_t nlen; 1311 1312 rv = humanize_number(buf, len, bytes, "B", 1024); 1313 if (rv != -1) { 1314 /* nuke the trailing ` B' if it exists */ 1315 nlen = strlen(buf) - 2; 1316 if (strcmp(&buf[nlen], " B") == 0) 1317 buf[nlen] = '\0'; 1318 } 1319 return (rv); 1320 } 1321 1322 /* 1323 * Return TRUE if system call tracing is enabled for the specified process. 1324 */ 1325 boolean_t 1326 trace_is_enabled(struct proc *p) 1327 { 1328 #ifdef SYSCALL_DEBUG 1329 return (TRUE); 1330 #endif 1331 #ifdef KTRACE 1332 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 1333 return (TRUE); 1334 #endif 1335 #ifdef SYSTRACE 1336 if (ISSET(p->p_flag, P_SYSTRACE)) 1337 return (TRUE); 1338 #endif 1339 if (ISSET(p->p_flag, P_SYSCALL)) 1340 return (TRUE); 1341 1342 return (FALSE); 1343 } 1344 1345 /* 1346 * Start trace of particular system call. If process is being traced, 1347 * this routine is called by MD syscall dispatch code just before 1348 * a system call is actually executed. 1349 * MD caller guarantees the passed 'code' is within the supported 1350 * system call number range for emulation the process runs under. 1351 */ 1352 int 1353 trace_enter(struct lwp *l, register_t code, 1354 register_t realcode, const struct sysent *callp, void *args) 1355 { 1356 struct proc *p = l->l_proc; 1357 1358 #ifdef SYSCALL_DEBUG 1359 scdebug_call(l, code, args); 1360 #endif /* SYSCALL_DEBUG */ 1361 1362 #ifdef KTRACE 1363 if (KTRPOINT(p, KTR_SYSCALL)) 1364 ktrsyscall(l, code, realcode, callp, args); 1365 #endif /* KTRACE */ 1366 1367 if ((p->p_flag & (P_SYSCALL|P_TRACED)) == (P_SYSCALL|P_TRACED)) 1368 process_stoptrace(l); 1369 1370 #ifdef SYSTRACE 1371 if (ISSET(p->p_flag, P_SYSTRACE)) 1372 return systrace_enter(p, code, args); 1373 #endif 1374 return 0; 1375 } 1376 1377 /* 1378 * End trace of particular system call. If process is being traced, 1379 * this routine is called by MD syscall dispatch code just after 1380 * a system call finishes. 1381 * MD caller guarantees the passed 'code' is within the supported 1382 * system call number range for emulation the process runs under. 1383 */ 1384 void 1385 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[], 1386 int error) 1387 { 1388 struct proc *p = l->l_proc; 1389 1390 #ifdef SYSCALL_DEBUG 1391 scdebug_ret(l, code, error, rval); 1392 #endif /* SYSCALL_DEBUG */ 1393 1394 #ifdef KTRACE 1395 if (KTRPOINT(p, KTR_SYSRET)) { 1396 KERNEL_PROC_LOCK(l); 1397 ktrsysret(l, code, error, rval); 1398 KERNEL_PROC_UNLOCK(l); 1399 } 1400 #endif /* KTRACE */ 1401 1402 if ((p->p_flag & (P_SYSCALL|P_TRACED)) == (P_SYSCALL|P_TRACED)) 1403 process_stoptrace(l); 1404 1405 #ifdef SYSTRACE 1406 if (ISSET(p->p_flag, P_SYSTRACE)) { 1407 KERNEL_PROC_LOCK(l); 1408 systrace_exit(p, code, args, rval, error); 1409 KERNEL_PROC_UNLOCK(l); 1410 } 1411 #endif 1412 } 1413