1 /* $NetBSD: kern_subr.c,v 1.136 2006/06/11 07:32:18 rjs Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Luke Mewburn. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Copyright (c) 1992, 1993 50 * The Regents of the University of California. All rights reserved. 51 * 52 * This software was developed by the Computer Systems Engineering group 53 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 54 * contributed to Berkeley. 55 * 56 * All advertising materials mentioning features or use of this software 57 * must display the following acknowledgement: 58 * This product includes software developed by the University of 59 * California, Lawrence Berkeley Laboratory. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 1. Redistributions of source code must retain the above copyright 65 * notice, this list of conditions and the following disclaimer. 66 * 2. Redistributions in binary form must reproduce the above copyright 67 * notice, this list of conditions and the following disclaimer in the 68 * documentation and/or other materials provided with the distribution. 69 * 3. Neither the name of the University nor the names of its contributors 70 * may be used to endorse or promote products derived from this software 71 * without specific prior written permission. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 84 * 85 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95 86 */ 87 88 #include <sys/cdefs.h> 89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.136 2006/06/11 07:32:18 rjs Exp $"); 90 91 #include "opt_ddb.h" 92 #include "opt_md.h" 93 #include "opt_syscall_debug.h" 94 #include "opt_ktrace.h" 95 #include "opt_systrace.h" 96 #include "opt_lockdebug.h" 97 98 #include <sys/param.h> 99 #include <sys/systm.h> 100 #include <sys/proc.h> 101 #include <sys/malloc.h> 102 #include <sys/mount.h> 103 #include <sys/device.h> 104 #include <sys/reboot.h> 105 #include <sys/conf.h> 106 #include <sys/disklabel.h> 107 #include <sys/queue.h> 108 #include <sys/systrace.h> 109 #include <sys/ktrace.h> 110 #include <sys/ptrace.h> 111 #include <sys/fcntl.h> 112 113 #include <uvm/uvm_extern.h> 114 115 #include <dev/cons.h> 116 117 #include <net/if.h> 118 119 /* XXX these should eventually move to subr_autoconf.c */ 120 static struct device *finddevice(const char *); 121 static struct device *getdisk(char *, int, int, dev_t *, int); 122 static struct device *parsedisk(char *, int, int, dev_t *); 123 124 /* 125 * A generic linear hook. 126 */ 127 struct hook_desc { 128 LIST_ENTRY(hook_desc) hk_list; 129 void (*hk_fn)(void *); 130 void *hk_arg; 131 }; 132 typedef LIST_HEAD(, hook_desc) hook_list_t; 133 134 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 135 136 void 137 uio_setup_sysspace(struct uio *uio) 138 { 139 140 uio->uio_vmspace = vmspace_kernel(); 141 } 142 143 int 144 uiomove(void *buf, size_t n, struct uio *uio) 145 { 146 struct vmspace *vm = uio->uio_vmspace; 147 struct iovec *iov; 148 u_int cnt; 149 int error = 0; 150 char *cp = buf; 151 int hold_count; 152 153 hold_count = KERNEL_LOCK_RELEASE_ALL(); 154 155 #ifdef LOCKDEBUG 156 spinlock_switchcheck(); 157 simple_lock_only_held(NULL, "uiomove"); 158 #endif 159 160 #ifdef DIAGNOSTIC 161 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) 162 panic("uiomove: mode"); 163 #endif 164 while (n > 0 && uio->uio_resid) { 165 iov = uio->uio_iov; 166 cnt = iov->iov_len; 167 if (cnt == 0) { 168 KASSERT(uio->uio_iovcnt > 0); 169 uio->uio_iov++; 170 uio->uio_iovcnt--; 171 continue; 172 } 173 if (cnt > n) 174 cnt = n; 175 if (!VMSPACE_IS_KERNEL_P(vm)) { 176 if (curcpu()->ci_schedstate.spc_flags & 177 SPCF_SHOULDYIELD) 178 preempt(1); 179 } 180 181 if (uio->uio_rw == UIO_READ) { 182 error = copyout_vmspace(vm, cp, iov->iov_base, 183 cnt); 184 } else { 185 error = copyin_vmspace(vm, iov->iov_base, cp, 186 cnt); 187 } 188 if (error) { 189 break; 190 } 191 iov->iov_base = (caddr_t)iov->iov_base + cnt; 192 iov->iov_len -= cnt; 193 uio->uio_resid -= cnt; 194 uio->uio_offset += cnt; 195 cp += cnt; 196 KDASSERT(cnt <= n); 197 n -= cnt; 198 } 199 KERNEL_LOCK_ACQUIRE_COUNT(hold_count); 200 return (error); 201 } 202 203 /* 204 * Wrapper for uiomove() that validates the arguments against a known-good 205 * kernel buffer. 206 */ 207 int 208 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio) 209 { 210 size_t offset; 211 212 if (uio->uio_offset < 0 || uio->uio_resid < 0 || 213 (offset = uio->uio_offset) != uio->uio_offset) 214 return (EINVAL); 215 if (offset >= buflen) 216 return (0); 217 return (uiomove((char *)buf + offset, buflen - offset, uio)); 218 } 219 220 /* 221 * Give next character to user as result of read. 222 */ 223 int 224 ureadc(int c, struct uio *uio) 225 { 226 struct iovec *iov; 227 228 if (uio->uio_resid <= 0) 229 panic("ureadc: non-positive resid"); 230 again: 231 if (uio->uio_iovcnt <= 0) 232 panic("ureadc: non-positive iovcnt"); 233 iov = uio->uio_iov; 234 if (iov->iov_len <= 0) { 235 uio->uio_iovcnt--; 236 uio->uio_iov++; 237 goto again; 238 } 239 if (!VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) { 240 if (subyte(iov->iov_base, c) < 0) 241 return (EFAULT); 242 } else { 243 *(char *)iov->iov_base = c; 244 } 245 iov->iov_base = (caddr_t)iov->iov_base + 1; 246 iov->iov_len--; 247 uio->uio_resid--; 248 uio->uio_offset++; 249 return (0); 250 } 251 252 /* 253 * Like copyin(), but operates on an arbitrary vmspace. 254 */ 255 int 256 copyin_vmspace(struct vmspace *vm, const void *uaddr, void *kaddr, size_t len) 257 { 258 struct iovec iov; 259 struct uio uio; 260 int error; 261 262 if (len == 0) 263 return (0); 264 265 if (VMSPACE_IS_KERNEL_P(vm)) { 266 return kcopy(uaddr, kaddr, len); 267 } 268 if (__predict_true(vm == curproc->p_vmspace)) { 269 return copyin(uaddr, kaddr, len); 270 } 271 272 iov.iov_base = kaddr; 273 iov.iov_len = len; 274 uio.uio_iov = &iov; 275 uio.uio_iovcnt = 1; 276 uio.uio_offset = (off_t)(intptr_t)uaddr; 277 uio.uio_resid = len; 278 uio.uio_rw = UIO_READ; 279 UIO_SETUP_SYSSPACE(&uio); 280 error = uvm_io(&vm->vm_map, &uio); 281 282 return (error); 283 } 284 285 /* 286 * Like copyout(), but operates on an arbitrary vmspace. 287 */ 288 int 289 copyout_vmspace(struct vmspace *vm, const void *kaddr, void *uaddr, size_t len) 290 { 291 struct iovec iov; 292 struct uio uio; 293 int error; 294 295 if (len == 0) 296 return (0); 297 298 if (VMSPACE_IS_KERNEL_P(vm)) { 299 return kcopy(kaddr, uaddr, len); 300 } 301 if (__predict_true(vm == curproc->p_vmspace)) { 302 return copyout(kaddr, uaddr, len); 303 } 304 305 iov.iov_base = __UNCONST(kaddr); /* XXXUNCONST cast away const */ 306 iov.iov_len = len; 307 uio.uio_iov = &iov; 308 uio.uio_iovcnt = 1; 309 uio.uio_offset = (off_t)(intptr_t)uaddr; 310 uio.uio_resid = len; 311 uio.uio_rw = UIO_WRITE; 312 UIO_SETUP_SYSSPACE(&uio); 313 error = uvm_io(&vm->vm_map, &uio); 314 315 return (error); 316 } 317 318 /* 319 * Like copyin(), but operates on an arbitrary process. 320 */ 321 int 322 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len) 323 { 324 struct vmspace *vm; 325 int error; 326 327 error = proc_vmspace_getref(p, &vm); 328 if (error) { 329 return error; 330 } 331 error = copyin_vmspace(vm, uaddr, kaddr, len); 332 uvmspace_free(vm); 333 334 return error; 335 } 336 337 /* 338 * Like copyout(), but operates on an arbitrary process. 339 */ 340 int 341 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len) 342 { 343 struct vmspace *vm; 344 int error; 345 346 error = proc_vmspace_getref(p, &vm); 347 if (error) { 348 return error; 349 } 350 error = copyout_vmspace(vm, kaddr, uaddr, len); 351 uvmspace_free(vm); 352 353 return error; 354 } 355 356 /* 357 * Like copyin(), except it operates on kernel addresses when the FKIOCTL 358 * flag is passed in `ioctlflags' from the ioctl call. 359 */ 360 int 361 ioctl_copyin(int ioctlflags, const void *src, void *dst, size_t len) 362 { 363 if (ioctlflags & FKIOCTL) 364 return kcopy(src, dst, len); 365 return copyin(src, dst, len); 366 } 367 368 /* 369 * Like copyout(), except it operates on kernel addresses when the FKIOCTL 370 * flag is passed in `ioctlflags' from the ioctl call. 371 */ 372 int 373 ioctl_copyout(int ioctlflags, const void *src, void *dst, size_t len) 374 { 375 if (ioctlflags & FKIOCTL) 376 return kcopy(src, dst, len); 377 return copyout(src, dst, len); 378 } 379 380 /* 381 * General routine to allocate a hash table. 382 * Allocate enough memory to hold at least `elements' list-head pointers. 383 * Return a pointer to the allocated space and set *hashmask to a pattern 384 * suitable for masking a value to use as an index into the returned array. 385 */ 386 void * 387 hashinit(u_int elements, enum hashtype htype, struct malloc_type *mtype, 388 int mflags, u_long *hashmask) 389 { 390 u_long hashsize, i; 391 LIST_HEAD(, generic) *hashtbl_list; 392 TAILQ_HEAD(, generic) *hashtbl_tailq; 393 size_t esize; 394 void *p; 395 396 if (elements == 0) 397 panic("hashinit: bad cnt"); 398 for (hashsize = 1; hashsize < elements; hashsize <<= 1) 399 continue; 400 401 switch (htype) { 402 case HASH_LIST: 403 esize = sizeof(*hashtbl_list); 404 break; 405 case HASH_TAILQ: 406 esize = sizeof(*hashtbl_tailq); 407 break; 408 default: 409 #ifdef DIAGNOSTIC 410 panic("hashinit: invalid table type"); 411 #else 412 return NULL; 413 #endif 414 } 415 416 if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL) 417 return (NULL); 418 419 switch (htype) { 420 case HASH_LIST: 421 hashtbl_list = p; 422 for (i = 0; i < hashsize; i++) 423 LIST_INIT(&hashtbl_list[i]); 424 break; 425 case HASH_TAILQ: 426 hashtbl_tailq = p; 427 for (i = 0; i < hashsize; i++) 428 TAILQ_INIT(&hashtbl_tailq[i]); 429 break; 430 } 431 *hashmask = hashsize - 1; 432 return (p); 433 } 434 435 /* 436 * Free memory from hash table previosly allocated via hashinit(). 437 */ 438 void 439 hashdone(void *hashtbl, struct malloc_type *mtype) 440 { 441 442 free(hashtbl, mtype); 443 } 444 445 446 static void * 447 hook_establish(hook_list_t *list, void (*fn)(void *), void *arg) 448 { 449 struct hook_desc *hd; 450 451 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT); 452 if (hd == NULL) 453 return (NULL); 454 455 hd->hk_fn = fn; 456 hd->hk_arg = arg; 457 LIST_INSERT_HEAD(list, hd, hk_list); 458 459 return (hd); 460 } 461 462 static void 463 hook_disestablish(hook_list_t *list, void *vhook) 464 { 465 #ifdef DIAGNOSTIC 466 struct hook_desc *hd; 467 468 LIST_FOREACH(hd, list, hk_list) { 469 if (hd == vhook) 470 break; 471 } 472 473 if (hd == NULL) 474 panic("hook_disestablish: hook %p not established", vhook); 475 #endif 476 LIST_REMOVE((struct hook_desc *)vhook, hk_list); 477 free(vhook, M_DEVBUF); 478 } 479 480 static void 481 hook_destroy(hook_list_t *list) 482 { 483 struct hook_desc *hd; 484 485 while ((hd = LIST_FIRST(list)) != NULL) { 486 LIST_REMOVE(hd, hk_list); 487 free(hd, M_DEVBUF); 488 } 489 } 490 491 static void 492 hook_proc_run(hook_list_t *list, struct proc *p) 493 { 494 struct hook_desc *hd; 495 496 for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) { 497 ((void (*)(struct proc *, void *))*hd->hk_fn)(p, 498 hd->hk_arg); 499 } 500 } 501 502 /* 503 * "Shutdown hook" types, functions, and variables. 504 * 505 * Should be invoked immediately before the 506 * system is halted or rebooted, i.e. after file systems unmounted, 507 * after crash dump done, etc. 508 * 509 * Each shutdown hook is removed from the list before it's run, so that 510 * it won't be run again. 511 */ 512 513 static hook_list_t shutdownhook_list; 514 515 void * 516 shutdownhook_establish(void (*fn)(void *), void *arg) 517 { 518 return hook_establish(&shutdownhook_list, fn, arg); 519 } 520 521 void 522 shutdownhook_disestablish(void *vhook) 523 { 524 hook_disestablish(&shutdownhook_list, vhook); 525 } 526 527 /* 528 * Run shutdown hooks. Should be invoked immediately before the 529 * system is halted or rebooted, i.e. after file systems unmounted, 530 * after crash dump done, etc. 531 * 532 * Each shutdown hook is removed from the list before it's run, so that 533 * it won't be run again. 534 */ 535 void 536 doshutdownhooks(void) 537 { 538 struct hook_desc *dp; 539 540 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) { 541 LIST_REMOVE(dp, hk_list); 542 (*dp->hk_fn)(dp->hk_arg); 543 #if 0 544 /* 545 * Don't bother freeing the hook structure,, since we may 546 * be rebooting because of a memory corruption problem, 547 * and this might only make things worse. It doesn't 548 * matter, anyway, since the system is just about to 549 * reboot. 550 */ 551 free(dp, M_DEVBUF); 552 #endif 553 } 554 } 555 556 /* 557 * "Mountroot hook" types, functions, and variables. 558 */ 559 560 static hook_list_t mountroothook_list; 561 562 void * 563 mountroothook_establish(void (*fn)(struct device *), struct device *dev) 564 { 565 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev); 566 } 567 568 void 569 mountroothook_disestablish(void *vhook) 570 { 571 hook_disestablish(&mountroothook_list, vhook); 572 } 573 574 void 575 mountroothook_destroy(void) 576 { 577 hook_destroy(&mountroothook_list); 578 } 579 580 void 581 domountroothook(void) 582 { 583 struct hook_desc *hd; 584 585 LIST_FOREACH(hd, &mountroothook_list, hk_list) { 586 if (hd->hk_arg == (void *)root_device) { 587 (*hd->hk_fn)(hd->hk_arg); 588 return; 589 } 590 } 591 } 592 593 static hook_list_t exechook_list; 594 595 void * 596 exechook_establish(void (*fn)(struct proc *, void *), void *arg) 597 { 598 return hook_establish(&exechook_list, (void (*)(void *))fn, arg); 599 } 600 601 void 602 exechook_disestablish(void *vhook) 603 { 604 hook_disestablish(&exechook_list, vhook); 605 } 606 607 /* 608 * Run exec hooks. 609 */ 610 void 611 doexechooks(struct proc *p) 612 { 613 hook_proc_run(&exechook_list, p); 614 } 615 616 static hook_list_t exithook_list; 617 618 void * 619 exithook_establish(void (*fn)(struct proc *, void *), void *arg) 620 { 621 return hook_establish(&exithook_list, (void (*)(void *))fn, arg); 622 } 623 624 void 625 exithook_disestablish(void *vhook) 626 { 627 hook_disestablish(&exithook_list, vhook); 628 } 629 630 /* 631 * Run exit hooks. 632 */ 633 void 634 doexithooks(struct proc *p) 635 { 636 hook_proc_run(&exithook_list, p); 637 } 638 639 static hook_list_t forkhook_list; 640 641 void * 642 forkhook_establish(void (*fn)(struct proc *, struct proc *)) 643 { 644 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL); 645 } 646 647 void 648 forkhook_disestablish(void *vhook) 649 { 650 hook_disestablish(&forkhook_list, vhook); 651 } 652 653 /* 654 * Run fork hooks. 655 */ 656 void 657 doforkhooks(struct proc *p2, struct proc *p1) 658 { 659 struct hook_desc *hd; 660 661 LIST_FOREACH(hd, &forkhook_list, hk_list) { 662 ((void (*)(struct proc *, struct proc *))*hd->hk_fn) 663 (p2, p1); 664 } 665 } 666 667 /* 668 * "Power hook" types, functions, and variables. 669 * The list of power hooks is kept ordered with the last registered hook 670 * first. 671 * When running the hooks on power down the hooks are called in reverse 672 * registration order, when powering up in registration order. 673 */ 674 struct powerhook_desc { 675 CIRCLEQ_ENTRY(powerhook_desc) sfd_list; 676 void (*sfd_fn)(int, void *); 677 void *sfd_arg; 678 }; 679 680 static CIRCLEQ_HEAD(, powerhook_desc) powerhook_list = 681 CIRCLEQ_HEAD_INITIALIZER(powerhook_list); 682 683 void * 684 powerhook_establish(void (*fn)(int, void *), void *arg) 685 { 686 struct powerhook_desc *ndp; 687 688 ndp = (struct powerhook_desc *) 689 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT); 690 if (ndp == NULL) 691 return (NULL); 692 693 ndp->sfd_fn = fn; 694 ndp->sfd_arg = arg; 695 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list); 696 697 return (ndp); 698 } 699 700 void 701 powerhook_disestablish(void *vhook) 702 { 703 #ifdef DIAGNOSTIC 704 struct powerhook_desc *dp; 705 706 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) 707 if (dp == vhook) 708 goto found; 709 panic("powerhook_disestablish: hook %p not established", vhook); 710 found: 711 #endif 712 713 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook, 714 sfd_list); 715 free(vhook, M_DEVBUF); 716 } 717 718 /* 719 * Run power hooks. 720 */ 721 void 722 dopowerhooks(int why) 723 { 724 struct powerhook_desc *dp; 725 726 if (why == PWR_RESUME || why == PWR_SOFTRESUME) { 727 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) { 728 (*dp->sfd_fn)(why, dp->sfd_arg); 729 } 730 } else { 731 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) { 732 (*dp->sfd_fn)(why, dp->sfd_arg); 733 } 734 } 735 } 736 737 /* 738 * Determine the root device and, if instructed to, the root file system. 739 */ 740 741 #include "md.h" 742 #if NMD == 0 743 #undef MEMORY_DISK_HOOKS 744 #endif 745 746 #ifdef MEMORY_DISK_HOOKS 747 static struct device fakemdrootdev[NMD]; 748 extern struct cfdriver md_cd; 749 #endif 750 751 #ifdef MEMORY_DISK_IS_ROOT 752 #define BOOT_FROM_MEMORY_HOOKS 1 753 #endif 754 755 #include "raid.h" 756 #if NRAID == 1 757 #define BOOT_FROM_RAID_HOOKS 1 758 #endif 759 760 #ifdef BOOT_FROM_RAID_HOOKS 761 extern int numraid; 762 extern struct device *raidrootdev; 763 #endif 764 765 /* 766 * The device and wedge that we booted from. If booted_wedge is NULL, 767 * the we might consult booted_partition. 768 */ 769 struct device *booted_device; 770 struct device *booted_wedge; 771 int booted_partition; 772 773 /* 774 * Use partition letters if it's a disk class but not a wedge. 775 * XXX Check for wedge is kinda gross. 776 */ 777 #define DEV_USES_PARTITIONS(dv) \ 778 (device_class((dv)) == DV_DISK && \ 779 !device_is_a((dv), "dk")) 780 781 void 782 setroot(struct device *bootdv, int bootpartition) 783 { 784 struct device *dv; 785 int len; 786 #ifdef MEMORY_DISK_HOOKS 787 int i; 788 #endif 789 dev_t nrootdev; 790 dev_t ndumpdev = NODEV; 791 char buf[128]; 792 const char *rootdevname; 793 const char *dumpdevname; 794 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */ 795 struct device *dumpdv = NULL; 796 struct ifnet *ifp; 797 const char *deffsname; 798 struct vfsops *vops; 799 800 #ifdef MEMORY_DISK_HOOKS 801 for (i = 0; i < NMD; i++) { 802 fakemdrootdev[i].dv_class = DV_DISK; 803 fakemdrootdev[i].dv_cfdata = NULL; 804 fakemdrootdev[i].dv_cfdriver = &md_cd; 805 fakemdrootdev[i].dv_unit = i; 806 fakemdrootdev[i].dv_parent = NULL; 807 snprintf(fakemdrootdev[i].dv_xname, 808 sizeof(fakemdrootdev[i].dv_xname), "md%d", i); 809 } 810 #endif /* MEMORY_DISK_HOOKS */ 811 812 #ifdef MEMORY_DISK_IS_ROOT 813 bootdv = &fakemdrootdev[0]; 814 bootpartition = 0; 815 #endif 816 817 /* 818 * If NFS is specified as the file system, and we found 819 * a DV_DISK boot device (or no boot device at all), then 820 * find a reasonable network interface for "rootspec". 821 */ 822 vops = vfs_getopsbyname("nfs"); 823 if (vops != NULL && vops->vfs_mountroot == mountroot && 824 rootspec == NULL && 825 (bootdv == NULL || device_class(bootdv) != DV_IFNET)) { 826 IFNET_FOREACH(ifp) { 827 if ((ifp->if_flags & 828 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) 829 break; 830 } 831 if (ifp == NULL) { 832 /* 833 * Can't find a suitable interface; ask the 834 * user. 835 */ 836 boothowto |= RB_ASKNAME; 837 } else { 838 /* 839 * Have a suitable interface; behave as if 840 * the user specified this interface. 841 */ 842 rootspec = (const char *)ifp->if_xname; 843 } 844 } 845 846 /* 847 * If wildcarded root and we the boot device wasn't determined, 848 * ask the user. 849 */ 850 if (rootspec == NULL && bootdv == NULL) 851 boothowto |= RB_ASKNAME; 852 853 top: 854 if (boothowto & RB_ASKNAME) { 855 struct device *defdumpdv; 856 857 for (;;) { 858 printf("root device"); 859 if (bootdv != NULL) { 860 printf(" (default %s", bootdv->dv_xname); 861 if (DEV_USES_PARTITIONS(bootdv)) 862 printf("%c", bootpartition + 'a'); 863 printf(")"); 864 } 865 printf(": "); 866 len = cngetsn(buf, sizeof(buf)); 867 if (len == 0 && bootdv != NULL) { 868 strlcpy(buf, bootdv->dv_xname, sizeof(buf)); 869 len = strlen(buf); 870 } 871 if (len > 0 && buf[len - 1] == '*') { 872 buf[--len] = '\0'; 873 dv = getdisk(buf, len, 1, &nrootdev, 0); 874 if (dv != NULL) { 875 rootdv = dv; 876 break; 877 } 878 } 879 dv = getdisk(buf, len, bootpartition, &nrootdev, 0); 880 if (dv != NULL) { 881 rootdv = dv; 882 break; 883 } 884 } 885 886 /* 887 * Set up the default dump device. If root is on 888 * a network device, there is no default dump 889 * device, since we don't support dumps to the 890 * network. 891 */ 892 if (DEV_USES_PARTITIONS(rootdv) == 0) 893 defdumpdv = NULL; 894 else 895 defdumpdv = rootdv; 896 897 for (;;) { 898 printf("dump device"); 899 if (defdumpdv != NULL) { 900 /* 901 * Note, we know it's a disk if we get here. 902 */ 903 printf(" (default %sb)", defdumpdv->dv_xname); 904 } 905 printf(": "); 906 len = cngetsn(buf, sizeof(buf)); 907 if (len == 0) { 908 if (defdumpdv != NULL) { 909 ndumpdev = MAKEDISKDEV(major(nrootdev), 910 DISKUNIT(nrootdev), 1); 911 } 912 dumpdv = defdumpdv; 913 break; 914 } 915 if (len == 4 && strcmp(buf, "none") == 0) { 916 dumpdv = NULL; 917 break; 918 } 919 dv = getdisk(buf, len, 1, &ndumpdev, 1); 920 if (dv != NULL) { 921 dumpdv = dv; 922 break; 923 } 924 } 925 926 rootdev = nrootdev; 927 dumpdev = ndumpdev; 928 929 for (vops = LIST_FIRST(&vfs_list); vops != NULL; 930 vops = LIST_NEXT(vops, vfs_list)) { 931 if (vops->vfs_mountroot != NULL && 932 vops->vfs_mountroot == mountroot) 933 break; 934 } 935 936 if (vops == NULL) { 937 mountroot = NULL; 938 deffsname = "generic"; 939 } else 940 deffsname = vops->vfs_name; 941 942 for (;;) { 943 printf("file system (default %s): ", deffsname); 944 len = cngetsn(buf, sizeof(buf)); 945 if (len == 0) 946 break; 947 if (len == 4 && strcmp(buf, "halt") == 0) 948 cpu_reboot(RB_HALT, NULL); 949 else if (len == 6 && strcmp(buf, "reboot") == 0) 950 cpu_reboot(0, NULL); 951 #if defined(DDB) 952 else if (len == 3 && strcmp(buf, "ddb") == 0) { 953 console_debugger(); 954 } 955 #endif 956 else if (len == 7 && strcmp(buf, "generic") == 0) { 957 mountroot = NULL; 958 break; 959 } 960 vops = vfs_getopsbyname(buf); 961 if (vops == NULL || vops->vfs_mountroot == NULL) { 962 printf("use one of: generic"); 963 for (vops = LIST_FIRST(&vfs_list); 964 vops != NULL; 965 vops = LIST_NEXT(vops, vfs_list)) { 966 if (vops->vfs_mountroot != NULL) 967 printf(" %s", vops->vfs_name); 968 } 969 #if defined(DDB) 970 printf(" ddb"); 971 #endif 972 printf(" halt reboot\n"); 973 } else { 974 mountroot = vops->vfs_mountroot; 975 break; 976 } 977 } 978 979 } else if (rootspec == NULL) { 980 int majdev; 981 982 /* 983 * Wildcarded root; use the boot device. 984 */ 985 rootdv = bootdv; 986 987 majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0); 988 if (majdev >= 0) { 989 /* 990 * Root is on a disk. `bootpartition' is root, 991 * unless the device does not use partitions. 992 */ 993 if (DEV_USES_PARTITIONS(bootdv)) 994 rootdev = MAKEDISKDEV(majdev, 995 device_unit(bootdv), 996 bootpartition); 997 else 998 rootdev = makedev(majdev, device_unit(bootdv)); 999 } 1000 } else { 1001 1002 /* 1003 * `root on <dev> ...' 1004 */ 1005 1006 /* 1007 * If it's a network interface, we can bail out 1008 * early. 1009 */ 1010 dv = finddevice(rootspec); 1011 if (dv != NULL && device_class(dv) == DV_IFNET) { 1012 rootdv = dv; 1013 goto haveroot; 1014 } 1015 1016 rootdevname = devsw_blk2name(major(rootdev)); 1017 if (rootdevname == NULL) { 1018 printf("unknown device major 0x%x\n", rootdev); 1019 boothowto |= RB_ASKNAME; 1020 goto top; 1021 } 1022 memset(buf, 0, sizeof(buf)); 1023 snprintf(buf, sizeof(buf), "%s%d", rootdevname, 1024 DISKUNIT(rootdev)); 1025 1026 rootdv = finddevice(buf); 1027 if (rootdv == NULL) { 1028 printf("device %s (0x%x) not configured\n", 1029 buf, rootdev); 1030 boothowto |= RB_ASKNAME; 1031 goto top; 1032 } 1033 } 1034 1035 haveroot: 1036 1037 root_device = rootdv; 1038 1039 switch (device_class(rootdv)) { 1040 case DV_IFNET: 1041 aprint_normal("root on %s", rootdv->dv_xname); 1042 break; 1043 1044 case DV_DISK: 1045 aprint_normal("root on %s%c", rootdv->dv_xname, 1046 DISKPART(rootdev) + 'a'); 1047 break; 1048 1049 default: 1050 printf("can't determine root device\n"); 1051 boothowto |= RB_ASKNAME; 1052 goto top; 1053 } 1054 1055 /* 1056 * Now configure the dump device. 1057 * 1058 * If we haven't figured out the dump device, do so, with 1059 * the following rules: 1060 * 1061 * (a) We already know dumpdv in the RB_ASKNAME case. 1062 * 1063 * (b) If dumpspec is set, try to use it. If the device 1064 * is not available, punt. 1065 * 1066 * (c) If dumpspec is not set, the dump device is 1067 * wildcarded or unspecified. If the root device 1068 * is DV_IFNET, punt. Otherwise, use partition b 1069 * of the root device. 1070 */ 1071 1072 if (boothowto & RB_ASKNAME) { /* (a) */ 1073 if (dumpdv == NULL) 1074 goto nodumpdev; 1075 } else if (dumpspec != NULL) { /* (b) */ 1076 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) { 1077 /* 1078 * Operator doesn't want a dump device. 1079 * Or looks like they tried to pick a network 1080 * device. Oops. 1081 */ 1082 goto nodumpdev; 1083 } 1084 1085 dumpdevname = devsw_blk2name(major(dumpdev)); 1086 if (dumpdevname == NULL) 1087 goto nodumpdev; 1088 memset(buf, 0, sizeof(buf)); 1089 snprintf(buf, sizeof(buf), "%s%d", dumpdevname, 1090 DISKUNIT(dumpdev)); 1091 1092 dumpdv = finddevice(buf); 1093 if (dumpdv == NULL) { 1094 /* 1095 * Device not configured. 1096 */ 1097 goto nodumpdev; 1098 } 1099 } else { /* (c) */ 1100 if (DEV_USES_PARTITIONS(rootdv) == 0) 1101 goto nodumpdev; 1102 else { 1103 dumpdv = rootdv; 1104 dumpdev = MAKEDISKDEV(major(rootdev), 1105 device_unit(dumpdv), 1); 1106 } 1107 } 1108 1109 aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname, 1110 DISKPART(dumpdev) + 'a'); 1111 return; 1112 1113 nodumpdev: 1114 dumpdev = NODEV; 1115 aprint_normal("\n"); 1116 } 1117 1118 static struct device * 1119 finddevice(const char *name) 1120 { 1121 struct device *dv; 1122 #if defined(BOOT_FROM_RAID_HOOKS) || defined(BOOT_FROM_MEMORY_HOOKS) 1123 int j; 1124 #endif /* BOOT_FROM_RAID_HOOKS || BOOT_FROM_MEMORY_HOOKS */ 1125 1126 #ifdef BOOT_FROM_RAID_HOOKS 1127 for (j = 0; j < numraid; j++) { 1128 if (strcmp(name, raidrootdev[j].dv_xname) == 0) { 1129 dv = &raidrootdev[j]; 1130 return (dv); 1131 } 1132 } 1133 #endif /* BOOT_FROM_RAID_HOOKS */ 1134 1135 #ifdef BOOT_FROM_MEMORY_HOOKS 1136 for (j = 0; j < NMD; j++) { 1137 if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) { 1138 dv = &fakemdrootdev[j]; 1139 return (dv); 1140 } 1141 } 1142 #endif /* BOOT_FROM_MEMORY_HOOKS */ 1143 1144 for (dv = TAILQ_FIRST(&alldevs); dv != NULL; 1145 dv = TAILQ_NEXT(dv, dv_list)) 1146 if (strcmp(dv->dv_xname, name) == 0) 1147 break; 1148 return (dv); 1149 } 1150 1151 static struct device * 1152 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump) 1153 { 1154 struct device *dv; 1155 #ifdef MEMORY_DISK_HOOKS 1156 int i; 1157 #endif 1158 #ifdef BOOT_FROM_RAID_HOOKS 1159 int j; 1160 #endif 1161 1162 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1163 printf("use one of:"); 1164 #ifdef MEMORY_DISK_HOOKS 1165 if (isdump == 0) 1166 for (i = 0; i < NMD; i++) 1167 printf(" %s[a-%c]", fakemdrootdev[i].dv_xname, 1168 'a' + MAXPARTITIONS - 1); 1169 #endif 1170 #ifdef BOOT_FROM_RAID_HOOKS 1171 if (isdump == 0) 1172 for (j = 0; j < numraid; j++) 1173 printf(" %s[a-%c]", raidrootdev[j].dv_xname, 1174 'a' + MAXPARTITIONS - 1); 1175 #endif 1176 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1177 if (DEV_USES_PARTITIONS(dv)) 1178 printf(" %s[a-%c]", dv->dv_xname, 1179 'a' + MAXPARTITIONS - 1); 1180 else if (device_class(dv) == DV_DISK) 1181 printf(" %s", dv->dv_xname); 1182 if (isdump == 0 && device_class(dv) == DV_IFNET) 1183 printf(" %s", dv->dv_xname); 1184 } 1185 if (isdump) 1186 printf(" none"); 1187 #if defined(DDB) 1188 printf(" ddb"); 1189 #endif 1190 printf(" halt reboot\n"); 1191 } 1192 return (dv); 1193 } 1194 1195 static struct device * 1196 parsedisk(char *str, int len, int defpart, dev_t *devp) 1197 { 1198 struct device *dv; 1199 char *cp, c; 1200 int majdev, part; 1201 #ifdef MEMORY_DISK_HOOKS 1202 int i; 1203 #endif 1204 if (len == 0) 1205 return (NULL); 1206 1207 if (len == 4 && strcmp(str, "halt") == 0) 1208 cpu_reboot(RB_HALT, NULL); 1209 else if (len == 6 && strcmp(str, "reboot") == 0) 1210 cpu_reboot(0, NULL); 1211 #if defined(DDB) 1212 else if (len == 3 && strcmp(str, "ddb") == 0) 1213 console_debugger(); 1214 #endif 1215 1216 cp = str + len - 1; 1217 c = *cp; 1218 if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) { 1219 part = c - 'a'; 1220 *cp = '\0'; 1221 } else 1222 part = defpart; 1223 1224 #ifdef MEMORY_DISK_HOOKS 1225 for (i = 0; i < NMD; i++) 1226 if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) { 1227 dv = &fakemdrootdev[i]; 1228 goto gotdisk; 1229 } 1230 #endif 1231 1232 dv = finddevice(str); 1233 if (dv != NULL) { 1234 if (device_class(dv) == DV_DISK) { 1235 #ifdef MEMORY_DISK_HOOKS 1236 gotdisk: 1237 #endif 1238 majdev = devsw_name2blk(dv->dv_xname, NULL, 0); 1239 if (majdev < 0) 1240 panic("parsedisk"); 1241 if (DEV_USES_PARTITIONS(dv)) 1242 *devp = MAKEDISKDEV(majdev, device_unit(dv), 1243 part); 1244 else 1245 *devp = makedev(majdev, device_unit(dv)); 1246 } 1247 1248 if (device_class(dv) == DV_IFNET) 1249 *devp = NODEV; 1250 } 1251 1252 *cp = c; 1253 return (dv); 1254 } 1255 1256 /* 1257 * snprintf() `bytes' into `buf', reformatting it so that the number, 1258 * plus a possible `x' + suffix extension) fits into len bytes (including 1259 * the terminating NUL). 1260 * Returns the number of bytes stored in buf, or -1 if there was a problem. 1261 * E.g, given a len of 9 and a suffix of `B': 1262 * bytes result 1263 * ----- ------ 1264 * 99999 `99999 B' 1265 * 100000 `97 kB' 1266 * 66715648 `65152 kB' 1267 * 252215296 `240 MB' 1268 */ 1269 int 1270 humanize_number(char *buf, size_t len, uint64_t bytes, const char *suffix, 1271 int divisor) 1272 { 1273 /* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */ 1274 const char *prefixes; 1275 int r; 1276 uint64_t umax; 1277 size_t i, suffixlen; 1278 1279 if (buf == NULL || suffix == NULL) 1280 return (-1); 1281 if (len > 0) 1282 buf[0] = '\0'; 1283 suffixlen = strlen(suffix); 1284 /* check if enough room for `x y' + suffix + `\0' */ 1285 if (len < 4 + suffixlen) 1286 return (-1); 1287 1288 if (divisor == 1024) { 1289 /* 1290 * binary multiplies 1291 * XXX IEC 60027-2 recommends Ki, Mi, Gi... 1292 */ 1293 prefixes = " KMGTPE"; 1294 } else 1295 prefixes = " kMGTPE"; /* SI for decimal multiplies */ 1296 1297 umax = 1; 1298 for (i = 0; i < len - suffixlen - 3; i++) 1299 umax *= 10; 1300 for (i = 0; bytes >= umax && prefixes[i + 1]; i++) 1301 bytes /= divisor; 1302 1303 r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes, 1304 i == 0 ? "" : " ", prefixes[i], suffix); 1305 1306 return (r); 1307 } 1308 1309 int 1310 format_bytes(char *buf, size_t len, uint64_t bytes) 1311 { 1312 int rv; 1313 size_t nlen; 1314 1315 rv = humanize_number(buf, len, bytes, "B", 1024); 1316 if (rv != -1) { 1317 /* nuke the trailing ` B' if it exists */ 1318 nlen = strlen(buf) - 2; 1319 if (strcmp(&buf[nlen], " B") == 0) 1320 buf[nlen] = '\0'; 1321 } 1322 return (rv); 1323 } 1324 1325 /* 1326 * Return TRUE if system call tracing is enabled for the specified process. 1327 */ 1328 boolean_t 1329 trace_is_enabled(struct proc *p) 1330 { 1331 #ifdef SYSCALL_DEBUG 1332 return (TRUE); 1333 #endif 1334 #ifdef KTRACE 1335 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 1336 return (TRUE); 1337 #endif 1338 #ifdef SYSTRACE 1339 if (ISSET(p->p_flag, P_SYSTRACE)) 1340 return (TRUE); 1341 #endif 1342 if (ISSET(p->p_flag, P_SYSCALL)) 1343 return (TRUE); 1344 1345 return (FALSE); 1346 } 1347 1348 /* 1349 * Start trace of particular system call. If process is being traced, 1350 * this routine is called by MD syscall dispatch code just before 1351 * a system call is actually executed. 1352 * MD caller guarantees the passed 'code' is within the supported 1353 * system call number range for emulation the process runs under. 1354 */ 1355 int 1356 trace_enter(struct lwp *l, register_t code, 1357 register_t realcode, const struct sysent *callp, void *args) 1358 { 1359 struct proc *p = l->l_proc; 1360 1361 #ifdef SYSCALL_DEBUG 1362 scdebug_call(l, code, args); 1363 #endif /* SYSCALL_DEBUG */ 1364 1365 #ifdef KTRACE 1366 if (KTRPOINT(p, KTR_SYSCALL)) 1367 ktrsyscall(l, code, realcode, callp, args); 1368 #endif /* KTRACE */ 1369 1370 if ((p->p_flag & (P_SYSCALL|P_TRACED)) == (P_SYSCALL|P_TRACED)) 1371 process_stoptrace(l); 1372 1373 #ifdef SYSTRACE 1374 if (ISSET(p->p_flag, P_SYSTRACE)) 1375 return systrace_enter(p, code, args); 1376 #endif 1377 return 0; 1378 } 1379 1380 /* 1381 * End trace of particular system call. If process is being traced, 1382 * this routine is called by MD syscall dispatch code just after 1383 * a system call finishes. 1384 * MD caller guarantees the passed 'code' is within the supported 1385 * system call number range for emulation the process runs under. 1386 */ 1387 void 1388 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[], 1389 int error) 1390 { 1391 struct proc *p = l->l_proc; 1392 1393 #ifdef SYSCALL_DEBUG 1394 scdebug_ret(l, code, error, rval); 1395 #endif /* SYSCALL_DEBUG */ 1396 1397 #ifdef KTRACE 1398 if (KTRPOINT(p, KTR_SYSRET)) { 1399 KERNEL_PROC_LOCK(l); 1400 ktrsysret(l, code, error, rval); 1401 KERNEL_PROC_UNLOCK(l); 1402 } 1403 #endif /* KTRACE */ 1404 1405 if ((p->p_flag & (P_SYSCALL|P_TRACED)) == (P_SYSCALL|P_TRACED)) 1406 process_stoptrace(l); 1407 1408 #ifdef SYSTRACE 1409 if (ISSET(p->p_flag, P_SYSTRACE)) { 1410 KERNEL_PROC_LOCK(l); 1411 systrace_exit(p, code, args, rval, error); 1412 KERNEL_PROC_UNLOCK(l); 1413 } 1414 #endif 1415 } 1416