1 /* $NetBSD: kern_subr.c,v 1.117 2005/06/23 23:15:12 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Luke Mewburn. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Copyright (c) 1992, 1993 50 * The Regents of the University of California. All rights reserved. 51 * 52 * This software was developed by the Computer Systems Engineering group 53 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 54 * contributed to Berkeley. 55 * 56 * All advertising materials mentioning features or use of this software 57 * must display the following acknowledgement: 58 * This product includes software developed by the University of 59 * California, Lawrence Berkeley Laboratory. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 1. Redistributions of source code must retain the above copyright 65 * notice, this list of conditions and the following disclaimer. 66 * 2. Redistributions in binary form must reproduce the above copyright 67 * notice, this list of conditions and the following disclaimer in the 68 * documentation and/or other materials provided with the distribution. 69 * 3. Neither the name of the University nor the names of its contributors 70 * may be used to endorse or promote products derived from this software 71 * without specific prior written permission. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 84 * 85 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95 86 */ 87 88 #include <sys/cdefs.h> 89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.117 2005/06/23 23:15:12 thorpej Exp $"); 90 91 #include "opt_ddb.h" 92 #include "opt_md.h" 93 #include "opt_syscall_debug.h" 94 #include "opt_ktrace.h" 95 #include "opt_systrace.h" 96 97 #include <sys/param.h> 98 #include <sys/systm.h> 99 #include <sys/proc.h> 100 #include <sys/malloc.h> 101 #include <sys/mount.h> 102 #include <sys/device.h> 103 #include <sys/reboot.h> 104 #include <sys/conf.h> 105 #include <sys/disklabel.h> 106 #include <sys/queue.h> 107 #include <sys/systrace.h> 108 #include <sys/ktrace.h> 109 110 #include <uvm/uvm_extern.h> 111 112 #include <dev/cons.h> 113 114 #include <net/if.h> 115 116 /* XXX these should eventually move to subr_autoconf.c */ 117 static struct device *finddevice(const char *); 118 static struct device *getdisk(char *, int, int, dev_t *, int); 119 static struct device *parsedisk(char *, int, int, dev_t *); 120 121 /* 122 * A generic linear hook. 123 */ 124 struct hook_desc { 125 LIST_ENTRY(hook_desc) hk_list; 126 void (*hk_fn)(void *); 127 void *hk_arg; 128 }; 129 typedef LIST_HEAD(, hook_desc) hook_list_t; 130 131 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 132 133 int 134 uiomove(void *buf, size_t n, struct uio *uio) 135 { 136 struct iovec *iov; 137 u_int cnt; 138 int error = 0; 139 char *cp = buf; 140 struct proc *p = uio->uio_procp; 141 int hold_count; 142 143 hold_count = KERNEL_LOCK_RELEASE_ALL(); 144 145 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC) 146 spinlock_switchcheck(); 147 #endif 148 #ifdef LOCKDEBUG 149 simple_lock_only_held(NULL, "uiomove"); 150 #endif 151 152 #ifdef DIAGNOSTIC 153 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) 154 panic("uiomove: mode"); 155 #endif 156 while (n > 0 && uio->uio_resid) { 157 iov = uio->uio_iov; 158 cnt = iov->iov_len; 159 if (cnt == 0) { 160 KASSERT(uio->uio_iovcnt > 0); 161 uio->uio_iov++; 162 uio->uio_iovcnt--; 163 continue; 164 } 165 if (cnt > n) 166 cnt = n; 167 switch (uio->uio_segflg) { 168 169 case UIO_USERSPACE: 170 if (curcpu()->ci_schedstate.spc_flags & 171 SPCF_SHOULDYIELD) 172 preempt(1); 173 if (__predict_true(p == curproc)) { 174 if (uio->uio_rw == UIO_READ) 175 error = copyout(cp, iov->iov_base, cnt); 176 else 177 error = copyin(iov->iov_base, cp, cnt); 178 } else { 179 if (uio->uio_rw == UIO_READ) 180 error = copyout_proc(p, cp, 181 iov->iov_base, cnt); 182 else 183 error = copyin_proc(p, iov->iov_base, 184 cp, cnt); 185 } 186 if (error) 187 goto out; 188 break; 189 190 case UIO_SYSSPACE: 191 if (uio->uio_rw == UIO_READ) 192 error = kcopy(cp, iov->iov_base, cnt); 193 else 194 error = kcopy(iov->iov_base, cp, cnt); 195 if (error) 196 goto out; 197 break; 198 } 199 iov->iov_base = (caddr_t)iov->iov_base + cnt; 200 iov->iov_len -= cnt; 201 uio->uio_resid -= cnt; 202 uio->uio_offset += cnt; 203 cp += cnt; 204 KDASSERT(cnt <= n); 205 n -= cnt; 206 } 207 out: 208 KERNEL_LOCK_ACQUIRE_COUNT(hold_count); 209 return (error); 210 } 211 212 /* 213 * Wrapper for uiomove() that validates the arguments against a known-good 214 * kernel buffer. 215 */ 216 int 217 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio) 218 { 219 size_t offset; 220 221 if (uio->uio_offset < 0 || uio->uio_resid < 0 || 222 (offset = uio->uio_offset) != uio->uio_offset) 223 return (EINVAL); 224 if (offset >= buflen) 225 return (0); 226 return (uiomove((char *)buf + offset, buflen - offset, uio)); 227 } 228 229 /* 230 * Give next character to user as result of read. 231 */ 232 int 233 ureadc(int c, struct uio *uio) 234 { 235 struct iovec *iov; 236 237 if (uio->uio_resid <= 0) 238 panic("ureadc: non-positive resid"); 239 again: 240 if (uio->uio_iovcnt <= 0) 241 panic("ureadc: non-positive iovcnt"); 242 iov = uio->uio_iov; 243 if (iov->iov_len <= 0) { 244 uio->uio_iovcnt--; 245 uio->uio_iov++; 246 goto again; 247 } 248 switch (uio->uio_segflg) { 249 250 case UIO_USERSPACE: 251 if (subyte(iov->iov_base, c) < 0) 252 return (EFAULT); 253 break; 254 255 case UIO_SYSSPACE: 256 *(char *)iov->iov_base = c; 257 break; 258 } 259 iov->iov_base = (caddr_t)iov->iov_base + 1; 260 iov->iov_len--; 261 uio->uio_resid--; 262 uio->uio_offset++; 263 return (0); 264 } 265 266 /* 267 * Like copyin(), but operates on an arbitrary process. 268 */ 269 int 270 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len) 271 { 272 struct iovec iov; 273 struct uio uio; 274 int error; 275 276 if (len == 0) 277 return (0); 278 279 iov.iov_base = kaddr; 280 iov.iov_len = len; 281 uio.uio_iov = &iov; 282 uio.uio_iovcnt = 1; 283 uio.uio_offset = (off_t)(intptr_t)uaddr; 284 uio.uio_resid = len; 285 uio.uio_segflg = UIO_SYSSPACE; 286 uio.uio_rw = UIO_READ; 287 uio.uio_procp = NULL; 288 289 /* XXXCDC: how should locking work here? */ 290 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1)) 291 return (EFAULT); 292 p->p_vmspace->vm_refcnt++; /* XXX */ 293 error = uvm_io(&p->p_vmspace->vm_map, &uio); 294 uvmspace_free(p->p_vmspace); 295 296 return (error); 297 } 298 299 /* 300 * Like copyout(), but operates on an arbitrary process. 301 */ 302 int 303 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len) 304 { 305 struct iovec iov; 306 struct uio uio; 307 int error; 308 309 if (len == 0) 310 return (0); 311 312 iov.iov_base = __UNCONST(kaddr); /* XXXUNCONST cast away const */ 313 iov.iov_len = len; 314 uio.uio_iov = &iov; 315 uio.uio_iovcnt = 1; 316 uio.uio_offset = (off_t)(intptr_t)uaddr; 317 uio.uio_resid = len; 318 uio.uio_segflg = UIO_SYSSPACE; 319 uio.uio_rw = UIO_WRITE; 320 uio.uio_procp = NULL; 321 322 /* XXXCDC: how should locking work here? */ 323 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1)) 324 return (EFAULT); 325 p->p_vmspace->vm_refcnt++; /* XXX */ 326 error = uvm_io(&p->p_vmspace->vm_map, &uio); 327 uvmspace_free(p->p_vmspace); 328 329 return (error); 330 } 331 332 /* 333 * General routine to allocate a hash table. 334 * Allocate enough memory to hold at least `elements' list-head pointers. 335 * Return a pointer to the allocated space and set *hashmask to a pattern 336 * suitable for masking a value to use as an index into the returned array. 337 */ 338 void * 339 hashinit(u_int elements, enum hashtype htype, struct malloc_type *mtype, 340 int mflags, u_long *hashmask) 341 { 342 u_long hashsize, i; 343 LIST_HEAD(, generic) *hashtbl_list; 344 TAILQ_HEAD(, generic) *hashtbl_tailq; 345 size_t esize; 346 void *p; 347 348 if (elements == 0) 349 panic("hashinit: bad cnt"); 350 for (hashsize = 1; hashsize < elements; hashsize <<= 1) 351 continue; 352 353 switch (htype) { 354 case HASH_LIST: 355 esize = sizeof(*hashtbl_list); 356 break; 357 case HASH_TAILQ: 358 esize = sizeof(*hashtbl_tailq); 359 break; 360 default: 361 #ifdef DIAGNOSTIC 362 panic("hashinit: invalid table type"); 363 #else 364 return NULL; 365 #endif 366 } 367 368 if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL) 369 return (NULL); 370 371 switch (htype) { 372 case HASH_LIST: 373 hashtbl_list = p; 374 for (i = 0; i < hashsize; i++) 375 LIST_INIT(&hashtbl_list[i]); 376 break; 377 case HASH_TAILQ: 378 hashtbl_tailq = p; 379 for (i = 0; i < hashsize; i++) 380 TAILQ_INIT(&hashtbl_tailq[i]); 381 break; 382 } 383 *hashmask = hashsize - 1; 384 return (p); 385 } 386 387 /* 388 * Free memory from hash table previosly allocated via hashinit(). 389 */ 390 void 391 hashdone(void *hashtbl, struct malloc_type *mtype) 392 { 393 394 free(hashtbl, mtype); 395 } 396 397 398 static void * 399 hook_establish(hook_list_t *list, void (*fn)(void *), void *arg) 400 { 401 struct hook_desc *hd; 402 403 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT); 404 if (hd == NULL) 405 return (NULL); 406 407 hd->hk_fn = fn; 408 hd->hk_arg = arg; 409 LIST_INSERT_HEAD(list, hd, hk_list); 410 411 return (hd); 412 } 413 414 static void 415 hook_disestablish(hook_list_t *list, void *vhook) 416 { 417 #ifdef DIAGNOSTIC 418 struct hook_desc *hd; 419 420 LIST_FOREACH(hd, list, hk_list) { 421 if (hd == vhook) 422 break; 423 } 424 425 if (hd == NULL) 426 panic("hook_disestablish: hook %p not established", vhook); 427 #endif 428 LIST_REMOVE((struct hook_desc *)vhook, hk_list); 429 free(vhook, M_DEVBUF); 430 } 431 432 static void 433 hook_destroy(hook_list_t *list) 434 { 435 struct hook_desc *hd; 436 437 while ((hd = LIST_FIRST(list)) != NULL) { 438 LIST_REMOVE(hd, hk_list); 439 free(hd, M_DEVBUF); 440 } 441 } 442 443 static void 444 hook_proc_run(hook_list_t *list, struct proc *p) 445 { 446 struct hook_desc *hd; 447 448 for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) { 449 ((void (*)(struct proc *, void *))*hd->hk_fn)(p, 450 hd->hk_arg); 451 } 452 } 453 454 /* 455 * "Shutdown hook" types, functions, and variables. 456 * 457 * Should be invoked immediately before the 458 * system is halted or rebooted, i.e. after file systems unmounted, 459 * after crash dump done, etc. 460 * 461 * Each shutdown hook is removed from the list before it's run, so that 462 * it won't be run again. 463 */ 464 465 static hook_list_t shutdownhook_list; 466 467 void * 468 shutdownhook_establish(void (*fn)(void *), void *arg) 469 { 470 return hook_establish(&shutdownhook_list, fn, arg); 471 } 472 473 void 474 shutdownhook_disestablish(void *vhook) 475 { 476 hook_disestablish(&shutdownhook_list, vhook); 477 } 478 479 /* 480 * Run shutdown hooks. Should be invoked immediately before the 481 * system is halted or rebooted, i.e. after file systems unmounted, 482 * after crash dump done, etc. 483 * 484 * Each shutdown hook is removed from the list before it's run, so that 485 * it won't be run again. 486 */ 487 void 488 doshutdownhooks(void) 489 { 490 struct hook_desc *dp; 491 492 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) { 493 LIST_REMOVE(dp, hk_list); 494 (*dp->hk_fn)(dp->hk_arg); 495 #if 0 496 /* 497 * Don't bother freeing the hook structure,, since we may 498 * be rebooting because of a memory corruption problem, 499 * and this might only make things worse. It doesn't 500 * matter, anyway, since the system is just about to 501 * reboot. 502 */ 503 free(dp, M_DEVBUF); 504 #endif 505 } 506 } 507 508 /* 509 * "Mountroot hook" types, functions, and variables. 510 */ 511 512 static hook_list_t mountroothook_list; 513 514 void * 515 mountroothook_establish(void (*fn)(struct device *), struct device *dev) 516 { 517 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev); 518 } 519 520 void 521 mountroothook_disestablish(void *vhook) 522 { 523 hook_disestablish(&mountroothook_list, vhook); 524 } 525 526 void 527 mountroothook_destroy(void) 528 { 529 hook_destroy(&mountroothook_list); 530 } 531 532 void 533 domountroothook(void) 534 { 535 struct hook_desc *hd; 536 537 LIST_FOREACH(hd, &mountroothook_list, hk_list) { 538 if (hd->hk_arg == (void *)root_device) { 539 (*hd->hk_fn)(hd->hk_arg); 540 return; 541 } 542 } 543 } 544 545 static hook_list_t exechook_list; 546 547 void * 548 exechook_establish(void (*fn)(struct proc *, void *), void *arg) 549 { 550 return hook_establish(&exechook_list, (void (*)(void *))fn, arg); 551 } 552 553 void 554 exechook_disestablish(void *vhook) 555 { 556 hook_disestablish(&exechook_list, vhook); 557 } 558 559 /* 560 * Run exec hooks. 561 */ 562 void 563 doexechooks(struct proc *p) 564 { 565 hook_proc_run(&exechook_list, p); 566 } 567 568 static hook_list_t exithook_list; 569 570 void * 571 exithook_establish(void (*fn)(struct proc *, void *), void *arg) 572 { 573 return hook_establish(&exithook_list, (void (*)(void *))fn, arg); 574 } 575 576 void 577 exithook_disestablish(void *vhook) 578 { 579 hook_disestablish(&exithook_list, vhook); 580 } 581 582 /* 583 * Run exit hooks. 584 */ 585 void 586 doexithooks(struct proc *p) 587 { 588 hook_proc_run(&exithook_list, p); 589 } 590 591 static hook_list_t forkhook_list; 592 593 void * 594 forkhook_establish(void (*fn)(struct proc *, struct proc *)) 595 { 596 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL); 597 } 598 599 void 600 forkhook_disestablish(void *vhook) 601 { 602 hook_disestablish(&forkhook_list, vhook); 603 } 604 605 /* 606 * Run fork hooks. 607 */ 608 void 609 doforkhooks(struct proc *p2, struct proc *p1) 610 { 611 struct hook_desc *hd; 612 613 LIST_FOREACH(hd, &forkhook_list, hk_list) { 614 ((void (*)(struct proc *, struct proc *))*hd->hk_fn) 615 (p2, p1); 616 } 617 } 618 619 /* 620 * "Power hook" types, functions, and variables. 621 * The list of power hooks is kept ordered with the last registered hook 622 * first. 623 * When running the hooks on power down the hooks are called in reverse 624 * registration order, when powering up in registration order. 625 */ 626 struct powerhook_desc { 627 CIRCLEQ_ENTRY(powerhook_desc) sfd_list; 628 void (*sfd_fn)(int, void *); 629 void *sfd_arg; 630 }; 631 632 static CIRCLEQ_HEAD(, powerhook_desc) powerhook_list = 633 CIRCLEQ_HEAD_INITIALIZER(powerhook_list); 634 635 void * 636 powerhook_establish(void (*fn)(int, void *), void *arg) 637 { 638 struct powerhook_desc *ndp; 639 640 ndp = (struct powerhook_desc *) 641 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT); 642 if (ndp == NULL) 643 return (NULL); 644 645 ndp->sfd_fn = fn; 646 ndp->sfd_arg = arg; 647 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list); 648 649 return (ndp); 650 } 651 652 void 653 powerhook_disestablish(void *vhook) 654 { 655 #ifdef DIAGNOSTIC 656 struct powerhook_desc *dp; 657 658 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) 659 if (dp == vhook) 660 goto found; 661 panic("powerhook_disestablish: hook %p not established", vhook); 662 found: 663 #endif 664 665 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook, 666 sfd_list); 667 free(vhook, M_DEVBUF); 668 } 669 670 /* 671 * Run power hooks. 672 */ 673 void 674 dopowerhooks(int why) 675 { 676 struct powerhook_desc *dp; 677 678 if (why == PWR_RESUME || why == PWR_SOFTRESUME) { 679 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) { 680 (*dp->sfd_fn)(why, dp->sfd_arg); 681 } 682 } else { 683 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) { 684 (*dp->sfd_fn)(why, dp->sfd_arg); 685 } 686 } 687 } 688 689 /* 690 * Determine the root device and, if instructed to, the root file system. 691 */ 692 693 #include "md.h" 694 #if NMD == 0 695 #undef MEMORY_DISK_HOOKS 696 #endif 697 698 #ifdef MEMORY_DISK_HOOKS 699 static struct device fakemdrootdev[NMD]; 700 #endif 701 702 #ifdef MEMORY_DISK_IS_ROOT 703 #define BOOT_FROM_MEMORY_HOOKS 1 704 #endif 705 706 #include "raid.h" 707 #if NRAID == 1 708 #define BOOT_FROM_RAID_HOOKS 1 709 #endif 710 711 #ifdef BOOT_FROM_RAID_HOOKS 712 extern int numraid; 713 extern struct device *raidrootdev; 714 #endif 715 716 /* 717 * The device and wedge that we booted from. If booted_wedge is NULL, 718 * the we might consult booted_partition. 719 */ 720 struct device *booted_device; 721 struct device *booted_wedge; 722 int booted_partition; 723 724 /* 725 * Use partition letters if it's a disk class but not a wedge. 726 * XXX Check for wedge is kinda gross. 727 */ 728 #define DEV_USES_PARTITIONS(dv) \ 729 ((dv)->dv_class == DV_DISK && \ 730 ((dv)->dv_cfdata == NULL || \ 731 strcmp((dv)->dv_cfdata->cf_name, "dk") != 0)) 732 733 void 734 setroot(struct device *bootdv, int bootpartition) 735 { 736 struct device *dv; 737 int len; 738 #ifdef MEMORY_DISK_HOOKS 739 int i; 740 #endif 741 dev_t nrootdev; 742 dev_t ndumpdev = NODEV; 743 char buf[128]; 744 const char *rootdevname; 745 const char *dumpdevname; 746 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */ 747 struct device *dumpdv = NULL; 748 struct ifnet *ifp; 749 const char *deffsname; 750 struct vfsops *vops; 751 752 #ifdef MEMORY_DISK_HOOKS 753 for (i = 0; i < NMD; i++) { 754 fakemdrootdev[i].dv_class = DV_DISK; 755 fakemdrootdev[i].dv_cfdata = NULL; 756 fakemdrootdev[i].dv_unit = i; 757 fakemdrootdev[i].dv_parent = NULL; 758 snprintf(fakemdrootdev[i].dv_xname, 759 sizeof(fakemdrootdev[i].dv_xname), "md%d", i); 760 } 761 #endif /* MEMORY_DISK_HOOKS */ 762 763 #ifdef MEMORY_DISK_IS_ROOT 764 bootdv = &fakemdrootdev[0]; 765 bootpartition = 0; 766 #endif 767 768 /* 769 * If NFS is specified as the file system, and we found 770 * a DV_DISK boot device (or no boot device at all), then 771 * find a reasonable network interface for "rootspec". 772 */ 773 vops = vfs_getopsbyname("nfs"); 774 if (vops != NULL && vops->vfs_mountroot == mountroot && 775 rootspec == NULL && 776 (bootdv == NULL || bootdv->dv_class != DV_IFNET)) { 777 IFNET_FOREACH(ifp) { 778 if ((ifp->if_flags & 779 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) 780 break; 781 } 782 if (ifp == NULL) { 783 /* 784 * Can't find a suitable interface; ask the 785 * user. 786 */ 787 boothowto |= RB_ASKNAME; 788 } else { 789 /* 790 * Have a suitable interface; behave as if 791 * the user specified this interface. 792 */ 793 rootspec = (const char *)ifp->if_xname; 794 } 795 } 796 797 /* 798 * If wildcarded root and we the boot device wasn't determined, 799 * ask the user. 800 */ 801 if (rootspec == NULL && bootdv == NULL) 802 boothowto |= RB_ASKNAME; 803 804 top: 805 if (boothowto & RB_ASKNAME) { 806 struct device *defdumpdv; 807 808 for (;;) { 809 printf("root device"); 810 if (bootdv != NULL) { 811 printf(" (default %s", bootdv->dv_xname); 812 if (DEV_USES_PARTITIONS(bootdv)) 813 printf("%c", bootpartition + 'a'); 814 printf(")"); 815 } 816 printf(": "); 817 len = cngetsn(buf, sizeof(buf)); 818 if (len == 0 && bootdv != NULL) { 819 strlcpy(buf, bootdv->dv_xname, sizeof(buf)); 820 len = strlen(buf); 821 } 822 if (len > 0 && buf[len - 1] == '*') { 823 buf[--len] = '\0'; 824 dv = getdisk(buf, len, 1, &nrootdev, 0); 825 if (dv != NULL) { 826 rootdv = dv; 827 break; 828 } 829 } 830 dv = getdisk(buf, len, bootpartition, &nrootdev, 0); 831 if (dv != NULL) { 832 rootdv = dv; 833 break; 834 } 835 } 836 837 /* 838 * Set up the default dump device. If root is on 839 * a network device, there is no default dump 840 * device, since we don't support dumps to the 841 * network. 842 */ 843 if (DEV_USES_PARTITIONS(rootdv) == 0) 844 defdumpdv = NULL; 845 else 846 defdumpdv = rootdv; 847 848 for (;;) { 849 printf("dump device"); 850 if (defdumpdv != NULL) { 851 /* 852 * Note, we know it's a disk if we get here. 853 */ 854 printf(" (default %sb)", defdumpdv->dv_xname); 855 } 856 printf(": "); 857 len = cngetsn(buf, sizeof(buf)); 858 if (len == 0) { 859 if (defdumpdv != NULL) { 860 ndumpdev = MAKEDISKDEV(major(nrootdev), 861 DISKUNIT(nrootdev), 1); 862 } 863 dumpdv = defdumpdv; 864 break; 865 } 866 if (len == 4 && strcmp(buf, "none") == 0) { 867 dumpdv = NULL; 868 break; 869 } 870 dv = getdisk(buf, len, 1, &ndumpdev, 1); 871 if (dv != NULL) { 872 dumpdv = dv; 873 break; 874 } 875 } 876 877 rootdev = nrootdev; 878 dumpdev = ndumpdev; 879 880 for (vops = LIST_FIRST(&vfs_list); vops != NULL; 881 vops = LIST_NEXT(vops, vfs_list)) { 882 if (vops->vfs_mountroot != NULL && 883 vops->vfs_mountroot == mountroot) 884 break; 885 } 886 887 if (vops == NULL) { 888 mountroot = NULL; 889 deffsname = "generic"; 890 } else 891 deffsname = vops->vfs_name; 892 893 for (;;) { 894 printf("file system (default %s): ", deffsname); 895 len = cngetsn(buf, sizeof(buf)); 896 if (len == 0) 897 break; 898 if (len == 4 && strcmp(buf, "halt") == 0) 899 cpu_reboot(RB_HALT, NULL); 900 else if (len == 6 && strcmp(buf, "reboot") == 0) 901 cpu_reboot(0, NULL); 902 #if defined(DDB) 903 else if (len == 3 && strcmp(buf, "ddb") == 0) { 904 console_debugger(); 905 } 906 #endif 907 else if (len == 7 && strcmp(buf, "generic") == 0) { 908 mountroot = NULL; 909 break; 910 } 911 vops = vfs_getopsbyname(buf); 912 if (vops == NULL || vops->vfs_mountroot == NULL) { 913 printf("use one of: generic"); 914 for (vops = LIST_FIRST(&vfs_list); 915 vops != NULL; 916 vops = LIST_NEXT(vops, vfs_list)) { 917 if (vops->vfs_mountroot != NULL) 918 printf(" %s", vops->vfs_name); 919 } 920 #if defined(DDB) 921 printf(" ddb"); 922 #endif 923 printf(" halt reboot\n"); 924 } else { 925 mountroot = vops->vfs_mountroot; 926 break; 927 } 928 } 929 930 } else if (rootspec == NULL) { 931 int majdev; 932 933 /* 934 * Wildcarded root; use the boot device. 935 */ 936 rootdv = bootdv; 937 938 majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0); 939 if (majdev >= 0) { 940 /* 941 * Root is on a disk. `bootpartition' is root, 942 * unless the device does not use partitions. 943 */ 944 if (DEV_USES_PARTITIONS(bootdv)) 945 rootdev = MAKEDISKDEV(majdev, bootdv->dv_unit, 946 bootpartition); 947 else 948 rootdev = makedev(majdev, bootdv->dv_unit); 949 } 950 } else { 951 952 /* 953 * `root on <dev> ...' 954 */ 955 956 /* 957 * If it's a network interface, we can bail out 958 * early. 959 */ 960 dv = finddevice(rootspec); 961 if (dv != NULL && dv->dv_class == DV_IFNET) { 962 rootdv = dv; 963 goto haveroot; 964 } 965 966 rootdevname = devsw_blk2name(major(rootdev)); 967 if (rootdevname == NULL) { 968 printf("unknown device major 0x%x\n", rootdev); 969 boothowto |= RB_ASKNAME; 970 goto top; 971 } 972 memset(buf, 0, sizeof(buf)); 973 snprintf(buf, sizeof(buf), "%s%d", rootdevname, 974 DISKUNIT(rootdev)); 975 976 rootdv = finddevice(buf); 977 if (rootdv == NULL) { 978 printf("device %s (0x%x) not configured\n", 979 buf, rootdev); 980 boothowto |= RB_ASKNAME; 981 goto top; 982 } 983 } 984 985 haveroot: 986 987 root_device = rootdv; 988 989 switch (rootdv->dv_class) { 990 case DV_IFNET: 991 aprint_normal("root on %s", rootdv->dv_xname); 992 break; 993 994 case DV_DISK: 995 aprint_normal("root on %s%c", rootdv->dv_xname, 996 DISKPART(rootdev) + 'a'); 997 break; 998 999 default: 1000 printf("can't determine root device\n"); 1001 boothowto |= RB_ASKNAME; 1002 goto top; 1003 } 1004 1005 /* 1006 * Now configure the dump device. 1007 * 1008 * If we haven't figured out the dump device, do so, with 1009 * the following rules: 1010 * 1011 * (a) We already know dumpdv in the RB_ASKNAME case. 1012 * 1013 * (b) If dumpspec is set, try to use it. If the device 1014 * is not available, punt. 1015 * 1016 * (c) If dumpspec is not set, the dump device is 1017 * wildcarded or unspecified. If the root device 1018 * is DV_IFNET, punt. Otherwise, use partition b 1019 * of the root device. 1020 */ 1021 1022 if (boothowto & RB_ASKNAME) { /* (a) */ 1023 if (dumpdv == NULL) 1024 goto nodumpdev; 1025 } else if (dumpspec != NULL) { /* (b) */ 1026 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) { 1027 /* 1028 * Operator doesn't want a dump device. 1029 * Or looks like they tried to pick a network 1030 * device. Oops. 1031 */ 1032 goto nodumpdev; 1033 } 1034 1035 dumpdevname = devsw_blk2name(major(dumpdev)); 1036 if (dumpdevname == NULL) 1037 goto nodumpdev; 1038 memset(buf, 0, sizeof(buf)); 1039 snprintf(buf, sizeof(buf), "%s%d", dumpdevname, 1040 DISKUNIT(dumpdev)); 1041 1042 dumpdv = finddevice(buf); 1043 if (dumpdv == NULL) { 1044 /* 1045 * Device not configured. 1046 */ 1047 goto nodumpdev; 1048 } 1049 } else { /* (c) */ 1050 if (DEV_USES_PARTITIONS(rootdv) == 0) 1051 goto nodumpdev; 1052 else { 1053 dumpdv = rootdv; 1054 dumpdev = MAKEDISKDEV(major(rootdev), 1055 dumpdv->dv_unit, 1); 1056 } 1057 } 1058 1059 aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname, 1060 DISKPART(dumpdev) + 'a'); 1061 return; 1062 1063 nodumpdev: 1064 dumpdev = NODEV; 1065 aprint_normal("\n"); 1066 } 1067 1068 static struct device * 1069 finddevice(const char *name) 1070 { 1071 struct device *dv; 1072 #if defined(BOOT_FROM_RAID_HOOKS) || defined(BOOT_FROM_MEMORY_HOOKS) 1073 int j; 1074 #endif /* BOOT_FROM_RAID_HOOKS || BOOT_FROM_MEMORY_HOOKS */ 1075 1076 #ifdef BOOT_FROM_RAID_HOOKS 1077 for (j = 0; j < numraid; j++) { 1078 if (strcmp(name, raidrootdev[j].dv_xname) == 0) { 1079 dv = &raidrootdev[j]; 1080 return (dv); 1081 } 1082 } 1083 #endif /* BOOT_FROM_RAID_HOOKS */ 1084 1085 #ifdef BOOT_FROM_MEMORY_HOOKS 1086 for (j = 0; j < NMD; j++) { 1087 if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) { 1088 dv = &fakemdrootdev[j]; 1089 return (dv); 1090 } 1091 } 1092 #endif /* BOOT_FROM_MEMORY_HOOKS */ 1093 1094 for (dv = TAILQ_FIRST(&alldevs); dv != NULL; 1095 dv = TAILQ_NEXT(dv, dv_list)) 1096 if (strcmp(dv->dv_xname, name) == 0) 1097 break; 1098 return (dv); 1099 } 1100 1101 static struct device * 1102 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump) 1103 { 1104 struct device *dv; 1105 #ifdef MEMORY_DISK_HOOKS 1106 int i; 1107 #endif 1108 #ifdef BOOT_FROM_RAID_HOOKS 1109 int j; 1110 #endif 1111 1112 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1113 printf("use one of:"); 1114 #ifdef MEMORY_DISK_HOOKS 1115 if (isdump == 0) 1116 for (i = 0; i < NMD; i++) 1117 printf(" %s[a-%c]", fakemdrootdev[i].dv_xname, 1118 'a' + MAXPARTITIONS - 1); 1119 #endif 1120 #ifdef BOOT_FROM_RAID_HOOKS 1121 if (isdump == 0) 1122 for (j = 0; j < numraid; j++) 1123 printf(" %s[a-%c]", raidrootdev[j].dv_xname, 1124 'a' + MAXPARTITIONS - 1); 1125 #endif 1126 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1127 if (DEV_USES_PARTITIONS(dv)) 1128 printf(" %s[a-%c]", dv->dv_xname, 1129 'a' + MAXPARTITIONS - 1); 1130 else if (dv->dv_class == DV_DISK) 1131 printf(" %s", dv->dv_xname); 1132 if (isdump == 0 && dv->dv_class == DV_IFNET) 1133 printf(" %s", dv->dv_xname); 1134 } 1135 if (isdump) 1136 printf(" none"); 1137 #if defined(DDB) 1138 printf(" ddb"); 1139 #endif 1140 printf(" halt reboot\n"); 1141 } 1142 return (dv); 1143 } 1144 1145 static struct device * 1146 parsedisk(char *str, int len, int defpart, dev_t *devp) 1147 { 1148 struct device *dv; 1149 char *cp, c; 1150 int majdev, part; 1151 #ifdef MEMORY_DISK_HOOKS 1152 int i; 1153 #endif 1154 if (len == 0) 1155 return (NULL); 1156 1157 if (len == 4 && strcmp(str, "halt") == 0) 1158 cpu_reboot(RB_HALT, NULL); 1159 else if (len == 6 && strcmp(str, "reboot") == 0) 1160 cpu_reboot(0, NULL); 1161 #if defined(DDB) 1162 else if (len == 3 && strcmp(str, "ddb") == 0) 1163 console_debugger(); 1164 #endif 1165 1166 cp = str + len - 1; 1167 c = *cp; 1168 if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) { 1169 part = c - 'a'; 1170 *cp = '\0'; 1171 } else 1172 part = defpart; 1173 1174 #ifdef MEMORY_DISK_HOOKS 1175 for (i = 0; i < NMD; i++) 1176 if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) { 1177 dv = &fakemdrootdev[i]; 1178 goto gotdisk; 1179 } 1180 #endif 1181 1182 dv = finddevice(str); 1183 if (dv != NULL) { 1184 if (dv->dv_class == DV_DISK) { 1185 #ifdef MEMORY_DISK_HOOKS 1186 gotdisk: 1187 #endif 1188 majdev = devsw_name2blk(dv->dv_xname, NULL, 0); 1189 if (majdev < 0) 1190 panic("parsedisk"); 1191 if (DEV_USES_PARTITIONS(dv)) 1192 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1193 else 1194 *devp = makedev(majdev, dv->dv_unit); 1195 } 1196 1197 if (dv->dv_class == DV_IFNET) 1198 *devp = NODEV; 1199 } 1200 1201 *cp = c; 1202 return (dv); 1203 } 1204 1205 /* 1206 * snprintf() `bytes' into `buf', reformatting it so that the number, 1207 * plus a possible `x' + suffix extension) fits into len bytes (including 1208 * the terminating NUL). 1209 * Returns the number of bytes stored in buf, or -1 if there was a problem. 1210 * E.g, given a len of 9 and a suffix of `B': 1211 * bytes result 1212 * ----- ------ 1213 * 99999 `99999 B' 1214 * 100000 `97 kB' 1215 * 66715648 `65152 kB' 1216 * 252215296 `240 MB' 1217 */ 1218 int 1219 humanize_number(char *buf, size_t len, uint64_t bytes, const char *suffix, 1220 int divisor) 1221 { 1222 /* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */ 1223 const char *prefixes; 1224 int r; 1225 u_int64_t umax; 1226 size_t i, suffixlen; 1227 1228 if (buf == NULL || suffix == NULL) 1229 return (-1); 1230 if (len > 0) 1231 buf[0] = '\0'; 1232 suffixlen = strlen(suffix); 1233 /* check if enough room for `x y' + suffix + `\0' */ 1234 if (len < 4 + suffixlen) 1235 return (-1); 1236 1237 if (divisor == 1024) { 1238 /* 1239 * binary multiplies 1240 * XXX IEC 60027-2 recommends Ki, Mi, Gi... 1241 */ 1242 prefixes = " KMGTPE"; 1243 } else 1244 prefixes = " kMGTPE"; /* SI for decimal multiplies */ 1245 1246 umax = 1; 1247 for (i = 0; i < len - suffixlen - 3; i++) 1248 umax *= 10; 1249 for (i = 0; bytes >= umax && prefixes[i + 1]; i++) 1250 bytes /= divisor; 1251 1252 r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes, 1253 i == 0 ? "" : " ", prefixes[i], suffix); 1254 1255 return (r); 1256 } 1257 1258 int 1259 format_bytes(char *buf, size_t len, uint64_t bytes) 1260 { 1261 int rv; 1262 size_t nlen; 1263 1264 rv = humanize_number(buf, len, bytes, "B", 1024); 1265 if (rv != -1) { 1266 /* nuke the trailing ` B' if it exists */ 1267 nlen = strlen(buf) - 2; 1268 if (strcmp(&buf[nlen], " B") == 0) 1269 buf[nlen] = '\0'; 1270 } 1271 return (rv); 1272 } 1273 1274 /* 1275 * Start trace of particular system call. If process is being traced, 1276 * this routine is called by MD syscall dispatch code just before 1277 * a system call is actually executed. 1278 * MD caller guarantees the passed 'code' is within the supported 1279 * system call number range for emulation the process runs under. 1280 */ 1281 int 1282 trace_enter(struct lwp *l, register_t code, 1283 register_t realcode, const struct sysent *callp, void *args) 1284 { 1285 #if defined(KTRACE) || defined(SYSTRACE) 1286 struct proc *p = l->l_proc; 1287 #endif 1288 1289 #ifdef SYSCALL_DEBUG 1290 scdebug_call(l, code, args); 1291 #endif /* SYSCALL_DEBUG */ 1292 1293 #ifdef KTRACE 1294 if (KTRPOINT(p, KTR_SYSCALL)) 1295 ktrsyscall(p, code, realcode, callp, args); 1296 #endif /* KTRACE */ 1297 1298 #ifdef SYSTRACE 1299 if (ISSET(p->p_flag, P_SYSTRACE)) 1300 return systrace_enter(p, code, args); 1301 #endif 1302 return 0; 1303 } 1304 1305 /* 1306 * End trace of particular system call. If process is being traced, 1307 * this routine is called by MD syscall dispatch code just after 1308 * a system call finishes. 1309 * MD caller guarantees the passed 'code' is within the supported 1310 * system call number range for emulation the process runs under. 1311 */ 1312 void 1313 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[], 1314 int error) 1315 { 1316 #if defined(KTRACE) || defined(SYSTRACE) 1317 struct proc *p = l->l_proc; 1318 #endif 1319 1320 #ifdef SYSCALL_DEBUG 1321 scdebug_ret(l, code, error, rval); 1322 #endif /* SYSCALL_DEBUG */ 1323 1324 #ifdef KTRACE 1325 if (KTRPOINT(p, KTR_SYSRET)) { 1326 KERNEL_PROC_LOCK(l); 1327 ktrsysret(p, code, error, rval); 1328 KERNEL_PROC_UNLOCK(l); 1329 } 1330 #endif /* KTRACE */ 1331 1332 #ifdef SYSTRACE 1333 if (ISSET(p->p_flag, P_SYSTRACE)) 1334 systrace_exit(p, code, args, rval, error); 1335 #endif 1336 } 1337