1 /* $NetBSD: kern_subr.c,v 1.206 2010/01/31 01:38:48 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 1999, 2002, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Luke Mewburn. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Copyright (c) 1992, 1993 43 * The Regents of the University of California. All rights reserved. 44 * 45 * This software was developed by the Computer Systems Engineering group 46 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 47 * contributed to Berkeley. 48 * 49 * All advertising materials mentioning features or use of this software 50 * must display the following acknowledgement: 51 * This product includes software developed by the University of 52 * California, Lawrence Berkeley Laboratory. 53 * 54 * Redistribution and use in source and binary forms, with or without 55 * modification, are permitted provided that the following conditions 56 * are met: 57 * 1. Redistributions of source code must retain the above copyright 58 * notice, this list of conditions and the following disclaimer. 59 * 2. Redistributions in binary form must reproduce the above copyright 60 * notice, this list of conditions and the following disclaimer in the 61 * documentation and/or other materials provided with the distribution. 62 * 3. Neither the name of the University nor the names of its contributors 63 * may be used to endorse or promote products derived from this software 64 * without specific prior written permission. 65 * 66 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 67 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 68 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 69 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 70 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 71 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 72 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 73 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 74 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 75 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 76 * SUCH DAMAGE. 77 * 78 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95 79 */ 80 81 #include <sys/cdefs.h> 82 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.206 2010/01/31 01:38:48 pooka Exp $"); 83 84 #include "opt_ddb.h" 85 #include "opt_md.h" 86 #include "opt_syscall_debug.h" 87 #include "opt_ktrace.h" 88 #include "opt_ptrace.h" 89 #include "opt_tftproot.h" 90 91 #include <sys/param.h> 92 #include <sys/systm.h> 93 #include <sys/proc.h> 94 #include <sys/mount.h> 95 #include <sys/device.h> 96 #include <sys/reboot.h> 97 #include <sys/conf.h> 98 #include <sys/disk.h> 99 #include <sys/disklabel.h> 100 #include <sys/queue.h> 101 #include <sys/ktrace.h> 102 #include <sys/ptrace.h> 103 #include <sys/fcntl.h> 104 #include <sys/kauth.h> 105 #include <sys/vnode.h> 106 #include <sys/syscallvar.h> 107 #include <sys/xcall.h> 108 #include <sys/module.h> 109 110 #include <uvm/uvm_extern.h> 111 112 #include <dev/cons.h> 113 114 #include <net/if.h> 115 116 /* XXX these should eventually move to subr_autoconf.c */ 117 static device_t finddevice(const char *); 118 static device_t getdisk(char *, int, int, dev_t *, int); 119 static device_t parsedisk(char *, int, int, dev_t *); 120 static const char *getwedgename(const char *, int); 121 122 #ifdef TFTPROOT 123 int tftproot_dhcpboot(device_t); 124 #endif 125 126 dev_t dumpcdev; /* for savecore */ 127 128 static int 129 isswap(device_t dv) 130 { 131 struct dkwedge_info wi; 132 struct vnode *vn; 133 int error; 134 135 if (device_class(dv) != DV_DISK || !device_is_a(dv, "dk")) 136 return 0; 137 138 if ((vn = opendisk(dv)) == NULL) 139 return 0; 140 141 error = VOP_IOCTL(vn, DIOCGWEDGEINFO, &wi, FREAD, NOCRED); 142 VOP_CLOSE(vn, FREAD, NOCRED); 143 vput(vn); 144 if (error) { 145 #ifdef DEBUG_WEDGE 146 printf("%s: Get wedge info returned %d\n", device_xname(dv), error); 147 #endif 148 return 0; 149 } 150 return strcmp(wi.dkw_ptype, DKW_PTYPE_SWAP) == 0; 151 } 152 153 /* 154 * Determine the root device and, if instructed to, the root file system. 155 */ 156 157 #include "md.h" 158 159 #if NMD > 0 160 extern struct cfdriver md_cd; 161 #ifdef MEMORY_DISK_IS_ROOT 162 int md_is_root = 1; 163 #else 164 int md_is_root = 0; 165 #endif 166 #endif 167 168 /* 169 * The device and wedge that we booted from. If booted_wedge is NULL, 170 * the we might consult booted_partition. 171 */ 172 device_t booted_device; 173 device_t booted_wedge; 174 int booted_partition; 175 176 /* 177 * Use partition letters if it's a disk class but not a wedge. 178 * XXX Check for wedge is kinda gross. 179 */ 180 #define DEV_USES_PARTITIONS(dv) \ 181 (device_class((dv)) == DV_DISK && \ 182 !device_is_a((dv), "dk")) 183 184 void 185 setroot(device_t bootdv, int bootpartition) 186 { 187 device_t dv; 188 deviter_t di; 189 int len, majdev; 190 dev_t nrootdev; 191 dev_t ndumpdev = NODEV; 192 char buf[128]; 193 const char *rootdevname; 194 const char *dumpdevname; 195 device_t rootdv = NULL; /* XXX gcc -Wuninitialized */ 196 device_t dumpdv = NULL; 197 struct ifnet *ifp; 198 const char *deffsname; 199 struct vfsops *vops; 200 201 #ifdef TFTPROOT 202 if (tftproot_dhcpboot(bootdv) != 0) 203 boothowto |= RB_ASKNAME; 204 #endif 205 206 #if NMD > 0 207 if (md_is_root) { 208 /* 209 * XXX there should be "root on md0" in the config file, 210 * but it isn't always 211 */ 212 bootdv = md_cd.cd_devs[0]; 213 bootpartition = 0; 214 } 215 #endif 216 217 /* 218 * If NFS is specified as the file system, and we found 219 * a DV_DISK boot device (or no boot device at all), then 220 * find a reasonable network interface for "rootspec". 221 */ 222 vops = vfs_getopsbyname(MOUNT_NFS); 223 if (vops != NULL && strcmp(rootfstype, MOUNT_NFS) == 0 && 224 rootspec == NULL && 225 (bootdv == NULL || device_class(bootdv) != DV_IFNET)) { 226 IFNET_FOREACH(ifp) { 227 if ((ifp->if_flags & 228 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) 229 break; 230 } 231 if (ifp == NULL) { 232 /* 233 * Can't find a suitable interface; ask the 234 * user. 235 */ 236 boothowto |= RB_ASKNAME; 237 } else { 238 /* 239 * Have a suitable interface; behave as if 240 * the user specified this interface. 241 */ 242 rootspec = (const char *)ifp->if_xname; 243 } 244 } 245 if (vops != NULL) 246 vfs_delref(vops); 247 248 /* 249 * If wildcarded root and we the boot device wasn't determined, 250 * ask the user. 251 */ 252 if (rootspec == NULL && bootdv == NULL) 253 boothowto |= RB_ASKNAME; 254 255 top: 256 if (boothowto & RB_ASKNAME) { 257 device_t defdumpdv; 258 259 for (;;) { 260 printf("root device"); 261 if (bootdv != NULL) { 262 printf(" (default %s", device_xname(bootdv)); 263 if (DEV_USES_PARTITIONS(bootdv)) 264 printf("%c", bootpartition + 'a'); 265 printf(")"); 266 } 267 printf(": "); 268 len = cngetsn(buf, sizeof(buf)); 269 if (len == 0 && bootdv != NULL) { 270 strlcpy(buf, device_xname(bootdv), sizeof(buf)); 271 len = strlen(buf); 272 } 273 if (len > 0 && buf[len - 1] == '*') { 274 buf[--len] = '\0'; 275 dv = getdisk(buf, len, 1, &nrootdev, 0); 276 if (dv != NULL) { 277 rootdv = dv; 278 break; 279 } 280 } 281 dv = getdisk(buf, len, bootpartition, &nrootdev, 0); 282 if (dv != NULL) { 283 rootdv = dv; 284 break; 285 } 286 } 287 288 /* 289 * Set up the default dump device. If root is on 290 * a network device, there is no default dump 291 * device, since we don't support dumps to the 292 * network. 293 */ 294 if (DEV_USES_PARTITIONS(rootdv) == 0) 295 defdumpdv = NULL; 296 else 297 defdumpdv = rootdv; 298 299 for (;;) { 300 printf("dump device"); 301 if (defdumpdv != NULL) { 302 /* 303 * Note, we know it's a disk if we get here. 304 */ 305 printf(" (default %sb)", device_xname(defdumpdv)); 306 } 307 printf(": "); 308 len = cngetsn(buf, sizeof(buf)); 309 if (len == 0) { 310 if (defdumpdv != NULL) { 311 ndumpdev = MAKEDISKDEV(major(nrootdev), 312 DISKUNIT(nrootdev), 1); 313 } 314 dumpdv = defdumpdv; 315 break; 316 } 317 if (len == 4 && strcmp(buf, "none") == 0) { 318 dumpdv = NULL; 319 break; 320 } 321 dv = getdisk(buf, len, 1, &ndumpdev, 1); 322 if (dv != NULL) { 323 dumpdv = dv; 324 break; 325 } 326 } 327 328 rootdev = nrootdev; 329 dumpdev = ndumpdev; 330 331 for (vops = LIST_FIRST(&vfs_list); vops != NULL; 332 vops = LIST_NEXT(vops, vfs_list)) { 333 if (vops->vfs_mountroot != NULL && 334 strcmp(rootfstype, vops->vfs_name) == 0) 335 break; 336 } 337 338 if (vops == NULL) { 339 deffsname = "generic"; 340 } else 341 deffsname = vops->vfs_name; 342 343 for (;;) { 344 printf("file system (default %s): ", deffsname); 345 len = cngetsn(buf, sizeof(buf)); 346 if (len == 0) { 347 if (strcmp(deffsname, "generic") == 0) 348 rootfstype = ROOT_FSTYPE_ANY; 349 break; 350 } 351 if (len == 4 && strcmp(buf, "halt") == 0) 352 cpu_reboot(RB_HALT, NULL); 353 else if (len == 6 && strcmp(buf, "reboot") == 0) 354 cpu_reboot(0, NULL); 355 #if defined(DDB) 356 else if (len == 3 && strcmp(buf, "ddb") == 0) { 357 console_debugger(); 358 } 359 #endif 360 else if (len == 7 && strcmp(buf, "generic") == 0) { 361 rootfstype = ROOT_FSTYPE_ANY; 362 break; 363 } 364 vops = vfs_getopsbyname(buf); 365 if (vops == NULL || vops->vfs_mountroot == NULL) { 366 printf("use one of: generic"); 367 for (vops = LIST_FIRST(&vfs_list); 368 vops != NULL; 369 vops = LIST_NEXT(vops, vfs_list)) { 370 if (vops->vfs_mountroot != NULL) 371 printf(" %s", vops->vfs_name); 372 } 373 if (vops != NULL) 374 vfs_delref(vops); 375 #if defined(DDB) 376 printf(" ddb"); 377 #endif 378 printf(" halt reboot\n"); 379 } else { 380 /* 381 * XXX If *vops gets freed between here and 382 * the call to mountroot(), rootfstype will 383 * point to something unexpected. But in 384 * this case the system will fail anyway. 385 */ 386 rootfstype = vops->vfs_name; 387 vfs_delref(vops); 388 break; 389 } 390 } 391 392 } else if (rootspec == NULL) { 393 /* 394 * Wildcarded root; use the boot device. 395 */ 396 rootdv = bootdv; 397 398 if (bootdv) 399 majdev = devsw_name2blk(device_xname(bootdv), NULL, 0); 400 else 401 majdev = -1; 402 if (majdev >= 0) { 403 /* 404 * Root is on a disk. `bootpartition' is root, 405 * unless the device does not use partitions. 406 */ 407 if (DEV_USES_PARTITIONS(bootdv)) 408 rootdev = MAKEDISKDEV(majdev, 409 device_unit(bootdv), 410 bootpartition); 411 else 412 rootdev = makedev(majdev, device_unit(bootdv)); 413 } 414 } else { 415 416 /* 417 * `root on <dev> ...' 418 */ 419 420 /* 421 * If it's a network interface, we can bail out 422 * early. 423 */ 424 dv = finddevice(rootspec); 425 if (dv != NULL && device_class(dv) == DV_IFNET) { 426 rootdv = dv; 427 goto haveroot; 428 } 429 430 if (rootdev == NODEV && 431 device_class(dv) == DV_DISK && device_is_a(dv, "dk") && 432 (majdev = devsw_name2blk(device_xname(dv), NULL, 0)) >= 0) 433 rootdev = makedev(majdev, device_unit(dv)); 434 435 rootdevname = devsw_blk2name(major(rootdev)); 436 if (rootdevname == NULL) { 437 printf("unknown device major 0x%llx\n", 438 (unsigned long long)rootdev); 439 boothowto |= RB_ASKNAME; 440 goto top; 441 } 442 memset(buf, 0, sizeof(buf)); 443 snprintf(buf, sizeof(buf), "%s%llu", rootdevname, 444 (unsigned long long)DISKUNIT(rootdev)); 445 446 rootdv = finddevice(buf); 447 if (rootdv == NULL) { 448 printf("device %s (0x%llx) not configured\n", 449 buf, (unsigned long long)rootdev); 450 boothowto |= RB_ASKNAME; 451 goto top; 452 } 453 } 454 455 haveroot: 456 457 root_device = rootdv; 458 459 switch (device_class(rootdv)) { 460 case DV_IFNET: 461 case DV_DISK: 462 aprint_normal("root on %s", device_xname(rootdv)); 463 if (DEV_USES_PARTITIONS(rootdv)) 464 aprint_normal("%c", (int)DISKPART(rootdev) + 'a'); 465 break; 466 467 default: 468 printf("can't determine root device\n"); 469 boothowto |= RB_ASKNAME; 470 goto top; 471 } 472 473 /* 474 * Now configure the dump device. 475 * 476 * If we haven't figured out the dump device, do so, with 477 * the following rules: 478 * 479 * (a) We already know dumpdv in the RB_ASKNAME case. 480 * 481 * (b) If dumpspec is set, try to use it. If the device 482 * is not available, punt. 483 * 484 * (c) If dumpspec is not set, the dump device is 485 * wildcarded or unspecified. If the root device 486 * is DV_IFNET, punt. Otherwise, use partition b 487 * of the root device. 488 */ 489 490 if (boothowto & RB_ASKNAME) { /* (a) */ 491 if (dumpdv == NULL) 492 goto nodumpdev; 493 } else if (dumpspec != NULL) { /* (b) */ 494 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) { 495 /* 496 * Operator doesn't want a dump device. 497 * Or looks like they tried to pick a network 498 * device. Oops. 499 */ 500 goto nodumpdev; 501 } 502 503 dumpdevname = devsw_blk2name(major(dumpdev)); 504 if (dumpdevname == NULL) 505 goto nodumpdev; 506 memset(buf, 0, sizeof(buf)); 507 snprintf(buf, sizeof(buf), "%s%llu", dumpdevname, 508 (unsigned long long)DISKUNIT(dumpdev)); 509 510 dumpdv = finddevice(buf); 511 if (dumpdv == NULL) { 512 /* 513 * Device not configured. 514 */ 515 goto nodumpdev; 516 } 517 } else { /* (c) */ 518 if (DEV_USES_PARTITIONS(rootdv) == 0) { 519 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); 520 dv != NULL; 521 dv = deviter_next(&di)) 522 if (isswap(dv)) 523 break; 524 deviter_release(&di); 525 if (dv == NULL) 526 goto nodumpdev; 527 528 majdev = devsw_name2blk(device_xname(dv), NULL, 0); 529 if (majdev < 0) 530 goto nodumpdev; 531 dumpdv = dv; 532 dumpdev = makedev(majdev, device_unit(dumpdv)); 533 } else { 534 dumpdv = rootdv; 535 dumpdev = MAKEDISKDEV(major(rootdev), 536 device_unit(dumpdv), 1); 537 } 538 } 539 540 dumpcdev = devsw_blk2chr(dumpdev); 541 aprint_normal(" dumps on %s", device_xname(dumpdv)); 542 if (DEV_USES_PARTITIONS(dumpdv)) 543 aprint_normal("%c", (int)DISKPART(dumpdev) + 'a'); 544 aprint_normal("\n"); 545 return; 546 547 nodumpdev: 548 dumpdev = NODEV; 549 dumpcdev = NODEV; 550 aprint_normal("\n"); 551 } 552 553 static device_t 554 finddevice(const char *name) 555 { 556 const char *wname; 557 558 if ((wname = getwedgename(name, strlen(name))) != NULL) 559 return dkwedge_find_by_wname(wname); 560 561 return device_find_by_xname(name); 562 } 563 564 static device_t 565 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump) 566 { 567 device_t dv; 568 deviter_t di; 569 570 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 571 printf("use one of:"); 572 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; 573 dv = deviter_next(&di)) { 574 if (DEV_USES_PARTITIONS(dv)) 575 printf(" %s[a-%c]", device_xname(dv), 576 'a' + MAXPARTITIONS - 1); 577 else if (device_class(dv) == DV_DISK) 578 printf(" %s", device_xname(dv)); 579 if (isdump == 0 && device_class(dv) == DV_IFNET) 580 printf(" %s", device_xname(dv)); 581 } 582 deviter_release(&di); 583 dkwedge_print_wnames(); 584 if (isdump) 585 printf(" none"); 586 #if defined(DDB) 587 printf(" ddb"); 588 #endif 589 printf(" halt reboot\n"); 590 } 591 return dv; 592 } 593 594 static const char * 595 getwedgename(const char *name, int namelen) 596 { 597 const char *wpfx = "wedge:"; 598 const int wpfxlen = strlen(wpfx); 599 600 if (namelen < wpfxlen || strncmp(name, wpfx, wpfxlen) != 0) 601 return NULL; 602 603 return name + wpfxlen; 604 } 605 606 static device_t 607 parsedisk(char *str, int len, int defpart, dev_t *devp) 608 { 609 device_t dv; 610 const char *wname; 611 char *cp, c; 612 int majdev, part; 613 if (len == 0) 614 return (NULL); 615 616 if (len == 4 && strcmp(str, "halt") == 0) 617 cpu_reboot(RB_HALT, NULL); 618 else if (len == 6 && strcmp(str, "reboot") == 0) 619 cpu_reboot(0, NULL); 620 #if defined(DDB) 621 else if (len == 3 && strcmp(str, "ddb") == 0) 622 console_debugger(); 623 #endif 624 625 cp = str + len - 1; 626 c = *cp; 627 628 if ((wname = getwedgename(str, len)) != NULL) { 629 if ((dv = dkwedge_find_by_wname(wname)) == NULL) 630 return NULL; 631 part = defpart; 632 goto gotdisk; 633 } else if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) { 634 part = c - 'a'; 635 *cp = '\0'; 636 } else 637 part = defpart; 638 639 dv = finddevice(str); 640 if (dv != NULL) { 641 if (device_class(dv) == DV_DISK) { 642 gotdisk: 643 majdev = devsw_name2blk(device_xname(dv), NULL, 0); 644 if (majdev < 0) 645 panic("parsedisk"); 646 if (DEV_USES_PARTITIONS(dv)) 647 *devp = MAKEDISKDEV(majdev, device_unit(dv), 648 part); 649 else 650 *devp = makedev(majdev, device_unit(dv)); 651 } 652 653 if (device_class(dv) == DV_IFNET) 654 *devp = NODEV; 655 } 656 657 *cp = c; 658 return (dv); 659 } 660 661 /* 662 * Return true if system call tracing is enabled for the specified process. 663 */ 664 bool 665 trace_is_enabled(struct proc *p) 666 { 667 #ifdef SYSCALL_DEBUG 668 return (true); 669 #endif 670 #ifdef KTRACE 671 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 672 return (true); 673 #endif 674 #ifdef PTRACE 675 if (ISSET(p->p_slflag, PSL_SYSCALL)) 676 return (true); 677 #endif 678 679 return (false); 680 } 681 682 /* 683 * Start trace of particular system call. If process is being traced, 684 * this routine is called by MD syscall dispatch code just before 685 * a system call is actually executed. 686 */ 687 int 688 trace_enter(register_t code, const register_t *args, int narg) 689 { 690 #ifdef SYSCALL_DEBUG 691 scdebug_call(code, args); 692 #endif /* SYSCALL_DEBUG */ 693 694 ktrsyscall(code, args, narg); 695 696 #ifdef PTRACE 697 if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 698 (PSL_SYSCALL|PSL_TRACED)) 699 process_stoptrace(); 700 #endif 701 return 0; 702 } 703 704 /* 705 * End trace of particular system call. If process is being traced, 706 * this routine is called by MD syscall dispatch code just after 707 * a system call finishes. 708 * MD caller guarantees the passed 'code' is within the supported 709 * system call number range for emulation the process runs under. 710 */ 711 void 712 trace_exit(register_t code, register_t rval[], int error) 713 { 714 #ifdef SYSCALL_DEBUG 715 scdebug_ret(code, error, rval); 716 #endif /* SYSCALL_DEBUG */ 717 718 ktrsysret(code, error, rval); 719 720 #ifdef PTRACE 721 if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 722 (PSL_SYSCALL|PSL_TRACED)) 723 process_stoptrace(); 724 #endif 725 } 726 727 int 728 syscall_establish(const struct emul *em, const struct syscall_package *sp) 729 { 730 struct sysent *sy; 731 int i; 732 733 KASSERT(mutex_owned(&module_lock)); 734 735 if (em == NULL) { 736 em = &emul_netbsd; 737 } 738 sy = em->e_sysent; 739 740 /* 741 * Ensure that all preconditions are valid, since this is 742 * an all or nothing deal. Once a system call is entered, 743 * it can become busy and we could be unable to remove it 744 * on error. 745 */ 746 for (i = 0; sp[i].sp_call != NULL; i++) { 747 if (sy[sp[i].sp_code].sy_call != sys_nomodule) { 748 #ifdef DIAGNOSTIC 749 printf("syscall %d is busy\n", sp[i].sp_code); 750 #endif 751 return EBUSY; 752 } 753 } 754 /* Everything looks good, patch them in. */ 755 for (i = 0; sp[i].sp_call != NULL; i++) { 756 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 757 } 758 759 return 0; 760 } 761 762 int 763 syscall_disestablish(const struct emul *em, const struct syscall_package *sp) 764 { 765 struct sysent *sy; 766 uint64_t where; 767 lwp_t *l; 768 int i; 769 770 KASSERT(mutex_owned(&module_lock)); 771 772 if (em == NULL) { 773 em = &emul_netbsd; 774 } 775 sy = em->e_sysent; 776 777 /* 778 * First, patch the system calls to sys_nomodule to gate further 779 * activity. 780 */ 781 for (i = 0; sp[i].sp_call != NULL; i++) { 782 KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call); 783 sy[sp[i].sp_code].sy_call = sys_nomodule; 784 } 785 786 /* 787 * Run a cross call to cycle through all CPUs. This does two 788 * things: lock activity provides a barrier and makes our update 789 * of sy_call visible to all CPUs, and upon return we can be sure 790 * that we see pertinent values of l_sysent posted by remote CPUs. 791 */ 792 where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); 793 xc_wait(where); 794 795 /* 796 * Now it's safe to check l_sysent. Run through all LWPs and see 797 * if anyone is still using the system call. 798 */ 799 for (i = 0; sp[i].sp_call != NULL; i++) { 800 mutex_enter(proc_lock); 801 LIST_FOREACH(l, &alllwp, l_list) { 802 if (l->l_sysent == &sy[sp[i].sp_code]) { 803 break; 804 } 805 } 806 mutex_exit(proc_lock); 807 if (l == NULL) { 808 continue; 809 } 810 /* 811 * We lose: one or more calls are still in use. Put back 812 * the old entrypoints and act like nothing happened. 813 * When we drop module_lock, any system calls held in 814 * sys_nomodule() will be restarted. 815 */ 816 for (i = 0; sp[i].sp_call != NULL; i++) { 817 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 818 } 819 return EBUSY; 820 } 821 822 return 0; 823 } 824