/* $NetBSD: rump.c,v 1.234 2011/03/22 15:16:23 pooka Exp $ */

/*
 * Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.234 2011/03/22 15:16:23 pooka Exp $");

#include <sys/systm.h>
#define ELFSIZE ARCH_ELFSIZE

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/callout.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/device.h>
#include <sys/evcnt.h>
#include <sys/event.h>
#include <sys/exec_elf.h>
#include <sys/filedesc.h>
#include <sys/iostat.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/kprintf.h>
#include <sys/kthread.h>
#include <sys/ksyms.h>
#include <sys/msgbuf.h>
#include <sys/module.h>
#include <sys/namei.h>
#include <sys/once.h>
#include <sys/percpu.h>
#include <sys/pipe.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/reboot.h>
#include <sys/resourcevar.h>
#include <sys/select.h>
#include <sys/sysctl.h>
#include <sys/syscall.h>
#include <sys/syscallvar.h>
#include <sys/timetc.h>
#include <sys/tty.h>
#include <sys/uidinfo.h>
#include <sys/vmem.h>
#include <sys/xcall.h>
#include <sys/simplelock.h>

#include <rump/rumpuser.h>

#include <secmodel/suser/suser.h>

#include <prop/proplib.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"
#include "rump_net_private.h"
#include "rump_vfs_private.h"
#include "rump_dev_private.h"

/* machine identification string, initialized from MACHINE */
char machine[] = MACHINE;

/*
 * Points to proc0 during early rump__init() and is replaced by the
 * process found with proc_find_raw(1) near the end of rump__init().
 */
struct proc *initproc;

/* device structure of class DV_VIRTUAL; all other fields start zeroed */
struct device rump_rootdev = {
	.dv_class = DV_VIRTUAL
};

/*
 * Compile-time default for thread support.  rump__init() overrides
 * this from the RUMP_THREADS environment variable when it is set.
 */
#ifdef RUMP_WITHOUT_THREADS
int rump_threads = 0;
#else
int rump_threads = 1;
#endif

/* handlers referenced from the spops table further down in this file */
static int rump_proxy_syscall(int, void *, register_t *);
static int rump_proxy_rfork(void *, int, const char *);
static void rump_proxy_lwpexit(void);
static void rump_proxy_execnotify(const char *);

/* buffer handed to initmsgbuf() by rump__init() */
static char rump_msgbuf[16*1024];
/* 16k should be enough for std rump needs */ 107 108 #ifdef LOCKDEBUG 109 const int rump_lockdebug = 1; 110 #else 111 const int rump_lockdebug = 0; 112 #endif 113 bool rump_ttycomponent = false; 114 115 static void 116 rump_aiodone_worker(struct work *wk, void *dummy) 117 { 118 struct buf *bp = (struct buf *)wk; 119 120 KASSERT(&bp->b_work == wk); 121 bp->b_iodone(bp); 122 } 123 124 static int rump_inited; 125 126 /* 127 * Make sure pnbuf_cache is available even without vfs 128 */ 129 int rump_initpnbufpool(void); 130 int rump_initpnbufpool(void) 131 { 132 133 pnbuf_cache = pool_cache_init(MAXPATHLEN, 0, 0, 0, "pnbufpl", 134 NULL, IPL_NONE, NULL, NULL, NULL); 135 return EOPNOTSUPP; 136 } 137 138 int rump__unavailable(void); 139 int rump__unavailable() {return EOPNOTSUPP;} 140 __weak_alias(rump_net_init,rump__unavailable); 141 __weak_alias(rump_vfs_init,rump_initpnbufpool); 142 __weak_alias(rump_dev_init,rump__unavailable); 143 144 __weak_alias(rump_vfs_fini,rump__unavailable); 145 146 __weak_alias(biodone,rump__unavailable); 147 __weak_alias(sopoll,rump__unavailable); 148 149 __weak_alias(rump_vfs_drainbufs,rump__unavailable); 150 151 void rump__unavailable_vfs_panic(void); 152 void rump__unavailable_vfs_panic() {panic("vfs component not available");} 153 __weak_alias(usermount_common_policy,rump__unavailable_vfs_panic); 154 155 /* easier to write vfs-less clients */ 156 __weak_alias(rump_pub_etfs_register,rump__unavailable); 157 __weak_alias(rump_pub_etfs_register_withsize,rump__unavailable); 158 __weak_alias(rump_pub_etfs_remove,rump__unavailable); 159 160 rump_proc_vfs_init_fn rump_proc_vfs_init; 161 rump_proc_vfs_release_fn rump_proc_vfs_release; 162 163 static void add_linkedin_modules(const struct modinfo *const *, size_t); 164 165 static void __noinline 166 messthestack(void) 167 { 168 volatile uint32_t mess[64]; 169 uint64_t d1, d2; 170 int i, error; 171 172 for (i = 0; i < 64; i++) { 173 rumpuser_gettime(&d1, &d2, &error); 174 mess[i] = d2; 175 } 176 } 
177 178 /* 179 * Create kern.hostname. why only this you ask. well, init_sysctl 180 * is a kitchen sink in need of some gardening. but i want to use 181 * kern.hostname today. 182 */ 183 static void 184 mksysctls(void) 185 { 186 187 sysctl_createv(NULL, 0, NULL, NULL, 188 CTLFLAG_PERMANENT, CTLTYPE_NODE, "kern", NULL, 189 NULL, 0, NULL, 0, CTL_KERN, CTL_EOL); 190 191 /* XXX: setting hostnamelen is missing */ 192 sysctl_createv(NULL, 0, NULL, NULL, 193 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_STRING, "hostname", 194 SYSCTL_DESCR("System hostname"), NULL, 0, 195 &hostname, MAXHOSTNAMELEN, CTL_KERN, KERN_HOSTNAME, CTL_EOL); 196 } 197 198 /* there's no convenient kernel entry point for this, so just craft out own */ 199 static pid_t 200 spgetpid(void) 201 { 202 203 return curproc->p_pid; 204 } 205 206 static const struct rumpuser_sp_ops spops = { 207 .spop_schedule = rump_schedule, 208 .spop_unschedule = rump_unschedule, 209 .spop_lwproc_switch = rump_lwproc_switch, 210 .spop_lwproc_release = rump_lwproc_releaselwp, 211 .spop_lwproc_rfork = rump_proxy_rfork, 212 .spop_lwproc_newlwp = rump_lwproc_newlwp, 213 .spop_lwproc_curlwp = rump_lwproc_curlwp, 214 .spop_lwpexit = rump_proxy_lwpexit, 215 .spop_syscall = rump_proxy_syscall, 216 .spop_execnotify = rump_proxy_execnotify, 217 .spop_getpid = spgetpid, 218 }; 219 220 int 221 rump_daemonize_begin(void) 222 { 223 224 if (rump_inited) 225 return EALREADY; 226 227 return rumpuser_daemonize_begin(); 228 } 229 230 int 231 rump_daemonize_done(int error) 232 { 233 234 return rumpuser_daemonize_done(error); 235 } 236 237 int 238 rump__init(int rump_version) 239 { 240 char buf[256]; 241 struct timespec ts; 242 uint64_t sec, nsec; 243 struct lwp *l; 244 int i, numcpu; 245 int error; 246 247 /* not reentrant */ 248 if (rump_inited) 249 return 0; 250 else if (rump_inited == -1) 251 panic("rump_init: host process restart required"); 252 else 253 rump_inited = 1; 254 255 if (rumpuser_getversion() != RUMPUSER_VERSION) { 256 /* let's 
hope the ABI of rumpuser_dprintf is the same ;) */ 257 rumpuser_dprintf("rumpuser version mismatch: %d vs. %d\n", 258 rumpuser_getversion(), RUMPUSER_VERSION); 259 return EPROGMISMATCH; 260 } 261 262 if (rumpuser_getenv("RUMP_VERBOSE", buf, sizeof(buf), &error) == 0) { 263 if (*buf != '0') 264 boothowto = AB_VERBOSE; 265 } 266 267 if (rumpuser_getenv("RUMP_NCPU", buf, sizeof(buf), &error) == 0) 268 error = 0; 269 if (error == 0) { 270 numcpu = strtoll(buf, NULL, 10); 271 if (numcpu < 1) 272 numcpu = 1; 273 } else { 274 numcpu = rumpuser_getnhostcpu(); 275 } 276 rump_cpus_bootstrap(&numcpu); 277 278 rumpuser_gettime(&sec, &nsec, &error); 279 boottime.tv_sec = sec; 280 boottime.tv_nsec = nsec; 281 282 initmsgbuf(rump_msgbuf, sizeof(rump_msgbuf)); 283 aprint_verbose("%s%s", copyright, version); 284 285 /* 286 * Seed arc4random() with a "reasonable" amount of randomness. 287 * Yes, this is a quick kludge which depends on the arc4random 288 * implementation. 289 */ 290 messthestack(); 291 arc4random(); 292 293 if (rump_version != RUMP_VERSION) { 294 printf("rump version mismatch, %d vs. 
%d\n", 295 rump_version, RUMP_VERSION); 296 return EPROGMISMATCH; 297 } 298 299 if (rumpuser_getenv("RUMP_THREADS", buf, sizeof(buf), &error) == 0) { 300 rump_threads = *buf != '0'; 301 } 302 rumpuser_thrinit(rump_user_schedule, rump_user_unschedule, 303 rump_threads); 304 rump_intr_init(numcpu); 305 rump_tsleep_init(); 306 307 /* init minimal lwp/cpu context */ 308 l = &lwp0; 309 l->l_lid = 1; 310 l->l_cpu = l->l_target_cpu = rump_cpu; 311 l->l_fd = &filedesc0; 312 rumpuser_set_curlwp(l); 313 314 rumpuser_mutex_init(&rump_giantlock); 315 ksyms_init(); 316 uvm_init(); 317 evcnt_init(); 318 319 once_init(); 320 kernconfig_lock_init(); 321 prop_kern_init(); 322 323 pool_subsystem_init(); 324 kmem_init(); 325 326 uvm_ra_init(); 327 uao_init(); 328 329 mutex_obj_init(); 330 callout_startup(); 331 332 kprintf_init(); 333 loginit(); 334 335 kauth_init(); 336 337 procinit(); 338 proc0_init(); 339 uid_init(); 340 chgproccnt(0, 1); 341 342 l->l_proc = &proc0; 343 lwp_update_creds(l); 344 345 lwpinit_specificdata(); 346 lwp_initspecific(&lwp0); 347 348 rump_scheduler_init(numcpu); 349 /* revert temporary context and schedule a semireal context */ 350 rumpuser_set_curlwp(NULL); 351 initproc = &proc0; /* borrow proc0 before we get initproc started */ 352 rump_schedule(); 353 354 percpu_init(); 355 inittimecounter(); 356 ntp_init(); 357 358 rumpuser_gettime(&sec, &nsec, &error); 359 ts.tv_sec = sec; 360 ts.tv_nsec = nsec; 361 tc_setclock(&ts); 362 363 /* we are mostly go. 
do per-cpu subsystem init */ 364 for (i = 0; i < numcpu; i++) { 365 struct cpu_info *ci = cpu_lookup(i); 366 367 /* attach non-bootstrap CPUs */ 368 if (i > 0) { 369 rump_cpu_attach(ci); 370 ncpu++; 371 } 372 373 callout_init_cpu(ci); 374 softint_init(ci); 375 xc_init_cpu(ci); 376 pool_cache_cpu_init(ci); 377 selsysinit(ci); 378 percpu_init_cpu(ci); 379 380 TAILQ_INIT(&ci->ci_data.cpu_ld_locks); 381 __cpu_simple_lock_init(&ci->ci_data.cpu_ld_lock); 382 383 aprint_verbose("cpu%d at thinair0: rump virtual cpu\n", i); 384 } 385 386 sysctl_init(); 387 mksysctls(); 388 kqueue_init(); 389 iostat_init(); 390 fd_sys_init(); 391 module_init(); 392 devsw_init(); 393 pipe_init(); 394 resource_init(); 395 procinit_sysctl(); 396 397 /* start page baroness */ 398 if (rump_threads) { 399 if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL, 400 uvm_pageout, NULL, &uvm.pagedaemon_lwp, "pdaemon") != 0) 401 panic("pagedaemon create failed"); 402 } else 403 uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */ 404 405 /* process dso's */ 406 rumpuser_dl_bootstrap(add_linkedin_modules, rump_kernelfsym_load); 407 408 rump_component_init(RUMP_COMPONENT_KERN); 409 410 /* these do nothing if not present */ 411 rump_vfs_init(); 412 rump_net_init(); 413 rump_dev_init(); 414 415 rump_component_init(RUMP_COMPONENT_KERN_VFS); 416 417 /* 418 * if we initialized the tty component above, the tyttymtx is 419 * now initialized. otherwise, we need to initialize it. 
420 */ 421 if (!rump_ttycomponent) 422 mutex_init(&tty_lock, MUTEX_DEFAULT, IPL_VM); 423 424 cold = 0; 425 426 /* aieeeedondest */ 427 if (rump_threads) { 428 if (workqueue_create(&uvm.aiodone_queue, "aiodoned", 429 rump_aiodone_worker, NULL, 0, 0, WQ_MPSAFE)) 430 panic("aiodoned"); 431 } 432 433 sysctl_finalize(); 434 435 module_init_class(MODULE_CLASS_ANY); 436 437 rumpuser_gethostname(hostname, MAXHOSTNAMELEN, &error); 438 hostnamelen = strlen(hostname); 439 440 sigemptyset(&sigcantmask); 441 442 if (rump_threads) 443 vmem_rehash_start(); 444 445 /* 446 * Create init, used to attach implicit threads in rump. 447 * (note: must be done after vfsinit to get cwdi) 448 */ 449 (void)rump__lwproc_alloclwp(NULL); /* dummy thread for initproc */ 450 mutex_enter(proc_lock); 451 initproc = proc_find_raw(1); 452 mutex_exit(proc_lock); 453 if (initproc == NULL) 454 panic("where in the world is initproc?"); 455 456 /* 457 * Adjust syscall vector in case factions were dlopen()'d 458 * before calling rump_init(). 459 * (modules will handle dynamic syscalls the usual way) 460 * 461 * Note: this will adjust the function vectors of 462 * syscalls which use a funcalias (getpid etc.), but 463 * it makes no difference. 464 */ 465 for (i = 0; i < SYS_NSYSENT; i++) { 466 void *sym; 467 468 if (rump_sysent[i].sy_flags & SYCALL_NOSYS || 469 *syscallnames[i] == '#' || 470 rump_sysent[i].sy_call == sys_nomodule) 471 continue; 472 473 /* 474 * deal with compat wrappers. makesyscalls.sh should 475 * generate the necessary info instead of this hack, 476 * though. ugly, fix it later. 
477 */ 478 #define CPFX "compat_" 479 #define CPFXLEN (sizeof(CPFX)-1) 480 if (strncmp(syscallnames[i], CPFX, CPFXLEN) == 0) { 481 const char *p = syscallnames[i] + CPFXLEN; 482 size_t namelen; 483 484 /* skip version number */ 485 while (*p >= '0' && *p <= '9') 486 p++; 487 if (p == syscallnames[i] + CPFXLEN || *p != '_') 488 panic("invalid syscall name %s\n", 489 syscallnames[i]); 490 491 /* skip over the next underscore */ 492 p++; 493 namelen = p + (sizeof("rumpns_")-1) - syscallnames[i]; 494 495 strcpy(buf, "rumpns_"); 496 strcat(buf, syscallnames[i]); 497 /* XXX: no strncat in the kernel */ 498 strcpy(buf+namelen, "sys_"); 499 strcat(buf, p); 500 #undef CPFX 501 #undef CPFXLEN 502 } else { 503 sprintf(buf, "rumpns_sys_%s", syscallnames[i]); 504 } 505 if ((sym = rumpuser_dl_globalsym(buf)) != NULL 506 && sym != rump_sysent[i].sy_call) { 507 #if 0 508 rumpuser_dprintf("adjusting %s: %p (old %p)\n", 509 syscallnames[i], sym, rump_sysent[i].sy_call); 510 #endif 511 rump_sysent[i].sy_call = sym; 512 } 513 } 514 515 /* release cpu */ 516 rump_unschedule(); 517 518 return 0; 519 } 520 521 int 522 rump_init_server(const char *url) 523 { 524 525 return rumpuser_sp_init(url, &spops, ostype, osrelease, MACHINE); 526 } 527 528 void 529 cpu_reboot(int howto, char *bootstr) 530 { 531 int ruhow = 0; 532 void *finiarg; 533 534 printf("rump kernel halting...\n"); 535 536 if (!RUMP_LOCALPROC_P(curproc)) 537 finiarg = curproc->p_vmspace->vm_map.pmap; 538 else 539 finiarg = NULL; 540 541 /* dump means we really take the dive here */ 542 if ((howto & RB_DUMP) || panicstr) { 543 ruhow = RUMPUSER_PANIC; 544 goto out; 545 } 546 547 /* try to sync */ 548 if (!((howto & RB_NOSYNC) || panicstr)) { 549 rump_vfs_fini(); 550 } 551 552 /* your wish is my command */ 553 if (howto & RB_HALT) { 554 printf("rump kernel halted\n"); 555 rumpuser_sp_fini(finiarg); 556 for (;;) { 557 uint64_t sec = 5, nsec = 0; 558 int error; 559 560 rumpuser_nanosleep(&sec, &nsec, &error); 561 } 562 } 563 564 /* 
this function is __dead, we must exit */ 565 out: 566 printf("halted\n"); 567 rumpuser_sp_fini(finiarg); 568 rumpuser_exit(ruhow); 569 } 570 571 struct uio * 572 rump_uio_setup(void *buf, size_t bufsize, off_t offset, enum rump_uiorw rw) 573 { 574 struct uio *uio; 575 enum uio_rw uiorw; 576 577 switch (rw) { 578 case RUMPUIO_READ: 579 uiorw = UIO_READ; 580 break; 581 case RUMPUIO_WRITE: 582 uiorw = UIO_WRITE; 583 break; 584 default: 585 panic("%s: invalid rw %d", __func__, rw); 586 } 587 588 uio = kmem_alloc(sizeof(struct uio), KM_SLEEP); 589 uio->uio_iov = kmem_alloc(sizeof(struct iovec), KM_SLEEP); 590 591 uio->uio_iov->iov_base = buf; 592 uio->uio_iov->iov_len = bufsize; 593 594 uio->uio_iovcnt = 1; 595 uio->uio_offset = offset; 596 uio->uio_resid = bufsize; 597 uio->uio_rw = uiorw; 598 UIO_SETUP_SYSSPACE(uio); 599 600 return uio; 601 } 602 603 size_t 604 rump_uio_getresid(struct uio *uio) 605 { 606 607 return uio->uio_resid; 608 } 609 610 off_t 611 rump_uio_getoff(struct uio *uio) 612 { 613 614 return uio->uio_offset; 615 } 616 617 size_t 618 rump_uio_free(struct uio *uio) 619 { 620 size_t resid; 621 622 resid = uio->uio_resid; 623 kmem_free(uio->uio_iov, sizeof(*uio->uio_iov)); 624 kmem_free(uio, sizeof(*uio)); 625 626 return resid; 627 } 628 629 kauth_cred_t 630 rump_cred_create(uid_t uid, gid_t gid, size_t ngroups, gid_t *groups) 631 { 632 kauth_cred_t cred; 633 int rv; 634 635 cred = kauth_cred_alloc(); 636 kauth_cred_setuid(cred, uid); 637 kauth_cred_seteuid(cred, uid); 638 kauth_cred_setsvuid(cred, uid); 639 kauth_cred_setgid(cred, gid); 640 kauth_cred_setgid(cred, gid); 641 kauth_cred_setegid(cred, gid); 642 kauth_cred_setsvgid(cred, gid); 643 rv = kauth_cred_setgroups(cred, groups, ngroups, 0, UIO_SYSSPACE); 644 /* oh this is silly. 
and by "this" I mean kauth_cred_setgroups() */ 645 assert(rv == 0); 646 647 return cred; 648 } 649 650 void 651 rump_cred_put(kauth_cred_t cred) 652 { 653 654 kauth_cred_free(cred); 655 } 656 657 static int compcounter[RUMP_COMPONENT_MAX]; 658 659 static void 660 rump_component_init_cb(struct rump_component *rc, int type) 661 { 662 663 KASSERT(type < RUMP_COMPONENT_MAX); 664 if (rc->rc_type == type) { 665 rc->rc_init(); 666 compcounter[type]++; 667 } 668 } 669 670 int 671 rump_component_count(enum rump_component_type type) 672 { 673 674 KASSERT(type <= RUMP_COMPONENT_MAX); 675 return compcounter[type]; 676 } 677 678 void 679 rump_component_init(enum rump_component_type type) 680 { 681 682 rumpuser_dl_component_init(type, rump_component_init_cb); 683 } 684 685 /* 686 * Initialize a module which has already been loaded and linked 687 * with dlopen(). This is fundamentally the same as a builtin module. 688 */ 689 int 690 rump_module_init(const struct modinfo * const *mip, size_t nmodinfo) 691 { 692 693 return module_builtin_add(mip, nmodinfo, true); 694 } 695 696 /* 697 * Finish module (flawless victory, fatality!). 698 */ 699 int 700 rump_module_fini(const struct modinfo *mi) 701 { 702 703 return module_builtin_remove(mi, true); 704 } 705 706 /* 707 * Add loaded and linked module to the builtin list. It will 708 * later be initialized with module_init_class(). 709 */ 710 711 static void 712 add_linkedin_modules(const struct modinfo * const *mip, size_t nmodinfo) 713 { 714 715 module_builtin_add(mip, nmodinfo, false); 716 } 717 718 int 719 rump_kernelfsym_load(void *symtab, uint64_t symsize, 720 char *strtab, uint64_t strsize) 721 { 722 static int inited = 0; 723 Elf64_Ehdr ehdr; 724 725 if (inited) 726 return EBUSY; 727 inited = 1; 728 729 /* 730 * Use 64bit header since it's bigger. Shouldn't make a 731 * difference, since we're passing in all zeroes anyway. 
732 */ 733 memset(&ehdr, 0, sizeof(ehdr)); 734 ksyms_addsyms_explicit(&ehdr, symtab, symsize, strtab, strsize); 735 736 return 0; 737 } 738 739 static int 740 rump_proxy_syscall(int num, void *arg, register_t *retval) 741 { 742 struct lwp *l; 743 struct sysent *callp; 744 int rv; 745 746 if (__predict_false(num >= SYS_NSYSENT)) 747 return ENOSYS; 748 749 callp = rump_sysent + num; 750 l = curlwp; 751 rv = sy_call(callp, l, (void *)arg, retval); 752 753 return rv; 754 } 755 756 static int 757 rump_proxy_rfork(void *priv, int flags, const char *comm) 758 { 759 struct vmspace *newspace; 760 struct proc *p; 761 int error; 762 763 if ((error = rump_lwproc_rfork(flags)) != 0) 764 return error; 765 766 /* 767 * Since it's a proxy proc, adjust the vmspace. 768 * Refcount will eternally be 1. 769 */ 770 p = curproc; 771 newspace = kmem_zalloc(sizeof(*newspace), KM_SLEEP); 772 newspace->vm_refcnt = 1; 773 newspace->vm_map.pmap = priv; 774 KASSERT(p->p_vmspace == vmspace_kernel()); 775 p->p_vmspace = newspace; 776 if (comm) 777 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 778 779 return 0; 780 } 781 782 /* 783 * Order all lwps in a process to exit. does *not* wait for them to drain. 784 */ 785 static void 786 rump_proxy_lwpexit(void) 787 { 788 struct proc *p = curproc; 789 uint64_t where; 790 struct lwp *l; 791 792 mutex_enter(p->p_lock); 793 /* 794 * First pass: mark all lwps in the process with LW_RUMP_QEXIT 795 * so that they know they should exit. 796 */ 797 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 798 if (l == curlwp) 799 continue; 800 l->l_flag |= LW_RUMP_QEXIT; 801 } 802 mutex_exit(p->p_lock); 803 804 /* 805 * Next, make sure everyone on all CPUs sees our status 806 * update. This keeps threads inside cv_wait() and makes 807 * sure we don't access a stale cv pointer later when 808 * we wake up the threads. 
809 */ 810 811 where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); 812 xc_wait(where); 813 814 /* 815 * Ok, all lwps are either: 816 * 1) not in the cv code 817 * 2) sleeping on l->l_private 818 * 3) sleeping on p->p_waitcv 819 * 820 * Either way, l_private is stable until we set PS_RUMP_LWPEXIT 821 * in p->p_sflag. 822 */ 823 824 mutex_enter(p->p_lock); 825 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 826 if (l->l_private) 827 cv_broadcast(l->l_private); 828 } 829 p->p_sflag |= PS_RUMP_LWPEXIT; 830 cv_broadcast(&p->p_waitcv); 831 mutex_exit(p->p_lock); 832 } 833 834 /* 835 * Notify process that all threads have been drained and exec is complete. 836 */ 837 static void 838 rump_proxy_execnotify(const char *comm) 839 { 840 struct proc *p = curproc; 841 842 fd_closeexec(); 843 mutex_enter(p->p_lock); 844 KASSERT(p->p_nlwps == 1 && p->p_sflag & PS_RUMP_LWPEXIT); 845 p->p_sflag &= ~PS_RUMP_LWPEXIT; 846 mutex_exit(p->p_lock); 847 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 848 } 849 850 int 851 rump_boot_gethowto() 852 { 853 854 return boothowto; 855 } 856 857 void 858 rump_boot_sethowto(int howto) 859 { 860 861 boothowto = howto; 862 } 863 864 int 865 rump_getversion(void) 866 { 867 868 return __NetBSD_Version__; 869 } 870 871 /* 872 * Note: may be called unscheduled. Not fully safe since no locking 873 * of allevents (currently that's not even available). 874 */ 875 void 876 rump_printevcnts() 877 { 878 struct evcnt *ev; 879 880 TAILQ_FOREACH(ev, &allevents, ev_list) 881 rumpuser_dprintf("%s / %s: %" PRIu64 "\n", 882 ev->ev_group, ev->ev_name, ev->ev_count); 883 } 884 885 /* 886 * If you use this interface ... well ... all bets are off. 887 * The original purpose is for the p2k fs server library to be 888 * able to use the same pid/lid for VOPs as the host kernel. 889 */ 890 void 891 rump_allbetsareoff_setid(pid_t pid, int lid) 892 { 893 struct lwp *l = curlwp; 894 struct proc *p = l->l_proc; 895 896 l->l_lid = lid; 897 p->p_pid = pid; 898 } 899