/*	$NetBSD: rump.c,v 1.247 2012/10/09 13:35:50 pooka Exp $	*/

/*
 * Copyright (c) 2007-2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.247 2012/10/09 13:35:50 pooka Exp $");

#include <sys/systm.h>
#define ELFSIZE ARCH_ELFSIZE

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/callout.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/device.h>
#include <sys/evcnt.h>
#include <sys/event.h>
#include <sys/exec_elf.h>
#include <sys/filedesc.h>
#include <sys/iostat.h>
#include <sys/kauth.h>
#include <sys/kcpuset.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/kprintf.h>
#include <sys/kthread.h>
#include <sys/ksyms.h>
#include <sys/msgbuf.h>
#include <sys/module.h>
#include <sys/namei.h>
#include <sys/once.h>
#include <sys/percpu.h>
#include <sys/pipe.h>
#include <sys/pool.h>
#include <sys/pserialize.h>
#include <sys/queue.h>
#include <sys/reboot.h>
#include <sys/resourcevar.h>
#include <sys/select.h>
#include <sys/sysctl.h>
#include <sys/syscall.h>
#include <sys/syscallvar.h>
#include <sys/timetc.h>
#include <sys/tty.h>
#include <sys/uidinfo.h>
#include <sys/vmem.h>
#include <sys/xcall.h>
#include <sys/simplelock.h>
#include <sys/cprng.h>

#include <rump/rumpuser.h>

#include <secmodel/suser/suser.h>

#include <prop/proplib.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"
#include "rump_net_private.h"
#include "rump_vfs_private.h"
#include "rump_dev_private.h"

char machine[] = MACHINE;

struct proc *initproc;

struct device rump_rootdev = {
        .dv_class = DV_VIRTUAL
};

#ifdef RUMP_WITHOUT_THREADS
int rump_threads = 0;
#else
int rump_threads = 1;
#endif

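/*
 * Proxy routines for remote clients.  These are hooked into the
 * rumpuser_sp_ops table (spops) defined further below.
 */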
static int rump_proxy_syscall(int, void *, register_t *);
static int rump_proxy_rfork(void *, int, const char *);
static void rump_proxy_lwpexit(void);
static void rump_proxy_execnotify(const char *);

static char rump_msgbuf[16*1024]; /* 16k should be enough for std rump needs */

#ifdef LOCKDEBUG
const int rump_lockdebug = 1;
#else
const int rump_lockdebug = 0;
#endif
bool rump_ttycomponent = false;

static void
rump_aiodone_worker(struct work *wk, void *dummy)
{
        struct buf *bp = (struct buf *)wk;

        KASSERT(&bp->b_work == wk);
        bp->b_iodone(bp);
}

static int rump_inited;

/*
 * Make sure pnbuf_cache is available even without vfs
 */
int rump_initpnbufpool(void);
int rump_initpnbufpool(void)
{

        pnbuf_cache = pool_cache_init(MAXPATHLEN, 0, 0, 0, "pnbufpl",
            NULL, IPL_NONE, NULL, NULL, NULL);
        return EOPNOTSUPP;
}

int rump__unavailable(void);
int rump__unavailable() {return EOPNOTSUPP;}
__weak_alias(rump_net_init,rump__unavailable);
__weak_alias(rump_vfs_init,rump_initpnbufpool);
__weak_alias(rump_dev_init,rump__unavailable);

__weak_alias(rump_vfs_fini,rump__unavailable);

__weak_alias(biodone,rump__unavailable);
__weak_alias(sopoll,rump__unavailable);

__weak_alias(rump_vfs_drainbufs,rump__unavailable);

void rump__unavailable_vfs_panic(void);
void rump__unavailable_vfs_panic() {panic("vfs component not available");}
__weak_alias(usermount_common_policy,rump__unavailable_vfs_panic);

/* easier to write vfs-less clients */
__weak_alias(rump_pub_etfs_register,rump__unavailable);
__weak_alias(rump_pub_etfs_register_withsize,rump__unavailable);
__weak_alias(rump_pub_etfs_remove,rump__unavailable);

rump_proc_vfs_init_fn rump_proc_vfs_init;
rump_proc_vfs_release_fn rump_proc_vfs_release;

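/*
 * add_linkedin_modules() is passed to rumpuser_dl_bootstrap() in
 * rump__init() so that modules linked into the binary are registered
 * as builtin modules.
 */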
static void add_linkedin_modules(const struct modinfo *const *, size_t);

/*
 * Create kern.hostname.  why only this you ask.  well, init_sysctl
 * is a kitchen sink in need of some gardening.  but i want to use
 * kern.hostname today.
 */
static void
mksysctls(void)
{

        sysctl_createv(NULL, 0, NULL, NULL,
            CTLFLAG_PERMANENT, CTLTYPE_NODE, "kern", NULL,
            NULL, 0, NULL, 0, CTL_KERN, CTL_EOL);

        /* XXX: setting hostnamelen is missing */
        sysctl_createv(NULL, 0, NULL, NULL,
            CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_STRING, "hostname",
            SYSCTL_DESCR("System hostname"), NULL, 0,
            hostname, MAXHOSTNAMELEN, CTL_KERN, KERN_HOSTNAME, CTL_EOL);
}

/* there's no convenient kernel entry point for this, so just craft our own */
static pid_t
spgetpid(void)
{

        return curproc->p_pid;
}

static const struct rumpuser_sp_ops spops = {
        .spop_schedule = rump_schedule,
        .spop_unschedule = rump_unschedule,
        .spop_lwproc_switch = rump_lwproc_switch,
        .spop_lwproc_release = rump_lwproc_releaselwp,
        .spop_lwproc_rfork = rump_proxy_rfork,
        .spop_lwproc_newlwp = rump_lwproc_newlwp,
        .spop_lwproc_curlwp = rump_lwproc_curlwp,
        .spop_lwpexit = rump_proxy_lwpexit,
        .spop_syscall = rump_proxy_syscall,
        .spop_execnotify = rump_proxy_execnotify,
        .spop_getpid = spgetpid,
};

int
rump_daemonize_begin(void)
{

        if (rump_inited)
                return EALREADY;

        return rumpuser_daemonize_begin();
}

int
rump_daemonize_done(int error)
{

        return rumpuser_daemonize_done(error);
}

int
rump__init(int rump_version)
{
        char buf[256];
        struct timespec ts;
        uint64_t sec, nsec;
        struct lwp *l;
        int i, numcpu;
        int error;

        /* not reentrant */
        if (rump_inited)
                return 0;
        else if (rump_inited == -1)
                panic("rump_init: host process restart required");
        else
                rump_inited = 1;

        if (rumpuser_getversion() != RUMPUSER_VERSION) {
                /* let's hope the ABI of rumpuser_dprintf is the same ;) */
                rumpuser_dprintf("rumpuser version mismatch: %d vs. %d\n",
                    rumpuser_getversion(), RUMPUSER_VERSION);
                return EPROGMISMATCH;
        }

        if (rumpuser_getenv("RUMP_VERBOSE", buf, sizeof(buf), &error) == 0) {
                if (*buf != '0')
                        boothowto = AB_VERBOSE;
        }

        if (rumpuser_getenv("RUMP_NCPU", buf, sizeof(buf), &error) == 0)
                error = 0;
        if (error == 0) {
                numcpu = strtoll(buf, NULL, 10);
                if (numcpu < 1)
                        numcpu = 1;
        } else {
                numcpu = rumpuser_getnhostcpu();
        }
        rump_cpus_bootstrap(&numcpu);

        rumpuser_gettime(&sec, &nsec, &error);
        boottime.tv_sec = sec;
        boottime.tv_nsec = nsec;

        initmsgbuf(rump_msgbuf, sizeof(rump_msgbuf));
        aprint_verbose("%s%s", copyright, version);

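        /* make sure the caller was built against a matching rump version */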
        if (rump_version != RUMP_VERSION) {
                printf("rump version mismatch, %d vs. %d\n",
                    rump_version, RUMP_VERSION);
                return EPROGMISMATCH;
        }

        if (rumpuser_getenv("RUMP_THREADS", buf, sizeof(buf), &error) == 0) {
                rump_threads = *buf != '0';
        }
        rumpuser_thrinit(rump_user_schedule, rump_user_unschedule,
            rump_threads);
        rump_intr_init(numcpu);
        rump_tsleep_init();

        /* init minimal lwp/cpu context */
        l = &lwp0;
        l->l_lid = 1;
        l->l_cpu = l->l_target_cpu = rump_cpu;
        l->l_fd = &filedesc0;
        rumpuser_set_curlwp(l);

        rumpuser_mutex_init(&rump_giantlock);
        ksyms_init();
        uvm_init();
        evcnt_init();

        kcpuset_sysinit();
        once_init();
        kernconfig_lock_init();
        prop_kern_init();

        kmem_init();

        uvm_ra_init();
        uao_init();

        mutex_obj_init();
        callout_startup();

        kprintf_init();
        pserialize_init();
        loginit();

        kauth_init();

        secmodel_init();

        rnd_init();

        /*
         * Create the kernel cprng.  Yes, it's currently stubbed out
         * to arc4random() for RUMP, but this won't always be so.
         */
        kern_cprng = cprng_strong_create("kernel", IPL_VM,
            CPRNG_INIT_ANY|CPRNG_REKEY_ANY);

        procinit();
        proc0_init();
        sysctl_init();
        uid_init();
        chgproccnt(0, 1);

        l->l_proc = &proc0;
        lwp_update_creds(l);

        lwpinit_specificdata();
        lwp_initspecific(&lwp0);

        rump_biglock_init();

        rump_scheduler_init(numcpu);
        /* revert temporary context and schedule a semireal context */
        rumpuser_set_curlwp(NULL);
        initproc = &proc0; /* borrow proc0 before we get initproc started */
        rump_schedule();

        percpu_init();
        inittimecounter();
        ntp_init();

        rumpuser_gettime(&sec, &nsec, &error);
        ts.tv_sec = sec;
        ts.tv_nsec = nsec;
        tc_setclock(&ts);

        /* we are mostly go.  do per-cpu subsystem init */
        for (i = 0; i < numcpu; i++) {
                struct cpu_info *ci = cpu_lookup(i);

                /* attach non-bootstrap CPUs */
                if (i > 0) {
                        rump_cpu_attach(ci);
                        ncpu++;
                }

                callout_init_cpu(ci);
                softint_init(ci);
                xc_init_cpu(ci);
                pool_cache_cpu_init(ci);
                selsysinit(ci);
                percpu_init_cpu(ci);

                TAILQ_INIT(&ci->ci_data.cpu_ld_locks);
                __cpu_simple_lock_init(&ci->ci_data.cpu_ld_lock);

                aprint_verbose("cpu%d at thinair0: rump virtual cpu\n", i);
        }

        mksysctls();
        kqueue_init();
        iostat_init();
        fd_sys_init();
        module_init();
        devsw_init();
        pipe_init();
        resource_init();
        procinit_sysctl();

        /* start page baroness */
        if (rump_threads) {
                if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL,
                    uvm_pageout, NULL, &uvm.pagedaemon_lwp, "pdaemon") != 0)
                        panic("pagedaemon create failed");
        } else
                uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */

        /* process dso's */
        rumpuser_dl_bootstrap(add_linkedin_modules, rump_kernelfsym_load);

        rump_component_init(RUMP_COMPONENT_KERN);

        /* these do nothing if not present */
        rump_vfs_init();
        rump_net_init();
        rump_dev_init();

        rump_component_init(RUMP_COMPONENT_KERN_VFS);

        /*
         * if we initialized the tty component above, the tty mutex is
         * now initialized.  otherwise, we need to initialize it.
         */
        if (!rump_ttycomponent)
                mutex_init(&tty_lock, MUTEX_DEFAULT, IPL_VM);

        cold = 0;

        /* aieeeedondest */
        if (rump_threads) {
                if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
                    rump_aiodone_worker, NULL, 0, 0, WQ_MPSAFE))
                        panic("aiodoned");
        }

        sysctl_finalize();

        module_init_class(MODULE_CLASS_ANY);

        rumpuser_gethostname(hostname, MAXHOSTNAMELEN, &error);
        hostnamelen = strlen(hostname);

        sigemptyset(&sigcantmask);

        if (rump_threads)
                vmem_rehash_start();

        /*
         * Create init, used to attach implicit threads in rump.
         * (note: must be done after vfsinit to get cwdi)
         */
        (void)rump__lwproc_alloclwp(NULL); /* dummy thread for initproc */
        mutex_enter(proc_lock);
        initproc = proc_find_raw(1);
        mutex_exit(proc_lock);
        if (initproc == NULL)
                panic("where in the world is initproc?");

        /*
         * Adjust syscall vector in case factions were dlopen()'d
         * before calling rump_init().
         * (modules will handle dynamic syscalls the usual way)
         *
         * Note: this will adjust the function vectors of
         * syscalls which use a funcalias (getpid etc.), but
         * it makes no difference.
         */
        for (i = 0; i < SYS_NSYSENT; i++) {
                void *sym;

                if (rump_sysent[i].sy_flags & SYCALL_NOSYS ||
                    *syscallnames[i] == '#' ||
                    rump_sysent[i].sy_call == sys_nomodule)
                        continue;

                /*
                 * deal with compat wrappers.  makesyscalls.sh should
                 * generate the necessary info instead of this hack,
                 * though.  ugly, fix it later.
                 */
#define CPFX "compat_"
#define CPFXLEN (sizeof(CPFX)-1)
                if (strncmp(syscallnames[i], CPFX, CPFXLEN) == 0) {
                        const char *p = syscallnames[i] + CPFXLEN;
                        size_t namelen;

                        /* skip version number */
                        while (*p >= '0' && *p <= '9')
                                p++;
                        if (p == syscallnames[i] + CPFXLEN || *p != '_')
                                panic("invalid syscall name %s\n",
                                    syscallnames[i]);

                        /* skip over the next underscore */
                        p++;
                        namelen = p + (sizeof("rumpns_")-1) - syscallnames[i];

                        strcpy(buf, "rumpns_");
                        strcat(buf, syscallnames[i]);
                        /* XXX: no strncat in the kernel */
                        strcpy(buf+namelen, "sys_");
                        strcat(buf, p);
#undef CPFX
#undef CPFXLEN
                } else {
                        sprintf(buf, "rumpns_sys_%s", syscallnames[i]);
                }
                if ((sym = rumpuser_dl_globalsym(buf)) != NULL
                    && sym != rump_sysent[i].sy_call) {
#if 0
                        rumpuser_dprintf("adjusting %s: %p (old %p)\n",
                            syscallnames[i], sym, rump_sysent[i].sy_call);
#endif
                        rump_sysent[i].sy_call = sym;
                }
        }

        /* release cpu */
        rump_unschedule();

        return 0;
}

int
rump_init_server(const char *url)
{

        return rumpuser_sp_init(url, &spops, ostype, osrelease, MACHINE);
}

void
cpu_reboot(int howto, char *bootstr)
{
        int ruhow = 0;
        void *finiarg;

        printf("rump kernel halting...\n");

        if (!RUMP_LOCALPROC_P(curproc))
                finiarg = curproc->p_vmspace->vm_map.pmap;
        else
                finiarg = NULL;

        /* dump means we really take the dive here */
        if ((howto & RB_DUMP) || panicstr) {
                ruhow = RUMPUSER_PANIC;
                goto out;
        }

        /* try to sync */
        if (!((howto & RB_NOSYNC) || panicstr)) {
                rump_vfs_fini();
        }

        /* your wish is my command */
        if (howto & RB_HALT) {
                printf("rump kernel halted\n");
                rumpuser_sp_fini(finiarg);
                for (;;) {
                        uint64_t sec = 5, nsec = 0;
                        int error;

                        rumpuser_nanosleep(&sec, &nsec, &error);
                }
        }

        /* this function is __dead, we must exit */
 out:
        printf("halted\n");
        rumpuser_sp_fini(finiarg);
        rumpuser_exit(ruhow);
}

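/*
 * uio convenience routines: build and tear down a single-iovec
 * UIO_SYSSPACE uio on behalf of rump clients.
 */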
struct uio *
rump_uio_setup(void *buf, size_t bufsize, off_t offset, enum rump_uiorw rw)
{
        struct uio *uio;
        enum uio_rw uiorw;

        switch (rw) {
        case RUMPUIO_READ:
                uiorw = UIO_READ;
                break;
        case RUMPUIO_WRITE:
                uiorw = UIO_WRITE;
                break;
        default:
                panic("%s: invalid rw %d", __func__, rw);
        }

        uio = kmem_alloc(sizeof(struct uio), KM_SLEEP);
        uio->uio_iov = kmem_alloc(sizeof(struct iovec), KM_SLEEP);

        uio->uio_iov->iov_base = buf;
        uio->uio_iov->iov_len = bufsize;

        uio->uio_iovcnt = 1;
        uio->uio_offset = offset;
        uio->uio_resid = bufsize;
        uio->uio_rw = uiorw;
        UIO_SETUP_SYSSPACE(uio);

        return uio;
}

size_t
rump_uio_getresid(struct uio *uio)
{

        return uio->uio_resid;
}

off_t
rump_uio_getoff(struct uio *uio)
{

        return uio->uio_offset;
}

size_t
rump_uio_free(struct uio *uio)
{
        size_t resid;

        resid = uio->uio_resid;
        kmem_free(uio->uio_iov, sizeof(*uio->uio_iov));
        kmem_free(uio, sizeof(*uio));

        return resid;
}

kauth_cred_t
rump_cred_create(uid_t uid, gid_t gid, size_t ngroups, gid_t *groups)
{
        kauth_cred_t cred;
        int rv;

        cred = kauth_cred_alloc();
        kauth_cred_setuid(cred, uid);
        kauth_cred_seteuid(cred, uid);
        kauth_cred_setsvuid(cred, uid);
        kauth_cred_setgid(cred, gid);
        kauth_cred_setgid(cred, gid);
        kauth_cred_setegid(cred, gid);
        kauth_cred_setsvgid(cred, gid);
        rv = kauth_cred_setgroups(cred, groups, ngroups, 0, UIO_SYSSPACE);
        /* oh this is silly.  and by "this" I mean kauth_cred_setgroups() */
        assert(rv == 0);

        return cred;
}

void
rump_cred_put(kauth_cred_t cred)
{

        kauth_cred_free(cred);
}

static int compcounter[RUMP_COMPONENT_MAX];

static void
rump_component_init_cb(struct rump_component *rc, int type)
{

        KASSERT(type < RUMP_COMPONENT_MAX);
        if (rc->rc_type == type) {
                rc->rc_init();
                compcounter[type]++;
        }
}

int
rump_component_count(enum rump_component_type type)
{

        KASSERT(type <= RUMP_COMPONENT_MAX);
        return compcounter[type];
}

void
rump_component_init(enum rump_component_type type)
{

        rumpuser_dl_component_init(type, rump_component_init_cb);
}

/*
 * Initialize a module which has already been loaded and linked
 * with dlopen().  This is fundamentally the same as a builtin module.
 */
int
rump_module_init(const struct modinfo * const *mip, size_t nmodinfo)
{

        return module_builtin_add(mip, nmodinfo, true);
}

/*
 * Finish module (flawless victory, fatality!).
 */
int
rump_module_fini(const struct modinfo *mi)
{

        return module_builtin_remove(mi, true);
}

/*
 * Add loaded and linked module to the builtin list.  It will
 * later be initialized with module_init_class().
 */
static void
add_linkedin_modules(const struct modinfo * const *mip, size_t nmodinfo)
{

        module_builtin_add(mip, nmodinfo, false);
}

int
rump_kernelfsym_load(void *symtab, uint64_t symsize,
        char *strtab, uint64_t strsize)
{
        static int inited = 0;
        Elf64_Ehdr ehdr;

        if (inited)
                return EBUSY;
        inited = 1;

        /*
         * Use 64bit header since it's bigger.  Shouldn't make a
         * difference, since we're passing in all zeroes anyway.
         */
        memset(&ehdr, 0, sizeof(ehdr));
        ksyms_addsyms_explicit(&ehdr, symtab, symsize, strtab, strsize);

        return 0;
}

static int
rump_proxy_syscall(int num, void *arg, register_t *retval)
{
        struct lwp *l;
        struct sysent *callp;
        int rv;

        if (__predict_false(num >= SYS_NSYSENT))
                return ENOSYS;

        callp = rump_sysent + num;
        l = curlwp;
        rv = sy_call(callp, l, (void *)arg, retval);

        return rv;
}

static int
rump_proxy_rfork(void *priv, int flags, const char *comm)
{
        struct vmspace *newspace;
        struct proc *p;
        int error;

        if ((error = rump_lwproc_rfork(flags)) != 0)
                return error;

        /*
         * Since it's a proxy proc, adjust the vmspace.
         * Refcount will eternally be 1.
         */
        p = curproc;
        newspace = kmem_zalloc(sizeof(*newspace), KM_SLEEP);
        newspace->vm_refcnt = 1;
        newspace->vm_map.pmap = priv;
        KASSERT(p->p_vmspace == vmspace_kernel());
        p->p_vmspace = newspace;
        if (comm)
                strlcpy(p->p_comm, comm, sizeof(p->p_comm));

        return 0;
}

/*
 * Order all lwps in a process to exit.  does *not* wait for them to drain.
 */
static void
rump_proxy_lwpexit(void)
{
        struct proc *p = curproc;
        uint64_t where;
        struct lwp *l;

        mutex_enter(p->p_lock);
        /*
         * First pass: mark all lwps in the process with LW_RUMP_QEXIT
         * so that they know they should exit.
         */
        LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                if (l == curlwp)
                        continue;
                l->l_flag |= LW_RUMP_QEXIT;
        }
        mutex_exit(p->p_lock);

        /*
         * Next, make sure everyone on all CPUs sees our status
         * update.  This keeps threads inside cv_wait() and makes
         * sure we don't access a stale cv pointer later when
         * we wake up the threads.
         */

        where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
        xc_wait(where);

        /*
         * Ok, all lwps are either:
         *  1) not in the cv code
         *  2) sleeping on l->l_private
         *  3) sleeping on p->p_waitcv
         *
         * Either way, l_private is stable until we set PS_RUMP_LWPEXIT
         * in p->p_sflag.
         */

        mutex_enter(p->p_lock);
        LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                if (l->l_private)
                        cv_broadcast(l->l_private);
        }
        p->p_sflag |= PS_RUMP_LWPEXIT;
        cv_broadcast(&p->p_waitcv);
        mutex_exit(p->p_lock);
}

/*
 * Notify process that all threads have been drained and exec is complete.
 */
static void
rump_proxy_execnotify(const char *comm)
{
        struct proc *p = curproc;

        fd_closeexec();
        mutex_enter(p->p_lock);
        KASSERT(p->p_nlwps == 1 && p->p_sflag & PS_RUMP_LWPEXIT);
        p->p_sflag &= ~PS_RUMP_LWPEXIT;
        mutex_exit(p->p_lock);
        strlcpy(p->p_comm, comm, sizeof(p->p_comm));
}

int
rump_boot_gethowto()
{

        return boothowto;
}

void
rump_boot_sethowto(int howto)
{

        boothowto = howto;
}

int
rump_getversion(void)
{

        return __NetBSD_Version__;
}

/*
 * Note: may be called unscheduled.  Not fully safe since no locking
 * of allevents (currently that's not even available).
 */
void
rump_printevcnts()
{
        struct evcnt *ev;

        TAILQ_FOREACH(ev, &allevents, ev_list)
                rumpuser_dprintf("%s / %s: %" PRIu64 "\n",
                    ev->ev_group, ev->ev_name, ev->ev_count);
}

/*
 * If you use this interface ... well ... all bets are off.
 * The original purpose is for the p2k fs server library to be
 * able to use the same pid/lid for VOPs as the host kernel.
 */
void
rump_allbetsareoff_setid(pid_t pid, int lid)
{
        struct lwp *l = curlwp;
        struct proc *p = l->l_proc;

        l->l_lid = lid;
        p->p_pid = pid;
}