1 /* $NetBSD: rump.c,v 1.42 2008/03/24 19:40:18 martin Exp $ */ 2 3 /* 4 * Copyright (c) 2007 Antti Kantee. All Rights Reserved. 5 * 6 * Development of this software was supported by Google Summer of Code. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 */

/*
 * rump.c: bootstrap and glue for the rump ("runnable userspace meta
 * program") kernel.  Sets up a minimal fake proc0/lwp0 environment,
 * brings up the VFS layer, and exports thin wrappers around mount,
 * namei, vnode, vattr and uio operations plus a pseudo-spl scheme so
 * that file system code can run hosted in userspace.
 */

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/select.h>
#include <sys/vnode.h>
#include <sys/vfs_syscalls.h>

#include <miscfs/specfs/specdev.h>

#include "rump_private.h"
#include "rumpuser.h"

/*
 * Minimal stand-ins for the kernel's global process/credential state.
 * rump_init() wires proc0/lwp0 up to these.
 */
struct proc proc0;
struct cwdinfo rump_cwdi;
struct pstats rump_stats;
struct plimit rump_limits;
kauth_cred_t rump_cred = RUMPCRED_SUSER;	/* NOTE(review): presumably superuser creds — defined in rump_private.h */
struct cpu_info rump_cpu;
struct filedesc rump_filedesc0;
struct proclist allproc;

kmutex_t rump_giantlock;

sigset_t sigcantmask;

/*
 * A registered "fake" block device: identified purely by its
 * canonicalized host path.
 */
struct fakeblk {
	char path[MAXPATHLEN];
	LIST_ENTRY(fakeblk) entries;
};

static LIST_HEAD(, fakeblk) fakeblks = LIST_HEAD_INITIALIZER(fakeblks);

#ifndef RUMP_WITHOUT_THREADS
/*
 * Workqueue handler for async I/O completion: the work item is
 * embedded in the buf, so recover the buf and run its completion
 * callback.
 */
static void
rump_aiodone_worker(struct work *wk, void *dummy)
{
	struct buf *bp = (struct buf *)wk;

	KASSERT(&bp->b_work == wk);
	bp->b_iodone(bp);
}
#endif /* RUMP_WITHOUT_THREADS */

int rump_inited;

/*
 * One-shot bootstrap of the rump kernel: fabricate proc0/lwp0,
 * initialize VM, VFS, buffers and file descriptors, start the host
 * thread support, and pick up the host's hostname.  Safe to call more
 * than once; subsequent calls are no-ops.
 */
void
rump_init()
{
	extern char hostname[];
	extern size_t hostnamelen;
	extern kmutex_t rump_atomic_lock;
	char buf[256];
	struct proc *p;
	struct lwp *l;
	int error;

	/* XXX */
	if (rump_inited)
		return;
	rump_inited = 1;

	/* vnode cache size: overridable via environment, else 64k */
	if (rumpuser_getenv("RUMP_NVNODES", buf, sizeof(buf), &error) == 0) {
		desiredvnodes = strtoul(buf, NULL, 10);
	} else {
		desiredvnodes = 1<<16;
	}

	/* wire proc0/lwp0 to the static stand-in structures above */
	rw_init(&rump_cwdi.cwdi_lock);
	l = &lwp0;
	p = &proc0;
	p->p_stats = &rump_stats;
	p->p_cwdi = &rump_cwdi;
	p->p_limit = &rump_limits;
	p->p_pid = 0;
	p->p_fd = &rump_filedesc0;
	p->p_vmspace = &rump_vmspace;
	l->l_cred = rump_cred;
	l->l_proc = p;
	l->l_lid = 1;

	LIST_INSERT_HEAD(&allproc, p, p_list);

	mutex_init(&rump_atomic_lock, MUTEX_DEFAULT, IPL_NONE);
	rumpvm_init();

	/* no artificial resource limits inside the rump kernel */
	rump_limits.pl_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
	rump_limits.pl_rlimit[RLIMIT_NOFILE].rlim_cur = RLIM_INFINITY;

	syncdelay = 0;
	dovfsusermount = 1;

	vfsinit();
	bufinit();
	fd_sys_init();

	rumpvfs_init();

	rump_sleepers_init();
	rumpuser_thrinit();

	/* the giant lock must be recursive; take the host mutex directly */
	rumpuser_mutex_recursive_init(&rump_giantlock.kmtx_mtx);

#ifndef RUMP_WITHOUT_THREADS
	/* aieeeedondest */
	if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
	    rump_aiodone_worker, NULL, 0, 0, 0))
		panic("aiodoned");
#endif /* RUMP_WITHOUT_THREADS */

	rumpuser_gethostname(hostname, MAXHOSTNAMELEN, &error);
	hostnamelen = strlen(hostname);

	sigemptyset(&sigcantmask);

	fd_init(&rump_filedesc0);
	rump_cwdi.cwdi_cdir = rootvnode;
}

/*
 * Allocate and initialize a struct mount for the given vfsops/flags.
 * The caller owns the returned mount; release with rump_mnt_destroy().
 */
struct mount *
rump_mnt_init(struct vfsops *vfsops, int mntflags)
{
	struct mount *mp;

	mp = kmem_zalloc(sizeof(struct mount), KM_SLEEP);

	mp->mnt_op = vfsops;
	mp->mnt_flag = mntflags;
	TAILQ_INIT(&mp->mnt_vnodelist);
	rw_init(&mp->mnt_lock);
	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_refcnt = 1;

	mount_initspecific(mp);

	return mp;
}

/*
 * Mount and start a file system on mp.  On VFS_START failure the
 * mount is forcibly unmounted again, so the caller never sees a
 * half-started mount.  Returns 0 or an errno.
 */
int
rump_mnt_mount(struct mount *mp, const char *path, void *data, size_t *dlen)
{
	int rv;

	rv = VFS_MOUNT(mp, path, data, dlen);
	if (rv)
		return rv;

	/* cache statvfs info; failure here is deliberately ignored */
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	rv = VFS_START(mp, 0);
	if (rv)
		VFS_UNMOUNT(mp, MNT_FORCE);

	return rv;
}

/* Tear down a mount created by rump_mnt_init(). */
void
rump_mnt_destroy(struct mount *mp)
{

	mount_finispecific(mp);
	kmem_free(mp, sizeof(*mp));
}

/*
 * Build a componentname suitable for passing to VOPs.  The name is
 * copied into a freshly allocated pathbuf.  Free with rump_freecn().
 * NOTE(review): strcpy assumes namelen < MAXPATHLEN — callers must
 * guarantee this.
 */
struct componentname *
rump_makecn(u_long nameiop, u_long flags, const char *name, size_t namelen,
	kauth_cred_t creds, struct lwp *l)
{
	struct componentname *cnp;
	const char *cp = NULL;

	cnp = kmem_zalloc(sizeof(struct componentname), KM_SLEEP);

	cnp->cn_nameiop = nameiop;
	cnp->cn_flags = flags;

	cnp->cn_pnbuf = PNBUF_GET();
	strcpy(cnp->cn_pnbuf, name);
	cnp->cn_nameptr = cnp->cn_pnbuf;
	cnp->cn_namelen = namelen;
	cnp->cn_hash = namei_hash(name, &cp);

	cnp->cn_cred = creds;

	return cnp;
}

/*
 * Release a componentname.  RUMPCN_FREECRED additionally drops the
 * credential reference; RUMPCN_HASNTBUF means the VOP already consumed
 * the pathbuf, so we must not free it here.  The SAVENAME/SAVESTART
 * dance mirrors the kernel's normal post-VOP pathbuf ownership rules.
 */
void
rump_freecn(struct componentname *cnp, int flags)
{

	if (flags & RUMPCN_FREECRED)
		rump_cred_destroy(cnp->cn_cred);

	if ((flags & RUMPCN_HASNTBUF) == 0) {
		if (cnp->cn_flags & SAVENAME) {
			if (flags & RUMPCN_ISLOOKUP || cnp->cn_flags & SAVESTART)
				PNBUF_PUT(cnp->cn_pnbuf);
		} else {
			PNBUF_PUT(cnp->cn_pnbuf);
		}
	}
	kmem_free(cnp, sizeof(*cnp));
}

/* hey baby, what's your namei? */
/*
 * Wrapper around namei(): look up namep with the given op/flags and
 * hand back the parent vnode, leaf vnode and/or a copy of the
 * componentname, each only if the caller passed a non-NULL slot.
 * A leaf the caller didn't ask for is released (vput if LOCKLEAF,
 * else vrele).  Asking for dvpp without LOCKPARENT (or vice versa)
 * trips a KASSERT.
 */
int
rump_namei(uint32_t op, uint32_t flags, const char *namep,
	struct vnode **dvpp, struct vnode **vpp, struct componentname **cnpp)
{
	struct nameidata nd;
	int rv;

	NDINIT(&nd, op, flags, UIO_SYSSPACE, namep);
	rv = namei(&nd);
	if (rv)
		return rv;

	if (dvpp) {
		KASSERT(flags & LOCKPARENT);
		*dvpp = nd.ni_dvp;
	} else {
		KASSERT((flags & LOCKPARENT) == 0);
	}

	if (vpp) {
		*vpp = nd.ni_vp;
	} else {
		if (nd.ni_vp) {
			if (flags & LOCKLEAF)
				vput(nd.ni_vp);
			else
				vrele(nd.ni_vp);
		}
	}

	if (cnpp) {
		struct componentname *cnp;

		cnp = kmem_alloc(sizeof(*cnp), KM_SLEEP);
		memcpy(cnp, &nd.ni_cnd, sizeof(*cnp));
		*cnpp = cnp;
	} else if (nd.ni_cnd.cn_flags & HASBUF) {
		/* namei left us a pathbuf nobody will free: that's a bug */
		panic("%s: pathbuf mismatch", __func__);
	}

	return rv;
}

/*
 * Look up a registered fake block device by path.  The path is
 * canonicalized on the host first so that different spellings of the
 * same file match.  Returns NULL if not registered (or if realpath
 * fails).
 */
static struct fakeblk *
_rump_fakeblk_find(const char *path)
{
	char buf[MAXPATHLEN];
	struct fakeblk *fblk;
	int error;

	if (rumpuser_realpath(path, buf, &error) == NULL)
		return NULL;

	LIST_FOREACH(fblk, &fakeblks, entries)
		if (strcmp(fblk->path, buf) == 0)
			return fblk;

	return NULL;
}

/*
 * Register a host path as a fake block device.  Returns 0, EEXIST if
 * already registered, ENOMEM, or the realpath errno.
 */
int
rump_fakeblk_register(const char *path)
{
	char buf[MAXPATHLEN];
	struct fakeblk *fblk;
	int error;

	if (_rump_fakeblk_find(path))
		return EEXIST;

	/* resolve again: the find above did not hand us the resolved path */
	if (rumpuser_realpath(path, buf, &error) == NULL)
		return error;

	fblk = kmem_alloc(sizeof(struct fakeblk), KM_NOSLEEP);
	if (fblk == NULL)
		return ENOMEM;

	strlcpy(fblk->path, buf, MAXPATHLEN);
	LIST_INSERT_HEAD(&fakeblks, fblk, entries);

	return 0;
}

/* Is this path a registered fake block device?  Returns boolean. */
int
rump_fakeblk_find(const char *path)
{

	return _rump_fakeblk_find(path) != NULL;
}

/* Remove a fake block device registration; silently ignores unknowns. */
void
rump_fakeblk_deregister(const char *path)
{
	struct fakeblk *fblk;

	fblk = _rump_fakeblk_find(path);
	if (fblk == NULL)
		return;

	LIST_REMOVE(fblk, entries);
	kmem_free(fblk, sizeof(*fblk));
}

/*
 * Report a vnode's type, size and (for device nodes) rdev.
 * Non-device vnodes report vdev 0.
 */
void
rump_getvninfo(struct vnode *vp, enum vtype *vtype, voff_t *vsize, dev_t *vdev)
{

	*vtype = vp->v_type;
	*vsize = vp->v_size;
	if (vp->v_specnode)
		*vdev = vp->v_rdev;
	else
		*vdev = 0;
}

/*
 * Iterate the global vfs list: pass NULL to get the first entry,
 * then the previous return value to step forward.  NULL terminates.
 */
struct vfsops *
rump_vfslist_iterate(struct vfsops *ops)
{

	if (ops == NULL)
		return LIST_FIRST(&vfs_list);
	else
		return LIST_NEXT(ops, vfs_list);
}

/* Look up vfsops by file system name (thin vfs_getopsbyname wrapper). */
struct vfsops *
rump_vfs_getopsbyname(const char *name)
{

	return vfs_getopsbyname(name);
}

/*
 * Allocate a vattr initialized to "no value" (vattr_null).
 * Free with rump_vattr_free().
 */
struct vattr *
rump_vattr_init()
{
	struct vattr *vap;

	vap = kmem_alloc(sizeof(struct vattr), KM_SLEEP);
	vattr_null(vap);

	return vap;
}

/* Setter: vnode type. */
void
rump_vattr_settype(struct vattr *vap, enum vtype vt)
{

	vap->va_type = vt;
}

/* Setter: file mode. */
void
rump_vattr_setmode(struct vattr *vap, mode_t mode)
{

	vap->va_mode = mode;
}

/* Setter: device number. */
void
rump_vattr_setrdev(struct vattr *vap, dev_t dev)
{

	vap->va_rdev = dev;
}

/* Release a vattr from rump_vattr_init(). */
void
rump_vattr_free(struct vattr *vap)
{

	kmem_free(vap, sizeof(*vap));
}

/*
 * Bump a vnode's use count directly under the interlock.
 * NOTE(review): bypasses vget/vref bookkeeping — callers rely on the
 * external (e.g. puffs) reference protocol to keep this sound.
 */
void
rump_vp_incref(struct vnode *vp)
{

	mutex_enter(&vp->v_interlock);
	++vp->v_usecount;
	mutex_exit(&vp->v_interlock);
}

/* Read a vnode's current use count (unlocked snapshot). */
int
rump_vp_getref(struct vnode *vp)
{

	return vp->v_usecount;
}

/* Drop a reference taken with rump_vp_incref(). */
void
rump_vp_decref(struct vnode *vp)
{

	mutex_enter(&vp->v_interlock);
	--vp->v_usecount;
	mutex_exit(&vp->v_interlock);
}

/*
 * Really really recycle with a cherry on top.  We should be
 * extra-sure we can do this.  For example with p2k there is
 * no problem, since puffs in the kernel takes care of refcounting
 * for us.
 */
void
rump_vp_recycle_nokidding(struct vnode *vp)
{

	mutex_enter(&vp->v_interlock);
	vp->v_usecount = 1;	/* force to exactly one ref, then clean+release */
	vclean(vp, DOCLOSE);
	vrelel(vp, 0);
}

/* Plain vrele wrapper for external callers. */
void
rump_vp_rele(struct vnode *vp)
{

	vrele(vp);
}

/*
 * Build a single-iovec uio over (buf, bufsize) at the given offset.
 * rw is the rump-namespace direction, translated to UIO_READ/WRITE.
 * Free with rump_uio_free(), which also reports the final residual.
 */
struct uio *
rump_uio_setup(void *buf, size_t bufsize, off_t offset, enum rump_uiorw rw)
{
	struct uio *uio;
	enum uio_rw uiorw;

	switch (rw) {
	case RUMPUIO_READ:
		uiorw = UIO_READ;
		break;
	case RUMPUIO_WRITE:
		uiorw = UIO_WRITE;
		break;
	default:
		panic("%s: invalid rw %d", __func__, rw);
	}

	uio = kmem_alloc(sizeof(struct uio), KM_SLEEP);
	uio->uio_iov = kmem_alloc(sizeof(struct iovec), KM_SLEEP);

	uio->uio_iov->iov_base = buf;
	uio->uio_iov->iov_len = bufsize;

	uio->uio_iovcnt = 1;
	uio->uio_offset = offset;
	uio->uio_resid = bufsize;
	uio->uio_rw = uiorw;
	uio->uio_vmspace = UIO_VMSPACE_SYS;

	return uio;
}

/* Bytes not yet transferred. */
size_t
rump_uio_getresid(struct uio *uio)
{

	return uio->uio_resid;
}

/* Current offset within the transfer. */
off_t
rump_uio_getoff(struct uio *uio)
{

	return uio->uio_offset;
}

/*
 * Free a uio from rump_uio_setup() and return the residual count
 * (0 means the full transfer completed).
 */
size_t
rump_uio_free(struct uio *uio)
{
	size_t resid;

	resid = uio->uio_resid;
	kmem_free(uio->uio_iov, sizeof(*uio->uio_iov));
	kmem_free(uio, sizeof(*uio));

	return resid;
}

/* Take the vnode lock exclusively. */
void
rump_vp_lock_exclusive(struct vnode *vp)
{

	/* we can skip vn_lock() */
	VOP_LOCK(vp, LK_EXCLUSIVE);
}

/* Take the vnode lock shared. */
void
rump_vp_lock_shared(struct vnode *vp)
{

	VOP_LOCK(vp, LK_SHARED);
}

/* Release the vnode lock. */
void
rump_vp_unlock(struct vnode *vp)
{

	VOP_UNLOCK(vp, 0);
}

/* Query the vnode lock state (VOP_ISLOCKED semantics). */
int
rump_vp_islocked(struct vnode *vp)
{

	return VOP_ISLOCKED(vp);
}

/* Grab the vnode interlock; caller is responsible for releasing it. */
void
rump_vp_interlock(struct vnode *vp)
{

	mutex_enter(&vp->v_interlock);
}

/* VFS_UNMOUNT wrapper. */
int
rump_vfs_unmount(struct mount *mp, int mntflags)
{

	return VFS_UNMOUNT(mp, mntflags);
}

/*
 * VFS_ROOT wrapper.  VFS_ROOT returns the root vnode locked; drop
 * the lock again unless the caller asked to keep it.
 */
int
rump_vfs_root(struct mount *mp, struct vnode **vpp, int lock)
{
	int rv;

	rv = VFS_ROOT(mp, vpp);
	if (rv)
		return rv;

	if (!lock)
		VOP_UNLOCK(*vpp, 0);

	return 0;
}

/* VFS_STATVFS wrapper. */
int
rump_vfs_statvfs(struct mount *mp, struct statvfs *sbp)
{

	return VFS_STATVFS(mp, sbp);
}

/* VFS_SYNC wrapper; nonzero wait requests a synchronous sync. */
int
rump_vfs_sync(struct mount *mp, int wait, kauth_cred_t cred)
{

	return VFS_SYNC(mp, wait ? MNT_WAIT : MNT_NOWAIT, cred);
}

/* VFS_FHTOVP wrapper: file handle to vnode. */
int
rump_vfs_fhtovp(struct mount *mp, struct fid *fid, struct vnode **vpp)
{

	return VFS_FHTOVP(mp, fid, vpp);
}

/* VFS_VPTOFH wrapper: vnode to file handle. */
int
rump_vfs_vptofh(struct vnode *vp, struct fid *fid, size_t *fidsize)
{

	return VFS_VPTOFH(vp, fid, fidsize);
}

/*
 * Wait for outstanding buffers to drain; complains (but proceeds)
 * if some remain unsynced.
 */
/*ARGSUSED*/
void
rump_vfs_syncwait(struct mount *mp)
{
	int n;

	n = buf_syncwait();
	if (n)
		printf("syncwait: unsynced buffers: %d\n", n);
}

/* Flush the (journalling) bioops layer, if one is registered. */
void
rump_bioops_sync()
{

	if (bioopsp)
		bioopsp->io_sync(NULL);
}

/*
 * Fabricate an lwp+proc pair for an externally created thread, with
 * its own cwdinfo and fd table.  If set is nonzero the new lwp is
 * installed as the host thread's curlwp.  Pair with
 * rump_clear_curlwp().
 */
struct lwp *
rump_setup_curlwp(pid_t pid, lwpid_t lid, int set)
{
	struct lwp *l;
	struct proc *p;

	l = kmem_zalloc(sizeof(struct lwp), KM_SLEEP);
	p = kmem_zalloc(sizeof(struct proc), KM_SLEEP);
	p->p_cwdi = cwdinit();

	p->p_stats = &rump_stats;
	p->p_limit = &rump_limits;
	p->p_pid = pid;
	p->p_vmspace = &rump_vmspace;
	l->l_cred = rump_cred;
	l->l_proc = p;
	l->l_lid = lid;

	/* NOTE(review): fd table derived from rump_filedesc0 — confirm
	 * whether fd_init copies or shares it */
	p->p_fd = fd_init(&rump_filedesc0);
	l->l_fd = p->p_fd;

	if (set)
		rumpuser_set_curlwp(l);

	return l;
}

/* Tear down the curlwp installed by rump_setup_curlwp(..., set=1). */
void
rump_clear_curlwp()
{
	struct lwp *l;

	l = rumpuser_get_curlwp();
	fd_free();
	cwdfree(l->l_proc->p_cwdi);
	kmem_free(l->l_proc, sizeof(*l->l_proc));
	kmem_free(l, sizeof(*l));
	rumpuser_set_curlwp(NULL);
}

/*
 * Current lwp for this host thread; threads with no explicit lwp
 * share the implicit lwp0.
 */
struct lwp *
rump_get_curlwp()
{
	struct lwp *l;

	l = rumpuser_get_curlwp();
	if (l == NULL)
		l = &lwp0;

	return l;
}

/*
 * Pseudo-spl raise: emulated with a host rwlock.  Normal code takes
 * the lock shared here; interrupt-context code (rump_intr_enter)
 * takes it exclusive, so "raising spl" excludes interrupts.  Already
 * being at interrupt ipl is a no-op.  Returns the (dummy) old level.
 */
int
rump_splfoo()
{

	if (rumpuser_whatis_ipl() != RUMPUSER_IPL_INTR) {
		rumpuser_rw_enter(&rumpspl, 0);
		rumpuser_set_ipl(RUMPUSER_IPL_SPLFOO);
	}

	return 0;
}

/* Enter emulated interrupt context: exclusive hold on the spl lock. */
static void
rump_intr_enter(void)
{

	rumpuser_set_ipl(RUMPUSER_IPL_INTR);
	rumpuser_rw_enter(&rumpspl, 1);
}

/* Leave emulated interrupt context. */
static void
rump_intr_exit(void)
{

	rumpuser_rw_exit(&rumpspl);
	rumpuser_clear_ipl(RUMPUSER_IPL_INTR);
}

/*
 * Pseudo-spl lower: release the shared hold taken by rump_splfoo().
 * The argument is the saved level, unused in this emulation.
 */
void
rump_splx(int dummy)
{

	if (rumpuser_whatis_ipl() != RUMPUSER_IPL_INTR) {
		rumpuser_clear_ipl(RUMPUSER_IPL_SPLFOO);
		rumpuser_rw_exit(&rumpspl);
	}
}

/*
 * I/O completion callback: record how much of the transfer completed
 * and any error, then run biodone() inside emulated interrupt context
 * (matching how a real driver would complete a buf).
 */
void
rump_biodone(void *arg, size_t count, int error)
{
	struct buf *bp = arg;

	bp->b_resid = bp->b_bcount - count;
	KASSERT(bp->b_resid >= 0);
	bp->b_error = error;

	rump_intr_enter();
	biodone(bp);
	rump_intr_exit();
}