/*	$NetBSD: rump.c,v 1.42 2008/03/24 19:40:18 martin Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/select.h>
#include <sys/vnode.h>
#include <sys/vfs_syscalls.h>

#include <miscfs/specfs/specdev.h>

#include "rump_private.h"
#include "rumpuser.h"

struct proc proc0;
struct cwdinfo rump_cwdi;
struct pstats rump_stats;
struct plimit rump_limits;
kauth_cred_t rump_cred = RUMPCRED_SUSER;
struct cpu_info rump_cpu;
struct filedesc rump_filedesc0;
struct proclist allproc;

kmutex_t rump_giantlock;

sigset_t sigcantmask;

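/*
 * Registry of "fake" block devices: host file paths which the rump
 * kernel treats as block devices.  Entries are keyed by the resolved
 * (realpath'd) host path so that different spellings of the same
 * file match.
 */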
struct fakeblk {
	char path[MAXPATHLEN];
	LIST_ENTRY(fakeblk) entries;
};

static LIST_HEAD(, fakeblk) fakeblks = LIST_HEAD_INITIALIZER(fakeblks);

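/*
 * Worker for the aiodone workqueue: each work item is the b_work
 * member of a struct buf, so recover the buf and invoke its i/o
 * completion callback.
 */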
#ifndef RUMP_WITHOUT_THREADS
static void
rump_aiodone_worker(struct work *wk, void *dummy)
{
	struct buf *bp = (struct buf *)wk;

	KASSERT(&bp->b_work == wk);
	bp->b_iodone(bp);
}
#endif /* !RUMP_WITHOUT_THREADS */

int rump_inited;

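/*
 * Bootstrap the rump kernel: set up proc0/lwp0, VM, VFS and the
 * host thread interface, and fetch the hostname from the host.
 * Calling this more than once is a no-op.
 */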
void
rump_init(void)
{
	extern char hostname[];
	extern size_t hostnamelen;
	extern kmutex_t rump_atomic_lock;
	char buf[256];
	struct proc *p;
	struct lwp *l;
	int error;

	/* XXX */
	if (rump_inited)
		return;
	rump_inited = 1;

	if (rumpuser_getenv("RUMP_NVNODES", buf, sizeof(buf), &error) == 0) {
		desiredvnodes = strtoul(buf, NULL, 10);
	} else {
		desiredvnodes = 1<<16;
	}

	rw_init(&rump_cwdi.cwdi_lock);
	l = &lwp0;
	p = &proc0;
	p->p_stats = &rump_stats;
	p->p_cwdi = &rump_cwdi;
	p->p_limit = &rump_limits;
	p->p_pid = 0;
	p->p_fd = &rump_filedesc0;
	p->p_vmspace = &rump_vmspace;
	l->l_cred = rump_cred;
	l->l_proc = p;
	l->l_lid = 1;

	LIST_INSERT_HEAD(&allproc, p, p_list);

	mutex_init(&rump_atomic_lock, MUTEX_DEFAULT, IPL_NONE);
	rumpvm_init();

	rump_limits.pl_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
	rump_limits.pl_rlimit[RLIMIT_NOFILE].rlim_cur = RLIM_INFINITY;

	syncdelay = 0;
	dovfsusermount = 1;

	vfsinit();
	bufinit();
	fd_sys_init();

	rumpvfs_init();

	rump_sleepers_init();
	rumpuser_thrinit();

	rumpuser_mutex_recursive_init(&rump_giantlock.kmtx_mtx);

#ifndef RUMP_WITHOUT_THREADS
	/* aieeeedondest */
	if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
	    rump_aiodone_worker, NULL, 0, 0, 0))
		panic("aiodoned");
#endif /* !RUMP_WITHOUT_THREADS */

	rumpuser_gethostname(hostname, MAXHOSTNAMELEN, &error);
	hostnamelen = strlen(hostname);

	sigemptyset(&sigcantmask);

	fd_init(&rump_filedesc0);
	rump_cwdi.cwdi_cdir = rootvnode;
}

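/*
 * Allocate a struct mount and initialize it just enough to be
 * usable with the VFS_* operations below.
 */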
struct mount *
rump_mnt_init(struct vfsops *vfsops, int mntflags)
{
	struct mount *mp;

	mp = kmem_zalloc(sizeof(struct mount), KM_SLEEP);

	mp->mnt_op = vfsops;
	mp->mnt_flag = mntflags;
	TAILQ_INIT(&mp->mnt_vnodelist);
	rw_init(&mp->mnt_lock);
	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_refcnt = 1;

	mount_initspecific(mp);

	return mp;
}

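/*
 * Mount and start a file system.  If VFS_START() fails, the mount
 * is forcibly backed out so the caller never sees a half-mounted
 * file system.
 */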
int
rump_mnt_mount(struct mount *mp, const char *path, void *data, size_t *dlen)
{
	int rv;

	rv = VFS_MOUNT(mp, path, data, dlen);
	if (rv)
		return rv;

	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	rv = VFS_START(mp, 0);
	if (rv)
		VFS_UNMOUNT(mp, MNT_FORCE);

	return rv;
}

void
rump_mnt_destroy(struct mount *mp)
{

	mount_finispecific(mp);
	kmem_free(mp, sizeof(*mp));
}

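/*
 * Construct a componentname suitable for handing to VOP_LOOKUP()
 * and friends.  The caller is responsible for releasing it with
 * rump_freecn().
 */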
struct componentname *
rump_makecn(u_long nameiop, u_long flags, const char *name, size_t namelen,
	kauth_cred_t creds, struct lwp *l)
{
	struct componentname *cnp;
	const char *cp = NULL;

	cnp = kmem_zalloc(sizeof(struct componentname), KM_SLEEP);

	cnp->cn_nameiop = nameiop;
	cnp->cn_flags = flags;

	cnp->cn_pnbuf = PNBUF_GET();
	strlcpy(cnp->cn_pnbuf, name, MAXPATHLEN);
	cnp->cn_nameptr = cnp->cn_pnbuf;
	cnp->cn_namelen = namelen;
	cnp->cn_hash = namei_hash(name, &cp);

	cnp->cn_cred = creds;

	return cnp;
}

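/*
 * Release a componentname.  The flags tell us whether the credentials
 * and the pathname buffer are still ours to free; the SAVENAME/SAVESTART
 * handling follows the kernel's namei conventions.
 */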
void
rump_freecn(struct componentname *cnp, int flags)
{

	if (flags & RUMPCN_FREECRED)
		rump_cred_destroy(cnp->cn_cred);

	if ((flags & RUMPCN_HASNTBUF) == 0) {
		if (cnp->cn_flags & SAVENAME) {
			if ((flags & RUMPCN_ISLOOKUP) ||
			    (cnp->cn_flags & SAVESTART))
				PNBUF_PUT(cnp->cn_pnbuf);
		} else {
			PNBUF_PUT(cnp->cn_pnbuf);
		}
	}
	kmem_free(cnp, sizeof(*cnp));
}

/* hey baby, what's your namei? */
int
rump_namei(uint32_t op, uint32_t flags, const char *namep,
	struct vnode **dvpp, struct vnode **vpp, struct componentname **cnpp)
{
	struct nameidata nd;
	int rv;

	NDINIT(&nd, op, flags, UIO_SYSSPACE, namep);
	rv = namei(&nd);
	if (rv)
		return rv;

	if (dvpp) {
		KASSERT(flags & LOCKPARENT);
		*dvpp = nd.ni_dvp;
	} else {
		KASSERT((flags & LOCKPARENT) == 0);
	}

	if (vpp) {
		*vpp = nd.ni_vp;
	} else {
		if (nd.ni_vp) {
			if (flags & LOCKLEAF)
				vput(nd.ni_vp);
			else
				vrele(nd.ni_vp);
		}
	}

	if (cnpp) {
		struct componentname *cnp;

		cnp = kmem_alloc(sizeof(*cnp), KM_SLEEP);
		memcpy(cnp, &nd.ni_cnd, sizeof(*cnp));
		*cnpp = cnp;
	} else if (nd.ni_cnd.cn_flags & HASBUF) {
		panic("%s: pathbuf mismatch", __func__);
	}

	return rv;
}

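/*
 * Look up a registered fake block device by host path.  The path is
 * resolved with realpath first, so any spelling of the same file
 * matches the registered entry.
 */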
static struct fakeblk *
_rump_fakeblk_find(const char *path)
{
	char buf[MAXPATHLEN];
	struct fakeblk *fblk;
	int error;

	if (rumpuser_realpath(path, buf, &error) == NULL)
		return NULL;

	LIST_FOREACH(fblk, &fakeblks, entries)
		if (strcmp(fblk->path, buf) == 0)
			return fblk;

	return NULL;
}

int
rump_fakeblk_register(const char *path)
{
	char buf[MAXPATHLEN];
	struct fakeblk *fblk;
	int error;

	if (_rump_fakeblk_find(path))
		return EEXIST;

	if (rumpuser_realpath(path, buf, &error) == NULL)
		return error;

	fblk = kmem_alloc(sizeof(struct fakeblk), KM_NOSLEEP);
	if (fblk == NULL)
		return ENOMEM;

	strlcpy(fblk->path, buf, MAXPATHLEN);
	LIST_INSERT_HEAD(&fakeblks, fblk, entries);

	return 0;
}

int
rump_fakeblk_find(const char *path)
{

	return _rump_fakeblk_find(path) != NULL;
}

void
rump_fakeblk_deregister(const char *path)
{
	struct fakeblk *fblk;

	fblk = _rump_fakeblk_find(path);
	if (fblk == NULL)
		return;

	LIST_REMOVE(fblk, entries);
	kmem_free(fblk, sizeof(*fblk));
}

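/*
 * Report basic information about a vnode: its type, size and, for
 * device nodes, the device number.
 */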
void
rump_getvninfo(struct vnode *vp, enum vtype *vtype, voff_t *vsize, dev_t *vdev)
{

	*vtype = vp->v_type;
	*vsize = vp->v_size;
	if (vp->v_specnode)
		*vdev = vp->v_rdev;
	else
		*vdev = 0;
}

struct vfsops *
rump_vfslist_iterate(struct vfsops *ops)
{

	if (ops == NULL)
		return LIST_FIRST(&vfs_list);
	else
		return LIST_NEXT(ops, vfs_list);
}

struct vfsops *
rump_vfs_getopsbyname(const char *name)
{

	return vfs_getopsbyname(name);
}

struct vattr *
rump_vattr_init(void)
{
	struct vattr *vap;

	vap = kmem_alloc(sizeof(struct vattr), KM_SLEEP);
	vattr_null(vap);

	return vap;
}

void
rump_vattr_settype(struct vattr *vap, enum vtype vt)
{

	vap->va_type = vt;
}

void
rump_vattr_setmode(struct vattr *vap, mode_t mode)
{

	vap->va_mode = mode;
}

void
rump_vattr_setrdev(struct vattr *vap, dev_t dev)
{

	vap->va_rdev = dev;
}

void
rump_vattr_free(struct vattr *vap)
{

	kmem_free(vap, sizeof(*vap));
}

void
rump_vp_incref(struct vnode *vp)
{

	mutex_enter(&vp->v_interlock);
	++vp->v_usecount;
	mutex_exit(&vp->v_interlock);
}

int
rump_vp_getref(struct vnode *vp)
{

	return vp->v_usecount;
}

void
rump_vp_decref(struct vnode *vp)
{

	mutex_enter(&vp->v_interlock);
	--vp->v_usecount;
	mutex_exit(&vp->v_interlock);
}

/*
 * Really really recycle with a cherry on top.  We should be
 * extra-sure we can do this.  For example with p2k there is
 * no problem, since puffs in the kernel takes care of refcounting
 * for us.
 */
void
rump_vp_recycle_nokidding(struct vnode *vp)
{

	mutex_enter(&vp->v_interlock);
	vp->v_usecount = 1;
	vclean(vp, DOCLOSE);
	vrelel(vp, 0);
}

void
rump_vp_rele(struct vnode *vp)
{

	vrele(vp);
}

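/*
 * Allocate and initialize a single-iovec uio for kernel-space i/o.
 * The caller eventually releases it with rump_uio_free(), which
 * also reports how much of the transfer was left undone.
 */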
struct uio *
rump_uio_setup(void *buf, size_t bufsize, off_t offset, enum rump_uiorw rw)
{
	struct uio *uio;
	enum uio_rw uiorw;

	switch (rw) {
	case RUMPUIO_READ:
		uiorw = UIO_READ;
		break;
	case RUMPUIO_WRITE:
		uiorw = UIO_WRITE;
		break;
	default:
		panic("%s: invalid rw %d", __func__, rw);
	}

	uio = kmem_alloc(sizeof(struct uio), KM_SLEEP);
	uio->uio_iov = kmem_alloc(sizeof(struct iovec), KM_SLEEP);

	uio->uio_iov->iov_base = buf;
	uio->uio_iov->iov_len = bufsize;

	uio->uio_iovcnt = 1;
	uio->uio_offset = offset;
	uio->uio_resid = bufsize;
	uio->uio_rw = uiorw;
	uio->uio_vmspace = UIO_VMSPACE_SYS;

	return uio;
}

size_t
rump_uio_getresid(struct uio *uio)
{

	return uio->uio_resid;
}

off_t
rump_uio_getoff(struct uio *uio)
{

	return uio->uio_offset;
}

size_t
rump_uio_free(struct uio *uio)
{
	size_t resid;

	resid = uio->uio_resid;
	kmem_free(uio->uio_iov, sizeof(*uio->uio_iov));
	kmem_free(uio, sizeof(*uio));

	return resid;
}

void
rump_vp_lock_exclusive(struct vnode *vp)
{

	/* we can skip vn_lock() */
	VOP_LOCK(vp, LK_EXCLUSIVE);
}

void
rump_vp_lock_shared(struct vnode *vp)
{

	VOP_LOCK(vp, LK_SHARED);
}

void
rump_vp_unlock(struct vnode *vp)
{

	VOP_UNLOCK(vp, 0);
}

int
rump_vp_islocked(struct vnode *vp)
{

	return VOP_ISLOCKED(vp);
}

void
rump_vp_interlock(struct vnode *vp)
{

	mutex_enter(&vp->v_interlock);
}

int
rump_vfs_unmount(struct mount *mp, int mntflags)
{

	return VFS_UNMOUNT(mp, mntflags);
}

int
rump_vfs_root(struct mount *mp, struct vnode **vpp, int lock)
{
	int rv;

	rv = VFS_ROOT(mp, vpp);
	if (rv)
		return rv;

	if (!lock)
		VOP_UNLOCK(*vpp, 0);

	return 0;
}

int
rump_vfs_statvfs(struct mount *mp, struct statvfs *sbp)
{

	return VFS_STATVFS(mp, sbp);
}

int
rump_vfs_sync(struct mount *mp, int wait, kauth_cred_t cred)
{

	return VFS_SYNC(mp, wait ? MNT_WAIT : MNT_NOWAIT, cred);
}

int
rump_vfs_fhtovp(struct mount *mp, struct fid *fid, struct vnode **vpp)
{

	return VFS_FHTOVP(mp, fid, vpp);
}

int
rump_vfs_vptofh(struct vnode *vp, struct fid *fid, size_t *fidsize)
{

	return VFS_VPTOFH(vp, fid, fidsize);
}

/*ARGSUSED*/
void
rump_vfs_syncwait(struct mount *mp)
{
	int n;

	n = buf_syncwait();
	if (n)
		printf("syncwait: unsynced buffers: %d\n", n);
}

void
rump_bioops_sync(void)
{

	if (bioopsp)
		bioopsp->io_sync(NULL);
}

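/*
 * Manufacture a fresh lwp/proc pair for a host thread and optionally
 * install it as the current lwp.  The pair is torn down again with
 * rump_clear_curlwp().
 */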
struct lwp *
rump_setup_curlwp(pid_t pid, lwpid_t lid, int set)
{
	struct lwp *l;
	struct proc *p;

	l = kmem_zalloc(sizeof(struct lwp), KM_SLEEP);
	p = kmem_zalloc(sizeof(struct proc), KM_SLEEP);
	p->p_cwdi = cwdinit();

	p->p_stats = &rump_stats;
	p->p_limit = &rump_limits;
	p->p_pid = pid;
	p->p_vmspace = &rump_vmspace;
	l->l_cred = rump_cred;
	l->l_proc = p;
	l->l_lid = lid;

	p->p_fd = fd_init(&rump_filedesc0);
	l->l_fd = p->p_fd;

	if (set)
		rumpuser_set_curlwp(l);

	return l;
}

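/*
 * Tear down the lwp/proc pair created by rump_setup_curlwp() and
 * clear the host thread's current-lwp pointer.
 */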
void
rump_clear_curlwp(void)
{
	struct lwp *l;

	l = rumpuser_get_curlwp();
	fd_free();
	cwdfree(l->l_proc->p_cwdi);
	kmem_free(l->l_proc, sizeof(*l->l_proc));
	kmem_free(l, sizeof(*l));
	rumpuser_set_curlwp(NULL);
}

struct lwp *
rump_get_curlwp(void)
{
	struct lwp *l;

	l = rumpuser_get_curlwp();
	if (l == NULL)
		l = &lwp0;

	return l;
}

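/*
 * Interrupt priority level emulation: rump_splfoo()/rump_splx() and
 * the interrupt-side rump_intr_enter()/rump_intr_exit() take the
 * rumpspl rwlock in opposite modes, so raising the "spl" keeps
 * interrupt completion handlers out.
 */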
int
rump_splfoo(void)
{

	if (rumpuser_whatis_ipl() != RUMPUSER_IPL_INTR) {
		rumpuser_rw_enter(&rumpspl, 0);
		rumpuser_set_ipl(RUMPUSER_IPL_SPLFOO);
	}

	return 0;
}

static void
rump_intr_enter(void)
{

	rumpuser_set_ipl(RUMPUSER_IPL_INTR);
	rumpuser_rw_enter(&rumpspl, 1);
}

static void
rump_intr_exit(void)
{

	rumpuser_rw_exit(&rumpspl);
	rumpuser_clear_ipl(RUMPUSER_IPL_INTR);
}

void
rump_splx(int dummy)
{

	if (rumpuser_whatis_ipl() != RUMPUSER_IPL_INTR) {
		rumpuser_clear_ipl(RUMPUSER_IPL_SPLFOO);
		rumpuser_rw_exit(&rumpspl);
	}
}

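/*
 * I/O completion callback: update the buf to reflect how much of the
 * transfer completed and call biodone() in emulated interrupt context.
 */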
void
rump_biodone(void *arg, size_t count, int error)
{
	struct buf *bp = arg;

	bp->b_resid = bp->b_bcount - count;
	KASSERT(bp->b_resid >= 0);
	bp->b_error = error;

	rump_intr_enter();
	biodone(bp);
	rump_intr_exit();
}