xref: /netbsd-src/sys/rump/librump/rumpvfs/rumpfs.c (revision 75f6d617e282811cb173c2ccfbf5df0dd71f7045)
1 /*	$NetBSD: rumpfs.c,v 1.135 2015/06/23 10:41:32 hannken Exp $	*/
2 
3 /*
4  * Copyright (c) 2009, 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: rumpfs.c,v 1.135 2015/06/23 10:41:32 hannken Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/atomic.h>
33 #include <sys/buf.h>
34 #include <sys/dirent.h>
35 #include <sys/errno.h>
36 #include <sys/filedesc.h>
37 #include <sys/fcntl.h>
38 #include <sys/kauth.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/lock.h>
44 #include <sys/lockf.h>
45 #include <sys/queue.h>
46 #include <sys/stat.h>
47 #include <sys/syscallargs.h>
48 #include <sys/vnode.h>
49 #include <sys/unistd.h>
50 
51 #include <miscfs/fifofs/fifo.h>
52 #include <miscfs/specfs/specdev.h>
53 #include <miscfs/genfs/genfs.h>
54 #include <miscfs/genfs/genfs_node.h>
55 
56 #include <uvm/uvm_extern.h>
57 
58 #include <rump/rumpuser.h>
59 
60 #include "rump_private.h"
61 #include "rump_vfs_private.h"
62 
/*
 * Forward declarations of the rumpfs vnode operation handlers.  Every
 * handler receives its operation-specific argument structure through a
 * generic pointer and returns an int status; they are wired into the
 * operation vectors defined below.
 */
static int rump_vop_lookup(void *);
static int rump_vop_getattr(void *);
static int rump_vop_setattr(void *);
static int rump_vop_mkdir(void *);
static int rump_vop_rmdir(void *);
static int rump_vop_remove(void *);
static int rump_vop_mknod(void *);
static int rump_vop_create(void *);
static int rump_vop_inactive(void *);
static int rump_vop_reclaim(void *);
static int rump_vop_success(void *);
static int rump_vop_readdir(void *);
static int rump_vop_spec(void *);
static int rump_vop_read(void *);
static int rump_vop_write(void *);
static int rump_vop_open(void *);
static int rump_vop_symlink(void *);
static int rump_vop_readlink(void *);
static int rump_vop_whiteout(void *);
static int rump_vop_pathconf(void *);
static int rump_vop_bmap(void *);
static int rump_vop_strategy(void *);
static int rump_vop_advlock(void *);
static int rump_vop_access(void *);
87 
/*
 * Operation vector for fifos.  Only putpages has a dedicated handler
 * (genfs_null_putpages); every other operation is routed to
 * vn_default_error.
 */
int (**fifo_vnodeop_p)(void *);
const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_putpages_desc, genfs_null_putpages },
	{ NULL, NULL }		/* table terminator */
};
const struct vnodeopv_desc fifo_vnodeop_opv_desc =
	{ &fifo_vnodeop_p, fifo_vnodeop_entries };
96 
/*
 * Main rumpfs operation vector.  Operations without an entry here fall
 * back to vn_default_error; hard links are explicitly unsupported
 * (genfs_eopnotsupp) and fsync is handled by rump_vop_success.
 */
int (**rump_vnodeop_p)(void *);
const struct vnodeopv_entry_desc rump_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, rump_vop_lookup },
	{ &vop_getattr_desc, rump_vop_getattr },
	{ &vop_setattr_desc, rump_vop_setattr },
	{ &vop_mkdir_desc, rump_vop_mkdir },
	{ &vop_rmdir_desc, rump_vop_rmdir },
	{ &vop_remove_desc, rump_vop_remove },
	{ &vop_mknod_desc, rump_vop_mknod },
	{ &vop_create_desc, rump_vop_create },
	{ &vop_symlink_desc, rump_vop_symlink },
	{ &vop_readlink_desc, rump_vop_readlink },
	{ &vop_access_desc, rump_vop_access },
	{ &vop_readdir_desc, rump_vop_readdir },
	{ &vop_read_desc, rump_vop_read },
	{ &vop_write_desc, rump_vop_write },
	{ &vop_open_desc, rump_vop_open },
	{ &vop_close_desc, genfs_nullop },
	{ &vop_seek_desc, genfs_seek },
	{ &vop_getpages_desc, genfs_getpages },
	{ &vop_putpages_desc, genfs_putpages },
	{ &vop_whiteout_desc, rump_vop_whiteout },
	{ &vop_fsync_desc, rump_vop_success },
	{ &vop_lock_desc, genfs_lock },
	{ &vop_unlock_desc, genfs_unlock },
	{ &vop_islocked_desc, genfs_islocked },
	{ &vop_inactive_desc, rump_vop_inactive },
	{ &vop_reclaim_desc, rump_vop_reclaim },
	{ &vop_link_desc, genfs_eopnotsupp },
	{ &vop_pathconf_desc, rump_vop_pathconf },
	{ &vop_bmap_desc, rump_vop_bmap },
	{ &vop_strategy_desc, rump_vop_strategy },
	{ &vop_advlock_desc, rump_vop_advlock },
	{ NULL, NULL }		/* table terminator */
};
const struct vnodeopv_desc rump_vnodeop_opv_desc =
	{ &rump_vnodeop_p, rump_vnodeop_entries };
135 
/*
 * Operation vector for special (device) nodes: every operation is
 * funnelled through the single default handler rump_vop_spec.
 */
int (**rump_specop_p)(void *);
const struct vnodeopv_entry_desc rump_specop_entries[] = {
	{ &vop_default_desc, rump_vop_spec },
	{ NULL, NULL }		/* table terminator */
};
const struct vnodeopv_desc rump_specop_opv_desc =
	{ &rump_specop_p, rump_specop_entries };
143 
/*
 * NULL-terminated list of the operation vector descriptions belonging to
 * rumpfs.  Note that the fifo vector above is not part of this list.
 */
const struct vnodeopv_desc * const rump_opv_descs[] = {
	&rump_vnodeop_opv_desc,
	&rump_specop_opv_desc,
	NULL
};
149 
/*
 * A directory entry.  Whiteout entries do not reference a real node;
 * instead rd_node holds the sentinel value RUMPFS_WHITEOUT.
 */
#define RUMPFS_WHITEOUT ((void *)-1)
/* argument is parenthesized so that expressions such as &dent work */
#define RDENT_ISWHITEOUT(rdp) ((rdp)->rd_node == RUMPFS_WHITEOUT)
struct rumpfs_dent {
	char *rd_name;			/* NUL-terminated entry name */
	int rd_namelen;			/* strlen(rd_name) */
	struct rumpfs_node *rd_node;	/* target node or RUMPFS_WHITEOUT */

	LIST_ENTRY(rumpfs_dent) rd_entries;	/* linkage in parent's list */
};
159 
/*
 * genfs callbacks used by rumpfs: size and write are delegated to the
 * stock genfs implementations, the optional hooks are left unset.
 */
struct genfs_ops rumpfs_genfsops = {
	.gop_size = genfs_size,
	.gop_write = genfs_gop_write,

	/* optional */
	.gop_alloc = NULL,
	.gop_markupdate = NULL,
};
168 
/*
 * Per-file private data of rumpfs.  The type-specific part lives in the
 * rn_u union and is accessed through the rn_* shorthand macros below;
 * which union member is valid depends on the node type and on whether
 * the node is backed by a host file (etfs) or not.
 */
struct rumpfs_node {
	struct genfs_node rn_gn;
	struct vattr rn_va;		/* attributes, copied out by getattr */
	struct vnode *rn_vp;		/* vnode for this node; read under
					 * reclock in etfsremove() */
	char *rn_hostpath;		/* host path for etfs-backed nodes */
	int rn_flags;			/* RUMPNODE_* */
	struct lockf *rn_lockf;		/* presumably advisory lock state for
					 * rump_vop_advlock -- TODO confirm */

	union {
		struct {		/* VREG */
			int readfd;	/* host fd for reading, -1 if unopened */
			int writefd;	/* host fd for writing, -1 if unopened */
			uint64_t offset; /* "begin" offset given at etfs
					  * registration */
		} reg;
		struct {		/* VREG without host backing */
			void *data;	/* in-memory file contents */
			size_t dlen;	/* size of data in bytes */
		} reg_noet;
		struct {		/* VDIR */
			LIST_HEAD(, rumpfs_dent) dents;
			struct rumpfs_node *parent;
			int flags;
		} dir;
		struct {		/* symlink */
			char *target;	/* link target string */
			size_t len;	/* strlen(target) */
		} link;
	} rn_u;
};
#define rn_readfd	rn_u.reg.readfd
#define rn_writefd	rn_u.reg.writefd
#define rn_offset	rn_u.reg.offset
#define rn_data		rn_u.reg_noet.data
#define rn_dlen		rn_u.reg_noet.dlen
#define rn_dir		rn_u.dir.dents
#define rn_parent	rn_u.dir.parent
#define rn_linktarg	rn_u.link.target
#define rn_linklen	rn_u.link.len
207 
/*
 * Values for rumpfs_node.rn_flags.
 *
 * CANRECLAIM is set once a node has been unlinked (remove/rmdir) and on
 * nodes created on the fly under an etfs directory; presumably it tells
 * the reclaim path that the private data may be freed -- TODO confirm.
 * DIR_ET marks a directory whose lookups are forwarded to the host,
 * DIR_ETSUBS additionally propagates that property to subdirectories,
 * and ET_PHONE_HOST marks a node backed by a host file.
 */
#define RUMPNODE_CANRECLAIM	0x01
#define RUMPNODE_DIR_ET		0x02
#define RUMPNODE_DIR_ETSUBS	0x04
#define RUMPNODE_ET_PHONE_HOST	0x10

/* Per-mount private data. */
struct rumpfs_mount {
	struct vnode *rfsmp_rvp;	/* presumably the root vnode of the
					 * mount -- TODO confirm */
};
216 
#define INO_WHITEOUT 1
/*
 * Source of file ids: makeprivate() increments this atomically and uses
 * the new value.
 * NOTE(review): declared int but passed to atomic_inc_uint_nv() -- confirm
 * the signedness mismatch is intentional.
 */
static int lastino = 2;
/* held while reading rn_vp in etfsremove() */
static kmutex_t reclock;

/* permission bits given to nodes created on behalf of etfs */
#define RUMPFS_DEFAULTMODE 0755
static void freedir(struct rumpfs_node *, struct componentname *);
static struct rumpfs_node *makeprivate(enum vtype, mode_t, dev_t, off_t, bool);
static void freeprivate(struct rumpfs_node *);
225 
226 /*
227  * Extra Terrestrial stuff.  We map a given key (pathname) to a file on
228  * the host FS.  ET phones home only from the root node of rumpfs.
229  *
230  * When an etfs node is removed, a vnode potentially behind it is not
231  * immediately recycled.
232  */
233 
/*
 * One registered etfs mapping: a key (path relative to the rumpfs root,
 * without leading slashes) and the node that represents it.
 */
struct etfs {
	char et_key[MAXPATHLEN];	/* key, leading '/' stripped */
	size_t et_keylen;		/* strlen(et_key) */
	bool et_prefixkey;		/* key matches as a prefix (set for
					 * directory registrations) */
	bool et_removing;		/* removal in progress */
	devminor_t et_blkmin;		/* rumpblk minor, -1 if not a device */

	LIST_ENTRY(etfs) et_entries;	/* linkage on etfs_list */

	struct rumpfs_node *et_rn;	/* node backing this mapping */
};
/* protects etfs_list */
static kmutex_t etfs_lock;
static LIST_HEAD(, etfs) etfs_list = LIST_HEAD_INITIALIZER(etfs_list);
247 
248 static enum vtype
249 ettype_to_vtype(enum rump_etfs_type et)
250 {
251 	enum vtype vt;
252 
253 	switch (et) {
254 	case RUMP_ETFS_REG:
255 		vt = VREG;
256 		break;
257 	case RUMP_ETFS_BLK:
258 		vt = VBLK;
259 		break;
260 	case RUMP_ETFS_CHR:
261 		vt = VCHR;
262 		break;
263 	case RUMP_ETFS_DIR:
264 		vt = VDIR;
265 		break;
266 	case RUMP_ETFS_DIR_SUBDIRS:
267 		vt = VDIR;
268 		break;
269 	default:
270 		panic("invalid et type: %d", et);
271 	}
272 
273 	return vt;
274 }
275 
276 static enum vtype
277 hft_to_vtype(int hft)
278 {
279 	enum vtype vt;
280 
281 	switch (hft) {
282 	case RUMPUSER_FT_OTHER:
283 		vt = VNON;
284 		break;
285 	case RUMPUSER_FT_DIR:
286 		vt = VDIR;
287 		break;
288 	case RUMPUSER_FT_REG:
289 		vt = VREG;
290 		break;
291 	case RUMPUSER_FT_BLK:
292 		vt = VBLK;
293 		break;
294 	case RUMPUSER_FT_CHR:
295 		vt = VCHR;
296 		break;
297 	default:
298 		vt = VNON;
299 		break;
300 	}
301 
302 	return vt;
303 }
304 
305 static bool
306 etfs_find(const char *key, struct etfs **etp, bool forceprefix)
307 {
308 	struct etfs *et;
309 	size_t keylen = strlen(key);
310 
311 	KASSERT(mutex_owned(&etfs_lock));
312 
313 	LIST_FOREACH(et, &etfs_list, et_entries) {
314 		if ((keylen == et->et_keylen || et->et_prefixkey || forceprefix)
315 		    && strncmp(key, et->et_key, et->et_keylen) == 0) {
316 			if (etp)
317 				*etp = et;
318 			return true;
319 		}
320 	}
321 
322 	return false;
323 }
324 
325 #define REGDIR(ftype) \
326     ((ftype) == RUMP_ETFS_DIR || (ftype) == RUMP_ETFS_DIR_SUBDIRS)
327 static int
328 etfsregister(const char *key, const char *hostpath,
329 	enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
330 {
331 	char buf[9];
332 	struct etfs *et;
333 	struct rumpfs_node *rn;
334 	uint64_t fsize;
335 	dev_t rdev = NODEV;
336 	devminor_t dmin = -1;
337 	int hft, error;
338 
339 	if (key[0] != '/') {
340 		return EINVAL;
341 	}
342 	while (key[0] == '/') {
343 		key++;
344 	}
345 
346 	if ((error = rumpuser_getfileinfo(hostpath, &fsize, &hft)) != 0)
347 		return error;
348 
349 	/* etfs directory requires a directory on the host */
350 	if (REGDIR(ftype)) {
351 		if (hft != RUMPUSER_FT_DIR)
352 			return ENOTDIR;
353 		if (begin != 0)
354 			return EISDIR;
355 		if (size != RUMP_ETFS_SIZE_ENDOFF)
356 			return EISDIR;
357 		size = fsize;
358 	} else {
359 		if (begin > fsize)
360 			return EINVAL;
361 		if (size == RUMP_ETFS_SIZE_ENDOFF)
362 			size = fsize - begin;
363 		if (begin + size > fsize)
364 			return EINVAL;
365 	}
366 
367 	if (ftype == RUMP_ETFS_BLK || ftype == RUMP_ETFS_CHR) {
368 		error = rumpblk_register(hostpath, &dmin, begin, size);
369 		if (error != 0) {
370 			return error;
371 		}
372 		rdev = makedev(RUMPBLK_DEVMAJOR, dmin);
373 	}
374 
375 	et = kmem_alloc(sizeof(*et), KM_SLEEP);
376 	strcpy(et->et_key, key);
377 	et->et_keylen = strlen(et->et_key);
378 	et->et_rn = rn = makeprivate(ettype_to_vtype(ftype), RUMPFS_DEFAULTMODE,
379 	    rdev, size, true);
380 	et->et_removing = false;
381 	et->et_blkmin = dmin;
382 
383 	rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
384 
385 	if (ftype == RUMP_ETFS_REG || REGDIR(ftype) || et->et_blkmin != -1) {
386 		size_t len = strlen(hostpath)+1;
387 
388 		rn->rn_hostpath = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
389 		memcpy(rn->rn_hostpath, hostpath, len);
390 		rn->rn_offset = begin;
391 	}
392 
393 	if (REGDIR(ftype)) {
394 		rn->rn_flags |= RUMPNODE_DIR_ET;
395 		et->et_prefixkey = true;
396 	} else {
397 		et->et_prefixkey = false;
398 	}
399 
400 	if (ftype == RUMP_ETFS_DIR_SUBDIRS)
401 		rn->rn_flags |= RUMPNODE_DIR_ETSUBS;
402 
403 	mutex_enter(&etfs_lock);
404 	if (etfs_find(key, NULL, REGDIR(ftype))) {
405 		mutex_exit(&etfs_lock);
406 		if (et->et_blkmin != -1)
407 			rumpblk_deregister(hostpath);
408 		if (et->et_rn->rn_hostpath != NULL)
409 			free(et->et_rn->rn_hostpath, M_TEMP);
410 		freeprivate(et->et_rn);
411 		kmem_free(et, sizeof(*et));
412 		return EEXIST;
413 	}
414 	LIST_INSERT_HEAD(&etfs_list, et, et_entries);
415 	mutex_exit(&etfs_lock);
416 
417 	if (ftype == RUMP_ETFS_BLK) {
418 		format_bytes(buf, sizeof(buf), size);
419 		aprint_verbose("/%s: hostpath %s (%s)\n", key, hostpath, buf);
420 	}
421 
422 	return 0;
423 }
424 #undef REGDIR
425 
426 /* remove etfs mapping.  caller's responsibility to make sure it's not in use */
427 static int
428 etfsremove(const char *key)
429 {
430 	struct etfs *et;
431 	size_t keylen;
432 	int rv __diagused;
433 
434 	if (key[0] != '/') {
435 		return EINVAL;
436 	}
437 	while (key[0] == '/') {
438 		key++;
439 	}
440 
441 	keylen = strlen(key);
442 
443 	mutex_enter(&etfs_lock);
444 	LIST_FOREACH(et, &etfs_list, et_entries) {
445 		if (keylen == et->et_keylen && strcmp(et->et_key, key) == 0) {
446 			if (et->et_removing)
447 				et = NULL;
448 			else
449 				et->et_removing = true;
450 			break;
451 		}
452 	}
453 	mutex_exit(&etfs_lock);
454 	if (!et)
455 		return ENOENT;
456 
457 	/*
458 	 * ok, we know what we want to remove and have signalled there
459 	 * actually are men at work.  first, unregister from rumpblk
460 	 */
461 	if (et->et_blkmin != -1) {
462 		rv = rumpblk_deregister(et->et_rn->rn_hostpath);
463 	} else {
464 		rv = 0;
465 	}
466 	KASSERT(rv == 0);
467 
468 	/* then do the actual removal */
469 	mutex_enter(&etfs_lock);
470 	LIST_REMOVE(et, et_entries);
471 	mutex_exit(&etfs_lock);
472 
473 	/* node is unreachable, safe to nuke all device copies */
474 	if (et->et_blkmin != -1) {
475 		vdevgone(RUMPBLK_DEVMAJOR, et->et_blkmin, et->et_blkmin, VBLK);
476 	} else {
477 		struct vnode *vp;
478 		struct mount *mp;
479 		struct rumpfs_node *rn;
480 
481 		mutex_enter(&reclock);
482 		if ((vp = et->et_rn->rn_vp) != NULL) {
483 			mp = vp->v_mount;
484 			rn = vp->v_data;
485 			KASSERT(rn == et->et_rn);
486 		} else {
487 			mp = NULL;
488 		}
489 		mutex_exit(&reclock);
490 		if (mp && vcache_get(mp, &rn, sizeof(rn), &vp) == 0)
491 			vgone(vp);
492 	}
493 
494 	if (et->et_rn->rn_hostpath != NULL)
495 		free(et->et_rn->rn_hostpath, M_TEMP);
496 	freeprivate(et->et_rn);
497 	kmem_free(et, sizeof(*et));
498 
499 	return 0;
500 }
501 
502 /*
503  * rumpfs
504  */
505 
506 static struct rumpfs_node *
507 makeprivate(enum vtype vt, mode_t mode, dev_t rdev, off_t size, bool et)
508 {
509 	struct rumpfs_node *rn;
510 	struct vattr *va;
511 	struct timespec ts;
512 
513 	KASSERT((mode & ~ALLPERMS) == 0);
514 	rn = kmem_zalloc(sizeof(*rn), KM_SLEEP);
515 
516 	switch (vt) {
517 	case VDIR:
518 		LIST_INIT(&rn->rn_dir);
519 		break;
520 	case VREG:
521 		if (et) {
522 			rn->rn_readfd = -1;
523 			rn->rn_writefd = -1;
524 		}
525 		break;
526 	default:
527 		break;
528 	}
529 
530 	nanotime(&ts);
531 
532 	va = &rn->rn_va;
533 	va->va_type = vt;
534 	va->va_mode = mode;
535 	if (vt == VDIR)
536 		va->va_nlink = 2;
537 	else
538 		va->va_nlink = 1;
539 	va->va_uid = 0;
540 	va->va_gid = 0;
541 	va->va_fsid =
542 	va->va_fileid = atomic_inc_uint_nv(&lastino);
543 	va->va_size = size;
544 	va->va_blocksize = 512;
545 	va->va_atime = ts;
546 	va->va_mtime = ts;
547 	va->va_ctime = ts;
548 	va->va_birthtime = ts;
549 	va->va_gen = 0;
550 	va->va_flags = 0;
551 	va->va_rdev = rdev;
552 	va->va_bytes = 512;
553 	va->va_filerev = 0;
554 	va->va_vaflags = 0;
555 
556 	return rn;
557 }
558 
559 static void
560 freeprivate(struct rumpfs_node *rn)
561 {
562 
563 	kmem_free(rn, sizeof(*rn));
564 }
565 
566 static void
567 makedir(struct rumpfs_node *rnd,
568 	struct componentname *cnp, struct rumpfs_node *rn)
569 {
570 	struct rumpfs_dent *rdent;
571 
572 	rdent = kmem_alloc(sizeof(*rdent), KM_SLEEP);
573 	rdent->rd_name = kmem_alloc(cnp->cn_namelen+1, KM_SLEEP);
574 	rdent->rd_node = rn;
575 	strlcpy(rdent->rd_name, cnp->cn_nameptr, cnp->cn_namelen+1);
576 	rdent->rd_namelen = strlen(rdent->rd_name);
577 
578 	if ((cnp->cn_flags & ISWHITEOUT) != 0) {
579 		KASSERT((cnp->cn_flags & DOWHITEOUT) == 0);
580 		freedir(rnd, cnp);
581 	}
582 	LIST_INSERT_HEAD(&rnd->rn_dir, rdent, rd_entries);
583 }
584 
585 static void
586 freedir(struct rumpfs_node *rnd, struct componentname *cnp)
587 {
588 	struct rumpfs_dent *rd = NULL;
589 
590 	LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
591 		if (rd->rd_namelen == cnp->cn_namelen &&
592 		    strncmp(rd->rd_name, cnp->cn_nameptr,
593 		            cnp->cn_namelen) == 0)
594 			break;
595 	}
596 	if (rd == NULL)
597 		panic("could not find directory entry: %s", cnp->cn_nameptr);
598 
599 	if (cnp->cn_flags & DOWHITEOUT) {
600 		rd->rd_node = RUMPFS_WHITEOUT;
601 	} else {
602 		LIST_REMOVE(rd, rd_entries);
603 		kmem_free(rd->rd_name, rd->rd_namelen+1);
604 		kmem_free(rd, sizeof(*rd));
605 	}
606 }
607 
608 /*
609  * Simple lookup for rump file systems.
610  *
611  * uhm, this is twisted.  C F C C, hope of C C F C looming
612  */
613 static int
614 rump_vop_lookup(void *v)
615 {
616 	struct vop_lookup_v2_args /* {
617 		struct vnode *a_dvp;
618 		struct vnode **a_vpp;
619 		struct componentname *a_cnp;
620 	}; */ *ap = v;
621 	struct componentname *cnp = ap->a_cnp;
622 	struct vnode *dvp = ap->a_dvp;
623 	struct vnode **vpp = ap->a_vpp;
624 	struct rumpfs_node *rnd = dvp->v_data, *rn;
625 	struct rumpfs_dent *rd = NULL;
626 	struct etfs *et;
627 	bool dotdot = (cnp->cn_flags & ISDOTDOT) != 0;
628 	int rv = 0;
629 	const char *cp;
630 
631 	*vpp = NULL;
632 
633 	rv = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
634 	if (rv)
635 		return rv;
636 
637 	if ((cnp->cn_flags & ISLASTCN)
638 	    && (dvp->v_mount->mnt_flag & MNT_RDONLY)
639 	    && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
640 		return EROFS;
641 
642 	/* check for dot, return directly if the case */
643 	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
644 		vref(dvp);
645 		*vpp = dvp;
646 		return 0;
647 	}
648 
649 	/* we don't do rename */
650 	if (!(((cnp->cn_flags & ISLASTCN) == 0) || (cnp->cn_nameiop != RENAME)))
651 		return EOPNOTSUPP;
652 
653 	/* check for etfs */
654 	if (dvp == rootvnode &&
655 	    (cnp->cn_nameiop == LOOKUP || cnp->cn_nameiop == CREATE)) {
656 		bool found;
657 		mutex_enter(&etfs_lock);
658 		found = etfs_find(cnp->cn_nameptr, &et, false);
659 		mutex_exit(&etfs_lock);
660 
661 		if (found) {
662 			rn = et->et_rn;
663 			cnp->cn_consume += et->et_keylen - cnp->cn_namelen;
664 			/*
665 			 * consume trailing slashes if any and clear
666 			 * REQUIREDIR if we consumed the full path.
667 			 */
668 			cp = &cnp->cn_nameptr[cnp->cn_namelen];
669 			cp += cnp->cn_consume;
670 			KASSERT(*cp == '\0' || *cp == '/');
671 			if (*cp == '\0' && rn->rn_va.va_type != VDIR)
672 				cnp->cn_flags &= ~REQUIREDIR;
673 			while (*cp++ == '/')
674 				cnp->cn_consume++;
675 			goto getvnode;
676 		}
677 	}
678 
679 	if (rnd->rn_flags & RUMPNODE_DIR_ET) {
680 		uint64_t fsize;
681 		char *newpath;
682 		size_t newpathlen;
683 		int hft, error;
684 
685 		if (dotdot)
686 			return EOPNOTSUPP;
687 
688 		newpathlen = strlen(rnd->rn_hostpath) + 1 + cnp->cn_namelen + 1;
689 		newpath = malloc(newpathlen, M_TEMP, M_WAITOK);
690 
691 		strlcpy(newpath, rnd->rn_hostpath, newpathlen);
692 		strlcat(newpath, "/", newpathlen);
693 		strlcat(newpath, cnp->cn_nameptr, newpathlen);
694 
695 		if ((error = rumpuser_getfileinfo(newpath, &fsize, &hft)) != 0){
696 			free(newpath, M_TEMP);
697 			return error;
698 		}
699 
700 		/* allow only dirs and regular files */
701 		if (hft != RUMPUSER_FT_REG && hft != RUMPUSER_FT_DIR) {
702 			free(newpath, M_TEMP);
703 			return ENOENT;
704 		}
705 
706 		rn = makeprivate(hft_to_vtype(hft), RUMPFS_DEFAULTMODE,
707 		    NODEV, fsize, true);
708 		rn->rn_flags |= RUMPNODE_CANRECLAIM;
709 		if (rnd->rn_flags & RUMPNODE_DIR_ETSUBS) {
710 			rn->rn_flags |= RUMPNODE_DIR_ET | RUMPNODE_DIR_ETSUBS;
711 			rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
712 		}
713 		rn->rn_hostpath = newpath;
714 
715 		goto getvnode;
716 	} else {
717 		if (dotdot) {
718 			if ((rn = rnd->rn_parent) != NULL)
719 				goto getvnode;
720 		} else {
721 			LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
722 				if (rd->rd_namelen == cnp->cn_namelen &&
723 				    strncmp(rd->rd_name, cnp->cn_nameptr,
724 				      cnp->cn_namelen) == 0)
725 					break;
726 			}
727 		}
728 	}
729 
730 	if (!rd && ((cnp->cn_flags & ISLASTCN) == 0||cnp->cn_nameiop != CREATE))
731 		return ENOENT;
732 
733 	if (!rd && (cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
734 		if (dvp->v_mount->mnt_flag & MNT_RDONLY)
735 			return EROFS;
736 		rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
737 		if (rv)
738 			return rv;
739 		return EJUSTRETURN;
740 	}
741 
742 	if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == DELETE) {
743 		rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
744 		if (rv)
745 			return rv;
746 	}
747 
748 	if (RDENT_ISWHITEOUT(rd)) {
749 		cnp->cn_flags |= ISWHITEOUT;
750 		if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE)
751 			return EJUSTRETURN;
752 		return ENOENT;
753 	}
754 
755 	rn = rd->rd_node;
756 
757  getvnode:
758 	KASSERT(rn);
759 	rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
760 	if (rv) {
761 		if (rnd->rn_flags & RUMPNODE_DIR_ET)
762 			freeprivate(rn);
763 		return rv;
764 	}
765 
766 	return 0;
767 }
768 
769 static int
770 rump_check_possible(struct vnode *vp, struct rumpfs_node *rnode,
771     mode_t mode)
772 {
773 
774 	if ((mode & VWRITE) == 0)
775 		return 0;
776 
777 	switch (vp->v_type) {
778 	case VDIR:
779 	case VLNK:
780 	case VREG:
781 		break;
782 	default:
783 		/* special file is always writable. */
784 		return 0;
785 	}
786 
787 	return vp->v_mount->mnt_flag & MNT_RDONLY ? EROFS : 0;
788 }
789 
790 static int
791 rump_check_permitted(struct vnode *vp, struct rumpfs_node *rnode,
792     mode_t mode, kauth_cred_t cred)
793 {
794 	struct vattr *attr = &rnode->rn_va;
795 
796 	return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
797 	    vp->v_type, attr->va_mode), vp, NULL, genfs_can_access(vp->v_type,
798 	    attr->va_mode, attr->va_uid, attr->va_gid, mode, cred));
799 }
800 
801 int
802 rump_vop_access(void *v)
803 {
804 	struct vop_access_args /* {
805 		const struct vnodeop_desc *a_desc;
806 		struct vnode *a_vp;
807 		int a_mode;
808 		kauth_cred_t a_cred;
809 	} */ *ap = v;
810 	struct vnode *vp = ap->a_vp;
811 	struct rumpfs_node *rn = vp->v_data;
812 	int error;
813 
814 	error = rump_check_possible(vp, rn, ap->a_mode);
815 	if (error)
816 		return error;
817 
818 	error = rump_check_permitted(vp, rn, ap->a_mode, ap->a_cred);
819 
820 	return error;
821 }
822 
823 static int
824 rump_vop_getattr(void *v)
825 {
826 	struct vop_getattr_args /* {
827 		struct vnode *a_vp;
828 		struct vattr *a_vap;
829 		kauth_cred_t a_cred;
830 	} */ *ap = v;
831 	struct vnode *vp = ap->a_vp;
832 	struct rumpfs_node *rn = vp->v_data;
833 	struct vattr *vap = ap->a_vap;
834 
835 	memcpy(vap, &rn->rn_va, sizeof(struct vattr));
836 	vap->va_size = vp->v_size;
837 	return 0;
838 }
839 
840 static int
841 rump_vop_setattr(void *v)
842 {
843 	struct vop_setattr_args /* {
844 		struct vnode *a_vp;
845 		struct vattr *a_vap;
846 		kauth_cred_t a_cred;
847 	} */ *ap = v;
848 	struct vnode *vp = ap->a_vp;
849 	struct vattr *vap = ap->a_vap;
850 	struct rumpfs_node *rn = vp->v_data;
851 	struct vattr *attr = &rn->rn_va;
852 	kauth_cred_t cred = ap->a_cred;
853 	int error;
854 
855 #define	CHANGED(a, t)	(vap->a != (t)VNOVAL)
856 #define SETIFVAL(a,t) if (CHANGED(a, t)) rn->rn_va.a = vap->a
857 	if (CHANGED(va_atime.tv_sec, time_t) ||
858 	    CHANGED(va_ctime.tv_sec, time_t) ||
859 	    CHANGED(va_mtime.tv_sec, time_t) ||
860 	    CHANGED(va_birthtime.tv_sec, time_t) ||
861 	    CHANGED(va_atime.tv_nsec, long) ||
862 	    CHANGED(va_ctime.tv_nsec, long) ||
863 	    CHANGED(va_mtime.tv_nsec, long) ||
864 	    CHANGED(va_birthtime.tv_nsec, long)) {
865 		error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp,
866 		    NULL, genfs_can_chtimes(vp, vap->va_vaflags, attr->va_uid,
867 		    cred));
868 		if (error)
869 			return error;
870 	}
871 
872 	SETIFVAL(va_atime.tv_sec, time_t);
873 	SETIFVAL(va_ctime.tv_sec, time_t);
874 	SETIFVAL(va_mtime.tv_sec, time_t);
875 	SETIFVAL(va_birthtime.tv_sec, time_t);
876 	SETIFVAL(va_atime.tv_nsec, long);
877 	SETIFVAL(va_ctime.tv_nsec, long);
878 	SETIFVAL(va_mtime.tv_nsec, long);
879 	SETIFVAL(va_birthtime.tv_nsec, long);
880 
881 	if (CHANGED(va_flags, u_long)) {
882 		/* XXX Can we handle system flags here...? */
883 		error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_FLAGS, vp,
884 		    NULL, genfs_can_chflags(cred, vp->v_type, attr->va_uid,
885 		    false));
886 		if (error)
887 			return error;
888 	}
889 
890 	SETIFVAL(va_flags, u_long);
891 #undef  SETIFVAL
892 #undef	CHANGED
893 
894 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (uid_t)VNOVAL) {
895 		uid_t uid =
896 		    (vap->va_uid != (uid_t)VNOVAL) ? vap->va_uid : attr->va_uid;
897 		gid_t gid =
898 		    (vap->va_gid != (gid_t)VNOVAL) ? vap->va_gid : attr->va_gid;
899 		error = kauth_authorize_vnode(cred,
900 		    KAUTH_VNODE_CHANGE_OWNERSHIP, vp, NULL,
901 		    genfs_can_chown(cred, attr->va_uid, attr->va_gid, uid,
902 		    gid));
903 		if (error)
904 			return error;
905 		attr->va_uid = uid;
906 		attr->va_gid = gid;
907 	}
908 
909 	if (vap->va_mode != (mode_t)VNOVAL) {
910 		mode_t mode = vap->va_mode;
911 		error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY,
912 		    vp, NULL, genfs_can_chmod(vp->v_type, cred, attr->va_uid,
913 		    attr->va_gid, mode));
914 		if (error)
915 			return error;
916 		attr->va_mode = mode;
917 	}
918 
919 	if (vp->v_type == VREG &&
920 	    vap->va_size != VSIZENOTSET &&
921 	    vap->va_size != rn->rn_dlen &&
922 	    (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0) {
923 		void *newdata;
924 		size_t copylen, newlen;
925 
926 		newlen = vap->va_size;
927 		newdata = rump_hypermalloc(newlen, 0, false, "rumpfs");
928 		if (newdata == NULL)
929 			return ENOSPC;
930 
931 		copylen = MIN(rn->rn_dlen, newlen);
932 		memset(newdata, 0, newlen);
933 		memcpy(newdata, rn->rn_data, copylen);
934 		rump_hyperfree(rn->rn_data, rn->rn_dlen);
935 
936 		rn->rn_data = newdata;
937 		rn->rn_dlen = newlen;
938 		uvm_vnp_setsize(vp, newlen);
939 	}
940 	return 0;
941 }
942 
943 static int
944 rump_vop_mkdir(void *v)
945 {
946 	struct vop_mkdir_v3_args /* {
947 		struct vnode *a_dvp;
948 		struct vnode **a_vpp;
949 		struct componentname *a_cnp;
950 		struct vattr *a_vap;
951 	}; */ *ap = v;
952 	struct vnode *dvp = ap->a_dvp;
953 	struct vnode **vpp = ap->a_vpp;
954 	struct componentname *cnp = ap->a_cnp;
955 	struct vattr *va = ap->a_vap;
956 	struct rumpfs_node *rnd = dvp->v_data, *rn;
957 	int rv = 0;
958 
959 	rn = makeprivate(VDIR, va->va_mode & ALLPERMS, NODEV, DEV_BSIZE, false);
960 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
961 		rn->rn_va.va_flags |= UF_OPAQUE;
962 	rn->rn_parent = rnd;
963 	rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
964 	if (rv) {
965 		freeprivate(rn);
966 		return rv;
967 	}
968 
969 	makedir(rnd, cnp, rn);
970 
971 	return rv;
972 }
973 
974 static int
975 rump_vop_rmdir(void *v)
976 {
977         struct vop_rmdir_args /* {
978                 struct vnode *a_dvp;
979                 struct vnode *a_vp;
980                 struct componentname *a_cnp;
981         }; */ *ap = v;
982 	struct vnode *dvp = ap->a_dvp;
983 	struct vnode *vp = ap->a_vp;
984 	struct componentname *cnp = ap->a_cnp;
985 	struct rumpfs_node *rnd = dvp->v_data;
986 	struct rumpfs_node *rn = vp->v_data;
987 	struct rumpfs_dent *rd;
988 	int rv = 0;
989 
990 	LIST_FOREACH(rd, &rn->rn_dir, rd_entries) {
991 		if (rd->rd_node != RUMPFS_WHITEOUT) {
992 			rv = ENOTEMPTY;
993 			goto out;
994 		}
995 	}
996 	while ((rd = LIST_FIRST(&rn->rn_dir)) != NULL) {
997 		KASSERT(rd->rd_node == RUMPFS_WHITEOUT);
998 		LIST_REMOVE(rd, rd_entries);
999 		kmem_free(rd->rd_name, rd->rd_namelen+1);
1000 		kmem_free(rd, sizeof(*rd));
1001 	}
1002 
1003 	freedir(rnd, cnp);
1004 	rn->rn_flags |= RUMPNODE_CANRECLAIM;
1005 	rn->rn_parent = NULL;
1006 
1007 out:
1008 	vput(dvp);
1009 	vput(vp);
1010 
1011 	return rv;
1012 }
1013 
1014 static int
1015 rump_vop_remove(void *v)
1016 {
1017         struct vop_remove_args /* {
1018                 struct vnode *a_dvp;
1019                 struct vnode *a_vp;
1020                 struct componentname *a_cnp;
1021         }; */ *ap = v;
1022 	struct vnode *dvp = ap->a_dvp;
1023 	struct vnode *vp = ap->a_vp;
1024 	struct componentname *cnp = ap->a_cnp;
1025 	struct rumpfs_node *rnd = dvp->v_data;
1026 	struct rumpfs_node *rn = vp->v_data;
1027 	int rv = 0;
1028 
1029 	if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1030 		return EOPNOTSUPP;
1031 
1032 	freedir(rnd, cnp);
1033 	rn->rn_flags |= RUMPNODE_CANRECLAIM;
1034 
1035 	vput(dvp);
1036 	vput(vp);
1037 
1038 	return rv;
1039 }
1040 
1041 static int
1042 rump_vop_mknod(void *v)
1043 {
1044 	struct vop_mknod_v3_args /* {
1045 		struct vnode *a_dvp;
1046 		struct vnode **a_vpp;
1047 		struct componentname *a_cnp;
1048 		struct vattr *a_vap;
1049 	}; */ *ap = v;
1050 	struct vnode *dvp = ap->a_dvp;
1051 	struct vnode **vpp = ap->a_vpp;
1052 	struct componentname *cnp = ap->a_cnp;
1053 	struct vattr *va = ap->a_vap;
1054 	struct rumpfs_node *rnd = dvp->v_data, *rn;
1055 	int rv;
1056 
1057 	rn = makeprivate(va->va_type, va->va_mode & ALLPERMS, va->va_rdev,
1058 	    DEV_BSIZE, false);
1059 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
1060 		rn->rn_va.va_flags |= UF_OPAQUE;
1061 	rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
1062 	if (rv) {
1063 		freeprivate(rn);
1064 		return rv;
1065 	}
1066 
1067 	makedir(rnd, cnp, rn);
1068 
1069 	return rv;
1070 }
1071 
1072 static int
1073 rump_vop_create(void *v)
1074 {
1075 	struct vop_create_v3_args /* {
1076 		struct vnode *a_dvp;
1077 		struct vnode **a_vpp;
1078 		struct componentname *a_cnp;
1079 		struct vattr *a_vap;
1080 	}; */ *ap = v;
1081 	struct vnode *dvp = ap->a_dvp;
1082 	struct vnode **vpp = ap->a_vpp;
1083 	struct componentname *cnp = ap->a_cnp;
1084 	struct vattr *va = ap->a_vap;
1085 	struct rumpfs_node *rnd = dvp->v_data, *rn;
1086 	off_t newsize;
1087 	int rv;
1088 
1089 	newsize = va->va_type == VSOCK ? DEV_BSIZE : 0;
1090 	rn = makeprivate(va->va_type, va->va_mode & ALLPERMS, NODEV,
1091 	    newsize, false);
1092 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
1093 		rn->rn_va.va_flags |= UF_OPAQUE;
1094 	rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
1095 	if (rv) {
1096 		freeprivate(rn);
1097 		return rv;
1098 	}
1099 
1100 	makedir(rnd, cnp, rn);
1101 
1102 	return rv;
1103 }
1104 
1105 static int
1106 rump_vop_symlink(void *v)
1107 {
1108 	struct vop_symlink_v3_args /* {
1109 		struct vnode *a_dvp;
1110 		struct vnode **a_vpp;
1111 		struct componentname *a_cnp;
1112 		struct vattr *a_vap;
1113 		char *a_target;
1114 	}; */ *ap = v;
1115 	struct vnode *dvp = ap->a_dvp;
1116 	struct vnode **vpp = ap->a_vpp;
1117 	struct componentname *cnp = ap->a_cnp;
1118 	struct vattr *va = ap->a_vap;
1119 	struct rumpfs_node *rnd = dvp->v_data, *rn;
1120 	const char *target = ap->a_target;
1121 	size_t linklen;
1122 	int rv;
1123 
1124 	linklen = strlen(target);
1125 	KASSERT(linklen < MAXPATHLEN);
1126 	rn = makeprivate(VLNK, va->va_mode & ALLPERMS, NODEV, linklen, false);
1127 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
1128 		rn->rn_va.va_flags |= UF_OPAQUE;
1129 	rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
1130 	if (rv) {
1131 		freeprivate(rn);
1132 		return rv;
1133 	}
1134 
1135 	makedir(rnd, cnp, rn);
1136 
1137 	KASSERT(linklen < MAXPATHLEN);
1138 	rn->rn_linktarg = PNBUF_GET();
1139 	rn->rn_linklen = linklen;
1140 	strcpy(rn->rn_linktarg, target);
1141 
1142 	return rv;
1143 }
1144 
1145 static int
1146 rump_vop_readlink(void *v)
1147 {
1148 	struct vop_readlink_args /* {
1149 		struct vnode *a_vp;
1150 		struct uio *a_uio;
1151 		kauth_cred_t a_cred;
1152 	}; */ *ap = v;
1153 	struct vnode *vp = ap->a_vp;
1154 	struct rumpfs_node *rn = vp->v_data;
1155 	struct uio *uio = ap->a_uio;
1156 
1157 	return uiomove(rn->rn_linktarg, rn->rn_linklen, uio);
1158 }
1159 
1160 static int
1161 rump_vop_whiteout(void *v)
1162 {
1163 	struct vop_whiteout_args /* {
1164 		struct vnode            *a_dvp;
1165 		struct componentname    *a_cnp;
1166 		int                     a_flags;
1167 	} */ *ap = v;
1168 	struct vnode *dvp = ap->a_dvp;
1169 	struct rumpfs_node *rnd = dvp->v_data;
1170 	struct componentname *cnp = ap->a_cnp;
1171 	int flags = ap->a_flags;
1172 
1173 	switch (flags) {
1174 	case LOOKUP:
1175 		break;
1176 	case CREATE:
1177 		makedir(rnd, cnp, RUMPFS_WHITEOUT);
1178 		break;
1179 	case DELETE:
1180 		cnp->cn_flags &= ~DOWHITEOUT; /* cargo culting never fails ? */
1181 		freedir(rnd, cnp);
1182 		break;
1183 	default:
1184 		panic("unknown whiteout op %d", flags);
1185 	}
1186 
1187 	return 0;
1188 }
1189 
1190 static int
1191 rump_vop_open(void *v)
1192 {
1193 	struct vop_open_args /* {
1194 		struct vnode *a_vp;
1195 		int a_mode;
1196 		kauth_cred_t a_cred;
1197 	} */ *ap = v;
1198 	struct vnode *vp = ap->a_vp;
1199 	struct rumpfs_node *rn = vp->v_data;
1200 	int mode = ap->a_mode;
1201 	int error = EINVAL;
1202 
1203 	if (vp->v_type != VREG || (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0)
1204 		return 0;
1205 
1206 	if (mode & FREAD) {
1207 		if (rn->rn_readfd != -1)
1208 			return 0;
1209 		error = rumpuser_open(rn->rn_hostpath,
1210 		    RUMPUSER_OPEN_RDONLY, &rn->rn_readfd);
1211 	}
1212 
1213 	if (mode & FWRITE) {
1214 		if (rn->rn_writefd != -1)
1215 			return 0;
1216 		error = rumpuser_open(rn->rn_hostpath,
1217 		    RUMPUSER_OPEN_WRONLY, &rn->rn_writefd);
1218 	}
1219 
1220 	return error;
1221 }
1222 
1223 /* simple readdir.  even omits dotstuff and periods */
1224 static int
1225 rump_vop_readdir(void *v)
1226 {
1227 	struct vop_readdir_args /* {
1228 		struct vnode *a_vp;
1229 		struct uio *a_uio;
1230 		kauth_cred_t a_cred;
1231 		int *a_eofflag;
1232 		off_t **a_cookies;
1233 		int *a_ncookies;
1234 	} */ *ap = v;
1235 	struct vnode *vp = ap->a_vp;
1236 	struct uio *uio = ap->a_uio;
1237 	struct rumpfs_node *rnd = vp->v_data;
1238 	struct rumpfs_dent *rdent;
1239 	struct dirent *dentp = NULL;
1240 	unsigned i;
1241 	int rv = 0;
1242 
1243 	/* seek to current entry */
1244 	for (i = 0, rdent = LIST_FIRST(&rnd->rn_dir);
1245 	    (i < uio->uio_offset) && rdent;
1246 	    i++, rdent = LIST_NEXT(rdent, rd_entries))
1247 		continue;
1248 	if (!rdent)
1249 		goto out;
1250 
1251 	/* copy entries */
1252 	dentp = kmem_alloc(sizeof(*dentp), KM_SLEEP);
1253 	for (; rdent && uio->uio_resid > 0;
1254 	    rdent = LIST_NEXT(rdent, rd_entries), i++) {
1255 		strlcpy(dentp->d_name, rdent->rd_name, sizeof(dentp->d_name));
1256 		dentp->d_namlen = strlen(dentp->d_name);
1257 		dentp->d_reclen = _DIRENT_RECLEN(dentp, dentp->d_namlen);
1258 
1259 		if (__predict_false(RDENT_ISWHITEOUT(rdent))) {
1260 			dentp->d_fileno = INO_WHITEOUT;
1261 			dentp->d_type = DT_WHT;
1262 		} else {
1263 			dentp->d_fileno = rdent->rd_node->rn_va.va_fileid;
1264 			dentp->d_type = vtype2dt(rdent->rd_node->rn_va.va_type);
1265 		}
1266 
1267 		if (uio->uio_resid < dentp->d_reclen) {
1268 			i--;
1269 			break;
1270 		}
1271 
1272 		rv = uiomove(dentp, dentp->d_reclen, uio);
1273 		if (rv) {
1274 			i--;
1275 			break;
1276 		}
1277 	}
1278 	kmem_free(dentp, sizeof(*dentp));
1279 	dentp = NULL;
1280 
1281  out:
1282 	KASSERT(dentp == NULL);
1283 	if (ap->a_cookies) {
1284 		*ap->a_ncookies = 0;
1285 		*ap->a_cookies = NULL;
1286 	}
1287 	if (rdent)
1288 		*ap->a_eofflag = 0;
1289 	else
1290 		*ap->a_eofflag = 1;
1291 	uio->uio_offset = i;
1292 
1293 	return rv;
1294 }
1295 
1296 static int
1297 etread(struct rumpfs_node *rn, struct uio *uio)
1298 {
1299 	struct rumpuser_iovec iov;
1300 	uint8_t *buf;
1301 	size_t bufsize, n;
1302 	int error = 0;
1303 
1304 	bufsize = uio->uio_resid;
1305 	if (bufsize == 0)
1306 		return 0;
1307 	buf = kmem_alloc(bufsize, KM_SLEEP);
1308 
1309 	iov.iov_base = buf;
1310 	iov.iov_len = bufsize;
1311 	if ((error = rumpuser_iovread(rn->rn_readfd, &iov, 1,
1312 	    uio->uio_offset + rn->rn_offset, &n)) == 0) {
1313 		KASSERT(n <= bufsize);
1314 		error = uiomove(buf, n, uio);
1315 	}
1316 
1317 	kmem_free(buf, bufsize);
1318 	return error;
1319 }
1320 
1321 static int
1322 rump_vop_read(void *v)
1323 {
1324 	struct vop_read_args /* {
1325 		struct vnode *a_vp;
1326 		struct uio *a_uio;
1327 		int ioflags a_ioflag;
1328 		kauth_cred_t a_cred;
1329 	}; */ *ap = v;
1330 	struct vnode *vp = ap->a_vp;
1331 	struct rumpfs_node *rn = vp->v_data;
1332 	struct uio *uio = ap->a_uio;
1333 	const int advice = IO_ADV_DECODE(ap->a_ioflag);
1334 	off_t chunk;
1335 	int error = 0;
1336 
1337 	if (vp->v_type == VDIR)
1338 		return EISDIR;
1339 
1340 	/* et op? */
1341 	if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1342 		return etread(rn, uio);
1343 
1344 	/* otherwise, it's off to ubc with us */
1345 	while (uio->uio_resid > 0) {
1346 		chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1347 		if (chunk == 0)
1348 			break;
1349 		error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1350 		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1351 		if (error)
1352 			break;
1353 	}
1354 
1355 	return error;
1356 }
1357 
1358 static int
1359 etwrite(struct rumpfs_node *rn, struct uio *uio)
1360 {
1361 	struct rumpuser_iovec iov;
1362 	uint8_t *buf;
1363 	size_t bufsize, n;
1364 	int error = 0;
1365 
1366 	bufsize = uio->uio_resid;
1367 	if (bufsize == 0)
1368 		return 0;
1369 	buf = kmem_alloc(bufsize, KM_SLEEP);
1370 	error = uiomove(buf, bufsize, uio);
1371 	if (error)
1372 		goto out;
1373 
1374 	KASSERT(uio->uio_resid == 0);
1375 	iov.iov_base = buf;
1376 	iov.iov_len = bufsize;
1377 	if ((error = rumpuser_iovwrite(rn->rn_writefd, &iov, 1,
1378 	    (uio->uio_offset-bufsize) + rn->rn_offset, &n)) == 0) {
1379 		KASSERT(n <= bufsize);
1380 		uio->uio_resid = bufsize - n;
1381 	}
1382 
1383  out:
1384 	kmem_free(buf, bufsize);
1385 	return error;
1386 }
1387 
1388 static int
1389 rump_vop_write(void *v)
1390 {
1391 	struct vop_write_args /* {
1392 		struct vnode *a_vp;
1393 		struct uio *a_uio;
1394 		int ioflags a_ioflag;
1395 		kauth_cred_t a_cred;
1396 	}; */ *ap = v;
1397 	struct vnode *vp = ap->a_vp;
1398 	struct rumpfs_node *rn = vp->v_data;
1399 	struct uio *uio = ap->a_uio;
1400 	const int advice = IO_ADV_DECODE(ap->a_ioflag);
1401 	void *olddata;
1402 	size_t oldlen, newlen;
1403 	off_t chunk;
1404 	int error = 0;
1405 	bool allocd = false;
1406 
1407 	if (ap->a_ioflag & IO_APPEND)
1408 		uio->uio_offset = vp->v_size;
1409 
1410 	/* consult et? */
1411 	if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1412 		return etwrite(rn, uio);
1413 
1414 	/*
1415 	 * Otherwise, it's a case of ubcmove.
1416 	 */
1417 
1418 	/*
1419 	 * First, make sure we have enough storage.
1420 	 *
1421 	 * No, you don't need to tell me it's not very efficient.
1422 	 * No, it doesn't really support sparse files, just fakes it.
1423 	 */
1424 	newlen = uio->uio_offset + uio->uio_resid;
1425 	oldlen = 0; /* XXXgcc */
1426 	olddata = NULL;
1427 	if (rn->rn_dlen < newlen) {
1428 		oldlen = rn->rn_dlen;
1429 		olddata = rn->rn_data;
1430 
1431 		rn->rn_data = rump_hypermalloc(newlen, 0, false, "rumpfs");
1432 		if (rn->rn_data == NULL)
1433 			return ENOSPC;
1434 		rn->rn_dlen = newlen;
1435 		memset(rn->rn_data, 0, newlen);
1436 		memcpy(rn->rn_data, olddata, oldlen);
1437 		allocd = true;
1438 		uvm_vnp_setsize(vp, newlen);
1439 	}
1440 
1441 	/* ok, we have enough stooorage.  write */
1442 	while (uio->uio_resid > 0) {
1443 		chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1444 		if (chunk == 0)
1445 			break;
1446 		error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1447 		    UBC_WRITE | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1448 		if (error)
1449 			break;
1450 	}
1451 
1452 	if (allocd) {
1453 		if (error) {
1454 			rump_hyperfree(rn->rn_data, newlen);
1455 			rn->rn_data = olddata;
1456 			rn->rn_dlen = oldlen;
1457 			uvm_vnp_setsize(vp, oldlen);
1458 		} else {
1459 			rump_hyperfree(olddata, oldlen);
1460 		}
1461 	}
1462 
1463 	return error;
1464 }
1465 
1466 static int
1467 rump_vop_bmap(void *v)
1468 {
1469 	struct vop_bmap_args /* {
1470 		struct vnode *a_vp;
1471 		daddr_t a_bn;
1472 		struct vnode **a_vpp;
1473 		daddr_t *a_bnp;
1474 		int *a_runp;
1475 	} */ *ap = v;
1476 
1477 	/* 1:1 mapping */
1478 	if (ap->a_vpp)
1479 		*ap->a_vpp = ap->a_vp;
1480 	if (ap->a_bnp)
1481 		*ap->a_bnp = ap->a_bn;
1482 	if (ap->a_runp)
1483 		*ap->a_runp = 16;
1484 
1485 	return 0;
1486 }
1487 
1488 static int
1489 rump_vop_strategy(void *v)
1490 {
1491 	struct vop_strategy_args /* {
1492 		struct vnode *a_vp;
1493 		struct buf *a_bp;
1494 	} */ *ap = v;
1495 	struct vnode *vp = ap->a_vp;
1496 	struct rumpfs_node *rn = vp->v_data;
1497 	struct buf *bp = ap->a_bp;
1498 	off_t copylen, copyoff;
1499 	int error;
1500 
1501 	if (vp->v_type != VREG || rn->rn_flags & RUMPNODE_ET_PHONE_HOST) {
1502 		error = EINVAL;
1503 		goto out;
1504 	}
1505 
1506 	copyoff = bp->b_blkno << DEV_BSHIFT;
1507 	copylen = MIN(rn->rn_dlen - copyoff, bp->b_bcount);
1508 	if (BUF_ISWRITE(bp)) {
1509 		memcpy((uint8_t *)rn->rn_data + copyoff, bp->b_data, copylen);
1510 	} else {
1511 		memset((uint8_t*)bp->b_data + copylen, 0, bp->b_bcount-copylen);
1512 		memcpy(bp->b_data, (uint8_t *)rn->rn_data + copyoff, copylen);
1513 	}
1514 	bp->b_resid = 0;
1515 	error = 0;
1516 
1517  out:
1518 	bp->b_error = error;
1519 	biodone(bp);
1520 	return 0;
1521 }
1522 
1523 static int
1524 rump_vop_pathconf(void *v)
1525 {
1526 	struct vop_pathconf_args /* {
1527 		struct vnode *a_vp;
1528 		int a_name;
1529 		register_t *a_retval;
1530 	}; */ *ap = v;
1531 	int name = ap->a_name;
1532 	register_t *retval = ap->a_retval;
1533 
1534 	switch (name) {
1535 	case _PC_LINK_MAX:
1536 		*retval = LINK_MAX;
1537 		return 0;
1538 	case _PC_NAME_MAX:
1539 		*retval = RUMPFS_MAXNAMLEN;
1540 		return 0;
1541 	case _PC_PATH_MAX:
1542 		*retval = PATH_MAX;
1543 		return 0;
1544 	case _PC_PIPE_BUF:
1545 		*retval = PIPE_BUF;
1546 		return 0;
1547 	case _PC_CHOWN_RESTRICTED:
1548 		*retval = 1;
1549 		return 0;
1550 	case _PC_NO_TRUNC:
1551 		*retval = 1;
1552 		return 0;
1553 	case _PC_SYNC_IO:
1554 		*retval = 1;
1555 		return 0;
1556 	case _PC_FILESIZEBITS:
1557 		*retval = 43; /* this one goes to 11 */
1558 		return 0;
1559 	case _PC_SYMLINK_MAX:
1560 		*retval = MAXPATHLEN;
1561 		return 0;
1562 	case _PC_2_SYMLINKS:
1563 		*retval = 1;
1564 		return 0;
1565 	default:
1566 		return EINVAL;
1567 	}
1568 }
1569 
/*
 * Vnode operation that has nothing to do: the argument is ignored and
 * success (0) is reported unconditionally.
 */
static int
rump_vop_success(void *v)
{
	(void)v;
	return 0;
}
1576 
1577 static int
1578 rump_vop_inactive(void *v)
1579 {
1580 	struct vop_inactive_args /* {
1581 		struct vnode *a_vp;
1582 		bool *a_recycle;
1583 	} */ *ap = v;
1584 	struct vnode *vp = ap->a_vp;
1585 	struct rumpfs_node *rn = vp->v_data;
1586 
1587 	if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST && vp->v_type == VREG) {
1588 		if (rn->rn_readfd != -1) {
1589 			rumpuser_close(rn->rn_readfd);
1590 			rn->rn_readfd = -1;
1591 		}
1592 		if (rn->rn_writefd != -1) {
1593 			rumpuser_close(rn->rn_writefd);
1594 			rn->rn_writefd = -1;
1595 		}
1596 	}
1597 	*ap->a_recycle = (rn->rn_flags & RUMPNODE_CANRECLAIM) ? true : false;
1598 
1599 	VOP_UNLOCK(vp);
1600 	return 0;
1601 }
1602 
1603 static int
1604 rump_vop_reclaim(void *v)
1605 {
1606 	struct vop_reclaim_args /* {
1607 		struct vnode *a_vp;
1608 	} */ *ap = v;
1609 	struct vnode *vp = ap->a_vp;
1610 	struct rumpfs_node *rn = vp->v_data;
1611 
1612 	vcache_remove(vp->v_mount, &rn, sizeof(rn));
1613 	mutex_enter(&reclock);
1614 	rn->rn_vp = NULL;
1615 	mutex_exit(&reclock);
1616 	genfs_node_destroy(vp);
1617 	vp->v_data = NULL;
1618 
1619 	if (rn->rn_flags & RUMPNODE_CANRECLAIM) {
1620 		if (vp->v_type == VREG
1621 		    && (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0
1622 		    && rn->rn_data) {
1623 			rump_hyperfree(rn->rn_data, rn->rn_dlen);
1624 			rn->rn_data = NULL;
1625 		}
1626 
1627 		if (vp->v_type == VLNK)
1628 			PNBUF_PUT(rn->rn_linktarg);
1629 		if (rn->rn_hostpath)
1630 			free(rn->rn_hostpath, M_TEMP);
1631 		freeprivate(rn);
1632 	}
1633 
1634 	return 0;
1635 }
1636 
1637 static int
1638 rump_vop_spec(void *v)
1639 {
1640 	struct vop_generic_args *ap = v;
1641 	int (**opvec)(void *);
1642 
1643 	switch (ap->a_desc->vdesc_offset) {
1644 	case VOP_ACCESS_DESCOFFSET:
1645 	case VOP_GETATTR_DESCOFFSET:
1646 	case VOP_SETATTR_DESCOFFSET:
1647 	case VOP_LOCK_DESCOFFSET:
1648 	case VOP_UNLOCK_DESCOFFSET:
1649 	case VOP_ISLOCKED_DESCOFFSET:
1650 	case VOP_INACTIVE_DESCOFFSET:
1651 	case VOP_RECLAIM_DESCOFFSET:
1652 		opvec = rump_vnodeop_p;
1653 		break;
1654 	default:
1655 		opvec = spec_vnodeop_p;
1656 		break;
1657 	}
1658 
1659 	return VOCALL(opvec, ap->a_desc->vdesc_offset, v);
1660 }
1661 
1662 static int
1663 rump_vop_advlock(void *v)
1664 {
1665 	struct vop_advlock_args /* {
1666 		const struct vnodeop_desc *a_desc;
1667 		struct vnode *a_vp;
1668 		void *a_id;
1669 		int a_op;
1670 		struct flock *a_fl;
1671 		int a_flags;
1672 	} */ *ap = v;
1673 	struct vnode *vp = ap->a_vp;
1674 	struct rumpfs_node *rn = vp->v_data;
1675 
1676 	return lf_advlock(ap, &rn->rn_lockf, vp->v_size);
1677 }
1678 
1679 /*
1680  * Begin vfs-level stuff
1681  */
1682 
/*
 * VFS operations vector for rumpfs.
 *
 * mount, unmount, root, vget, loadvnode, init, done and mountroot are
 * the rumpfs_*() functions; statvfs and the rename lock hooks use the
 * genfs implementations.  start and sync are wired to nullop, and
 * quotactl, fhtovp, vptofh, snapshot, extattrctl and suspendctl to
 * eopnotsupp.
 */
VFS_PROTOS(rumpfs);
struct vfsops rumpfs_vfsops = {
	.vfs_name =		MOUNT_RUMPFS,
	.vfs_min_mount_data = 	0,
	.vfs_mount =		rumpfs_mount,
	.vfs_start =		(void *)nullop,
	.vfs_unmount = 		rumpfs_unmount,
	.vfs_root =		rumpfs_root,
	.vfs_quotactl =		(void *)eopnotsupp,
	.vfs_statvfs =		genfs_statvfs,
	.vfs_sync =		(void *)nullop,
	.vfs_vget =		rumpfs_vget,
	.vfs_loadvnode =	rumpfs_loadvnode,
	.vfs_fhtovp =		(void *)eopnotsupp,
	.vfs_vptofh =		(void *)eopnotsupp,
	.vfs_init =		rumpfs_init,
	.vfs_reinit =		NULL,
	.vfs_done =		rumpfs_done,
	.vfs_mountroot =	rumpfs_mountroot,
	.vfs_snapshot =		(void *)eopnotsupp,
	.vfs_extattrctl =	(void *)eopnotsupp,
	.vfs_suspendctl =	(void *)eopnotsupp,
	.vfs_renamelock_enter =	genfs_renamelock_enter,
	.vfs_renamelock_exit =	genfs_renamelock_exit,
	.vfs_opv_descs =	rump_opv_descs,
	/* vfs_refcount: not initialized here */
	/* vfs_list: not initialized here */
};
1711 
1712 static int
1713 rumpfs_mountfs(struct mount *mp)
1714 {
1715 	struct rumpfs_mount *rfsmp;
1716 	struct rumpfs_node *rn;
1717 	int error;
1718 
1719 	rfsmp = kmem_alloc(sizeof(*rfsmp), KM_SLEEP);
1720 
1721 	rn = makeprivate(VDIR, RUMPFS_DEFAULTMODE, NODEV, DEV_BSIZE, false);
1722 	rn->rn_parent = rn;
1723 	if ((error = vcache_get(mp, &rn, sizeof(rn), &rfsmp->rfsmp_rvp))
1724 	    != 0) {
1725 		freeprivate(rn);
1726 		kmem_free(rfsmp, sizeof(*rfsmp));
1727 		return error;
1728 	}
1729 
1730 	rfsmp->rfsmp_rvp->v_vflag |= VV_ROOT;
1731 
1732 	mp->mnt_data = rfsmp;
1733 	mp->mnt_stat.f_namemax = RUMPFS_MAXNAMLEN;
1734 	mp->mnt_stat.f_iosize = 512;
1735 	mp->mnt_flag |= MNT_LOCAL;
1736 	mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO;
1737 	mp->mnt_fs_bshift = DEV_BSHIFT;
1738 	vfs_getnewfsid(mp);
1739 
1740 	return 0;
1741 }
1742 
1743 int
1744 rumpfs_mount(struct mount *mp, const char *mntpath, void *arg, size_t *alen)
1745 {
1746 	int error;
1747 
1748 	if (mp->mnt_flag & MNT_UPDATE) {
1749 		return 0;
1750 	}
1751 
1752 	error = set_statvfs_info(mntpath, UIO_USERSPACE, "rumpfs", UIO_SYSSPACE,
1753 	    mp->mnt_op->vfs_name, mp, curlwp);
1754 	if (error)
1755 		return error;
1756 
1757 	return rumpfs_mountfs(mp);
1758 }
1759 
1760 int
1761 rumpfs_unmount(struct mount *mp, int mntflags)
1762 {
1763 	struct rumpfs_mount *rfsmp = mp->mnt_data;
1764 	int flags = 0, error;
1765 
1766 	if (panicstr || mntflags & MNT_FORCE)
1767 		flags |= FORCECLOSE;
1768 
1769 	if (rfsmp->rfsmp_rvp->v_usecount > 1 && (flags & FORCECLOSE) == 0)
1770 		return EBUSY;
1771 
1772 	if ((error = vflush(mp, rfsmp->rfsmp_rvp, flags)) != 0)
1773 		return error;
1774 	vgone(rfsmp->rfsmp_rvp);
1775 
1776 	kmem_free(rfsmp, sizeof(*rfsmp));
1777 
1778 	return 0;
1779 }
1780 
1781 int
1782 rumpfs_root(struct mount *mp, struct vnode **vpp)
1783 {
1784 	struct rumpfs_mount *rfsmp = mp->mnt_data;
1785 
1786 	vref(rfsmp->rfsmp_rvp);
1787 	vn_lock(rfsmp->rfsmp_rvp, LK_EXCLUSIVE | LK_RETRY);
1788 	*vpp = rfsmp->rfsmp_rvp;
1789 	return 0;
1790 }
1791 
/*
 * VFS vget entry point: looking up a vnode by inode number is not
 * supported.  All arguments are ignored and EOPNOTSUPP is returned.
 */
int
rumpfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	(void)mp;
	(void)ino;
	(void)vpp;

	return EOPNOTSUPP;
}
1798 
1799 int
1800 rumpfs_loadvnode(struct mount *mp, struct vnode *vp,
1801     const void *key, size_t key_len, const void **new_key)
1802 {
1803 	struct rumpfs_node *rn;
1804 	struct vattr *va;
1805 
1806 	KASSERT(!mutex_owned(&reclock));
1807 
1808 	KASSERT(key_len == sizeof(rn));
1809 	memcpy(&rn, key, key_len);
1810 
1811 	va = &rn->rn_va;
1812 
1813 	vp->v_tag = VT_RUMP;
1814 	vp->v_type = va->va_type;
1815 	switch (vp->v_type) {
1816 	case VCHR:
1817 	case VBLK:
1818 		vp->v_op = rump_specop_p;
1819 		spec_node_init(vp, va->va_rdev);
1820 		break;
1821 	default:
1822 		vp->v_op = rump_vnodeop_p;
1823 		break;
1824 	}
1825 	vp->v_size = vp->v_writesize = va->va_size;
1826 	vp->v_data = rn;
1827 
1828 	genfs_node_init(vp, &rumpfs_genfsops);
1829 	mutex_enter(&reclock);
1830 	rn->rn_vp = vp;
1831 	mutex_exit(&reclock);
1832 
1833 	*new_key = &vp->v_data;
1834 
1835 	return 0;
1836 }
1837 
1838 void
1839 rumpfs_init()
1840 {
1841 	extern rump_etfs_register_withsize_fn rump__etfs_register;
1842 	extern rump_etfs_remove_fn rump__etfs_remove;
1843 	extern struct rump_boot_etfs *ebstart;
1844 	struct rump_boot_etfs *eb;
1845 
1846 	CTASSERT(RUMP_ETFS_SIZE_ENDOFF == RUMPBLK_SIZENOTSET);
1847 
1848 	mutex_init(&reclock, MUTEX_DEFAULT, IPL_NONE);
1849 	mutex_init(&etfs_lock, MUTEX_DEFAULT, IPL_NONE);
1850 
1851 	rump__etfs_register = etfsregister;
1852 	rump__etfs_remove = etfsremove;
1853 
1854 	for (eb = ebstart; eb; eb = eb->_eb_next) {
1855 		eb->eb_status = etfsregister(eb->eb_key, eb->eb_hostpath,
1856 		    eb->eb_type, eb->eb_begin, eb->eb_size);
1857 	}
1858 }
1859 
1860 void
1861 rumpfs_done()
1862 {
1863 
1864 	mutex_destroy(&reclock);
1865 	mutex_destroy(&etfs_lock);
1866 }
1867 
1868 int
1869 rumpfs_mountroot()
1870 {
1871 	struct mount *mp;
1872 	int error;
1873 
1874 	if ((error = vfs_rootmountalloc(MOUNT_RUMPFS, "rootdev", &mp)) != 0) {
1875 		vrele(rootvp);
1876 		return error;
1877 	}
1878 
1879 	if ((error = rumpfs_mountfs(mp)) != 0)
1880 		panic("mounting rootfs failed: %d", error);
1881 
1882 	mountlist_append(mp);
1883 
1884 	error = set_statvfs_info("/", UIO_SYSSPACE, "rumpfs", UIO_SYSSPACE,
1885 	    mp->mnt_op->vfs_name, mp, curlwp);
1886 	if (error)
1887 		panic("set_statvfs_info failed for rootfs: %d", error);
1888 
1889 	mp->mnt_flag &= ~MNT_RDONLY;
1890 	vfs_unbusy(mp, false, NULL);
1891 
1892 	return 0;
1893 }
1894