1 /*	$NetBSD: rumpfs.c,v 1.103 2011/09/27 14:24:52 mbalmer Exp $	*/
2 
3 /*
4  * Copyright (c) 2009, 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: rumpfs.c,v 1.103 2011/09/27 14:24:52 mbalmer Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/atomic.h>
33 #include <sys/buf.h>
34 #include <sys/dirent.h>
35 #include <sys/errno.h>
36 #include <sys/filedesc.h>
37 #include <sys/fcntl.h>
38 #include <sys/kauth.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/lock.h>
44 #include <sys/lockf.h>
45 #include <sys/queue.h>
46 #include <sys/stat.h>
47 #include <sys/syscallargs.h>
48 #include <sys/vnode.h>
49 #include <sys/unistd.h>
50 
51 #include <miscfs/fifofs/fifo.h>
52 #include <miscfs/specfs/specdev.h>
53 #include <miscfs/genfs/genfs.h>
54 #include <miscfs/genfs/genfs_node.h>
55 
56 #include <uvm/uvm_extern.h>
57 
58 #include <rump/rumpuser.h>
59 
60 #include "rump_private.h"
61 #include "rump_vfs_private.h"
62 
63 static int rump_vop_lookup(void *);
64 static int rump_vop_getattr(void *);
65 static int rump_vop_setattr(void *);
66 static int rump_vop_mkdir(void *);
67 static int rump_vop_rmdir(void *);
68 static int rump_vop_remove(void *);
69 static int rump_vop_mknod(void *);
70 static int rump_vop_create(void *);
71 static int rump_vop_inactive(void *);
72 static int rump_vop_reclaim(void *);
73 static int rump_vop_success(void *);
74 static int rump_vop_readdir(void *);
75 static int rump_vop_spec(void *);
76 static int rump_vop_read(void *);
77 static int rump_vop_write(void *);
78 static int rump_vop_open(void *);
79 static int rump_vop_symlink(void *);
80 static int rump_vop_readlink(void *);
81 static int rump_vop_whiteout(void *);
82 static int rump_vop_pathconf(void *);
83 static int rump_vop_bmap(void *);
84 static int rump_vop_strategy(void *);
85 static int rump_vop_advlock(void *);
86 static int rump_vop_access(void *);
87 
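/*
 * Minimal fifo op vector: rumpfs itself has no fifo support, so every
 * fifo operation dispatched through it falls back to vn_default_error.
 */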
88 int (**fifo_vnodeop_p)(void *);
89 const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
90 	{ &vop_default_desc, vn_default_error },
91 	{ NULL, NULL }
92 };
93 const struct vnodeopv_desc fifo_vnodeop_opv_desc =
94 	{ &fifo_vnodeop_p, fifo_vnodeop_entries };
95 
96 int (**rump_vnodeop_p)(void *);
97 const struct vnodeopv_entry_desc rump_vnodeop_entries[] = {
98 	{ &vop_default_desc, vn_default_error },
99 	{ &vop_lookup_desc, rump_vop_lookup },
100 	{ &vop_getattr_desc, rump_vop_getattr },
101 	{ &vop_setattr_desc, rump_vop_setattr },
102 	{ &vop_mkdir_desc, rump_vop_mkdir },
103 	{ &vop_rmdir_desc, rump_vop_rmdir },
104 	{ &vop_remove_desc, rump_vop_remove },
105 	{ &vop_mknod_desc, rump_vop_mknod },
106 	{ &vop_create_desc, rump_vop_create },
107 	{ &vop_symlink_desc, rump_vop_symlink },
108 	{ &vop_readlink_desc, rump_vop_readlink },
109 	{ &vop_access_desc, rump_vop_access },
110 	{ &vop_readdir_desc, rump_vop_readdir },
111 	{ &vop_read_desc, rump_vop_read },
112 	{ &vop_write_desc, rump_vop_write },
113 	{ &vop_open_desc, rump_vop_open },
114 	{ &vop_close_desc, genfs_nullop },
115 	{ &vop_seek_desc, genfs_seek },
116 	{ &vop_getpages_desc, genfs_getpages },
117 	{ &vop_putpages_desc, genfs_putpages },
118 	{ &vop_whiteout_desc, rump_vop_whiteout },
119 	{ &vop_fsync_desc, rump_vop_success },
120 	{ &vop_lock_desc, genfs_lock },
121 	{ &vop_unlock_desc, genfs_unlock },
122 	{ &vop_islocked_desc, genfs_islocked },
123 	{ &vop_inactive_desc, rump_vop_inactive },
124 	{ &vop_reclaim_desc, rump_vop_reclaim },
125 	{ &vop_link_desc, genfs_eopnotsupp },
126 	{ &vop_pathconf_desc, rump_vop_pathconf },
127 	{ &vop_bmap_desc, rump_vop_bmap },
128 	{ &vop_strategy_desc, rump_vop_strategy },
129 	{ &vop_advlock_desc, rump_vop_advlock },
130 	{ NULL, NULL }
131 };
132 const struct vnodeopv_desc rump_vnodeop_opv_desc =
133 	{ &rump_vnodeop_p, rump_vnodeop_entries };
134 
135 int (**rump_specop_p)(void *);
136 const struct vnodeopv_entry_desc rump_specop_entries[] = {
137 	{ &vop_default_desc, rump_vop_spec },
138 	{ NULL, NULL }
139 };
140 const struct vnodeopv_desc rump_specop_opv_desc =
141 	{ &rump_specop_p, rump_specop_entries };
142 
143 const struct vnodeopv_desc * const rump_opv_descs[] = {
144 	&rump_vnodeop_opv_desc,
145 	&rump_specop_opv_desc,
146 	NULL
147 };
148 
149 #define RUMPFS_WHITEOUT ((void *)-1)
150 #define RDENT_ISWHITEOUT(rdp) (rdp->rd_node == RUMPFS_WHITEOUT)
151 struct rumpfs_dent {
152 	char *rd_name;
153 	int rd_namelen;
154 	struct rumpfs_node *rd_node;
155 
156 	LIST_ENTRY(rumpfs_dent) rd_entries;
157 };
158 
159 struct genfs_ops rumpfs_genfsops = {
160 	.gop_size = genfs_size,
161 	.gop_write = genfs_gop_write,
162 
163 	/* optional */
164 	.gop_alloc = NULL,
165 	.gop_markupdate = NULL,
166 };
167 
168 struct rumpfs_node {
169 	struct genfs_node rn_gn;
170 	struct vattr rn_va;
171 	struct vnode *rn_vp;
172 	char *rn_hostpath;
173 	int rn_flags;
174 	struct lockf *rn_lockf;
175 
176 	union {
177 		struct {		/* VREG */
178 			int readfd;
179 			int writefd;
180 			uint64_t offset;
181 		} reg;
182 		struct {		/* VREG, no etfs host backing */
183 			void *data;
184 			size_t dlen;
185 		} reg_noet;
186 		struct {		/* VDIR */
187 			LIST_HEAD(, rumpfs_dent) dents;
188 			struct rumpfs_node *parent;
189 			int flags;
190 		} dir;
191 		struct {		/* VLNK */
192 			char *target;
193 			size_t len;
194 		} link;
195 	} rn_u;
196 };
197 #define rn_readfd	rn_u.reg.readfd
198 #define rn_writefd	rn_u.reg.writefd
199 #define rn_offset	rn_u.reg.offset
200 #define rn_data		rn_u.reg_noet.data
201 #define rn_dlen		rn_u.reg_noet.dlen
202 #define rn_dir		rn_u.dir.dents
203 #define rn_parent	rn_u.dir.parent
204 #define rn_linktarg	rn_u.link.target
205 #define rn_linklen	rn_u.link.len
206 
207 #define RUMPNODE_CANRECLAIM	0x01	/* ok to free node when reclaimed */
208 #define RUMPNODE_DIR_ET		0x02	/* lookups map to files in a host dir */
209 #define RUMPNODE_DIR_ETSUBS	0x04	/* ... including subdirectories */
210 #define RUMPNODE_ET_PHONE_HOST	0x10	/* node I/O is done on the host file */
211 
212 struct rumpfs_mount {
213 	struct vnode *rfsmp_rvp;
214 };
215 
216 #define INO_WHITEOUT 1
217 static unsigned lastino = 2;
218 static kmutex_t reclock;
219 
220 static void freedir(struct rumpfs_node *, struct componentname *);
221 static struct rumpfs_node *makeprivate(enum vtype, dev_t, off_t, bool);
222 
223 /*
224  * Extra Terrestrial stuff.  We map a given key (pathname) to a file on
225  * the host FS.  ET phones home only from the root node of rumpfs.
226  *
227  * When an etfs node is removed, a vnode potentially behind it is not
228  * immediately recycled.
229  */
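
/*
 * Illustrative usage only (not part of this file's logic): a rump client
 * typically maps a host file or device into the rump namespace with
 * rump_etfs_register().  The key must be an absolute rump path; hostpath
 * is resolved by the host.  The paths below are made up for the example.
 *
 *	error = rump_etfs_register("/dev/image", "./disk.img", RUMP_ETFS_BLK);
 *	if (error)
 *		return error;
 *
 * RUMP_ETFS_DIR maps a single host directory, while RUMP_ETFS_DIR_SUBDIRS
 * additionally lets lookups descend into its subdirectories.
 */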
230 
231 struct etfs {
232 	char et_key[MAXPATHLEN];
233 	size_t et_keylen;
234 	bool et_prefixkey;
235 	bool et_removing;
236 	devminor_t et_blkmin;
237 
238 	LIST_ENTRY(etfs) et_entries;
239 
240 	struct rumpfs_node *et_rn;
241 };
242 static kmutex_t etfs_lock;
243 static LIST_HEAD(, etfs) etfs_list = LIST_HEAD_INITIALIZER(etfs_list);
244 
245 static enum vtype
246 ettype_to_vtype(enum rump_etfs_type et)
247 {
248 	enum vtype vt;
249 
250 	switch (et) {
251 	case RUMP_ETFS_REG:
252 		vt = VREG;
253 		break;
254 	case RUMP_ETFS_BLK:
255 		vt = VBLK;
256 		break;
257 	case RUMP_ETFS_CHR:
258 		vt = VCHR;
259 		break;
260 	case RUMP_ETFS_DIR:
261 		vt = VDIR;
262 		break;
263 	case RUMP_ETFS_DIR_SUBDIRS:
264 		vt = VDIR;
265 		break;
266 	default:
267 		panic("invalid et type: %d", et);
268 	}
269 
270 	return vt;
271 }
272 
273 static enum vtype
274 hft_to_vtype(int hft)
275 {
276 	enum vtype vt;
277 
278 	switch (hft) {
279 	case RUMPUSER_FT_OTHER:
280 		vt = VNON;
281 		break;
282 	case RUMPUSER_FT_DIR:
283 		vt = VDIR;
284 		break;
285 	case RUMPUSER_FT_REG:
286 		vt = VREG;
287 		break;
288 	case RUMPUSER_FT_BLK:
289 		vt = VBLK;
290 		break;
291 	case RUMPUSER_FT_CHR:
292 		vt = VCHR;
293 		break;
294 	default:
295 		vt = VNON;
296 		break;
297 	}
298 
299 	return vt;
300 }
301 
302 static bool
303 etfs_find(const char *key, struct etfs **etp, bool forceprefix)
304 {
305 	struct etfs *et;
306 	size_t keylen = strlen(key);
307 
308 	KASSERT(mutex_owned(&etfs_lock));
309 
310 	LIST_FOREACH(et, &etfs_list, et_entries) {
311 		if ((keylen == et->et_keylen || et->et_prefixkey || forceprefix)
312 		    && strncmp(key, et->et_key, et->et_keylen) == 0) {
313 			if (etp)
314 				*etp = et;
315 			return true;
316 		}
317 	}
318 
319 	return false;
320 }
321 
322 #define REGDIR(ftype) \
323     ((ftype) == RUMP_ETFS_DIR || (ftype) == RUMP_ETFS_DIR_SUBDIRS)
324 static int
325 doregister(const char *key, const char *hostpath,
326 	enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
327 {
328 	char buf[9];
329 	struct etfs *et;
330 	struct rumpfs_node *rn;
331 	uint64_t fsize;
332 	dev_t rdev = NODEV;
333 	devminor_t dmin = -1;
334 	int hft, error;
335 
336 	if (key[0] != '/') {
337 		return EINVAL;
338 	}
339 	while (key[0] == '/') {
340 		key++;
341 	}
342 
343 	if (rumpuser_getfileinfo(hostpath, &fsize, &hft, &error))
344 		return error;
345 
346 	/* etfs directory requires a directory on the host */
347 	if (REGDIR(ftype)) {
348 		if (hft != RUMPUSER_FT_DIR)
349 			return ENOTDIR;
350 		if (begin != 0)
351 			return EISDIR;
352 		if (size != RUMP_ETFS_SIZE_ENDOFF)
353 			return EISDIR;
354 		size = fsize;
355 	} else {
356 		if (begin > fsize)
357 			return EINVAL;
358 		if (size == RUMP_ETFS_SIZE_ENDOFF)
359 			size = fsize - begin;
360 		if (begin + size > fsize)
361 			return EINVAL;
362 	}
363 
364 	if (ftype == RUMP_ETFS_BLK || ftype == RUMP_ETFS_CHR) {
365 		error = rumpblk_register(hostpath, &dmin, begin, size);
366 		if (error != 0) {
367 			return error;
368 		}
369 		rdev = makedev(RUMPBLK_DEVMAJOR, dmin);
370 	}
371 
372 	et = kmem_alloc(sizeof(*et), KM_SLEEP);
373 	strcpy(et->et_key, key);
374 	et->et_keylen = strlen(et->et_key);
375 	et->et_rn = rn = makeprivate(ettype_to_vtype(ftype), rdev, size, true);
376 	et->et_removing = false;
377 	et->et_blkmin = dmin;
378 
379 	rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
380 
381 	if (ftype == RUMP_ETFS_REG || REGDIR(ftype) || et->et_blkmin != -1) {
382 		size_t len = strlen(hostpath)+1;
383 
384 		rn->rn_hostpath = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
385 		memcpy(rn->rn_hostpath, hostpath, len);
386 		rn->rn_offset = begin;
387 	}
388 
389 	if (REGDIR(ftype)) {
390 		rn->rn_flags |= RUMPNODE_DIR_ET;
391 		et->et_prefixkey = true;
392 	} else {
393 		et->et_prefixkey = false;
394 	}
395 
396 	if (ftype == RUMP_ETFS_DIR_SUBDIRS)
397 		rn->rn_flags |= RUMPNODE_DIR_ETSUBS;
398 
399 	mutex_enter(&etfs_lock);
400 	if (etfs_find(key, NULL, REGDIR(ftype))) {
401 		mutex_exit(&etfs_lock);
402 		if (et->et_blkmin != -1)
403 			rumpblk_deregister(hostpath);
404 		if (et->et_rn->rn_hostpath != NULL)
405 			free(et->et_rn->rn_hostpath, M_TEMP);
406 		kmem_free(et->et_rn, sizeof(*et->et_rn));
407 		kmem_free(et, sizeof(*et));
408 		return EEXIST;
409 	}
410 	LIST_INSERT_HEAD(&etfs_list, et, et_entries);
411 	mutex_exit(&etfs_lock);
412 
413 	if (ftype == RUMP_ETFS_BLK) {
414 		format_bytes(buf, sizeof(buf), size);
415 		aprint_verbose("/%s: hostpath %s (%s)\n", key, hostpath, buf);
416 	}
417 
418 	return 0;
419 }
420 #undef REGDIR
421 
422 int
423 rump_etfs_register(const char *key, const char *hostpath,
424 	enum rump_etfs_type ftype)
425 {
426 
427 	return doregister(key, hostpath, ftype, 0, RUMP_ETFS_SIZE_ENDOFF);
428 }
429 
430 int
431 rump_etfs_register_withsize(const char *key, const char *hostpath,
432 	enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
433 {
434 
435 	return doregister(key, hostpath, ftype, begin, size);
436 }
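
/*
 * Illustrative only: the _withsize variant maps a byte window of the host
 * file, e.g. one partition of a disk image (offsets below are made up):
 *
 *	error = rump_etfs_register_withsize("/dev/part0", "./disk.img",
 *	    RUMP_ETFS_BLK, 512*63, 1024*1024*16);
 *
 * Passing RUMP_ETFS_SIZE_ENDOFF as the size maps everything from "begin"
 * to the end of the host file, which is what plain rump_etfs_register()
 * does.
 */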
437 
438 /* remove etfs mapping.  caller's responsibility to make sure it's not in use */
439 int
440 rump_etfs_remove(const char *key)
441 {
442 	struct etfs *et;
443 	size_t keylen;
444 	int rv;
445 
446 	if (key[0] != '/') {
447 		return EINVAL;
448 	}
449 	while (key[0] == '/') {
450 		key++;
451 	}
452 
453 	keylen = strlen(key);
454 
455 	mutex_enter(&etfs_lock);
456 	LIST_FOREACH(et, &etfs_list, et_entries) {
457 		if (keylen == et->et_keylen && strcmp(et->et_key, key) == 0) {
458 			if (et->et_removing)
459 				et = NULL;
460 			else
461 				et->et_removing = true;
462 			break;
463 		}
464 	}
465 	mutex_exit(&etfs_lock);
466 	if (!et)
467 		return ENOENT;
468 
469 	/*
470 	 * ok, we know what we want to remove and have signalled there
471 	 * actually are men at work.  first, unregister from rumpblk
472 	 */
473 	if (et->et_blkmin != -1) {
474 		rv = rumpblk_deregister(et->et_rn->rn_hostpath);
475 	} else {
476 		rv = 0;
477 	}
478 	KASSERT(rv == 0);
479 
480 	/* then do the actual removal */
481 	mutex_enter(&etfs_lock);
482 	LIST_REMOVE(et, et_entries);
483 	mutex_exit(&etfs_lock);
484 
485 	/* node is unreachable, safe to nuke all device copies */
486 	if (et->et_blkmin != -1) {
487 		vdevgone(RUMPBLK_DEVMAJOR, et->et_blkmin, et->et_blkmin, VBLK);
488 	} else {
489 		struct vnode *vp;
490 
491 		mutex_enter(&reclock);
492 		if ((vp = et->et_rn->rn_vp) != NULL)
493 			mutex_enter(vp->v_interlock);
494 		mutex_exit(&reclock);
495 		if (vp && vget(vp, 0) == 0)
496 			vgone(vp);
497 	}
498 
499 	if (et->et_rn->rn_hostpath != NULL)
500 		free(et->et_rn->rn_hostpath, M_TEMP);
501 	kmem_free(et->et_rn, sizeof(*et->et_rn));
502 	kmem_free(et, sizeof(*et));
503 
504 	return 0;
505 }
506 
507 /*
508  * rumpfs
509  */
510 
511 static struct rumpfs_node *
512 makeprivate(enum vtype vt, dev_t rdev, off_t size, bool et)
513 {
514 	struct rumpfs_node *rn;
515 	struct vattr *va;
516 	struct timespec ts;
517 
518 	rn = kmem_zalloc(sizeof(*rn), KM_SLEEP);
519 
520 	switch (vt) {
521 	case VDIR:
522 		LIST_INIT(&rn->rn_dir);
523 		break;
524 	case VREG:
525 		if (et) {
526 			rn->rn_readfd = -1;
527 			rn->rn_writefd = -1;
528 		}
529 		break;
530 	default:
531 		break;
532 	}
533 
534 	nanotime(&ts);
535 
536 	va = &rn->rn_va;
537 	va->va_type = vt;
538 	va->va_mode = 0755;
539 	if (vt == VDIR)
540 		va->va_nlink = 2;
541 	else
542 		va->va_nlink = 1;
543 	va->va_uid = 0;
544 	va->va_gid = 0;
545 	va->va_fsid =
546 	va->va_fileid = atomic_inc_uint_nv(&lastino);
547 	va->va_size = size;
548 	va->va_blocksize = 512;
549 	va->va_atime = ts;
550 	va->va_mtime = ts;
551 	va->va_ctime = ts;
552 	va->va_birthtime = ts;
553 	va->va_gen = 0;
554 	va->va_flags = 0;
555 	va->va_rdev = rdev;
556 	va->va_bytes = 512;
557 	va->va_filerev = 0;
558 	va->va_vaflags = 0;
559 
560 	return rn;
561 }
562 
563 static int
564 makevnode(struct mount *mp, struct rumpfs_node *rn, struct vnode **vpp)
565 {
566 	struct vnode *vp;
567 	int (**vpops)(void *);
568 	struct vattr *va = &rn->rn_va;
569 	int rv;
570 
571 	KASSERT(!mutex_owned(&reclock));
572 
573 	if (va->va_type == VCHR || va->va_type == VBLK) {
574 		vpops = rump_specop_p;
575 	} else {
576 		vpops = rump_vnodeop_p;
577 	}
578 
579 	rv = getnewvnode(VT_RUMP, mp, vpops, NULL, &vp);
580 	if (rv)
581 		return rv;
582 
583 	vp->v_size = vp->v_writesize = va->va_size;
584 	vp->v_type = va->va_type;
585 
586 	if (vpops == rump_specop_p) {
587 		spec_node_init(vp, va->va_rdev);
588 	}
589 	vp->v_data = rn;
590 
591 	genfs_node_init(vp, &rumpfs_genfsops);
592 	vn_lock(vp, LK_RETRY | LK_EXCLUSIVE);
593 	mutex_enter(&reclock);
594 	rn->rn_vp = vp;
595 	mutex_exit(&reclock);
596 
597 	*vpp = vp;
598 
599 	return 0;
600 }
601 
602 
603 static void
604 makedir(struct rumpfs_node *rnd,
605 	struct componentname *cnp, struct rumpfs_node *rn)
606 {
607 	struct rumpfs_dent *rdent;
608 
609 	rdent = kmem_alloc(sizeof(*rdent), KM_SLEEP);
610 	rdent->rd_name = kmem_alloc(cnp->cn_namelen+1, KM_SLEEP);
611 	rdent->rd_node = rn;
612 	strlcpy(rdent->rd_name, cnp->cn_nameptr, cnp->cn_namelen+1);
613 	rdent->rd_namelen = strlen(rdent->rd_name);
614 
615 	if ((cnp->cn_flags & ISWHITEOUT) != 0) {
616 		KASSERT((cnp->cn_flags & DOWHITEOUT) == 0);
617 		freedir(rnd, cnp);
618 	}
619 	LIST_INSERT_HEAD(&rnd->rn_dir, rdent, rd_entries);
620 }
621 
622 static void
623 freedir(struct rumpfs_node *rnd, struct componentname *cnp)
624 {
625 	struct rumpfs_dent *rd = NULL;
626 
627 	LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
628 		if (rd->rd_namelen == cnp->cn_namelen &&
629 		    strncmp(rd->rd_name, cnp->cn_nameptr,
630 		            cnp->cn_namelen) == 0)
631 			break;
632 	}
633 	if (rd == NULL)
634 		panic("could not find directory entry: %s", cnp->cn_nameptr);
635 
636 	if (cnp->cn_flags & DOWHITEOUT) {
637 		rd->rd_node = RUMPFS_WHITEOUT;
638 	} else {
639 		LIST_REMOVE(rd, rd_entries);
640 		kmem_free(rd->rd_name, rd->rd_namelen+1);
641 		kmem_free(rd, sizeof(*rd));
642 	}
643 }
644 
645 /*
646  * Simple lookup for rump file systems.
647  *
648  * uhm, this is twisted.  C F C C, hope of C C F C looming
649  */
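/*
 * Rough order of business below: read-only/rename checks and "." first,
 * then etfs mappings (consulted only when dvp is the root vnode), then
 * host-backed (RUMPNODE_DIR_ET) directories, and finally the in-memory
 * dirent list with its whiteout and CREATE handling.
 */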
650 static int
651 rump_vop_lookup(void *v)
652 {
653 	struct vop_lookup_args /* {
654 		struct vnode *a_dvp;
655 		struct vnode **a_vpp;
656 		struct componentname *a_cnp;
657 	}; */ *ap = v;
658 	struct componentname *cnp = ap->a_cnp;
659 	struct vnode *dvp = ap->a_dvp;
660 	struct vnode **vpp = ap->a_vpp;
661 	struct vnode *vp;
662 	struct rumpfs_node *rnd = dvp->v_data, *rn;
663 	struct rumpfs_dent *rd = NULL;
664 	struct etfs *et;
665 	bool dotdot = (cnp->cn_flags & ISDOTDOT) != 0;
666 	int rv = 0;
667 	const char *cp;
668 
669 	*vpp = NULL;
670 
671 	if ((cnp->cn_flags & ISLASTCN)
672 	    && (dvp->v_mount->mnt_flag & MNT_RDONLY)
673 	    && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
674 		return EROFS;
675 
676 	/* check for dot; if so, return dvp directly */
677 	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
678 		vref(dvp);
679 		*vpp = dvp;
680 		return 0;
681 	}
682 
683 	/* we don't do rename */
684 	if (!(((cnp->cn_flags & ISLASTCN) == 0) || (cnp->cn_nameiop != RENAME)))
685 		return EOPNOTSUPP;
686 
687 	/* check for etfs */
688 	if (dvp == rootvnode &&
689 	    (cnp->cn_nameiop == LOOKUP || cnp->cn_nameiop == CREATE)) {
690 		bool found;
691 		mutex_enter(&etfs_lock);
692 		found = etfs_find(cnp->cn_nameptr, &et, false);
693 		mutex_exit(&etfs_lock);
694 
695 		if (found) {
696 			rn = et->et_rn;
697 			cnp->cn_consume += et->et_keylen - cnp->cn_namelen;
698 			/*
699 			 * consume trailing slashes if any and clear
700 			 * REQUIREDIR if we consumed the full path.
701 			 */
702 			cp = &cnp->cn_nameptr[cnp->cn_namelen];
703 			cp += cnp->cn_consume;
704 			KASSERT(*cp == '\0' || *cp == '/');
705 			if (*cp == '\0' && rn->rn_va.va_type != VDIR)
706 				cnp->cn_flags &= ~REQUIREDIR;
707 			while (*cp++ == '/')
708 				cnp->cn_consume++;
709 			goto getvnode;
710 		}
711 	}
712 
713 	if (rnd->rn_flags & RUMPNODE_DIR_ET) {
714 		uint64_t fsize;
715 		char *newpath;
716 		size_t newpathlen;
717 		int hft, error;
718 
719 		if (dotdot)
720 			return EOPNOTSUPP;
721 
722 		newpathlen = strlen(rnd->rn_hostpath) + 1 + cnp->cn_namelen + 1;
723 		newpath = malloc(newpathlen, M_TEMP, M_WAITOK);
724 
725 		strlcpy(newpath, rnd->rn_hostpath, newpathlen);
726 		strlcat(newpath, "/", newpathlen);
727 		strlcat(newpath, cnp->cn_nameptr, newpathlen);
728 
729 		if (rumpuser_getfileinfo(newpath, &fsize, &hft, &error)) {
730 			free(newpath, M_TEMP);
731 			return error;
732 		}
733 
734 		/* allow only dirs and regular files */
735 		if (hft != RUMPUSER_FT_REG && hft != RUMPUSER_FT_DIR) {
736 			free(newpath, M_TEMP);
737 			return ENOENT;
738 		}
739 
740 		rn = makeprivate(hft_to_vtype(hft), NODEV, fsize, true);
741 		rn->rn_flags |= RUMPNODE_CANRECLAIM;
742 		if (rnd->rn_flags & RUMPNODE_DIR_ETSUBS) {
743 			rn->rn_flags |= RUMPNODE_DIR_ET | RUMPNODE_DIR_ETSUBS;
744 			rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
745 		}
746 		rn->rn_hostpath = newpath;
747 
748 		goto getvnode;
749 	} else {
750 		if (dotdot) {
751 			if ((rn = rnd->rn_parent) != NULL)
752 				goto getvnode;
753 		} else {
754 			LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
755 				if (rd->rd_namelen == cnp->cn_namelen &&
756 				    strncmp(rd->rd_name, cnp->cn_nameptr,
757 				      cnp->cn_namelen) == 0)
758 					break;
759 			}
760 		}
761 	}
762 
763 	if (!rd && ((cnp->cn_flags & ISLASTCN) == 0||cnp->cn_nameiop != CREATE))
764 		return ENOENT;
765 
766 	if (!rd && (cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
767 		if (dvp->v_mount->mnt_flag & MNT_RDONLY)
768 			return EROFS;
769 		return EJUSTRETURN;
770 	}
771 
772 	if (RDENT_ISWHITEOUT(rd)) {
773 		cnp->cn_flags |= ISWHITEOUT;
774 		if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE)
775 			return EJUSTRETURN;
776 		return ENOENT;
777 	}
778 
779 	rn = rd->rd_node;
780 
781  getvnode:
782 	KASSERT(rn);
783 	if (dotdot)
784 		VOP_UNLOCK(dvp);
785 	mutex_enter(&reclock);
786 	if ((vp = rn->rn_vp)) {
787 		mutex_enter(vp->v_interlock);
788 		mutex_exit(&reclock);
789 		if (vget(vp, LK_EXCLUSIVE)) {
790 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
791 			goto getvnode;
792 		}
793 		*vpp = vp;
794 	} else {
795 		mutex_exit(&reclock);
796 		rv = makevnode(dvp->v_mount, rn, vpp);
797 	}
798 	if (dotdot)
799 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
800 
801 	return rv;
802 }
803 
804 static int
805 rump_vop_access(void *v)
806 {
807 	struct vop_access_args /* {
808 		const struct vnodeop_desc *a_desc;
809 		struct vnode *a_vp;
810 		int a_mode;
811 		kauth_cred_t a_cred;
812 	} */ *ap = v;
813 	struct vnode *vp = ap->a_vp;
814 	int mode = ap->a_mode;
815 
816 	if (mode & VWRITE) {
817 		switch (vp->v_type) {
818 		case VDIR:
819 		case VLNK:
820 		case VREG:
821 			if ((vp->v_mount->mnt_flag & MNT_RDONLY))
822 				return EROFS;
823 			break;
824 		default:
825 			break;
826 		}
827 	}
828 
829 	return 0;
830 }
831 
832 static int
833 rump_vop_getattr(void *v)
834 {
835 	struct vop_getattr_args /* {
836 		struct vnode *a_vp;
837 		struct vattr *a_vap;
838 		kauth_cred_t a_cred;
839 	} */ *ap = v;
840 	struct vnode *vp = ap->a_vp;
841 	struct rumpfs_node *rn = vp->v_data;
842 	struct vattr *vap = ap->a_vap;
843 
844 	memcpy(vap, &rn->rn_va, sizeof(struct vattr));
845 	vap->va_size = vp->v_size;
846 	return 0;
847 }
848 
849 static int
850 rump_vop_setattr(void *v)
851 {
852 	struct vop_setattr_args /* {
853 		struct vnode *a_vp;
854 		struct vattr *a_vap;
855 		kauth_cred_t a_cred;
856 	} */ *ap = v;
857 	struct vnode *vp = ap->a_vp;
858 	struct vattr *vap = ap->a_vap;
859 	struct rumpfs_node *rn = vp->v_data;
860 
861 #define SETIFVAL(a,t) if (vap->a != (t)VNOVAL) rn->rn_va.a = vap->a
862 	SETIFVAL(va_mode, mode_t);
863 	SETIFVAL(va_uid, uid_t);
864 	SETIFVAL(va_gid, gid_t);
865 	SETIFVAL(va_atime.tv_sec, time_t);
866 	SETIFVAL(va_ctime.tv_sec, time_t);
867 	SETIFVAL(va_mtime.tv_sec, time_t);
868 	SETIFVAL(va_birthtime.tv_sec, time_t);
869 	SETIFVAL(va_atime.tv_nsec, long);
870 	SETIFVAL(va_ctime.tv_nsec, long);
871 	SETIFVAL(va_mtime.tv_nsec, long);
872 	SETIFVAL(va_birthtime.tv_nsec, long);
873 	SETIFVAL(va_flags, u_long);
874 #undef  SETIFVAL
875 
876 	if (vp->v_type == VREG &&
877 	    vap->va_size != VSIZENOTSET &&
878 	    vap->va_size != rn->rn_dlen) {
879 		void *newdata;
880 		size_t copylen, newlen;
881 
882 		newlen = vap->va_size;
883 		newdata = rump_hypermalloc(newlen, 0, true, "rumpfs");
884 
885 		copylen = MIN(rn->rn_dlen, newlen);
886 		memset(newdata, 0, newlen);
887 		memcpy(newdata, rn->rn_data, copylen);
888 		rump_hyperfree(rn->rn_data, rn->rn_dlen);
889 
890 		rn->rn_data = newdata;
891 		rn->rn_dlen = newlen;
892 		uvm_vnp_setsize(vp, newlen);
893 	}
894 	return 0;
895 }
896 
897 static int
898 rump_vop_mkdir(void *v)
899 {
900 	struct vop_mkdir_args /* {
901 		struct vnode *a_dvp;
902 		struct vnode **a_vpp;
903 		struct componentname *a_cnp;
904 		struct vattr *a_vap;
905 	}; */ *ap = v;
906 	struct vnode *dvp = ap->a_dvp;
907 	struct vnode **vpp = ap->a_vpp;
908 	struct componentname *cnp = ap->a_cnp;
909 	struct rumpfs_node *rnd = dvp->v_data, *rn;
910 	int rv = 0;
911 
912 	rn = makeprivate(VDIR, NODEV, DEV_BSIZE, false);
913 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
914 		rn->rn_va.va_flags |= UF_OPAQUE;
915 	rn->rn_parent = rnd;
916 	rv = makevnode(dvp->v_mount, rn, vpp);
917 	if (rv)
918 		goto out;
919 
920 	makedir(rnd, cnp, rn);
921 
922  out:
923 	vput(dvp);
924 	return rv;
925 }
926 
927 static int
928 rump_vop_rmdir(void *v)
929 {
930 	struct vop_rmdir_args /* {
931 		struct vnode *a_dvp;
932 		struct vnode *a_vp;
933 		struct componentname *a_cnp;
934 	}; */ *ap = v;
935 	struct vnode *dvp = ap->a_dvp;
936 	struct vnode *vp = ap->a_vp;
937 	struct componentname *cnp = ap->a_cnp;
938 	struct rumpfs_node *rnd = dvp->v_data;
939 	struct rumpfs_node *rn = vp->v_data;
940 	struct rumpfs_dent *rd;
941 	int rv = 0;
942 
943 	LIST_FOREACH(rd, &rn->rn_dir, rd_entries) {
944 		if (rd->rd_node != RUMPFS_WHITEOUT) {
945 			rv = ENOTEMPTY;
946 			goto out;
947 		}
948 	}
949 	while ((rd = LIST_FIRST(&rn->rn_dir)) != NULL) {
950 		KASSERT(rd->rd_node == RUMPFS_WHITEOUT);
951 		LIST_REMOVE(rd, rd_entries);
952 		kmem_free(rd->rd_name, rd->rd_namelen+1);
953 		kmem_free(rd, sizeof(*rd));
954 	}
955 
956 	freedir(rnd, cnp);
957 	rn->rn_flags |= RUMPNODE_CANRECLAIM;
958 	rn->rn_parent = NULL;
959 
960 out:
961 	vput(dvp);
962 	vput(vp);
963 
964 	return rv;
965 }
966 
967 static int
968 rump_vop_remove(void *v)
969 {
970 	struct vop_remove_args /* {
971 		struct vnode *a_dvp;
972 		struct vnode *a_vp;
973 		struct componentname *a_cnp;
974 	}; */ *ap = v;
975 	struct vnode *dvp = ap->a_dvp;
976 	struct vnode *vp = ap->a_vp;
977 	struct componentname *cnp = ap->a_cnp;
978 	struct rumpfs_node *rnd = dvp->v_data;
979 	struct rumpfs_node *rn = vp->v_data;
980 	int rv = 0;
981 
982 	if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
983 		return EOPNOTSUPP;
984 
985 	if (vp->v_type == VREG) {
986 		rump_hyperfree(rn->rn_data, rn->rn_dlen);
987 	}
988 
989 	freedir(rnd, cnp);
990 	rn->rn_flags |= RUMPNODE_CANRECLAIM;
991 
992 	vput(dvp);
993 	vput(vp);
994 
995 	return rv;
996 }
997 
998 static int
999 rump_vop_mknod(void *v)
1000 {
1001 	struct vop_mknod_args /* {
1002 		struct vnode *a_dvp;
1003 		struct vnode **a_vpp;
1004 		struct componentname *a_cnp;
1005 		struct vattr *a_vap;
1006 	}; */ *ap = v;
1007 	struct vnode *dvp = ap->a_dvp;
1008 	struct vnode **vpp = ap->a_vpp;
1009 	struct componentname *cnp = ap->a_cnp;
1010 	struct vattr *va = ap->a_vap;
1011 	struct rumpfs_node *rnd = dvp->v_data, *rn;
1012 	int rv;
1013 
1014 	rn = makeprivate(va->va_type, va->va_rdev, DEV_BSIZE, false);
1015 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
1016 		rn->rn_va.va_flags |= UF_OPAQUE;
1017 	rv = makevnode(dvp->v_mount, rn, vpp);
1018 	if (rv)
1019 		goto out;
1020 
1021 	makedir(rnd, cnp, rn);
1022 
1023  out:
1024 	vput(dvp);
1025 	return rv;
1026 }
1027 
1028 static int
1029 rump_vop_create(void *v)
1030 {
1031 	struct vop_create_args /* {
1032 		struct vnode *a_dvp;
1033 		struct vnode **a_vpp;
1034 		struct componentname *a_cnp;
1035 		struct vattr *a_vap;
1036 	}; */ *ap = v;
1037 	struct vnode *dvp = ap->a_dvp;
1038 	struct vnode **vpp = ap->a_vpp;
1039 	struct componentname *cnp = ap->a_cnp;
1040 	struct vattr *va = ap->a_vap;
1041 	struct rumpfs_node *rnd = dvp->v_data, *rn;
1042 	off_t newsize;
1043 	int rv;
1044 
1045 	newsize = va->va_type == VSOCK ? DEV_BSIZE : 0;
1046 	rn = makeprivate(va->va_type, NODEV, newsize, false);
1047 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
1048 		rn->rn_va.va_flags |= UF_OPAQUE;
1049 	rv = makevnode(dvp->v_mount, rn, vpp);
1050 	if (rv)
1051 		goto out;
1052 
1053 	makedir(rnd, cnp, rn);
1054 
1055  out:
1056 	vput(dvp);
1057 	return rv;
1058 }
1059 
1060 static int
1061 rump_vop_symlink(void *v)
1062 {
1063 	struct vop_symlink_args /* {
1064 		struct vnode *a_dvp;
1065 		struct vnode **a_vpp;
1066 		struct componentname *a_cnp;
1067 		struct vattr *a_vap;
1068 		char *a_target;
1069 	}; */ *ap = v;
1070 	struct vnode *dvp = ap->a_dvp;
1071 	struct vnode **vpp = ap->a_vpp;
1072 	struct componentname *cnp = ap->a_cnp;
1073 	struct rumpfs_node *rnd = dvp->v_data, *rn;
1074 	const char *target = ap->a_target;
1075 	size_t linklen;
1076 	int rv;
1077 
1078 	linklen = strlen(target);
1079 	KASSERT(linklen < MAXPATHLEN);
1080 	rn = makeprivate(VLNK, NODEV, linklen, false);
1081 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
1082 		rn->rn_va.va_flags |= UF_OPAQUE;
1083 	rv = makevnode(dvp->v_mount, rn, vpp);
1084 	if (rv)
1085 		goto out;
1086 
1087 	makedir(rnd, cnp, rn);
1088 
1089 	KASSERT(linklen < MAXPATHLEN);
1090 	rn->rn_linktarg = PNBUF_GET();
1091 	rn->rn_linklen = linklen;
1092 	strcpy(rn->rn_linktarg, target);
1093 
1094  out:
1095 	vput(dvp);
1096 	return rv;
1097 }
1098 
1099 static int
1100 rump_vop_readlink(void *v)
1101 {
1102 	struct vop_readlink_args /* {
1103 		struct vnode *a_vp;
1104 		struct uio *a_uio;
1105 		kauth_cred_t a_cred;
1106 	}; */ *ap = v;
1107 	struct vnode *vp = ap->a_vp;
1108 	struct rumpfs_node *rn = vp->v_data;
1109 	struct uio *uio = ap->a_uio;
1110 
1111 	return uiomove(rn->rn_linktarg, rn->rn_linklen, uio);
1112 }
1113 
1114 static int
1115 rump_vop_whiteout(void *v)
1116 {
1117 	struct vop_whiteout_args /* {
1118 		struct vnode            *a_dvp;
1119 		struct componentname    *a_cnp;
1120 		int                     a_flags;
1121 	} */ *ap = v;
1122 	struct vnode *dvp = ap->a_dvp;
1123 	struct rumpfs_node *rnd = dvp->v_data;
1124 	struct componentname *cnp = ap->a_cnp;
1125 	int flags = ap->a_flags;
1126 
1127 	switch (flags) {
1128 	case LOOKUP:
1129 		break;
1130 	case CREATE:
1131 		makedir(rnd, cnp, RUMPFS_WHITEOUT);
1132 		break;
1133 	case DELETE:
1134 		cnp->cn_flags &= ~DOWHITEOUT; /* cargo culting never fails ? */
1135 		freedir(rnd, cnp);
1136 		break;
1137 	default:
1138 		panic("unknown whiteout op %d", flags);
1139 	}
1140 
1141 	return 0;
1142 }
1143 
1144 static int
1145 rump_vop_open(void *v)
1146 {
1147 	struct vop_open_args /* {
1148 		struct vnode *a_vp;
1149 		int a_mode;
1150 		kauth_cred_t a_cred;
1151 	} */ *ap = v;
1152 	struct vnode *vp = ap->a_vp;
1153 	struct rumpfs_node *rn = vp->v_data;
1154 	int mode = ap->a_mode;
1155 	int error = EINVAL;
1156 
1157 	if (vp->v_type != VREG || (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0)
1158 		return 0;
1159 
1160 	if (mode & FREAD) {
1161 		if (rn->rn_readfd != -1)
1162 			return 0;
1163 		rn->rn_readfd = rumpuser_open(rn->rn_hostpath,
1164 		    O_RDONLY, &error);
1165 	}
1166 
1167 	if (mode & FWRITE) {
1168 		if (rn->rn_writefd != -1)
1169 			return 0;
1170 		rn->rn_writefd = rumpuser_open(rn->rn_hostpath,
1171 		    O_WRONLY, &error);
1172 	}
1173 
1174 	return error;
1175 }
1176 
1177 /* simple readdir.  even omits dotstuff and periods */
1178 static int
1179 rump_vop_readdir(void *v)
1180 {
1181 	struct vop_readdir_args /* {
1182 		struct vnode *a_vp;
1183 		struct uio *a_uio;
1184 		kauth_cred_t a_cred;
1185 		int *a_eofflag;
1186 		off_t **a_cookies;
1187 		int *a_ncookies;
1188 	} */ *ap = v;
1189 	struct vnode *vp = ap->a_vp;
1190 	struct uio *uio = ap->a_uio;
1191 	struct rumpfs_node *rnd = vp->v_data;
1192 	struct rumpfs_dent *rdent;
1193 	unsigned i;
1194 	int rv = 0;
1195 
1196 	/* seek to current entry */
1197 	for (i = 0, rdent = LIST_FIRST(&rnd->rn_dir);
1198 	    (i < uio->uio_offset) && rdent;
1199 	    i++, rdent = LIST_NEXT(rdent, rd_entries))
1200 		continue;
1201 	if (!rdent)
1202 		goto out;
1203 
1204 	/* copy entries */
1205 	for (; rdent && uio->uio_resid > 0;
1206 	    rdent = LIST_NEXT(rdent, rd_entries), i++) {
1207 		struct dirent dent;
1208 
1209 		strlcpy(dent.d_name, rdent->rd_name, sizeof(dent.d_name));
1210 		dent.d_namlen = strlen(dent.d_name);
1211 		dent.d_reclen = _DIRENT_RECLEN(&dent, dent.d_namlen);
1212 
1213 		if (__predict_false(RDENT_ISWHITEOUT(rdent))) {
1214 			dent.d_fileno = INO_WHITEOUT;
1215 			dent.d_type = DT_WHT;
1216 		} else {
1217 			dent.d_fileno = rdent->rd_node->rn_va.va_fileid;
1218 			dent.d_type = vtype2dt(rdent->rd_node->rn_va.va_type);
1219 		}
1220 
1221 		if (uio->uio_resid < dent.d_reclen) {
1222 			i--;
1223 			break;
1224 		}
1225 
1226 		rv = uiomove(&dent, dent.d_reclen, uio);
1227 		if (rv) {
1228 			i--;
1229 			break;
1230 		}
1231 	}
1232 
1233  out:
1234 	if (ap->a_cookies) {
1235 		*ap->a_ncookies = 0;
1236 		*ap->a_cookies = NULL;
1237 	}
1238 	if (rdent)
1239 		*ap->a_eofflag = 0;
1240 	else
1241 		*ap->a_eofflag = 1;
1242 	uio->uio_offset = i;
1243 
1244 	return rv;
1245 }
1246 
1247 static int
1248 etread(struct rumpfs_node *rn, struct uio *uio)
1249 {
1250 	uint8_t *buf;
1251 	size_t bufsize;
1252 	ssize_t n;
1253 	int error = 0;
1254 
1255 	bufsize = uio->uio_resid;
1256 	if (bufsize == 0)
1257 		return 0;
1258 	buf = kmem_alloc(bufsize, KM_SLEEP);
1259 	if ((n = rumpuser_pread(rn->rn_readfd, buf, bufsize,
1260 	    uio->uio_offset + rn->rn_offset, &error)) == -1)
1261 		goto out;
1262 	KASSERT(n <= bufsize);
1263 	error = uiomove(buf, n, uio);
1264 
1265  out:
1266 	kmem_free(buf, bufsize);
1267 	return error;
1268 
1269 }
1270 
1271 static int
1272 rump_vop_read(void *v)
1273 {
1274 	struct vop_read_args /* {
1275 		struct vnode *a_vp;
1276 		struct uio *a_uio;
1277 		int a_ioflag;
1278 		kauth_cred_t a_cred;
1279 	}; */ *ap = v;
1280 	struct vnode *vp = ap->a_vp;
1281 	struct rumpfs_node *rn = vp->v_data;
1282 	struct uio *uio = ap->a_uio;
1283 	const int advice = IO_ADV_DECODE(ap->a_ioflag);
1284 	off_t chunk;
1285 	int error = 0;
1286 
1287 	/* et op? */
1288 	if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1289 		return etread(rn, uio);
1290 
1291 	/* otherwise, it's off to ubc with us */
1292 	while (uio->uio_resid > 0) {
1293 		chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1294 		if (chunk == 0)
1295 			break;
1296 		error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1297 		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1298 		if (error)
1299 			break;
1300 	}
1301 
1302 	return error;
1303 }
1304 
1305 static int
1306 etwrite(struct rumpfs_node *rn, struct uio *uio)
1307 {
1308 	uint8_t *buf;
1309 	size_t bufsize;
1310 	ssize_t n;
1311 	int error = 0;
1312 
1313 	bufsize = uio->uio_resid;
1314 	if (bufsize == 0)
1315 		return 0;
1316 	buf = kmem_alloc(bufsize, KM_SLEEP);
1317 	error = uiomove(buf, bufsize, uio);
1318 	if (error)
1319 		goto out;
1320 	KASSERT(uio->uio_resid == 0);
1321 	n = rumpuser_pwrite(rn->rn_writefd, buf, bufsize,
1322 	    (uio->uio_offset-bufsize) + rn->rn_offset, &error);
1323 	if (n >= 0) {
1324 		KASSERT(n <= bufsize);
1325 		uio->uio_resid = bufsize - n;
1326 	}
1327 
1328  out:
1329 	kmem_free(buf, bufsize);
1330 	return error;
1331 }
1332 
1333 static int
1334 rump_vop_write(void *v)
1335 {
1336 	struct vop_write_args /* {
1337 		struct vnode *a_vp;
1338 		struct uio *a_uio;
1339 		int a_ioflag;
1340 		kauth_cred_t a_cred;
1341 	}; */ *ap = v;
1342 	struct vnode *vp = ap->a_vp;
1343 	struct rumpfs_node *rn = vp->v_data;
1344 	struct uio *uio = ap->a_uio;
1345 	const int advice = IO_ADV_DECODE(ap->a_ioflag);
1346 	void *olddata;
1347 	size_t oldlen, newlen;
1348 	off_t chunk;
1349 	int error = 0;
1350 	bool allocd = false;
1351 
1352 	if (ap->a_ioflag & IO_APPEND)
1353 		uio->uio_offset = vp->v_size;
1354 
1355 	/* consult et? */
1356 	if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1357 		return etwrite(rn, uio);
1358 
1359 	/*
1360 	 * Otherwise, it's a case of ubcmove.
1361 	 */
1362 
1363 	/*
1364 	 * First, make sure we have enough storage.
1365 	 *
1366 	 * No, you don't need to tell me it's not very efficient.
1367 	 * No, it doesn't really support sparse files, just fakes it.
1368 	 */
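	/*
	 * For example (numbers invented): a 512 byte write at offset 1MB
	 * into an empty node reallocates rn_data to 1MB+512 bytes,
	 * zero-fills the hole, and only then copies the new bytes in via
	 * the ubc loop below.
	 */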
1369 	newlen = uio->uio_offset + uio->uio_resid;
1370 	oldlen = 0; /* XXXgcc */
1371 	olddata = NULL;
1372 	if (rn->rn_dlen < newlen) {
1373 		oldlen = rn->rn_dlen;
1374 		olddata = rn->rn_data;
1375 
1376 		rn->rn_data = rump_hypermalloc(newlen, 0, true, "rumpfs");
1377 		rn->rn_dlen = newlen;
1378 		memset(rn->rn_data, 0, newlen);
1379 		memcpy(rn->rn_data, olddata, oldlen);
1380 		allocd = true;
1381 		uvm_vnp_setsize(vp, newlen);
1382 	}
1383 
1384 	/* ok, we have enough stooorage.  write */
1385 	while (uio->uio_resid > 0) {
1386 		chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1387 		if (chunk == 0)
1388 			break;
1389 		error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1390 		    UBC_WRITE | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1391 		if (error)
1392 			break;
1393 	}
1394 
1395 	if (allocd) {
1396 		if (error) {
1397 			rump_hyperfree(rn->rn_data, newlen);
1398 			rn->rn_data = olddata;
1399 			rn->rn_dlen = oldlen;
1400 			uvm_vnp_setsize(vp, oldlen);
1401 		} else {
1402 			rump_hyperfree(olddata, oldlen);
1403 		}
1404 	}
1405 
1406 	return error;
1407 }
1408 
1409 static int
1410 rump_vop_bmap(void *v)
1411 {
1412 	struct vop_bmap_args /* {
1413 		struct vnode *a_vp;
1414 		daddr_t a_bn;
1415 		struct vnode **a_vpp;
1416 		daddr_t *a_bnp;
1417 		int *a_runp;
1418 	} */ *ap = v;
1419 
1420 	/* 1:1 mapping */
1421 	if (ap->a_vpp)
1422 		*ap->a_vpp = ap->a_vp;
1423 	if (ap->a_bnp)
1424 		*ap->a_bnp = ap->a_bn;
1425 	if (ap->a_runp)
1426 		*ap->a_runp = 16;
1427 
1428 	return 0;
1429 }
1430 
1431 static int
1432 rump_vop_strategy(void *v)
1433 {
1434 	struct vop_strategy_args /* {
1435 		struct vnode *a_vp;
1436 		struct buf *a_bp;
1437 	} */ *ap = v;
1438 	struct vnode *vp = ap->a_vp;
1439 	struct rumpfs_node *rn = vp->v_data;
1440 	struct buf *bp = ap->a_bp;
1441 	off_t copylen, copyoff;
1442 	int error;
1443 
1444 	if (vp->v_type != VREG || rn->rn_flags & RUMPNODE_ET_PHONE_HOST) {
1445 		error = EINVAL;
1446 		goto out;
1447 	}
1448 
1449 	copyoff = bp->b_blkno << DEV_BSHIFT;
1450 	copylen = MIN(rn->rn_dlen - copyoff, bp->b_bcount);
1451 	if (BUF_ISWRITE(bp)) {
1452 		memcpy((uint8_t *)rn->rn_data + copyoff, bp->b_data, copylen);
1453 	} else {
1454 		memset((uint8_t*)bp->b_data + copylen, 0, bp->b_bcount-copylen);
1455 		memcpy(bp->b_data, (uint8_t *)rn->rn_data + copyoff, copylen);
1456 	}
1457 	bp->b_resid = 0;
1458 	error = 0;
1459 
1460  out:
1461 	bp->b_error = error;
1462 	biodone(bp);
1463 	return 0;
1464 }
1465 
1466 static int
1467 rump_vop_pathconf(void *v)
1468 {
1469 	struct vop_pathconf_args /* {
1470 		struct vnode *a_vp;
1471 		int a_name;
1472 		register_t *a_retval;
1473 	}; */ *ap = v;
1474 	int name = ap->a_name;
1475 	register_t *retval = ap->a_retval;
1476 
1477 	switch (name) {
1478 	case _PC_LINK_MAX:
1479 		*retval = LINK_MAX;
1480 		return 0;
1481 	case _PC_NAME_MAX:
1482 		*retval = RUMPFS_MAXNAMLEN;
1483 		return 0;
1484 	case _PC_PATH_MAX:
1485 		*retval = PATH_MAX;
1486 		return 0;
1487 	case _PC_PIPE_BUF:
1488 		*retval = PIPE_BUF;
1489 		return 0;
1490 	case _PC_CHOWN_RESTRICTED:
1491 		*retval = 1;
1492 		return 0;
1493 	case _PC_NO_TRUNC:
1494 		*retval = 1;
1495 		return 0;
1496 	case _PC_SYNC_IO:
1497 		*retval = 1;
1498 		return 0;
1499 	case _PC_FILESIZEBITS:
1500 		*retval = 43; /* this one goes to 11 */
1501 		return 0;
1502 	case _PC_SYMLINK_MAX:
1503 		*retval = MAXPATHLEN;
1504 		return 0;
1505 	case _PC_2_SYMLINKS:
1506 		*retval = 1;
1507 		return 0;
1508 	default:
1509 		return EINVAL;
1510 	}
1511 }
1512 
1513 static int
1514 rump_vop_success(void *v)
1515 {
1516 
1517 	return 0;
1518 }
1519 
1520 static int
1521 rump_vop_inactive(void *v)
1522 {
1523 	struct vop_inactive_args /* {
1524 		struct vnode *a_vp;
1525 		bool *a_recycle;
1526 	} */ *ap = v;
1527 	struct vnode *vp = ap->a_vp;
1528 	struct rumpfs_node *rn = vp->v_data;
1529 	int error;
1530 
1531 	if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST && vp->v_type == VREG) {
1532 		if (rn->rn_readfd != -1) {
1533 			rumpuser_close(rn->rn_readfd, &error);
1534 			rn->rn_readfd = -1;
1535 		}
1536 		if (rn->rn_writefd != -1) {
1537 			rumpuser_close(rn->rn_writefd, &error);
1538 			rn->rn_writefd = -1;
1539 		}
1540 	}
1541 	*ap->a_recycle = (rn->rn_flags & RUMPNODE_CANRECLAIM) ? true : false;
1542 
1543 	VOP_UNLOCK(vp);
1544 	return 0;
1545 }
1546 
1547 static int
1548 rump_vop_reclaim(void *v)
1549 {
1550 	struct vop_reclaim_args /* {
1551 		struct vnode *a_vp;
1552 	} */ *ap = v;
1553 	struct vnode *vp = ap->a_vp;
1554 	struct rumpfs_node *rn = vp->v_data;
1555 
1556 	mutex_enter(&reclock);
1557 	rn->rn_vp = NULL;
1558 	mutex_exit(&reclock);
1559 	genfs_node_destroy(vp);
1560 	vp->v_data = NULL;
1561 
1562 	if (rn->rn_flags & RUMPNODE_CANRECLAIM) {
1563 		if (vp->v_type == VLNK)
1564 			PNBUF_PUT(rn->rn_linktarg);
1565 		if (rn->rn_hostpath)
1566 			free(rn->rn_hostpath, M_TEMP);
1567 		kmem_free(rn, sizeof(*rn));
1568 	}
1569 
1570 	return 0;
1571 }
1572 
1573 static int
1574 rump_vop_spec(void *v)
1575 {
1576 	struct vop_generic_args *ap = v;
1577 	int (**opvec)(void *);
1578 
1579 	switch (ap->a_desc->vdesc_offset) {
1580 	case VOP_ACCESS_DESCOFFSET:
1581 	case VOP_GETATTR_DESCOFFSET:
1582 	case VOP_SETATTR_DESCOFFSET:
1583 	case VOP_LOCK_DESCOFFSET:
1584 	case VOP_UNLOCK_DESCOFFSET:
1585 	case VOP_ISLOCKED_DESCOFFSET:
1586 	case VOP_RECLAIM_DESCOFFSET:
1587 		opvec = rump_vnodeop_p;
1588 		break;
1589 	default:
1590 		opvec = spec_vnodeop_p;
1591 		break;
1592 	}
1593 
1594 	return VOCALL(opvec, ap->a_desc->vdesc_offset, v);
1595 }
1596 
1597 static int
1598 rump_vop_advlock(void *v)
1599 {
1600 	struct vop_advlock_args /* {
1601 		const struct vnodeop_desc *a_desc;
1602 		struct vnode *a_vp;
1603 		void *a_id;
1604 		int a_op;
1605 		struct flock *a_fl;
1606 		int a_flags;
1607 	} */ *ap = v;
1608 	struct vnode *vp = ap->a_vp;
1609 	struct rumpfs_node *rn = vp->v_data;
1610 
1611 	return lf_advlock(ap, &rn->rn_lockf, vp->v_size);
1612 }
1613 
1614 /*
1615  * Begin vfs-level stuff
1616  */
1617 
1618 VFS_PROTOS(rumpfs);
1619 struct vfsops rumpfs_vfsops = {
1620 	.vfs_name =		MOUNT_RUMPFS,
1621 	.vfs_min_mount_data = 	0,
1622 	.vfs_mount =		rumpfs_mount,
1623 	.vfs_start =		(void *)nullop,
1624 	.vfs_unmount = 		rumpfs_unmount,
1625 	.vfs_root =		rumpfs_root,
1626 	.vfs_quotactl =		(void *)eopnotsupp,
1627 	.vfs_statvfs =		genfs_statvfs,
1628 	.vfs_sync =		(void *)nullop,
1629 	.vfs_vget =		rumpfs_vget,
1630 	.vfs_fhtovp =		(void *)eopnotsupp,
1631 	.vfs_vptofh =		(void *)eopnotsupp,
1632 	.vfs_init =		rumpfs_init,
1633 	.vfs_reinit =		NULL,
1634 	.vfs_done =		rumpfs_done,
1635 	.vfs_mountroot =	rumpfs_mountroot,
1636 	.vfs_snapshot =		(void *)eopnotsupp,
1637 	.vfs_extattrctl =	(void *)eopnotsupp,
1638 	.vfs_suspendctl =	(void *)eopnotsupp,
1639 	.vfs_renamelock_enter =	genfs_renamelock_enter,
1640 	.vfs_renamelock_exit =	genfs_renamelock_exit,
1641 	.vfs_opv_descs =	rump_opv_descs,
1642 	/* vfs_refcount */
1643 	/* vfs_list */
1644 };
1645 
1646 static int
1647 rumpfs_mountfs(struct mount *mp)
1648 {
1649 	struct rumpfs_mount *rfsmp;
1650 	struct rumpfs_node *rn;
1651 	int error;
1652 
1653 	rfsmp = kmem_alloc(sizeof(*rfsmp), KM_SLEEP);
1654 
1655 	rn = makeprivate(VDIR, NODEV, DEV_BSIZE, false);
1656 	rn->rn_parent = rn;
1657 	if ((error = makevnode(mp, rn, &rfsmp->rfsmp_rvp)) != 0)
1658 		return error;
1659 
1660 	rfsmp->rfsmp_rvp->v_vflag |= VV_ROOT;
1661 	VOP_UNLOCK(rfsmp->rfsmp_rvp);
1662 
1663 	mp->mnt_data = rfsmp;
1664 	mp->mnt_stat.f_namemax = RUMPFS_MAXNAMLEN;
1665 	mp->mnt_stat.f_iosize = 512;
1666 	mp->mnt_flag |= MNT_LOCAL;
1667 	mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO;
1668 	mp->mnt_fs_bshift = DEV_BSHIFT;
1669 	vfs_getnewfsid(mp);
1670 
1671 	return 0;
1672 }
1673 
1674 int
1675 rumpfs_mount(struct mount *mp, const char *mntpath, void *arg, size_t *alen)
1676 {
1677 	int error;
1678 
1679 	if (mp->mnt_flag & MNT_UPDATE) {
1680 		return 0;
1681 	}
1682 
1683 	error = set_statvfs_info(mntpath, UIO_USERSPACE, "rumpfs", UIO_SYSSPACE,
1684 	    mp->mnt_op->vfs_name, mp, curlwp);
1685 	if (error)
1686 		return error;
1687 
1688 	return rumpfs_mountfs(mp);
1689 }
1690 
1691 int
1692 rumpfs_unmount(struct mount *mp, int mntflags)
1693 {
1694 	struct rumpfs_mount *rfsmp = mp->mnt_data;
1695 	int flags = 0, error;
1696 
1697 	if (panicstr || mntflags & MNT_FORCE)
1698 		flags |= FORCECLOSE;
1699 
1700 	if ((error = vflush(mp, rfsmp->rfsmp_rvp, flags)) != 0)
1701 		return error;
1702 	vgone(rfsmp->rfsmp_rvp); /* XXX */
1703 
1704 	kmem_free(rfsmp, sizeof(*rfsmp));
1705 
1706 	return 0;
1707 }
1708 
1709 int
1710 rumpfs_root(struct mount *mp, struct vnode **vpp)
1711 {
1712 	struct rumpfs_mount *rfsmp = mp->mnt_data;
1713 
1714 	vref(rfsmp->rfsmp_rvp);
1715 	vn_lock(rfsmp->rfsmp_rvp, LK_EXCLUSIVE | LK_RETRY);
1716 	*vpp = rfsmp->rfsmp_rvp;
1717 	return 0;
1718 }
1719 
1720 int
1721 rumpfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1722 {
1723 
1724 	return EOPNOTSUPP;
1725 }
1726 
1727 void
1728 rumpfs_init(void)
1729 {
1730 
1731 	CTASSERT(RUMP_ETFS_SIZE_ENDOFF == RUMPBLK_SIZENOTSET);
1732 
1733 	mutex_init(&reclock, MUTEX_DEFAULT, IPL_NONE);
1734 	mutex_init(&etfs_lock, MUTEX_DEFAULT, IPL_NONE);
1735 }
1736 
1737 void
1738 rumpfs_done(void)
1739 {
1740 
1741 	mutex_destroy(&reclock);
1742 	mutex_destroy(&etfs_lock);
1743 }
1744 
1745 int
1746 rumpfs_mountroot(void)
1747 {
1748 	struct mount *mp;
1749 	int error;
1750 
1751 	if ((error = vfs_rootmountalloc(MOUNT_RUMPFS, "rootdev", &mp)) != 0) {
1752 		vrele(rootvp);
1753 		return error;
1754 	}
1755 
1756 	if ((error = rumpfs_mountfs(mp)) != 0)
1757 		panic("mounting rootfs failed: %d", error);
1758 
1759 	mutex_enter(&mountlist_lock);
1760 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1761 	mutex_exit(&mountlist_lock);
1762 
1763 	error = set_statvfs_info("/", UIO_SYSSPACE, "rumpfs", UIO_SYSSPACE,
1764 	    mp->mnt_op->vfs_name, mp, curlwp);
1765 	if (error)
1766 		panic("set_statvfs_info failed for rootfs: %d", error);
1767 
1768 	mp->mnt_flag &= ~MNT_RDONLY;
1769 	vfs_unbusy(mp, false, NULL);
1770 
1771 	return 0;
1772 }
1773