xref: /netbsd-src/lib/libp2k/p2k.c (revision f75f5aae154fcd0572e8889e4fea2a51d67bbf08)
1 /*	$NetBSD: p2k.c,v 1.21 2009/10/09 16:37:30 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Finnish Cultural Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * puffs 2k, i.e. puffs 2 kernel.  Converts the puffs protocol to
33  * the kernel vfs protocol and vice versa.
34  *
35  * A word about reference counting: puffs in the kernel is the king of
36  * reference counting.  We must maintain a vnode alive and kicking
37  * until the kernel tells us to reclaim it.  Therefore we make sure
38  * we never accidentally lose a vnode.  Before calling operations which
39  * decrease the refcount we always bump the refcount up to compensate.
40  * Come inactive, if the file system thinks that the vnode should be
41  * put out of its misery, it will set the recycle flag.  We use this
42  * to tell the kernel to reclaim the vnode.  Only in reclaim do we
43  * really nuke the last reference.
44  */
45 
46 #include <sys/cdefs.h>
47 #include <sys/mount.h>
48 #include <sys/param.h>
49 #include <sys/vnode.h>
50 #include <sys/lock.h>
51 #include <sys/namei.h>
52 #include <sys/dirent.h>
53 #include <sys/hash.h>
54 
55 #include <assert.h>
56 #include <errno.h>
57 #include <puffs.h>
58 #include <stdlib.h>
59 #include <stdio.h>
60 
61 #include <rump/rump.h>
62 #include <rump/p2k.h>
63 #include <rump/ukfs.h>
64 
/*
 * Expand the puffs operation prototypes for the "p2k" prefix.
 * NOTE(review): presumably this declares the p2k_fs_*() and p2k_node_*()
 * callbacks defined later in this file -- confirm against <puffs.h>.
 */
PUFFSOP_PROTOS(p2k)

/* List head type used for one bucket of the vnode hash. */
LIST_HEAD(p2k_vp_hash, p2k_node);
/* Number of buckets in the per-mount vnode hash table. */
#define NHASHBUCK (1<<16)
/*
 * Per-mount state.  One instance is allocated in allocp2m() and attached
 * to the ukfs instance with ukfs_setspecific().
 */
struct p2k_mount {
	struct vnode *p2m_rvp;		/* root vnode, from ukfs_getrvp() */
	struct puffs_usermount *p2m_pu;	/* puffs mount handle */
	struct ukfs *p2m_ukfs;		/* ukfs instance; NULL after unmount */
	struct p2k_vp_hash p2m_vphash[NHASHBUCK]; /* vnode pointer -> p2k_node */
	int p2m_nvnodes;		/* not referenced in this file */
};
76 
/*
 * Per-vnode bookkeeping.  A pointer to this structure is what puffs
 * sees as the node cookie (see OPC2VP()).
 */
struct p2k_node {
	struct vnode *p2n_vp;		/* wrapped vnode; NULL once released */
	struct componentname *p2n_cn;	/* componentname saved by lookup */

	/*
	 * Ok, then, uhm, we need .. *drumroll*.. two componentname
	 * storages for rename.  This is because the source dir is
	 * unlocked after the first lookup, and someone else might
	 * race in here.  However, we know it's not another rename
	 * because of the kernel rename lock.  And we need two since
	 * srcdir and targdir might be the same.  It's a wonderful world.
	 */
	struct componentname *p2n_cn_ren_src, *p2n_cn_ren_targ;

	LIST_ENTRY(p2k_node) p2n_entries;	/* hash bucket linkage */
};
93 
/*
 * Convert a puffs cookie (really a struct p2k_node pointer) into the
 * vnode it wraps.  The argument is parenthesized so that the cast applies
 * to the whole expression passed in, not just to its first operand.
 */
#define OPC2VP(opc) (((struct p2k_node *)(opc))->p2n_vp)
95 
96 static kauth_cred_t
97 cred_create(const struct puffs_cred *pcr)
98 {
99 	gid_t groups[NGROUPS];
100 	uid_t uid;
101 	gid_t gid;
102 	short ngroups = 0;
103 
104 	if (puffs_cred_getuid(pcr, &uid) == -1)
105 		uid = 0;
106 	if (puffs_cred_getgid(pcr, &gid) == -1)
107 		gid = 0;
108 	puffs_cred_getgroups(pcr, groups, &ngroups);
109 
110 	/* LINTED: ngroups is ok */
111 	return rump_cred_create(uid, gid, ngroups, groups);
112 }
113 
114 static __inline void
115 cred_destroy(kauth_cred_t cred)
116 {
117 
118 	rump_cred_put(cred);
119 }
120 
121 static struct componentname *
122 makecn(const struct puffs_cn *pcn, int myflags)
123 {
124 	kauth_cred_t cred;
125 
126 	cred = cred_create(pcn->pcn_cred);
127 	/* LINTED: prehistoric types in first two args */
128 	return rump_makecn(pcn->pcn_nameiop, pcn->pcn_flags | myflags,
129 	    pcn->pcn_name, pcn->pcn_namelen, cred, curlwp);
130 }
131 
132 static __inline void
133 freecn(struct componentname *cnp, int flags)
134 {
135 
136 	rump_freecn(cnp, flags | RUMPCN_FREECRED);
137 }
138 
139 static void
140 makelwp(struct puffs_usermount *pu)
141 {
142 	pid_t pid;
143 	lwpid_t lid;
144 
145 	puffs_cc_getcaller(puffs_cc_getcc(pu), &pid, &lid);
146 	rump_setup_curlwp(pid, lid, 1);
147 }
148 
/*
 * Post-operation hook paired with makelwp(): clears the rump curlwp.
 * The mount argument is not used.
 */
/*ARGSUSED*/
static void
clearlwp(struct puffs_usermount *pu)
{
	rump_clear_curlwp();
}
156 
157 static __inline struct p2k_vp_hash *
158 gethash(struct p2k_mount *p2m, struct vnode *vp)
159 {
160 	uint32_t hash;
161 
162 	hash = hash32_buf(&vp, sizeof(vp), HASH32_BUF_INIT);
163 	return &p2m->p2m_vphash[hash % NHASHBUCK];
164 }
165 
166 /*
167  * Find node based on hash of vnode pointer.  If vnode is found,
168  * releases one reference to vnode based on the fact that we just
169  * performed a lookup for it.
170  *
171  * If the optinal p2n_storage parameter is passed, it is used instead
172  * of allocating more memory.  This allows for easier error recovery.
173  */
174 static struct p2k_node *
175 getp2n(struct p2k_mount *p2m, struct vnode *vp, bool initial,
176 	struct p2k_node *p2n_storage)
177 {
178 	struct p2k_vp_hash *hl;
179 	struct p2k_node *p2n = NULL;
180 
181 	/* p2n_storage => initial */
182 	assert(!p2n_storage || initial);
183 
184 	hl = gethash(p2m, vp);
185 	if (!initial)
186 		LIST_FOREACH(p2n, hl, p2n_entries)
187 			if (p2n->p2n_vp == vp)
188 				break;
189 
190 	hl = gethash(p2m, vp);
191 	if (p2n) {
192 		rump_vp_rele(vp);
193 	} else {
194 		if (p2n_storage)
195 			p2n = p2n_storage;
196 		else
197 			p2n = malloc(sizeof(*p2n));
198 		if (!p2n) {
199 			rump_vp_rele(vp);
200 			return NULL;
201 		}
202 		memset(p2n, 0, sizeof(*p2n));
203 		LIST_INSERT_HEAD(hl, p2n, p2n_entries);
204 		p2n->p2n_vp = vp;
205 	}
206 	return p2n;
207 }
208 
209 static void
210 freep2n(struct p2k_node *p2n)
211 {
212 
213 	assert(p2n->p2n_vp == NULL);
214 	assert(p2n->p2n_cn == NULL);
215 	LIST_REMOVE(p2n, p2n_entries);
216 	free(p2n);
217 }
218 
219 /*ARGSUSED*/
220 static void
221 p2k_errcatcher(struct puffs_usermount *pu, uint8_t type, int error,
222 	const char *str, puffs_cookie_t cook)
223 {
224 
225 	fprintf(stderr, "type %d, error %d, cookie %p (%s)\n",
226 	    type, error, cook, str);
227 
228 	/*
229 	 * Trap all EINVAL responses to lookup.  It most likely means
230 	 * that we supplied VNON/VBAD as the type.  The real kernel
231 	 * doesn't panic from this either, but just handles it.
232 	 */
233 	if (type != PUFFS_VN_LOOKUP && error == EINVAL)
234 		abort();
235 }
236 
237 /* just to avoid annoying loop when singlestepping */
238 static void
239 allocp2m(struct ukfs *ukfs)
240 {
241 	struct p2k_mount *p2m;
242 	int i;
243 
244 	p2m = malloc(sizeof(*p2m));
245 	memset(p2m, 0, sizeof(*p2m));
246 
247 	for (i = 0; i < NHASHBUCK; i++)
248 		LIST_INIT(&p2m->p2m_vphash[i]);
249 	ukfs_setspecific(ukfs, p2m);
250 }
251 
252 static struct p2k_mount *
253 setupfs(const char *vfsname, const char *devpath, int partition,
254 	const char *mountpath, int mntflags, void *arg, size_t alen,
255 	uint32_t puffs_flags)
256 {
257 	char partpath[UKFS_PARTITION_MAXPATHLEN];
258 	char typebuf[PUFFS_TYPELEN];
259 	struct puffs_ops *pops;
260 	struct puffs_usermount *pu = NULL;
261 	struct p2k_node *p2n_root;
262 	struct ukfs *ukfs = NULL;
263 	struct p2k_mount *p2m = NULL;
264 	extern int puffs_fakecc;
265 	int rv = -1, sverrno;
266 	bool dodaemon;
267 
268 	PUFFSOP_INIT(pops);
269 
270 	PUFFSOP_SET(pops, p2k, fs, statvfs);
271 	PUFFSOP_SET(pops, p2k, fs, unmount);
272 	PUFFSOP_SET(pops, p2k, fs, sync);
273 	PUFFSOP_SET(pops, p2k, fs, fhtonode);
274 	PUFFSOP_SET(pops, p2k, fs, nodetofh);
275 
276 	PUFFSOP_SET(pops, p2k, node, lookup);
277 	PUFFSOP_SET(pops, p2k, node, create);
278 	PUFFSOP_SET(pops, p2k, node, mknod);
279 	PUFFSOP_SET(pops, p2k, node, open);
280 	PUFFSOP_SET(pops, p2k, node, close);
281 	PUFFSOP_SET(pops, p2k, node, access);
282 	PUFFSOP_SET(pops, p2k, node, getattr);
283 	PUFFSOP_SET(pops, p2k, node, setattr);
284 #if 0
285 	PUFFSOP_SET(pops, p2k, node, poll);
286 #endif
287 	PUFFSOP_SET(pops, p2k, node, mmap);
288 	PUFFSOP_SET(pops, p2k, node, fsync);
289 	PUFFSOP_SET(pops, p2k, node, seek);
290 	PUFFSOP_SET(pops, p2k, node, remove);
291 	PUFFSOP_SET(pops, p2k, node, link);
292 	PUFFSOP_SET(pops, p2k, node, rename);
293 	PUFFSOP_SET(pops, p2k, node, mkdir);
294 	PUFFSOP_SET(pops, p2k, node, rmdir);
295 	PUFFSOP_SET(pops, p2k, node, symlink);
296 	PUFFSOP_SET(pops, p2k, node, readdir);
297 	PUFFSOP_SET(pops, p2k, node, readlink);
298 	PUFFSOP_SET(pops, p2k, node, read);
299 	PUFFSOP_SET(pops, p2k, node, write);
300 
301 	PUFFSOP_SET(pops, p2k, node, inactive);
302 	PUFFSOP_SET(pops, p2k, node, reclaim);
303 
304 	dodaemon = true;
305 	if (getenv("P2K_DEBUG") != NULL) {
306 		puffs_flags |= PUFFS_FLAG_OPDUMP;
307 		dodaemon = false;
308 	}
309 	if (getenv("P2K_NODETACH") != NULL) {
310 		dodaemon = false;
311 	}
312 	if (getenv("P2K_NOCACHE_PAGE") != NULL) {
313 		puffs_flags |= PUFFS_KFLAG_NOCACHE_PAGE;
314 	}
315 	if (getenv("P2K_NOCACHE_NAME") != NULL) {
316 		puffs_flags |= PUFFS_KFLAG_NOCACHE_NAME;
317 	}
318 	if (getenv("P2K_NOCACHE") != NULL) {
319 		puffs_flags |= PUFFS_KFLAG_NOCACHE;
320 	}
321 
322 	strcpy(typebuf, "p2k|");
323 	if (strcmp(vfsname, "puffs") == 0) { /* XXX */
324 		struct puffs_kargs *args = arg;
325 		strlcat(typebuf, args->pa_typename, sizeof(typebuf));
326 		dodaemon = false;
327 	} else {
328 		strlcat(typebuf, vfsname, sizeof(typebuf));
329 	}
330 
331 	if (UKFS_USEPARTITION(partition)) {
332 		char partbuf[UKFS_PARTITION_MAGICLEN+1];
333 
334 		strlcpy(partpath, devpath, sizeof(partpath));
335 		snprintf(partbuf, sizeof(partbuf), "%s%c%%",
336 		    UKFS_PARTITION_SCANMAGIC, partition + 'a');
337 		strlcat(partpath, partbuf, sizeof(partpath));
338 	} else {
339 		strlcpy(partpath, devpath, sizeof(partpath));
340 	}
341 	pu = puffs_init(pops, partpath, typebuf, NULL, puffs_flags);
342 	if (pu == NULL)
343 		goto out;
344 
345 	if (dodaemon)
346 		puffs_daemon(pu, 1, 1);
347 
348 	if (ukfs_init() == -1)
349 		goto out;
350 	if (partition != UKFS_PARTITION_NA)
351 		ukfs = ukfs_mount_disk(vfsname, devpath, partition,
352 		    mountpath, mntflags, arg, alen);
353 	else
354 		ukfs = ukfs_mount(vfsname, devpath, mountpath, mntflags,
355 		    arg, alen);
356 	if (ukfs == NULL)
357 		goto out;
358 	allocp2m(ukfs);
359 	p2m = ukfs_getspecific(ukfs);
360 	p2m->p2m_ukfs = ukfs;
361 	p2m->p2m_pu = pu;
362 
363 	p2m->p2m_rvp = ukfs_getrvp(ukfs);
364 	p2n_root = getp2n(p2m, p2m->p2m_rvp, true, NULL);
365 	puffs_setfhsize(pu, 0, PUFFS_FHFLAG_PASSTHROUGH);
366 	puffs_setstacksize(pu, PUFFS_STACKSIZE_MIN);
367 	puffs_fakecc = 1;
368 	puffs_set_prepost(pu, makelwp, clearlwp);
369 	puffs_set_errnotify(pu, p2k_errcatcher);
370 
371 	puffs_setspecific(pu, ukfs);
372 	if ((rv = puffs_mount(pu, mountpath, mntflags, p2n_root))== -1)
373 		goto out;
374 
375  out:
376 	sverrno = errno;
377 	if (rv) {
378 		if (ukfs)
379 			ukfs_release(p2m->p2m_ukfs, UKFS_RELFLAG_FORCE);
380 		if (pu)
381 			puffs_cancel(pu, sverrno);
382 		if (p2m)
383 			free(p2m);
384 		errno = sverrno;
385 		p2m = NULL;
386 	}
387 
388 	return p2m;
389 }
390 
391 int
392 p2k_mainloop(struct p2k_mount *p2m)
393 {
394 	int rv, sverrno;
395 
396 	rv = puffs_mainloop(p2m->p2m_pu);
397 	sverrno = errno;
398 	puffs_exit(p2m->p2m_pu, 1);
399 	if (p2m->p2m_ukfs)
400 		ukfs_release(p2m->p2m_ukfs, UKFS_RELFLAG_FORCE);
401 	free(p2m);
402 
403 	if (rv == -1)
404 		errno = sverrno;
405 	return rv;
406 }
407 
408 int
409 p2k_run_fs(const char *vfsname, const char *devpath, const char *mountpath,
410 	int mntflags, void *arg, size_t alen, uint32_t puffs_flags)
411 {
412 	struct p2k_mount *p2m;
413 
414 	p2m = setupfs(vfsname, devpath, UKFS_PARTITION_NA, mountpath,
415 	    mntflags, arg, alen, puffs_flags);
416 	if (p2m == NULL)
417 		return -1;
418 	return p2k_mainloop(p2m);
419 }
420 
/*
 * Like p2k_run_fs(), but the caller supplies the partition that is
 * passed on to setupfs().  Returns -1 if setup fails, otherwise the
 * result of p2k_mainloop().
 */
int
p2k_run_diskfs(const char *vfsname, const char *devpath, int partition,
	const char *mountpath, int mntflags, void *arg, size_t alen,
	uint32_t puffs_flags)
{
	struct p2k_mount *p2m = setupfs(vfsname, devpath, partition,
	    mountpath, mntflags, arg, alen, puffs_flags);

	return p2m != NULL ? p2k_mainloop(p2m) : -1;
}
434 
435 struct p2k_mount *
436 p2k_setup_fs(const char *vfsname, const char *devpath, const char *mountpath,
437 	int mntflags, void *arg, size_t alen, uint32_t puffs_flags)
438 {
439 
440 	return setupfs(vfsname, devpath, UKFS_PARTITION_NA, mountpath,
441 	    mntflags, arg, alen, puffs_flags);
442 }
443 
/*
 * Like p2k_setup_fs(), but the caller supplies the partition that is
 * passed on to setupfs().  Returns the mount state, or NULL on failure.
 */
struct p2k_mount *
p2k_setup_diskfs(const char *vfsname, const char *devpath, int partition,
	const char *mountpath, int mntflags, void *arg, size_t alen,
	uint32_t puffs_flags)
{
	struct p2k_mount *p2m;

	p2m = setupfs(vfsname, devpath, partition, mountpath, mntflags,
	    arg, alen, puffs_flags);
	return p2m;
}
453 
/*
 * puffs statvfs callback: forward to rump_vfs_statvfs() on the mount
 * point of the ukfs instance stored as the puffs specific data.
 */
int
p2k_fs_statvfs(struct puffs_usermount *pu, struct statvfs *sbp)
{
	struct ukfs *fs = puffs_getspecific(pu);

	return rump_vfs_statvfs(ukfs_getmp(fs), sbp);
}
461 
462 /*ARGSUSED*/
463 int
464 p2k_fs_unmount(struct puffs_usermount *pu, int flags)
465 {
466 	struct ukfs *fs = puffs_getspecific(pu);
467 	struct p2k_mount *p2m = ukfs_getspecific(fs);
468 	int error = 0;
469 
470 	rump_clear_curlwp(); /* ukfs does its own curlwp tricks */
471 
472 	rump_vp_rele(p2m->p2m_rvp);
473 	if (ukfs_release(fs, 0) != 0) {
474 		ukfs_release(fs, UKFS_RELFLAG_FORCE);
475 		error = 0;
476 	}
477 	p2m->p2m_ukfs = NULL;
478 
479 	rump_setup_curlwp(0, 1, 1);
480 	return error;
481 }
482 
483 int
484 p2k_fs_sync(struct puffs_usermount *pu, int waitfor,
485 	const struct puffs_cred *pcr)
486 {
487 	struct mount *mp = ukfs_getmp(puffs_getspecific(pu));
488 	kauth_cred_t cred;
489 	int rv;
490 
491 	cred = cred_create(pcr);
492 	rv = rump_vfs_sync(mp, waitfor, (kauth_cred_t)cred);
493 	cred_destroy(cred);
494 
495 	return rv;
496 }
497 
498 /*ARGSUSED*/
499 int
500 p2k_fs_fhtonode(struct puffs_usermount *pu, void *fid, size_t fidsize,
501 	struct puffs_newinfo *pni)
502 {
503 	struct mount *mp = ukfs_getmp(puffs_getspecific(pu));
504 	struct p2k_mount *p2m = ukfs_getspecific(puffs_getspecific(pu));
505 	struct p2k_node *p2n;
506 	struct vnode *vp;
507 	enum vtype vtype;
508 	voff_t vsize;
509 	uint64_t rdev; /* XXX: allows running this on NetBSD 5.0 */
510 	int rv;
511 
512 	rv = rump_vfs_fhtovp(mp, fid, &vp);
513 	if (rv)
514 		return rv;
515 	RUMP_VOP_UNLOCK(vp, 0);
516 
517 	p2n = getp2n(p2m, vp, false, NULL);
518 	if (p2n == NULL)
519 		return ENOMEM;
520 
521 	puffs_newinfo_setcookie(pni, p2n);
522 	rump_getvninfo(vp, &vtype, &vsize, (void *)&rdev);
523 	puffs_newinfo_setvtype(pni, vtype);
524 	puffs_newinfo_setsize(pni, vsize);
525 	/* LINTED: yea, it'll lose accuracy, but that's life */
526 	puffs_newinfo_setrdev(pni, rdev);
527 
528 	return 0;
529 }
530 
531 /*ARGSUSED*/
532 int
533 p2k_fs_nodetofh(struct puffs_usermount *pu, puffs_cookie_t cookie, void *fid,
534 	size_t *fidsize)
535 {
536 	struct vnode *vp = cookie;
537 
538 	return rump_vfs_vptofh(vp, fid, fidsize);
539 }
540 
541 /*ARGSUSED*/
542 int
543 p2k_node_lookup(struct puffs_usermount *pu, puffs_cookie_t opc,
544 	struct puffs_newinfo *pni, const struct puffs_cn *pcn)
545 {
546 	struct p2k_mount *p2m = ukfs_getspecific(puffs_getspecific(pu));
547 	struct p2k_node *p2n_dir = opc, *p2n;
548 	struct componentname *cn;
549 	struct vnode *dvp = p2n_dir->p2n_vp, *vp;
550 	enum vtype vtype;
551 	voff_t vsize;
552 	uint64_t rdev; /* XXX: uint64_t because of stack overwrite in compat */
553 	int rv;
554 
555 	cn = makecn(pcn, 0);
556 	RUMP_VOP_LOCK(dvp, LK_EXCLUSIVE);
557 	rv = RUMP_VOP_LOOKUP(dvp, &vp, cn);
558 	RUMP_VOP_UNLOCK(dvp, 0);
559 	if (rump_checksavecn(cn)) {
560 		/*
561 		 * XXX: detect RENAME by SAVESTART, both src and targ lookups
562 		 *
563 		 * XXX part deux: rename syscall actually does two lookups
564 		 * for the source, the second without SAVESTART.  So detect
565 		 * this also and compensate.
566 		 */
567 		if (pcn->pcn_flags & NAMEI_SAVESTART) {
568 			if (pcn->pcn_nameiop == NAMEI_DELETE) {
569 				assert(p2n_dir->p2n_cn_ren_src == NULL);
570 				p2n_dir->p2n_cn_ren_src = cn;
571 			} else {
572 				assert(pcn->pcn_nameiop == NAMEI_RENAME);
573 				assert(p2n_dir->p2n_cn_ren_targ == NULL);
574 				p2n_dir->p2n_cn_ren_targ = cn;
575 			}
576 		} else {
577 			if (pcn->pcn_nameiop == NAMEI_DELETE
578 			    && p2n_dir->p2n_cn_ren_src) {
579 				freecn(cn, RUMPCN_FORCEFREE);
580 				cn = NULL;
581 			} else {
582 				assert(p2n_dir->p2n_cn == NULL);
583 				p2n_dir->p2n_cn = cn;
584 			}
585 		}
586 	} else {
587 		freecn(cn, 0);
588 		cn = NULL;
589 	}
590 	if (rv) {
591 		if (rv == EJUSTRETURN) {
592 			rv = ENOENT;
593 		}
594 		return rv;
595 	}
596 	RUMP_VOP_UNLOCK(vp, 0);
597 
598 	p2n = getp2n(p2m, vp, false, NULL);
599 	if (p2n == NULL) {
600 		if (pcn->pcn_flags & NAMEI_SAVESTART) {
601 			if (pcn->pcn_nameiop == NAMEI_DELETE) {
602 				p2n_dir->p2n_cn_ren_src = NULL;
603 			} else {
604 				p2n_dir->p2n_cn_ren_targ = NULL;
605 			}
606 		} else {
607 			p2n_dir->p2n_cn = NULL;
608 		}
609 		/* XXX: what in the world should happen with SAVESTART? */
610 		RUMP_VOP_ABORTOP(dvp, cn);
611 		return ENOMEM;
612 	}
613 
614 	puffs_newinfo_setcookie(pni, p2n);
615 	rump_getvninfo(vp, &vtype, &vsize, (void *)&rdev);
616 	puffs_newinfo_setvtype(pni, vtype);
617 	puffs_newinfo_setsize(pni, vsize);
618 	/* LINTED: yea, it'll lose accuracy, but that's life */
619 	puffs_newinfo_setrdev(pni, rdev);
620 
621 	return 0;
622 }
623 
624 #define VERS_TIMECHANGE 599000700
625 static int
626 needcompat(void)
627 {
628 
629 	/*LINTED*/
630 	return __NetBSD_Version__ < VERS_TIMECHANGE
631 	    && rump_getversion() >= VERS_TIMECHANGE;
632 }
633 
/*
 * DOCOMPAT: make va_compat point at a vattr usable by the rump kernel.
 * Without compat that is va itself (constness stripped); with compat a
 * fresh vattr is allocated and filled by converting va.
 */
#define DOCOMPAT(va, va_compat)						\
do {									\
	if (!needcompat()) {						\
		(va_compat) = __UNCONST(va);				\
	} else {							\
		(va_compat) = rump_vattr_init();			\
		rump_vattr50_to_vattr((va), (va_compat));		\
	}								\
} while (/*CONSTCOND*/0)

/*
 * UNDOCOMPAT: free the vattr allocated by DOCOMPAT, if compat is in
 * effect.
 */
#define UNDOCOMPAT(va_compat)						\
do {									\
	if (needcompat()) {						\
		rump_vattr_free(va_compat);				\
	}								\
} while (/*CONSTCOND*/0)
649 
650 static int
651 do_makenode(struct puffs_usermount *pu, struct p2k_node *p2n_dir,
652 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
653 	const struct vattr *vap, char *link_target,
654 	int (*makefn)(struct vnode *, struct vnode **, struct componentname *,
655 		      struct vattr *),
656 	int (*symfn)(struct vnode *, struct vnode **, struct componentname *,
657 		      struct vattr *, char *))
658 {
659 	struct p2k_mount *p2m = ukfs_getspecific(puffs_getspecific(pu));
660 	struct vnode *dvp = p2n_dir->p2n_vp;
661 	struct p2k_node *p2n;
662 	struct componentname *cn;
663 	struct vattr *va_x;
664 	struct vnode *vp;
665 	int rv;
666 
667 	p2n = malloc(sizeof(*p2n));
668 	if (p2n == NULL)
669 		return ENOMEM;
670 	DOCOMPAT(vap, va_x);
671 
672 	if (p2n_dir->p2n_cn) {
673 		cn = p2n_dir->p2n_cn;
674 		p2n_dir->p2n_cn = NULL;
675 	} else {
676 		cn = makecn(pcn, RUMP_NAMEI_HASBUF);
677 	}
678 
679 	RUMP_VOP_LOCK(dvp, LK_EXCLUSIVE);
680 	rump_vp_incref(dvp);
681 	if (makefn) {
682 		rv = makefn(dvp, &vp, cn, va_x);
683 	} else {
684 		rv = symfn(dvp, &vp, cn, va_x, link_target);
685 	}
686 	assert(RUMP_VOP_ISLOCKED(dvp) == 0);
687 	freecn(cn, 0);
688 
689 	if (rv == 0) {
690 		RUMP_VOP_UNLOCK(vp, 0);
691 		p2n = getp2n(p2m, vp, true, p2n);
692 		puffs_newinfo_setcookie(pni, p2n);
693 	} else {
694 		free(p2n);
695 	}
696 
697 	UNDOCOMPAT(va_x);
698 
699 	return rv;
700 
701 }
702 
703 /*ARGSUSED*/
704 int
705 p2k_node_create(struct puffs_usermount *pu, puffs_cookie_t opc,
706 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
707 	const struct vattr *vap)
708 {
709 
710 	return do_makenode(pu, opc, pni, pcn, vap, NULL, RUMP_VOP_CREATE, NULL);
711 }
712 
713 /*ARGSUSED*/
714 int
715 p2k_node_mknod(struct puffs_usermount *pu, puffs_cookie_t opc,
716 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
717 	const struct vattr *vap)
718 {
719 
720 	return do_makenode(pu, opc, pni, pcn, vap, NULL, RUMP_VOP_MKNOD, NULL);
721 }
722 
723 /*ARGSUSED*/
724 int
725 p2k_node_open(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
726 	const struct puffs_cred *pcr)
727 {
728 	struct vnode *vp = OPC2VP(opc);
729 	kauth_cred_t cred;
730 	int rv;
731 
732 	cred = cred_create(pcr);
733 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
734 	rv = RUMP_VOP_OPEN(vp, mode, cred);
735 	RUMP_VOP_UNLOCK(vp, 0);
736 	cred_destroy(cred);
737 
738 	return rv;
739 }
740 
741 /*ARGSUSED*/
742 int
743 p2k_node_close(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
744 	const struct puffs_cred *pcr)
745 {
746 	struct vnode *vp = OPC2VP(opc);
747 	kauth_cred_t cred;
748 
749 	cred = cred_create(pcr);
750 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
751 	RUMP_VOP_CLOSE(vp, flags, cred);
752 	RUMP_VOP_UNLOCK(vp, 0);
753 	cred_destroy(cred);
754 
755 	return 0;
756 }
757 
758 /*ARGSUSED*/
759 int
760 p2k_node_access(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
761 	const struct puffs_cred *pcr)
762 {
763 	struct vnode *vp = OPC2VP(opc);
764 	kauth_cred_t cred;
765 	int rv;
766 
767 	cred = cred_create(pcr);
768 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
769 	rv = RUMP_VOP_ACCESS(vp, mode, cred);
770 	RUMP_VOP_UNLOCK(vp, 0);
771 	cred_destroy(cred);
772 
773 	return rv;
774 }
775 
776 /*ARGSUSED*/
777 int
778 p2k_node_getattr(struct puffs_usermount *pu, puffs_cookie_t opc,
779 	struct vattr *vap, const struct puffs_cred *pcr)
780 {
781 	struct vnode *vp = OPC2VP(opc);
782 	kauth_cred_t cred;
783 	struct vattr *va_x;
784 	int rv;
785 
786 	/* "deadfs" */
787 	if (!vp)
788 		return 0;
789 
790 	if (needcompat()) {
791 		va_x = rump_vattr_init();
792 	} else {
793 		va_x = vap;
794 	}
795 
796 	cred = cred_create(pcr);
797 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
798 	rv = RUMP_VOP_GETATTR(vp, va_x, cred);
799 	RUMP_VOP_UNLOCK(vp, 0);
800 	cred_destroy(cred);
801 
802 	if (needcompat()) {
803 		rump_vattr_to_vattr50(va_x, vap);
804 		rump_vattr_free(va_x);
805 	}
806 
807 	return rv;
808 }
809 
810 /*ARGSUSED*/
811 int
812 p2k_node_setattr(struct puffs_usermount *pu, puffs_cookie_t opc,
813 	const struct vattr *vap, const struct puffs_cred *pcr)
814 {
815 	struct vnode *vp = OPC2VP(opc);
816 	kauth_cred_t cred;
817 	struct vattr *va_x;
818 	int rv;
819 
820 	/* "deadfs" */
821 	if (!vp)
822 		return 0;
823 
824 	DOCOMPAT(vap, va_x);
825 
826 	cred = cred_create(pcr);
827 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
828 	rv = RUMP_VOP_SETATTR(vp, va_x, cred);
829 	RUMP_VOP_UNLOCK(vp, 0);
830 	cred_destroy(cred);
831 
832 	UNDOCOMPAT(va_x);
833 
834 	return rv;
835 }
836 
837 /*ARGSUSED*/
838 int
839 p2k_node_fsync(struct puffs_usermount *pu, puffs_cookie_t opc,
840 	const struct puffs_cred *pcr, int flags, off_t offlo, off_t offhi)
841 {
842 	struct vnode *vp = OPC2VP(opc);
843 	kauth_cred_t cred;
844 	int rv;
845 
846 	/* "deadfs" */
847 	if (!vp)
848 		return 0;
849 
850 	cred = cred_create(pcr);
851 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
852 	rv = RUMP_VOP_FSYNC(vp, cred, flags, offlo, offhi);
853 	RUMP_VOP_UNLOCK(vp, 0);
854 	cred_destroy(cred);
855 
856 	return rv;
857 }
858 
859 /*ARGSUSED*/
860 int
861 p2k_node_mmap(struct puffs_usermount *pu, puffs_cookie_t opc, vm_prot_t flags,
862 	const struct puffs_cred *pcr)
863 {
864 	kauth_cred_t cred;
865 	int rv;
866 
867 	cred = cred_create(pcr);
868 	rv = RUMP_VOP_MMAP(OPC2VP(opc), flags, cred);
869 	cred_destroy(cred);
870 
871 	return rv;
872 }
873 
874 /*ARGSUSED*/
875 int
876 p2k_node_seek(struct puffs_usermount *pu, puffs_cookie_t opc,
877 	off_t oldoff, off_t newoff, const struct puffs_cred *pcr)
878 {
879 	struct vnode *vp = OPC2VP(opc);
880 	kauth_cred_t cred;
881 	int rv;
882 
883 	cred = cred_create(pcr);
884 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
885 	rv = RUMP_VOP_SEEK(vp, oldoff, newoff, cred);
886 	RUMP_VOP_UNLOCK(vp, 0);
887 	cred_destroy(cred);
888 
889 	return rv;
890 }
891 
892 static int
893 do_nukenode(struct p2k_node *p2n_dir, struct p2k_node *p2n,
894 	const struct puffs_cn *pcn,
895 	int (*nukefn)(struct vnode *, struct vnode *, struct componentname *))
896 {
897 	struct vnode *dvp = p2n_dir->p2n_vp, *vp = p2n->p2n_vp;
898 	struct componentname *cn;
899 	int rv;
900 
901 	if (p2n_dir->p2n_cn) {
902 		cn = p2n_dir->p2n_cn;
903 		p2n_dir->p2n_cn = NULL;
904 	} else {
905 		cn = makecn(pcn, RUMP_NAMEI_HASBUF);
906 	}
907 
908 	RUMP_VOP_LOCK(dvp, LK_EXCLUSIVE);
909 	rump_vp_incref(dvp);
910 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
911 	rump_vp_incref(vp);
912 	rv = nukefn(dvp, vp, cn);
913 	assert(RUMP_VOP_ISLOCKED(dvp) == 0);
914 	assert(RUMP_VOP_ISLOCKED(vp) == 0);
915 	freecn(cn, 0);
916 
917 	return rv;
918 
919 }
920 
921 /*ARGSUSED*/
922 int
923 p2k_node_remove(struct puffs_usermount *pu, puffs_cookie_t opc,
924 	puffs_cookie_t targ, const struct puffs_cn *pcn)
925 {
926 
927 	return do_nukenode(opc, targ, pcn, RUMP_VOP_REMOVE);
928 }
929 
930 /*ARGSUSED*/
931 int
932 p2k_node_link(struct puffs_usermount *pu, puffs_cookie_t opc,
933 	puffs_cookie_t targ, const struct puffs_cn *pcn)
934 {
935 	struct vnode *dvp = OPC2VP(opc);
936 	struct p2k_node *p2n_dir = opc;
937 	struct componentname *cn;
938 	int rv;
939 
940 	if (p2n_dir->p2n_cn) {
941 		cn = p2n_dir->p2n_cn;
942 		p2n_dir->p2n_cn = NULL;
943 	} else {
944 		cn = makecn(pcn, RUMP_NAMEI_HASBUF);
945 	}
946 
947 	RUMP_VOP_LOCK(dvp, LK_EXCLUSIVE);
948 	rump_vp_incref(dvp);
949 	rv = RUMP_VOP_LINK(dvp, OPC2VP(targ), cn);
950 	freecn(cn, 0);
951 
952 	return rv;
953 }
954 
955 /*ARGSUSED*/
956 int
957 p2k_node_rename(struct puffs_usermount *pu,
958 	puffs_cookie_t src_dir, puffs_cookie_t src,
959 	const struct puffs_cn *pcn_src,
960 	puffs_cookie_t targ_dir, puffs_cookie_t targ,
961 	const struct puffs_cn *pcn_targ)
962 {
963 	struct p2k_node *p2n_srcdir = src_dir, *p2n_targdir = targ_dir;
964 	struct vnode *dvp, *vp, *tdvp, *tvp = NULL;
965 	struct componentname *cn_src, *cn_targ;
966 	int rv;
967 
968 	if (p2n_srcdir->p2n_cn_ren_src) {
969 		cn_src = p2n_srcdir->p2n_cn_ren_src;
970 		p2n_srcdir->p2n_cn_ren_src = NULL;
971 	} else {
972 		cn_src = makecn(pcn_src, RUMP_NAMEI_HASBUF);
973 	}
974 
975 	if (p2n_targdir->p2n_cn_ren_targ) {
976 		cn_targ = p2n_targdir->p2n_cn_ren_targ;
977 		p2n_targdir->p2n_cn_ren_targ = NULL;
978 	} else {
979 		cn_targ = makecn(pcn_targ, RUMP_NAMEI_HASBUF);
980 	}
981 
982 	dvp = OPC2VP(src_dir);
983 	vp = OPC2VP(src);
984 	tdvp = OPC2VP(targ_dir);
985 	if (targ) {
986 		tvp = OPC2VP(targ);
987 	}
988 
989 	rump_vp_incref(dvp);
990 	rump_vp_incref(vp);
991 	RUMP_VOP_LOCK(tdvp, LK_EXCLUSIVE);
992 	rump_vp_incref(tdvp);
993 	if (tvp) {
994 		RUMP_VOP_LOCK(tvp, LK_EXCLUSIVE);
995 		rump_vp_incref(tvp);
996 	}
997 	rv = RUMP_VOP_RENAME(dvp, vp, cn_src, tdvp, tvp, cn_targ);
998 	assert(RUMP_VOP_ISLOCKED(tdvp) == 0);
999 	if (tvp) {
1000 		assert(RUMP_VOP_ISLOCKED(tvp) == 0);
1001 	}
1002 	freecn(cn_src, RUMPCN_FORCEFREE);
1003 	freecn(cn_targ, RUMPCN_FORCEFREE);
1004 
1005 	return rv;
1006 }
1007 
1008 /*ARGSUSED*/
1009 int
1010 p2k_node_mkdir(struct puffs_usermount *pu, puffs_cookie_t opc,
1011 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1012 	const struct vattr *vap)
1013 {
1014 
1015 	return do_makenode(pu, opc, pni, pcn, vap, NULL, RUMP_VOP_MKDIR, NULL);
1016 }
1017 
1018 /*ARGSUSED*/
1019 int
1020 p2k_node_rmdir(struct puffs_usermount *pu, puffs_cookie_t opc,
1021 	puffs_cookie_t targ, const struct puffs_cn *pcn)
1022 {
1023 
1024 	return do_nukenode(opc, targ, pcn, RUMP_VOP_RMDIR);
1025 }
1026 
1027 /*ARGSUSED*/
1028 int
1029 p2k_node_symlink(struct puffs_usermount *pu, puffs_cookie_t opc,
1030 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1031 	const struct vattr *vap, const char *link_target)
1032 {
1033 
1034 	return do_makenode(pu, opc, pni, pcn, vap,
1035 	    __UNCONST(link_target), NULL, RUMP_VOP_SYMLINK);
1036 }
1037 
1038 /*ARGSUSED*/
1039 int
1040 p2k_node_readdir(struct puffs_usermount *pu, puffs_cookie_t opc,
1041 	struct dirent *dent, off_t *readoff, size_t *reslen,
1042 	const struct puffs_cred *pcr, int *eofflag,
1043 	off_t *cookies, size_t *ncookies)
1044 {
1045 	struct vnode *vp = OPC2VP(opc);
1046 	kauth_cred_t cred;
1047 	struct uio *uio;
1048 	off_t *vop_cookies;
1049 	int vop_ncookies;
1050 	int rv;
1051 
1052 	cred = cred_create(pcr);
1053 	uio = rump_uio_setup(dent, *reslen, *readoff, RUMPUIO_READ);
1054 	RUMP_VOP_LOCK(vp, LK_SHARED);
1055 	if (cookies) {
1056 		rv = RUMP_VOP_READDIR(vp, uio, cred, eofflag,
1057 		    &vop_cookies, &vop_ncookies);
1058 		memcpy(cookies, vop_cookies, vop_ncookies * sizeof(*cookies));
1059 		*ncookies = vop_ncookies;
1060 		free(vop_cookies);
1061 	} else {
1062 		rv = RUMP_VOP_READDIR(vp, uio, cred, eofflag, NULL, NULL);
1063 	}
1064 	RUMP_VOP_UNLOCK(vp, 0);
1065 	if (rv == 0) {
1066 		*reslen = rump_uio_getresid(uio);
1067 		*readoff = rump_uio_getoff(uio);
1068 	}
1069 	rump_uio_free(uio);
1070 	cred_destroy(cred);
1071 
1072 	return rv;
1073 }
1074 
1075 /*ARGSUSED*/
1076 int
1077 p2k_node_readlink(struct puffs_usermount *pu, puffs_cookie_t opc,
1078 	const struct puffs_cred *pcr, char *linkname, size_t *linklen)
1079 {
1080 	struct vnode *vp = OPC2VP(opc);
1081 	kauth_cred_t cred;
1082 	struct uio *uio;
1083 	int rv;
1084 
1085 	cred = cred_create(pcr);
1086 	uio = rump_uio_setup(linkname, *linklen, 0, RUMPUIO_READ);
1087 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
1088 	rv = RUMP_VOP_READLINK(vp, uio, cred);
1089 	RUMP_VOP_UNLOCK(vp, 0);
1090 	*linklen -= rump_uio_free(uio);
1091 	cred_destroy(cred);
1092 
1093 	return rv;
1094 }
1095 
1096 /*ARGSUSED*/
1097 int
1098 p2k_node_read(struct puffs_usermount *pu, puffs_cookie_t opc,
1099 	uint8_t *buf, off_t offset, size_t *resid,
1100 	const struct puffs_cred *pcr, int ioflag)
1101 {
1102 	struct vnode *vp = OPC2VP(opc);
1103 	kauth_cred_t cred;
1104 	struct uio *uio;
1105 	int rv;
1106 
1107 	cred = cred_create(pcr);
1108 	uio = rump_uio_setup(buf, *resid, offset, RUMPUIO_READ);
1109 	RUMP_VOP_LOCK(vp, LK_SHARED);
1110 	rv = RUMP_VOP_READ(vp, uio, ioflag, cred);
1111 	RUMP_VOP_UNLOCK(vp, 0);
1112 	*resid = rump_uio_free(uio);
1113 	cred_destroy(cred);
1114 
1115 	return rv;
1116 }
1117 
1118 /*ARGSUSED*/
1119 int
1120 p2k_node_write(struct puffs_usermount *pu, puffs_cookie_t opc,
1121 	uint8_t *buf, off_t offset, size_t *resid,
1122 	const struct puffs_cred *pcr, int ioflag)
1123 {
1124 	struct vnode *vp = OPC2VP(opc);
1125 	kauth_cred_t cred;
1126 	struct uio *uio;
1127 	int rv;
1128 
1129 	/* "deadfs" */
1130 	if (!vp)
1131 		return 0;
1132 
1133 	cred = cred_create(pcr);
1134 	uio = rump_uio_setup(buf, *resid, offset, RUMPUIO_WRITE);
1135 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
1136 	rv = RUMP_VOP_WRITE(vp, uio, ioflag, cred);
1137 	RUMP_VOP_UNLOCK(vp, 0);
1138 	*resid = rump_uio_free(uio);
1139 	cred_destroy(cred);
1140 
1141 	return rv;
1142 }
1143 
1144 /* the kernel releases its last reference here */
1145 int
1146 p2k_node_inactive(struct puffs_usermount *pu, puffs_cookie_t opc)
1147 {
1148 	struct p2k_node *p2n = opc;
1149 	struct vnode *vp = OPC2VP(opc);
1150 	bool recycle;
1151 	int rv;
1152 
1153 	/* deadfs */
1154 	if (!vp)
1155 		return 0;
1156 
1157 	/*
1158 	 * Flush all cached vnode pages from the rump kernel -- they
1159 	 * are kept in puffs for all things that matter.
1160 	 */
1161 	rump_vp_interlock(vp);
1162 	(void) RUMP_VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES|PGO_CLEANIT|PGO_FREE);
1163 
1164 	/*
1165 	 * Ok, this is where we get nasty.  We pretend the vnode is
1166 	 * inactive and already tell the file system that.  However,
1167 	 * we are allowed to pretend it also grows a reference immediately
1168 	 * after per vget(), so this does not do harm.  Cheap trick, but ...
1169 	 *
1170 	 * If the file system thinks the inode is done for, we release
1171 	 * our reference and clear all knowledge of the vnode.  If,
1172 	 * however, the inode is still active, we retain our reference
1173 	 * until reclaim, since puffs might be flushing out some data
1174 	 * later.
1175 	 */
1176 	RUMP_VOP_LOCK(vp, LK_EXCLUSIVE);
1177 	rv = RUMP_VOP_INACTIVE(vp, &recycle);
1178 	if (recycle) {
1179 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1);
1180 		rump_vp_rele(p2n->p2n_vp);
1181 		p2n->p2n_vp = NULL;
1182 	}
1183 
1184 	return rv;
1185 }
1186 
1187 /*ARGSUSED*/
1188 int
1189 p2k_node_reclaim(struct puffs_usermount *pu, puffs_croissant_t opc)
1190 {
1191 	struct p2k_node *p2n = opc;
1192 
1193 	if (p2n->p2n_vp) {
1194 		rump_vp_rele(p2n->p2n_vp);
1195 		p2n->p2n_vp = NULL;
1196 	}
1197 
1198 	freep2n(p2n);
1199 	return 0;
1200 }
1201