xref: /netbsd-src/lib/libperfuse/ops.c (revision 0953dc8744b62dfdecb2f203329e730593755659)
1 /*  $NetBSD: ops.c,v 1.53 2012/04/08 15:13:06 manu Exp $ */
2 
3 /*-
4  *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *     notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16  *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17  *  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19  *  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  *  POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <libgen.h>
32 #include <errno.h>
33 #include <err.h>
34 #include <sysexits.h>
35 #include <syslog.h>
36 #include <puffs.h>
37 #include <sys/socket.h>
38 #include <sys/socket.h>
39 #include <sys/extattr.h>
40 #include <sys/time.h>
41 #include <machine/vmparam.h>
42 
43 #include "perfuse_priv.h"
44 #include "fuse.h"
45 
46 extern int perfuse_diagflags;
47 
48 #if 0
49 static void print_node(const char *, puffs_cookie_t);
50 #endif
51 static void set_expire(puffs_cookie_t, struct fuse_entry_out *,
52     struct fuse_attr_out *);
53 #ifndef PUFFS_KFLAG_CACHE_FS_TTL
54 static int attr_expired(puffs_cookie_t);
55 static int entry_expired(puffs_cookie_t);
56 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
57 static int xchg_msg(struct puffs_usermount *, puffs_cookie_t,
58     perfuse_msg_t *, size_t, enum perfuse_xchg_pb_reply);
59 static int mode_access(puffs_cookie_t, const struct puffs_cred *, mode_t);
60 static int sticky_access(struct puffs_node *, const struct puffs_cred *);
61 static void fuse_attr_to_vap(struct perfuse_state *,
62     struct vattr *, struct fuse_attr *);
63 static int node_lookup_dir_nodot(struct puffs_usermount *,
64     puffs_cookie_t, char *, size_t, struct puffs_node **);
65 static int node_lookup_common(struct puffs_usermount *, puffs_cookie_t,
66     const char *, const struct puffs_cred *, struct puffs_node **);
67 static int node_mk_common(struct puffs_usermount *, puffs_cookie_t,
68     struct puffs_newinfo *, const struct puffs_cn *pcn, perfuse_msg_t *);
69 static int node_mk_common_final(struct puffs_usermount *, puffs_cookie_t,
70     struct puffs_node *, const struct puffs_cn *pcn);
71 static uint64_t readdir_last_cookie(struct fuse_dirent *, size_t);
72 static ssize_t fuse_to_dirent(struct puffs_usermount *, puffs_cookie_t,
73     struct fuse_dirent *, size_t);
74 static int readdir_buffered(puffs_cookie_t, struct dirent *, off_t *,
75     size_t *);
76 static void requeue_request(struct puffs_usermount *,
77     puffs_cookie_t opc, enum perfuse_qtype);
78 static int dequeue_requests(struct perfuse_state *,
79     puffs_cookie_t opc, enum perfuse_qtype, int);
80 #define DEQUEUE_ALL 0
81 
/*
 * I/O flags copied from <sys/vnode.h>, where they are only
 * visible inside the #ifdef _KERNEL section.
 */
#define IO_DSYNC	0x00200
#define IO_SYNC		(0x40|IO_DSYNC)
#define IO_DIRECT	0x02000

/*
 * File flags and helper copied from <fcntl.h>, where they are
 * only visible inside the #ifdef _KERNEL section.
 */
#define F_WAIT		0x010
#define F_FLOCK		0x020
#define OFLAGS(fflags)  ((fflags) - 1)
95 
96 /*
97  * Borrowed from src/sys/kern/vfs_subr.c and src/sys/sys/vnode.h
98  */
99 const enum vtype iftovt_tab[16] = {
100 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
101         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
102 };
103 const int vttoif_tab[9] = {
104 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
105         S_IFSOCK, S_IFIFO, S_IFMT,
106 };
107 
108 #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
109 #define VTTOIF(indx) (vttoif_tab[(int)(indx)])
110 
#if 0
/*
 * Debug helper, currently compiled out: print the name, cookie,
 * FUSE nodeid and inode number of the node behind opc, prefixed
 * by the caller-supplied function name.
 */
static void
print_node(const char *func, puffs_cookie_t opc)
{
	struct puffs_node *pn = (struct puffs_node *)opc;
	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
	struct vattr *vap = &pn->pn_va;

	printf("%s: \"%s\", opc = %p, nodeid = 0x%"PRIx64" ino = %"PRIu64"\n",
	       func, pnd->pnd_name, opc, pnd->pnd_nodeid, vap->va_fileid);
}
#endif /* 0 */
129 
130 int
131 perfuse_node_close_common(struct puffs_usermount *pu, puffs_cookie_t opc,
132 	int mode)
133 {
134 	struct perfuse_state *ps;
135 	perfuse_msg_t *pm;
136 	int op;
137 	uint64_t fh;
138 	struct fuse_release_in *fri;
139 	struct perfuse_node_data *pnd;
140 	struct puffs_node *pn;
141 	int error;
142 
143 	ps = puffs_getspecific(pu);
144 	pn = (struct puffs_node *)opc;
145 	pnd = PERFUSE_NODE_DATA(pn);
146 
147 	if (puffs_pn_getvap(pn)->va_type == VDIR) {
148 		op = FUSE_RELEASEDIR;
149 		mode = FREAD;
150 	} else {
151 		op = FUSE_RELEASE;
152 	}
153 
154 	/*
155 	 * Destroy the filehandle before sending the
156 	 * request to the FUSE filesystem, otherwise
157 	 * we may get a second close() while we wait
158 	 * for the reply, and we would end up closing
159 	 * the same fh twice instead of closng both.
160 	 */
161 	fh = perfuse_get_fh(opc, mode);
162 	perfuse_destroy_fh(pn, fh);
163 
164 	/*
165 	 * release_flags may be set to FUSE_RELEASE_FLUSH
166 	 * to flush locks. lock_owner must be set in that case
167 	 *
168 	 * ps_new_msg() is called with NULL creds, which will
169 	 * be interpreted as FUSE superuser. We come here from the
170 	 * inactive method, which provides no creds, but obviously
171 	 * runs with kernel privilege.
172 	 */
173 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*fri), NULL);
174 	fri = GET_INPAYLOAD(ps, pm, fuse_release_in);
175 	fri->fh = fh;
176 	fri->flags = 0;
177 	fri->release_flags = 0;
178 	fri->lock_owner = pnd->pnd_lock_owner;
179 	fri->flags = (fri->lock_owner != 0) ? FUSE_RELEASE_FLUSH : 0;
180 
181 #ifdef PERFUSE_DEBUG
182 	if (perfuse_diagflags & PDF_FH)
183 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
184 			 __func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
185 #endif
186 
187 	if ((error = xchg_msg(pu, opc, pm,
188 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
189 		DERRX(EX_SOFTWARE, "%s: freed fh = 0x%"PRIx64" but filesystem "
190 		      "returned error = %d", __func__, fh, error);
191 
192 	ps->ps_destroy_msg(pm);
193 
194 	return 0;
195 }
196 
197 static int
198 xchg_msg(struct puffs_usermount *pu, puffs_cookie_t opc, perfuse_msg_t *pm,
199 	size_t len, enum perfuse_xchg_pb_reply wait)
200 {
201 	struct perfuse_state *ps;
202 	struct perfuse_node_data *pnd;
203 	struct perfuse_trace *pt = NULL;
204 	int error;
205 
206 	ps = puffs_getspecific(pu);
207 	pnd = NULL;
208 	if ((struct puffs_node *)opc != NULL)
209 		pnd = PERFUSE_NODE_DATA(opc);
210 
211 #ifdef PERFUSE_DEBUG
212 	if ((perfuse_diagflags & PDF_FILENAME) && (opc != 0))
213 		DPRINTF("file = \"%s\", ino = %"PRIu64" flags = 0x%x\n",
214 			perfuse_node_path(opc),
215 			((struct puffs_node *)opc)->pn_va.va_fileid,
216 			PERFUSE_NODE_DATA(opc)->pnd_flags);
217 #endif
218 	if (pnd)
219 		pnd->pnd_flags |= PND_INXCHG;
220 
221 	/*
222 	 * Record FUSE call start if requested
223 	 */
224 	if (perfuse_diagflags & PDF_TRACE)
225 		pt = perfuse_trace_begin(ps, opc, pm);
226 
227 	/*
228 	 * Do actual FUSE exchange
229 	 */
230 	if ((error = ps->ps_xchg_msg(pu, pm, len, wait)) != 0)
231 		ps->ps_destroy_msg(pm);
232 
233 	/*
234 	 * Record FUSE call end if requested
235 	 */
236 	if (pt != NULL)
237 		perfuse_trace_end(ps, pt, error);
238 
239 	if (pnd) {
240 		pnd->pnd_flags &= ~PND_INXCHG;
241 		(void)dequeue_requests(ps, opc, PCQ_AFTERXCHG, DEQUEUE_ALL);
242 	}
243 
244 	return error;
245 }
246 
247 static int
248 mode_access(puffs_cookie_t opc, const struct puffs_cred *pcr, mode_t mode)
249 {
250 	struct puffs_node *pn;
251 	struct vattr *va;
252 
253 	/*
254 	 * pcr is NULL for self open through fsync or readdir.
255 	 * In both case, access control is useless, as it was
256 	 * done before, at open time.
257 	 */
258 	if (pcr == NULL)
259 		return 0;
260 
261 	pn = (struct puffs_node *)opc;
262 	va = puffs_pn_getvap(pn);
263 	return puffs_access(va->va_type, va->va_mode,
264 			    va->va_uid, va->va_gid,
265 			    mode, pcr);
266 }
267 
268 static int
269 sticky_access(struct puffs_node *targ, const struct puffs_cred *pcr)
270 {
271 	uid_t uid;
272 	struct puffs_node *tdir;
273 	int sticky, owner;
274 
275 	tdir = PERFUSE_NODE_DATA(targ)->pnd_parent;
276 
277 	/*
278 	 * This covers the case where the kernel requests a DELETE
279 	 * or RENAME on its own, and where puffs_cred_getuid would
280 	 * return -1. While such a situation should not happen,
281 	 * we allow it here.
282 	 *
283 	 * This also allows root to tamper with other users' files
284 	 * that have the sticky bit.
285 	 */
286 	if (puffs_cred_isjuggernaut(pcr))
287 		return 0;
288 
289 	if (puffs_cred_getuid(pcr, &uid) != 0)
290 		DERRX(EX_SOFTWARE, "puffs_cred_getuid fails in %s", __func__);
291 
292 	sticky = puffs_pn_getvap(tdir)->va_mode & S_ISTXT;
293 	owner = puffs_pn_getvap(targ)->va_uid == uid;
294 
295 	if (sticky && !owner)
296 		return EACCES;
297 
298 	return 0;
299 }
300 
301 
302 static void
303 fuse_attr_to_vap(struct perfuse_state *ps, struct vattr *vap,
304 	struct fuse_attr *fa)
305 {
306 	vap->va_type = IFTOVT(fa->mode);
307 	vap->va_mode = fa->mode & ALLPERMS;
308 	vap->va_nlink = fa->nlink;
309 	vap->va_uid = fa->uid;
310 	vap->va_gid = fa->gid;
311 	vap->va_fsid = (long)ps->ps_fsid;
312 	vap->va_fileid = fa->ino;
313 	vap->va_size = fa->size;
314 	vap->va_blocksize = fa->blksize;
315 	vap->va_atime.tv_sec = (time_t)fa->atime;
316 	vap->va_atime.tv_nsec = (long) fa->atimensec;
317 	vap->va_mtime.tv_sec = (time_t)fa->mtime;
318 	vap->va_mtime.tv_nsec = (long)fa->mtimensec;
319 	vap->va_ctime.tv_sec = (time_t)fa->ctime;
320 	vap->va_ctime.tv_nsec = (long)fa->ctimensec;
321 	vap->va_birthtime.tv_sec = 0;
322 	vap->va_birthtime.tv_nsec = 0;
323 	vap->va_gen = 0;
324 	vap->va_flags = 0;
325 	vap->va_rdev = fa->rdev;
326 	vap->va_bytes = fa->size;
327 	vap->va_filerev = (u_quad_t)PUFFS_VNOVAL;
328 	vap->va_vaflags = 0;
329 
330 	if (vap->va_blocksize == 0)
331 		vap->va_blocksize = DEV_BSIZE;
332 
333 	if (vap->va_size == (size_t)PUFFS_VNOVAL) /* XXX */
334 		vap->va_size = 0;
335 
336 	return;
337 }
338 
339 static void
340 set_expire(puffs_cookie_t opc, struct fuse_entry_out *feo,
341 	   struct fuse_attr_out *fao)
342 {
343  	struct puffs_node *pn = (struct puffs_node *)opc;
344 #ifndef PUFFS_KFLAG_CACHE_FS_TTL
345 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
346 	struct timespec entry_ts;
347 	struct timespec attr_ts;
348 	struct timespec now;
349 
350 	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
351 		DERR(EX_OSERR, "clock_gettime failed");
352 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
353 
354 	if ((feo == NULL) && (fao == NULL))
355 		DERRX(EX_SOFTWARE, "%s: feo and fao NULL", __func__);
356 
357 	if ((feo != NULL) && (fao != NULL))
358 		DERRX(EX_SOFTWARE, "%s: feo and fao != NULL", __func__);
359 
360 	if (feo != NULL) {
361 #ifdef PUFFS_KFLAG_CACHE_FS_TTL
362 		pn->pn_cn_ttl.tv_sec = feo->entry_valid;
363 		pn->pn_cn_ttl.tv_nsec = feo->entry_valid_nsec;
364 		pn->pn_va_ttl.tv_sec = feo->attr_valid;
365 		pn->pn_va_ttl.tv_nsec = feo->attr_valid_nsec;
366 #else /* PUFFS_KFLAG_CACHE_FS_TTL */
367 		entry_ts.tv_sec = (time_t)feo->entry_valid;
368 		entry_ts.tv_nsec = (long)feo->entry_valid_nsec;
369 
370 		timespecadd(&now, &entry_ts, &pnd->pnd_entry_expire);
371 
372 		attr_ts.tv_sec = (time_t)feo->attr_valid;
373 		attr_ts.tv_nsec = (long)feo->attr_valid_nsec;
374 
375 		timespecadd(&now, &attr_ts, &pnd->pnd_attr_expire);
376 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
377 	}
378 
379 	if (fao != NULL) {
380 #ifdef PUFFS_KFLAG_CACHE_FS_TTL
381 		pn->pn_va_ttl.tv_sec = fao->attr_valid;
382 		pn->pn_va_ttl.tv_nsec = fao->attr_valid_nsec;
383 #else /* PUFFS_KFLAG_CACHE_FS_TTL */
384 		attr_ts.tv_sec = (time_t)fao->attr_valid;
385 		attr_ts.tv_nsec = (long)fao->attr_valid_nsec;
386 
387 		timespecadd(&now, &attr_ts, &pnd->pnd_attr_expire);
388 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
389 	}
390 
391 	return;
392 }
393 
394 #ifndef PUFFS_KFLAG_CACHE_FS_TTL
395 static int
396 attr_expired(puffs_cookie_t opc)
397 {
398 	struct perfuse_node_data *pnd;
399 	struct timespec expire;
400 	struct timespec now;
401 
402 	pnd = PERFUSE_NODE_DATA(opc);
403 	expire = pnd->pnd_attr_expire;
404 
405 	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
406 		DERR(EX_OSERR, "clock_gettime failed");
407 
408 	return timespeccmp(&expire, &now, <);
409 }
410 
411 static int
412 entry_expired(puffs_cookie_t opc)
413 {
414 	struct perfuse_node_data *pnd;
415 	struct timespec expire;
416 	struct timespec now;
417 
418 	pnd = PERFUSE_NODE_DATA(opc);
419 	expire = pnd->pnd_entry_expire;
420 
421 	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
422 		DERR(EX_OSERR, "clock_gettime failed");
423 
424 	return timespeccmp(&expire, &now, <);
425 }
426 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
427 
428 
429 /*
430  * Lookup name in directory opc
431  * We take special care of name being . or ..
432  * These are returned by readdir and deserve tweaks.
433  */
434 static int
435 node_lookup_dir_nodot(struct puffs_usermount *pu, puffs_cookie_t opc,
436 	char *name, size_t namelen, struct puffs_node **pnp)
437 {
438 	/*
439 	 * "dot" is easy as we already know it
440 	 */
441 	if (strncmp(name, ".", namelen) == 0) {
442 		*pnp = (struct puffs_node *)opc;
443 		return 0;
444 	}
445 
446 	/*
447 	 * "dotdot" is also known
448 	 */
449 	if (strncmp(name, "..", namelen) == 0) {
450 		*pnp = PERFUSE_NODE_DATA(opc)->pnd_parent;
451 		return 0;
452 	}
453 
454 	return node_lookup_common(pu, opc, name, NULL, pnp);
455 }
456 
457 static int
458 node_lookup_common(struct puffs_usermount *pu, puffs_cookie_t opc,
459 	const char *path, const struct puffs_cred *pcr, struct puffs_node **pnp)
460 {
461 	struct perfuse_state *ps;
462 	struct perfuse_node_data *oldpnd;
463 	perfuse_msg_t *pm;
464 	struct fuse_entry_out *feo;
465 	struct puffs_node *pn;
466 	size_t len;
467 	int error;
468 
469 	/*
470 	 * Prevent further lookups if the parent was removed
471 	 */
472 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
473 		return ESTALE;
474 
475 	if (pnp == NULL)
476 		DERRX(EX_SOFTWARE, "pnp must be != NULL");
477 
478 	ps = puffs_getspecific(pu);
479 
480 #ifdef PERFUSE_DEBUG
481 	if (perfuse_diagflags & PDF_FILENAME)
482 		DPRINTF("%s: opc = %p, file = \"%s\" looking up \"%s\"\n",
483 			__func__, (void *)opc, perfuse_node_path(opc), path);
484 #endif
485 	/*
486 	 * Is the node already known?
487 	 */
488 	TAILQ_FOREACH(oldpnd, &PERFUSE_NODE_DATA(opc)->pnd_children, pnd_next) {
489 		if ((oldpnd->pnd_flags & PND_REMOVED) ||
490 		    (strcmp(oldpnd->pnd_name, path) != 0))
491 			continue;
492 
493 #ifdef PERFUSE_DEBUG
494 		if (perfuse_diagflags & PDF_FILENAME)
495 			DPRINTF("%s: opc = %p, file = \"%s\" found "
496 				"cookie = %p, nodeid = 0x%"PRIx64" "
497 				"for \"%s\"\n", __func__,
498 				(void *)opc, perfuse_node_path(opc),
499 				(void *)oldpnd->pnd_pn, oldpnd->pnd_nodeid,
500 				path);
501 #endif
502 		break;
503 	}
504 
505 #ifndef PUFFS_KFLAG_CACHE_FS_TTL
506 	/*
507 	 * Check for cached name
508 	 */
509 	if ((oldpnd != NULL) && !entry_expired(oldpnd->pnd_pn)) {
510 		oldpnd->pnd_puffs_nlookup++;
511 		*pnp = oldpnd->pnd_pn;
512 		return 0;
513 	}
514 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
515 
516 	len = strlen(path) + 1;
517 
518 	pm = ps->ps_new_msg(pu, opc, FUSE_LOOKUP, len, pcr);
519 	(void)strlcpy(_GET_INPAYLOAD(ps, pm, char *), path, len);
520 
521 	error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply);
522 
523 	switch (error) {
524 	case 0:
525 		break;
526 	case ENOENT:
527 		if (oldpnd != NULL) {
528 			oldpnd->pnd_flags |= PND_REMOVED;
529 #ifdef PERFUSE_DEBUG
530 			if (perfuse_diagflags & PDF_FILENAME)
531 				DPRINTF("%s: opc = %p nodeid = 0x%"PRIx64" "
532 					"file = \"%s\" removed\n", __func__,
533 					oldpnd->pnd_pn, oldpnd->pnd_nodeid,
534 					oldpnd->pnd_name);
535 #endif
536 		}
537 		/* FALLTHROUGH */
538 	default:
539 		return error;
540 		/* NOTREACHED */
541 		break;
542 	}
543 
544 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
545 
546 	if (oldpnd != NULL) {
547 		if (oldpnd->pnd_nodeid == feo->nodeid) {
548 			oldpnd->pnd_fuse_nlookup++;
549 			oldpnd->pnd_puffs_nlookup++;
550 			*pnp = oldpnd->pnd_pn;
551 
552 			ps->ps_destroy_msg(pm);
553 			return 0;
554 		} else {
555 			oldpnd->pnd_flags |= PND_REMOVED;
556 #ifdef PERFUSE_DEBUG
557 			if (perfuse_diagflags & PDF_FILENAME)
558 				DPRINTF("%s: opc = %p nodeid = 0x%"PRIx64" "
559 					"file = \"%s\" replaced\n", __func__,
560 					oldpnd->pnd_pn, oldpnd->pnd_nodeid,
561 					oldpnd->pnd_name);
562 #endif
563 		}
564 	}
565 
566 	pn = perfuse_new_pn(pu, path, opc);
567 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
568 
569 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
570 	pn->pn_va.va_gen = (u_long)(feo->generation);
571 	set_expire((puffs_cookie_t)pn, feo, NULL);
572 
573 	*pnp = pn;
574 
575 #ifdef PERFUSE_DEBUG
576 	if (perfuse_diagflags & PDF_FILENAME)
577 		DPRINTF("%s: opc = %p, looked up opc = %p, "
578 			"nodeid = 0x%"PRIx64" file = \"%s\"\n", __func__,
579 			(void *)opc, pn, feo->nodeid, path);
580 #endif
581 
582 	ps->ps_destroy_msg(pm);
583 
584 	return 0;
585 }
586 
587 
588 /*
589  * Common code for methods that create objects:
590  * perfuse_node_mkdir
591  * perfuse_node_mknod
592  * perfuse_node_symlink
593  */
594 static int
595 node_mk_common(struct puffs_usermount *pu, puffs_cookie_t opc,
596 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
597 	perfuse_msg_t *pm)
598 {
599 	struct perfuse_state *ps;
600 	struct puffs_node *pn;
601 	struct fuse_entry_out *feo;
602 	int error;
603 
604 	ps =  puffs_getspecific(pu);
605 
606 	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
607 		return error;
608 
609 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
610 	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
611 		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
612 
613 	pn = perfuse_new_pn(pu, pcn->pcn_name, opc);
614 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
615 
616 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
617 	pn->pn_va.va_gen = (u_long)(feo->generation);
618 	set_expire((puffs_cookie_t)pn, feo, NULL);
619 
620 	puffs_newinfo_setcookie(pni, pn);
621 
622 #ifdef PERFUSE_DEBUG
623 	if (perfuse_diagflags & PDF_FILENAME)
624 		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
625 			"nodeid = 0x%"PRIx64"\n",
626 			__func__, (void *)pn, pcn->pcn_name,
627 			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid);
628 #endif
629 	ps->ps_destroy_msg(pm);
630 
631 	return node_mk_common_final(pu, opc, pn, pcn);
632 }
633 
634 /*
635  * Common final code for methods that create objects:
636  * perfuse_node_mkdir via node_mk_common
637  * perfuse_node_mknod via node_mk_common
638  * perfuse_node_symlink via node_mk_common
639  * perfuse_node_create
640  */
641 static int
642 node_mk_common_final(struct puffs_usermount *pu, puffs_cookie_t opc,
643 	struct puffs_node *pn, const struct puffs_cn *pcn)
644 {
645 	struct perfuse_state *ps;
646 	perfuse_msg_t *pm;
647 	struct fuse_setattr_in *fsi;
648 	struct fuse_attr_out *fao;
649 	int error;
650 
651 	ps =  puffs_getspecific(pu);
652 
653 	/*
654 	 * Set owner and group. The kernel cannot create a file
655 	 * on its own (puffs_cred_getuid would return -1), right?
656 	 */
657 	if (puffs_cred_getuid(pcn->pcn_cred, &pn->pn_va.va_uid) != 0)
658 		DERRX(EX_SOFTWARE, "puffs_cred_getuid fails in %s", __func__);
659 	if (puffs_cred_getgid(pcn->pcn_cred, &pn->pn_va.va_gid) != 0)
660 		DERRX(EX_SOFTWARE, "puffs_cred_getgid fails in %s", __func__);
661 
662 	pm = ps->ps_new_msg(pu, (puffs_cookie_t)pn,
663 			    FUSE_SETATTR, sizeof(*fsi), pcn->pcn_cred);
664 	fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
665 	fsi->uid = pn->pn_va.va_uid;
666 	fsi->gid = pn->pn_va.va_gid;
667 	fsi->valid = FUSE_FATTR_UID|FUSE_FATTR_GID;
668 
669 	if ((error = xchg_msg(pu, (puffs_cookie_t)pn, pm,
670 			      sizeof(*fao), wait_reply)) != 0)
671 		return error;
672 
673 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
674 	fuse_attr_to_vap(ps, &pn->pn_va, &fao->attr);
675 	set_expire((puffs_cookie_t)pn, NULL, fao);
676 
677 	/*
678 	 * The parent directory needs a sync
679 	 */
680 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
681 
682 	ps->ps_destroy_msg(pm);
683 
684 	return 0;
685 }
686 
687 static uint64_t
688 readdir_last_cookie(struct fuse_dirent *fd, size_t fd_len)
689 {
690 	size_t len;
691 	size_t seen = 0;
692 	char *ndp;
693 
694 	do {
695 		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
696 		seen += len;
697 
698 		if (seen >= fd_len)
699 			break;
700 
701 		ndp = (char *)(void *)fd + (size_t)len;
702 		fd = (struct fuse_dirent *)(void *)ndp;
703 	} while (1 /* CONSTCOND */);
704 
705 	return fd->off;
706 }
707 
708 static ssize_t
709 fuse_to_dirent(struct puffs_usermount *pu, puffs_cookie_t opc,
710 	struct fuse_dirent *fd, size_t fd_len)
711 {
712 	struct dirent *dents;
713 	size_t dents_len;
714 	ssize_t written;
715 	uint64_t fd_offset;
716 	struct fuse_dirent *fd_base;
717 	size_t len;
718 
719 	fd_base = fd;
720 	fd_offset = 0;
721 	written = 0;
722 	dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
723 	dents_len = (size_t)PERFUSE_NODE_DATA(opc)->pnd_dirent_len;
724 
725 	do {
726 		char *ndp;
727 		size_t reclen;
728 
729 		reclen = _DIRENT_RECLEN(dents, fd->namelen);
730 
731 		/*
732 		 * Check we do not overflow the output buffer
733 		 * struct fuse_dirent is bigger than struct dirent,
734 		 * so we should always use fd_len and never reallocate
735 		 * later.
736 		 * If we have to reallocate,try to double the buffer
737 		 * each time so that we do not have to do it too often.
738 		 */
739 		if (written + reclen > dents_len) {
740 			if (dents_len == 0)
741 				dents_len = fd_len;
742 			else
743 				dents_len =
744 				   MAX(2 * dents_len, written + reclen);
745 
746 			dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
747 			if ((dents = realloc(dents, dents_len)) == NULL)
748 				DERR(EX_OSERR, "%s: malloc failed", __func__);
749 
750 			PERFUSE_NODE_DATA(opc)->pnd_dirent = dents;
751 			PERFUSE_NODE_DATA(opc)->pnd_dirent_len = dents_len;
752 
753 			/*
754 			 * (void *) for delint
755 			 */
756 			ndp = (char *)(void *)dents + written;
757 			dents = (struct dirent *)(void *)ndp;
758 		}
759 
760 		/*
761 		 * Filesystem was mounted without -o use_ino
762 		 * Perform a lookup to find it.
763 		 */
764 		if (fd->ino == PERFUSE_UNKNOWN_INO) {
765 			struct puffs_node *pn;
766 
767 			if (node_lookup_dir_nodot(pu, opc, fd->name,
768 						  fd->namelen, &pn) != 0) {
769 				DWARNX("node_lookup_dir_nodot failed");
770 			} else {
771 				fd->ino = pn->pn_va.va_fileid;
772 			}
773 		}
774 
775 		dents->d_fileno = fd->ino;
776 		dents->d_reclen = (unsigned short)reclen;
777 		dents->d_namlen = fd->namelen;
778 		dents->d_type = fd->type;
779 		strlcpy(dents->d_name, fd->name, fd->namelen + 1);
780 
781 #ifdef PERFUSE_DEBUG
782 		if (perfuse_diagflags & PDF_READDIR)
783 			DPRINTF("%s: translated \"%s\" ino = %"PRIu64"\n",
784 				__func__, dents->d_name, dents->d_fileno);
785 #endif
786 
787 		dents = _DIRENT_NEXT(dents);
788 		written += reclen;
789 
790 		/*
791 		 * Move to the next record.
792 		 * fd->off is not the offset, it is an opaque cookie
793 		 * given by the filesystem to keep state across multiple
794 		 * readdir() operation.
795 		 * Use record alignement instead.
796 		 */
797 		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
798 #ifdef PERFUSE_DEBUG
799 		if (perfuse_diagflags & PDF_READDIR)
800 			DPRINTF("%s: record at %"PRId64"/0x%"PRIx64" "
801 				"length = %zd/0x%zx. "
802 				"next record at %"PRId64"/0x%"PRIx64" "
803 				"max %zd/0x%zx\n",
804 				__func__, fd_offset, fd_offset, len, len,
805 				fd_offset + len, fd_offset + len,
806 				fd_len, fd_len);
807 #endif
808 		fd_offset += len;
809 
810 		/*
811 		 * Check if next record is still within the packet
812 		 * If it is not, we reached the end of the buffer.
813 		 */
814 		if (fd_offset >= fd_len)
815 			break;
816 
817 		/*
818 		 * (void *) for delint
819 		 */
820 		ndp = (char *)(void *)fd_base + (size_t)fd_offset;
821 		fd = (struct fuse_dirent *)(void *)ndp;
822 
823 	} while (1 /* CONSTCOND */);
824 
825 	/*
826 	 * Adjust the dirent output length
827 	 */
828 	if (written != -1)
829 		PERFUSE_NODE_DATA(opc)->pnd_dirent_len = written;
830 
831 	return written;
832 }
833 
834 static int
835 readdir_buffered(puffs_cookie_t opc, struct dirent *dent, off_t *readoff,
836 	size_t *reslen)
837 {
838 	struct dirent *fromdent;
839 	struct perfuse_node_data *pnd;
840 	char *ndp;
841 
842 	pnd = PERFUSE_NODE_DATA(opc);
843 
844 	while (*readoff < pnd->pnd_dirent_len) {
845 		/*
846 		 * (void *) for delint
847 		 */
848 		ndp = (char *)(void *)pnd->pnd_dirent + (size_t)*readoff;
849 		fromdent = (struct dirent *)(void *)ndp;
850 
851 		if (*reslen < _DIRENT_SIZE(fromdent))
852 			break;
853 
854 		memcpy(dent, fromdent, _DIRENT_SIZE(fromdent));
855 		*readoff += _DIRENT_SIZE(fromdent);
856 		*reslen -= _DIRENT_SIZE(fromdent);
857 
858 		dent = _DIRENT_NEXT(dent);
859 	}
860 
861 #ifdef PERFUSE_DEBUG
862 	if (perfuse_diagflags & PDF_READDIR)
863 		DPRINTF("%s: readoff = %"PRId64",  "
864 			"pnd->pnd_dirent_len = %"PRId64"\n",
865 			__func__, *readoff, pnd->pnd_dirent_len);
866 #endif
867 	if (*readoff >=  pnd->pnd_dirent_len) {
868 		free(pnd->pnd_dirent);
869 		pnd->pnd_dirent = NULL;
870 		pnd->pnd_dirent_len = 0;
871 	}
872 
873 	return 0;
874 }
875 
876 static void
877 requeue_request(struct puffs_usermount *pu, puffs_cookie_t opc,
878 	enum perfuse_qtype type)
879 {
880 	struct perfuse_cc_queue pcq;
881 	struct perfuse_node_data *pnd;
882 #ifdef PERFUSE_DEBUG
883 	struct perfuse_state *ps;
884 
885 	ps = perfuse_getspecific(pu);
886 #endif
887 
888 	pnd = PERFUSE_NODE_DATA(opc);
889 	pcq.pcq_type = type;
890 	pcq.pcq_cc = puffs_cc_getcc(pu);
891 	TAILQ_INSERT_TAIL(&pnd->pnd_pcq, &pcq, pcq_next);
892 
893 #ifdef PERFUSE_DEBUG
894 	if (perfuse_diagflags & PDF_REQUEUE)
895 		DPRINTF("%s: REQUEUE opc = %p, pcc = %p (%s)\n",
896 		        __func__, (void *)opc, pcq.pcq_cc,
897 			perfuse_qtypestr[type]);
898 #endif
899 
900 	puffs_cc_yield(pcq.pcq_cc);
901 	TAILQ_REMOVE(&pnd->pnd_pcq, &pcq, pcq_next);
902 
903 #ifdef PERFUSE_DEBUG
904 	if (perfuse_diagflags & PDF_REQUEUE)
905 		DPRINTF("%s: RESUME opc = %p, pcc = %p (%s)\n",
906 		        __func__, (void *)opc, pcq.pcq_cc,
907 			perfuse_qtypestr[type]);
908 #endif
909 
910 	return;
911 }
912 
913 /* ARGSUSED0 */
914 static int
915 dequeue_requests(struct perfuse_state *ps, puffs_cookie_t opc,
916 	enum perfuse_qtype type, int max)
917 {
918 	struct perfuse_cc_queue *pcq;
919 	struct perfuse_node_data *pnd;
920 	int dequeued;
921 
922 	pnd = PERFUSE_NODE_DATA(opc);
923 	dequeued = 0;
924 	TAILQ_FOREACH(pcq, &pnd->pnd_pcq, pcq_next) {
925 		if (pcq->pcq_type != type)
926 			continue;
927 
928 #ifdef PERFUSE_DEBUG
929 		if (perfuse_diagflags & PDF_REQUEUE)
930 			DPRINTF("%s: SCHEDULE opc = %p, pcc = %p (%s)\n",
931 				__func__, (void *)opc, pcq->pcq_cc,
932 				 perfuse_qtypestr[type]);
933 #endif
934 		puffs_cc_schedule(pcq->pcq_cc);
935 
936 		if (++dequeued == max)
937 			break;
938 	}
939 
940 #ifdef PERFUSE_DEBUG
941 	if (perfuse_diagflags & PDF_REQUEUE)
942 		DPRINTF("%s: DONE  opc = %p\n", __func__, (void *)opc);
943 #endif
944 
945 	return dequeued;
946 }
947 
948 void
949 perfuse_fs_init(struct puffs_usermount *pu)
950 {
951 	struct perfuse_state *ps;
952 	perfuse_msg_t *pm;
953 	struct fuse_init_in *fii;
954 	struct fuse_init_out *fio;
955 	int error;
956 
957 	ps = puffs_getspecific(pu);
958 
959         if (puffs_mount(pu, ps->ps_target, ps->ps_mountflags, ps->ps_root) != 0)
960                 DERR(EX_OSERR, "%s: puffs_mount failed", __func__);
961 
962 	/*
963 	 * Linux 2.6.34.1 sends theses flags:
964 	 * FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC
965 	 * FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK
966 	 *
967 	 * Linux also sets max_readahead at 32 pages (128 kB)
968 	 *
969 	 * ps_new_msg() is called with NULL creds, which will
970 	 * be interpreted as FUSE superuser.
971 	 */
972 	pm = ps->ps_new_msg(pu, 0, FUSE_INIT, sizeof(*fii), NULL);
973 	fii = GET_INPAYLOAD(ps, pm, fuse_init_in);
974 	fii->major = FUSE_KERNEL_VERSION;
975 	fii->minor = FUSE_KERNEL_MINOR_VERSION;
976 	fii->max_readahead = (unsigned int)(32 * sysconf(_SC_PAGESIZE));
977 	fii->flags = (FUSE_ASYNC_READ|FUSE_POSIX_LOCKS|FUSE_ATOMIC_O_TRUNC);
978 
979 	if ((error = xchg_msg(pu, 0, pm, sizeof(*fio), wait_reply)) != 0)
980 		DERRX(EX_SOFTWARE, "init message exchange failed (%d)", error);
981 
982 	fio = GET_OUTPAYLOAD(ps, pm, fuse_init_out);
983 	ps->ps_max_readahead = fio->max_readahead;
984 	ps->ps_max_write = fio->max_write;
985 
986 	ps->ps_destroy_msg(pm);
987 
988 	return;
989 }
990 
991 int
992 perfuse_fs_unmount(struct puffs_usermount *pu, int flags)
993 {
994 	perfuse_msg_t *pm;
995 	struct perfuse_state *ps;
996 	puffs_cookie_t opc;
997 	int error;
998 
999 	ps = puffs_getspecific(pu);
1000 	opc = (puffs_cookie_t)puffs_getroot(pu);
1001 
1002 	/*
1003 	 * ps_new_msg() is called with NULL creds, which will
1004 	 * be interpreted as FUSE superuser.
1005 	 */
1006 	pm = ps->ps_new_msg(pu, opc, FUSE_DESTROY, 0, NULL);
1007 
1008 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0){
1009 		DWARN("unmount %s", ps->ps_target);
1010 		if (!(flags & MNT_FORCE))
1011 			return error;
1012 		else
1013 			error = 0;
1014 	} else {
1015 		ps->ps_destroy_msg(pm);
1016 	}
1017 
1018 	ps->ps_umount(pu);
1019 
1020 	if (perfuse_diagflags & PDF_MISC)
1021 		DPRINTF("%s unmounted, exit\n", ps->ps_target);
1022 
1023 	return 0;
1024 }
1025 
1026 int
1027 perfuse_fs_statvfs(struct puffs_usermount *pu, struct statvfs *svfsb)
1028 {
1029 	struct perfuse_state *ps;
1030 	perfuse_msg_t *pm;
1031 	puffs_cookie_t opc;
1032 	struct fuse_statfs_out *fso;
1033 	int error;
1034 
1035 	ps = puffs_getspecific(pu);
1036 	opc = (puffs_cookie_t)puffs_getroot(pu);
1037 
1038 	/*
1039 	 * ps_new_msg() is called with NULL creds, which will
1040 	 * be interpreted as FUSE superuser.
1041 	 */
1042 	pm = ps->ps_new_msg(pu, opc, FUSE_STATFS, 0, NULL);
1043 
1044 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fso), wait_reply)) != 0)
1045 		return error;
1046 
1047 	fso = GET_OUTPAYLOAD(ps, pm, fuse_statfs_out);
1048 	svfsb->f_flag = ps->ps_mountflags;
1049 	svfsb->f_bsize = fso->st.bsize;
1050 	svfsb->f_frsize = fso->st.frsize;
1051 	svfsb->f_iosize = ((struct puffs_node *)opc)->pn_va.va_blocksize;
1052 	svfsb->f_blocks = fso->st.blocks;
1053 	svfsb->f_bfree = fso->st.bfree;
1054 	svfsb->f_bavail = fso->st.bavail;
1055 	svfsb->f_bresvd = fso->st.bfree - fso->st.bavail;
1056 	svfsb->f_files = fso->st.files;
1057 	svfsb->f_ffree = fso->st.ffree;
1058 	svfsb->f_favail = fso->st.ffree;/* files not reserved for root */
1059 	svfsb->f_fresvd = 0;		/* files reserved for root */
1060 
1061 	svfsb->f_syncreads = ps->ps_syncreads;
1062 	svfsb->f_syncwrites = ps->ps_syncwrites;
1063 
1064 	svfsb->f_asyncreads = ps->ps_asyncreads;
1065 	svfsb->f_asyncwrites = ps->ps_asyncwrites;
1066 
1067 	(void)memcpy(&svfsb->f_fsidx, &ps->ps_fsid, sizeof(ps->ps_fsid));
1068 	svfsb->f_fsid = (unsigned long)ps->ps_fsid;
1069 	svfsb->f_namemax = MAXPATHLEN;	/* XXX */
1070 	svfsb->f_owner = ps->ps_owner_uid;
1071 
1072 	(void)strlcpy(svfsb->f_mntonname, ps->ps_target, _VFS_NAMELEN);
1073 
1074 	if (ps->ps_filesystemtype != NULL)
1075 		(void)strlcpy(svfsb->f_fstypename,
1076 			      ps->ps_filesystemtype, _VFS_NAMELEN);
1077 	else
1078 		(void)strlcpy(svfsb->f_fstypename, "fuse", _VFS_NAMELEN);
1079 
1080 	if (ps->ps_source != NULL)
1081 		strlcpy(svfsb->f_mntfromname, ps->ps_source, _VFS_NAMELEN);
1082 	else
1083 		strlcpy(svfsb->f_mntfromname, _PATH_FUSE, _VFS_NAMELEN);
1084 
1085 	ps->ps_destroy_msg(pm);
1086 
1087 	return 0;
1088 }
1089 
/*
 * Filesystem-wide sync: delegated to the libpuffs no-op helper.
 */
int
perfuse_fs_sync(struct puffs_usermount *pu, int waitfor,
	const struct puffs_cred *pcr)
{
	int error;

	/*
	 * FUSE does not seem to have a FS sync callback.
	 * Maybe do not even register this callback
	 */
	error = puffs_fsnop_sync(pu, waitfor, pcr);

	return error;
}
1100 
/*
 * File handle to node conversion is not implemented:
 * report a fatal software error through DERRX().
 */
/* ARGSUSED0 */
int
perfuse_fs_fhtonode(struct puffs_usermount *pu, void *fid, size_t fidsize,
	struct puffs_newinfo *pni)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);

	return 0;
}
1109 
1110 /* ARGSUSED0 */
1111 int
1112 perfuse_fs_nodetofh(struct puffs_usermount *pu, puffs_cookie_t cookie,
1113 	void *fid, size_t *fidsize)
1114 {
1115 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
1116 	return 0;
1117 }
1118 
#if 0
/*
 * Extended attribute control is not implemented: report a fatal
 * software error through DERRX().
 *
 * This block is compiled out. The function is declared void, so it
 * must not return a value.
 * NOTE(review): confirm the return type expected by the puffs
 * callback table before enabling this code.
 */
/* ARGSUSED0 */
void
perfuse_fs_extattrctl(struct puffs_usermount *pu, int cmd,
	puffs_cookie_t *cookie, int flags, int namespace, const char *attrname)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
}
#endif /* 0 */
1129 
/*
 * Suspend notification: nothing to do.
 */
/* ARGSUSED0 */
void
perfuse_fs_suspend(struct puffs_usermount *pu, int status)
{
	/* Intentionally empty */
}
1136 
1137 
1138 
1139 int
1140 perfuse_node_lookup(struct puffs_usermount *pu, puffs_cookie_t opc,
1141 	struct puffs_newinfo *pni, const struct puffs_cn *pcn)
1142 {
1143 	struct puffs_node *pn;
1144 	mode_t mode;
1145 	int error;
1146 
1147 	/*
1148 	 * Check permissions
1149 	 */
1150 	switch(pcn->pcn_nameiop) {
1151 	case NAMEI_DELETE: /* FALLTHROUGH */
1152 	case NAMEI_RENAME: /* FALLTHROUGH */
1153 	case NAMEI_CREATE:
1154 		if (pcn->pcn_flags & NAMEI_ISLASTCN)
1155 			mode = PUFFS_VEXEC|PUFFS_VWRITE;
1156 		else
1157 			mode = PUFFS_VEXEC;
1158 		break;
1159 	case NAMEI_LOOKUP: /* FALLTHROUGH */
1160 	default:
1161 		mode = PUFFS_VEXEC;
1162 		break;
1163 	}
1164 
1165 	if ((error = mode_access(opc, pcn->pcn_cred, mode)) != 0)
1166 		return error;
1167 
1168 	/*
1169 	 * Special case for ..
1170 	 */
1171 	if (strcmp(pcn->pcn_name, "..") == 0)
1172 		pn = PERFUSE_NODE_DATA(opc)->pnd_parent;
1173 	else
1174 		error = node_lookup_common(pu, (puffs_cookie_t)opc,
1175 					   pcn->pcn_name, pcn->pcn_cred, &pn);
1176 	if (error != 0)
1177 		return error;
1178 
1179 	/*
1180 	 * Kernel would kill us if the filesystem returned the parent
1181 	 * itself. If we want to live, hide that!
1182 	 */
1183 	if ((opc == (puffs_cookie_t)pn) && (strcmp(pcn->pcn_name, ".") != 0)) {
1184 		DERRX(EX_SOFTWARE, "lookup \"%s\" in \"%s\" returned parent",
1185 		      pcn->pcn_name, perfuse_node_path(opc));
1186 		/* NOTREACHED */
1187 		return ESTALE;
1188 	}
1189 
1190 	/*
1191 	 * Removed node
1192 	 */
1193 	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_REMOVED)
1194 		return ENOENT;
1195 
1196 	/*
1197 	 * Check for sticky bit. Unfortunately there is no way to
1198 	 * do this before creating the puffs_node, since we require
1199 	 * this operation to get the node owner.
1200 	 */
1201 	switch (pcn->pcn_nameiop) {
1202 	case NAMEI_DELETE: /* FALLTHROUGH */
1203 	case NAMEI_RENAME:
1204 		error = sticky_access(pn, pcn->pcn_cred);
1205 		if (error != 0) {
1206 			/*
1207 			 * kernel will never know about it and will
1208 			 * not reclaim it. The filesystem needs to
1209 			 * clean it up anyway, therefore mimick a forget.
1210 			 */
1211 			PERFUSE_NODE_DATA(pn)->pnd_flags |= PND_RECLAIMED;
1212 			(void)perfuse_node_reclaim(pu, (puffs_cookie_t)pn);
1213 			return error;
1214 		}
1215 		break;
1216 	default:
1217 		break;
1218 	}
1219 
1220 	/*
1221 	 * If that node had a pending reclaim, wipe it out.
1222 	 */
1223 	PERFUSE_NODE_DATA(pn)->pnd_flags &= ~PND_RECLAIMED;
1224 
1225 	puffs_newinfo_setcookie(pni, pn);
1226 	puffs_newinfo_setvtype(pni, pn->pn_va.va_type);
1227 	puffs_newinfo_setsize(pni, (voff_t)pn->pn_va.va_size);
1228 	puffs_newinfo_setrdev(pni, pn->pn_va.va_rdev);
1229 
1230 	return error;
1231 }
1232 
1233 int
1234 perfuse_node_create(struct puffs_usermount *pu, puffs_cookie_t opc,
1235 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1236 	const struct vattr *vap)
1237 {
1238 	perfuse_msg_t *pm;
1239 	struct perfuse_state *ps;
1240 	struct fuse_create_in *fci;
1241 	struct fuse_entry_out *feo;
1242 	struct fuse_open_out *foo;
1243 	struct puffs_node *pn;
1244 	const char *name;
1245 	size_t namelen;
1246 	size_t len;
1247 	int error;
1248 
1249 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1250 		return ENOENT;
1251 
1252 	/*
1253 	 * If create is unimplemented: Check that it does not
1254 	 * already exists, and if not, do mknod and open
1255 	 */
1256 	ps = puffs_getspecific(pu);
1257 	if (ps->ps_flags & PS_NO_CREAT) {
1258 		error = node_lookup_common(pu, opc, pcn->pcn_name,
1259 					   pcn->pcn_cred, &pn);
1260 		if (error == 0)
1261 			return EEXIST;
1262 
1263 		error = perfuse_node_mknod(pu, opc, pni, pcn, vap);
1264 		if (error != 0)
1265 			return error;
1266 
1267 		error = node_lookup_common(pu, opc, pcn->pcn_name,
1268 					   pcn->pcn_cred, &pn);
1269 		if (error != 0)
1270 			return error;
1271 
1272 		/*
1273 		 * FUSE does the open at create time, while
1274 		 * NetBSD will open in a subsequent operation.
1275 		 * We need to open now, in order to retain FUSE
1276 		 * semantics. The calling process will not get
1277 		 * a file descriptor before the kernel sends
1278 		 * the open operation.
1279 		 */
1280 		opc = (puffs_cookie_t)pn;
1281 		error = perfuse_node_open(pu, opc, FWRITE, pcn->pcn_cred);
1282 		if (error != 0)
1283 			return error;
1284 
1285 		return 0;
1286 	}
1287 
1288 	name = pcn->pcn_name;
1289 	namelen = pcn->pcn_namelen + 1;
1290 	len = sizeof(*fci) + namelen;
1291 
1292 	/*
1293 	 * flags should use O_WRONLY instead of O_RDWR, but it
1294 	 * breaks when the caller tries to read from file.
1295 	 *
1296 	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1297 	 */
1298 	pm = ps->ps_new_msg(pu, opc, FUSE_CREATE, len, pcn->pcn_cred);
1299 	fci = GET_INPAYLOAD(ps, pm, fuse_create_in);
1300 	fci->flags = O_CREAT | O_TRUNC | O_RDWR;
1301 	fci->mode = vap->va_mode | VTTOIF(vap->va_type);
1302 	fci->umask = 0; 	/* Seems unused by libfuse */
1303 	(void)strlcpy((char*)(void *)(fci + 1), name, namelen);
1304 
1305 	len = sizeof(*feo) + sizeof(*foo);
1306 	if ((error = xchg_msg(pu, opc, pm, len, wait_reply)) != 0) {
1307 		/*
1308 		 * create is unimplmented, remember it for later,
1309 		 * and start over using mknod and open instead.
1310 		 */
1311 		if (error == ENOSYS) {
1312 			ps->ps_flags |= PS_NO_CREAT;
1313 			return perfuse_node_create(pu, opc, pni, pcn, vap);
1314 		}
1315 
1316 		return error;
1317 	}
1318 
1319 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
1320 	foo = (struct fuse_open_out *)(void *)(feo + 1);
1321 	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
1322 		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
1323 
1324 	/*
1325 	 * Save the file handle and inode in node private data
1326 	 * so that we can reuse it later
1327 	 */
1328 	pn = perfuse_new_pn(pu, name, opc);
1329 	perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE);
1330 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
1331 
1332 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
1333 	pn->pn_va.va_gen = (u_long)(feo->generation);
1334 	set_expire((puffs_cookie_t)pn, feo, NULL);
1335 
1336 	puffs_newinfo_setcookie(pni, pn);
1337 
1338 #ifdef PERFUSE_DEBUG
1339 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1340 		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
1341 			"nodeid = 0x%"PRIx64", wfh = 0x%"PRIx64"\n",
1342 			__func__, (void *)pn, pcn->pcn_name,
1343 			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid,
1344 			foo->fh);
1345 #endif
1346 
1347 	ps->ps_destroy_msg(pm);
1348 
1349 	return node_mk_common_final(pu, opc, pn, pcn);
1350 }
1351 
1352 
1353 int
1354 perfuse_node_mknod(struct puffs_usermount *pu, puffs_cookie_t opc,
1355 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1356 	const struct vattr *vap)
1357 {
1358 	struct perfuse_state *ps;
1359 	perfuse_msg_t *pm;
1360 	struct fuse_mknod_in *fmi;
1361 	const char* path;
1362 	size_t len;
1363 
1364 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1365 		return ENOENT;
1366 
1367 	/*
1368 	 * Only superuser can mknod objects other than
1369 	 * directories, files, socks, fifo and links.
1370 	 *
1371 	 * Create an object require -WX permission in the parent directory
1372 	 */
1373 	switch (vap->va_type) {
1374 	case VDIR:	/* FALLTHROUGH */
1375 	case VREG:	/* FALLTHROUGH */
1376 	case VFIFO:	/* FALLTHROUGH */
1377 	case VSOCK:
1378 		break;
1379 	default:	/* VNON, VBLK, VCHR, VBAD */
1380 		if (!puffs_cred_isjuggernaut(pcn->pcn_cred))
1381 			return EACCES;
1382 		break;
1383 	}
1384 
1385 
1386 	ps = puffs_getspecific(pu);
1387 	path = pcn->pcn_name;
1388 	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
1389 
1390 	/*
1391 	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1392 	 */
1393 	pm = ps->ps_new_msg(pu, opc, FUSE_MKNOD, len, pcn->pcn_cred);
1394 	fmi = GET_INPAYLOAD(ps, pm, fuse_mknod_in);
1395 	fmi->mode = vap->va_mode | VTTOIF(vap->va_type);
1396 	fmi->rdev = (uint32_t)vap->va_rdev;
1397 	fmi->umask = 0; 	/* Seems unused bu libfuse */
1398 	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
1399 
1400 	return node_mk_common(pu, opc, pni, pcn, pm);
1401 }
1402 
1403 
1404 int
1405 perfuse_node_open(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1406 	const struct puffs_cred *pcr)
1407 {
1408 	struct perfuse_state *ps;
1409 	struct perfuse_node_data *pnd;
1410 	perfuse_msg_t *pm;
1411 	mode_t fmode;
1412 	int op;
1413 	struct fuse_open_in *foi;
1414 	struct fuse_open_out *foo;
1415 	struct puffs_node *pn;
1416 	int error;
1417 
1418 	ps = puffs_getspecific(pu);
1419 	pn = (struct puffs_node *)opc;
1420 	pnd = PERFUSE_NODE_DATA(opc);
1421 	error = 0;
1422 
1423 	if (pnd->pnd_flags & PND_REMOVED)
1424 		return ENOENT;
1425 
1426 	if (puffs_pn_getvap(pn)->va_type == VDIR)
1427 		op = FUSE_OPENDIR;
1428 	else
1429 		op = FUSE_OPEN;
1430 
1431 	/*
1432 	 * libfuse docs says
1433 	 * - O_CREAT and O_EXCL should never be set.
1434 	 * - O_TRUNC may be used if mount option atomic_o_trunc is used XXX
1435 	 *
1436 	 * O_APPEND makes no sense since FUSE always sends
1437 	 * the file offset for write operations. If the
1438 	 * filesystem uses pwrite(), O_APPEND would cause
1439 	 * the offset to be ignored and cause file corruption.
1440 	 */
1441 	mode &= ~(O_CREAT|O_EXCL|O_APPEND);
1442 
1443 	/*
1444 	 * Do not open twice, and do not reopen for reading
1445 	 * if we already have write handle.
1446 	 */
1447 	if (((mode & FREAD) && (pnd->pnd_flags & PND_RFH)) ||
1448 	    ((mode & FREAD) && (pnd->pnd_flags & PND_WFH)) ||
1449 	    ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH)))
1450 		goto out;
1451 
1452 	/*
1453 	 * Queue open on a node so that we do not open
1454 	 * twice. This would be better with read and
1455 	 * write distinguished.
1456 	 */
1457 	while (pnd->pnd_flags & PND_INOPEN)
1458 		requeue_request(pu, opc, PCQ_OPEN);
1459 	pnd->pnd_flags |= PND_INOPEN;
1460 
1461 	/*
1462 	 * Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE
1463 	 * to O_RDONLY/O_WRONLY while perserving the other options.
1464 	 */
1465 	fmode = mode & ~(FREAD|FWRITE);
1466 	fmode |= (mode & FWRITE) ? O_RDWR : O_RDONLY;
1467 
1468 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*foi), pcr);
1469 	foi = GET_INPAYLOAD(ps, pm, fuse_open_in);
1470 	foi->flags = fmode;
1471 	foi->unused = 0;
1472 
1473 	if ((error = xchg_msg(pu, opc, pm, sizeof(*foo), wait_reply)) != 0)
1474 		goto out;
1475 
1476 	foo = GET_OUTPAYLOAD(ps, pm, fuse_open_out);
1477 
1478 	/*
1479 	 * Save the file handle in node private data
1480 	 * so that we can reuse it later
1481 	 */
1482 	perfuse_new_fh(opc, foo->fh, mode);
1483 
1484 #ifdef PERFUSE_DEBUG
1485 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1486 		DPRINTF("%s: opc = %p, file = \"%s\", "
1487 			"nodeid = 0x%"PRIx64", %s%sfh = 0x%"PRIx64"\n",
1488 			__func__, (void *)opc, perfuse_node_path(opc),
1489 			pnd->pnd_nodeid, mode & FREAD ? "r" : "",
1490 			mode & FWRITE ? "w" : "", foo->fh);
1491 #endif
1492 
1493 	ps->ps_destroy_msg(pm);
1494 out:
1495 
1496 	pnd->pnd_flags &= ~PND_INOPEN;
1497 	(void)dequeue_requests(ps, opc, PCQ_OPEN, DEQUEUE_ALL);
1498 
1499 	return error;
1500 }
1501 
1502 /* ARGSUSED0 */
1503 int
1504 perfuse_node_close(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1505 	const struct puffs_cred *pcr)
1506 {
1507 	struct perfuse_node_data *pnd;
1508 
1509 	pnd = PERFUSE_NODE_DATA(opc);
1510 
1511 	if (!(pnd->pnd_flags & PND_OPEN))
1512 		return EBADF;
1513 
1514 	/*
1515 	 * Actual close is postponed at inactive time.
1516 	 */
1517 	return 0;
1518 }
1519 
1520 int
1521 perfuse_node_access(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1522 	const struct puffs_cred *pcr)
1523 {
1524 	perfuse_msg_t *pm;
1525 	struct perfuse_state *ps;
1526 	struct fuse_access_in *fai;
1527 	int error;
1528 
1529 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1530 		return ENOENT;
1531 
1532 	/*
1533 	 * If we previously detected the filesystem does not
1534 	 * implement access(), short-circuit the call and skip
1535 	 * to libpuffs access() emulation.
1536 	 */
1537 	ps = puffs_getspecific(pu);
1538 	if (ps->ps_flags & PS_NO_ACCESS) {
1539 		const struct vattr *vap;
1540 
1541 		vap = puffs_pn_getvap((struct puffs_node *)opc);
1542 
1543 		error = puffs_access(IFTOVT(vap->va_mode),
1544 				     vap->va_mode & ACCESSPERMS,
1545 				     vap->va_uid, vap->va_gid,
1546 				     (mode_t)mode, pcr);
1547 		return error;
1548 	}
1549 
1550 	/*
1551 	 * Plain access call
1552 	 */
1553 	pm = ps->ps_new_msg(pu, opc, FUSE_ACCESS, sizeof(*fai), pcr);
1554 	fai = GET_INPAYLOAD(ps, pm, fuse_access_in);
1555 	fai->mask = 0;
1556 	fai->mask |= (mode & PUFFS_VREAD) ? R_OK : 0;
1557 	fai->mask |= (mode & PUFFS_VWRITE) ? W_OK : 0;
1558 	fai->mask |= (mode & PUFFS_VEXEC) ? X_OK : 0;
1559 
1560 	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
1561 
1562 	ps->ps_destroy_msg(pm);
1563 
1564 	/*
1565 	 * If unimplemented, start over with emulation
1566 	 */
1567 	if (error == ENOSYS) {
1568 		ps->ps_flags |= PS_NO_ACCESS;
1569 		return perfuse_node_access(pu, opc, mode, pcr);
1570 	}
1571 
1572 	return error;
1573 }
1574 
1575 int
1576 perfuse_node_getattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1577 	struct vattr *vap, const struct puffs_cred *pcr)
1578 {
1579 	perfuse_msg_t *pm = NULL;
1580 	struct perfuse_state *ps;
1581 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
1582 	struct fuse_getattr_in *fgi;
1583 	struct fuse_attr_out *fao;
1584 	int error = 0;
1585 
1586 	if (pnd->pnd_flags & PND_REMOVED)
1587 		return ENOENT;
1588 
1589 	/*
1590 	 * Serialize size access, see comment in perfuse_node_setattr().
1591 	 */
1592 	while (pnd->pnd_flags & PND_INRESIZE)
1593 		requeue_request(pu, opc, PCQ_RESIZE);
1594 	pnd->pnd_flags |= PND_INRESIZE;
1595 
1596 	ps = puffs_getspecific(pu);
1597 
1598 #ifndef PUFFS_KFLAG_CACHE_FS_TTL
1599 	/*
1600 	 * Check for cached attributes
1601 	 * This still require serialized access to size.
1602 	 */
1603 	if (!attr_expired(opc)) {
1604 		(void)memcpy(vap, puffs_pn_getvap((struct puffs_node *)opc),
1605 			     sizeof(*vap));
1606 		goto out;
1607 	}
1608 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
1609 
1610 	/*
1611 	 * FUSE_GETATTR_FH must be set in fgi->flags
1612 	 * if we use for fgi->fh
1613 	 */
1614 	pm = ps->ps_new_msg(pu, opc, FUSE_GETATTR, sizeof(*fgi), pcr);
1615 	fgi = GET_INPAYLOAD(ps, pm, fuse_getattr_in);
1616 	fgi->getattr_flags = 0;
1617 	fgi->dummy = 0;
1618 	fgi->fh = 0;
1619 
1620 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_OPEN) {
1621 		fgi->fh = perfuse_get_fh(opc, FREAD);
1622 		fgi->getattr_flags |= FUSE_GETATTR_FH;
1623 	}
1624 
1625 #ifdef PERFUSE_DEBUG
1626 	if (perfuse_diagflags & PDF_RESIZE)
1627 		DPRINTF(">> %s %p %" PRIu64 "\n", __func__, (void *)opc,
1628 		    vap->va_size);
1629 #endif
1630 
1631 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
1632 		goto out;
1633 
1634 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1635 
1636 #ifdef PERFUSE_DEBUG
1637 	if (perfuse_diagflags & PDF_RESIZE)
1638 		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1639 		    (void *)opc, vap->va_size, fao->attr.size);
1640 #endif
1641 
1642 	/*
1643 	 * We set birthtime, flags, filerev,vaflags to 0.
1644 	 * This seems the best bet, since the information is
1645 	 * not available from filesystem.
1646 	 */
1647 	fuse_attr_to_vap(ps, vap, &fao->attr);
1648 	set_expire(opc, NULL, fao);
1649 
1650 	ps->ps_destroy_msg(pm);
1651 out:
1652 
1653 	pnd->pnd_flags &= ~PND_INRESIZE;
1654 	(void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
1655 
1656 	return error;
1657 }
1658 
1659 int
1660 perfuse_node_setattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1661 	const struct vattr *vap, const struct puffs_cred *pcr)
1662 {
1663 	perfuse_msg_t *pm;
1664 	uint64_t fh;
1665 	struct perfuse_state *ps;
1666 	struct perfuse_node_data *pnd;
1667 	struct fuse_setattr_in *fsi;
1668 	struct fuse_attr_out *fao;
1669 	struct vattr *old_va;
1670 	int error;
1671 #ifdef PERFUSE_DEBUG
1672 	struct vattr *old_vap;
1673 	int resize_debug = 0;
1674 #endif
1675 
1676 	ps = puffs_getspecific(pu);
1677 	pnd = PERFUSE_NODE_DATA(opc);
1678 
1679 	/*
1680 	 * The only operation we can do once the file is removed
1681 	 * is to resize it, and we can do it only if it is open.
1682 	 * Do not even send the operation to the filesystem: the
1683 	 * file is not there anymore.
1684 	 */
1685 	if (pnd->pnd_flags & PND_REMOVED) {
1686 		if (!(pnd->pnd_flags & PND_OPEN))
1687 			return ENOENT;
1688 
1689 		error = 0;
1690 		goto out;
1691 	}
1692 
1693 	old_va = puffs_pn_getvap((struct puffs_node *)opc);
1694 
1695 	/*
1696 	 * Check for permission to change size
1697 	 */
1698 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1699 	    (error = mode_access(opc, pcr, PUFFS_VWRITE)) != 0)
1700 		return error;
1701 
1702 	/*
1703 	 * Check for permission to change dates
1704 	 */
1705 	if (((vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1706 	     (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL)) &&
1707 	    (puffs_access_times(old_va->va_uid, old_va->va_gid,
1708 				old_va->va_mode, 0, pcr) != 0))
1709 		return EACCES;
1710 
1711 	/*
1712 	 * Check for permission to change owner and group
1713 	 */
1714 	if (((vap->va_uid != (uid_t)PUFFS_VNOVAL) ||
1715 	     (vap->va_gid != (gid_t)PUFFS_VNOVAL)) &&
1716 	    (puffs_access_chown(old_va->va_uid, old_va->va_gid,
1717 				vap->va_uid, vap->va_gid, pcr)) != 0)
1718 		return EACCES;
1719 
1720 	/*
1721 	 * Check for permission to change permissions
1722 	 */
1723 	if ((vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
1724 	    (puffs_access_chmod(old_va->va_uid, old_va->va_gid,
1725 				old_va->va_type, vap->va_mode, pcr)) != 0)
1726 		return EACCES;
1727 
1728 	pm = ps->ps_new_msg(pu, opc, FUSE_SETATTR, sizeof(*fsi), pcr);
1729 	fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
1730 	fsi->valid = 0;
1731 
1732 	/*
1733 	 * Get a fh if the node is open for writing
1734 	 */
1735 	if (pnd->pnd_flags & PND_WFH) {
1736 		fh = perfuse_get_fh(opc, FWRITE);
1737 		fsi->fh = fh;
1738 		fsi->valid |= FUSE_FATTR_FH;
1739 	}
1740 
1741 	if (vap->va_size != (u_quad_t)PUFFS_VNOVAL) {
1742 		fsi->size = vap->va_size;
1743 		fsi->valid |= FUSE_FATTR_SIZE;
1744 
1745 		/*
1746 		 * Serialize anything that can touch file size
1747 		 * to avoid reordered GETATTR and SETATTR.
1748 		 * Out of order SETATTR can report stale size,
1749 		 * which will cause the kernel to truncate the file.
1750 		 * XXX Probably useless now we have a lock on GETATTR
1751 		 */
1752 		while (pnd->pnd_flags & PND_INRESIZE)
1753 			requeue_request(pu, opc, PCQ_RESIZE);
1754 		pnd->pnd_flags |= PND_INRESIZE;
1755 	}
1756 
1757 	/*
1758  	 * Setting mtime without atime or vice versa leads to
1759 	 * dates being reset to Epoch on glusterfs. If one
1760 	 * is missing, use the old value.
1761  	 */
1762 	if ((vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1763 	    (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL)) {
1764 
1765 		if (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) {
1766 			fsi->atime = vap->va_atime.tv_sec;
1767 			fsi->atimensec = (uint32_t)vap->va_atime.tv_nsec;
1768 		} else {
1769 			fsi->atime = old_va->va_atime.tv_sec;
1770 			fsi->atimensec = (uint32_t)old_va->va_atime.tv_nsec;
1771 		}
1772 
1773 		if (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) {
1774 			fsi->mtime = vap->va_mtime.tv_sec;
1775 			fsi->mtimensec = (uint32_t)vap->va_mtime.tv_nsec;
1776 		} else {
1777 			fsi->mtime = old_va->va_mtime.tv_sec;
1778 			fsi->mtimensec = (uint32_t)old_va->va_mtime.tv_nsec;
1779 		}
1780 
1781 		fsi->valid |= (FUSE_FATTR_MTIME|FUSE_FATTR_ATIME);
1782 	}
1783 
1784 	if (vap->va_mode != (mode_t)PUFFS_VNOVAL) {
1785 		fsi->mode = vap->va_mode;
1786 		fsi->valid |= FUSE_FATTR_MODE;
1787 	}
1788 
1789 	if (vap->va_uid != (uid_t)PUFFS_VNOVAL) {
1790 		fsi->uid = vap->va_uid;
1791 		fsi->valid |= FUSE_FATTR_UID;
1792 	}
1793 
1794 	if (vap->va_gid != (gid_t)PUFFS_VNOVAL) {
1795 		fsi->gid = vap->va_gid;
1796 		fsi->valid |= FUSE_FATTR_GID;
1797 	}
1798 
1799 	if (pnd->pnd_lock_owner != 0) {
1800 		fsi->lock_owner = pnd->pnd_lock_owner;
1801 		fsi->valid |= FUSE_FATTR_LOCKOWNER;
1802 	}
1803 
1804 	/*
1805 	 * ftruncate() sends only va_size, and metadata cache
1806 	 * flush adds va_atime and va_mtime. Some FUSE
1807 	 * filesystems will attempt to detect ftruncate by
1808 	 * checking for FATTR_SIZE being set without
1809 	 * FATTR_UID|FATTR_GID|FATTR_ATIME|FATTR_MTIME|FATTR_MODE
1810 	 *
1811 	 * Try to adapt and remove FATTR_ATIME|FATTR_MTIME
1812 	 * if we suspect a ftruncate().
1813 	 */
1814 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1815 	    ((vap->va_mode == (mode_t)PUFFS_VNOVAL) &&
1816 	     (vap->va_uid == (uid_t)PUFFS_VNOVAL) &&
1817 	     (vap->va_gid == (gid_t)PUFFS_VNOVAL))) {
1818 		fsi->atime = 0;
1819 		fsi->atimensec = 0;
1820 		fsi->mtime = 0;
1821 		fsi->mtimensec = 0;
1822 		fsi->valid &= ~(FUSE_FATTR_ATIME|FUSE_FATTR_MTIME);
1823 	}
1824 
1825 	/*
1826 	 * If nothing remain, discard the operation.
1827 	 */
1828 	if (!(fsi->valid & (FUSE_FATTR_SIZE|FUSE_FATTR_ATIME|FUSE_FATTR_MTIME|
1829 			    FUSE_FATTR_MODE|FUSE_FATTR_UID|FUSE_FATTR_GID))) {
1830 		error = 0;
1831 		goto out;
1832 	}
1833 
1834 #ifdef PERFUSE_DEBUG
1835 	old_vap = puffs_pn_getvap((struct puffs_node *)opc);
1836 
1837 	if ((perfuse_diagflags & PDF_RESIZE) &&
1838 	    (old_vap->va_size != (u_quad_t)PUFFS_VNOVAL)) {
1839 		resize_debug = 1;
1840 
1841 		DPRINTF(">> %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1842 		    (void *)opc,
1843 		    puffs_pn_getvap((struct puffs_node *)opc)->va_size,
1844 		    fsi->size);
1845 	}
1846 #endif
1847 
1848 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
1849 		goto out;
1850 
1851 	/*
1852 	 * Copy back the new values
1853 	 */
1854 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1855 
1856 #ifdef PERFUSE_DEBUG
1857 	if (resize_debug)
1858 		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1859 		    (void *)opc, old_vap->va_size, fao->attr.size);
1860 #endif
1861 
1862 	fuse_attr_to_vap(ps, old_va, &fao->attr);
1863 	set_expire(opc, NULL, fao);
1864 
1865 	ps->ps_destroy_msg(pm);
1866 
1867 out:
1868 	if (pnd->pnd_flags & PND_INRESIZE) {
1869 		pnd->pnd_flags &= ~PND_INRESIZE;
1870 		(void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
1871 	}
1872 
1873 	return error;
1874 }
1875 
1876 int
1877 perfuse_node_poll(struct puffs_usermount *pu, puffs_cookie_t opc, int *events)
1878 {
1879 	struct perfuse_state *ps;
1880 	perfuse_msg_t *pm;
1881 	struct fuse_poll_in *fpi;
1882 	struct fuse_poll_out *fpo;
1883 	int error;
1884 
1885 	ps = puffs_getspecific(pu);
1886 	/*
1887 	 * kh is set if FUSE_POLL_SCHEDULE_NOTIFY is set.
1888 	 *
1889 	 * XXX ps_new_msg() is called with NULL creds, which will
1890 	 * be interpreted as FUSE superuser. We have no way to
1891 	 * know the requesting process' credential, but since poll
1892 	 * is supposed to operate on a file that has been open,
1893 	 * permission should have already been checked at open time.
1894 	 * That still may breaks on filesystems that provides odd
1895 	 * semantics.
1896  	 */
1897 	pm = ps->ps_new_msg(pu, opc, FUSE_POLL, sizeof(*fpi), NULL);
1898 	fpi = GET_INPAYLOAD(ps, pm, fuse_poll_in);
1899 	fpi->fh = perfuse_get_fh(opc, FREAD);
1900 	fpi->kh = 0;
1901 	fpi->flags = 0;
1902 
1903 #ifdef PERFUSE_DEBUG
1904 	if (perfuse_diagflags & PDF_FH)
1905 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
1906 			"fh = 0x%"PRIx64"\n", __func__, (void *)opc,
1907 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fpi->fh);
1908 #endif
1909 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fpo), wait_reply)) != 0)
1910 		return error;
1911 
1912 	fpo = GET_OUTPAYLOAD(ps, pm, fuse_poll_out);
1913 	*events = fpo->revents;
1914 
1915 	ps->ps_destroy_msg(pm);
1916 
1917 	return 0;
1918 }
1919 
1920 /* ARGSUSED0 */
1921 int
1922 perfuse_node_mmap(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1923 	const struct puffs_cred *pcr)
1924 {
1925 	/*
1926 	 * Not implemented anymore in libfuse
1927 	 */
1928 	return ENOSYS;
1929 }
1930 
1931 /* ARGSUSED2 */
1932 int
1933 perfuse_node_fsync(struct puffs_usermount *pu, puffs_cookie_t opc,
1934 	const struct puffs_cred *pcr, int flags, off_t offlo, off_t offhi)
1935 {
1936 	int op;
1937 	perfuse_msg_t *pm;
1938 	struct perfuse_state *ps;
1939 	struct perfuse_node_data *pnd;
1940 	struct fuse_fsync_in *ffi;
1941 	uint64_t fh;
1942 	int error;
1943 
1944 	pm = NULL;
1945 	ps = puffs_getspecific(pu);
1946 	pnd = PERFUSE_NODE_DATA(opc);
1947 
1948 	/*
1949 	 * No need to sync a removed node
1950 	 */
1951 	if (pnd->pnd_flags & PND_REMOVED)
1952 		return 0;
1953 
1954 	/*
1955 	 * We do not sync closed files. They have been
1956 	 * sync at inactive time already.
1957 	 */
1958 	if (!(pnd->pnd_flags & PND_OPEN))
1959 		return 0;
1960 
1961 	if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR)
1962 		op = FUSE_FSYNCDIR;
1963 	else 		/* VREG but also other types such as VLNK */
1964 		op = FUSE_FSYNC;
1965 
1966 	/*
1967 	 * Do not sync if there are no change to sync
1968 	 * XXX remove that test on files if we implement mmap
1969 	 */
1970 #ifdef PERFUSE_DEBUG
1971 	if (perfuse_diagflags & PDF_SYNC)
1972 		DPRINTF("%s: TEST opc = %p, file = \"%s\" is %sdirty\n",
1973 			__func__, (void*)opc, perfuse_node_path(opc),
1974 			pnd->pnd_flags & PND_DIRTY ? "" : "not ");
1975 #endif
1976 	if (!(pnd->pnd_flags & PND_DIRTY))
1977 		return 0;
1978 
1979 	/*
1980 	 * It seems NetBSD can call fsync without open first
1981 	 * glusterfs complain in such a situation:
1982 	 * "FSYNC() ERR => -1 (Invalid argument)"
1983 	 * The file will be closed at inactive time.
1984 	 *
1985 	 * We open the directory for reading in order to sync.
1986 	 * This sounds rather counterintuitive, but it works.
1987 	 */
1988 	if (!(pnd->pnd_flags & PND_WFH)) {
1989 		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
1990 			goto out;
1991 	}
1992 
1993 	if (op == FUSE_FSYNCDIR)
1994 		fh = perfuse_get_fh(opc, FREAD);
1995 	else
1996 		fh = perfuse_get_fh(opc, FWRITE);
1997 
1998 	/*
1999 	 * If fsync_flags  is set, meta data should not be flushed.
2000 	 */
2001 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*ffi), pcr);
2002 	ffi = GET_INPAYLOAD(ps, pm, fuse_fsync_in);
2003 	ffi->fh = fh;
2004 	ffi->fsync_flags = (flags & FFILESYNC) ? 0 : 1;
2005 
2006 #ifdef PERFUSE_DEBUG
2007 	if (perfuse_diagflags & PDF_FH)
2008 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
2009 			__func__, (void *)opc,
2010 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, ffi->fh);
2011 #endif
2012 
2013 	if ((error = xchg_msg(pu, opc, pm,
2014 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
2015 		goto out;
2016 
2017 	/*
2018 	 * No reply beyond fuse_out_header: nothing to do on success
2019 	 * just clear the dirty flag
2020 	 */
2021 	pnd->pnd_flags &= ~PND_DIRTY;
2022 
2023 #ifdef PERFUSE_DEBUG
2024 	if (perfuse_diagflags & PDF_SYNC)
2025 		DPRINTF("%s: CLEAR opc = %p, file = \"%s\"\n",
2026 			__func__, (void*)opc, perfuse_node_path(opc));
2027 #endif
2028 
2029 	ps->ps_destroy_msg(pm);
2030 
2031 out:
2032 	/*
2033 	 * ENOSYS is not returned to kernel,
2034 	 */
2035 	if (error == ENOSYS)
2036 		error = 0;
2037 
2038 	return error;
2039 }
2040 
2041 /* ARGSUSED0 */
2042 int
2043 perfuse_node_seek(struct puffs_usermount *pu, puffs_cookie_t opc,
2044 	off_t oldoff, off_t newoff, const struct puffs_cred *pcr)
2045 {
2046 	return 0;
2047 }
2048 
2049 int
2050 perfuse_node_remove(struct puffs_usermount *pu, puffs_cookie_t opc,
2051 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2052 {
2053 	struct perfuse_state *ps;
2054 	struct perfuse_node_data *pnd;
2055 	perfuse_msg_t *pm;
2056 	char *path;
2057 	const char *name;
2058 	size_t len;
2059 	int error;
2060 
2061 	pnd = PERFUSE_NODE_DATA(opc);
2062 
2063 	if ((pnd->pnd_flags & PND_REMOVED) ||
2064 	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2065 		return ENOENT;
2066 
2067 #ifdef PERFUSE_DEBUG
2068 	if (targ == NULL)
2069 		DERRX(EX_SOFTWARE, "%s: targ is NULL", __func__);
2070 
2071 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
2072 		DPRINTF("%s: opc = %p, remove opc = %p, file = \"%s\"\n",
2073 			__func__, (void *)opc, (void *)targ, pcn->pcn_name);
2074 #endif
2075 	/*
2076 	 * Await for all operations on the deleted node to drain,
2077 	 * as the filesystem may be confused to have it deleted
2078 	 * during a getattr
2079 	 */
2080 	while (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_INXCHG)
2081 		requeue_request(pu, targ, PCQ_AFTERXCHG);
2082 
2083 	ps = puffs_getspecific(pu);
2084 	pnd = PERFUSE_NODE_DATA(opc);
2085 	name = pcn->pcn_name;
2086 	len = pcn->pcn_namelen + 1;
2087 
2088 	pm = ps->ps_new_msg(pu, opc, FUSE_UNLINK, len, pcn->pcn_cred);
2089 	path = _GET_INPAYLOAD(ps, pm, char *);
2090 	(void)strlcpy(path, name, len);
2091 
2092 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2093 		return error;
2094 
2095 	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2096 	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2097 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2098 
2099 	/*
2100 	 * The parent directory needs a sync
2101 	 */
2102 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2103 
2104 #ifdef PERFUSE_DEBUG
2105 	if (perfuse_diagflags & PDF_FILENAME)
2106 		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2107 			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2108 			pcn->pcn_name);
2109 #endif
2110 	ps->ps_destroy_msg(pm);
2111 
2112 	return 0;
2113 }
2114 
2115 int
2116 perfuse_node_link(struct puffs_usermount *pu, puffs_cookie_t opc,
2117 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2118 {
2119 	struct perfuse_state *ps;
2120 	perfuse_msg_t *pm;
2121 	const char *name;
2122 	size_t len;
2123 	struct puffs_node *pn;
2124 	struct fuse_link_in *fli;
2125 	int error;
2126 
2127 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2128 		return ENOENT;
2129 
2130 	ps = puffs_getspecific(pu);
2131 	pn = (struct puffs_node *)targ;
2132 	name = pcn->pcn_name;
2133 	len =  sizeof(*fli) + pcn->pcn_namelen + 1;
2134 
2135 	pm = ps->ps_new_msg(pu, opc, FUSE_LINK, len, pcn->pcn_cred);
2136 	fli = GET_INPAYLOAD(ps, pm, fuse_link_in);
2137 	fli->oldnodeid = PERFUSE_NODE_DATA(pn)->pnd_nodeid;
2138 	(void)strlcpy((char *)(void *)(fli + 1), name, len - sizeof(*fli));
2139 
2140 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2141 		return error;
2142 
2143 	ps->ps_destroy_msg(pm);
2144 
2145 	return 0;
2146 }
2147 
2148 int
2149 perfuse_node_rename(struct puffs_usermount *pu, puffs_cookie_t opc,
2150 	puffs_cookie_t src, const struct puffs_cn *pcn_src,
2151 	puffs_cookie_t targ_dir, puffs_cookie_t targ,
2152 	const struct puffs_cn *pcn_targ)
2153 {
2154 	struct perfuse_state *ps;
2155 	perfuse_msg_t *pm;
2156 	struct fuse_rename_in *fri;
2157 	const char *newname;
2158 	const char *oldname;
2159 	char *np;
2160 	int error;
2161 	size_t len;
2162 	size_t newname_len;
2163 	size_t oldname_len;
2164 
2165 	if ((PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED) ||
2166 	    (PERFUSE_NODE_DATA(src)->pnd_flags & PND_REMOVED) ||
2167 	    (PERFUSE_NODE_DATA(targ_dir)->pnd_flags & PND_REMOVED))
2168 		return ENOENT;
2169 
2170 	/*
2171 	 * Await for all operations on the deleted node to drain,
2172 	 * as the filesystem may be confused to have it deleted
2173 	 * during a getattr
2174 	 */
2175 	if ((struct puffs_node *)targ != NULL) {
2176 		while (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_INXCHG)
2177 			requeue_request(pu, targ, PCQ_AFTERXCHG);
2178 	} else {
2179 		while (PERFUSE_NODE_DATA(src)->pnd_flags & PND_INXCHG)
2180 			requeue_request(pu, src, PCQ_AFTERXCHG);
2181 	}
2182 
2183 	ps = puffs_getspecific(pu);
2184 	newname =  pcn_targ->pcn_name;
2185 	newname_len = pcn_targ->pcn_namelen + 1;
2186 	oldname =  pcn_src->pcn_name;
2187 	oldname_len = pcn_src->pcn_namelen + 1;
2188 
2189 	len = sizeof(*fri) + oldname_len + newname_len;
2190 	pm = ps->ps_new_msg(pu, opc, FUSE_RENAME, len, pcn_targ->pcn_cred);
2191 	fri = GET_INPAYLOAD(ps, pm, fuse_rename_in);
2192 	fri->newdir = PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid;
2193 	np = (char *)(void *)(fri + 1);
2194 	(void)strlcpy(np, oldname, oldname_len);
2195 	np += oldname_len;
2196 	(void)strlcpy(np, newname, newname_len);
2197 
2198 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2199 		return error;
2200 
2201 	if (opc != targ_dir) {
2202 		struct perfuse_node_data *srcdir_pnd;
2203 		struct perfuse_node_data *dstdir_pnd;
2204 		struct perfuse_node_data *src_pnd;
2205 
2206 		srcdir_pnd = PERFUSE_NODE_DATA(opc);
2207 		dstdir_pnd = PERFUSE_NODE_DATA(targ_dir);
2208 		src_pnd = PERFUSE_NODE_DATA(src);
2209 
2210 		TAILQ_REMOVE(&srcdir_pnd->pnd_children, src_pnd, pnd_next);
2211 		TAILQ_INSERT_TAIL(&dstdir_pnd->pnd_children, src_pnd, pnd_next);
2212 
2213 		srcdir_pnd->pnd_childcount--;
2214 		dstdir_pnd->pnd_childcount++;
2215 
2216 		src_pnd->pnd_parent = targ_dir;
2217 
2218 		PERFUSE_NODE_DATA(targ_dir)->pnd_flags |= PND_DIRTY;
2219 	}
2220 
2221 	(void)strlcpy(PERFUSE_NODE_DATA(src)->pnd_name, newname, MAXPATHLEN);
2222 
2223 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2224 
2225 	if ((struct puffs_node *)targ != NULL)
2226 		PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2227 
2228 #ifdef PERFUSE_DEBUG
2229 	if (perfuse_diagflags & PDF_FILENAME)
2230 		DPRINTF("%s: nodeid = 0x%"PRIx64" file = \"%s\" renamed \"%s\" "
2231 			"nodeid = 0x%"PRIx64" -> nodeid = 0x%"PRIx64" \"%s\"\n",
2232 	 		__func__, PERFUSE_NODE_DATA(src)->pnd_nodeid,
2233 			pcn_src->pcn_name, pcn_targ->pcn_name,
2234 			PERFUSE_NODE_DATA(opc)->pnd_nodeid,
2235 			PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid,
2236 			perfuse_node_path(targ_dir));
2237 #endif
2238 
2239 	ps->ps_destroy_msg(pm);
2240 
2241 	return 0;
2242 }
2243 
2244 int
2245 perfuse_node_mkdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2246 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
2247 	const struct vattr *vap)
2248 {
2249 	struct perfuse_state *ps;
2250 	perfuse_msg_t *pm;
2251 	struct fuse_mkdir_in *fmi;
2252 	const char *path;
2253 	size_t len;
2254 
2255 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2256 		return ENOENT;
2257 
2258 	ps = puffs_getspecific(pu);
2259 	path = pcn->pcn_name;
2260 	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
2261 
2262 	pm = ps->ps_new_msg(pu, opc, FUSE_MKDIR, len, pcn->pcn_cred);
2263 	fmi = GET_INPAYLOAD(ps, pm, fuse_mkdir_in);
2264 	fmi->mode = vap->va_mode;
2265 	fmi->umask = 0; 	/* Seems unused by libfuse? */
2266 	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
2267 
2268 	return node_mk_common(pu, opc, pni, pcn, pm);
2269 }
2270 
2271 
2272 int
2273 perfuse_node_rmdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2274 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2275 {
2276 	struct perfuse_state *ps;
2277 	struct perfuse_node_data *pnd;
2278 	perfuse_msg_t *pm;
2279 	char *path;
2280 	const char *name;
2281 	size_t len;
2282 	int error;
2283 
2284 	pnd = PERFUSE_NODE_DATA(opc);
2285 
2286 	if ((pnd->pnd_flags & PND_REMOVED) ||
2287 	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2288 		return ENOENT;
2289 
2290 	/*
2291 	 * Await for all operations on the deleted node to drain,
2292 	 * as the filesystem may be confused to have it deleted
2293 	 * during a getattr
2294 	 */
2295 	while (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_INXCHG)
2296 		requeue_request(pu, targ, PCQ_AFTERXCHG);
2297 
2298 	ps = puffs_getspecific(pu);
2299 	name = pcn->pcn_name;
2300 	len = pcn->pcn_namelen + 1;
2301 
2302 	pm = ps->ps_new_msg(pu, opc, FUSE_RMDIR, len, pcn->pcn_cred);
2303 	path = _GET_INPAYLOAD(ps, pm, char *);
2304 	(void)strlcpy(path, name, len);
2305 
2306 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2307 		return error;
2308 
2309 	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2310 	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2311 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2312 
2313 	/*
2314 	 * The parent directory needs a sync
2315 	 */
2316 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2317 
2318 #ifdef PERFUSE_DEBUG
2319 	if (perfuse_diagflags & PDF_FILENAME)
2320 		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2321 			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2322 			perfuse_node_path(targ));
2323 #endif
2324 	ps->ps_destroy_msg(pm);
2325 
2326 	return 0;
2327 }
2328 
2329 /* vap is unused */
2330 /* ARGSUSED4 */
2331 int
2332 perfuse_node_symlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2333 	struct puffs_newinfo *pni, const struct puffs_cn *pcn_src,
2334 	const struct vattr *vap, const char *link_target)
2335 {
2336 	struct perfuse_state *ps;
2337 	perfuse_msg_t *pm;
2338 	char *np;
2339 	const char *path;
2340 	size_t path_len;
2341 	size_t linkname_len;
2342 	size_t len;
2343 
2344 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2345 		return ENOENT;
2346 
2347 	ps = puffs_getspecific(pu);
2348 	path = pcn_src->pcn_name;
2349 	path_len = pcn_src->pcn_namelen + 1;
2350 	linkname_len = strlen(link_target) + 1;
2351 	len = path_len + linkname_len;
2352 
2353 	pm = ps->ps_new_msg(pu, opc, FUSE_SYMLINK, len, pcn_src->pcn_cred);
2354 	np = _GET_INPAYLOAD(ps, pm, char *);
2355 	(void)strlcpy(np, path, path_len);
2356 	np += path_len;
2357 	(void)strlcpy(np, link_target, linkname_len);
2358 
2359 	return node_mk_common(pu, opc, pni, pcn_src, pm);
2360 }
2361 
2362 /* ARGSUSED4 */
2363 int
2364 perfuse_node_readdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2365 	struct dirent *dent, off_t *readoff, size_t *reslen,
2366 	const struct puffs_cred *pcr, int *eofflag, off_t *cookies,
2367 	size_t *ncookies)
2368 {
2369 	perfuse_msg_t *pm;
2370 	uint64_t fh;
2371 	struct perfuse_state *ps;
2372 	struct perfuse_node_data *pnd;
2373 	struct fuse_read_in *fri;
2374 	struct fuse_out_header *foh;
2375 	struct fuse_dirent *fd;
2376 	size_t foh_len;
2377 	int error;
2378 	size_t fd_maxlen;
2379 
2380 	error = 0;
2381 	ps = puffs_getspecific(pu);
2382 
2383 	/*
2384 	 * readdir state is kept at node level, and several readdir
2385 	 * requests can be issued at the same time on the same node.
2386 	 * We need to queue requests so that only one is in readdir
2387 	 * code at the same time.
2388 	 */
2389 	pnd = PERFUSE_NODE_DATA(opc);
2390 	while (pnd->pnd_flags & PND_INREADDIR)
2391 		requeue_request(pu, opc, PCQ_READDIR);
2392 	pnd->pnd_flags |= PND_INREADDIR;
2393 
2394 #ifdef PERFUSE_DEBUG
2395 	if (perfuse_diagflags & PDF_READDIR)
2396 		DPRINTF("%s: READDIR opc = %p enter critical section\n",
2397 			__func__, (void *)opc);
2398 #endif
2399 	/*
2400 	 * Re-initialize pnd->pnd_fd_cookie on the first readdir for a node
2401 	 */
2402 	if (*readoff == 0)
2403 		pnd->pnd_fd_cookie = 0;
2404 
2405 	/*
2406 	 * Do we already have the data bufered?
2407 	 */
2408 	if (pnd->pnd_dirent != NULL)
2409 		goto out;
2410 	pnd->pnd_dirent_len = 0;
2411 
2412 	/*
2413 	 * It seems NetBSD can call readdir without open first
2414 	 * libfuse will crash if it is done that way, hence open first.
2415 	 */
2416 	if (!(pnd->pnd_flags & PND_OPEN)) {
2417 		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
2418 			goto out;
2419 	}
2420 
2421 	fh = perfuse_get_fh(opc, FREAD);
2422 
2423 #ifdef PERFUSE_DEBUG
2424 	if (perfuse_diagflags & PDF_FH)
2425 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
2426 			"rfh = 0x%"PRIx64"\n", __func__, (void *)opc,
2427 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fh);
2428 #endif
2429 
2430 	pnd->pnd_all_fd = NULL;
2431 	pnd->pnd_all_fd_len = 0;
2432 	fd_maxlen = ps->ps_max_readahead - sizeof(*foh);
2433 
2434 	do {
2435 		size_t fd_len;
2436 		char *afdp;
2437 
2438 		pm = ps->ps_new_msg(pu, opc, FUSE_READDIR, sizeof(*fri), pcr);
2439 
2440 		/*
2441 		 * read_flags, lock_owner and flags are unused in libfuse
2442 		 */
2443 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
2444 		fri->fh = fh;
2445 		fri->offset = pnd->pnd_fd_cookie;
2446 		fri->size = (uint32_t)fd_maxlen;
2447 		fri->read_flags = 0;
2448 		fri->lock_owner = 0;
2449 		fri->flags = 0;
2450 
2451 		if ((error = xchg_msg(pu, opc, pm,
2452 				      UNSPEC_REPLY_LEN, wait_reply)) != 0)
2453 			goto out;
2454 
2455 		/*
2456 		 * There are many puffs_framebufs calls later,
2457 		 * therefore foh will not be valid for a long time.
2458 		 * Just get the length and forget it.
2459 		 */
2460 		foh = GET_OUTHDR(ps, pm);
2461 		foh_len = foh->len;
2462 
2463 		/*
2464 		 * Empty read: we reached the end of the buffer.
2465 		 */
2466 		if (foh_len == sizeof(*foh)) {
2467 			ps->ps_destroy_msg(pm);
2468 			*eofflag = 1;
2469 			break;
2470 		}
2471 
2472 		/*
2473 		 * Check for corrupted message.
2474 		 */
2475 		if (foh_len < sizeof(*foh) + sizeof(*fd)) {
2476 			ps->ps_destroy_msg(pm);
2477 			DWARNX("readdir reply too short");
2478 			error = EIO;
2479 			goto out;
2480 		}
2481 
2482 
2483 		fd = GET_OUTPAYLOAD(ps, pm, fuse_dirent);
2484 		fd_len = foh_len - sizeof(*foh);
2485 
2486 		pnd->pnd_all_fd = realloc(pnd->pnd_all_fd,
2487 					  pnd->pnd_all_fd_len + fd_len);
2488 		if (pnd->pnd_all_fd  == NULL)
2489 			DERR(EX_OSERR, "%s: malloc failed", __func__);
2490 
2491 		afdp = (char *)(void *)pnd->pnd_all_fd + pnd->pnd_all_fd_len;
2492 		(void)memcpy(afdp, fd, fd_len);
2493 
2494 		pnd->pnd_all_fd_len += fd_len;
2495 
2496 		/*
2497 		 * The fd->off field is used as a cookie for
2498 		 * resuming the next readdir() where this one was left.
2499 	 	 */
2500 		pnd->pnd_fd_cookie = readdir_last_cookie(fd, fd_len);
2501 
2502 		ps->ps_destroy_msg(pm);
2503 	} while (1 /* CONSTCOND */);
2504 
2505 	if (pnd->pnd_all_fd != NULL) {
2506 		if (fuse_to_dirent(pu, opc, pnd->pnd_all_fd,
2507 				   pnd->pnd_all_fd_len) == -1)
2508 			error = EIO;
2509 	}
2510 
2511 out:
2512 	if (pnd->pnd_all_fd != NULL) {
2513 		free(pnd->pnd_all_fd);
2514 		pnd->pnd_all_fd = NULL;
2515 		pnd->pnd_all_fd_len = 0;
2516 	}
2517 
2518 	if (error == 0)
2519 		error = readdir_buffered(opc, dent, readoff, reslen);
2520 
2521 	/*
2522 	 * Schedule queued readdir requests
2523 	 */
2524 	pnd->pnd_flags &= ~PND_INREADDIR;
2525 	(void)dequeue_requests(ps, opc, PCQ_READDIR, DEQUEUE_ALL);
2526 
2527 #ifdef PERFUSE_DEBUG
2528 	if (perfuse_diagflags & PDF_READDIR)
2529 		DPRINTF("%s: READDIR opc = %p exit critical section\n",
2530 			__func__, (void *)opc);
2531 #endif
2532 
2533 	return error;
2534 }
2535 
2536 int
2537 perfuse_node_readlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2538 	const struct puffs_cred *pcr, char *linkname, size_t *linklen)
2539 {
2540 	struct perfuse_state *ps;
2541 	perfuse_msg_t *pm;
2542 	int error;
2543 	size_t len;
2544 	struct fuse_out_header *foh;
2545 
2546 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2547 		return ENOENT;
2548 
2549 	ps = puffs_getspecific(pu);
2550 
2551 	pm = ps->ps_new_msg(pu, opc, FUSE_READLINK, 0, pcr);
2552 
2553 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2554 		return error;
2555 
2556 	foh = GET_OUTHDR(ps, pm);
2557 	len = foh->len - sizeof(*foh);
2558 	if (len > *linklen)
2559 		DERRX(EX_PROTOCOL, "path len = %zd too long", len);
2560 	if (len == 0)
2561 		DERRX(EX_PROTOCOL, "path len = %zd too short", len);
2562 
2563 	/*
2564 	 * FUSE filesystems return a NUL terminated string, we
2565 	 * do not want to trailing \0
2566 	 */
2567 	*linklen = len - 1;
2568 	(void)memcpy(linkname, _GET_OUTPAYLOAD(ps, pm, char *), len);
2569 
2570 	ps->ps_destroy_msg(pm);
2571 
2572 	return 0;
2573 }
2574 
2575 int
2576 perfuse_node_reclaim(struct puffs_usermount *pu, puffs_cookie_t opc)
2577 {
2578 	struct perfuse_state *ps;
2579 	perfuse_msg_t *pm;
2580 	struct perfuse_node_data *pnd;
2581 	struct fuse_forget_in *ffi;
2582 	struct puffs_node *pn;
2583 	struct puffs_node *pn_root;
2584 
2585 	ps = puffs_getspecific(pu);
2586 	pnd = PERFUSE_NODE_DATA(opc);
2587 
2588 	/*
2589 	 * Never forget the root.
2590 	 */
2591 	if (pnd->pnd_nodeid == FUSE_ROOT_ID)
2592 		return 0;
2593 
2594 	pnd->pnd_flags |= PND_RECLAIMED;
2595 	pnd->pnd_puffs_nlookup--;
2596 
2597 #ifdef PERFUSE_DEBUG
2598 	if (perfuse_diagflags & PDF_RECLAIM)
2599 		DPRINTF("%s (nodeid %"PRId64") reclaimed\n",
2600 			perfuse_node_path(opc), pnd->pnd_nodeid);
2601 #endif
2602 
2603 	pn_root = puffs_getroot(pu);
2604 	pn = (struct puffs_node *)opc;
2605 	while (pn != pn_root) {
2606 		struct puffs_node *parent_pn;
2607 
2608 		pnd = PERFUSE_NODE_DATA(pn);
2609 
2610 #ifdef PERFUSE_DEBUG
2611 	if (perfuse_diagflags & PDF_RECLAIM)
2612 		DPRINTF("%s (nodeid %"PRId64") is %sreclaimed, nlookup = %d "
2613 			"has childcount %d %s%s%s%s, pending ops:%s%s%s\n",
2614 		        perfuse_node_path((puffs_cookie_t)pn), pnd->pnd_nodeid,
2615 		        pnd->pnd_flags & PND_RECLAIMED ? "" : "not ",
2616 			pnd->pnd_puffs_nlookup, pnd->pnd_childcount,
2617 			pnd->pnd_flags & PND_OPEN ? "open " : "not open",
2618 			pnd->pnd_flags & PND_RFH ? "r" : "",
2619 			pnd->pnd_flags & PND_WFH ? "w" : "",
2620 			pnd->pnd_flags & PND_BUSY ? "" : " none",
2621 			pnd->pnd_flags & PND_INREADDIR ? " readdir" : "",
2622 			pnd->pnd_flags & PND_INWRITE ? " write" : "",
2623 			pnd->pnd_flags & PND_INOPEN ? " open" : "");
2624 #endif
2625 		if (!(pnd->pnd_flags & PND_RECLAIMED) ||
2626 		    (pnd->pnd_childcount != 0))
2627 			return 0;
2628 
2629 		/*
2630 		 * lookup/reclaim activity differs whether name cache
2631 		 * is used or not.
2632 		 * - With namecache off, we get as many reclaims as lookups,
2633 		 *   we therefore must keep track of pnd_puffs_nlookup
2634 		 * - With namecache on we have a single
2635 		 *   reclaim for any amount of lookups. We therfore
2636 		 *   ignore pnd_puffs_nlookup. On netbsd-5 there is a
2637 		 *   bug and this behavior occurs whatever cache setting
2638 		 *   we have.
2639 		 */
2640 #if !defined(PUFFS_KFLAG_CACHE_FS_TTL) && __NetBSD_Prereq__(5,99,0)
2641 		if (pnd->pnd_puffs_nlookup != 0)
2642 			return 0;
2643 #endif /* !PUFFS_KFLAG_CACHE_FS_TTL && NetBSD > 5.99.0 */
2644 
2645 #ifdef PERFUSE_DEBUG
2646 		if ((pnd->pnd_flags & PND_OPEN) ||
2647 		       !TAILQ_EMPTY(&pnd->pnd_pcq))
2648 			DERRX(EX_SOFTWARE, "%s: opc = %p: still open",
2649 			      __func__, (void *)opc);
2650 
2651 		if ((pnd->pnd_flags & PND_BUSY) ||
2652 		       !TAILQ_EMPTY(&pnd->pnd_pcq))
2653 			DERRX(EX_SOFTWARE, "%s: opc = %p: ongoing operations",
2654 			      __func__, (void *)opc);
2655 #endif
2656 
2657 		/*
2658 		 * Send the FORGET message
2659 		 *
2660 		 * ps_new_msg() is called with NULL creds, which will
2661 		 * be interpreted as FUSE superuser. This is obviously
2662 		 * fine since we operate with kernel creds here.
2663 		 */
2664 		pm = ps->ps_new_msg(pu, (puffs_cookie_t)pn, FUSE_FORGET,
2665 			      sizeof(*ffi), NULL);
2666 		ffi = GET_INPAYLOAD(ps, pm, fuse_forget_in);
2667 		ffi->nlookup = pnd->pnd_fuse_nlookup;
2668 
2669 		/*
2670 		 * No reply is expected, pm is freed in xchg_msg
2671 		 */
2672 		(void)xchg_msg(pu, (puffs_cookie_t)pn,
2673 			       pm, UNSPEC_REPLY_LEN, no_reply);
2674 
2675 		parent_pn = pnd->pnd_parent;
2676 
2677 		perfuse_destroy_pn(pn);
2678 
2679 		pn = parent_pn;
2680 	}
2681 
2682 	return 0;
2683 }
2684 
2685 int
2686 perfuse_node_inactive(struct puffs_usermount *pu, puffs_cookie_t opc)
2687 {
2688 	struct perfuse_state *ps;
2689 	struct perfuse_node_data *pnd;
2690 	int error;
2691 
2692 	ps = puffs_getspecific(pu);
2693 	pnd = PERFUSE_NODE_DATA(opc);
2694 
2695 	if (!(pnd->pnd_flags & (PND_OPEN|PND_REMOVED)))
2696 		return 0;
2697 
2698 	/*
2699 	 * Make sure all operation are finished
2700 	 * There can be an ongoing write. Other
2701 	 * operation wait for all data before
2702 	 * the close/inactive.
2703 	 */
2704 	while (pnd->pnd_flags & PND_INWRITE)
2705 		requeue_request(pu, opc, PCQ_AFTERWRITE);
2706 
2707 	/*
2708 	 * The inactive operation may be cancelled,
2709 	 * If no open is in progress, set PND_INOPEN
2710 	 * so that a new open will be queued.
2711 	 */
2712 	if (pnd->pnd_flags & PND_INOPEN)
2713 		return 0;
2714 
2715 	pnd->pnd_flags |= PND_INOPEN;
2716 
2717 	/*
2718 	 * Sync data
2719 	 */
2720 	if (pnd->pnd_flags & PND_DIRTY) {
2721 		if ((error = perfuse_node_fsync(pu, opc, NULL, 0, 0, 0)) != 0)
2722 			DWARN("%s: perfuse_node_fsync failed error = %d",
2723 			      __func__, error);
2724 	}
2725 
2726 
2727 	/*
2728 	 * Close handles
2729 	 */
2730 	if (pnd->pnd_flags & PND_WFH) {
2731 		if ((error = perfuse_node_close_common(pu, opc, FWRITE)) != 0)
2732 			DWARN("%s: close write FH failed error = %d",
2733 			      __func__, error);
2734 	}
2735 
2736 	if (pnd->pnd_flags & PND_RFH) {
2737 		if ((error = perfuse_node_close_common(pu, opc, FREAD)) != 0)
2738 			DWARN("%s: close read FH failed error = %d",
2739 			      __func__, error);
2740 	}
2741 
2742 	/*
2743 	 * This will cause a reclaim to be sent
2744 	 */
2745 	if (pnd->pnd_flags & PND_REMOVED)
2746 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1);
2747 
2748 	/*
2749 	 * Schedule awaiting operations
2750 	 */
2751 	pnd->pnd_flags &= ~PND_INOPEN;
2752 	(void)dequeue_requests(ps, opc, PCQ_OPEN, DEQUEUE_ALL);
2753 
2754 	return 0;
2755 }
2756 
2757 
2758 /* ARGSUSED0 */
2759 int
2760 perfuse_node_print(struct puffs_usermount *pu, puffs_cookie_t opc)
2761 {
2762 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
2763 	return 0;
2764 }
2765 
2766 /* ARGSUSED0 */
2767 int
2768 perfuse_node_pathconf(struct puffs_usermount *pu, puffs_cookie_t opc,
2769 	int name, int *retval)
2770 {
2771 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
2772 	return 0;
2773 }
2774 
2775 int
2776 perfuse_node_advlock(struct puffs_usermount *pu, puffs_cookie_t opc,
2777 	void *id, int op, struct flock *fl, int flags)
2778 {
2779 	struct perfuse_state *ps;
2780 	int fop;
2781 	perfuse_msg_t *pm;
2782 	uint64_t fh;
2783 	struct fuse_lk_in *fli;
2784 	struct fuse_out_header *foh;
2785 	struct fuse_lk_out *flo;
2786 	uint32_t owner;
2787 	size_t len;
2788 	int error;
2789 
2790 	/*
2791 	 * Make sure we do have a filehandle, as the FUSE filesystem
2792 	 * expect one. E.g.: if we provide none, GlusterFS logs an error
2793 	 * "0-glusterfs-fuse: xl is NULL"
2794 	 *
2795 	 * We need the read file handle if the file is open read only,
2796 	 * in order to support shared locks on read-only files.
2797 	 * NB: The kernel always sends advlock for read-only
2798 	 * files at exit time when the process used lock, see
2799 	 * sys_exit -> exit1 -> fd_free -> fd_close -> VOP_ADVLOCK
2800 	 */
2801 	if ((fh = perfuse_get_fh(opc, FREAD)) == FUSE_UNKNOWN_FH)
2802 		return EBADF;
2803 
2804 	ps = puffs_getspecific(pu);
2805 
2806 	if (op == F_GETLK)
2807 		fop = FUSE_GETLK;
2808 	else
2809 		fop = (flags & F_WAIT) ? FUSE_SETLKW : FUSE_SETLK;
2810 
2811 	/*
2812 	 * XXX ps_new_msg() is called with NULL creds, which will
2813 	 * be interpreted as FUSE superuser. We have no way to
2814 	 * know the requesting process' credential, but since advlock()
2815 	 * is supposed to operate on a file that has been open(),
2816 	 * permission should have already been checked at open() time.
2817 	 */
2818 	pm = ps->ps_new_msg(pu, opc, fop, sizeof(*fli), NULL);
2819 	fli = GET_INPAYLOAD(ps, pm, fuse_lk_in);
2820 	fli->fh = fh;
2821 	fli->owner = (uint64_t)(vaddr_t)id;
2822 	fli->lk.start = fl->l_start;
2823 	fli->lk.end = fl->l_start + fl->l_len;
2824 	fli->lk.type = fl->l_type;
2825 	fli->lk.pid = fl->l_pid;
2826 	fli->lk_flags = (flags & F_FLOCK) ? FUSE_LK_FLOCK : 0;
2827 
2828 	owner = (uint32_t)(vaddr_t)id;
2829 
2830 #ifdef PERFUSE_DEBUG
2831 	if (perfuse_diagflags & PDF_FH)
2832 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
2833 			__func__, (void *)opc,
2834 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fli->fh);
2835 #endif
2836 
2837 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2838 		return error;
2839 
2840 	foh = GET_OUTHDR(ps, pm);
2841 	len = foh->len - sizeof(*foh);
2842 
2843 	/*
2844 	 * Save or clear the lock
2845 	 */
2846 	switch (op) {
2847 	case F_GETLK:
2848 		if (len != sizeof(*flo))
2849 			DERRX(EX_SOFTWARE,
2850 			      "%s: Unexpected lock reply len %zd",
2851 			      __func__, len);
2852 
2853 		flo = GET_OUTPAYLOAD(ps, pm, fuse_lk_out);
2854 		fl->l_start = flo->lk.start;
2855 		fl->l_len = flo->lk.end - flo->lk.start;
2856 		fl->l_pid = flo->lk.pid;
2857 		fl->l_type = flo->lk.type;
2858 		fl->l_whence = SEEK_SET;	/* libfuse hardcodes it */
2859 
2860 		PERFUSE_NODE_DATA(opc)->pnd_lock_owner = flo->lk.pid;
2861 		break;
2862 	case F_UNLCK:
2863 		owner = 0;
2864 		/* FALLTHROUGH */
2865 	case F_SETLK:
2866 		/* FALLTHROUGH */
2867 	case F_SETLKW:
2868 		if (error != 0)
2869 			PERFUSE_NODE_DATA(opc)->pnd_lock_owner = owner;
2870 
2871 		if (len != 0)
2872 			DERRX(EX_SOFTWARE,
2873 			      "%s: Unexpected unlock reply len %zd",
2874 			      __func__, len);
2875 
2876 		break;
2877 	default:
2878 		DERRX(EX_SOFTWARE, "%s: Unexpected op %d", __func__, op);
2879 		break;
2880 	}
2881 
2882 	ps->ps_destroy_msg(pm);
2883 
2884 	return 0;
2885 }
2886 
2887 int
2888 perfuse_node_read(struct puffs_usermount *pu, puffs_cookie_t opc, uint8_t *buf,
2889 	off_t offset, size_t *resid, const struct puffs_cred *pcr, int ioflag)
2890 {
2891 	struct perfuse_state *ps;
2892 	struct perfuse_node_data *pnd;
2893 	const struct vattr *vap;
2894 	perfuse_msg_t *pm;
2895 	struct fuse_read_in *fri;
2896 	struct fuse_out_header *foh;
2897 	size_t readen;
2898 	int error;
2899 
2900 	ps = puffs_getspecific(pu);
2901 	pnd = PERFUSE_NODE_DATA(opc);
2902 	vap = puffs_pn_getvap((struct puffs_node *)opc);
2903 
2904 	/*
2905 	 * NetBSD turns that into a getdents(2) output
2906 	 * We just do a EISDIR as this feature is of little use.
2907 	 */
2908 	if (vap->va_type == VDIR)
2909 		return EISDIR;
2910 
2911 	if ((u_quad_t)offset + *resid > vap->va_size)
2912 		DWARNX("%s %p read %lld@%zu beyond EOF %" PRIu64 "\n",
2913 		       __func__, (void *)opc, (long long)offset,
2914 		       *resid, vap->va_size);
2915 
2916 	do {
2917 		size_t max_read;
2918 
2919 		max_read = ps->ps_max_readahead - sizeof(*foh);
2920 		/*
2921 		 * flags may be set to FUSE_READ_LOCKOWNER
2922 		 * if lock_owner is provided.
2923 		 */
2924 		pm = ps->ps_new_msg(pu, opc, FUSE_READ, sizeof(*fri), pcr);
2925 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
2926 		fri->fh = perfuse_get_fh(opc, FREAD);
2927 		fri->offset = offset;
2928 		fri->size = (uint32_t)MIN(*resid, max_read);
2929 		fri->read_flags = 0; /* XXX Unused by libfuse? */
2930 		fri->lock_owner = pnd->pnd_lock_owner;
2931 		fri->flags = 0;
2932 		fri->flags |= (fri->lock_owner != 0) ? FUSE_READ_LOCKOWNER : 0;
2933 
2934 #ifdef PERFUSE_DEBUG
2935 	if (perfuse_diagflags & PDF_FH)
2936 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
2937 			__func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
2938 #endif
2939 		error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply);
2940 		if (error  != 0)
2941 			return error;
2942 
2943 		foh = GET_OUTHDR(ps, pm);
2944 		readen = foh->len - sizeof(*foh);
2945 
2946 #ifdef PERFUSE_DEBUG
2947 		if (readen > *resid)
2948 			DERRX(EX_SOFTWARE, "%s: Unexpected big read %zd",
2949 			      __func__, readen);
2950 #endif
2951 
2952 		(void)memcpy(buf,  _GET_OUTPAYLOAD(ps, pm, char *), readen);
2953 
2954 		buf += readen;
2955 		offset += readen;
2956 		*resid -= readen;
2957 
2958 		ps->ps_destroy_msg(pm);
2959 	} while ((*resid != 0) && (readen != 0));
2960 
2961 	if (ioflag & (IO_SYNC|IO_DSYNC))
2962 		ps->ps_syncreads++;
2963 	else
2964 		ps->ps_asyncreads++;
2965 
2966 	return 0;
2967 }
2968 
2969 int
2970 perfuse_node_write(struct puffs_usermount *pu, puffs_cookie_t opc, uint8_t *buf,
2971 	off_t offset, size_t *resid, const struct puffs_cred *pcr, int ioflag)
2972 {
2973 	struct perfuse_state *ps;
2974 	struct perfuse_node_data *pnd;
2975 	struct vattr *vap;
2976 	perfuse_msg_t *pm;
2977 	struct fuse_write_in *fwi;
2978 	struct fuse_write_out *fwo;
2979 	size_t data_len;
2980 	size_t payload_len;
2981 	size_t written;
2982 	int inresize;
2983 	int error;
2984 
2985 	ps = puffs_getspecific(pu);
2986 	pnd = PERFUSE_NODE_DATA(opc);
2987 	vap = puffs_pn_getvap((struct puffs_node *)opc);
2988 	written = 0;
2989 	inresize = 0;
2990 	error = 0;
2991 
2992 	if (vap->va_type == VDIR)
2993 		return EISDIR;
2994 
2995 	/*
2996 	 * We need to queue write requests in order to avoid
2997 	 * dequeueing PCQ_AFTERWRITE when there are pending writes.
2998 	 */
2999 	while (pnd->pnd_flags & PND_INWRITE)
3000 		requeue_request(pu, opc, PCQ_WRITE);
3001 	pnd->pnd_flags |= PND_INWRITE;
3002 
3003 	/*
3004 	 * Serialize size access, see comment in perfuse_node_setattr().
3005 	 */
3006 	if ((u_quad_t)offset + *resid > vap->va_size) {
3007 		while (pnd->pnd_flags & PND_INRESIZE)
3008 			requeue_request(pu, opc, PCQ_RESIZE);
3009 		pnd->pnd_flags |= PND_INRESIZE;
3010 		inresize = 1;
3011 	}
3012 
3013 	/*
3014 	 * append flag: re-read the file size so that
3015 	 * we get the latest value.
3016 	 */
3017 	if (ioflag & PUFFS_IO_APPEND) {
3018 		DWARNX("%s: PUFFS_IO_APPEND set, untested code", __func__);
3019 
3020 		if ((error = perfuse_node_getattr(pu, opc, vap, pcr)) != 0)
3021 			goto out;
3022 
3023 		offset = vap->va_size;
3024 	}
3025 
3026 #ifdef PERFUSE_DEBUG
3027 	if (perfuse_diagflags & PDF_RESIZE)
3028 		DPRINTF(">> %s %p %" PRIu64 "\n", __func__,
3029 			(void *)opc, vap->va_size);
3030 #endif
3031 
3032 	do {
3033 		size_t max_write;
3034 		/*
3035 		 * There is a writepage flag when data
3036 		 * is aligned to page size. Use it for
3037 		 * everything but the data after the last
3038 		 * page boundary.
3039 		 */
3040 		max_write = ps->ps_max_write - sizeof(*fwi);
3041 
3042 		data_len = MIN(*resid, max_write);
3043 		if (data_len > (size_t)sysconf(_SC_PAGESIZE))
3044 			data_len = data_len & ~(sysconf(_SC_PAGESIZE) - 1);
3045 
3046 		payload_len = data_len + sizeof(*fwi);
3047 
3048 		/*
3049 		 * flags may be set to FUSE_WRITE_CACHE (XXX usage?)
3050 		 * or FUSE_WRITE_LOCKOWNER, if lock_owner is provided.
3051 		 * write_flags is set to 1 for writepage.
3052 		 */
3053 		pm = ps->ps_new_msg(pu, opc, FUSE_WRITE, payload_len, pcr);
3054 		fwi = GET_INPAYLOAD(ps, pm, fuse_write_in);
3055 		fwi->fh = perfuse_get_fh(opc, FWRITE);
3056 		fwi->offset = offset;
3057 		fwi->size = (uint32_t)data_len;
3058 		fwi->write_flags = (fwi->size % sysconf(_SC_PAGESIZE)) ? 0 : 1;
3059 		fwi->lock_owner = pnd->pnd_lock_owner;
3060 		fwi->flags = 0;
3061 		fwi->flags |= (fwi->lock_owner != 0) ? FUSE_WRITE_LOCKOWNER : 0;
3062 		fwi->flags |= (ioflag & IO_DIRECT) ? 0 : FUSE_WRITE_CACHE;
3063 		(void)memcpy((fwi + 1), buf, data_len);
3064 
3065 
3066 #ifdef PERFUSE_DEBUG
3067 		if (perfuse_diagflags & PDF_FH)
3068 			DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
3069 				"fh = 0x%"PRIx64"\n", __func__,
3070 				(void *)opc, pnd->pnd_nodeid, fwi->fh);
3071 #endif
3072 		if ((error = xchg_msg(pu, opc, pm,
3073 				      sizeof(*fwo), wait_reply)) != 0)
3074 			goto out;
3075 
3076 		fwo = GET_OUTPAYLOAD(ps, pm, fuse_write_out);
3077 		written = fwo->size;
3078 #ifdef PERFUSE_DEBUG
3079 		if (written > *resid)
3080 			DERRX(EX_SOFTWARE, "%s: Unexpected big write %zd",
3081 			      __func__, written);
3082 #endif
3083 		*resid -= written;
3084 		offset += written;
3085 		buf += written;
3086 
3087 		ps->ps_destroy_msg(pm);
3088 	} while (*resid != 0);
3089 
3090 	/*
3091 	 * puffs_ops(3) says
3092 	 *  "everything must be written or an error will be generated"
3093 	 */
3094 	if (*resid != 0)
3095 		error = EFBIG;
3096 
3097 #ifdef PERFUSE_DEBUG
3098 	if (perfuse_diagflags & PDF_RESIZE) {
3099 		if (offset > (off_t)vap->va_size)
3100 			DPRINTF("<< %s %p %" PRIu64 " -> %lld\n", __func__,
3101 				(void *)opc, vap->va_size, (long long)offset);
3102 		else
3103 			DPRINTF("<< %s %p \n", __func__, (void *)opc);
3104 	}
3105 #endif
3106 
3107 	/*
3108 	 * Update file size if we wrote beyond the end
3109 	 */
3110 	if (offset > (off_t)vap->va_size)
3111 		vap->va_size = offset;
3112 
3113 	if (inresize) {
3114 #ifdef PERFUSE_DEBUG
3115 		if (!(pnd->pnd_flags & PND_INRESIZE))
3116 			DERRX(EX_SOFTWARE, "file write grow without resize");
3117 #endif
3118 		pnd->pnd_flags &= ~PND_INRESIZE;
3119 		(void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
3120 	}
3121 
3122 
3123 	/*
3124 	 * Statistics
3125 	 */
3126 	if (ioflag & (IO_SYNC|IO_DSYNC))
3127 		ps->ps_syncwrites++;
3128 	else
3129 		ps->ps_asyncwrites++;
3130 
3131 	/*
3132 	 * Remember to sync the file
3133 	 */
3134 	pnd->pnd_flags |= PND_DIRTY;
3135 
3136 #ifdef PERFUSE_DEBUG
3137 	if (perfuse_diagflags & PDF_SYNC)
3138 		DPRINTF("%s: DIRTY opc = %p, file = \"%s\"\n",
3139 			__func__, (void*)opc, perfuse_node_path(opc));
3140 #endif
3141 
3142 out:
3143 	/*
3144 	 * If there are no more queued write, we can resume
3145 	 * an operation awaiting write completion.
3146 	 */
3147 	pnd->pnd_flags &= ~PND_INWRITE;
3148 	if (dequeue_requests(ps, opc, PCQ_WRITE, 1) == 0)
3149 		(void)dequeue_requests(ps, opc, PCQ_AFTERWRITE, DEQUEUE_ALL);
3150 
3151 	return error;
3152 }
3153 
3154 /* ARGSUSED0 */
3155 void
3156 perfuse_cache_write(struct puffs_usermount *pu, puffs_cookie_t opc, size_t size,
3157 	struct puffs_cacherun *runs)
3158 {
3159 	return;
3160 }
3161 
3162 /* ARGSUSED4 */
3163 int
3164 perfuse_node_getextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3165 	int attrns, const char *attrname, size_t *attrsize, uint8_t *attr,
3166 	size_t *resid, const struct puffs_cred *pcr)
3167 {
3168 	struct perfuse_state *ps;
3169 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3170 	perfuse_msg_t *pm;
3171 	struct fuse_getxattr_in *fgi;
3172 	struct fuse_getxattr_out *fgo;
3173 	struct fuse_out_header *foh;
3174 	size_t attrnamelen;
3175 	size_t len;
3176 	char *np;
3177 	int error;
3178 
3179 	ps = puffs_getspecific(pu);
3180 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3181 	attrnamelen = strlen(attrname) + 1;
3182 	len = sizeof(*fgi) + attrnamelen;
3183 
3184 	pm = ps->ps_new_msg(pu, opc, FUSE_GETXATTR, len, pcr);
3185 	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3186 	fgi->size = (unsigned int)((resid != NULL) ? *resid : 0);
3187 	np = (char *)(void *)(fgi + 1);
3188 	(void)strlcpy(np, attrname, attrnamelen);
3189 
3190 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3191 		return error;
3192 
3193 	/*
3194 	 * We just get fuse_getattr_out with list size if we requested
3195 	 * a null size.
3196 	 */
3197 	if (resid == NULL) {
3198 		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3199 
3200 		if (attrsize != NULL)
3201 			*attrsize = fgo->size;
3202 
3203 		ps->ps_destroy_msg(pm);
3204 		return 0;
3205 	}
3206 
3207 	/*
3208 	 * And with a non null requested size, we get the list just
3209 	 * after the header
3210 	 */
3211 	foh = GET_OUTHDR(ps, pm);
3212 	np = (char *)(void *)(foh + 1);
3213 
3214 	if (resid != NULL) {
3215 		len = MAX(foh->len - sizeof(*foh), *resid);
3216 		(void)memcpy(attr, np, len);
3217 		*resid -= len;
3218 	}
3219 
3220 	ps->ps_destroy_msg(pm);
3221 
3222 	return 0;
3223 }
3224 
3225 int
3226 perfuse_node_setextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3227 	int attrns, const char *attrname, uint8_t *attr, size_t *resid,
3228 	const struct puffs_cred *pcr)
3229 {
3230 	struct perfuse_state *ps;
3231 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3232 	perfuse_msg_t *pm;
3233 	struct fuse_setxattr_in *fsi;
3234 	size_t attrnamelen;
3235 	size_t len;
3236 	char *np;
3237 	int error;
3238 
3239 	ps = puffs_getspecific(pu);
3240 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3241 	attrnamelen = strlen(attrname) + 1;
3242 	len = sizeof(*fsi) + attrnamelen + *resid;
3243 
3244 	pm = ps->ps_new_msg(pu, opc, FUSE_SETXATTR, len, pcr);
3245 	fsi = GET_INPAYLOAD(ps, pm, fuse_setxattr_in);
3246 	fsi->size = (unsigned int)*resid;
3247 	fsi->flags = 0;
3248 	np = (char *)(void *)(fsi + 1);
3249 	(void)strlcpy(np, attrname, attrnamelen);
3250 	np += attrnamelen;
3251 	(void)memcpy(np, (char *)attr, *resid);
3252 
3253 	if ((error = xchg_msg(pu, opc, pm,
3254 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
3255 		return error;
3256 
3257 	*resid = 0;
3258 	ps->ps_destroy_msg(pm);
3259 
3260 	return 0;
3261 }
3262 
3263 /* ARGSUSED2 */
3264 int
3265 perfuse_node_listextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3266 	int attrns, size_t *attrsize, uint8_t *attrs, size_t *resid, int flag,
3267 	const struct puffs_cred *pcr)
3268 {
3269 	struct perfuse_state *ps;
3270 	perfuse_msg_t *pm;
3271 	struct fuse_getxattr_in *fgi;
3272 	struct fuse_getxattr_out *fgo;
3273 	struct fuse_out_header *foh;
3274 	char *np;
3275 	size_t len, puffs_len;
3276 	int error;
3277 
3278 	ps = puffs_getspecific(pu);
3279 	len = sizeof(*fgi);
3280 
3281 	pm = ps->ps_new_msg(pu, opc, FUSE_LISTXATTR, len, pcr);
3282 	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3283 	if (resid != NULL)
3284 		fgi->size = (unsigned int)*resid;
3285 	else
3286 		fgi->size = 0;
3287 
3288 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3289 		return error;
3290 
3291 	/*
3292 	 * We just get fuse_getattr_out with list size if we requested
3293 	 * a null size.
3294 	 */
3295 	if (resid == NULL) {
3296 		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3297 
3298 		if (attrsize != NULL)
3299 			*attrsize = fgo->size;
3300 
3301 		ps->ps_destroy_msg(pm);
3302 
3303 		return 0;
3304 	}
3305 
3306 	/*
3307 	 * And with a non null requested size, we get the list just
3308 	 * after the header
3309 	 */
3310 	foh = GET_OUTHDR(ps, pm);
3311 	np = (char *)(void *)(foh + 1);
3312 	puffs_len = foh->len - sizeof(*foh);
3313 
3314 	if (attrs != NULL) {
3315 #ifdef PUFFS_EXTATTR_LIST_LENPREFIX
3316 		/*
3317 		 * Convert the FUSE reply to length prefixed strings
3318 		 * if this is what the kernel wants.
3319 		 */
3320 		if (flag & PUFFS_EXTATTR_LIST_LENPREFIX) {
3321 			size_t i, attrlen;
3322 
3323 			for (i = 0; i < puffs_len; i += attrlen + 1) {
3324 				attrlen = strlen(np + i);
3325 				(void)memmove(np + i + 1, np + i, attrlen);
3326 				*(np + i) = (uint8_t)attrlen;
3327 			}
3328 		}
3329 #endif /* PUFFS_EXTATTR_LIST_LENPREFIX */
3330 		(void)memcpy(attrs, np, puffs_len);
3331 		*resid -= puffs_len;
3332 	}
3333 
3334 	if (attrsize != NULL)
3335 		*attrsize = puffs_len;
3336 
3337 	ps->ps_destroy_msg(pm);
3338 
3339 	return 0;
3340 }
3341 
3342 int
3343 perfuse_node_deleteextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3344 	int attrns, const char *attrname, const struct puffs_cred *pcr)
3345 {
3346 	struct perfuse_state *ps;
3347 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3348 	perfuse_msg_t *pm;
3349 	size_t attrnamelen;
3350 	char *np;
3351 	int error;
3352 
3353 	ps = puffs_getspecific(pu);
3354 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3355 	attrnamelen = strlen(attrname) + 1;
3356 
3357 	pm = ps->ps_new_msg(pu, opc, FUSE_REMOVEXATTR, attrnamelen, pcr);
3358 	np = _GET_INPAYLOAD(ps, pm, char *);
3359 	(void)strlcpy(np, attrname, attrnamelen);
3360 
3361 	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
3362 
3363 	ps->ps_destroy_msg(pm);
3364 
3365 	return error;
3366 }
3367