xref: /netbsd-src/lib/libperfuse/ops.c (revision b757af438b42b93f8c6571f026d8b8ef3eaf5fc9)
1 /*  $NetBSD: ops.c,v 1.52 2012/03/21 10:10:36 matt Exp $ */
2 
3 /*-
4  *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *     notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16  *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17  *  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19  *  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  *  POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <libgen.h>
32 #include <errno.h>
33 #include <err.h>
34 #include <sysexits.h>
35 #include <syslog.h>
36 #include <puffs.h>
37 #include <sys/socket.h>
38 #include <sys/socket.h>
39 #include <sys/extattr.h>
40 #include <sys/time.h>
41 #include <machine/vmparam.h>
42 
43 #include "perfuse_priv.h"
44 #include "fuse.h"
45 
46 extern int perfuse_diagflags;
47 
48 #if 0
49 static void print_node(const char *, puffs_cookie_t);
50 #endif
51 static void set_expire(puffs_cookie_t, struct fuse_entry_out *,
52    struct fuse_attr_out *);
53 static int attr_expired(puffs_cookie_t);
54 static int entry_expired(puffs_cookie_t);
55 static int xchg_msg(struct puffs_usermount *, puffs_cookie_t,
56     perfuse_msg_t *, size_t, enum perfuse_xchg_pb_reply);
57 static int mode_access(puffs_cookie_t, const struct puffs_cred *, mode_t);
58 static int sticky_access(struct puffs_node *, const struct puffs_cred *);
59 static void fuse_attr_to_vap(struct perfuse_state *,
60     struct vattr *, struct fuse_attr *);
61 static int node_lookup_dir_nodot(struct puffs_usermount *,
62     puffs_cookie_t, char *, size_t, struct puffs_node **);
63 static int node_lookup_common(struct puffs_usermount *, puffs_cookie_t,
64     const char *, const struct puffs_cred *, struct puffs_node **);
65 static int node_mk_common(struct puffs_usermount *, puffs_cookie_t,
66     struct puffs_newinfo *, const struct puffs_cn *pcn, perfuse_msg_t *);
67 static int node_mk_common_final(struct puffs_usermount *, puffs_cookie_t,
68     struct puffs_node *, const struct puffs_cn *pcn);
69 static uint64_t readdir_last_cookie(struct fuse_dirent *, size_t);
70 static ssize_t fuse_to_dirent(struct puffs_usermount *, puffs_cookie_t,
71     struct fuse_dirent *, size_t);
72 static int readdir_buffered(puffs_cookie_t, struct dirent *, off_t *,
73     size_t *);
74 static void requeue_request(struct puffs_usermount *,
75     puffs_cookie_t opc, enum perfuse_qtype);
76 static int dequeue_requests(struct perfuse_state *,
77     puffs_cookie_t opc, enum perfuse_qtype, int);
78 #define DEQUEUE_ALL 0
79 
/*
 *  From <sys/vnode.h>, inside the #ifdef _KERNEL section
 */
83 #define IO_SYNC		(0x40|IO_DSYNC)
84 #define IO_DSYNC	0x00200
85 #define IO_DIRECT	0x02000
86 
/*
 *  From <fcntl.h>, inside the #ifdef _KERNEL section
 */
90 #define F_WAIT		0x010
91 #define F_FLOCK		0x020
92 #define OFLAGS(fflags)  ((fflags) - 1)
93 
94 /*
95  * Borrowed from src/sys/kern/vfs_subr.c and src/sys/sys/vnode.h
96  */
97 const enum vtype iftovt_tab[16] = {
98 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
99         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
100 };
101 const int vttoif_tab[9] = {
102 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
103         S_IFSOCK, S_IFIFO, S_IFMT,
104 };
105 
106 #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
107 #define VTTOIF(indx) (vttoif_tab[(int)(indx)])
108 
109 #if 0
110 static void
111 print_node(const char *func, puffs_cookie_t opc)
112 {
113 	struct puffs_node *pn;
114 	struct perfuse_node_data *pnd;
115 	struct vattr *vap;
116 
117 	pn = (struct puffs_node *)opc;
118 	pnd = PERFUSE_NODE_DATA(opc);
119 	vap = &pn->pn_va;
120 
121 	printf("%s: \"%s\", opc = %p, nodeid = 0x%"PRIx64" ino = %"PRIu64"\n",
122 	       func, pnd->pnd_name, opc, pnd->pnd_nodeid, vap->va_fileid);
123 
124 	return;
125 }
#endif /* 0 */
127 
128 int
129 perfuse_node_close_common(struct puffs_usermount *pu, puffs_cookie_t opc,
130 	int mode)
131 {
132 	struct perfuse_state *ps;
133 	perfuse_msg_t *pm;
134 	int op;
135 	uint64_t fh;
136 	struct fuse_release_in *fri;
137 	struct perfuse_node_data *pnd;
138 	struct puffs_node *pn;
139 	int error;
140 
141 	ps = puffs_getspecific(pu);
142 	pn = (struct puffs_node *)opc;
143 	pnd = PERFUSE_NODE_DATA(pn);
144 
145 	if (puffs_pn_getvap(pn)->va_type == VDIR) {
146 		op = FUSE_RELEASEDIR;
147 		mode = FREAD;
148 	} else {
149 		op = FUSE_RELEASE;
150 	}
151 
152 	/*
153 	 * Destroy the filehandle before sending the
154 	 * request to the FUSE filesystem, otherwise
155 	 * we may get a second close() while we wait
156 	 * for the reply, and we would end up closing
157 	 * the same fh twice instead of closng both.
158 	 */
159 	fh = perfuse_get_fh(opc, mode);
160 	perfuse_destroy_fh(pn, fh);
161 
162 	/*
163 	 * release_flags may be set to FUSE_RELEASE_FLUSH
164 	 * to flush locks. lock_owner must be set in that case
165 	 *
166 	 * ps_new_msg() is called with NULL creds, which will
167 	 * be interpreted as FUSE superuser. We come here from the
168 	 * inactive method, which provides no creds, but obviously
169 	 * runs with kernel privilege.
170 	 */
171 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*fri), NULL);
172 	fri = GET_INPAYLOAD(ps, pm, fuse_release_in);
173 	fri->fh = fh;
174 	fri->flags = 0;
175 	fri->release_flags = 0;
176 	fri->lock_owner = pnd->pnd_lock_owner;
177 	fri->flags = (fri->lock_owner != 0) ? FUSE_RELEASE_FLUSH : 0;
178 
179 #ifdef PERFUSE_DEBUG
180 	if (perfuse_diagflags & PDF_FH)
181 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
182 			 __func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
183 #endif
184 
185 	if ((error = xchg_msg(pu, opc, pm,
186 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
187 		DERRX(EX_SOFTWARE, "%s: freed fh = 0x%"PRIx64" but filesystem "
188 		      "returned error = %d", __func__, fh, error);
189 
190 	ps->ps_destroy_msg(pm);
191 
192 	return 0;
193 }
194 
195 static int
196 xchg_msg(struct puffs_usermount *pu, puffs_cookie_t opc, perfuse_msg_t *pm,
197 	size_t len, enum perfuse_xchg_pb_reply wait)
198 {
199 	struct perfuse_state *ps;
200 	struct perfuse_node_data *pnd;
201 	struct perfuse_trace *pt = NULL;
202 	int error;
203 
204 	ps = puffs_getspecific(pu);
205 	pnd = NULL;
206 	if ((struct puffs_node *)opc != NULL)
207 		pnd = PERFUSE_NODE_DATA(opc);
208 
209 #ifdef PERFUSE_DEBUG
210 	if ((perfuse_diagflags & PDF_FILENAME) && (opc != 0))
211 		DPRINTF("file = \"%s\", ino = %"PRIu64" flags = 0x%x\n",
212 			perfuse_node_path(opc),
213 			((struct puffs_node *)opc)->pn_va.va_fileid,
214 			PERFUSE_NODE_DATA(opc)->pnd_flags);
215 #endif
216 	if (pnd)
217 		pnd->pnd_flags |= PND_INXCHG;
218 
219 	/*
220 	 * Record FUSE call start if requested
221 	 */
222 	if (perfuse_diagflags & PDF_TRACE)
223 		pt = perfuse_trace_begin(ps, opc, pm);
224 
225 	/*
226 	 * Do actual FUSE exchange
227 	 */
228 	if ((error = ps->ps_xchg_msg(pu, pm, len, wait)) != 0)
229 		ps->ps_destroy_msg(pm);
230 
231 	/*
232 	 * Record FUSE call end if requested
233 	 */
234 	if (pt != NULL)
235 		perfuse_trace_end(ps, pt, error);
236 
237 	if (pnd) {
238 		pnd->pnd_flags &= ~PND_INXCHG;
239 		(void)dequeue_requests(ps, opc, PCQ_AFTERXCHG, DEQUEUE_ALL);
240 	}
241 
242 	return error;
243 }
244 
245 static int
246 mode_access(puffs_cookie_t opc, const struct puffs_cred *pcr, mode_t mode)
247 {
248 	struct puffs_node *pn;
249 	struct vattr *va;
250 
251 	/*
252 	 * pcr is NULL for self open through fsync or readdir.
253 	 * In both case, access control is useless, as it was
254 	 * done before, at open time.
255 	 */
256 	if (pcr == NULL)
257 		return 0;
258 
259 	pn = (struct puffs_node *)opc;
260 	va = puffs_pn_getvap(pn);
261 	return puffs_access(va->va_type, va->va_mode,
262 			    va->va_uid, va->va_gid,
263 			    mode, pcr);
264 }
265 
266 static int
267 sticky_access(struct puffs_node *targ, const struct puffs_cred *pcr)
268 {
269 	uid_t uid;
270 	struct puffs_node *tdir;
271 	int sticky, owner;
272 
273 	tdir = PERFUSE_NODE_DATA(targ)->pnd_parent;
274 
275 	/*
276 	 * This covers the case where the kernel requests a DELETE
277 	 * or RENAME on its own, and where puffs_cred_getuid would
278 	 * return -1. While such a situation should not happen,
279 	 * we allow it here.
280 	 *
281 	 * This also allows root to tamper with other users' files
282 	 * that have the sticky bit.
283 	 */
284 	if (puffs_cred_isjuggernaut(pcr))
285 		return 0;
286 
287 	if (puffs_cred_getuid(pcr, &uid) != 0)
288 		DERRX(EX_SOFTWARE, "puffs_cred_getuid fails in %s", __func__);
289 
290 	sticky = puffs_pn_getvap(tdir)->va_mode & S_ISTXT;
291 	owner = puffs_pn_getvap(targ)->va_uid == uid;
292 
293 	if (sticky && !owner)
294 		return EACCES;
295 
296 	return 0;
297 }
298 
299 
300 static void
301 fuse_attr_to_vap(struct perfuse_state *ps, struct vattr *vap,
302 	struct fuse_attr *fa)
303 {
304 	vap->va_type = IFTOVT(fa->mode);
305 	vap->va_mode = fa->mode & ALLPERMS;
306 	vap->va_nlink = fa->nlink;
307 	vap->va_uid = fa->uid;
308 	vap->va_gid = fa->gid;
309 	vap->va_fsid = (long)ps->ps_fsid;
310 	vap->va_fileid = fa->ino;
311 	vap->va_size = fa->size;
312 	vap->va_blocksize = fa->blksize;
313 	vap->va_atime.tv_sec = (time_t)fa->atime;
314 	vap->va_atime.tv_nsec = (long) fa->atimensec;
315 	vap->va_mtime.tv_sec = (time_t)fa->mtime;
316 	vap->va_mtime.tv_nsec = (long)fa->mtimensec;
317 	vap->va_ctime.tv_sec = (time_t)fa->ctime;
318 	vap->va_ctime.tv_nsec = (long)fa->ctimensec;
319 	vap->va_birthtime.tv_sec = 0;
320 	vap->va_birthtime.tv_nsec = 0;
321 	vap->va_gen = 0;
322 	vap->va_flags = 0;
323 	vap->va_rdev = fa->rdev;
324 	vap->va_bytes = fa->size;
325 	vap->va_filerev = (u_quad_t)PUFFS_VNOVAL;
326 	vap->va_vaflags = 0;
327 
328 	if (vap->va_blocksize == 0)
329 		vap->va_blocksize = DEV_BSIZE;
330 
331 	if (vap->va_size == (size_t)PUFFS_VNOVAL) /* XXX */
332 		vap->va_size = 0;
333 
334 	return;
335 }
336 
337 static void
338 set_expire(puffs_cookie_t opc, struct fuse_entry_out *feo,
339 	struct fuse_attr_out *fao)
340 {
341 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
342 	struct timespec entry_ts;
343 	struct timespec attr_ts;
344 	struct timespec now;
345 
346 	if ((feo == NULL) && (fao == NULL))
347 		DERRX(EX_SOFTWARE, "%s: feo and fao NULL", __func__);
348 
349 	if ((feo != NULL) && (fao != NULL))
350 		DERRX(EX_SOFTWARE, "%s: feo and fao != NULL", __func__);
351 
352 	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
353 		DERR(EX_OSERR, "clock_gettime failed");
354 
355 	if (feo != NULL) {
356 		entry_ts.tv_sec = (time_t)feo->entry_valid;
357 		entry_ts.tv_nsec = (long)feo->entry_valid_nsec;
358 
359 		timespecadd(&now, &entry_ts, &pnd->pnd_entry_expire);
360 
361 		attr_ts.tv_sec = (time_t)feo->attr_valid;
362 		attr_ts.tv_nsec = (long)feo->attr_valid_nsec;
363 
364 		timespecadd(&now, &attr_ts, &pnd->pnd_attr_expire);
365 	}
366 
367 	if (fao != NULL) {
368 		attr_ts.tv_sec = (time_t)fao->attr_valid;
369 		attr_ts.tv_nsec = (long)fao->attr_valid_nsec;
370 
371 		timespecadd(&now, &attr_ts, &pnd->pnd_attr_expire);
372 	}
373 
374 	return;
375 }
376 
377 static int
378 attr_expired(puffs_cookie_t opc)
379 {
380 	struct perfuse_node_data *pnd;
381 	struct timespec expire;
382 	struct timespec now;
383 
384 	pnd = PERFUSE_NODE_DATA(opc);
385 	expire = pnd->pnd_attr_expire;
386 
387 	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
388 		DERR(EX_OSERR, "clock_gettime failed");
389 
390 	return timespeccmp(&expire, &now, <);
391 }
392 
393 static int
394 entry_expired(puffs_cookie_t opc)
395 {
396 	struct perfuse_node_data *pnd;
397 	struct timespec expire;
398 	struct timespec now;
399 
400 	pnd = PERFUSE_NODE_DATA(opc);
401 	expire = pnd->pnd_entry_expire;
402 
403 	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
404 		DERR(EX_OSERR, "clock_gettime failed");
405 
406 	return timespeccmp(&expire, &now, <);
407 }
408 
409 
410 /*
411  * Lookup name in directory opc
412  * We take special care of name being . or ..
413  * These are returned by readdir and deserve tweaks.
414  */
415 static int
416 node_lookup_dir_nodot(struct puffs_usermount *pu, puffs_cookie_t opc,
417 	char *name, size_t namelen, struct puffs_node **pnp)
418 {
419 	/*
420 	 * "dot" is easy as we already know it
421 	 */
422 	if (strncmp(name, ".", namelen) == 0) {
423 		*pnp = (struct puffs_node *)opc;
424 		return 0;
425 	}
426 
427 	/*
428 	 * "dotdot" is also known
429 	 */
430 	if (strncmp(name, "..", namelen) == 0) {
431 		*pnp = PERFUSE_NODE_DATA(opc)->pnd_parent;
432 		return 0;
433 	}
434 
435 	return node_lookup_common(pu, opc, name, NULL, pnp);
436 }
437 
438 static int
439 node_lookup_common(struct puffs_usermount *pu, puffs_cookie_t opc,
440 	const char *path, const struct puffs_cred *pcr, struct puffs_node **pnp)
441 {
442 	struct perfuse_state *ps;
443 	struct perfuse_node_data *oldpnd;
444 	perfuse_msg_t *pm;
445 	struct fuse_entry_out *feo;
446 	struct puffs_node *pn;
447 	size_t len;
448 	int error;
449 
450 	/*
451 	 * Prevent further lookups if the parent was removed
452 	 */
453 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
454 		return ESTALE;
455 
456 	if (pnp == NULL)
457 		DERRX(EX_SOFTWARE, "pnp must be != NULL");
458 
459 	ps = puffs_getspecific(pu);
460 
461 #ifdef PERFUSE_DEBUG
462 	if (perfuse_diagflags & PDF_FILENAME)
463 		DPRINTF("%s: opc = %p, file = \"%s\" looking up \"%s\"\n",
464 			__func__, (void *)opc, perfuse_node_path(opc), path);
465 #endif
466 	/*
467 	 * Is the node already known?
468 	 */
469 	TAILQ_FOREACH(oldpnd, &PERFUSE_NODE_DATA(opc)->pnd_children, pnd_next) {
470 		if ((oldpnd->pnd_flags & PND_REMOVED) ||
471 		    (strcmp(oldpnd->pnd_name, path) != 0))
472 			continue;
473 
474 #ifdef PERFUSE_DEBUG
475 		if (perfuse_diagflags & PDF_FILENAME)
476 			DPRINTF("%s: opc = %p, file = \"%s\" found "
477 				"cookie = %p, nodeid = 0x%"PRIx64" "
478 				"for \"%s\"\n", __func__,
479 				(void *)opc, perfuse_node_path(opc),
480 				(void *)oldpnd->pnd_pn, oldpnd->pnd_nodeid,
481 				path);
482 #endif
483 		break;
484 	}
485 
486 	/*
487 	 * Check for cached name
488 	 */
489 	if ((oldpnd != NULL) && !entry_expired(oldpnd->pnd_pn)) {
490 		oldpnd->pnd_puffs_nlookup++;
491 		*pnp = oldpnd->pnd_pn;
492 		return 0;
493 	}
494 
495 	len = strlen(path) + 1;
496 
497 	pm = ps->ps_new_msg(pu, opc, FUSE_LOOKUP, len, pcr);
498 	(void)strlcpy(_GET_INPAYLOAD(ps, pm, char *), path, len);
499 
500 	error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply);
501 
502 	switch (error) {
503 	case 0:
504 		break;
505 	case ENOENT:
506 		if (oldpnd != NULL) {
507 			oldpnd->pnd_flags |= PND_REMOVED;
508 #ifdef PERFUSE_DEBUG
509 			if (perfuse_diagflags & PDF_FILENAME)
510 				DPRINTF("%s: opc = %p nodeid = 0x%"PRIx64" "
511 					"file = \"%s\" removed\n", __func__,
512 					oldpnd->pnd_pn, oldpnd->pnd_nodeid,
513 					oldpnd->pnd_name);
514 #endif
515 		}
516 		/* FALLTHROUGH */
517 	default:
518 		return error;
519 		/* NOTREACHED */
520 		break;
521 	}
522 
523 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
524 
525 	if (oldpnd != NULL) {
526 		if (oldpnd->pnd_nodeid == feo->nodeid) {
527 			oldpnd->pnd_fuse_nlookup++;
528 			oldpnd->pnd_puffs_nlookup++;
529 			*pnp = oldpnd->pnd_pn;
530 
531 			ps->ps_destroy_msg(pm);
532 			return 0;
533 		} else {
534 			oldpnd->pnd_flags |= PND_REMOVED;
535 #ifdef PERFUSE_DEBUG
536 			if (perfuse_diagflags & PDF_FILENAME)
537 				DPRINTF("%s: opc = %p nodeid = 0x%"PRIx64" "
538 					"file = \"%s\" replaced\n", __func__,
539 					oldpnd->pnd_pn, oldpnd->pnd_nodeid,
540 					oldpnd->pnd_name);
541 #endif
542 		}
543 	}
544 
545 	pn = perfuse_new_pn(pu, path, opc);
546 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
547 
548 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
549 	pn->pn_va.va_gen = (u_long)(feo->generation);
550 	set_expire((puffs_cookie_t)pn, feo, NULL);
551 
552 	*pnp = pn;
553 
554 #ifdef PERFUSE_DEBUG
555 	if (perfuse_diagflags & PDF_FILENAME)
556 		DPRINTF("%s: opc = %p, looked up opc = %p, "
557 			"nodeid = 0x%"PRIx64" file = \"%s\"\n", __func__,
558 			(void *)opc, pn, feo->nodeid, path);
559 #endif
560 
561 	ps->ps_destroy_msg(pm);
562 
563 	return 0;
564 }
565 
566 
567 /*
568  * Common code for methods that create objects:
569  * perfuse_node_mkdir
570  * perfuse_node_mknod
571  * perfuse_node_symlink
572  */
573 static int
574 node_mk_common(struct puffs_usermount *pu, puffs_cookie_t opc,
575 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
576 	perfuse_msg_t *pm)
577 {
578 	struct perfuse_state *ps;
579 	struct puffs_node *pn;
580 	struct fuse_entry_out *feo;
581 	int error;
582 
583 	ps =  puffs_getspecific(pu);
584 
585 	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
586 		return error;
587 
588 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
589 	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
590 		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
591 
592 	pn = perfuse_new_pn(pu, pcn->pcn_name, opc);
593 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
594 
595 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
596 	pn->pn_va.va_gen = (u_long)(feo->generation);
597 	set_expire((puffs_cookie_t)pn, feo, NULL);
598 
599 	puffs_newinfo_setcookie(pni, pn);
600 
601 #ifdef PERFUSE_DEBUG
602 	if (perfuse_diagflags & PDF_FILENAME)
603 		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
604 			"nodeid = 0x%"PRIx64"\n",
605 			__func__, (void *)pn, pcn->pcn_name,
606 			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid);
607 #endif
608 	ps->ps_destroy_msg(pm);
609 
610 	return node_mk_common_final(pu, opc, pn, pcn);
611 }
612 
613 /*
614  * Common final code for methods that create objects:
615  * perfuse_node_mkdir via node_mk_common
616  * perfuse_node_mknod via node_mk_common
617  * perfuse_node_symlink via node_mk_common
618  * perfuse_node_create
619  */
620 static int
621 node_mk_common_final(struct puffs_usermount *pu, puffs_cookie_t opc,
622 	struct puffs_node *pn, const struct puffs_cn *pcn)
623 {
624 	struct perfuse_state *ps;
625 	perfuse_msg_t *pm;
626 	struct fuse_setattr_in *fsi;
627 	struct fuse_attr_out *fao;
628 	int error;
629 
630 	ps =  puffs_getspecific(pu);
631 
632 	/*
633 	 * Set owner and group. The kernel cannot create a file
634 	 * on its own (puffs_cred_getuid would return -1), right?
635 	 */
636 	if (puffs_cred_getuid(pcn->pcn_cred, &pn->pn_va.va_uid) != 0)
637 		DERRX(EX_SOFTWARE, "puffs_cred_getuid fails in %s", __func__);
638 	if (puffs_cred_getgid(pcn->pcn_cred, &pn->pn_va.va_gid) != 0)
639 		DERRX(EX_SOFTWARE, "puffs_cred_getgid fails in %s", __func__);
640 
641 	pm = ps->ps_new_msg(pu, (puffs_cookie_t)pn,
642 			    FUSE_SETATTR, sizeof(*fsi), pcn->pcn_cred);
643 	fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
644 	fsi->uid = pn->pn_va.va_uid;
645 	fsi->gid = pn->pn_va.va_gid;
646 	fsi->valid = FUSE_FATTR_UID|FUSE_FATTR_GID;
647 
648 	if ((error = xchg_msg(pu, (puffs_cookie_t)pn, pm,
649 			      sizeof(*fao), wait_reply)) != 0)
650 		return error;
651 
652 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
653 	fuse_attr_to_vap(ps, &pn->pn_va, &fao->attr);
654 	set_expire((puffs_cookie_t)pn, NULL, fao);
655 
656 	/*
657 	 * The parent directory needs a sync
658 	 */
659 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
660 
661 	ps->ps_destroy_msg(pm);
662 
663 	return 0;
664 }
665 
666 static uint64_t
667 readdir_last_cookie(struct fuse_dirent *fd, size_t fd_len)
668 {
669 	size_t len;
670 	size_t seen = 0;
671 	char *ndp;
672 
673 	do {
674 		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
675 		seen += len;
676 
677 		if (seen >= fd_len)
678 			break;
679 
680 		ndp = (char *)(void *)fd + (size_t)len;
681 		fd = (struct fuse_dirent *)(void *)ndp;
682 	} while (1 /* CONSTCOND */);
683 
684 	return fd->off;
685 }
686 
687 static ssize_t
688 fuse_to_dirent(struct puffs_usermount *pu, puffs_cookie_t opc,
689 	struct fuse_dirent *fd, size_t fd_len)
690 {
691 	struct dirent *dents;
692 	size_t dents_len;
693 	ssize_t written;
694 	uint64_t fd_offset;
695 	struct fuse_dirent *fd_base;
696 	size_t len;
697 
698 	fd_base = fd;
699 	fd_offset = 0;
700 	written = 0;
701 	dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
702 	dents_len = (size_t)PERFUSE_NODE_DATA(opc)->pnd_dirent_len;
703 
704 	do {
705 		char *ndp;
706 		size_t reclen;
707 
708 		reclen = _DIRENT_RECLEN(dents, fd->namelen);
709 
710 		/*
711 		 * Check we do not overflow the output buffer
712 		 * struct fuse_dirent is bigger than struct dirent,
713 		 * so we should always use fd_len and never reallocate
714 		 * later.
715 		 * If we have to reallocate,try to double the buffer
716 		 * each time so that we do not have to do it too often.
717 		 */
718 		if (written + reclen > dents_len) {
719 			if (dents_len == 0)
720 				dents_len = fd_len;
721 			else
722 				dents_len =
723 				   MAX(2 * dents_len, written + reclen);
724 
725 			dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
726 			if ((dents = realloc(dents, dents_len)) == NULL)
727 				DERR(EX_OSERR, "%s: malloc failed", __func__);
728 
729 			PERFUSE_NODE_DATA(opc)->pnd_dirent = dents;
730 			PERFUSE_NODE_DATA(opc)->pnd_dirent_len = dents_len;
731 
732 			/*
733 			 * (void *) for delint
734 			 */
735 			ndp = (char *)(void *)dents + written;
736 			dents = (struct dirent *)(void *)ndp;
737 		}
738 
739 		/*
740 		 * Filesystem was mounted without -o use_ino
741 		 * Perform a lookup to find it.
742 		 */
743 		if (fd->ino == PERFUSE_UNKNOWN_INO) {
744 			struct puffs_node *pn;
745 
746 			if (node_lookup_dir_nodot(pu, opc, fd->name,
747 						  fd->namelen, &pn) != 0) {
748 				DWARNX("node_lookup_dir_nodot failed");
749 			} else {
750 				fd->ino = pn->pn_va.va_fileid;
751 			}
752 		}
753 
754 		dents->d_fileno = fd->ino;
755 		dents->d_reclen = (unsigned short)reclen;
756 		dents->d_namlen = fd->namelen;
757 		dents->d_type = fd->type;
758 		strlcpy(dents->d_name, fd->name, fd->namelen + 1);
759 
760 #ifdef PERFUSE_DEBUG
761 		if (perfuse_diagflags & PDF_READDIR)
762 			DPRINTF("%s: translated \"%s\" ino = %"PRIu64"\n",
763 				__func__, dents->d_name, dents->d_fileno);
764 #endif
765 
766 		dents = _DIRENT_NEXT(dents);
767 		written += reclen;
768 
769 		/*
770 		 * Move to the next record.
771 		 * fd->off is not the offset, it is an opaque cookie
772 		 * given by the filesystem to keep state across multiple
773 		 * readdir() operation.
774 		 * Use record alignement instead.
775 		 */
776 		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
777 #ifdef PERFUSE_DEBUG
778 		if (perfuse_diagflags & PDF_READDIR)
779 			DPRINTF("%s: record at %"PRId64"/0x%"PRIx64" "
780 				"length = %zd/0x%zx. "
781 				"next record at %"PRId64"/0x%"PRIx64" "
782 				"max %zd/0x%zx\n",
783 				__func__, fd_offset, fd_offset, len, len,
784 				fd_offset + len, fd_offset + len,
785 				fd_len, fd_len);
786 #endif
787 		fd_offset += len;
788 
789 		/*
790 		 * Check if next record is still within the packet
791 		 * If it is not, we reached the end of the buffer.
792 		 */
793 		if (fd_offset >= fd_len)
794 			break;
795 
796 		/*
797 		 * (void *) for delint
798 		 */
799 		ndp = (char *)(void *)fd_base + (size_t)fd_offset;
800 		fd = (struct fuse_dirent *)(void *)ndp;
801 
802 	} while (1 /* CONSTCOND */);
803 
804 	/*
805 	 * Adjust the dirent output length
806 	 */
807 	if (written != -1)
808 		PERFUSE_NODE_DATA(opc)->pnd_dirent_len = written;
809 
810 	return written;
811 }
812 
813 static int
814 readdir_buffered(puffs_cookie_t opc, struct dirent *dent, off_t *readoff,
815 	size_t *reslen)
816 {
817 	struct dirent *fromdent;
818 	struct perfuse_node_data *pnd;
819 	char *ndp;
820 
821 	pnd = PERFUSE_NODE_DATA(opc);
822 
823 	while (*readoff < pnd->pnd_dirent_len) {
824 		/*
825 		 * (void *) for delint
826 		 */
827 		ndp = (char *)(void *)pnd->pnd_dirent + (size_t)*readoff;
828 		fromdent = (struct dirent *)(void *)ndp;
829 
830 		if (*reslen < _DIRENT_SIZE(fromdent))
831 			break;
832 
833 		memcpy(dent, fromdent, _DIRENT_SIZE(fromdent));
834 		*readoff += _DIRENT_SIZE(fromdent);
835 		*reslen -= _DIRENT_SIZE(fromdent);
836 
837 		dent = _DIRENT_NEXT(dent);
838 	}
839 
840 #ifdef PERFUSE_DEBUG
841 	if (perfuse_diagflags & PDF_READDIR)
842 		DPRINTF("%s: readoff = %"PRId64",  "
843 			"pnd->pnd_dirent_len = %"PRId64"\n",
844 			__func__, *readoff, pnd->pnd_dirent_len);
845 #endif
846 	if (*readoff >=  pnd->pnd_dirent_len) {
847 		free(pnd->pnd_dirent);
848 		pnd->pnd_dirent = NULL;
849 		pnd->pnd_dirent_len = 0;
850 	}
851 
852 	return 0;
853 }
854 
855 static void
856 requeue_request(struct puffs_usermount *pu, puffs_cookie_t opc,
857 	enum perfuse_qtype type)
858 {
859 	struct perfuse_cc_queue pcq;
860 	struct perfuse_node_data *pnd;
861 #ifdef PERFUSE_DEBUG
862 	struct perfuse_state *ps;
863 
864 	ps = perfuse_getspecific(pu);
865 #endif
866 
867 	pnd = PERFUSE_NODE_DATA(opc);
868 	pcq.pcq_type = type;
869 	pcq.pcq_cc = puffs_cc_getcc(pu);
870 	TAILQ_INSERT_TAIL(&pnd->pnd_pcq, &pcq, pcq_next);
871 
872 #ifdef PERFUSE_DEBUG
873 	if (perfuse_diagflags & PDF_REQUEUE)
874 		DPRINTF("%s: REQUEUE opc = %p, pcc = %p (%s)\n",
875 		        __func__, (void *)opc, pcq.pcq_cc,
876 			perfuse_qtypestr[type]);
877 #endif
878 
879 	puffs_cc_yield(pcq.pcq_cc);
880 	TAILQ_REMOVE(&pnd->pnd_pcq, &pcq, pcq_next);
881 
882 #ifdef PERFUSE_DEBUG
883 	if (perfuse_diagflags & PDF_REQUEUE)
884 		DPRINTF("%s: RESUME opc = %p, pcc = %p (%s)\n",
885 		        __func__, (void *)opc, pcq.pcq_cc,
886 			perfuse_qtypestr[type]);
887 #endif
888 
889 	return;
890 }
891 
892 /* ARGSUSED0 */
893 static int
894 dequeue_requests(struct perfuse_state *ps, puffs_cookie_t opc,
895 	enum perfuse_qtype type, int max)
896 {
897 	struct perfuse_cc_queue *pcq;
898 	struct perfuse_node_data *pnd;
899 	int dequeued;
900 
901 	pnd = PERFUSE_NODE_DATA(opc);
902 	dequeued = 0;
903 	TAILQ_FOREACH(pcq, &pnd->pnd_pcq, pcq_next) {
904 		if (pcq->pcq_type != type)
905 			continue;
906 
907 #ifdef PERFUSE_DEBUG
908 		if (perfuse_diagflags & PDF_REQUEUE)
909 			DPRINTF("%s: SCHEDULE opc = %p, pcc = %p (%s)\n",
910 				__func__, (void *)opc, pcq->pcq_cc,
911 				 perfuse_qtypestr[type]);
912 #endif
913 		puffs_cc_schedule(pcq->pcq_cc);
914 
915 		if (++dequeued == max)
916 			break;
917 	}
918 
919 #ifdef PERFUSE_DEBUG
920 	if (perfuse_diagflags & PDF_REQUEUE)
921 		DPRINTF("%s: DONE  opc = %p\n", __func__, (void *)opc);
922 #endif
923 
924 	return dequeued;
925 }
926 
927 void
928 perfuse_fs_init(struct puffs_usermount *pu)
929 {
930 	struct perfuse_state *ps;
931 	perfuse_msg_t *pm;
932 	struct fuse_init_in *fii;
933 	struct fuse_init_out *fio;
934 	int error;
935 
936 	ps = puffs_getspecific(pu);
937 
938         if (puffs_mount(pu, ps->ps_target, ps->ps_mountflags, ps->ps_root) != 0)
939                 DERR(EX_OSERR, "%s: puffs_mount failed", __func__);
940 
941 	/*
942 	 * Linux 2.6.34.1 sends theses flags:
943 	 * FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC
944 	 * FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK
945 	 *
946 	 * Linux also sets max_readahead at 32 pages (128 kB)
947 	 *
948 	 * ps_new_msg() is called with NULL creds, which will
949 	 * be interpreted as FUSE superuser.
950 	 */
951 	pm = ps->ps_new_msg(pu, 0, FUSE_INIT, sizeof(*fii), NULL);
952 	fii = GET_INPAYLOAD(ps, pm, fuse_init_in);
953 	fii->major = FUSE_KERNEL_VERSION;
954 	fii->minor = FUSE_KERNEL_MINOR_VERSION;
955 	fii->max_readahead = (unsigned int)(32 * sysconf(_SC_PAGESIZE));
956 	fii->flags = (FUSE_ASYNC_READ|FUSE_POSIX_LOCKS|FUSE_ATOMIC_O_TRUNC);
957 
958 	if ((error = xchg_msg(pu, 0, pm, sizeof(*fio), wait_reply)) != 0)
959 		DERRX(EX_SOFTWARE, "init message exchange failed (%d)", error);
960 
961 	fio = GET_OUTPAYLOAD(ps, pm, fuse_init_out);
962 	ps->ps_max_readahead = fio->max_readahead;
963 	ps->ps_max_write = fio->max_write;
964 
965 	ps->ps_destroy_msg(pm);
966 
967 	return;
968 }
969 
970 int
971 perfuse_fs_unmount(struct puffs_usermount *pu, int flags)
972 {
973 	perfuse_msg_t *pm;
974 	struct perfuse_state *ps;
975 	puffs_cookie_t opc;
976 	int error;
977 
978 	ps = puffs_getspecific(pu);
979 	opc = (puffs_cookie_t)puffs_getroot(pu);
980 
981 	/*
982 	 * ps_new_msg() is called with NULL creds, which will
983 	 * be interpreted as FUSE superuser.
984 	 */
985 	pm = ps->ps_new_msg(pu, opc, FUSE_DESTROY, 0, NULL);
986 
987 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0){
988 		DWARN("unmount %s", ps->ps_target);
989 		if (!(flags & MNT_FORCE))
990 			return error;
991 		else
992 			error = 0;
993 	} else {
994 		ps->ps_destroy_msg(pm);
995 	}
996 
997 	ps->ps_umount(pu);
998 
999 	if (perfuse_diagflags & PDF_MISC)
1000 		DPRINTF("%s unmounted, exit\n", ps->ps_target);
1001 
1002 	return 0;
1003 }
1004 
1005 int
1006 perfuse_fs_statvfs(struct puffs_usermount *pu, struct statvfs *svfsb)
1007 {
1008 	struct perfuse_state *ps;
1009 	perfuse_msg_t *pm;
1010 	puffs_cookie_t opc;
1011 	struct fuse_statfs_out *fso;
1012 	int error;
1013 
1014 	ps = puffs_getspecific(pu);
1015 	opc = (puffs_cookie_t)puffs_getroot(pu);
1016 
1017 	/*
1018 	 * ps_new_msg() is called with NULL creds, which will
1019 	 * be interpreted as FUSE superuser.
1020 	 */
1021 	pm = ps->ps_new_msg(pu, opc, FUSE_STATFS, 0, NULL);
1022 
1023 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fso), wait_reply)) != 0)
1024 		return error;
1025 
1026 	fso = GET_OUTPAYLOAD(ps, pm, fuse_statfs_out);
1027 	svfsb->f_flag = ps->ps_mountflags;
1028 	svfsb->f_bsize = fso->st.bsize;
1029 	svfsb->f_frsize = fso->st.frsize;
1030 	svfsb->f_iosize = ((struct puffs_node *)opc)->pn_va.va_blocksize;
1031 	svfsb->f_blocks = fso->st.blocks;
1032 	svfsb->f_bfree = fso->st.bfree;
1033 	svfsb->f_bavail = fso->st.bavail;
1034 	svfsb->f_bresvd = fso->st.bfree - fso->st.bavail;
1035 	svfsb->f_files = fso->st.files;
1036 	svfsb->f_ffree = fso->st.ffree;
1037 	svfsb->f_favail = fso->st.ffree;/* files not reserved for root */
1038 	svfsb->f_fresvd = 0;		/* files reserved for root */
1039 
1040 	svfsb->f_syncreads = ps->ps_syncreads;
1041 	svfsb->f_syncwrites = ps->ps_syncwrites;
1042 
1043 	svfsb->f_asyncreads = ps->ps_asyncreads;
1044 	svfsb->f_asyncwrites = ps->ps_asyncwrites;
1045 
1046 	(void)memcpy(&svfsb->f_fsidx, &ps->ps_fsid, sizeof(ps->ps_fsid));
1047 	svfsb->f_fsid = (unsigned long)ps->ps_fsid;
1048 	svfsb->f_namemax = MAXPATHLEN;	/* XXX */
1049 	svfsb->f_owner = ps->ps_owner_uid;
1050 
1051 	(void)strlcpy(svfsb->f_mntonname, ps->ps_target, _VFS_NAMELEN);
1052 
1053 	if (ps->ps_filesystemtype != NULL)
1054 		(void)strlcpy(svfsb->f_fstypename,
1055 			      ps->ps_filesystemtype, _VFS_NAMELEN);
1056 	else
1057 		(void)strlcpy(svfsb->f_fstypename, "fuse", _VFS_NAMELEN);
1058 
1059 	if (ps->ps_source != NULL)
1060 		strlcpy(svfsb->f_mntfromname, ps->ps_source, _VFS_NAMELEN);
1061 	else
1062 		strlcpy(svfsb->f_mntfromname, _PATH_FUSE, _VFS_NAMELEN);
1063 
1064 	ps->ps_destroy_msg(pm);
1065 
1066 	return 0;
1067 }
1068 
int
perfuse_fs_sync(struct puffs_usermount *pu, int waitfor,
	const struct puffs_cred *pcr)
{
	int error;

	/*
	 * No FUSE request is sent here: FUSE does not seem to
	 * provide a filesystem-wide sync callback, so the call is
	 * handed over to the libpuffs no-op implementation.
	 * Maybe this callback should not even be registered.
	 */
	error = puffs_fsnop_sync(pu, waitfor, pcr);

	return error;
}
1079 
1080 /* ARGSUSED0 */
int
perfuse_fs_fhtonode(struct puffs_usermount *pu, void *fid, size_t fidsize,
	struct puffs_newinfo *pni)
{
	/*
	 * File handle to node translation is not implemented;
	 * report it through DERRX with EX_SOFTWARE.
	 */
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);

	/* Gives the function a return value on every path */
	return 0;
}
1088 
1089 /* ARGSUSED0 */
1090 int
1091 perfuse_fs_nodetofh(struct puffs_usermount *pu, puffs_cookie_t cookie,
1092 	void *fid, size_t *fidsize)
1093 {
1094 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
1095 	return 0;
1096 }
1097 
#if 0
/* ARGSUSED0 */
void
perfuse_fs_extattrctl(struct puffs_usermount *pu, int cmd,
	puffs_cookie_t *cookie, int flags, int namespace, const char *attrname)
{
	/*
	 * Disabled placeholder: extended attribute control is not
	 * implemented. The function is declared void, so it must not
	 * return a value; the failure is reported through DERRX only.
	 */
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
}
#endif /* 0 */
1108 
1109 /* ARGSUSED0 */
void
perfuse_fs_suspend(struct puffs_usermount *pu, int status)
{
	/* Suspend notification: nothing to do, both arguments are ignored. */
}
1115 
1116 
1117 
1118 int
1119 perfuse_node_lookup(struct puffs_usermount *pu, puffs_cookie_t opc,
1120 	struct puffs_newinfo *pni, const struct puffs_cn *pcn)
1121 {
1122 	struct puffs_node *pn;
1123 	mode_t mode;
1124 	int error;
1125 
1126 	/*
1127 	 * Check permissions
1128 	 */
1129 	switch(pcn->pcn_nameiop) {
1130 	case NAMEI_DELETE: /* FALLTHROUGH */
1131 	case NAMEI_RENAME: /* FALLTHROUGH */
1132 	case NAMEI_CREATE:
1133 		if (pcn->pcn_flags & NAMEI_ISLASTCN)
1134 			mode = PUFFS_VEXEC|PUFFS_VWRITE;
1135 		else
1136 			mode = PUFFS_VEXEC;
1137 		break;
1138 	case NAMEI_LOOKUP: /* FALLTHROUGH */
1139 	default:
1140 		mode = PUFFS_VEXEC;
1141 		break;
1142 	}
1143 
1144 	if ((error = mode_access(opc, pcn->pcn_cred, mode)) != 0)
1145 		return error;
1146 
1147 	/*
1148 	 * Special case for ..
1149 	 */
1150 	if (strcmp(pcn->pcn_name, "..") == 0)
1151 		pn = PERFUSE_NODE_DATA(opc)->pnd_parent;
1152 	else
1153 		error = node_lookup_common(pu, (puffs_cookie_t)opc,
1154 					   pcn->pcn_name, pcn->pcn_cred, &pn);
1155 	if (error != 0)
1156 		return error;
1157 
1158 	/*
1159 	 * Kernel would kill us if the filesystem returned the parent
1160 	 * itself. If we want to live, hide that!
1161 	 */
1162 	if ((opc == (puffs_cookie_t)pn) && (strcmp(pcn->pcn_name, ".") != 0)) {
1163 		DWARNX("lookup returned parent");
1164 		return ESTALE;
1165 	}
1166 
1167 	/*
1168 	 * Removed node
1169 	 */
1170 	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_REMOVED)
1171 		return ENOENT;
1172 
1173 	/*
1174 	 * Check for sticky bit. Unfortunately there is no way to
1175 	 * do this before creating the puffs_node, since we require
1176 	 * this operation to get the node owner.
1177 	 */
1178 	switch (pcn->pcn_nameiop) {
1179 	case NAMEI_DELETE: /* FALLTHROUGH */
1180 	case NAMEI_RENAME:
1181 		error = sticky_access(pn, pcn->pcn_cred);
1182 		if (error != 0) {
1183 			/*
1184 			 * kernel will never know about it and will
1185 			 * not reclaim it. The filesystem needs to
1186 			 * clean it up anyway, therefore mimick a forget.
1187 			 */
1188 			PERFUSE_NODE_DATA(pn)->pnd_flags |= PND_RECLAIMED;
1189 			(void)perfuse_node_reclaim(pu, (puffs_cookie_t)pn);
1190 			return error;
1191 		}
1192 		break;
1193 	default:
1194 		break;
1195 	}
1196 
1197 	/*
1198 	 * If that node had a pending reclaim, wipe it out.
1199 	 */
1200 	PERFUSE_NODE_DATA(pn)->pnd_flags &= ~PND_RECLAIMED;
1201 
1202 	puffs_newinfo_setcookie(pni, pn);
1203 	puffs_newinfo_setvtype(pni, pn->pn_va.va_type);
1204 	puffs_newinfo_setsize(pni, (voff_t)pn->pn_va.va_size);
1205 	puffs_newinfo_setrdev(pni, pn->pn_va.va_rdev);
1206 
1207 	return error;
1208 }
1209 
1210 int
1211 perfuse_node_create(struct puffs_usermount *pu, puffs_cookie_t opc,
1212 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1213 	const struct vattr *vap)
1214 {
1215 	perfuse_msg_t *pm;
1216 	struct perfuse_state *ps;
1217 	struct fuse_create_in *fci;
1218 	struct fuse_entry_out *feo;
1219 	struct fuse_open_out *foo;
1220 	struct puffs_node *pn;
1221 	const char *name;
1222 	size_t namelen;
1223 	size_t len;
1224 	int error;
1225 
1226 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1227 		return ENOENT;
1228 
1229 	/*
1230 	 * If create is unimplemented: Check that it does not
1231 	 * already exists, and if not, do mknod and open
1232 	 */
1233 	ps = puffs_getspecific(pu);
1234 	if (ps->ps_flags & PS_NO_CREAT) {
1235 		error = node_lookup_common(pu, opc, pcn->pcn_name,
1236 					   pcn->pcn_cred, &pn);
1237 		if (error == 0)
1238 			return EEXIST;
1239 
1240 		error = perfuse_node_mknod(pu, opc, pni, pcn, vap);
1241 		if (error != 0)
1242 			return error;
1243 
1244 		error = node_lookup_common(pu, opc, pcn->pcn_name,
1245 					   pcn->pcn_cred, &pn);
1246 		if (error != 0)
1247 			return error;
1248 
1249 		/*
1250 		 * FUSE does the open at create time, while
1251 		 * NetBSD will open in a subsequent operation.
1252 		 * We need to open now, in order to retain FUSE
1253 		 * semantics. The calling process will not get
1254 		 * a file descriptor before the kernel sends
1255 		 * the open operation.
1256 		 */
1257 		opc = (puffs_cookie_t)pn;
1258 		error = perfuse_node_open(pu, opc, FWRITE, pcn->pcn_cred);
1259 		if (error != 0)
1260 			return error;
1261 
1262 		return 0;
1263 	}
1264 
1265 	name = pcn->pcn_name;
1266 	namelen = pcn->pcn_namelen + 1;
1267 	len = sizeof(*fci) + namelen;
1268 
1269 	/*
1270 	 * flags should use O_WRONLY instead of O_RDWR, but it
1271 	 * breaks when the caller tries to read from file.
1272 	 *
1273 	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1274 	 */
1275 	pm = ps->ps_new_msg(pu, opc, FUSE_CREATE, len, pcn->pcn_cred);
1276 	fci = GET_INPAYLOAD(ps, pm, fuse_create_in);
1277 	fci->flags = O_CREAT | O_TRUNC | O_RDWR;
1278 	fci->mode = vap->va_mode | VTTOIF(vap->va_type);
1279 	fci->umask = 0; 	/* Seems unused by libfuse */
1280 	(void)strlcpy((char*)(void *)(fci + 1), name, namelen);
1281 
1282 	len = sizeof(*feo) + sizeof(*foo);
1283 	if ((error = xchg_msg(pu, opc, pm, len, wait_reply)) != 0) {
1284 		/*
1285 		 * create is unimplmented, remember it for later,
1286 		 * and start over using mknod and open instead.
1287 		 */
1288 		if (error == ENOSYS) {
1289 			ps->ps_flags |= PS_NO_CREAT;
1290 			return perfuse_node_create(pu, opc, pni, pcn, vap);
1291 		}
1292 
1293 		return error;
1294 	}
1295 
1296 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
1297 	foo = (struct fuse_open_out *)(void *)(feo + 1);
1298 	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
1299 		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
1300 
1301 	/*
1302 	 * Save the file handle and inode in node private data
1303 	 * so that we can reuse it later
1304 	 */
1305 	pn = perfuse_new_pn(pu, name, opc);
1306 	perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE);
1307 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
1308 
1309 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
1310 	pn->pn_va.va_gen = (u_long)(feo->generation);
1311 	set_expire((puffs_cookie_t)pn, feo, NULL);
1312 
1313 	puffs_newinfo_setcookie(pni, pn);
1314 
1315 #ifdef PERFUSE_DEBUG
1316 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1317 		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
1318 			"nodeid = 0x%"PRIx64", wfh = 0x%"PRIx64"\n",
1319 			__func__, (void *)pn, pcn->pcn_name,
1320 			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid,
1321 			foo->fh);
1322 #endif
1323 
1324 	ps->ps_destroy_msg(pm);
1325 
1326 	return node_mk_common_final(pu, opc, pn, pcn);
1327 }
1328 
1329 
1330 int
1331 perfuse_node_mknod(struct puffs_usermount *pu, puffs_cookie_t opc,
1332 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1333 	const struct vattr *vap)
1334 {
1335 	struct perfuse_state *ps;
1336 	perfuse_msg_t *pm;
1337 	struct fuse_mknod_in *fmi;
1338 	const char* path;
1339 	size_t len;
1340 
1341 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1342 		return ENOENT;
1343 
1344 	/*
1345 	 * Only superuser can mknod objects other than
1346 	 * directories, files, socks, fifo and links.
1347 	 *
1348 	 * Create an object require -WX permission in the parent directory
1349 	 */
1350 	switch (vap->va_type) {
1351 	case VDIR:	/* FALLTHROUGH */
1352 	case VREG:	/* FALLTHROUGH */
1353 	case VFIFO:	/* FALLTHROUGH */
1354 	case VSOCK:
1355 		break;
1356 	default:	/* VNON, VBLK, VCHR, VBAD */
1357 		if (!puffs_cred_isjuggernaut(pcn->pcn_cred))
1358 			return EACCES;
1359 		break;
1360 	}
1361 
1362 
1363 	ps = puffs_getspecific(pu);
1364 	path = pcn->pcn_name;
1365 	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
1366 
1367 	/*
1368 	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1369 	 */
1370 	pm = ps->ps_new_msg(pu, opc, FUSE_MKNOD, len, pcn->pcn_cred);
1371 	fmi = GET_INPAYLOAD(ps, pm, fuse_mknod_in);
1372 	fmi->mode = vap->va_mode | VTTOIF(vap->va_type);
1373 	fmi->rdev = (uint32_t)vap->va_rdev;
1374 	fmi->umask = 0; 	/* Seems unused bu libfuse */
1375 	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
1376 
1377 	return node_mk_common(pu, opc, pni, pcn, pm);
1378 }
1379 
1380 
1381 int
1382 perfuse_node_open(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1383 	const struct puffs_cred *pcr)
1384 {
1385 	struct perfuse_state *ps;
1386 	struct perfuse_node_data *pnd;
1387 	perfuse_msg_t *pm;
1388 	mode_t fmode;
1389 	int op;
1390 	struct fuse_open_in *foi;
1391 	struct fuse_open_out *foo;
1392 	struct puffs_node *pn;
1393 	int error;
1394 
1395 	ps = puffs_getspecific(pu);
1396 	pn = (struct puffs_node *)opc;
1397 	pnd = PERFUSE_NODE_DATA(opc);
1398 	error = 0;
1399 
1400 	if (pnd->pnd_flags & PND_REMOVED)
1401 		return ENOENT;
1402 
1403 	if (puffs_pn_getvap(pn)->va_type == VDIR)
1404 		op = FUSE_OPENDIR;
1405 	else
1406 		op = FUSE_OPEN;
1407 
1408 	/*
1409 	 * libfuse docs says
1410 	 * - O_CREAT and O_EXCL should never be set.
1411 	 * - O_TRUNC may be used if mount option atomic_o_trunc is used XXX
1412 	 *
1413 	 * O_APPEND makes no sense since FUSE always sends
1414 	 * the file offset for write operations. If the
1415 	 * filesystem uses pwrite(), O_APPEND would cause
1416 	 * the offset to be ignored and cause file corruption.
1417 	 */
1418 	mode &= ~(O_CREAT|O_EXCL|O_APPEND);
1419 
1420 	/*
1421 	 * Do not open twice, and do not reopen for reading
1422 	 * if we already have write handle.
1423 	 */
1424 	if (((mode & FREAD) && (pnd->pnd_flags & PND_RFH)) ||
1425 	    ((mode & FREAD) && (pnd->pnd_flags & PND_WFH)) ||
1426 	    ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH)))
1427 		goto out;
1428 
1429 	/*
1430 	 * Queue open on a node so that we do not open
1431 	 * twice. This would be better with read and
1432 	 * write distinguished.
1433 	 */
1434 	while (pnd->pnd_flags & PND_INOPEN)
1435 		requeue_request(pu, opc, PCQ_OPEN);
1436 	pnd->pnd_flags |= PND_INOPEN;
1437 
1438 	/*
1439 	 * Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE
1440 	 * to O_RDONLY/O_WRONLY while perserving the other options.
1441 	 */
1442 	fmode = mode & ~(FREAD|FWRITE);
1443 	fmode |= (mode & FWRITE) ? O_RDWR : O_RDONLY;
1444 
1445 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*foi), pcr);
1446 	foi = GET_INPAYLOAD(ps, pm, fuse_open_in);
1447 	foi->flags = fmode;
1448 	foi->unused = 0;
1449 
1450 	if ((error = xchg_msg(pu, opc, pm, sizeof(*foo), wait_reply)) != 0)
1451 		goto out;
1452 
1453 	foo = GET_OUTPAYLOAD(ps, pm, fuse_open_out);
1454 
1455 	/*
1456 	 * Save the file handle in node private data
1457 	 * so that we can reuse it later
1458 	 */
1459 	perfuse_new_fh(opc, foo->fh, mode);
1460 
1461 #ifdef PERFUSE_DEBUG
1462 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1463 		DPRINTF("%s: opc = %p, file = \"%s\", "
1464 			"nodeid = 0x%"PRIx64", %s%sfh = 0x%"PRIx64"\n",
1465 			__func__, (void *)opc, perfuse_node_path(opc),
1466 			pnd->pnd_nodeid, mode & FREAD ? "r" : "",
1467 			mode & FWRITE ? "w" : "", foo->fh);
1468 #endif
1469 
1470 	ps->ps_destroy_msg(pm);
1471 out:
1472 
1473 	pnd->pnd_flags &= ~PND_INOPEN;
1474 	(void)dequeue_requests(ps, opc, PCQ_OPEN, DEQUEUE_ALL);
1475 
1476 	return error;
1477 }
1478 
1479 /* ARGSUSED0 */
1480 int
1481 perfuse_node_close(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1482 	const struct puffs_cred *pcr)
1483 {
1484 	struct perfuse_node_data *pnd;
1485 
1486 	pnd = PERFUSE_NODE_DATA(opc);
1487 
1488 	if (!(pnd->pnd_flags & PND_OPEN))
1489 		return EBADF;
1490 
1491 	/*
1492 	 * Actual close is postponed at inactive time.
1493 	 */
1494 	return 0;
1495 }
1496 
1497 int
1498 perfuse_node_access(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1499 	const struct puffs_cred *pcr)
1500 {
1501 	perfuse_msg_t *pm;
1502 	struct perfuse_state *ps;
1503 	struct fuse_access_in *fai;
1504 	int error;
1505 
1506 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1507 		return ENOENT;
1508 
1509 	/*
1510 	 * If we previously detected the filesystem does not
1511 	 * implement access(), short-circuit the call and skip
1512 	 * to libpuffs access() emulation.
1513 	 */
1514 	ps = puffs_getspecific(pu);
1515 	if (ps->ps_flags & PS_NO_ACCESS) {
1516 		const struct vattr *vap;
1517 
1518 		vap = puffs_pn_getvap((struct puffs_node *)opc);
1519 
1520 		error = puffs_access(IFTOVT(vap->va_mode),
1521 				     vap->va_mode & ACCESSPERMS,
1522 				     vap->va_uid, vap->va_gid,
1523 				     (mode_t)mode, pcr);
1524 		return error;
1525 	}
1526 
1527 	/*
1528 	 * Plain access call
1529 	 */
1530 	pm = ps->ps_new_msg(pu, opc, FUSE_ACCESS, sizeof(*fai), pcr);
1531 	fai = GET_INPAYLOAD(ps, pm, fuse_access_in);
1532 	fai->mask = 0;
1533 	fai->mask |= (mode & PUFFS_VREAD) ? R_OK : 0;
1534 	fai->mask |= (mode & PUFFS_VWRITE) ? W_OK : 0;
1535 	fai->mask |= (mode & PUFFS_VEXEC) ? X_OK : 0;
1536 
1537 	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
1538 
1539 	ps->ps_destroy_msg(pm);
1540 
1541 	/*
1542 	 * If unimplemented, start over with emulation
1543 	 */
1544 	if (error == ENOSYS) {
1545 		ps->ps_flags |= PS_NO_ACCESS;
1546 		return perfuse_node_access(pu, opc, mode, pcr);
1547 	}
1548 
1549 	return error;
1550 }
1551 
1552 int
1553 perfuse_node_getattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1554 	struct vattr *vap, const struct puffs_cred *pcr)
1555 {
1556 	perfuse_msg_t *pm = NULL;
1557 	struct perfuse_state *ps;
1558 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
1559 	struct fuse_getattr_in *fgi;
1560 	struct fuse_attr_out *fao;
1561 	int error = 0;
1562 
1563 	if (pnd->pnd_flags & PND_REMOVED)
1564 		return ENOENT;
1565 
1566 	/*
1567 	 * Serialize size access, see comment in perfuse_node_setattr().
1568 	 */
1569 	while (pnd->pnd_flags & PND_INRESIZE)
1570 		requeue_request(pu, opc, PCQ_RESIZE);
1571 	pnd->pnd_flags |= PND_INRESIZE;
1572 
1573 	ps = puffs_getspecific(pu);
1574 
1575 	/*
1576 	 * Check for cached attributes
1577 	 * This still require serialized access to size.
1578 	 */
1579 	if (!attr_expired(opc)) {
1580 		(void)memcpy(vap, puffs_pn_getvap((struct puffs_node *)opc),
1581 			     sizeof(*vap));
1582 		goto out;
1583 	}
1584 
1585 	/*
1586 	 * FUSE_GETATTR_FH must be set in fgi->flags
1587 	 * if we use for fgi->fh
1588 	 */
1589 	pm = ps->ps_new_msg(pu, opc, FUSE_GETATTR, sizeof(*fgi), pcr);
1590 	fgi = GET_INPAYLOAD(ps, pm, fuse_getattr_in);
1591 	fgi->getattr_flags = 0;
1592 	fgi->dummy = 0;
1593 	fgi->fh = 0;
1594 
1595 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_OPEN) {
1596 		fgi->fh = perfuse_get_fh(opc, FREAD);
1597 		fgi->getattr_flags |= FUSE_GETATTR_FH;
1598 	}
1599 
1600 #ifdef PERFUSE_DEBUG
1601 	if (perfuse_diagflags & PDF_RESIZE)
1602 		DPRINTF(">> %s %p %" PRIu64 "\n", __func__, (void *)opc,
1603 		    vap->va_size);
1604 #endif
1605 
1606 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
1607 		goto out;
1608 
1609 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1610 
1611 #ifdef PERFUSE_DEBUG
1612 	if (perfuse_diagflags & PDF_RESIZE)
1613 		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1614 		    (void *)opc, vap->va_size, fao->attr.size);
1615 #endif
1616 
1617 	/*
1618 	 * We set birthtime, flags, filerev,vaflags to 0.
1619 	 * This seems the best bet, since the information is
1620 	 * not available from filesystem.
1621 	 */
1622 	fuse_attr_to_vap(ps, vap, &fao->attr);
1623 	set_expire(opc, NULL, fao);
1624 
1625 	ps->ps_destroy_msg(pm);
1626 out:
1627 
1628 	pnd->pnd_flags &= ~PND_INRESIZE;
1629 	(void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
1630 
1631 	return error;
1632 }
1633 
1634 int
1635 perfuse_node_setattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1636 	const struct vattr *vap, const struct puffs_cred *pcr)
1637 {
1638 	perfuse_msg_t *pm;
1639 	uint64_t fh;
1640 	struct perfuse_state *ps;
1641 	struct perfuse_node_data *pnd;
1642 	struct fuse_setattr_in *fsi;
1643 	struct fuse_attr_out *fao;
1644 	struct vattr *old_va;
1645 	int error;
1646 #ifdef PERFUSE_DEBUG
1647 	struct vattr *old_vap;
1648 	int resize_debug = 0;
1649 #endif
1650 
1651 	ps = puffs_getspecific(pu);
1652 	pnd = PERFUSE_NODE_DATA(opc);
1653 
1654 	/*
1655 	 * The only operation we can do once the file is removed
1656 	 * is to resize it, and we can do it only if it is open.
1657 	 * Do not even send the operation to the filesystem: the
1658 	 * file is not there anymore.
1659 	 */
1660 	if (pnd->pnd_flags & PND_REMOVED) {
1661 		if (!(pnd->pnd_flags & PND_OPEN))
1662 			return ENOENT;
1663 
1664 		error = 0;
1665 		goto out;
1666 	}
1667 
1668 	old_va = puffs_pn_getvap((struct puffs_node *)opc);
1669 
1670 	/*
1671 	 * Check for permission to change size
1672 	 */
1673 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1674 	    (error = mode_access(opc, pcr, PUFFS_VWRITE)) != 0)
1675 		return error;
1676 
1677 	/*
1678 	 * Check for permission to change dates
1679 	 */
1680 	if (((vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1681 	     (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL)) &&
1682 	    (puffs_access_times(old_va->va_uid, old_va->va_gid,
1683 				old_va->va_mode, 0, pcr) != 0))
1684 		return EACCES;
1685 
1686 	/*
1687 	 * Check for permission to change owner and group
1688 	 */
1689 	if (((vap->va_uid != (uid_t)PUFFS_VNOVAL) ||
1690 	     (vap->va_gid != (gid_t)PUFFS_VNOVAL)) &&
1691 	    (puffs_access_chown(old_va->va_uid, old_va->va_gid,
1692 				vap->va_uid, vap->va_gid, pcr)) != 0)
1693 		return EACCES;
1694 
1695 	/*
1696 	 * Check for permission to change permissions
1697 	 */
1698 	if ((vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
1699 	    (puffs_access_chmod(old_va->va_uid, old_va->va_gid,
1700 				old_va->va_type, vap->va_mode, pcr)) != 0)
1701 		return EACCES;
1702 
1703 	pm = ps->ps_new_msg(pu, opc, FUSE_SETATTR, sizeof(*fsi), pcr);
1704 	fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
1705 	fsi->valid = 0;
1706 
1707 	/*
1708 	 * Get a fh if the node is open for writing
1709 	 */
1710 	if (pnd->pnd_flags & PND_WFH) {
1711 		fh = perfuse_get_fh(opc, FWRITE);
1712 		fsi->fh = fh;
1713 		fsi->valid |= FUSE_FATTR_FH;
1714 	}
1715 
1716 	if (vap->va_size != (u_quad_t)PUFFS_VNOVAL) {
1717 		fsi->size = vap->va_size;
1718 		fsi->valid |= FUSE_FATTR_SIZE;
1719 
1720 		/*
1721 		 * Serialize anything that can touch file size
1722 		 * to avoid reordered GETATTR and SETATTR.
1723 		 * Out of order SETATTR can report stale size,
1724 		 * which will cause the kernel to truncate the file.
1725 		 */
1726 		while (pnd->pnd_flags & PND_INRESIZE)
1727 			requeue_request(pu, opc, PCQ_RESIZE);
1728 		pnd->pnd_flags |= PND_INRESIZE;
1729 	}
1730 
1731 	/*
1732  	 * Setting mtime without atime or vice versa leads to
1733 	 * dates being reset to Epoch on glusterfs. If one
1734 	 * is missing, use the old value.
1735  	 */
1736 	if ((vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1737 	    (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL)) {
1738 
1739 		if (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) {
1740 			fsi->atime = vap->va_atime.tv_sec;
1741 			fsi->atimensec = (uint32_t)vap->va_atime.tv_nsec;
1742 		} else {
1743 			fsi->atime = old_va->va_atime.tv_sec;
1744 			fsi->atimensec = (uint32_t)old_va->va_atime.tv_nsec;
1745 		}
1746 
1747 		if (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) {
1748 			fsi->mtime = vap->va_mtime.tv_sec;
1749 			fsi->mtimensec = (uint32_t)vap->va_mtime.tv_nsec;
1750 		} else {
1751 			fsi->mtime = old_va->va_mtime.tv_sec;
1752 			fsi->mtimensec = (uint32_t)old_va->va_mtime.tv_nsec;
1753 		}
1754 
1755 		fsi->valid |= (FUSE_FATTR_MTIME|FUSE_FATTR_ATIME);
1756 	}
1757 
1758 	if (vap->va_mode != (mode_t)PUFFS_VNOVAL) {
1759 		fsi->mode = vap->va_mode;
1760 		fsi->valid |= FUSE_FATTR_MODE;
1761 	}
1762 
1763 	if (vap->va_uid != (uid_t)PUFFS_VNOVAL) {
1764 		fsi->uid = vap->va_uid;
1765 		fsi->valid |= FUSE_FATTR_UID;
1766 	}
1767 
1768 	if (vap->va_gid != (gid_t)PUFFS_VNOVAL) {
1769 		fsi->gid = vap->va_gid;
1770 		fsi->valid |= FUSE_FATTR_GID;
1771 	}
1772 
1773 	if (pnd->pnd_lock_owner != 0) {
1774 		fsi->lock_owner = pnd->pnd_lock_owner;
1775 		fsi->valid |= FUSE_FATTR_LOCKOWNER;
1776 	}
1777 
1778 	/*
1779 	 * ftruncate() sends only va_size, and metadata cache
1780 	 * flush adds va_atime and va_mtime. Some FUSE
1781 	 * filesystems will attempt to detect ftruncate by
1782 	 * checking for FATTR_SIZE being set without
1783 	 * FATTR_UID|FATTR_GID|FATTR_ATIME|FATTR_MTIME|FATTR_MODE
1784 	 *
1785 	 * Try to adapt and remove FATTR_ATIME|FATTR_MTIME
1786 	 * if we suspect a ftruncate().
1787 	 */
1788 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1789 	    ((vap->va_mode == (mode_t)PUFFS_VNOVAL) &&
1790 	     (vap->va_uid == (uid_t)PUFFS_VNOVAL) &&
1791 	     (vap->va_gid == (gid_t)PUFFS_VNOVAL))) {
1792 		fsi->atime = 0;
1793 		fsi->atimensec = 0;
1794 		fsi->mtime = 0;
1795 		fsi->mtimensec = 0;
1796 		fsi->valid &= ~(FUSE_FATTR_ATIME|FUSE_FATTR_MTIME);
1797 	}
1798 
1799 	/*
1800 	 * If nothing remain, discard the operation.
1801 	 */
1802 	if (!(fsi->valid & (FUSE_FATTR_SIZE|FUSE_FATTR_ATIME|FUSE_FATTR_MTIME|
1803 			    FUSE_FATTR_MODE|FUSE_FATTR_UID|FUSE_FATTR_GID))) {
1804 		error = 0;
1805 		goto out;
1806 	}
1807 
1808 #ifdef PERFUSE_DEBUG
1809 	old_vap = puffs_pn_getvap((struct puffs_node *)opc);
1810 
1811 	if ((perfuse_diagflags & PDF_RESIZE) &&
1812 	    (old_vap->va_size != (u_quad_t)PUFFS_VNOVAL)) {
1813 		resize_debug = 1;
1814 
1815 		DPRINTF(">> %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1816 		    (void *)opc,
1817 		    puffs_pn_getvap((struct puffs_node *)opc)->va_size,
1818 		    fsi->size);
1819 	}
1820 #endif
1821 
1822 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
1823 		goto out;
1824 
1825 	/*
1826 	 * Copy back the new values
1827 	 */
1828 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1829 
1830 #ifdef PERFUSE_DEBUG
1831 	if (resize_debug)
1832 		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1833 		    (void *)opc, old_vap->va_size, fao->attr.size);
1834 #endif
1835 
1836 	fuse_attr_to_vap(ps, old_va, &fao->attr);
1837 	set_expire(opc, NULL, fao);
1838 
1839 	ps->ps_destroy_msg(pm);
1840 
1841 out:
1842 	if (pnd->pnd_flags & PND_INRESIZE) {
1843 		pnd->pnd_flags &= ~PND_INRESIZE;
1844 		(void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
1845 	}
1846 
1847 	return error;
1848 }
1849 
1850 int
1851 perfuse_node_poll(struct puffs_usermount *pu, puffs_cookie_t opc, int *events)
1852 {
1853 	struct perfuse_state *ps;
1854 	perfuse_msg_t *pm;
1855 	struct fuse_poll_in *fpi;
1856 	struct fuse_poll_out *fpo;
1857 	int error;
1858 
1859 	ps = puffs_getspecific(pu);
1860 	/*
1861 	 * kh is set if FUSE_POLL_SCHEDULE_NOTIFY is set.
1862 	 *
1863 	 * XXX ps_new_msg() is called with NULL creds, which will
1864 	 * be interpreted as FUSE superuser. We have no way to
1865 	 * know the requesting process' credential, but since poll
1866 	 * is supposed to operate on a file that has been open,
1867 	 * permission should have already been checked at open time.
1868 	 * That still may breaks on filesystems that provides odd
1869 	 * semantics.
1870  	 */
1871 	pm = ps->ps_new_msg(pu, opc, FUSE_POLL, sizeof(*fpi), NULL);
1872 	fpi = GET_INPAYLOAD(ps, pm, fuse_poll_in);
1873 	fpi->fh = perfuse_get_fh(opc, FREAD);
1874 	fpi->kh = 0;
1875 	fpi->flags = 0;
1876 
1877 #ifdef PERFUSE_DEBUG
1878 	if (perfuse_diagflags & PDF_FH)
1879 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
1880 			"fh = 0x%"PRIx64"\n", __func__, (void *)opc,
1881 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fpi->fh);
1882 #endif
1883 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fpo), wait_reply)) != 0)
1884 		return error;
1885 
1886 	fpo = GET_OUTPAYLOAD(ps, pm, fuse_poll_out);
1887 	*events = fpo->revents;
1888 
1889 	ps->ps_destroy_msg(pm);
1890 
1891 	return 0;
1892 }
1893 
1894 /* ARGSUSED0 */
1895 int
1896 perfuse_node_mmap(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1897 	const struct puffs_cred *pcr)
1898 {
1899 	/*
1900 	 * Not implemented anymore in libfuse
1901 	 */
1902 	return ENOSYS;
1903 }
1904 
1905 /* ARGSUSED2 */
1906 int
1907 perfuse_node_fsync(struct puffs_usermount *pu, puffs_cookie_t opc,
1908 	const struct puffs_cred *pcr, int flags, off_t offlo, off_t offhi)
1909 {
1910 	int op;
1911 	perfuse_msg_t *pm;
1912 	struct perfuse_state *ps;
1913 	struct perfuse_node_data *pnd;
1914 	struct fuse_fsync_in *ffi;
1915 	uint64_t fh;
1916 	int error;
1917 
1918 	pm = NULL;
1919 	ps = puffs_getspecific(pu);
1920 	pnd = PERFUSE_NODE_DATA(opc);
1921 
1922 	/*
1923 	 * No need to sync a removed node
1924 	 */
1925 	if (pnd->pnd_flags & PND_REMOVED)
1926 		return 0;
1927 
1928 	/*
1929 	 * We do not sync closed files. They have been
1930 	 * sync at inactive time already.
1931 	 */
1932 	if (!(pnd->pnd_flags & PND_OPEN))
1933 		return 0;
1934 
1935 	if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR)
1936 		op = FUSE_FSYNCDIR;
1937 	else 		/* VREG but also other types such as VLNK */
1938 		op = FUSE_FSYNC;
1939 
1940 	/*
1941 	 * Do not sync if there are no change to sync
1942 	 * XXX remove that test on files if we implement mmap
1943 	 */
1944 #ifdef PERFUSE_DEBUG
1945 	if (perfuse_diagflags & PDF_SYNC)
1946 		DPRINTF("%s: TEST opc = %p, file = \"%s\" is %sdirty\n",
1947 			__func__, (void*)opc, perfuse_node_path(opc),
1948 			pnd->pnd_flags & PND_DIRTY ? "" : "not ");
1949 #endif
1950 	if (!(pnd->pnd_flags & PND_DIRTY))
1951 		return 0;
1952 
1953 	/*
1954 	 * It seems NetBSD can call fsync without open first
1955 	 * glusterfs complain in such a situation:
1956 	 * "FSYNC() ERR => -1 (Invalid argument)"
1957 	 * The file will be closed at inactive time.
1958 	 *
1959 	 * We open the directory for reading in order to sync.
1960 	 * This sounds rather counterintuitive, but it works.
1961 	 */
1962 	if (!(pnd->pnd_flags & PND_WFH)) {
1963 		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
1964 			goto out;
1965 	}
1966 
1967 	if (op == FUSE_FSYNCDIR)
1968 		fh = perfuse_get_fh(opc, FREAD);
1969 	else
1970 		fh = perfuse_get_fh(opc, FWRITE);
1971 
1972 	/*
1973 	 * If fsync_flags  is set, meta data should not be flushed.
1974 	 */
1975 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*ffi), pcr);
1976 	ffi = GET_INPAYLOAD(ps, pm, fuse_fsync_in);
1977 	ffi->fh = fh;
1978 	ffi->fsync_flags = (flags & FFILESYNC) ? 0 : 1;
1979 
1980 #ifdef PERFUSE_DEBUG
1981 	if (perfuse_diagflags & PDF_FH)
1982 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
1983 			__func__, (void *)opc,
1984 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, ffi->fh);
1985 #endif
1986 
1987 	if ((error = xchg_msg(pu, opc, pm,
1988 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
1989 		goto out;
1990 
1991 	/*
1992 	 * No reply beyond fuse_out_header: nothing to do on success
1993 	 * just clear the dirty flag
1994 	 */
1995 	pnd->pnd_flags &= ~PND_DIRTY;
1996 
1997 #ifdef PERFUSE_DEBUG
1998 	if (perfuse_diagflags & PDF_SYNC)
1999 		DPRINTF("%s: CLEAR opc = %p, file = \"%s\"\n",
2000 			__func__, (void*)opc, perfuse_node_path(opc));
2001 #endif
2002 
2003 	ps->ps_destroy_msg(pm);
2004 
2005 out:
2006 	/*
2007 	 * ENOSYS is not returned to kernel,
2008 	 */
2009 	if (error == ENOSYS)
2010 		error = 0;
2011 
2012 	return error;
2013 }
2014 
2015 /* ARGSUSED0 */
2016 int
2017 perfuse_node_seek(struct puffs_usermount *pu, puffs_cookie_t opc,
2018 	off_t oldoff, off_t newoff, const struct puffs_cred *pcr)
2019 {
2020 	return 0;
2021 }
2022 
2023 int
2024 perfuse_node_remove(struct puffs_usermount *pu, puffs_cookie_t opc,
2025 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2026 {
2027 	struct perfuse_state *ps;
2028 	struct perfuse_node_data *pnd;
2029 	perfuse_msg_t *pm;
2030 	char *path;
2031 	const char *name;
2032 	size_t len;
2033 	int error;
2034 
2035 	pnd = PERFUSE_NODE_DATA(opc);
2036 
2037 	if ((pnd->pnd_flags & PND_REMOVED) ||
2038 	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2039 		return ENOENT;
2040 
2041 #ifdef PERFUSE_DEBUG
2042 	if (targ == NULL)
2043 		DERRX(EX_SOFTWARE, "%s: targ is NULL", __func__);
2044 
2045 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
2046 		DPRINTF("%s: opc = %p, remove opc = %p, file = \"%s\"\n",
2047 			__func__, (void *)opc, (void *)targ, pcn->pcn_name);
2048 #endif
2049 	/*
2050 	 * Await for all operations on the deleted node to drain,
2051 	 * as the filesystem may be confused to have it deleted
2052 	 * during a getattr
2053 	 */
2054 	while (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_INXCHG)
2055 		requeue_request(pu, targ, PCQ_AFTERXCHG);
2056 
2057 	ps = puffs_getspecific(pu);
2058 	pnd = PERFUSE_NODE_DATA(opc);
2059 	name = pcn->pcn_name;
2060 	len = pcn->pcn_namelen + 1;
2061 
2062 	pm = ps->ps_new_msg(pu, opc, FUSE_UNLINK, len, pcn->pcn_cred);
2063 	path = _GET_INPAYLOAD(ps, pm, char *);
2064 	(void)strlcpy(path, name, len);
2065 
2066 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2067 		return error;
2068 
2069 	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2070 	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2071 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2072 
2073 	/*
2074 	 * The parent directory needs a sync
2075 	 */
2076 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2077 
2078 #ifdef PERFUSE_DEBUG
2079 	if (perfuse_diagflags & PDF_FILENAME)
2080 		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2081 			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2082 			pcn->pcn_name);
2083 #endif
2084 	ps->ps_destroy_msg(pm);
2085 
2086 	return 0;
2087 }
2088 
2089 int
2090 perfuse_node_link(struct puffs_usermount *pu, puffs_cookie_t opc,
2091 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2092 {
2093 	struct perfuse_state *ps;
2094 	perfuse_msg_t *pm;
2095 	const char *name;
2096 	size_t len;
2097 	struct puffs_node *pn;
2098 	struct fuse_link_in *fli;
2099 	int error;
2100 
2101 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2102 		return ENOENT;
2103 
2104 	ps = puffs_getspecific(pu);
2105 	pn = (struct puffs_node *)targ;
2106 	name = pcn->pcn_name;
2107 	len =  sizeof(*fli) + pcn->pcn_namelen + 1;
2108 
2109 	pm = ps->ps_new_msg(pu, opc, FUSE_LINK, len, pcn->pcn_cred);
2110 	fli = GET_INPAYLOAD(ps, pm, fuse_link_in);
2111 	fli->oldnodeid = PERFUSE_NODE_DATA(pn)->pnd_nodeid;
2112 	(void)strlcpy((char *)(void *)(fli + 1), name, len - sizeof(*fli));
2113 
2114 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2115 		return error;
2116 
2117 	ps->ps_destroy_msg(pm);
2118 
2119 	return 0;
2120 }
2121 
2122 int
2123 perfuse_node_rename(struct puffs_usermount *pu, puffs_cookie_t opc,
2124 	puffs_cookie_t src, const struct puffs_cn *pcn_src,
2125 	puffs_cookie_t targ_dir, puffs_cookie_t targ,
2126 	const struct puffs_cn *pcn_targ)
2127 {
2128 	struct perfuse_state *ps;
2129 	perfuse_msg_t *pm;
2130 	struct fuse_rename_in *fri;
2131 	const char *newname;
2132 	const char *oldname;
2133 	char *np;
2134 	int error;
2135 	size_t len;
2136 	size_t newname_len;
2137 	size_t oldname_len;
2138 
2139 	if ((PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED) ||
2140 	    (PERFUSE_NODE_DATA(src)->pnd_flags & PND_REMOVED) ||
2141 	    (PERFUSE_NODE_DATA(targ_dir)->pnd_flags & PND_REMOVED))
2142 		return ENOENT;
2143 
2144 	/*
2145 	 * Await for all operations on the deleted node to drain,
2146 	 * as the filesystem may be confused to have it deleted
2147 	 * during a getattr
2148 	 */
2149 	if ((struct puffs_node *)targ != NULL) {
2150 		while (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_INXCHG)
2151 			requeue_request(pu, targ, PCQ_AFTERXCHG);
2152 	} else {
2153 		while (PERFUSE_NODE_DATA(src)->pnd_flags & PND_INXCHG)
2154 			requeue_request(pu, src, PCQ_AFTERXCHG);
2155 	}
2156 
2157 	ps = puffs_getspecific(pu);
2158 	newname =  pcn_targ->pcn_name;
2159 	newname_len = pcn_targ->pcn_namelen + 1;
2160 	oldname =  pcn_src->pcn_name;
2161 	oldname_len = pcn_src->pcn_namelen + 1;
2162 
2163 	len = sizeof(*fri) + oldname_len + newname_len;
2164 	pm = ps->ps_new_msg(pu, opc, FUSE_RENAME, len, pcn_targ->pcn_cred);
2165 	fri = GET_INPAYLOAD(ps, pm, fuse_rename_in);
2166 	fri->newdir = PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid;
2167 	np = (char *)(void *)(fri + 1);
2168 	(void)strlcpy(np, oldname, oldname_len);
2169 	np += oldname_len;
2170 	(void)strlcpy(np, newname, newname_len);
2171 
2172 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2173 		return error;
2174 
2175 	if (opc != targ_dir) {
2176 		struct perfuse_node_data *srcdir_pnd;
2177 		struct perfuse_node_data *dstdir_pnd;
2178 		struct perfuse_node_data *src_pnd;
2179 
2180 		srcdir_pnd = PERFUSE_NODE_DATA(opc);
2181 		dstdir_pnd = PERFUSE_NODE_DATA(targ_dir);
2182 		src_pnd = PERFUSE_NODE_DATA(src);
2183 
2184 		TAILQ_REMOVE(&srcdir_pnd->pnd_children, src_pnd, pnd_next);
2185 		TAILQ_INSERT_TAIL(&dstdir_pnd->pnd_children, src_pnd, pnd_next);
2186 
2187 		srcdir_pnd->pnd_childcount--;
2188 		dstdir_pnd->pnd_childcount++;
2189 
2190 		src_pnd->pnd_parent = targ_dir;
2191 
2192 		PERFUSE_NODE_DATA(targ_dir)->pnd_flags |= PND_DIRTY;
2193 	}
2194 
2195 	(void)strlcpy(PERFUSE_NODE_DATA(src)->pnd_name, newname, MAXPATHLEN);
2196 
2197 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2198 
2199 	if ((struct puffs_node *)targ != NULL)
2200 		PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2201 
2202 #ifdef PERFUSE_DEBUG
2203 	if (perfuse_diagflags & PDF_FILENAME)
2204 		DPRINTF("%s: nodeid = 0x%"PRIx64" file = \"%s\" renamed \"%s\" "
2205 			"nodeid = 0x%"PRIx64" -> nodeid = 0x%"PRIx64" \"%s\"\n",
2206 	 		__func__, PERFUSE_NODE_DATA(src)->pnd_nodeid,
2207 			pcn_src->pcn_name, pcn_targ->pcn_name,
2208 			PERFUSE_NODE_DATA(opc)->pnd_nodeid,
2209 			PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid,
2210 			perfuse_node_path(targ_dir));
2211 #endif
2212 
2213 	ps->ps_destroy_msg(pm);
2214 
2215 	return 0;
2216 }
2217 
2218 int
2219 perfuse_node_mkdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2220 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
2221 	const struct vattr *vap)
2222 {
2223 	struct perfuse_state *ps;
2224 	perfuse_msg_t *pm;
2225 	struct fuse_mkdir_in *fmi;
2226 	const char *path;
2227 	size_t len;
2228 
2229 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2230 		return ENOENT;
2231 
2232 	ps = puffs_getspecific(pu);
2233 	path = pcn->pcn_name;
2234 	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
2235 
2236 	pm = ps->ps_new_msg(pu, opc, FUSE_MKDIR, len, pcn->pcn_cred);
2237 	fmi = GET_INPAYLOAD(ps, pm, fuse_mkdir_in);
2238 	fmi->mode = vap->va_mode;
2239 	fmi->umask = 0; 	/* Seems unused by libfuse? */
2240 	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
2241 
2242 	return node_mk_common(pu, opc, pni, pcn, pm);
2243 }
2244 
2245 
2246 int
2247 perfuse_node_rmdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2248 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2249 {
2250 	struct perfuse_state *ps;
2251 	struct perfuse_node_data *pnd;
2252 	perfuse_msg_t *pm;
2253 	char *path;
2254 	const char *name;
2255 	size_t len;
2256 	int error;
2257 
2258 	pnd = PERFUSE_NODE_DATA(opc);
2259 
2260 	if ((pnd->pnd_flags & PND_REMOVED) ||
2261 	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2262 		return ENOENT;
2263 
2264 	/*
2265 	 * Await for all operations on the deleted node to drain,
2266 	 * as the filesystem may be confused to have it deleted
2267 	 * during a getattr
2268 	 */
2269 	while (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_INXCHG)
2270 		requeue_request(pu, targ, PCQ_AFTERXCHG);
2271 
2272 	ps = puffs_getspecific(pu);
2273 	name = pcn->pcn_name;
2274 	len = pcn->pcn_namelen + 1;
2275 
2276 	pm = ps->ps_new_msg(pu, opc, FUSE_RMDIR, len, pcn->pcn_cred);
2277 	path = _GET_INPAYLOAD(ps, pm, char *);
2278 	(void)strlcpy(path, name, len);
2279 
2280 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2281 		return error;
2282 
2283 	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2284 	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2285 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2286 
2287 	/*
2288 	 * The parent directory needs a sync
2289 	 */
2290 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2291 
2292 #ifdef PERFUSE_DEBUG
2293 	if (perfuse_diagflags & PDF_FILENAME)
2294 		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2295 			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2296 			perfuse_node_path(targ));
2297 #endif
2298 	ps->ps_destroy_msg(pm);
2299 
2300 	return 0;
2301 }
2302 
2303 /* vap is unused */
2304 /* ARGSUSED4 */
2305 int
2306 perfuse_node_symlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2307 	struct puffs_newinfo *pni, const struct puffs_cn *pcn_src,
2308 	const struct vattr *vap, const char *link_target)
2309 {
2310 	struct perfuse_state *ps;
2311 	perfuse_msg_t *pm;
2312 	char *np;
2313 	const char *path;
2314 	size_t path_len;
2315 	size_t linkname_len;
2316 	size_t len;
2317 
2318 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2319 		return ENOENT;
2320 
2321 	ps = puffs_getspecific(pu);
2322 	path = pcn_src->pcn_name;
2323 	path_len = pcn_src->pcn_namelen + 1;
2324 	linkname_len = strlen(link_target) + 1;
2325 	len = path_len + linkname_len;
2326 
2327 	pm = ps->ps_new_msg(pu, opc, FUSE_SYMLINK, len, pcn_src->pcn_cred);
2328 	np = _GET_INPAYLOAD(ps, pm, char *);
2329 	(void)strlcpy(np, path, path_len);
2330 	np += path_len;
2331 	(void)strlcpy(np, link_target, linkname_len);
2332 
2333 	return node_mk_common(pu, opc, pni, pcn_src, pm);
2334 }
2335 
/*
 * Read directory entries of node opc.
 *
 * Unless a native buffer is already pending in pnd->pnd_dirent, the
 * directory is read from the filesystem with repeated FUSE_READDIR
 * requests, the replies being accumulated in pnd->pnd_all_fd until an
 * empty reply is received. The accumulated data is then converted by
 * fuse_to_dirent(), and readdir_buffered() fills dent, readoff and
 * reslen for the caller.
 *
 * The whole function runs with PND_INREADDIR set on the node, so that
 * concurrent readdir requests on the same node are queued.
 *
 * *eofflag is set to 1 when the filesystem sends an empty reply; it
 * is not touched otherwise. cookies and ncookies are not used.
 *
 * Returns 0 on success or an errno value: the error from
 * perfuse_node_open() or xchg_msg(), EIO on a malformed reply or
 * failed conversion, or whatever readdir_buffered() returns.
 */
/* ARGSUSED4 */
int
perfuse_node_readdir(struct puffs_usermount *pu, puffs_cookie_t opc,
	struct dirent *dent, off_t *readoff, size_t *reslen,
	const struct puffs_cred *pcr, int *eofflag, off_t *cookies,
	size_t *ncookies)
{
	perfuse_msg_t *pm;
	uint64_t fh;
	struct perfuse_state *ps;
	struct perfuse_node_data *pnd;
	struct fuse_read_in *fri;
	struct fuse_out_header *foh;
	struct fuse_dirent *fd;
	size_t foh_len;
	int error;
	size_t fd_maxlen;

	error = 0;
	ps = puffs_getspecific(pu);

	/*
	 * readdir state is kept at node level, and several readdir
	 * requests can be issued at the same time on the same node.
	 * We need to queue requests so that only one is in readdir
	 * code at the same time.
	 */
	pnd = PERFUSE_NODE_DATA(opc);
	while (pnd->pnd_flags & PND_INREADDIR)
		requeue_request(pu, opc, PCQ_READDIR);
	pnd->pnd_flags |= PND_INREADDIR;

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_READDIR)
		DPRINTF("%s: READDIR opc = %p enter critical section\n",
			__func__, (void *)opc);
#endif
	/*
	 * Re-initialize pnd->pnd_fd_cookie on the first readdir for a node
	 */
	if (*readoff == 0)
		pnd->pnd_fd_cookie = 0;

	/*
	 * Do we already have the data buffered?
	 * If so, skip the exchange with the filesystem and go
	 * straight to readdir_buffered() (error is still 0 here).
	 */
	if (pnd->pnd_dirent != NULL)
		goto out;
	pnd->pnd_dirent_len = 0;

	/*
	 * It seems NetBSD can call readdir without open first
	 * libfuse will crash if it is done that way, hence open first.
	 */
	if (!(pnd->pnd_flags & PND_OPEN)) {
		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
			goto out;
	}

	fh = perfuse_get_fh(opc, FREAD);

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_FH)
		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
			"rfh = 0x%"PRIx64"\n", __func__, (void *)opc,
			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fh);
#endif

	/*
	 * Start with an empty accumulation buffer. The size requested
	 * in each FUSE_READDIR is ps_max_readahead minus the reply header.
	 */
	pnd->pnd_all_fd = NULL;
	pnd->pnd_all_fd_len = 0;
	fd_maxlen = ps->ps_max_readahead - sizeof(*foh);

	/*
	 * Loop until the filesystem sends an empty reply, appending
	 * each reply payload to pnd->pnd_all_fd.
	 */
	do {
		size_t fd_len;
		char *afdp;

		pm = ps->ps_new_msg(pu, opc, FUSE_READDIR, sizeof(*fri), pcr);

		/*
		 * read_flags, lock_owner and flags are unused in libfuse
		 */
		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
		fri->fh = fh;
		fri->offset = pnd->pnd_fd_cookie;
		fri->size = (uint32_t)fd_maxlen;
		fri->read_flags = 0;
		fri->lock_owner = 0;
		fri->flags = 0;

		if ((error = xchg_msg(pu, opc, pm,
				      UNSPEC_REPLY_LEN, wait_reply)) != 0)
			goto out;

		/*
		 * There are many puffs_framebufs calls later,
		 * therefore foh will not be valid for a long time.
		 * Just get the length and forget it.
		 */
		foh = GET_OUTHDR(ps, pm);
		foh_len = foh->len;

		/*
		 * Empty read: we reached the end of the buffer.
		 */
		if (foh_len == sizeof(*foh)) {
			ps->ps_destroy_msg(pm);
			*eofflag = 1;
			break;
		}

		/*
		 * Check for corrupted message.
		 * A non-empty reply must hold at least one fuse_dirent.
		 */
		if (foh_len < sizeof(*foh) + sizeof(*fd)) {
			ps->ps_destroy_msg(pm);
			DWARNX("readdir reply too short");
			error = EIO;
			goto out;
		}


		fd = GET_OUTPAYLOAD(ps, pm, fuse_dirent);
		fd_len = foh_len - sizeof(*foh);

		/*
		 * Grow the accumulation buffer and append this reply.
		 * NOTE(review): DERR() is presumably fatal, which is why
		 * overwriting the pointer with the realloc() result is
		 * tolerated here -- confirm.
		 */
		pnd->pnd_all_fd = realloc(pnd->pnd_all_fd,
					  pnd->pnd_all_fd_len + fd_len);
		if (pnd->pnd_all_fd  == NULL)
			DERR(EX_OSERR, "%s: malloc failed", __func__);

		afdp = (char *)(void *)pnd->pnd_all_fd + pnd->pnd_all_fd_len;
		(void)memcpy(afdp, fd, fd_len);

		pnd->pnd_all_fd_len += fd_len;

		/*
		 * The fd->off field is used as a cookie for
		 * resuming the next readdir() where this one was left.
		 */
		pnd->pnd_fd_cookie = readdir_last_cookie(fd, fd_len);

		ps->ps_destroy_msg(pm);
	} while (1 /* CONSTCOND */);

	/*
	 * Convert what we gathered, if anything. The result is later
	 * consumed by readdir_buffered().
	 */
	if (pnd->pnd_all_fd != NULL) {
		if (fuse_to_dirent(pu, opc, pnd->pnd_all_fd,
				   pnd->pnd_all_fd_len) == -1)
			error = EIO;
	}

out:
	/*
	 * The accumulation buffer is never kept across calls:
	 * release it on both success and error paths.
	 */
	if (pnd->pnd_all_fd != NULL) {
		free(pnd->pnd_all_fd);
		pnd->pnd_all_fd = NULL;
		pnd->pnd_all_fd_len = 0;
	}

	if (error == 0)
		error = readdir_buffered(opc, dent, readoff, reslen);

	/*
	 * Schedule queued readdir requests
	 */
	pnd->pnd_flags &= ~PND_INREADDIR;
	(void)dequeue_requests(ps, opc, PCQ_READDIR, DEQUEUE_ALL);

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_READDIR)
		DPRINTF("%s: READDIR opc = %p exit critical section\n",
			__func__, (void *)opc);
#endif

	return error;
}
2509 
2510 int
2511 perfuse_node_readlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2512 	const struct puffs_cred *pcr, char *linkname, size_t *linklen)
2513 {
2514 	struct perfuse_state *ps;
2515 	perfuse_msg_t *pm;
2516 	int error;
2517 	size_t len;
2518 	struct fuse_out_header *foh;
2519 
2520 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2521 		return ENOENT;
2522 
2523 	ps = puffs_getspecific(pu);
2524 
2525 	pm = ps->ps_new_msg(pu, opc, FUSE_READLINK, 0, pcr);
2526 
2527 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2528 		return error;
2529 
2530 	foh = GET_OUTHDR(ps, pm);
2531 	len = foh->len - sizeof(*foh);
2532 	if (len > *linklen)
2533 		DERRX(EX_PROTOCOL, "path len = %zd too long", len);
2534 	if (len == 0)
2535 		DERRX(EX_PROTOCOL, "path len = %zd too short", len);
2536 
2537 	/*
2538 	 * FUSE filesystems return a NUL terminated string, we
2539 	 * do not want to trailing \0
2540 	 */
2541 	*linklen = len - 1;
2542 	(void)memcpy(linkname, _GET_OUTPAYLOAD(ps, pm, char *), len);
2543 
2544 	ps->ps_destroy_msg(pm);
2545 
2546 	return 0;
2547 }
2548 
/*
 * Reclaim node opc.
 *
 * The node is flagged PND_RECLAIMED and its pnd_puffs_nlookup counter
 * is decremented. Then, starting at opc and walking up through
 * pnd_parent until the root is reached, each node that is reclaimed,
 * has a zero pnd_puffs_nlookup and has no child left gets a
 * FUSE_FORGET message and is destroyed with perfuse_destroy_pn().
 * The walk stops at the first node that does not meet these
 * conditions.
 *
 * The node with nodeid FUSE_ROOT_ID is never forgotten.
 *
 * Always returns 0.
 */
int
perfuse_node_reclaim(struct puffs_usermount *pu, puffs_cookie_t opc)
{
	struct perfuse_state *ps;
	perfuse_msg_t *pm;
	struct perfuse_node_data *pnd;
	struct fuse_forget_in *ffi;
	struct puffs_node *pn;
	struct puffs_node *pn_root;

	ps = puffs_getspecific(pu);
	pnd = PERFUSE_NODE_DATA(opc);

	/*
	 * Never forget the root.
	 */
	if (pnd->pnd_nodeid == FUSE_ROOT_ID)
		return 0;

	pnd->pnd_flags |= PND_RECLAIMED;
	pnd->pnd_puffs_nlookup--;

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_RECLAIM)
		DPRINTF("%s (nodeid %"PRId64") reclaimed\n",
			perfuse_node_path(opc), pnd->pnd_nodeid);
#endif

	/*
	 * Walk from the reclaimed node up to the root, forgetting
	 * every ancestor that became useless along the way.
	 */
	pn_root = puffs_getroot(pu);
	pn = (struct puffs_node *)opc;
	while (pn != pn_root) {
		struct puffs_node *parent_pn;

		pnd = PERFUSE_NODE_DATA(pn);

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_RECLAIM)
		DPRINTF("%s (nodeid %"PRId64") is %sreclaimed, nlookup = %d "
			"has childcount %d %s%s%s%s, pending ops:%s%s%s\n",
		        perfuse_node_path((puffs_cookie_t)pn), pnd->pnd_nodeid,
		        pnd->pnd_flags & PND_RECLAIMED ? "" : "not ",
			pnd->pnd_puffs_nlookup, pnd->pnd_childcount,
			pnd->pnd_flags & PND_OPEN ? "open " : "not open",
			pnd->pnd_flags & PND_RFH ? "r" : "",
			pnd->pnd_flags & PND_WFH ? "w" : "",
			pnd->pnd_flags & PND_BUSY ? "" : " none",
			pnd->pnd_flags & PND_INREADDIR ? " readdir" : "",
			pnd->pnd_flags & PND_INWRITE ? " write" : "",
			pnd->pnd_flags & PND_INOPEN ? " open" : "");
#endif
		/*
		 * Stop here if this node is still needed: not reclaimed
		 * yet, still looked up, or still having children.
		 */
		if (!(pnd->pnd_flags & PND_RECLAIMED) ||
		    (pnd->pnd_puffs_nlookup != 0) ||
		    (pnd->pnd_childcount != 0))
			return 0;

		/*
		 * Debug-only sanity checks.
		 * NOTE(review): both messages print opc, not the node pn
		 * being examined, and the "still open" test also fires
		 * on a non-empty pnd_pcq queue -- confirm this is intended.
		 */
#ifdef PERFUSE_DEBUG
		if ((pnd->pnd_flags & PND_OPEN) ||
		       !TAILQ_EMPTY(&pnd->pnd_pcq))
			DERRX(EX_SOFTWARE, "%s: opc = %p: still open",
			      __func__, (void *)opc);

		if ((pnd->pnd_flags & PND_BUSY) ||
		       !TAILQ_EMPTY(&pnd->pnd_pcq))
			DERRX(EX_SOFTWARE, "%s: opc = %p: ongoing operations",
			      __func__, (void *)opc);
#endif

		/*
		 * Send the FORGET message
		 *
		 * ps_new_msg() is called with NULL creds, which will
		 * be interpreted as FUSE superuser. This is obviously
		 * fine since we operate with kernel creds here.
		 */
		pm = ps->ps_new_msg(pu, (puffs_cookie_t)pn, FUSE_FORGET,
			      sizeof(*ffi), NULL);
		ffi = GET_INPAYLOAD(ps, pm, fuse_forget_in);
		ffi->nlookup = pnd->pnd_fuse_nlookup;

		/*
		 * No reply is expected, pm is freed in xchg_msg
		 */
		(void)xchg_msg(pu, (puffs_cookie_t)pn,
			       pm, UNSPEC_REPLY_LEN, no_reply);

		/*
		 * Fetch the parent before the node is destroyed:
		 * pnd is read here and not used again for this node
		 * once perfuse_destroy_pn() has been called.
		 */
		parent_pn = pnd->pnd_parent;

		perfuse_destroy_pn(pn);

		pn = parent_pn;
	}

	return 0;
}
2643 
2644 int
2645 perfuse_node_inactive(struct puffs_usermount *pu, puffs_cookie_t opc)
2646 {
2647 	struct perfuse_state *ps;
2648 	struct perfuse_node_data *pnd;
2649 
2650 	ps = puffs_getspecific(pu);
2651 	pnd = PERFUSE_NODE_DATA(opc);
2652 
2653 	if (!(pnd->pnd_flags & (PND_OPEN|PND_REMOVED)))
2654 		return 0;
2655 
2656 	/*
2657 	 * Make sure all operation are finished
2658 	 * There can be an ongoing write. Other
2659 	 * operation wait for all data before
2660 	 * the close/inactive.
2661 	 */
2662 	while (pnd->pnd_flags & PND_INWRITE)
2663 		requeue_request(pu, opc, PCQ_AFTERWRITE);
2664 
2665 	/*
2666 	 * The inactive operation may be cancelled.
2667 	 * If no open is in progress, set PND_INOPEN
2668 	 * so that a new open will be queued.
2669 	 */
2670 	if (pnd->pnd_flags & PND_INOPEN)
2671 		return 0;
2672 
2673 	pnd->pnd_flags |= PND_INOPEN;
2674 
2675 	/*
2676 	 * Sync data
2677 	 */
2678 	if (pnd->pnd_flags & PND_DIRTY)
2679 		(void)perfuse_node_fsync(pu, opc, NULL, 0, 0, 0);
2680 
2681 	/*
2682 	 * Close handles
2683 	 */
2684 	if (pnd->pnd_flags & PND_WFH)
2685 		(void)perfuse_node_close_common(pu, opc, FWRITE);
2686 
2687 	if (pnd->pnd_flags & PND_RFH)
2688 		(void)perfuse_node_close_common(pu, opc, FREAD);
2689 
2690 	/*
2691 	 * This will cause a reclaim to be sent
2692 	 */
2693 	if (pnd->pnd_flags & PND_REMOVED)
2694 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1);
2695 
2696 	/*
2697 	 * Schedule awaiting operations
2698 	 */
2699 	pnd->pnd_flags &= ~PND_INOPEN;
2700 	(void)dequeue_requests(ps, opc, PCQ_OPEN, DEQUEUE_ALL);
2701 
2702 	return 0;
2703 }
2704 
2705 
2706 /* ARGSUSED0 */
2707 int
2708 perfuse_node_print(struct puffs_usermount *pu, puffs_cookie_t opc)
2709 {
2710 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
2711 	return 0;
2712 }
2713 
2714 /* ARGSUSED0 */
2715 int
2716 perfuse_node_pathconf(struct puffs_usermount *pu, puffs_cookie_t opc,
2717 	int name, int *retval)
2718 {
2719 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
2720 	return 0;
2721 }
2722 
2723 int
2724 perfuse_node_advlock(struct puffs_usermount *pu, puffs_cookie_t opc,
2725 	void *id, int op, struct flock *fl, int flags)
2726 {
2727 	struct perfuse_state *ps;
2728 	int fop;
2729 	perfuse_msg_t *pm;
2730 	uint64_t fh;
2731 	struct fuse_lk_in *fli;
2732 	struct fuse_out_header *foh;
2733 	struct fuse_lk_out *flo;
2734 	uint32_t owner;
2735 	size_t len;
2736 	int error;
2737 
2738 	/*
2739 	 * Make sure we do have a filehandle, as the FUSE filesystem
2740 	 * expect one. E.g.: if we provide none, GlusterFS logs an error
2741 	 * "0-glusterfs-fuse: xl is NULL"
2742 	 *
2743 	 * We need the read file handle if the file is open read only,
2744 	 * in order to support shared locks on read-only files.
2745 	 * NB: The kernel always sends advlock for read-only
2746 	 * files at exit time when the process used lock, see
2747 	 * sys_exit -> exit1 -> fd_free -> fd_close -> VOP_ADVLOCK
2748 	 */
2749 	if ((fh = perfuse_get_fh(opc, FREAD)) == FUSE_UNKNOWN_FH)
2750 		return EBADF;
2751 
2752 	ps = puffs_getspecific(pu);
2753 
2754 	if (op == F_GETLK)
2755 		fop = FUSE_GETLK;
2756 	else
2757 		fop = (flags & F_WAIT) ? FUSE_SETLKW : FUSE_SETLK;
2758 
2759 	/*
2760 	 * XXX ps_new_msg() is called with NULL creds, which will
2761 	 * be interpreted as FUSE superuser. We have no way to
2762 	 * know the requesting process' credential, but since advlock()
2763 	 * is supposed to operate on a file that has been open(),
2764 	 * permission should have already been checked at open() time.
2765 	 */
2766 	pm = ps->ps_new_msg(pu, opc, fop, sizeof(*fli), NULL);
2767 	fli = GET_INPAYLOAD(ps, pm, fuse_lk_in);
2768 	fli->fh = fh;
2769 	fli->owner = (uint64_t)(vaddr_t)id;
2770 	fli->lk.start = fl->l_start;
2771 	fli->lk.end = fl->l_start + fl->l_len;
2772 	fli->lk.type = fl->l_type;
2773 	fli->lk.pid = fl->l_pid;
2774 	fli->lk_flags = (flags & F_FLOCK) ? FUSE_LK_FLOCK : 0;
2775 
2776 	owner = (uint32_t)(vaddr_t)id;
2777 
2778 #ifdef PERFUSE_DEBUG
2779 	if (perfuse_diagflags & PDF_FH)
2780 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
2781 			__func__, (void *)opc,
2782 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fli->fh);
2783 #endif
2784 
2785 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2786 		return error;
2787 
2788 	foh = GET_OUTHDR(ps, pm);
2789 	len = foh->len - sizeof(*foh);
2790 
2791 	/*
2792 	 * Save or clear the lock
2793 	 */
2794 	switch (op) {
2795 	case F_GETLK:
2796 		if (len != sizeof(*flo))
2797 			DERRX(EX_SOFTWARE,
2798 			      "%s: Unexpected lock reply len %zd",
2799 			      __func__, len);
2800 
2801 		flo = GET_OUTPAYLOAD(ps, pm, fuse_lk_out);
2802 		fl->l_start = flo->lk.start;
2803 		fl->l_len = flo->lk.end - flo->lk.start;
2804 		fl->l_pid = flo->lk.pid;
2805 		fl->l_type = flo->lk.type;
2806 		fl->l_whence = SEEK_SET;	/* libfuse hardcodes it */
2807 
2808 		PERFUSE_NODE_DATA(opc)->pnd_lock_owner = flo->lk.pid;
2809 		break;
2810 	case F_UNLCK:
2811 		owner = 0;
2812 		/* FALLTHROUGH */
2813 	case F_SETLK:
2814 		/* FALLTHROUGH */
2815 	case F_SETLKW:
2816 		if (error != 0)
2817 			PERFUSE_NODE_DATA(opc)->pnd_lock_owner = owner;
2818 
2819 		if (len != 0)
2820 			DERRX(EX_SOFTWARE,
2821 			      "%s: Unexpected unlock reply len %zd",
2822 			      __func__, len);
2823 
2824 		break;
2825 	default:
2826 		DERRX(EX_SOFTWARE, "%s: Unexpected op %d", __func__, op);
2827 		break;
2828 	}
2829 
2830 	ps->ps_destroy_msg(pm);
2831 
2832 	return 0;
2833 }
2834 
2835 int
2836 perfuse_node_read(struct puffs_usermount *pu, puffs_cookie_t opc, uint8_t *buf,
2837 	off_t offset, size_t *resid, const struct puffs_cred *pcr, int ioflag)
2838 {
2839 	struct perfuse_state *ps;
2840 	struct perfuse_node_data *pnd;
2841 	const struct vattr *vap;
2842 	perfuse_msg_t *pm;
2843 	struct fuse_read_in *fri;
2844 	struct fuse_out_header *foh;
2845 	size_t readen;
2846 	int error;
2847 
2848 	ps = puffs_getspecific(pu);
2849 	pnd = PERFUSE_NODE_DATA(opc);
2850 	vap = puffs_pn_getvap((struct puffs_node *)opc);
2851 
2852 	/*
2853 	 * NetBSD turns that into a getdents(2) output
2854 	 * We just do a EISDIR as this feature is of little use.
2855 	 */
2856 	if (vap->va_type == VDIR)
2857 		return EISDIR;
2858 
2859 	if ((u_quad_t)offset + *resid > vap->va_size)
2860 		DWARNX("%s %p read %lld@%zu beyond EOF %" PRIu64 "\n",
2861 		       __func__, (void *)opc, (long long)offset,
2862 		       *resid, vap->va_size);
2863 
2864 	do {
2865 		size_t max_read;
2866 
2867 		max_read = ps->ps_max_readahead - sizeof(*foh);
2868 		/*
2869 		 * flags may be set to FUSE_READ_LOCKOWNER
2870 		 * if lock_owner is provided.
2871 		 */
2872 		pm = ps->ps_new_msg(pu, opc, FUSE_READ, sizeof(*fri), pcr);
2873 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
2874 		fri->fh = perfuse_get_fh(opc, FREAD);
2875 		fri->offset = offset;
2876 		fri->size = (uint32_t)MIN(*resid, max_read);
2877 		fri->read_flags = 0; /* XXX Unused by libfuse? */
2878 		fri->lock_owner = pnd->pnd_lock_owner;
2879 		fri->flags = 0;
2880 		fri->flags |= (fri->lock_owner != 0) ? FUSE_READ_LOCKOWNER : 0;
2881 
2882 #ifdef PERFUSE_DEBUG
2883 	if (perfuse_diagflags & PDF_FH)
2884 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
2885 			__func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
2886 #endif
2887 		error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply);
2888 		if (error  != 0)
2889 			return error;
2890 
2891 		foh = GET_OUTHDR(ps, pm);
2892 		readen = foh->len - sizeof(*foh);
2893 
2894 #ifdef PERFUSE_DEBUG
2895 		if (readen > *resid)
2896 			DERRX(EX_SOFTWARE, "%s: Unexpected big read %zd",
2897 			      __func__, readen);
2898 #endif
2899 
2900 		(void)memcpy(buf,  _GET_OUTPAYLOAD(ps, pm, char *), readen);
2901 
2902 		buf += readen;
2903 		offset += readen;
2904 		*resid -= readen;
2905 
2906 		ps->ps_destroy_msg(pm);
2907 	} while ((*resid != 0) && (readen != 0));
2908 
2909 	if (ioflag & (IO_SYNC|IO_DSYNC))
2910 		ps->ps_syncreads++;
2911 	else
2912 		ps->ps_asyncreads++;
2913 
2914 	return 0;
2915 }
2916 
2917 int
2918 perfuse_node_write(struct puffs_usermount *pu, puffs_cookie_t opc, uint8_t *buf,
2919 	off_t offset, size_t *resid, const struct puffs_cred *pcr, int ioflag)
2920 {
2921 	struct perfuse_state *ps;
2922 	struct perfuse_node_data *pnd;
2923 	struct vattr *vap;
2924 	perfuse_msg_t *pm;
2925 	struct fuse_write_in *fwi;
2926 	struct fuse_write_out *fwo;
2927 	size_t data_len;
2928 	size_t payload_len;
2929 	size_t written;
2930 	int inresize;
2931 	int error;
2932 
2933 	ps = puffs_getspecific(pu);
2934 	pnd = PERFUSE_NODE_DATA(opc);
2935 	vap = puffs_pn_getvap((struct puffs_node *)opc);
2936 	written = 0;
2937 	inresize = 0;
2938 	error = 0;
2939 
2940 	if (vap->va_type == VDIR)
2941 		return EISDIR;
2942 
2943 	/*
2944 	 * We need to queue write requests in order to avoid
2945 	 * dequeueing PCQ_AFTERWRITE when there are pending writes.
2946 	 */
2947 	while (pnd->pnd_flags & PND_INWRITE)
2948 		requeue_request(pu, opc, PCQ_WRITE);
2949 	pnd->pnd_flags |= PND_INWRITE;
2950 
2951 	/*
2952 	 * Serialize size access, see comment in perfuse_node_setattr().
2953 	 */
2954 	if ((u_quad_t)offset + *resid > vap->va_size) {
2955 		while (pnd->pnd_flags & PND_INRESIZE)
2956 			requeue_request(pu, opc, PCQ_RESIZE);
2957 		pnd->pnd_flags |= PND_INRESIZE;
2958 		inresize = 1;
2959 	}
2960 
2961 	/*
2962 	 * append flag: re-read the file size so that
2963 	 * we get the latest value.
2964 	 */
2965 	if (ioflag & PUFFS_IO_APPEND) {
2966 		DWARNX("%s: PUFFS_IO_APPEND set, untested code", __func__);
2967 
2968 		if ((error = perfuse_node_getattr(pu, opc, vap, pcr)) != 0)
2969 			goto out;
2970 
2971 		offset = vap->va_size;
2972 	}
2973 
2974 #ifdef PERFUSE_DEBUG
2975 	if (perfuse_diagflags & PDF_RESIZE)
2976 		DPRINTF(">> %s %p %" PRIu64 "\n", __func__,
2977 			(void *)opc, vap->va_size);
2978 #endif
2979 
2980 	do {
2981 		size_t max_write;
2982 		/*
2983 		 * There is a writepage flag when data
2984 		 * is aligned to page size. Use it for
2985 		 * everything but the data after the last
2986 		 * page boundary.
2987 		 */
2988 		max_write = ps->ps_max_write - sizeof(*fwi);
2989 
2990 		data_len = MIN(*resid, max_write);
2991 		if (data_len > (size_t)sysconf(_SC_PAGESIZE))
2992 			data_len = data_len & ~(sysconf(_SC_PAGESIZE) - 1);
2993 
2994 		payload_len = data_len + sizeof(*fwi);
2995 
2996 		/*
2997 		 * flags may be set to FUSE_WRITE_CACHE (XXX usage?)
2998 		 * or FUSE_WRITE_LOCKOWNER, if lock_owner is provided.
2999 		 * write_flags is set to 1 for writepage.
3000 		 */
3001 		pm = ps->ps_new_msg(pu, opc, FUSE_WRITE, payload_len, pcr);
3002 		fwi = GET_INPAYLOAD(ps, pm, fuse_write_in);
3003 		fwi->fh = perfuse_get_fh(opc, FWRITE);
3004 		fwi->offset = offset;
3005 		fwi->size = (uint32_t)data_len;
3006 		fwi->write_flags = (fwi->size % sysconf(_SC_PAGESIZE)) ? 0 : 1;
3007 		fwi->lock_owner = pnd->pnd_lock_owner;
3008 		fwi->flags = 0;
3009 		fwi->flags |= (fwi->lock_owner != 0) ? FUSE_WRITE_LOCKOWNER : 0;
3010 		fwi->flags |= (ioflag & IO_DIRECT) ? 0 : FUSE_WRITE_CACHE;
3011 		(void)memcpy((fwi + 1), buf, data_len);
3012 
3013 
3014 #ifdef PERFUSE_DEBUG
3015 		if (perfuse_diagflags & PDF_FH)
3016 			DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
3017 				"fh = 0x%"PRIx64"\n", __func__,
3018 				(void *)opc, pnd->pnd_nodeid, fwi->fh);
3019 #endif
3020 		if ((error = xchg_msg(pu, opc, pm,
3021 				      sizeof(*fwo), wait_reply)) != 0)
3022 			goto out;
3023 
3024 		fwo = GET_OUTPAYLOAD(ps, pm, fuse_write_out);
3025 		written = fwo->size;
3026 #ifdef PERFUSE_DEBUG
3027 		if (written > *resid)
3028 			DERRX(EX_SOFTWARE, "%s: Unexpected big write %zd",
3029 			      __func__, written);
3030 #endif
3031 		*resid -= written;
3032 		offset += written;
3033 		buf += written;
3034 
3035 		ps->ps_destroy_msg(pm);
3036 	} while (*resid != 0);
3037 
3038 	/*
3039 	 * puffs_ops(3) says
3040 	 *  "everything must be written or an error will be generated"
3041 	 */
3042 	if (*resid != 0)
3043 		error = EFBIG;
3044 
3045 #ifdef PERFUSE_DEBUG
3046 	if (perfuse_diagflags & PDF_RESIZE) {
3047 		if (offset > (off_t)vap->va_size)
3048 			DPRINTF("<< %s %p %" PRIu64 " -> %lld\n", __func__,
3049 				(void *)opc, vap->va_size, (long long)offset);
3050 		else
3051 			DPRINTF("<< %s %p \n", __func__, (void *)opc);
3052 	}
3053 #endif
3054 
3055 	/*
3056 	 * Update file size if we wrote beyond the end
3057 	 */
3058 	if (offset > (off_t)vap->va_size)
3059 		vap->va_size = offset;
3060 
3061 	if (inresize) {
3062 #ifdef PERFUSE_DEBUG
3063 		if (!(pnd->pnd_flags & PND_INRESIZE))
3064 			DERRX(EX_SOFTWARE, "file write grow without resize");
3065 #endif
3066 		pnd->pnd_flags &= ~PND_INRESIZE;
3067 		(void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
3068 	}
3069 
3070 
3071 	/*
3072 	 * Statistics
3073 	 */
3074 	if (ioflag & (IO_SYNC|IO_DSYNC))
3075 		ps->ps_syncwrites++;
3076 	else
3077 		ps->ps_asyncwrites++;
3078 
3079 	/*
3080 	 * Remember to sync the file
3081 	 */
3082 	pnd->pnd_flags |= PND_DIRTY;
3083 
3084 #ifdef PERFUSE_DEBUG
3085 	if (perfuse_diagflags & PDF_SYNC)
3086 		DPRINTF("%s: DIRTY opc = %p, file = \"%s\"\n",
3087 			__func__, (void*)opc, perfuse_node_path(opc));
3088 #endif
3089 
3090 out:
3091 	/*
3092 	 * If there are no more queued write, we can resume
3093 	 * an operation awaiting write completion.
3094 	 */
3095 	pnd->pnd_flags &= ~PND_INWRITE;
3096 	if (dequeue_requests(ps, opc, PCQ_WRITE, 1) == 0)
3097 		(void)dequeue_requests(ps, opc, PCQ_AFTERWRITE, DEQUEUE_ALL);
3098 
3099 	return error;
3100 }
3101 
3102 /* ARGSUSED0 */
3103 void
3104 perfuse_cache_write(struct puffs_usermount *pu, puffs_cookie_t opc, size_t size,
3105 	struct puffs_cacherun *runs)
3106 {
3107 	return;
3108 }
3109 
3110 /* ARGSUSED4 */
3111 int
3112 perfuse_node_getextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3113 	int attrns, const char *attrname, size_t *attrsize, uint8_t *attr,
3114 	size_t *resid, const struct puffs_cred *pcr)
3115 {
3116 	struct perfuse_state *ps;
3117 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3118 	perfuse_msg_t *pm;
3119 	struct fuse_getxattr_in *fgi;
3120 	struct fuse_getxattr_out *fgo;
3121 	struct fuse_out_header *foh;
3122 	size_t attrnamelen;
3123 	size_t len;
3124 	char *np;
3125 	int error;
3126 
3127 	ps = puffs_getspecific(pu);
3128 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3129 	attrnamelen = strlen(attrname) + 1;
3130 	len = sizeof(*fgi) + attrnamelen;
3131 
3132 	pm = ps->ps_new_msg(pu, opc, FUSE_GETXATTR, len, pcr);
3133 	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3134 	fgi->size = (unsigned int)((resid != NULL) ? *resid : 0);
3135 	np = (char *)(void *)(fgi + 1);
3136 	(void)strlcpy(np, attrname, attrnamelen);
3137 
3138 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3139 		return error;
3140 
3141 	/*
3142 	 * We just get fuse_getattr_out with list size if we requested
3143 	 * a null size.
3144 	 */
3145 	if (resid == NULL) {
3146 		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3147 
3148 		if (attrsize != NULL)
3149 			*attrsize = fgo->size;
3150 
3151 		ps->ps_destroy_msg(pm);
3152 		return 0;
3153 	}
3154 
3155 	/*
3156 	 * And with a non null requested size, we get the list just
3157 	 * after the header
3158 	 */
3159 	foh = GET_OUTHDR(ps, pm);
3160 	np = (char *)(void *)(foh + 1);
3161 
3162 	if (resid != NULL) {
3163 		len = MAX(foh->len - sizeof(*foh), *resid);
3164 		(void)memcpy(attr, np, len);
3165 		*resid -= len;
3166 	}
3167 
3168 	ps->ps_destroy_msg(pm);
3169 
3170 	return 0;
3171 }
3172 
3173 int
3174 perfuse_node_setextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3175 	int attrns, const char *attrname, uint8_t *attr, size_t *resid,
3176 	const struct puffs_cred *pcr)
3177 {
3178 	struct perfuse_state *ps;
3179 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3180 	perfuse_msg_t *pm;
3181 	struct fuse_setxattr_in *fsi;
3182 	size_t attrnamelen;
3183 	size_t len;
3184 	char *np;
3185 	int error;
3186 
3187 	ps = puffs_getspecific(pu);
3188 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3189 	attrnamelen = strlen(attrname) + 1;
3190 	len = sizeof(*fsi) + attrnamelen + *resid;
3191 
3192 	pm = ps->ps_new_msg(pu, opc, FUSE_SETXATTR, len, pcr);
3193 	fsi = GET_INPAYLOAD(ps, pm, fuse_setxattr_in);
3194 	fsi->size = (unsigned int)*resid;
3195 	fsi->flags = 0;
3196 	np = (char *)(void *)(fsi + 1);
3197 	(void)strlcpy(np, attrname, attrnamelen);
3198 	np += attrnamelen;
3199 	(void)memcpy(np, (char *)attr, *resid);
3200 
3201 	if ((error = xchg_msg(pu, opc, pm,
3202 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
3203 		return error;
3204 
3205 	*resid = 0;
3206 	ps->ps_destroy_msg(pm);
3207 
3208 	return 0;
3209 }
3210 
3211 /* ARGSUSED2 */
3212 int
3213 perfuse_node_listextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3214 	int attrns, size_t *attrsize, uint8_t *attrs, size_t *resid, int flag,
3215 	const struct puffs_cred *pcr)
3216 {
3217 	struct perfuse_state *ps;
3218 	perfuse_msg_t *pm;
3219 	struct fuse_getxattr_in *fgi;
3220 	struct fuse_getxattr_out *fgo;
3221 	struct fuse_out_header *foh;
3222 	char *np;
3223 	size_t len, puffs_len;
3224 	int error;
3225 
3226 	ps = puffs_getspecific(pu);
3227 	len = sizeof(*fgi);
3228 
3229 	pm = ps->ps_new_msg(pu, opc, FUSE_LISTXATTR, len, pcr);
3230 	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3231 	if (resid != NULL)
3232 		fgi->size = (unsigned int)*resid;
3233 	else
3234 		fgi->size = 0;
3235 
3236 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3237 		return error;
3238 
3239 	/*
3240 	 * We just get fuse_getattr_out with list size if we requested
3241 	 * a null size.
3242 	 */
3243 	if (resid == NULL) {
3244 		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3245 
3246 		if (attrsize != NULL)
3247 			*attrsize = fgo->size;
3248 
3249 		ps->ps_destroy_msg(pm);
3250 
3251 		return 0;
3252 	}
3253 
3254 	/*
3255 	 * And with a non null requested size, we get the list just
3256 	 * after the header
3257 	 */
3258 	foh = GET_OUTHDR(ps, pm);
3259 	np = (char *)(void *)(foh + 1);
3260 	puffs_len = foh->len - sizeof(*foh);
3261 
3262 	if (attrs != NULL) {
3263 #ifdef PUFFS_EXTATTR_LIST_LENPREFIX
3264 		/*
3265 		 * Convert the FUSE reply to length prefixed strings
3266 		 * if this is what the kernel wants.
3267 		 */
3268 		if (flag & PUFFS_EXTATTR_LIST_LENPREFIX) {
3269 			size_t i, attrlen;
3270 
3271 			for (i = 0; i < puffs_len; i += attrlen + 1) {
3272 				attrlen = strlen(np + i);
3273 				(void)memmove(np + i + 1, np + i, attrlen);
3274 				*(np + i) = (uint8_t)attrlen;
3275 			}
3276 		}
3277 #endif /* PUFFS_EXTATTR_LIST_LENPREFIX */
3278 		(void)memcpy(attrs, np, puffs_len);
3279 		*resid -= puffs_len;
3280 	}
3281 
3282 	if (attrsize != NULL)
3283 		*attrsize = puffs_len;
3284 
3285 	ps->ps_destroy_msg(pm);
3286 
3287 	return 0;
3288 }
3289 
3290 int
3291 perfuse_node_deleteextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3292 	int attrns, const char *attrname, const struct puffs_cred *pcr)
3293 {
3294 	struct perfuse_state *ps;
3295 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3296 	perfuse_msg_t *pm;
3297 	size_t attrnamelen;
3298 	char *np;
3299 	int error;
3300 
3301 	ps = puffs_getspecific(pu);
3302 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3303 	attrnamelen = strlen(attrname) + 1;
3304 
3305 	pm = ps->ps_new_msg(pu, opc, FUSE_REMOVEXATTR, attrnamelen, pcr);
3306 	np = _GET_INPAYLOAD(ps, pm, char *);
3307 	(void)strlcpy(np, attrname, attrnamelen);
3308 
3309 	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
3310 
3311 	ps->ps_destroy_msg(pm);
3312 
3313 	return error;
3314 }
3315