xref: /netbsd-src/lib/libperfuse/ops.c (revision 5bbd2a12505d72a8177929a37b5cee489d0a1cfd)
1 /*  $NetBSD: ops.c,v 1.59 2012/07/21 05:49:42 manu Exp $ */
2 
3 /*-
4  *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *     notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16  *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17  *  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19  *  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  *  POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <libgen.h>
32 #include <errno.h>
33 #include <err.h>
34 #include <sysexits.h>
35 #include <syslog.h>
36 #include <puffs.h>
37 #include <sys/socket.h>
38 #include <sys/socket.h>
39 #include <sys/extattr.h>
40 #include <sys/time.h>
41 #include <machine/vmparam.h>
42 
43 #include "perfuse_priv.h"
44 #include "fuse.h"
45 
46 extern int perfuse_diagflags;
47 
48 #if 0
49 static void print_node(const char *, puffs_cookie_t);
50 #endif
51 #ifdef PUFFS_KFLAG_CACHE_FS_TTL
52 static void perfuse_newinfo_setttl(struct puffs_newinfo *,
53     struct puffs_node *, struct fuse_entry_out *, struct fuse_attr_out *);
54 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
55 static int xchg_msg(struct puffs_usermount *, puffs_cookie_t,
56     perfuse_msg_t *, size_t, enum perfuse_xchg_pb_reply);
57 static int mode_access(puffs_cookie_t, const struct puffs_cred *, mode_t);
58 static int sticky_access(puffs_cookie_t, struct puffs_node *,
59     const struct puffs_cred *);
60 static void fuse_attr_to_vap(struct perfuse_state *,
61     struct vattr *, struct fuse_attr *);
62 static int node_lookup_common(struct puffs_usermount *, puffs_cookie_t,
63     struct puffs_newinfo *, const char *, const struct puffs_cred *,
64     struct puffs_node **);
65 static int node_mk_common(struct puffs_usermount *, puffs_cookie_t,
66     struct puffs_newinfo *, const struct puffs_cn *pcn, perfuse_msg_t *);
67 static uint64_t readdir_last_cookie(struct fuse_dirent *, size_t);
68 static ssize_t fuse_to_dirent(struct puffs_usermount *, puffs_cookie_t,
69     struct fuse_dirent *, size_t);
70 static void readdir_buffered(puffs_cookie_t, struct dirent *, off_t *,
71     size_t *);
72 static void node_ref(puffs_cookie_t);
73 static void node_rele(puffs_cookie_t);
74 static void requeue_request(struct puffs_usermount *,
75     puffs_cookie_t opc, enum perfuse_qtype);
76 static int dequeue_requests(puffs_cookie_t opc, enum perfuse_qtype, int);
77 #define DEQUEUE_ALL 0
78 
/*
 * I/O flags copied from <sys/vnode.h>, where they sit inside the
 * #ifdef _KERNEL section and are therefore not visible to userland.
 */
#define IO_DSYNC	0x00200
#define IO_DIRECT	0x02000
#define IO_SYNC		(0x40|IO_DSYNC)

/*
 * Definitions copied from <fcntl.h>, where they sit inside the
 * #ifdef _KERNEL section.
 */
#define F_WAIT		0x010
#define F_FLOCK		0x020
#define OFLAGS(fflags)  ((fflags) - 1)
92 
93 /*
94  * Borrowed from src/sys/kern/vfs_subr.c and src/sys/sys/vnode.h
95  */
96 const enum vtype iftovt_tab[16] = {
97 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
98         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
99 };
100 const int vttoif_tab[9] = {
101 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
102         S_IFSOCK, S_IFIFO, S_IFMT,
103 };
104 
105 #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
106 #define VTTOIF(indx) (vttoif_tab[(int)(indx)])
107 
#if 0
/*
 * Debugging aid (currently disabled): print the name, cookie, FUSE
 * nodeid and inode number of the node behind opc, prefixed by the
 * caller-supplied function name.
 */
static void
print_node(const char *func, puffs_cookie_t opc)
{
	struct puffs_node *node = (struct puffs_node *)opc;
	struct perfuse_node_data *data = PERFUSE_NODE_DATA(opc);
	struct vattr *attrs = &node->pn_va;

	printf("%s: \"%s\", opc = %p, nodeid = 0x%"PRIx64" ino = %"PRIu64"\n",
	       func, data->pnd_name, opc, data->pnd_nodeid, attrs->va_fileid);
}
#endif /* 0 */
126 
127 int
128 perfuse_node_close_common(struct puffs_usermount *pu, puffs_cookie_t opc,
129 	int mode)
130 {
131 	struct perfuse_state *ps;
132 	perfuse_msg_t *pm;
133 	int op;
134 	uint64_t fh;
135 	struct fuse_release_in *fri;
136 	struct perfuse_node_data *pnd;
137 	struct puffs_node *pn;
138 	int error;
139 
140 	ps = puffs_getspecific(pu);
141 	pn = (struct puffs_node *)opc;
142 	pnd = PERFUSE_NODE_DATA(pn);
143 
144 	if (puffs_pn_getvap(pn)->va_type == VDIR) {
145 		op = FUSE_RELEASEDIR;
146 		mode = FREAD;
147 	} else {
148 		op = FUSE_RELEASE;
149 	}
150 
151 	/*
152 	 * Destroy the filehandle before sending the
153 	 * request to the FUSE filesystem, otherwise
154 	 * we may get a second close() while we wait
155 	 * for the reply, and we would end up closing
156 	 * the same fh twice instead of closng both.
157 	 */
158 	fh = perfuse_get_fh(opc, mode);
159 	perfuse_destroy_fh(pn, fh);
160 
161 	/*
162 	 * release_flags may be set to FUSE_RELEASE_FLUSH
163 	 * to flush locks. lock_owner must be set in that case
164 	 *
165 	 * ps_new_msg() is called with NULL creds, which will
166 	 * be interpreted as FUSE superuser. We come here from the
167 	 * inactive method, which provides no creds, but obviously
168 	 * runs with kernel privilege.
169 	 */
170 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*fri), NULL);
171 	fri = GET_INPAYLOAD(ps, pm, fuse_release_in);
172 	fri->fh = fh;
173 	fri->flags = 0;
174 	fri->release_flags = 0;
175 	fri->lock_owner = pnd->pnd_lock_owner;
176 	fri->flags = (fri->lock_owner != 0) ? FUSE_RELEASE_FLUSH : 0;
177 
178 #ifdef PERFUSE_DEBUG
179 	if (perfuse_diagflags & PDF_FH)
180 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
181 			 __func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
182 #endif
183 
184 	if ((error = xchg_msg(pu, opc, pm,
185 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
186 		DERRX(EX_SOFTWARE, "%s: freed fh = 0x%"PRIx64" but filesystem "
187 		      "returned error = %d", __func__, fh, error);
188 
189 	ps->ps_destroy_msg(pm);
190 
191 	return 0;
192 }
193 
194 static int
195 xchg_msg(struct puffs_usermount *pu, puffs_cookie_t opc, perfuse_msg_t *pm,
196 	size_t len, enum perfuse_xchg_pb_reply wait)
197 {
198 	struct perfuse_state *ps;
199 	struct perfuse_node_data *pnd;
200 	struct perfuse_trace *pt = NULL;
201 	int error;
202 
203 	ps = puffs_getspecific(pu);
204 	pnd = NULL;
205 	if ((struct puffs_node *)opc != NULL)
206 		pnd = PERFUSE_NODE_DATA(opc);
207 
208 #ifdef PERFUSE_DEBUG
209 	if ((perfuse_diagflags & PDF_FILENAME) && (opc != 0))
210 		DPRINTF("file = \"%s\", ino = %"PRIu64" flags = 0x%x\n",
211 			perfuse_node_path(ps, opc),
212 			((struct puffs_node *)opc)->pn_va.va_fileid,
213 			PERFUSE_NODE_DATA(opc)->pnd_flags);
214 #endif
215 	ps->ps_xchgcount++;
216 	if (pnd)
217 		pnd->pnd_inxchg++;
218 
219 	/*
220 	 * Record FUSE call start if requested
221 	 */
222 	if (perfuse_diagflags & PDF_TRACE)
223 		pt = perfuse_trace_begin(ps, opc, pm);
224 
225 	/*
226 	 * Do actual FUSE exchange
227 	 */
228 	if ((error = ps->ps_xchg_msg(pu, pm, len, wait)) != 0)
229 		ps->ps_destroy_msg(pm);
230 
231 	/*
232 	 * Record FUSE call end if requested
233 	 */
234 	if (pt != NULL)
235 		perfuse_trace_end(ps, pt, error);
236 
237 	ps->ps_xchgcount--;
238 	if (pnd) {
239 		pnd->pnd_inxchg--;
240 		(void)dequeue_requests(opc, PCQ_AFTERXCHG, DEQUEUE_ALL);
241 	}
242 
243 	return error;
244 }
245 
246 static int
247 mode_access(puffs_cookie_t opc, const struct puffs_cred *pcr, mode_t mode)
248 {
249 	struct puffs_node *pn;
250 	struct vattr *va;
251 
252 	/*
253 	 * pcr is NULL for self open through fsync or readdir.
254 	 * In both case, access control is useless, as it was
255 	 * done before, at open time.
256 	 */
257 	if (pcr == NULL)
258 		return 0;
259 
260 	pn = (struct puffs_node *)opc;
261 	va = puffs_pn_getvap(pn);
262 	return puffs_access(va->va_type, va->va_mode,
263 			    va->va_uid, va->va_gid,
264 			    mode, pcr);
265 }
266 
267 static int
268 sticky_access(puffs_cookie_t opc, struct puffs_node *targ,
269 	      const struct puffs_cred *pcr)
270 {
271 	uid_t uid;
272 	int sticky, owner;
273 
274 	/*
275 	 * This covers the case where the kernel requests a DELETE
276 	 * or RENAME on its own, and where puffs_cred_getuid would
277 	 * return -1. While such a situation should not happen,
278 	 * we allow it here.
279 	 *
280 	 * This also allows root to tamper with other users' files
281 	 * that have the sticky bit.
282 	 */
283 	if (puffs_cred_isjuggernaut(pcr))
284 		return 0;
285 
286 	if (puffs_cred_getuid(pcr, &uid) != 0)
287 		DERRX(EX_SOFTWARE, "puffs_cred_getuid fails in %s", __func__);
288 
289 	sticky = puffs_pn_getvap(opc)->va_mode & S_ISTXT;
290 	owner = puffs_pn_getvap(targ)->va_uid == uid;
291 
292 	if (sticky && !owner)
293 		return EACCES;
294 
295 	return 0;
296 }
297 
298 
299 static void
300 fuse_attr_to_vap(struct perfuse_state *ps, struct vattr *vap,
301 	struct fuse_attr *fa)
302 {
303 	vap->va_type = IFTOVT(fa->mode);
304 	vap->va_mode = fa->mode & ALLPERMS;
305 	vap->va_nlink = fa->nlink;
306 	vap->va_uid = fa->uid;
307 	vap->va_gid = fa->gid;
308 	vap->va_fsid = (long)ps->ps_fsid;
309 	vap->va_fileid = fa->ino;
310 	vap->va_size = fa->size;
311 	vap->va_blocksize = fa->blksize;
312 	vap->va_atime.tv_sec = (time_t)fa->atime;
313 	vap->va_atime.tv_nsec = (long) fa->atimensec;
314 	vap->va_mtime.tv_sec = (time_t)fa->mtime;
315 	vap->va_mtime.tv_nsec = (long)fa->mtimensec;
316 	vap->va_ctime.tv_sec = (time_t)fa->ctime;
317 	vap->va_ctime.tv_nsec = (long)fa->ctimensec;
318 	vap->va_birthtime.tv_sec = 0;
319 	vap->va_birthtime.tv_nsec = 0;
320 	vap->va_gen = 0;
321 	vap->va_flags = 0;
322 	vap->va_rdev = fa->rdev;
323 	vap->va_bytes = fa->size;
324 	vap->va_filerev = (u_quad_t)PUFFS_VNOVAL;
325 	vap->va_vaflags = 0;
326 
327 	if (vap->va_blocksize == 0)
328 		vap->va_blocksize = DEV_BSIZE;
329 
330 	if (vap->va_size == (size_t)PUFFS_VNOVAL) /* XXX */
331 		vap->va_size = 0;
332 
333 	return;
334 }
335 
#ifdef PUFFS_KFLAG_CACHE_FS_TTL
/*
 * Copy the cache validity periods of a FUSE reply into a
 * puffs_newinfo.
 *
 * Callers pass either fao (attribute reply: only the attribute TTL
 * is set) or feo (entry reply: attribute and name TTLs are set, and
 * the absolute name expiry time is stored in pn's pnd_cn_expire).
 * The debug build enforces that exactly one of them is non-NULL.
 * pn is only used when feo is given.
 */
static void
perfuse_newinfo_setttl(struct puffs_newinfo *pni,
    struct puffs_node *pn, struct fuse_entry_out *feo,
    struct fuse_attr_out *fao)
{
#ifdef PERFUSE_DEBUG
	if ((feo == NULL) && (fao == NULL))
		DERRX(EX_SOFTWARE, "%s: feo and fao NULL", __func__);

	if ((feo != NULL) && (fao != NULL))
		DERRX(EX_SOFTWARE, "%s: feo and fao != NULL", __func__);
#endif /* PERFUSE_DEBUG */

	/* Attribute reply: attribute TTL only */
	if (fao != NULL) {
		struct timespec attr_ttl;

		attr_ttl.tv_sec = fao->attr_valid;
		attr_ttl.tv_nsec = fao->attr_valid_nsec;
		puffs_newinfo_setvattl(pni, &attr_ttl);
	}

	/* Entry reply: attribute TTL, name TTL and name expiry date */
	if (feo != NULL) {
		struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(pn);
		struct timespec attr_ttl;
		struct timespec name_ttl;
		struct timespec now;

		attr_ttl.tv_sec = feo->attr_valid;
		attr_ttl.tv_nsec = feo->attr_valid_nsec;
		puffs_newinfo_setvattl(pni, &attr_ttl);

		name_ttl.tv_sec = feo->entry_valid;
		name_ttl.tv_nsec = feo->entry_valid_nsec;
		puffs_newinfo_setcnttl(pni, &name_ttl);

		if (clock_gettime(CLOCK_REALTIME, &now) != 0)
			DERR(EX_OSERR, "clock_gettime failed");

		timespecadd(&now, &name_ttl, &pnd->pnd_cn_expire);
	}
}
#endif /* PUFFS_KFLAG_CACHE_FS_TTL */
382 
383 static int
384 node_lookup_common(struct puffs_usermount *pu, puffs_cookie_t opc,
385 	struct puffs_newinfo *pni, const char *path,
386 	const struct puffs_cred *pcr, struct puffs_node **pnp)
387 {
388 	struct perfuse_state *ps;
389 	struct perfuse_node_data *oldpnd;
390 	perfuse_msg_t *pm;
391 	struct fuse_entry_out *feo;
392 	struct puffs_node *pn;
393 	size_t len;
394 	int error;
395 
396 	/*
397 	 * Prevent further lookups if the parent was removed
398 	 */
399 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
400 		return ESTALE;
401 
402 	if (pnp == NULL)
403 		DERRX(EX_SOFTWARE, "pnp must be != NULL");
404 
405 	ps = puffs_getspecific(pu);
406 
407 #ifdef PERFUSE_DEBUG
408 	if (perfuse_diagflags & PDF_FILENAME)
409 		DPRINTF("%s: opc = %p, file = \"%s\" looking up \"%s\"\n",
410 			__func__, (void *)opc,
411 			perfuse_node_path(ps, opc), path);
412 
413 	if (strcmp(path, ".") == 0)
414 		DERRX(EX_SOFTWARE, "unexpected dot-lookup");
415 
416 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_RECLAIMED)
417 		DERRX(EX_SOFTWARE,
418 		      "looking up reclaimed node opc = %p, name = \"%s\"",
419 		      opc, path);
420 
421 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_INVALID)
422 		DERRX(EX_SOFTWARE,
423 		      "looking up freed node opc = %p, name = \"%s\"",
424 		      opc, path);
425 #endif /* PERFUSE_DEBUG */
426 
427 	len = strlen(path) + 1;
428 	pm = ps->ps_new_msg(pu, opc, FUSE_LOOKUP, len, pcr);
429 	(void)strlcpy(_GET_INPAYLOAD(ps, pm, char *), path, len);
430 
431 	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
432 		return error;
433 
434 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
435 
436 	/*
437 	 * Check for a known node, not reclaimed, with another name.
438 	 * It may have been moved, or we can lookup ../
439 	 */
440 	if (((oldpnd = perfuse_node_bynodeid(ps, feo->nodeid)) != NULL) &&
441 	    !(oldpnd->pnd_flags & PND_RECLAIMED)) {
442 		/*
443 		 * Save the new node name if not ..
444 		 */
445 		if (strncmp(path, "..", len) != 0)
446 			(void)strlcpy(oldpnd->pnd_name,
447 				      path, MAXPATHLEN);
448 		pn = oldpnd->pnd_pn;
449 
450 	} else {
451 		pn = perfuse_new_pn(pu, path, opc);
452 		PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
453 		perfuse_node_cache(ps, pn);
454 	}
455 
456 #ifdef PERFUSE_DEBUG
457 	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_RECLAIMED)
458 		DERRX(EX_SOFTWARE,
459 		      "reclaimed in lookup opc = %p, name = \"%s\", ck = %p",
460 		      opc, path, pn);
461 
462 	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_INVALID)
463 		DERRX(EX_SOFTWARE,
464 		      "freed in lookup opc = %p, name = \"%s\", ck = %p",
465 		      opc, path, pn);
466 #endif /* PERFUSE_DEBUG */
467 
468 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
469 	pn->pn_va.va_gen = (u_long)(feo->generation);
470 	PERFUSE_NODE_DATA(pn)->pnd_fuse_nlookup++;
471 
472 	*pnp = pn;
473 
474 #ifdef PERFUSE_DEBUG
475 	if (perfuse_diagflags & PDF_FILENAME)
476 		DPRINTF("%s: opc = %p, looked up opc = %p, "
477 			"nodeid = 0x%"PRIx64" file = \"%s\"\n", __func__,
478 			(void *)opc, pn, feo->nodeid, path);
479 #endif
480 
481 	if (pni != NULL) {
482 #ifdef PUFFS_KFLAG_CACHE_FS_TTL
483 		puffs_newinfo_setva(pni, &pn->pn_va);
484 		perfuse_newinfo_setttl(pni, pn, feo, NULL);
485 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
486 		puffs_newinfo_setcookie(pni, pn);
487 		puffs_newinfo_setvtype(pni, pn->pn_va.va_type);
488 		puffs_newinfo_setsize(pni, (voff_t)pn->pn_va.va_size);
489 		puffs_newinfo_setrdev(pni, pn->pn_va.va_rdev);
490 	}
491 
492 	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_NODELEAK) {
493 		PERFUSE_NODE_DATA(pn)->pnd_flags &= ~PND_NODELEAK;
494 		ps->ps_nodeleakcount--;
495 	}
496 
497 	ps->ps_destroy_msg(pm);
498 
499 	return 0;
500 }
501 
502 
503 /*
504  * Common code for methods that create objects:
505  * perfuse_node_mkdir
506  * perfuse_node_mknod
507  * perfuse_node_symlink
508  */
509 static int
510 node_mk_common(struct puffs_usermount *pu, puffs_cookie_t opc,
511 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
512 	perfuse_msg_t *pm)
513 {
514 	struct perfuse_state *ps;
515 	struct puffs_node *pn;
516 	struct fuse_entry_out *feo;
517 	int error;
518 
519 	ps =  puffs_getspecific(pu);
520 
521 	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
522 		return error;
523 
524 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
525 	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
526 		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
527 
528 	pn = perfuse_new_pn(pu, pcn->pcn_name, opc);
529 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
530 	PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
531 	perfuse_node_cache(ps, pn);
532 
533 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
534 	pn->pn_va.va_gen = (u_long)(feo->generation);
535 
536 	puffs_newinfo_setcookie(pni, pn);
537 #ifdef PUFFS_KFLAG_CACHE_FS_TTL
538 	puffs_newinfo_setva(pni, &pn->pn_va);
539 	perfuse_newinfo_setttl(pni, pn, feo, NULL);
540 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
541 
542 
543 #ifdef PERFUSE_DEBUG
544 	if (perfuse_diagflags & PDF_FILENAME)
545 		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
546 			"nodeid = 0x%"PRIx64"\n",
547 			__func__, (void *)pn, pcn->pcn_name,
548 			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid);
549 #endif
550 	ps->ps_destroy_msg(pm);
551 
552 	/* Parents is now dirty */
553 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
554 
555 	return 0;
556 }
557 
558 static uint64_t
559 readdir_last_cookie(struct fuse_dirent *fd, size_t fd_len)
560 {
561 	size_t len;
562 	size_t seen = 0;
563 	char *ndp;
564 
565 	do {
566 		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
567 		seen += len;
568 
569 		if (seen >= fd_len)
570 			break;
571 
572 		ndp = (char *)(void *)fd + (size_t)len;
573 		fd = (struct fuse_dirent *)(void *)ndp;
574 	} while (1 /* CONSTCOND */);
575 
576 	return fd->off;
577 }
578 
579 static ssize_t
580 fuse_to_dirent(struct puffs_usermount *pu, puffs_cookie_t opc,
581 	struct fuse_dirent *fd, size_t fd_len)
582 {
583 	struct dirent *dents;
584 	size_t dents_len;
585 	ssize_t written;
586 	uint64_t fd_offset;
587 	struct fuse_dirent *fd_base;
588 	size_t len;
589 
590 	fd_base = fd;
591 	fd_offset = 0;
592 	written = 0;
593 	dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
594 	dents_len = (size_t)PERFUSE_NODE_DATA(opc)->pnd_dirent_len;
595 
596 	do {
597 		char *ndp;
598 		size_t reclen;
599 
600 		reclen = _DIRENT_RECLEN(dents, fd->namelen);
601 
602 		/*
603 		 * Check we do not overflow the output buffer
604 		 * struct fuse_dirent is bigger than struct dirent,
605 		 * so we should always use fd_len and never reallocate
606 		 * later.
607 		 * If we have to reallocate,try to double the buffer
608 		 * each time so that we do not have to do it too often.
609 		 */
610 		if (written + reclen > dents_len) {
611 			if (dents_len == 0)
612 				dents_len = fd_len;
613 			else
614 				dents_len =
615 				   MAX(2 * dents_len, written + reclen);
616 
617 			dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
618 			if ((dents = realloc(dents, dents_len)) == NULL)
619 				DERR(EX_OSERR, "%s: malloc failed", __func__);
620 
621 			PERFUSE_NODE_DATA(opc)->pnd_dirent = dents;
622 			PERFUSE_NODE_DATA(opc)->pnd_dirent_len = dents_len;
623 
624 			/*
625 			 * (void *) for delint
626 			 */
627 			ndp = (char *)(void *)dents + written;
628 			dents = (struct dirent *)(void *)ndp;
629 		}
630 
631 		/*
632 		 * Filesystem was mounted without -o use_ino
633 		 * Perform a lookup to find it.
634 		 */
635 		if (fd->ino == PERFUSE_UNKNOWN_INO) {
636 			struct puffs_node *pn;
637 
638 			if (node_lookup_common(pu, opc, NULL, fd->name,
639 					       NULL, &pn) != 0) {
640 				DWARNX("node_lookup_common failed");
641 			} else {
642 				fd->ino = pn->pn_va.va_fileid;
643 				(void)perfuse_node_reclaim(pu, pn);
644 			}
645 		}
646 
647 		dents->d_fileno = fd->ino;
648 		dents->d_reclen = (unsigned short)reclen;
649 		dents->d_namlen = fd->namelen;
650 		dents->d_type = fd->type;
651 		strlcpy(dents->d_name, fd->name, fd->namelen + 1);
652 
653 #ifdef PERFUSE_DEBUG
654 		if (perfuse_diagflags & PDF_READDIR)
655 			DPRINTF("%s: translated \"%s\" ino = %"PRIu64"\n",
656 				__func__, dents->d_name, dents->d_fileno);
657 #endif
658 
659 		dents = _DIRENT_NEXT(dents);
660 		written += reclen;
661 
662 		/*
663 		 * Move to the next record.
664 		 * fd->off is not the offset, it is an opaque cookie
665 		 * given by the filesystem to keep state across multiple
666 		 * readdir() operation.
667 		 * Use record alignement instead.
668 		 */
669 		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
670 #ifdef PERFUSE_DEBUG
671 		if (perfuse_diagflags & PDF_READDIR)
672 			DPRINTF("%s: record at %"PRId64"/0x%"PRIx64" "
673 				"length = %zd/0x%zx. "
674 				"next record at %"PRId64"/0x%"PRIx64" "
675 				"max %zd/0x%zx\n",
676 				__func__, fd_offset, fd_offset, len, len,
677 				fd_offset + len, fd_offset + len,
678 				fd_len, fd_len);
679 #endif
680 		fd_offset += len;
681 
682 		/*
683 		 * Check if next record is still within the packet
684 		 * If it is not, we reached the end of the buffer.
685 		 */
686 		if (fd_offset >= fd_len)
687 			break;
688 
689 		/*
690 		 * (void *) for delint
691 		 */
692 		ndp = (char *)(void *)fd_base + (size_t)fd_offset;
693 		fd = (struct fuse_dirent *)(void *)ndp;
694 
695 	} while (1 /* CONSTCOND */);
696 
697 	/*
698 	 * Adjust the dirent output length
699 	 */
700 	if (written != -1)
701 		PERFUSE_NODE_DATA(opc)->pnd_dirent_len = written;
702 
703 	return written;
704 }
705 
706 static void
707 readdir_buffered(puffs_cookie_t opc, struct dirent *dent, off_t *readoff,
708 	size_t *reslen)
709 {
710 	struct dirent *fromdent;
711 	struct perfuse_node_data *pnd;
712 	char *ndp;
713 
714 	pnd = PERFUSE_NODE_DATA(opc);
715 
716 	while (*readoff < pnd->pnd_dirent_len) {
717 		/*
718 		 * (void *) for delint
719 		 */
720 		ndp = (char *)(void *)pnd->pnd_dirent + (size_t)*readoff;
721 		fromdent = (struct dirent *)(void *)ndp;
722 
723 		if (*reslen < _DIRENT_SIZE(fromdent))
724 			break;
725 
726 		memcpy(dent, fromdent, _DIRENT_SIZE(fromdent));
727 		*readoff += _DIRENT_SIZE(fromdent);
728 		*reslen -= _DIRENT_SIZE(fromdent);
729 
730 		dent = _DIRENT_NEXT(dent);
731 	}
732 
733 #ifdef PERFUSE_DEBUG
734 	if (perfuse_diagflags & PDF_READDIR)
735 		DPRINTF("%s: readoff = %"PRId64",  "
736 			"pnd->pnd_dirent_len = %"PRId64"\n",
737 			__func__, *readoff, pnd->pnd_dirent_len);
738 #endif
739 	if (*readoff >=  pnd->pnd_dirent_len) {
740 		free(pnd->pnd_dirent);
741 		pnd->pnd_dirent = NULL;
742 		pnd->pnd_dirent_len = 0;
743 	}
744 
745 	return;
746 }
747 
748 
749 static void
750 node_ref(puffs_cookie_t opc)
751 {
752 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
753 
754 #ifdef PERFUSE_DEBUG
755 	if (pnd->pnd_flags & PND_INVALID)
756 		DERRX(EX_SOFTWARE, "Use of freed node opc = %p", opc);
757 #endif /* PERFUSE_DEBUG */
758 
759 	pnd->pnd_ref++;
760 	return;
761 }
762 
763 static void
764 node_rele(puffs_cookie_t opc)
765 {
766 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
767 
768 #ifdef PERFUSE_DEBUG
769 	if (pnd->pnd_flags & PND_INVALID)
770 		DERRX(EX_SOFTWARE, "Use of freed node opc = %p", opc);
771 #endif /* PERFUSE_DEBUG */
772 
773 	pnd->pnd_ref--;
774 
775 	if (pnd->pnd_ref == 0)
776 		(void)dequeue_requests(opc, PCQ_REF, DEQUEUE_ALL);
777 
778 	return;
779 }
780 
781 static void
782 requeue_request(struct puffs_usermount *pu, puffs_cookie_t opc,
783 	enum perfuse_qtype type)
784 {
785 	struct perfuse_cc_queue pcq;
786 	struct perfuse_node_data *pnd;
787 #ifdef PERFUSE_DEBUG
788 	struct perfuse_state *ps;
789 
790 	ps = perfuse_getspecific(pu);
791 #endif
792 
793 	pnd = PERFUSE_NODE_DATA(opc);
794 	pcq.pcq_type = type;
795 	pcq.pcq_cc = puffs_cc_getcc(pu);
796 	TAILQ_INSERT_TAIL(&pnd->pnd_pcq, &pcq, pcq_next);
797 
798 #ifdef PERFUSE_DEBUG
799 	if (perfuse_diagflags & PDF_REQUEUE)
800 		DPRINTF("%s: REQUEUE opc = %p, pcc = %p (%s)\n",
801 		        __func__, (void *)opc, pcq.pcq_cc,
802 			perfuse_qtypestr[type]);
803 #endif
804 
805 	puffs_cc_yield(pcq.pcq_cc);
806 	TAILQ_REMOVE(&pnd->pnd_pcq, &pcq, pcq_next);
807 
808 #ifdef PERFUSE_DEBUG
809 	if (perfuse_diagflags & PDF_REQUEUE)
810 		DPRINTF("%s: RESUME opc = %p, pcc = %p (%s)\n",
811 		        __func__, (void *)opc, pcq.pcq_cc,
812 			perfuse_qtypestr[type]);
813 #endif
814 
815 	return;
816 }
817 
818 static int
819 dequeue_requests(puffs_cookie_t opc, enum perfuse_qtype type, int max)
820 {
821 	struct perfuse_cc_queue *pcq;
822 	struct perfuse_node_data *pnd;
823 	int dequeued;
824 
825 	pnd = PERFUSE_NODE_DATA(opc);
826 	dequeued = 0;
827 	TAILQ_FOREACH(pcq, &pnd->pnd_pcq, pcq_next) {
828 		if (pcq->pcq_type != type)
829 			continue;
830 
831 #ifdef PERFUSE_DEBUG
832 		if (perfuse_diagflags & PDF_REQUEUE)
833 			DPRINTF("%s: SCHEDULE opc = %p, pcc = %p (%s)\n",
834 				__func__, (void *)opc, pcq->pcq_cc,
835 				 perfuse_qtypestr[type]);
836 #endif
837 		puffs_cc_schedule(pcq->pcq_cc);
838 
839 		if (++dequeued == max)
840 			break;
841 	}
842 
843 #ifdef PERFUSE_DEBUG
844 	if (perfuse_diagflags & PDF_REQUEUE)
845 		DPRINTF("%s: DONE  opc = %p\n", __func__, (void *)opc);
846 #endif
847 
848 	return dequeued;
849 }
850 
851 void
852 perfuse_fs_init(struct puffs_usermount *pu)
853 {
854 	struct perfuse_state *ps;
855 	perfuse_msg_t *pm;
856 	struct fuse_init_in *fii;
857 	struct fuse_init_out *fio;
858 	int error;
859 
860 	ps = puffs_getspecific(pu);
861 
862         if (puffs_mount(pu, ps->ps_target, ps->ps_mountflags, ps->ps_root) != 0)
863                 DERR(EX_OSERR, "%s: puffs_mount failed", __func__);
864 
865 	/*
866 	 * Linux 2.6.34.1 sends theses flags:
867 	 * FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC
868 	 * FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK
869 	 *
870 	 * Linux also sets max_readahead at 32 pages (128 kB)
871 	 *
872 	 * ps_new_msg() is called with NULL creds, which will
873 	 * be interpreted as FUSE superuser.
874 	 */
875 	pm = ps->ps_new_msg(pu, 0, FUSE_INIT, sizeof(*fii), NULL);
876 	fii = GET_INPAYLOAD(ps, pm, fuse_init_in);
877 	fii->major = FUSE_KERNEL_VERSION;
878 	fii->minor = FUSE_KERNEL_MINOR_VERSION;
879 	fii->max_readahead = (unsigned int)(32 * sysconf(_SC_PAGESIZE));
880 	fii->flags = (FUSE_ASYNC_READ|FUSE_POSIX_LOCKS|FUSE_ATOMIC_O_TRUNC);
881 
882 	if ((error = xchg_msg(pu, 0, pm, sizeof(*fio), wait_reply)) != 0)
883 		DERRX(EX_SOFTWARE, "init message exchange failed (%d)", error);
884 
885 	fio = GET_OUTPAYLOAD(ps, pm, fuse_init_out);
886 	ps->ps_max_readahead = fio->max_readahead;
887 	ps->ps_max_write = fio->max_write;
888 
889 	ps->ps_destroy_msg(pm);
890 
891 	return;
892 }
893 
894 int
895 perfuse_fs_unmount(struct puffs_usermount *pu, int flags)
896 {
897 	perfuse_msg_t *pm;
898 	struct perfuse_state *ps;
899 	puffs_cookie_t opc;
900 	int error;
901 
902 	ps = puffs_getspecific(pu);
903 	opc = (puffs_cookie_t)puffs_getroot(pu);
904 
905 	/*
906 	 * ps_new_msg() is called with NULL creds, which will
907 	 * be interpreted as FUSE superuser.
908 	 */
909 	pm = ps->ps_new_msg(pu, opc, FUSE_DESTROY, 0, NULL);
910 
911 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0){
912 		DWARN("unmount %s", ps->ps_target);
913 		if (!(flags & MNT_FORCE))
914 			return error;
915 		else
916 			error = 0;
917 	} else {
918 		ps->ps_destroy_msg(pm);
919 	}
920 
921 	ps->ps_umount(pu);
922 
923 	if (perfuse_diagflags & PDF_MISC)
924 		DPRINTF("%s unmounted, exit\n", ps->ps_target);
925 
926 	return 0;
927 }
928 
929 int
930 perfuse_fs_statvfs(struct puffs_usermount *pu, struct statvfs *svfsb)
931 {
932 	struct perfuse_state *ps;
933 	perfuse_msg_t *pm;
934 	puffs_cookie_t opc;
935 	struct fuse_statfs_out *fso;
936 	int error;
937 
938 	ps = puffs_getspecific(pu);
939 	opc = (puffs_cookie_t)puffs_getroot(pu);
940 
941 	/*
942 	 * ps_new_msg() is called with NULL creds, which will
943 	 * be interpreted as FUSE superuser.
944 	 */
945 	pm = ps->ps_new_msg(pu, opc, FUSE_STATFS, 0, NULL);
946 
947 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fso), wait_reply)) != 0)
948 		return error;
949 
950 	fso = GET_OUTPAYLOAD(ps, pm, fuse_statfs_out);
951 	svfsb->f_flag = ps->ps_mountflags;
952 	svfsb->f_bsize = fso->st.bsize;
953 	svfsb->f_frsize = fso->st.frsize;
954 	svfsb->f_iosize = ((struct puffs_node *)opc)->pn_va.va_blocksize;
955 	svfsb->f_blocks = fso->st.blocks;
956 	svfsb->f_bfree = fso->st.bfree;
957 	svfsb->f_bavail = fso->st.bavail;
958 	svfsb->f_bresvd = fso->st.bfree - fso->st.bavail;
959 	svfsb->f_files = fso->st.files;
960 	svfsb->f_ffree = fso->st.ffree;
961 	svfsb->f_favail = fso->st.ffree;/* files not reserved for root */
962 	svfsb->f_fresvd = 0;		/* files reserved for root */
963 
964 	svfsb->f_syncreads = ps->ps_syncreads;
965 	svfsb->f_syncwrites = ps->ps_syncwrites;
966 
967 	svfsb->f_asyncreads = ps->ps_asyncreads;
968 	svfsb->f_asyncwrites = ps->ps_asyncwrites;
969 
970 	(void)memcpy(&svfsb->f_fsidx, &ps->ps_fsid, sizeof(ps->ps_fsid));
971 	svfsb->f_fsid = (unsigned long)ps->ps_fsid;
972 	svfsb->f_namemax = MAXPATHLEN;	/* XXX */
973 	svfsb->f_owner = ps->ps_owner_uid;
974 
975 	(void)strlcpy(svfsb->f_mntonname, ps->ps_target, _VFS_NAMELEN);
976 
977 	if (ps->ps_filesystemtype != NULL)
978 		(void)strlcpy(svfsb->f_fstypename,
979 			      ps->ps_filesystemtype, _VFS_NAMELEN);
980 	else
981 		(void)strlcpy(svfsb->f_fstypename, "fuse", _VFS_NAMELEN);
982 
983 	if (ps->ps_source != NULL)
984 		strlcpy(svfsb->f_mntfromname, ps->ps_source, _VFS_NAMELEN);
985 	else
986 		strlcpy(svfsb->f_mntfromname, _PATH_FUSE, _VFS_NAMELEN);
987 
988 	ps->ps_destroy_msg(pm);
989 
990 	return 0;
991 }
992 
/*
 * Filesystem-wide sync.
 *
 * FUSE does not seem to have a FS sync callback, so the request is
 * handed to puffs_fsnop_sync() and its result returned as is.
 * Maybe this callback should not even be registered.
 */
int
perfuse_fs_sync(struct puffs_usermount *pu, int waitfor,
	const struct puffs_cred *pcr)
{
	int result;

	result = puffs_fsnop_sync(pu, waitfor, pcr);

	return result;
}
1003 
/*
 * File handle to node conversion is not implemented: calling this
 * reports the failure through DERRX(). None of the arguments is used.
 */
/* ARGSUSED0 */
int
perfuse_fs_fhtonode(struct puffs_usermount *pu, void *fid, size_t fidsize,
	struct puffs_newinfo *pni)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);

	/* Only here to give the function a return value */
	return 0;
}
1012 
1013 /* ARGSUSED0 */
1014 int
1015 perfuse_fs_nodetofh(struct puffs_usermount *pu, puffs_cookie_t cookie,
1016 	void *fid, size_t *fidsize)
1017 {
1018 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
1019 	return 0;
1020 }
1021 
#if 0
/*
 * Extended attribute control is not implemented; this stub is
 * compiled out. It is declared void, so it must not return a value.
 * NOTE(review): confirm the return type puffs expects for this
 * callback before enabling it.
 */
/* ARGSUSED0 */
void
perfuse_fs_extattrctl(struct puffs_usermount *pu, int cmd,
	puffs_cookie_t *cookie, int flags, int namespace, const char *attrname)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
	return;
}
#endif /* 0 */
1032 
/*
 * Suspend notification: nothing is done with it, both arguments are
 * ignored.
 */
/* ARGSUSED0 */
void
perfuse_fs_suspend(struct puffs_usermount *pu, int status)
{
	/* Intentionally empty */
}
1039 
1040 
1041 int
1042 perfuse_node_lookup(struct puffs_usermount *pu, puffs_cookie_t opc,
1043 	struct puffs_newinfo *pni, const struct puffs_cn *pcn)
1044 {
1045 	struct perfuse_state *ps;
1046 	struct puffs_node *pn;
1047 	mode_t mode;
1048 	int error;
1049 
1050 	ps = puffs_getspecific(pu);
1051 	node_ref(opc);
1052 
1053 	/*
1054 	 * Check permissions
1055 	 */
1056 	switch(pcn->pcn_nameiop) {
1057 	case NAMEI_DELETE: /* FALLTHROUGH */
1058 	case NAMEI_RENAME: /* FALLTHROUGH */
1059 	case NAMEI_CREATE:
1060 		if (pcn->pcn_flags & NAMEI_ISLASTCN)
1061 			mode = PUFFS_VEXEC|PUFFS_VWRITE;
1062 		else
1063 			mode = PUFFS_VEXEC;
1064 		break;
1065 	case NAMEI_LOOKUP: /* FALLTHROUGH */
1066 	default:
1067 		mode = PUFFS_VEXEC;
1068 		break;
1069 	}
1070 
1071 	if ((error = mode_access(opc, pcn->pcn_cred, mode)) != 0)
1072 		goto out;
1073 
1074 	error = node_lookup_common(pu, (puffs_cookie_t)opc, pni,
1075 				   pcn->pcn_name, pcn->pcn_cred, &pn);
1076 
1077 	if (error != 0)
1078 		goto out;
1079 
1080 	/*
1081 	 * Kernel would kill us if the filesystem returned the parent
1082 	 * itself. If we want to live, hide that!
1083 	 */
1084 	if ((opc == (puffs_cookie_t)pn) && (strcmp(pcn->pcn_name, ".") != 0)) {
1085 		DERRX(EX_SOFTWARE, "lookup \"%s\" in \"%s\" returned parent",
1086 		      pcn->pcn_name, perfuse_node_path(ps, opc));
1087 		/* NOTREACHED */
1088 		error = ESTALE;
1089 		goto out;
1090 	}
1091 
1092 	/*
1093 	 * Removed node
1094 	 */
1095 	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_REMOVED) {
1096 		error = ENOENT;
1097 		goto out;
1098 	}
1099 
1100 	/*
1101 	 * Check for sticky bit. Unfortunately there is no way to
1102 	 * do this before creating the puffs_node, since we require
1103 	 * this operation to get the node owner.
1104 	 */
1105 	switch (pcn->pcn_nameiop) {
1106 	case NAMEI_DELETE: /* FALLTHROUGH */
1107 	case NAMEI_RENAME:
1108 		error = sticky_access(opc, pn, pcn->pcn_cred);
1109 		if (error != 0) {
1110 			(void)perfuse_node_reclaim(pu, pn);
1111 			goto out;
1112 		}
1113 		break;
1114 	default:
1115 		break;
1116 	}
1117 
1118 	PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
1119 
1120 	error = 0;
1121 
1122 out:
1123 	node_rele(opc);
1124 	return error;
1125 }
1126 
1127 int
1128 perfuse_node_create(struct puffs_usermount *pu, puffs_cookie_t opc,
1129 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1130 	const struct vattr *vap)
1131 {
1132 	perfuse_msg_t *pm;
1133 	struct perfuse_state *ps;
1134 	struct fuse_create_in *fci;
1135 	struct fuse_entry_out *feo;
1136 	struct fuse_open_out *foo;
1137 	struct puffs_node *pn;
1138 	const char *name;
1139 	size_t namelen;
1140 	size_t len;
1141 	int error;
1142 
1143 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1144 		return ENOENT;
1145 
1146 	node_ref(opc);
1147 
1148 	/*
1149 	 * If create is unimplemented: Check that it does not
1150 	 * already exists, and if not, do mknod and open
1151 	 */
1152 	ps = puffs_getspecific(pu);
1153 	if (ps->ps_flags & PS_NO_CREAT) {
1154 		error = node_lookup_common(pu, opc, NULL, pcn->pcn_name,
1155 					   pcn->pcn_cred, &pn);
1156 		if (error == 0)	{
1157 			(void)perfuse_node_reclaim(pu, pn);
1158 			error = EEXIST;
1159 			goto out;
1160 		}
1161 
1162 		error = perfuse_node_mknod(pu, opc, pni, pcn, vap);
1163 		if (error != 0)
1164 			goto out;
1165 
1166 		error = node_lookup_common(pu, opc, NULL, pcn->pcn_name,
1167 					   pcn->pcn_cred, &pn);
1168 		if (error != 0)
1169 			goto out;
1170 
1171 		/*
1172 		 * FUSE does the open at create time, while
1173 		 * NetBSD will open in a subsequent operation.
1174 		 * We need to open now, in order to retain FUSE
1175 		 * semantics. The calling process will not get
1176 		 * a file descriptor before the kernel sends
1177 		 * the open operation.
1178 		 */
1179 		error = perfuse_node_open(pu, (puffs_cookie_t)pn,
1180 					  FWRITE, pcn->pcn_cred);
1181 		goto out;
1182 	}
1183 
1184 	name = pcn->pcn_name;
1185 	namelen = pcn->pcn_namelen + 1;
1186 	len = sizeof(*fci) + namelen;
1187 
1188 	/*
1189 	 * flags should use O_WRONLY instead of O_RDWR, but it
1190 	 * breaks when the caller tries to read from file.
1191 	 *
1192 	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1193 	 */
1194 	pm = ps->ps_new_msg(pu, opc, FUSE_CREATE, len, pcn->pcn_cred);
1195 	fci = GET_INPAYLOAD(ps, pm, fuse_create_in);
1196 	fci->flags = O_CREAT | O_TRUNC | O_RDWR;
1197 	fci->mode = vap->va_mode | VTTOIF(vap->va_type);
1198 	fci->umask = 0; 	/* Seems unused by libfuse */
1199 	(void)strlcpy((char*)(void *)(fci + 1), name, namelen);
1200 
1201 	len = sizeof(*feo) + sizeof(*foo);
1202 	if ((error = xchg_msg(pu, opc, pm, len, wait_reply)) != 0) {
1203 		/*
1204 		 * create is unimplmented, remember it for later,
1205 		 * and start over using mknod and open instead.
1206 		 */
1207 		if (error == ENOSYS) {
1208 			ps->ps_flags |= PS_NO_CREAT;
1209 			error = perfuse_node_create(pu, opc, pni, pcn, vap);
1210 		}
1211 
1212 		goto out;
1213 	}
1214 
1215 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
1216 	foo = (struct fuse_open_out *)(void *)(feo + 1);
1217 	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
1218 		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
1219 
1220 	/*
1221 	 * Save the file handle and inode in node private data
1222 	 * so that we can reuse it later
1223 	 */
1224 	pn = perfuse_new_pn(pu, name, opc);
1225 	perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE);
1226 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
1227 	PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
1228 	perfuse_node_cache(ps, pn);
1229 
1230 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
1231 	pn->pn_va.va_gen = (u_long)(feo->generation);
1232 
1233 	puffs_newinfo_setcookie(pni, pn);
1234 #ifdef PUFFS_KFLAG_CACHE_FS_TTL
1235 	puffs_newinfo_setva(pni, &pn->pn_va);
1236 	perfuse_newinfo_setttl(pni, pn, feo, NULL);
1237 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
1238 
1239 #ifdef PERFUSE_DEBUG
1240 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1241 		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
1242 			"nodeid = 0x%"PRIx64", wfh = 0x%"PRIx64"\n",
1243 			__func__, (void *)pn, pcn->pcn_name,
1244 			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid,
1245 			foo->fh);
1246 #endif
1247 
1248 	ps->ps_destroy_msg(pm);
1249 	error = 0;
1250 
1251 out:
1252 	node_rele(opc);
1253 	return error;
1254 }
1255 
1256 
1257 int
1258 perfuse_node_mknod(struct puffs_usermount *pu, puffs_cookie_t opc,
1259 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1260 	const struct vattr *vap)
1261 {
1262 	struct perfuse_state *ps;
1263 	perfuse_msg_t *pm;
1264 	struct fuse_mknod_in *fmi;
1265 	const char* path;
1266 	size_t len;
1267 	int error;
1268 
1269 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1270 		return ENOENT;
1271 
1272 	node_ref(opc);
1273 
1274 	/*
1275 	 * Only superuser can mknod objects other than
1276 	 * directories, files, socks, fifo and links.
1277 	 *
1278 	 * Create an object require -WX permission in the parent directory
1279 	 */
1280 	switch (vap->va_type) {
1281 	case VDIR:	/* FALLTHROUGH */
1282 	case VREG:	/* FALLTHROUGH */
1283 	case VFIFO:	/* FALLTHROUGH */
1284 	case VSOCK:
1285 		break;
1286 	default:	/* VNON, VBLK, VCHR, VBAD */
1287 		if (!puffs_cred_isjuggernaut(pcn->pcn_cred)) {
1288 			error = EACCES;
1289 			goto out;
1290 		}
1291 		break;
1292 	}
1293 
1294 
1295 	ps = puffs_getspecific(pu);
1296 	path = pcn->pcn_name;
1297 	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
1298 
1299 	/*
1300 	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1301 	 */
1302 	pm = ps->ps_new_msg(pu, opc, FUSE_MKNOD, len, pcn->pcn_cred);
1303 	fmi = GET_INPAYLOAD(ps, pm, fuse_mknod_in);
1304 	fmi->mode = vap->va_mode | VTTOIF(vap->va_type);
1305 	fmi->rdev = (uint32_t)vap->va_rdev;
1306 	fmi->umask = 0; 	/* Seems unused bu libfuse */
1307 	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
1308 
1309 	error = node_mk_common(pu, opc, pni, pcn, pm);
1310 
1311 out:
1312 	node_rele(opc);
1313 	return error;
1314 }
1315 
1316 
1317 int
1318 perfuse_node_open(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1319 	const struct puffs_cred *pcr)
1320 {
1321 	struct perfuse_state *ps;
1322 	struct perfuse_node_data *pnd;
1323 	perfuse_msg_t *pm;
1324 	mode_t fmode;
1325 	int op;
1326 	struct fuse_open_in *foi;
1327 	struct fuse_open_out *foo;
1328 	struct puffs_node *pn;
1329 	int error;
1330 
1331 	ps = puffs_getspecific(pu);
1332 	pn = (struct puffs_node *)opc;
1333 	pnd = PERFUSE_NODE_DATA(opc);
1334 	error = 0;
1335 
1336 	if (pnd->pnd_flags & PND_REMOVED)
1337 		return ENOENT;
1338 
1339 	node_ref(opc);
1340 
1341 	if (puffs_pn_getvap(pn)->va_type == VDIR)
1342 		op = FUSE_OPENDIR;
1343 	else
1344 		op = FUSE_OPEN;
1345 
1346 	/*
1347 	 * libfuse docs says
1348 	 * - O_CREAT and O_EXCL should never be set.
1349 	 * - O_TRUNC may be used if mount option atomic_o_trunc is used XXX
1350 	 *
1351 	 * O_APPEND makes no sense since FUSE always sends
1352 	 * the file offset for write operations. If the
1353 	 * filesystem uses pwrite(), O_APPEND would cause
1354 	 * the offset to be ignored and cause file corruption.
1355 	 */
1356 	mode &= ~(O_CREAT|O_EXCL|O_APPEND);
1357 
1358 	/*
1359 	 * Do not open twice, and do not reopen for reading
1360 	 * if we already have write handle.
1361 	 */
1362 	if (((mode & FREAD) && (pnd->pnd_flags & PND_RFH)) ||
1363 	    ((mode & FREAD) && (pnd->pnd_flags & PND_WFH)) ||
1364 	    ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH)))
1365 		goto out;
1366 
1367 	/*
1368 	 * Queue open on a node so that we do not open
1369 	 * twice. This would be better with read and
1370 	 * write distinguished.
1371 	 */
1372 	while (pnd->pnd_flags & PND_INOPEN)
1373 		requeue_request(pu, opc, PCQ_OPEN);
1374 	pnd->pnd_flags |= PND_INOPEN;
1375 
1376 	/*
1377 	 * Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE
1378 	 * to O_RDONLY/O_WRONLY while perserving the other options.
1379 	 */
1380 	fmode = mode & ~(FREAD|FWRITE);
1381 	fmode |= (mode & FWRITE) ? O_RDWR : O_RDONLY;
1382 
1383 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*foi), pcr);
1384 	foi = GET_INPAYLOAD(ps, pm, fuse_open_in);
1385 	foi->flags = fmode;
1386 	foi->unused = 0;
1387 
1388 	if ((error = xchg_msg(pu, opc, pm, sizeof(*foo), wait_reply)) != 0)
1389 		goto out;
1390 
1391 	foo = GET_OUTPAYLOAD(ps, pm, fuse_open_out);
1392 
1393 	/*
1394 	 * Save the file handle in node private data
1395 	 * so that we can reuse it later
1396 	 */
1397 	perfuse_new_fh(opc, foo->fh, mode);
1398 
1399 #ifdef PERFUSE_DEBUG
1400 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1401 		DPRINTF("%s: opc = %p, file = \"%s\", "
1402 			"nodeid = 0x%"PRIx64", %s%sfh = 0x%"PRIx64"\n",
1403 			__func__, (void *)opc, perfuse_node_path(ps, opc),
1404 			pnd->pnd_nodeid, mode & FREAD ? "r" : "",
1405 			mode & FWRITE ? "w" : "", foo->fh);
1406 #endif
1407 
1408 	ps->ps_destroy_msg(pm);
1409 out:
1410 
1411 	pnd->pnd_flags &= ~PND_INOPEN;
1412 	(void)dequeue_requests(opc, PCQ_OPEN, DEQUEUE_ALL);
1413 
1414 	node_rele(opc);
1415 	return error;
1416 }
1417 
1418 /* ARGSUSED0 */
1419 int
1420 perfuse_node_close(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1421 	const struct puffs_cred *pcr)
1422 {
1423 	struct perfuse_node_data *pnd;
1424 
1425 	pnd = PERFUSE_NODE_DATA(opc);
1426 
1427 	if (!(pnd->pnd_flags & PND_OPEN))
1428 		return EBADF;
1429 
1430 	/*
1431 	 * Actual close is postponed at inactive time.
1432 	 */
1433 	return 0;
1434 }
1435 
1436 int
1437 perfuse_node_access(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1438 	const struct puffs_cred *pcr)
1439 {
1440 	perfuse_msg_t *pm;
1441 	struct perfuse_state *ps;
1442 	struct fuse_access_in *fai;
1443 	int error;
1444 
1445 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1446 		return ENOENT;
1447 
1448 	node_ref(opc);
1449 
1450 	/*
1451 	 * If we previously detected the filesystem does not
1452 	 * implement access(), short-circuit the call and skip
1453 	 * to libpuffs access() emulation.
1454 	 */
1455 	ps = puffs_getspecific(pu);
1456 	if (ps->ps_flags & PS_NO_ACCESS) {
1457 		const struct vattr *vap;
1458 
1459 		vap = puffs_pn_getvap((struct puffs_node *)opc);
1460 
1461 		error = puffs_access(IFTOVT(vap->va_mode),
1462 				     vap->va_mode & ACCESSPERMS,
1463 				     vap->va_uid, vap->va_gid,
1464 				     (mode_t)mode, pcr);
1465 		goto out;
1466 	}
1467 
1468 	/*
1469 	 * Plain access call
1470 	 */
1471 	pm = ps->ps_new_msg(pu, opc, FUSE_ACCESS, sizeof(*fai), pcr);
1472 	fai = GET_INPAYLOAD(ps, pm, fuse_access_in);
1473 	fai->mask = 0;
1474 	fai->mask |= (mode & PUFFS_VREAD) ? R_OK : 0;
1475 	fai->mask |= (mode & PUFFS_VWRITE) ? W_OK : 0;
1476 	fai->mask |= (mode & PUFFS_VEXEC) ? X_OK : 0;
1477 
1478 	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
1479 
1480 	ps->ps_destroy_msg(pm);
1481 
1482 	/*
1483 	 * If unimplemented, start over with emulation
1484 	 */
1485 	if (error == ENOSYS) {
1486 		ps->ps_flags |= PS_NO_ACCESS;
1487 		error = perfuse_node_access(pu, opc, mode, pcr);
1488 	}
1489 
1490 out:
1491 	node_rele(opc);
1492 	return error;
1493 }
1494 
1495 int
1496 perfuse_node_getattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1497 	struct vattr *vap, const struct puffs_cred *pcr)
1498 {
1499 	return perfuse_node_getattr_ttl(pu, opc, vap, pcr, NULL);
1500 }
1501 
1502 int
1503 perfuse_node_getattr_ttl(struct puffs_usermount *pu, puffs_cookie_t opc,
1504 	struct vattr *vap, const struct puffs_cred *pcr,
1505 	struct timespec *va_ttl)
1506 {
1507 	perfuse_msg_t *pm = NULL;
1508 	struct perfuse_state *ps;
1509 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
1510 	struct fuse_getattr_in *fgi;
1511 	struct fuse_attr_out *fao;
1512 	int error = 0;
1513 
1514 	if (pnd->pnd_flags & PND_REMOVED)
1515 		return ENOENT;
1516 
1517 	node_ref(opc);
1518 
1519 	/*
1520 	 * Serialize size access, see comment in perfuse_node_setattr().
1521 	 */
1522 	while (pnd->pnd_flags & PND_INRESIZE)
1523 		requeue_request(pu, opc, PCQ_RESIZE);
1524 	pnd->pnd_flags |= PND_INRESIZE;
1525 
1526 	ps = puffs_getspecific(pu);
1527 
1528 	/*
1529 	 * FUSE_GETATTR_FH must be set in fgi->flags
1530 	 * if we use for fgi->fh
1531 	 */
1532 	pm = ps->ps_new_msg(pu, opc, FUSE_GETATTR, sizeof(*fgi), pcr);
1533 	fgi = GET_INPAYLOAD(ps, pm, fuse_getattr_in);
1534 	fgi->getattr_flags = 0;
1535 	fgi->dummy = 0;
1536 	fgi->fh = 0;
1537 
1538 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_OPEN) {
1539 		fgi->fh = perfuse_get_fh(opc, FREAD);
1540 		fgi->getattr_flags |= FUSE_GETATTR_FH;
1541 	}
1542 
1543 #ifdef PERFUSE_DEBUG
1544 	if (perfuse_diagflags & PDF_RESIZE)
1545 		DPRINTF(">> %s %p %" PRIu64 "\n", __func__, (void *)opc,
1546 		    vap->va_size);
1547 #endif
1548 
1549 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
1550 		goto out;
1551 
1552 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1553 
1554 #ifdef PERFUSE_DEBUG
1555 	if (perfuse_diagflags & PDF_RESIZE)
1556 		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1557 		    (void *)opc, vap->va_size, fao->attr.size);
1558 #endif
1559 
1560 	/*
1561 	 * We set birthtime, flags, filerev,vaflags to 0.
1562 	 * This seems the best bet, since the information is
1563 	 * not available from filesystem.
1564 	 */
1565 	fuse_attr_to_vap(ps, vap, &fao->attr);
1566 
1567 	if (va_ttl != NULL) {
1568 		va_ttl->tv_sec = fao->attr_valid;
1569 		va_ttl->tv_nsec = fao->attr_valid_nsec;
1570 	}
1571 
1572 	ps->ps_destroy_msg(pm);
1573 	error = 0;
1574 out:
1575 
1576 	pnd->pnd_flags &= ~PND_INRESIZE;
1577 	(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
1578 
1579 	node_rele(opc);
1580 	return error;
1581 }
1582 
1583 int
1584 perfuse_node_setattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1585 	const struct vattr *vap, const struct puffs_cred *pcr)
1586 {
1587 	return perfuse_node_setattr_ttl(pu, opc,
1588 					__UNCONST(vap), pcr, NULL, 0);
1589 }
1590 
1591 int
1592 perfuse_node_setattr_ttl(struct puffs_usermount *pu, puffs_cookie_t opc,
1593 	struct vattr *vap, const struct puffs_cred *pcr,
1594 	struct timespec *va_ttl, int xflag)
1595 {
1596 	perfuse_msg_t *pm;
1597 	uint64_t fh;
1598 	struct perfuse_state *ps;
1599 	struct perfuse_node_data *pnd;
1600 	struct fuse_setattr_in *fsi;
1601 	struct fuse_attr_out *fao;
1602 	struct vattr *old_va;
1603 	enum perfuse_xchg_pb_reply reply;
1604 	int error;
1605 #ifdef PERFUSE_DEBUG
1606 	struct vattr *old_vap;
1607 	int resize_debug = 0;
1608 #endif
1609 	ps = puffs_getspecific(pu);
1610 	pnd = PERFUSE_NODE_DATA(opc);
1611 
1612 	/*
1613 	 * The only operation we can do once the file is removed
1614 	 * is to resize it, and we can do it only if it is open.
1615 	 * Do not even send the operation to the filesystem: the
1616 	 * file is not there anymore.
1617 	 */
1618 	if (pnd->pnd_flags & PND_REMOVED) {
1619 		if (!(pnd->pnd_flags & PND_OPEN))
1620 			return ENOENT;
1621 
1622 		return 0;
1623 	}
1624 
1625 	old_va = puffs_pn_getvap((struct puffs_node *)opc);
1626 
1627 	/*
1628 	 * Check for permission to change size
1629 	 * It is always allowed if we already have a write file handle
1630 	 */
1631 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1632 	    !(pnd->pnd_flags & PND_WFH) &&
1633 	    (error = mode_access(opc, pcr, PUFFS_VWRITE)) != 0)
1634 		return error;
1635 
1636 	/*
1637 	 * Check for permission to change dates
1638 	 */
1639 	if (((vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1640 	     (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL)) &&
1641 	    (puffs_access_times(old_va->va_uid, old_va->va_gid,
1642 				old_va->va_mode, 0, pcr) != 0))
1643 		return EACCES;
1644 
1645 	/*
1646 	 * Check for permission to change owner and group
1647 	 */
1648 	if (((vap->va_uid != (uid_t)PUFFS_VNOVAL) ||
1649 	     (vap->va_gid != (gid_t)PUFFS_VNOVAL)) &&
1650 	    (puffs_access_chown(old_va->va_uid, old_va->va_gid,
1651 				vap->va_uid, vap->va_gid, pcr)) != 0)
1652 		return EACCES;
1653 
1654 	/*
1655 	 * Check for permission to change permissions
1656 	 */
1657 	if ((vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
1658 	    (puffs_access_chmod(old_va->va_uid, old_va->va_gid,
1659 				old_va->va_type, vap->va_mode, pcr)) != 0)
1660 		return EACCES;
1661 
1662 	node_ref(opc);
1663 
1664 	if (pnd->pnd_flags & PND_WFH)
1665 		fh = perfuse_get_fh(opc, FWRITE);
1666 	else
1667 		fh = FUSE_UNKNOWN_FH;
1668 
1669 	/*
1670 	 * fchmod() sets mode and fh, and it may carry
1671 	 * a resize as well. That may break if the
1672 	 * filesystem does chmod then resize, and fails
1673 	 * because it does not have permission anymore.
1674 	 * We work this around by splitting into two setattr.
1675 	 */
1676 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1677 	    (vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
1678 	    (fh != FUSE_UNKNOWN_FH)) {
1679 		struct vattr resize_va;
1680 
1681 		(void)memcpy(&resize_va, vap, sizeof(resize_va));
1682 		resize_va.va_mode = (mode_t)PUFFS_VNOVAL;
1683 		if ((error = perfuse_node_setattr_ttl(pu, opc, &resize_va,
1684 						      pcr, va_ttl, xflag)) != 0)
1685 			goto out2;
1686 
1687 		vap->va_size = (u_quad_t)PUFFS_VNOVAL;
1688 	}
1689 
1690 	pm = ps->ps_new_msg(pu, opc, FUSE_SETATTR, sizeof(*fsi), pcr);
1691 	fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
1692 	fsi->valid = 0;
1693 
1694 	/*
1695 	 * Get a fh if the node is open for writing
1696 	 */
1697 	if (fh != FUSE_UNKNOWN_FH) {
1698 		fsi->fh = fh;
1699 		fsi->valid |= FUSE_FATTR_FH;
1700 	}
1701 
1702 
1703 	if (vap->va_size != (u_quad_t)PUFFS_VNOVAL) {
1704 		fsi->size = vap->va_size;
1705 		fsi->valid |= FUSE_FATTR_SIZE;
1706 
1707 		/*
1708 		 * Serialize anything that can touch file size
1709 		 * to avoid reordered GETATTR and SETATTR.
1710 		 * Out of order SETATTR can report stale size,
1711 		 * which will cause the kernel to truncate the file.
1712 		 * XXX Probably useless now we have a lock on GETATTR
1713 		 */
1714 		while (pnd->pnd_flags & PND_INRESIZE)
1715 			requeue_request(pu, opc, PCQ_RESIZE);
1716 		pnd->pnd_flags |= PND_INRESIZE;
1717 	}
1718 
1719 	/*
1720  	 * Setting mtime without atime or vice versa leads to
1721 	 * dates being reset to Epoch on glusterfs. If one
1722 	 * is missing, use the old value.
1723  	 */
1724 	if ((vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1725 	    (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL)) {
1726 
1727 		if (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) {
1728 			fsi->atime = vap->va_atime.tv_sec;
1729 			fsi->atimensec = (uint32_t)vap->va_atime.tv_nsec;
1730 		} else {
1731 			fsi->atime = old_va->va_atime.tv_sec;
1732 			fsi->atimensec = (uint32_t)old_va->va_atime.tv_nsec;
1733 		}
1734 
1735 		if (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) {
1736 			fsi->mtime = vap->va_mtime.tv_sec;
1737 			fsi->mtimensec = (uint32_t)vap->va_mtime.tv_nsec;
1738 		} else {
1739 			fsi->mtime = old_va->va_mtime.tv_sec;
1740 			fsi->mtimensec = (uint32_t)old_va->va_mtime.tv_nsec;
1741 		}
1742 
1743 		fsi->valid |= (FUSE_FATTR_MTIME|FUSE_FATTR_ATIME);
1744 	}
1745 
1746 	if (vap->va_mode != (mode_t)PUFFS_VNOVAL) {
1747 		fsi->mode = vap->va_mode;
1748 		fsi->valid |= FUSE_FATTR_MODE;
1749 	}
1750 
1751 	if (vap->va_uid != (uid_t)PUFFS_VNOVAL) {
1752 		fsi->uid = vap->va_uid;
1753 		fsi->valid |= FUSE_FATTR_UID;
1754 	}
1755 
1756 	if (vap->va_gid != (gid_t)PUFFS_VNOVAL) {
1757 		fsi->gid = vap->va_gid;
1758 		fsi->valid |= FUSE_FATTR_GID;
1759 	}
1760 
1761 	if (pnd->pnd_lock_owner != 0) {
1762 		fsi->lock_owner = pnd->pnd_lock_owner;
1763 		fsi->valid |= FUSE_FATTR_LOCKOWNER;
1764 	}
1765 
1766 	/*
1767 	 * ftruncate() sends only va_size, and metadata cache
1768 	 * flush adds va_atime and va_mtime. Some FUSE
1769 	 * filesystems will attempt to detect ftruncate by
1770 	 * checking for FATTR_SIZE being set without
1771 	 * FATTR_UID|FATTR_GID|FATTR_ATIME|FATTR_MTIME|FATTR_MODE
1772 	 *
1773 	 * Try to adapt and remove FATTR_ATIME|FATTR_MTIME
1774 	 * if we suspect a ftruncate().
1775 	 */
1776 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1777 	    ((vap->va_mode == (mode_t)PUFFS_VNOVAL) &&
1778 	     (vap->va_uid == (uid_t)PUFFS_VNOVAL) &&
1779 	     (vap->va_gid == (gid_t)PUFFS_VNOVAL))) {
1780 		fsi->atime = 0;
1781 		fsi->atimensec = 0;
1782 		fsi->mtime = 0;
1783 		fsi->mtimensec = 0;
1784 		fsi->valid &= ~(FUSE_FATTR_ATIME|FUSE_FATTR_MTIME);
1785 	}
1786 
1787 	/*
1788 	 * If nothing remain, discard the operation.
1789 	 */
1790 	if (!(fsi->valid & (FUSE_FATTR_SIZE|FUSE_FATTR_ATIME|FUSE_FATTR_MTIME|
1791 			    FUSE_FATTR_MODE|FUSE_FATTR_UID|FUSE_FATTR_GID))) {
1792 		error = 0;
1793 		ps->ps_destroy_msg(pm);
1794 		goto out;
1795 	}
1796 
1797 #ifdef PERFUSE_DEBUG
1798 	old_vap = puffs_pn_getvap((struct puffs_node *)opc);
1799 
1800 	if ((perfuse_diagflags & PDF_RESIZE) &&
1801 	    (old_vap->va_size != (u_quad_t)PUFFS_VNOVAL)) {
1802 		resize_debug = 1;
1803 
1804 		DPRINTF(">> %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1805 		    (void *)opc,
1806 		    puffs_pn_getvap((struct puffs_node *)opc)->va_size,
1807 		    fsi->size);
1808 	}
1809 #endif
1810 
1811 	/*
1812 	 * Do not honour FAF when changing size. How do
1813 	 * you want such a thing to work?
1814 	 */
1815 	reply = wait_reply;
1816 #ifdef PUFFS_SETATTR_FAF
1817 	if ((xflag & PUFFS_SETATTR_FAF) && !(fsi->valid & FUSE_FATTR_SIZE))
1818 		reply = no_reply;
1819 #endif
1820 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), reply)) != 0)
1821 		goto out;
1822 
1823 	if (reply == no_reply)
1824 		goto out;
1825 
1826 	/*
1827 	 * Copy back the new values
1828 	 */
1829 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1830 
1831 #ifdef PERFUSE_DEBUG
1832 	if (resize_debug)
1833 		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1834 		    (void *)opc, old_vap->va_size, fao->attr.size);
1835 #endif
1836 
1837 	fuse_attr_to_vap(ps, old_va, &fao->attr);
1838 
1839 	if (va_ttl != NULL) {
1840 		va_ttl->tv_sec = fao->attr_valid;
1841 		va_ttl->tv_nsec = fao->attr_valid_nsec;
1842 		(void)memcpy(vap, old_va, sizeof(*vap));
1843 	}
1844 
1845 	ps->ps_destroy_msg(pm);
1846 	error = 0;
1847 
1848 out:
1849 	if (pnd->pnd_flags & PND_INRESIZE) {
1850 		pnd->pnd_flags &= ~PND_INRESIZE;
1851 		(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
1852 	}
1853 
1854 out2:
1855 	node_rele(opc);
1856 	return error;
1857 }
1858 
1859 int
1860 perfuse_node_poll(struct puffs_usermount *pu, puffs_cookie_t opc, int *events)
1861 {
1862 	struct perfuse_state *ps;
1863 	perfuse_msg_t *pm;
1864 	struct fuse_poll_in *fpi;
1865 	struct fuse_poll_out *fpo;
1866 	int error;
1867 
1868 	node_ref(opc);
1869 	ps = puffs_getspecific(pu);
1870 	/*
1871 	 * kh is set if FUSE_POLL_SCHEDULE_NOTIFY is set.
1872 	 *
1873 	 * XXX ps_new_msg() is called with NULL creds, which will
1874 	 * be interpreted as FUSE superuser. We have no way to
1875 	 * know the requesting process' credential, but since poll
1876 	 * is supposed to operate on a file that has been open,
1877 	 * permission should have already been checked at open time.
1878 	 * That still may breaks on filesystems that provides odd
1879 	 * semantics.
1880  	 */
1881 	pm = ps->ps_new_msg(pu, opc, FUSE_POLL, sizeof(*fpi), NULL);
1882 	fpi = GET_INPAYLOAD(ps, pm, fuse_poll_in);
1883 	fpi->fh = perfuse_get_fh(opc, FREAD);
1884 	fpi->kh = 0;
1885 	fpi->flags = 0;
1886 
1887 #ifdef PERFUSE_DEBUG
1888 	if (perfuse_diagflags & PDF_FH)
1889 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
1890 			"fh = 0x%"PRIx64"\n", __func__, (void *)opc,
1891 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fpi->fh);
1892 #endif
1893 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fpo), wait_reply)) != 0)
1894 		goto out;
1895 
1896 	fpo = GET_OUTPAYLOAD(ps, pm, fuse_poll_out);
1897 	*events = fpo->revents;
1898 
1899 	ps->ps_destroy_msg(pm);
1900 	error = 0;
1901 
1902 out:
1903 	node_rele(opc);
1904 	return error;
1905 }
1906 
1907 /* ARGSUSED0 */
1908 int
1909 perfuse_node_mmap(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1910 	const struct puffs_cred *pcr)
1911 {
1912 	/*
1913 	 * Not implemented anymore in libfuse
1914 	 */
1915 	return ENOSYS;
1916 }
1917 
1918 /* ARGSUSED2 */
1919 int
1920 perfuse_node_fsync(struct puffs_usermount *pu, puffs_cookie_t opc,
1921 	const struct puffs_cred *pcr, int flags, off_t offlo, off_t offhi)
1922 {
1923 	int op;
1924 	perfuse_msg_t *pm;
1925 	struct perfuse_state *ps;
1926 	struct perfuse_node_data *pnd;
1927 	struct fuse_fsync_in *ffi;
1928 	uint64_t fh;
1929 	int error = 0;
1930 
1931 	pm = NULL;
1932 	ps = puffs_getspecific(pu);
1933 	pnd = PERFUSE_NODE_DATA(opc);
1934 
1935 	/*
1936 	 * No need to sync a removed node
1937 	 */
1938 	if (pnd->pnd_flags & PND_REMOVED)
1939 		return 0;
1940 
1941 	/*
1942 	 * We do not sync closed files. They have been
1943 	 * sync at inactive time already.
1944 	 */
1945 	if (!(pnd->pnd_flags & PND_OPEN))
1946 		return 0;
1947 
1948 	node_ref(opc);
1949 
1950 	if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR)
1951 		op = FUSE_FSYNCDIR;
1952 	else 		/* VREG but also other types such as VLNK */
1953 		op = FUSE_FSYNC;
1954 
1955 	/*
1956 	 * Do not sync if there are no change to sync
1957 	 * XXX remove that test on files if we implement mmap
1958 	 */
1959 #ifdef PERFUSE_DEBUG
1960 	if (perfuse_diagflags & PDF_SYNC)
1961 		DPRINTF("%s: TEST opc = %p, file = \"%s\" is %sdirty\n",
1962 			__func__, (void*)opc, perfuse_node_path(ps, opc),
1963 			pnd->pnd_flags & PND_DIRTY ? "" : "not ");
1964 #endif
1965 	if (!(pnd->pnd_flags & PND_DIRTY))
1966 		goto out;
1967 
1968 	/*
1969 	 * It seems NetBSD can call fsync without open first
1970 	 * glusterfs complain in such a situation:
1971 	 * "FSYNC() ERR => -1 (Invalid argument)"
1972 	 * The file will be closed at inactive time.
1973 	 *
1974 	 * We open the directory for reading in order to sync.
1975 	 * This sounds rather counterintuitive, but it works.
1976 	 */
1977 	if (!(pnd->pnd_flags & PND_WFH)) {
1978 		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
1979 			goto out;
1980 	}
1981 
1982 	if (op == FUSE_FSYNCDIR)
1983 		fh = perfuse_get_fh(opc, FREAD);
1984 	else
1985 		fh = perfuse_get_fh(opc, FWRITE);
1986 
1987 	/*
1988 	 * If fsync_flags  is set, meta data should not be flushed.
1989 	 */
1990 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*ffi), pcr);
1991 	ffi = GET_INPAYLOAD(ps, pm, fuse_fsync_in);
1992 	ffi->fh = fh;
1993 	ffi->fsync_flags = (flags & FFILESYNC) ? 0 : 1;
1994 
1995 #ifdef PERFUSE_DEBUG
1996 	if (perfuse_diagflags & PDF_FH)
1997 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
1998 			__func__, (void *)opc,
1999 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, ffi->fh);
2000 #endif
2001 
2002 	if ((error = xchg_msg(pu, opc, pm,
2003 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
2004 		goto out;
2005 
2006 	/*
2007 	 * No reply beyond fuse_out_header: nothing to do on success
2008 	 * just clear the dirty flag
2009 	 */
2010 	pnd->pnd_flags &= ~PND_DIRTY;
2011 
2012 #ifdef PERFUSE_DEBUG
2013 	if (perfuse_diagflags & PDF_SYNC)
2014 		DPRINTF("%s: CLEAR opc = %p, file = \"%s\"\n",
2015 			__func__, (void*)opc, perfuse_node_path(ps, opc));
2016 #endif
2017 
2018 	ps->ps_destroy_msg(pm);
2019 	error = 0;
2020 
2021 out:
2022 	/*
2023 	 * ENOSYS is not returned to kernel,
2024 	 */
2025 	if (error == ENOSYS)
2026 		error = 0;
2027 
2028 	node_rele(opc);
2029 	return error;
2030 }
2031 
2032 /* ARGSUSED0 */
2033 int
2034 perfuse_node_seek(struct puffs_usermount *pu, puffs_cookie_t opc,
2035 	off_t oldoff, off_t newoff, const struct puffs_cred *pcr)
2036 {
2037 	return 0;
2038 }
2039 
2040 int
2041 perfuse_node_remove(struct puffs_usermount *pu, puffs_cookie_t opc,
2042 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2043 {
2044 	struct perfuse_state *ps;
2045 	struct perfuse_node_data *pnd;
2046 	perfuse_msg_t *pm;
2047 	char *path;
2048 	const char *name;
2049 	size_t len;
2050 	int error;
2051 
2052 	pnd = PERFUSE_NODE_DATA(opc);
2053 
2054 	if ((pnd->pnd_flags & PND_REMOVED) ||
2055 	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2056 		return ENOENT;
2057 
2058 #ifdef PERFUSE_DEBUG
2059 	if (targ == NULL)
2060 		DERRX(EX_SOFTWARE, "%s: targ is NULL", __func__);
2061 
2062 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
2063 		DPRINTF("%s: opc = %p, remove opc = %p, file = \"%s\"\n",
2064 			__func__, (void *)opc, (void *)targ, pcn->pcn_name);
2065 #endif
2066 	node_ref(opc);
2067 	node_ref(targ);
2068 
2069 	/*
2070 	 * Await for all operations on the deleted node to drain,
2071 	 * as the filesystem may be confused to have it deleted
2072 	 * during a getattr
2073 	 */
2074 	while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
2075 		requeue_request(pu, targ, PCQ_AFTERXCHG);
2076 
2077 	ps = puffs_getspecific(pu);
2078 	pnd = PERFUSE_NODE_DATA(opc);
2079 	name = pcn->pcn_name;
2080 	len = pcn->pcn_namelen + 1;
2081 
2082 	pm = ps->ps_new_msg(pu, opc, FUSE_UNLINK, len, pcn->pcn_cred);
2083 	path = _GET_INPAYLOAD(ps, pm, char *);
2084 	(void)strlcpy(path, name, len);
2085 
2086 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2087 		goto out;
2088 
2089 	perfuse_cache_flush(targ);
2090 	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2091 
2092 	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2093 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2094 
2095 	/*
2096 	 * The parent directory needs a sync
2097 	 */
2098 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2099 
2100 #ifdef PERFUSE_DEBUG
2101 	if (perfuse_diagflags & PDF_FILENAME)
2102 		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2103 			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2104 			pcn->pcn_name);
2105 #endif
2106 	ps->ps_destroy_msg(pm);
2107 	error = 0;
2108 
2109 out:
2110 	node_rele(opc);
2111 	node_rele(targ);
2112 	return error;
2113 }
2114 
2115 int
2116 perfuse_node_link(struct puffs_usermount *pu, puffs_cookie_t opc,
2117 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2118 {
2119 	struct perfuse_state *ps;
2120 	perfuse_msg_t *pm;
2121 	const char *name;
2122 	size_t len;
2123 	struct puffs_node *pn;
2124 	struct fuse_link_in *fli;
2125 	int error;
2126 
2127 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2128 		return ENOENT;
2129 
2130 	node_ref(opc);
2131 	node_ref(targ);
2132 	ps = puffs_getspecific(pu);
2133 	pn = (struct puffs_node *)targ;
2134 	name = pcn->pcn_name;
2135 	len =  sizeof(*fli) + pcn->pcn_namelen + 1;
2136 
2137 	pm = ps->ps_new_msg(pu, opc, FUSE_LINK, len, pcn->pcn_cred);
2138 	fli = GET_INPAYLOAD(ps, pm, fuse_link_in);
2139 	fli->oldnodeid = PERFUSE_NODE_DATA(pn)->pnd_nodeid;
2140 	(void)strlcpy((char *)(void *)(fli + 1), name, len - sizeof(*fli));
2141 
2142 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2143 		goto out;
2144 
2145 	ps->ps_destroy_msg(pm);
2146 	error = 0;
2147 
2148 out:
2149 	node_rele(opc);
2150 	node_rele(targ);
2151 	return error;
2152 }
2153 
2154 int
2155 perfuse_node_rename(struct puffs_usermount *pu, puffs_cookie_t opc,
2156 	puffs_cookie_t src, const struct puffs_cn *pcn_src,
2157 	puffs_cookie_t targ_dir, puffs_cookie_t targ,
2158 	const struct puffs_cn *pcn_targ)
2159 {
2160 	struct perfuse_state *ps;
2161 	struct perfuse_node_data *dstdir_pnd;
2162 	perfuse_msg_t *pm;
2163 	struct fuse_rename_in *fri;
2164 	const char *newname;
2165 	const char *oldname;
2166 	char *np;
2167 	int error;
2168 	size_t len;
2169 	size_t newname_len;
2170 	size_t oldname_len;
2171 
2172 	if ((PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED) ||
2173 	    (PERFUSE_NODE_DATA(src)->pnd_flags & PND_REMOVED) ||
2174 	    (PERFUSE_NODE_DATA(targ_dir)->pnd_flags & PND_REMOVED))
2175 		return ENOENT;
2176 
2177 	node_ref(opc);
2178 	node_ref(src);
2179 
2180 	/*
2181 	 * Await for all operations on the deleted node to drain,
2182 	 * as the filesystem may be confused to have it deleted
2183 	 * during a getattr
2184 	 */
2185 	if ((struct puffs_node *)targ != NULL) {
2186 		node_ref(targ);
2187 		while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
2188 			requeue_request(pu, targ, PCQ_AFTERXCHG);
2189 	} else {
2190 		while (PERFUSE_NODE_DATA(src)->pnd_inxchg)
2191 			requeue_request(pu, src, PCQ_AFTERXCHG);
2192 	}
2193 
2194 	ps = puffs_getspecific(pu);
2195 	newname =  pcn_targ->pcn_name;
2196 	newname_len = pcn_targ->pcn_namelen + 1;
2197 	oldname =  pcn_src->pcn_name;
2198 	oldname_len = pcn_src->pcn_namelen + 1;
2199 
2200 	len = sizeof(*fri) + oldname_len + newname_len;
2201 	pm = ps->ps_new_msg(pu, opc, FUSE_RENAME, len, pcn_targ->pcn_cred);
2202 	fri = GET_INPAYLOAD(ps, pm, fuse_rename_in);
2203 	fri->newdir = PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid;
2204 	np = (char *)(void *)(fri + 1);
2205 	(void)strlcpy(np, oldname, oldname_len);
2206 	np += oldname_len;
2207 	(void)strlcpy(np, newname, newname_len);
2208 
2209 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2210 		goto out;
2211 
2212 
2213 	/*
2214 	 * Record new parent nodeid
2215 	 */
2216 	dstdir_pnd = PERFUSE_NODE_DATA(targ_dir);
2217 	PERFUSE_NODE_DATA(src)->pnd_parent_nodeid = dstdir_pnd->pnd_nodeid;
2218 
2219 	if (opc != targ_dir)
2220 		dstdir_pnd->pnd_flags |= PND_DIRTY;
2221 
2222 	if (strcmp(newname, "..") != 0)
2223 		(void)strlcpy(PERFUSE_NODE_DATA(src)->pnd_name,
2224 		    newname, MAXPATHLEN);
2225 	else
2226 		PERFUSE_NODE_DATA(src)->pnd_name[0] = 0; /* forget name */
2227 
2228 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2229 
2230 	if ((struct puffs_node *)targ != NULL) {
2231 		perfuse_cache_flush(targ);
2232 		PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2233 	}
2234 
2235 #ifdef PERFUSE_DEBUG
2236 	if (perfuse_diagflags & PDF_FILENAME)
2237 		DPRINTF("%s: nodeid = 0x%"PRIx64" file = \"%s\" renamed \"%s\" "
2238 			"nodeid = 0x%"PRIx64" -> nodeid = 0x%"PRIx64" \"%s\"\n",
2239 	 		__func__, PERFUSE_NODE_DATA(src)->pnd_nodeid,
2240 			pcn_src->pcn_name, pcn_targ->pcn_name,
2241 			PERFUSE_NODE_DATA(opc)->pnd_nodeid,
2242 			PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid,
2243 			perfuse_node_path(ps, targ_dir));
2244 #endif
2245 
2246 	ps->ps_destroy_msg(pm);
2247 	error = 0;
2248 
2249 out:
2250 	node_rele(opc);
2251 	node_rele(src);
2252 	if ((struct puffs_node *)targ != NULL)
2253 		node_rele(targ);
2254 
2255 	return error;
2256 }
2257 
2258 int
2259 perfuse_node_mkdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2260 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
2261 	const struct vattr *vap)
2262 {
2263 	struct perfuse_state *ps;
2264 	perfuse_msg_t *pm;
2265 	struct fuse_mkdir_in *fmi;
2266 	const char *path;
2267 	size_t len;
2268 	int error;
2269 
2270 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2271 		return ENOENT;
2272 
2273 	node_ref(opc);
2274 	ps = puffs_getspecific(pu);
2275 	path = pcn->pcn_name;
2276 	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
2277 
2278 	pm = ps->ps_new_msg(pu, opc, FUSE_MKDIR, len, pcn->pcn_cred);
2279 	fmi = GET_INPAYLOAD(ps, pm, fuse_mkdir_in);
2280 	fmi->mode = vap->va_mode;
2281 	fmi->umask = 0; 	/* Seems unused by libfuse? */
2282 	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
2283 
2284 	error = node_mk_common(pu, opc, pni, pcn, pm);
2285 
2286 	node_rele(opc);
2287 	return error;
2288 }
2289 
2290 
2291 int
2292 perfuse_node_rmdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2293 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2294 {
2295 	struct perfuse_state *ps;
2296 	struct perfuse_node_data *pnd;
2297 	perfuse_msg_t *pm;
2298 	char *path;
2299 	const char *name;
2300 	size_t len;
2301 	int error;
2302 
2303 	pnd = PERFUSE_NODE_DATA(opc);
2304 
2305 	if ((pnd->pnd_flags & PND_REMOVED) ||
2306 	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2307 		return ENOENT;
2308 
2309 	node_ref(opc);
2310 	node_ref(targ);
2311 
2312 	/*
2313 	 * Await for all operations on the deleted node to drain,
2314 	 * as the filesystem may be confused to have it deleted
2315 	 * during a getattr
2316 	 */
2317 	while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
2318 		requeue_request(pu, targ, PCQ_AFTERXCHG);
2319 
2320 	ps = puffs_getspecific(pu);
2321 	name = pcn->pcn_name;
2322 	len = pcn->pcn_namelen + 1;
2323 
2324 	pm = ps->ps_new_msg(pu, opc, FUSE_RMDIR, len, pcn->pcn_cred);
2325 	path = _GET_INPAYLOAD(ps, pm, char *);
2326 	(void)strlcpy(path, name, len);
2327 
2328 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2329 		goto out;
2330 
2331 	perfuse_cache_flush(targ);
2332 	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2333 
2334 	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2335 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2336 
2337 	/*
2338 	 * The parent directory needs a sync
2339 	 */
2340 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2341 
2342 #ifdef PERFUSE_DEBUG
2343 	if (perfuse_diagflags & PDF_FILENAME)
2344 		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2345 			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2346 			perfuse_node_path(ps, targ));
2347 #endif
2348 	ps->ps_destroy_msg(pm);
2349 	error = 0;
2350 
2351 out:
2352 	node_rele(opc);
2353 	node_rele(targ);
2354 	return error;
2355 }
2356 
2357 /* vap is unused */
2358 /* ARGSUSED4 */
2359 int
2360 perfuse_node_symlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2361 	struct puffs_newinfo *pni, const struct puffs_cn *pcn_src,
2362 	const struct vattr *vap, const char *link_target)
2363 {
2364 	struct perfuse_state *ps;
2365 	perfuse_msg_t *pm;
2366 	char *np;
2367 	const char *path;
2368 	size_t path_len;
2369 	size_t linkname_len;
2370 	size_t len;
2371 	int error;
2372 
2373 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2374 		return ENOENT;
2375 
2376 	node_ref(opc);
2377 	ps = puffs_getspecific(pu);
2378 	path = pcn_src->pcn_name;
2379 	path_len = pcn_src->pcn_namelen + 1;
2380 	linkname_len = strlen(link_target) + 1;
2381 	len = path_len + linkname_len;
2382 
2383 	pm = ps->ps_new_msg(pu, opc, FUSE_SYMLINK, len, pcn_src->pcn_cred);
2384 	np = _GET_INPAYLOAD(ps, pm, char *);
2385 	(void)strlcpy(np, path, path_len);
2386 	np += path_len;
2387 	(void)strlcpy(np, link_target, linkname_len);
2388 
2389 	error = node_mk_common(pu, opc, pni, pcn_src, pm);
2390 
2391 	node_rele(opc);
2392 	return error;
2393 }
2394 
2395 /* ARGSUSED4 */
2396 int
2397 perfuse_node_readdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2398 	struct dirent *dent, off_t *readoff, size_t *reslen,
2399 	const struct puffs_cred *pcr, int *eofflag, off_t *cookies,
2400 	size_t *ncookies)
2401 {
2402 	perfuse_msg_t *pm;
2403 	uint64_t fh;
2404 	struct perfuse_state *ps;
2405 	struct perfuse_node_data *pnd;
2406 	struct fuse_read_in *fri;
2407 	struct fuse_out_header *foh;
2408 	struct fuse_dirent *fd;
2409 	size_t foh_len;
2410 	int error;
2411 	size_t fd_maxlen;
2412 
2413 	error = 0;
2414 	node_ref(opc);
2415 	ps = puffs_getspecific(pu);
2416 
2417 	/*
2418 	 * readdir state is kept at node level, and several readdir
2419 	 * requests can be issued at the same time on the same node.
2420 	 * We need to queue requests so that only one is in readdir
2421 	 * code at the same time.
2422 	 */
2423 	pnd = PERFUSE_NODE_DATA(opc);
2424 	while (pnd->pnd_flags & PND_INREADDIR)
2425 		requeue_request(pu, opc, PCQ_READDIR);
2426 	pnd->pnd_flags |= PND_INREADDIR;
2427 
2428 #ifdef PERFUSE_DEBUG
2429 	if (perfuse_diagflags & PDF_READDIR)
2430 		DPRINTF("%s: READDIR opc = %p enter critical section\n",
2431 			__func__, (void *)opc);
2432 #endif
2433 	/*
2434 	 * Re-initialize pnd->pnd_fd_cookie on the first readdir for a node
2435 	 */
2436 	if (*readoff == 0)
2437 		pnd->pnd_fd_cookie = 0;
2438 
2439 	/*
2440 	 * Do we already have the data bufered?
2441 	 */
2442 	if (pnd->pnd_dirent != NULL)
2443 		goto out;
2444 	pnd->pnd_dirent_len = 0;
2445 
2446 	/*
2447 	 * It seems NetBSD can call readdir without open first
2448 	 * libfuse will crash if it is done that way, hence open first.
2449 	 */
2450 	if (!(pnd->pnd_flags & PND_OPEN)) {
2451 		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
2452 			goto out;
2453 	}
2454 
2455 	fh = perfuse_get_fh(opc, FREAD);
2456 
2457 #ifdef PERFUSE_DEBUG
2458 	if (perfuse_diagflags & PDF_FH)
2459 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
2460 			"rfh = 0x%"PRIx64"\n", __func__, (void *)opc,
2461 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fh);
2462 #endif
2463 
2464 	pnd->pnd_all_fd = NULL;
2465 	pnd->pnd_all_fd_len = 0;
2466 	fd_maxlen = ps->ps_max_readahead - sizeof(*foh);
2467 
2468 	do {
2469 		size_t fd_len;
2470 		char *afdp;
2471 
2472 		pm = ps->ps_new_msg(pu, opc, FUSE_READDIR, sizeof(*fri), pcr);
2473 
2474 		/*
2475 		 * read_flags, lock_owner and flags are unused in libfuse
2476 		 */
2477 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
2478 		fri->fh = fh;
2479 		fri->offset = pnd->pnd_fd_cookie;
2480 		fri->size = (uint32_t)fd_maxlen;
2481 		fri->read_flags = 0;
2482 		fri->lock_owner = 0;
2483 		fri->flags = 0;
2484 
2485 		if ((error = xchg_msg(pu, opc, pm,
2486 				      UNSPEC_REPLY_LEN, wait_reply)) != 0)
2487 			goto out;
2488 
2489 		/*
2490 		 * There are many puffs_framebufs calls later,
2491 		 * therefore foh will not be valid for a long time.
2492 		 * Just get the length and forget it.
2493 		 */
2494 		foh = GET_OUTHDR(ps, pm);
2495 		foh_len = foh->len;
2496 
2497 		/*
2498 		 * Empty read: we reached the end of the buffer.
2499 		 */
2500 		if (foh_len == sizeof(*foh)) {
2501 			ps->ps_destroy_msg(pm);
2502 			*eofflag = 1;
2503 			break;
2504 		}
2505 
2506 		/*
2507 		 * Check for corrupted message.
2508 		 */
2509 		if (foh_len < sizeof(*foh) + sizeof(*fd)) {
2510 			ps->ps_destroy_msg(pm);
2511 			DWARNX("readdir reply too short");
2512 			error = EIO;
2513 			goto out;
2514 		}
2515 
2516 
2517 		fd = GET_OUTPAYLOAD(ps, pm, fuse_dirent);
2518 		fd_len = foh_len - sizeof(*foh);
2519 
2520 		pnd->pnd_all_fd = realloc(pnd->pnd_all_fd,
2521 					  pnd->pnd_all_fd_len + fd_len);
2522 		if (pnd->pnd_all_fd  == NULL)
2523 			DERR(EX_OSERR, "%s: malloc failed", __func__);
2524 
2525 		afdp = (char *)(void *)pnd->pnd_all_fd + pnd->pnd_all_fd_len;
2526 		(void)memcpy(afdp, fd, fd_len);
2527 
2528 		pnd->pnd_all_fd_len += fd_len;
2529 
2530 		/*
2531 		 * The fd->off field is used as a cookie for
2532 		 * resuming the next readdir() where this one was left.
2533 	 	 */
2534 		pnd->pnd_fd_cookie = readdir_last_cookie(fd, fd_len);
2535 
2536 		ps->ps_destroy_msg(pm);
2537 	} while (1 /* CONSTCOND */);
2538 
2539 	if (pnd->pnd_all_fd != NULL) {
2540 		if (fuse_to_dirent(pu, opc, pnd->pnd_all_fd,
2541 				   pnd->pnd_all_fd_len) == -1)
2542 			error = EIO;
2543 	}
2544 
2545 out:
2546 	if (pnd->pnd_all_fd != NULL) {
2547 		free(pnd->pnd_all_fd);
2548 		pnd->pnd_all_fd = NULL;
2549 		pnd->pnd_all_fd_len = 0;
2550 	}
2551 
2552 	if (error == 0)
2553 		readdir_buffered(opc, dent, readoff, reslen);
2554 
2555 	/*
2556 	 * Schedule queued readdir requests
2557 	 */
2558 	pnd->pnd_flags &= ~PND_INREADDIR;
2559 	(void)dequeue_requests(opc, PCQ_READDIR, DEQUEUE_ALL);
2560 
2561 #ifdef PERFUSE_DEBUG
2562 	if (perfuse_diagflags & PDF_READDIR)
2563 		DPRINTF("%s: READDIR opc = %p exit critical section\n",
2564 			__func__, (void *)opc);
2565 #endif
2566 
2567 	node_rele(opc);
2568 	return error;
2569 }
2570 
2571 int
2572 perfuse_node_readlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2573 	const struct puffs_cred *pcr, char *linkname, size_t *linklen)
2574 {
2575 	struct perfuse_state *ps;
2576 	perfuse_msg_t *pm;
2577 	int error;
2578 	size_t len;
2579 	struct fuse_out_header *foh;
2580 
2581 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2582 		return ENOENT;
2583 
2584 	node_ref(opc);
2585 	ps = puffs_getspecific(pu);
2586 
2587 	pm = ps->ps_new_msg(pu, opc, FUSE_READLINK, 0, pcr);
2588 
2589 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2590 		goto out;
2591 
2592 	foh = GET_OUTHDR(ps, pm);
2593 	len = foh->len - sizeof(*foh);
2594 	if (len > *linklen)
2595 		DERRX(EX_PROTOCOL, "path len = %zd too long", len);
2596 	if (len == 0)
2597 		DERRX(EX_PROTOCOL, "path len = %zd too short", len);
2598 
2599 	/*
2600 	 * FUSE filesystems return a NUL terminated string, we
2601 	 * do not want to trailing \0
2602 	 */
2603 	*linklen = len - 1;
2604 	(void)memcpy(linkname, _GET_OUTPAYLOAD(ps, pm, char *), len);
2605 
2606 	ps->ps_destroy_msg(pm);
2607 	error = 0;
2608 
2609 out:
2610 	node_rele(opc);
2611 	return error;
2612 }
2613 
2614 int
2615 perfuse_node_reclaim(struct puffs_usermount *pu, puffs_cookie_t opc)
2616 {
2617 	struct perfuse_state *ps;
2618 	perfuse_msg_t *pm;
2619 	struct perfuse_node_data *pnd;
2620 	struct fuse_forget_in *ffi;
2621 	int nlookup;
2622 	struct timespec now;
2623 
2624 	if (opc == 0)
2625 		return 0;
2626 
2627 	ps = puffs_getspecific(pu);
2628 	pnd = PERFUSE_NODE_DATA(opc);
2629 
2630 	/*
2631 	 * Never forget the root.
2632 	 */
2633 	if (pnd->pnd_nodeid == FUSE_ROOT_ID)
2634 		return 0;
2635 
2636 	/*
2637 	 * There is a race condition between reclaim and lookup.
2638 	 * When looking up an already known node, the kernel cannot
2639 	 * hold a reference on the result until it gets the PUFFS
2640 	 * reply. It mayy therefore reclaim the node after the
2641 	 * userland looked it up, and before it gets the reply.
2642 	 * On rely, the kernel re-creates the node, but at that
2643 	 * time the node has been reclaimed in userland.
2644 	 *
2645 	 * In order to avoid this, we refuse reclaiming nodes that
2646 	 * are too young since the last lookup - and that we do
2647 	 * not have removed on our own, of course.
2648 	 */
2649 	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
2650 		DERR(EX_OSERR, "clock_gettime failed");
2651 
2652 	if (timespeccmp(&pnd->pnd_cn_expire, &now, >) &&
2653 	    !(pnd->pnd_flags & PND_REMOVED)) {
2654 		if (!(pnd->pnd_flags & PND_NODELEAK)) {
2655 			ps->ps_nodeleakcount++;
2656 			pnd->pnd_flags |= PND_NODELEAK;
2657 		}
2658 		DWARNX("possible leaked node:: opc = %p \"%s\"",
2659 		       opc, pnd->pnd_name);
2660 		return 0;
2661 	}
2662 
2663 	node_ref(opc);
2664 	pnd->pnd_flags |= PND_RECLAIMED;
2665 	pnd->pnd_puffs_nlookup--;
2666 	nlookup = pnd->pnd_puffs_nlookup;
2667 
2668 #ifdef PERFUSE_DEBUG
2669 	if (perfuse_diagflags & PDF_RECLAIM)
2670 		DPRINTF("%s (nodeid %"PRId64") reclaimed\n",
2671 			perfuse_node_path(ps, opc), pnd->pnd_nodeid);
2672 #endif
2673 
2674 #ifdef PERFUSE_DEBUG
2675 	if (perfuse_diagflags & PDF_RECLAIM)
2676 		DPRINTF("%s (nodeid %"PRId64") is %sreclaimed, nlookup = %d "
2677 			"%s%s%s%s, pending ops:%s%s%s\n",
2678 		        perfuse_node_path(ps, opc), pnd->pnd_nodeid,
2679 		        pnd->pnd_flags & PND_RECLAIMED ? "" : "not ",
2680 			pnd->pnd_puffs_nlookup,
2681 			pnd->pnd_flags & PND_OPEN ? "open " : "not open",
2682 			pnd->pnd_flags & PND_RFH ? "r" : "",
2683 			pnd->pnd_flags & PND_WFH ? "w" : "",
2684 			pnd->pnd_flags & PND_BUSY ? "" : " none",
2685 			pnd->pnd_flags & PND_INREADDIR ? " readdir" : "",
2686 			pnd->pnd_flags & PND_INWRITE ? " write" : "",
2687 			pnd->pnd_flags & PND_INOPEN ? " open" : "");
2688 #endif
2689 	/*
2690 	 * Make sure it is not looked up again
2691 	 */
2692 	if (!(pnd->pnd_flags & PND_REMOVED))
2693 		perfuse_cache_flush(opc);
2694 
2695 	/*
2696 	 * Purge any activity on the node, while checking
2697 	 * that it remains eligible for a reclaim.
2698 	 */
2699 	while (pnd->pnd_ref > 1)
2700 		requeue_request(pu, opc, PCQ_REF);
2701 
2702 	/*
2703 	 * reclaim cancel?
2704 	 */
2705 	if (pnd->pnd_puffs_nlookup > nlookup) {
2706 		pnd->pnd_flags &= ~PND_RECLAIMED;
2707 		perfuse_node_cache(ps, opc);
2708 		node_rele(opc);
2709 		return 0;
2710 	}
2711 
2712 
2713 #ifdef PERFUSE_DEBUG
2714 	if ((pnd->pnd_flags & PND_OPEN) ||
2715 	       !TAILQ_EMPTY(&pnd->pnd_pcq))
2716 		DERRX(EX_SOFTWARE, "%s: opc = %p: still open",
2717 		      __func__, opc);
2718 
2719 	if ((pnd->pnd_flags & PND_BUSY) ||
2720 	       !TAILQ_EMPTY(&pnd->pnd_pcq))
2721 		DERRX(EX_SOFTWARE, "%s: opc = %p: queued operations",
2722 		      __func__, opc);
2723 
2724 	if (pnd->pnd_inxchg != 0)
2725 		DERRX(EX_SOFTWARE, "%s: opc = %p: ongoing operations",
2726 		      __func__, opc);
2727 #endif
2728 
2729 	/*
2730 	 * Send the FORGET message
2731 	 *
2732 	 * ps_new_msg() is called with NULL creds, which will
2733 	 * be interpreted as FUSE superuser. This is obviously
2734 	 * fine since we operate with kernel creds here.
2735 	 */
2736 	pm = ps->ps_new_msg(pu, opc, FUSE_FORGET,
2737 		      sizeof(*ffi), NULL);
2738 	ffi = GET_INPAYLOAD(ps, pm, fuse_forget_in);
2739 	ffi->nlookup = pnd->pnd_fuse_nlookup;
2740 
2741 	/*
2742 	 * No reply is expected, pm is freed in xchg_msg
2743 	 */
2744 	(void)xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, no_reply);
2745 
2746 	perfuse_destroy_pn(pu, opc);
2747 
2748 	return 0;
2749 }
2750 
2751 int
2752 perfuse_node_inactive(struct puffs_usermount *pu, puffs_cookie_t opc)
2753 {
2754 	struct perfuse_node_data *pnd;
2755 	int error;
2756 
2757 	if (opc == 0)
2758 		return 0;
2759 
2760 	node_ref(opc);
2761 	pnd = PERFUSE_NODE_DATA(opc);
2762 
2763 	if (!(pnd->pnd_flags & (PND_OPEN|PND_REMOVED)))
2764 		goto out;
2765 
2766 	/*
2767 	 * Make sure all operation are finished
2768 	 * There can be an ongoing write. Other
2769 	 * operation wait for all data before
2770 	 * the close/inactive.
2771 	 */
2772 	while (pnd->pnd_flags & PND_INWRITE)
2773 		requeue_request(pu, opc, PCQ_AFTERWRITE);
2774 
2775 	/*
2776 	 * The inactive operation may be cancelled,
2777 	 * If no open is in progress, set PND_INOPEN
2778 	 * so that a new open will be queued.
2779 	 */
2780 	if (pnd->pnd_flags & PND_INOPEN)
2781 		goto out;
2782 
2783 	pnd->pnd_flags |= PND_INOPEN;
2784 
2785 	/*
2786 	 * Sync data
2787 	 */
2788 	if (pnd->pnd_flags & PND_DIRTY) {
2789 		if ((error = perfuse_node_fsync(pu, opc, NULL, 0, 0, 0)) != 0)
2790 			DWARN("%s: perfuse_node_fsync failed error = %d",
2791 			      __func__, error);
2792 	}
2793 
2794 
2795 	/*
2796 	 * Close handles
2797 	 */
2798 	if (pnd->pnd_flags & PND_WFH) {
2799 		if ((error = perfuse_node_close_common(pu, opc, FWRITE)) != 0)
2800 			DWARN("%s: close write FH failed error = %d",
2801 			      __func__, error);
2802 	}
2803 
2804 	if (pnd->pnd_flags & PND_RFH) {
2805 		if ((error = perfuse_node_close_common(pu, opc, FREAD)) != 0)
2806 			DWARN("%s: close read FH failed error = %d",
2807 			      __func__, error);
2808 	}
2809 
2810 	/*
2811 	 * This will cause a reclaim to be sent
2812 	 */
2813 	if (pnd->pnd_flags & PND_REMOVED)
2814 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1);
2815 
2816 	/*
2817 	 * Schedule awaiting operations
2818 	 */
2819 	pnd->pnd_flags &= ~PND_INOPEN;
2820 	(void)dequeue_requests(opc, PCQ_OPEN, DEQUEUE_ALL);
2821 
2822 	/*
2823 	 * errors are ignored, since the kernel ignores the return code.
2824 	 */
2825 out:
2826 	node_rele(opc);
2827 	return 0;
2828 }
2829 
2830 
2831 /* ARGSUSED0 */
2832 int
2833 perfuse_node_print(struct puffs_usermount *pu, puffs_cookie_t opc)
2834 {
2835 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
2836 	return 0;
2837 }
2838 
2839 /* ARGSUSED0 */
2840 int
2841 perfuse_node_pathconf(struct puffs_usermount *pu, puffs_cookie_t opc,
2842 	int name, int *retval)
2843 {
2844 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
2845 	return 0;
2846 }
2847 
2848 int
2849 perfuse_node_advlock(struct puffs_usermount *pu, puffs_cookie_t opc,
2850 	void *id, int op, struct flock *fl, int flags)
2851 {
2852 	struct perfuse_state *ps;
2853 	int fop;
2854 	perfuse_msg_t *pm;
2855 	uint64_t fh;
2856 	struct fuse_lk_in *fli;
2857 	struct fuse_out_header *foh;
2858 	struct fuse_lk_out *flo;
2859 	uint32_t owner;
2860 	size_t len;
2861 	int error;
2862 
2863 	node_ref(opc);
2864 
2865 	/*
2866 	 * Make sure we do have a filehandle, as the FUSE filesystem
2867 	 * expect one. E.g.: if we provide none, GlusterFS logs an error
2868 	 * "0-glusterfs-fuse: xl is NULL"
2869 	 *
2870 	 * We need the read file handle if the file is open read only,
2871 	 * in order to support shared locks on read-only files.
2872 	 * NB: The kernel always sends advlock for read-only
2873 	 * files at exit time when the process used lock, see
2874 	 * sys_exit -> exit1 -> fd_free -> fd_close -> VOP_ADVLOCK
2875 	 */
2876 	if ((fh = perfuse_get_fh(opc, FREAD)) == FUSE_UNKNOWN_FH) {
2877 		error = EBADF;
2878 		goto out;
2879 	}
2880 
2881 	ps = puffs_getspecific(pu);
2882 
2883 	if (op == F_GETLK)
2884 		fop = FUSE_GETLK;
2885 	else
2886 		fop = (flags & F_WAIT) ? FUSE_SETLKW : FUSE_SETLK;
2887 
2888 	/*
2889 	 * XXX ps_new_msg() is called with NULL creds, which will
2890 	 * be interpreted as FUSE superuser. We have no way to
2891 	 * know the requesting process' credential, but since advlock()
2892 	 * is supposed to operate on a file that has been open(),
2893 	 * permission should have already been checked at open() time.
2894 	 */
2895 	pm = ps->ps_new_msg(pu, opc, fop, sizeof(*fli), NULL);
2896 	fli = GET_INPAYLOAD(ps, pm, fuse_lk_in);
2897 	fli->fh = fh;
2898 	fli->owner = (uint64_t)(vaddr_t)id;
2899 	fli->lk.start = fl->l_start;
2900 	fli->lk.end = fl->l_start + fl->l_len;
2901 	fli->lk.type = fl->l_type;
2902 	fli->lk.pid = fl->l_pid;
2903 	fli->lk_flags = (flags & F_FLOCK) ? FUSE_LK_FLOCK : 0;
2904 
2905 	owner = (uint32_t)(vaddr_t)id;
2906 
2907 #ifdef PERFUSE_DEBUG
2908 	if (perfuse_diagflags & PDF_FH)
2909 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
2910 			__func__, (void *)opc,
2911 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fli->fh);
2912 #endif
2913 
2914 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2915 		goto out;
2916 
2917 	foh = GET_OUTHDR(ps, pm);
2918 	len = foh->len - sizeof(*foh);
2919 
2920 	/*
2921 	 * Save or clear the lock
2922 	 */
2923 	switch (op) {
2924 	case F_GETLK:
2925 		if (len != sizeof(*flo))
2926 			DERRX(EX_SOFTWARE,
2927 			      "%s: Unexpected lock reply len %zd",
2928 			      __func__, len);
2929 
2930 		flo = GET_OUTPAYLOAD(ps, pm, fuse_lk_out);
2931 		fl->l_start = flo->lk.start;
2932 		fl->l_len = flo->lk.end - flo->lk.start;
2933 		fl->l_pid = flo->lk.pid;
2934 		fl->l_type = flo->lk.type;
2935 		fl->l_whence = SEEK_SET;	/* libfuse hardcodes it */
2936 
2937 		PERFUSE_NODE_DATA(opc)->pnd_lock_owner = flo->lk.pid;
2938 		break;
2939 	case F_UNLCK:
2940 		owner = 0;
2941 		/* FALLTHROUGH */
2942 	case F_SETLK:
2943 		/* FALLTHROUGH */
2944 	case F_SETLKW:
2945 		if (error != 0)
2946 			PERFUSE_NODE_DATA(opc)->pnd_lock_owner = owner;
2947 
2948 		if (len != 0)
2949 			DERRX(EX_SOFTWARE,
2950 			      "%s: Unexpected unlock reply len %zd",
2951 			      __func__, len);
2952 
2953 		break;
2954 	default:
2955 		DERRX(EX_SOFTWARE, "%s: Unexpected op %d", __func__, op);
2956 		break;
2957 	}
2958 
2959 	ps->ps_destroy_msg(pm);
2960 	error = 0;
2961 
2962 out:
2963 	node_rele(opc);
2964 	return error;
2965 }
2966 
2967 int
2968 perfuse_node_read(struct puffs_usermount *pu, puffs_cookie_t opc, uint8_t *buf,
2969 	off_t offset, size_t *resid, const struct puffs_cred *pcr, int ioflag)
2970 {
2971 	struct perfuse_state *ps;
2972 	struct perfuse_node_data *pnd;
2973 	const struct vattr *vap;
2974 	perfuse_msg_t *pm;
2975 	struct fuse_read_in *fri;
2976 	struct fuse_out_header *foh;
2977 	size_t readen;
2978 	int error;
2979 
2980 	ps = puffs_getspecific(pu);
2981 	pnd = PERFUSE_NODE_DATA(opc);
2982 	vap = puffs_pn_getvap((struct puffs_node *)opc);
2983 
2984 	/*
2985 	 * NetBSD turns that into a getdents(2) output
2986 	 * We just do a EISDIR as this feature is of little use.
2987 	 */
2988 	if (vap->va_type == VDIR)
2989 		return EISDIR;
2990 
2991 	if ((u_quad_t)offset + *resid > vap->va_size)
2992 		DWARNX("%s %p read %lld@%zu beyond EOF %" PRIu64 "\n",
2993 		       __func__, (void *)opc, (long long)offset,
2994 		       *resid, vap->va_size);
2995 
2996 	do {
2997 		size_t max_read;
2998 
2999 		max_read = ps->ps_max_readahead - sizeof(*foh);
3000 		/*
3001 		 * flags may be set to FUSE_READ_LOCKOWNER
3002 		 * if lock_owner is provided.
3003 		 */
3004 		pm = ps->ps_new_msg(pu, opc, FUSE_READ, sizeof(*fri), pcr);
3005 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
3006 		fri->fh = perfuse_get_fh(opc, FREAD);
3007 		fri->offset = offset;
3008 		fri->size = (uint32_t)MIN(*resid, max_read);
3009 		fri->read_flags = 0; /* XXX Unused by libfuse? */
3010 		fri->lock_owner = pnd->pnd_lock_owner;
3011 		fri->flags = 0;
3012 		fri->flags |= (fri->lock_owner != 0) ? FUSE_READ_LOCKOWNER : 0;
3013 
3014 #ifdef PERFUSE_DEBUG
3015 	if (perfuse_diagflags & PDF_FH)
3016 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
3017 			__func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
3018 #endif
3019 		error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply);
3020 		if (error  != 0)
3021 			return error;
3022 
3023 		foh = GET_OUTHDR(ps, pm);
3024 		readen = foh->len - sizeof(*foh);
3025 
3026 #ifdef PERFUSE_DEBUG
3027 		if (readen > *resid)
3028 			DERRX(EX_SOFTWARE, "%s: Unexpected big read %zd",
3029 			      __func__, readen);
3030 #endif
3031 
3032 		(void)memcpy(buf,  _GET_OUTPAYLOAD(ps, pm, char *), readen);
3033 
3034 		buf += readen;
3035 		offset += readen;
3036 		*resid -= readen;
3037 
3038 		ps->ps_destroy_msg(pm);
3039 	} while ((*resid != 0) && (readen != 0));
3040 
3041 	if (ioflag & (IO_SYNC|IO_DSYNC))
3042 		ps->ps_syncreads++;
3043 	else
3044 		ps->ps_asyncreads++;
3045 
3046 	return 0;
3047 }
3048 
3049 int
3050 perfuse_node_write(struct puffs_usermount *pu, puffs_cookie_t opc,
3051 	uint8_t *buf, off_t offset, size_t *resid,
3052 	const struct puffs_cred *pcr, int ioflag)
3053 {
3054 	return perfuse_node_write2(pu, opc, buf, offset, resid, pcr, ioflag, 0);
3055 }
3056 
3057 /* ARGSUSED7 */
3058 int
3059 perfuse_node_write2(struct puffs_usermount *pu, puffs_cookie_t opc,
3060 	uint8_t *buf, off_t offset, size_t *resid,
3061 	const struct puffs_cred *pcr, int ioflag, int xflag)
3062 {
3063 	struct perfuse_state *ps;
3064 	struct perfuse_node_data *pnd;
3065 	struct vattr *vap;
3066 	perfuse_msg_t *pm;
3067 	struct fuse_write_in *fwi;
3068 	struct fuse_write_out *fwo;
3069 	size_t data_len;
3070 	size_t payload_len;
3071 	size_t written;
3072 	int inresize;
3073 	int error;
3074 
3075 	ps = puffs_getspecific(pu);
3076 	pnd = PERFUSE_NODE_DATA(opc);
3077 	vap = puffs_pn_getvap((struct puffs_node *)opc);
3078 	written = 0;
3079 	inresize = 0;
3080 	error = 0;
3081 
3082 	if (vap->va_type == VDIR)
3083 		return EISDIR;
3084 
3085 	node_ref(opc);
3086 
3087 	/*
3088 	 * We need to queue write requests in order to avoid
3089 	 * dequeueing PCQ_AFTERWRITE when there are pending writes.
3090 	 */
3091 	while (pnd->pnd_flags & PND_INWRITE)
3092 		requeue_request(pu, opc, PCQ_WRITE);
3093 	pnd->pnd_flags |= PND_INWRITE;
3094 
3095 	/*
3096 	 * Serialize size access, see comment in perfuse_node_setattr().
3097 	 */
3098 	if ((u_quad_t)offset + *resid > vap->va_size) {
3099 		while (pnd->pnd_flags & PND_INRESIZE)
3100 			requeue_request(pu, opc, PCQ_RESIZE);
3101 		pnd->pnd_flags |= PND_INRESIZE;
3102 		inresize = 1;
3103 	}
3104 
3105 	/*
3106 	 * append flag: re-read the file size so that
3107 	 * we get the latest value.
3108 	 */
3109 	if (ioflag & PUFFS_IO_APPEND) {
3110 		DWARNX("%s: PUFFS_IO_APPEND set, untested code", __func__);
3111 
3112 		if ((error = perfuse_node_getattr(pu, opc, vap, pcr)) != 0)
3113 			goto out;
3114 
3115 		offset = vap->va_size;
3116 	}
3117 
3118 #ifdef PERFUSE_DEBUG
3119 	if (perfuse_diagflags & PDF_RESIZE)
3120 		DPRINTF(">> %s %p %" PRIu64 "\n", __func__,
3121 			(void *)opc, vap->va_size);
3122 #endif
3123 
3124 	do {
3125 		size_t max_write;
3126 		/*
3127 		 * There is a writepage flag when data
3128 		 * is aligned to page size. Use it for
3129 		 * everything but the data after the last
3130 		 * page boundary.
3131 		 */
3132 		max_write = ps->ps_max_write - sizeof(*fwi);
3133 
3134 		data_len = MIN(*resid, max_write);
3135 		if (data_len > (size_t)sysconf(_SC_PAGESIZE))
3136 			data_len = data_len & ~(sysconf(_SC_PAGESIZE) - 1);
3137 
3138 		payload_len = data_len + sizeof(*fwi);
3139 
3140 		/*
3141 		 * flags may be set to FUSE_WRITE_CACHE (XXX usage?)
3142 		 * or FUSE_WRITE_LOCKOWNER, if lock_owner is provided.
3143 		 * write_flags is set to 1 for writepage.
3144 		 */
3145 		pm = ps->ps_new_msg(pu, opc, FUSE_WRITE, payload_len, pcr);
3146 		fwi = GET_INPAYLOAD(ps, pm, fuse_write_in);
3147 		fwi->fh = perfuse_get_fh(opc, FWRITE);
3148 		fwi->offset = offset;
3149 		fwi->size = (uint32_t)data_len;
3150 		fwi->write_flags = (fwi->size % sysconf(_SC_PAGESIZE)) ? 0 : 1;
3151 		fwi->lock_owner = pnd->pnd_lock_owner;
3152 		fwi->flags = 0;
3153 		fwi->flags |= (fwi->lock_owner != 0) ? FUSE_WRITE_LOCKOWNER : 0;
3154 		fwi->flags |= (ioflag & IO_DIRECT) ? 0 : FUSE_WRITE_CACHE;
3155 		(void)memcpy((fwi + 1), buf, data_len);
3156 
3157 
3158 #ifdef PERFUSE_DEBUG
3159 		if (perfuse_diagflags & PDF_FH)
3160 			DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
3161 				"fh = 0x%"PRIx64"\n", __func__,
3162 				(void *)opc, pnd->pnd_nodeid, fwi->fh);
3163 #endif
3164 		if ((error = xchg_msg(pu, opc, pm,
3165 				      sizeof(*fwo), wait_reply)) != 0)
3166 			goto out;
3167 
3168 		fwo = GET_OUTPAYLOAD(ps, pm, fuse_write_out);
3169 		written = fwo->size;
3170 		ps->ps_destroy_msg(pm);
3171 
3172 #ifdef PERFUSE_DEBUG
3173 		if (written > *resid)
3174 			DERRX(EX_SOFTWARE, "%s: Unexpected big write %zd",
3175 			      __func__, written);
3176 #endif
3177 		*resid -= written;
3178 		offset += written;
3179 		buf += written;
3180 
3181 	} while (*resid != 0);
3182 
3183 	/*
3184 	 * puffs_ops(3) says
3185 	 *  "everything must be written or an error will be generated"
3186 	 */
3187 	if (*resid != 0)
3188 		error = EFBIG;
3189 
3190 #ifdef PERFUSE_DEBUG
3191 	if (perfuse_diagflags & PDF_RESIZE) {
3192 		if (offset > (off_t)vap->va_size)
3193 			DPRINTF("<< %s %p %" PRIu64 " -> %lld\n", __func__,
3194 				(void *)opc, vap->va_size, (long long)offset);
3195 		else
3196 			DPRINTF("<< %s %p \n", __func__, (void *)opc);
3197 	}
3198 #endif
3199 
3200 	/*
3201 	 * Update file size if we wrote beyond the end
3202 	 */
3203 	if (offset > (off_t)vap->va_size)
3204 		vap->va_size = offset;
3205 
3206 	if (inresize) {
3207 #ifdef PERFUSE_DEBUG
3208 		if (!(pnd->pnd_flags & PND_INRESIZE))
3209 			DERRX(EX_SOFTWARE, "file write grow without resize");
3210 #endif
3211 		pnd->pnd_flags &= ~PND_INRESIZE;
3212 		(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
3213 	}
3214 
3215 
3216 	/*
3217 	 * Statistics
3218 	 */
3219 	if (ioflag & (IO_SYNC|IO_DSYNC))
3220 		ps->ps_syncwrites++;
3221 	else
3222 		ps->ps_asyncwrites++;
3223 
3224 	/*
3225 	 * Remember to sync the file
3226 	 */
3227 	pnd->pnd_flags |= PND_DIRTY;
3228 
3229 #ifdef PERFUSE_DEBUG
3230 	if (perfuse_diagflags & PDF_SYNC)
3231 		DPRINTF("%s: DIRTY opc = %p, file = \"%s\"\n",
3232 			__func__, (void*)opc, perfuse_node_path(ps, opc));
3233 #endif
3234 
3235 out:
3236 	/*
3237 	 * VOP_PUTPAGE causes FAF write where kernel does not
3238 	 * check operation result. At least warn if it failed.
3239 	 */
3240 #ifdef PUFFS_WRITE_FAF
3241 	if (error && (xflag & PUFFS_WRITE_FAF))
3242 		DWARN("Data loss caused by FAF write failed on \"%s\"",
3243 		      pnd->pnd_name);
3244 #endif /* PUFFS_WRITE_FAF */
3245 
3246 	/*
3247 	 * If there are no more queued write, we can resume
3248 	 * an operation awaiting write completion.
3249 	 */
3250 	pnd->pnd_flags &= ~PND_INWRITE;
3251 	if (dequeue_requests(opc, PCQ_WRITE, 1) == 0)
3252 		(void)dequeue_requests(opc, PCQ_AFTERWRITE, DEQUEUE_ALL);
3253 
3254 	node_rele(opc);
3255 	return error;
3256 }
3257 
3258 /* ARGSUSED0 */
3259 void
3260 perfuse_cache_write(struct puffs_usermount *pu, puffs_cookie_t opc, size_t size,
3261 	struct puffs_cacherun *runs)
3262 {
3263 	return;
3264 }
3265 
3266 /* ARGSUSED4 */
3267 int
3268 perfuse_node_getextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3269 	int attrns, const char *attrname, size_t *attrsize, uint8_t *attr,
3270 	size_t *resid, const struct puffs_cred *pcr)
3271 {
3272 	struct perfuse_state *ps;
3273 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3274 	perfuse_msg_t *pm;
3275 	struct fuse_getxattr_in *fgi;
3276 	struct fuse_getxattr_out *fgo;
3277 	struct fuse_out_header *foh;
3278 	size_t attrnamelen;
3279 	size_t len;
3280 	char *np;
3281 	int error;
3282 
3283 	node_ref(opc);
3284 	ps = puffs_getspecific(pu);
3285 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3286 	attrnamelen = strlen(attrname) + 1;
3287 	len = sizeof(*fgi) + attrnamelen;
3288 
3289 	pm = ps->ps_new_msg(pu, opc, FUSE_GETXATTR, len, pcr);
3290 	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3291 	fgi->size = (unsigned int)((resid != NULL) ? *resid : 0);
3292 	np = (char *)(void *)(fgi + 1);
3293 	(void)strlcpy(np, attrname, attrnamelen);
3294 
3295 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3296 		goto out;
3297 
3298 	/*
3299 	 * We just get fuse_getattr_out with list size if we requested
3300 	 * a null size.
3301 	 */
3302 	if (resid == NULL) {
3303 		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3304 
3305 		if (attrsize != NULL)
3306 			*attrsize = fgo->size;
3307 
3308 		ps->ps_destroy_msg(pm);
3309 		error = 0;
3310 		goto out;
3311 	}
3312 
3313 	/*
3314 	 * And with a non null requested size, we get the list just
3315 	 * after the header
3316 	 */
3317 	foh = GET_OUTHDR(ps, pm);
3318 	np = (char *)(void *)(foh + 1);
3319 
3320 	if (resid != NULL) {
3321 		len = MAX(foh->len - sizeof(*foh), *resid);
3322 		(void)memcpy(attr, np, len);
3323 		*resid -= len;
3324 	}
3325 
3326 	ps->ps_destroy_msg(pm);
3327 	error = 0;
3328 
3329 out:
3330 	node_rele(opc);
3331 	return error;
3332 }
3333 
3334 int
3335 perfuse_node_setextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3336 	int attrns, const char *attrname, uint8_t *attr, size_t *resid,
3337 	const struct puffs_cred *pcr)
3338 {
3339 	struct perfuse_state *ps;
3340 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3341 	perfuse_msg_t *pm;
3342 	struct fuse_setxattr_in *fsi;
3343 	size_t attrnamelen;
3344 	size_t len;
3345 	char *np;
3346 	int error;
3347 
3348 	node_ref(opc);
3349 	ps = puffs_getspecific(pu);
3350 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3351 	attrnamelen = strlen(attrname) + 1;
3352 	len = sizeof(*fsi) + attrnamelen + *resid;
3353 
3354 	pm = ps->ps_new_msg(pu, opc, FUSE_SETXATTR, len, pcr);
3355 	fsi = GET_INPAYLOAD(ps, pm, fuse_setxattr_in);
3356 	fsi->size = (unsigned int)*resid;
3357 	fsi->flags = 0;
3358 	np = (char *)(void *)(fsi + 1);
3359 	(void)strlcpy(np, attrname, attrnamelen);
3360 	np += attrnamelen;
3361 	(void)memcpy(np, (char *)attr, *resid);
3362 
3363 	if ((error = xchg_msg(pu, opc, pm,
3364 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
3365 		goto out;
3366 
3367 	ps->ps_destroy_msg(pm);
3368 	*resid = 0;
3369 	error = 0;
3370 
3371 out:
3372 	node_rele(opc);
3373 	return error;
3374 }
3375 
3376 /* ARGSUSED2 */
3377 int
3378 perfuse_node_listextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3379 	int attrns, size_t *attrsize, uint8_t *attrs, size_t *resid, int flag,
3380 	const struct puffs_cred *pcr)
3381 {
3382 	struct perfuse_state *ps;
3383 	perfuse_msg_t *pm;
3384 	struct fuse_getxattr_in *fgi;
3385 	struct fuse_getxattr_out *fgo;
3386 	struct fuse_out_header *foh;
3387 	char *np;
3388 	size_t len, puffs_len;
3389 	int error;
3390 
3391 	node_ref(opc);
3392 
3393 	ps = puffs_getspecific(pu);
3394 	len = sizeof(*fgi);
3395 
3396 	pm = ps->ps_new_msg(pu, opc, FUSE_LISTXATTR, len, pcr);
3397 	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3398 	if (resid != NULL)
3399 		fgi->size = (unsigned int)*resid;
3400 	else
3401 		fgi->size = 0;
3402 
3403 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3404 		goto out;
3405 
3406 	/*
3407 	 * We just get fuse_getattr_out with list size if we requested
3408 	 * a null size.
3409 	 */
3410 	if (resid == NULL) {
3411 		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3412 
3413 		if (attrsize != NULL)
3414 			*attrsize = fgo->size;
3415 
3416 		ps->ps_destroy_msg(pm);
3417 
3418 		error = 0;
3419 		goto out;
3420 	}
3421 
3422 	/*
3423 	 * And with a non null requested size, we get the list just
3424 	 * after the header
3425 	 */
3426 	foh = GET_OUTHDR(ps, pm);
3427 	np = (char *)(void *)(foh + 1);
3428 	puffs_len = foh->len - sizeof(*foh);
3429 
3430 	if (attrs != NULL) {
3431 #ifdef PUFFS_EXTATTR_LIST_LENPREFIX
3432 		/*
3433 		 * Convert the FUSE reply to length prefixed strings
3434 		 * if this is what the kernel wants.
3435 		 */
3436 		if (flag & PUFFS_EXTATTR_LIST_LENPREFIX) {
3437 			size_t i, attrlen;
3438 
3439 			for (i = 0; i < puffs_len; i += attrlen + 1) {
3440 				attrlen = strlen(np + i);
3441 				(void)memmove(np + i + 1, np + i, attrlen);
3442 				*(np + i) = (uint8_t)attrlen;
3443 			}
3444 		}
3445 #endif /* PUFFS_EXTATTR_LIST_LENPREFIX */
3446 		(void)memcpy(attrs, np, puffs_len);
3447 		*resid -= puffs_len;
3448 	}
3449 
3450 	if (attrsize != NULL)
3451 		*attrsize = puffs_len;
3452 
3453 	ps->ps_destroy_msg(pm);
3454 	error = 0;
3455 
3456 out:
3457 	node_rele(opc);
3458 	return error;
3459 }
3460 
3461 int
3462 perfuse_node_deleteextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3463 	int attrns, const char *attrname, const struct puffs_cred *pcr)
3464 {
3465 	struct perfuse_state *ps;
3466 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3467 	perfuse_msg_t *pm;
3468 	size_t attrnamelen;
3469 	char *np;
3470 	int error;
3471 
3472 	node_ref(opc);
3473 
3474 	ps = puffs_getspecific(pu);
3475 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3476 	attrnamelen = strlen(attrname) + 1;
3477 
3478 	pm = ps->ps_new_msg(pu, opc, FUSE_REMOVEXATTR, attrnamelen, pcr);
3479 	np = _GET_INPAYLOAD(ps, pm, char *);
3480 	(void)strlcpy(np, attrname, attrnamelen);
3481 
3482 	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
3483 
3484 	ps->ps_destroy_msg(pm);
3485 
3486 	node_rele(opc);
3487 	return error;
3488 }
3489