xref: /netbsd-src/lib/libperfuse/ops.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*  $NetBSD: ops.c,v 1.63 2014/01/06 08:56:34 manu Exp $ */
2 
3 /*-
4  *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *     notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16  *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17  *  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19  *  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  *  POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <libgen.h>
32 #include <errno.h>
33 #include <err.h>
34 #include <sysexits.h>
35 #include <syslog.h>
36 #include <puffs.h>
37 #include <sys/socket.h>
38 #include <sys/socket.h>
39 #include <sys/extattr.h>
40 #include <sys/time.h>
41 #include <machine/vmparam.h>
42 
43 #include "perfuse_priv.h"
44 #include "fuse.h"
45 
46 extern int perfuse_diagflags;
47 
/*
 * Forward declarations of the file-local helpers defined below.
 * print_node() is compiled out, and perfuse_newinfo_setttl() only
 * exists when puffs provides PUFFS_KFLAG_CACHE_FS_TTL.
 */
#if 0
static void print_node(const char *, puffs_cookie_t);
#endif
#ifdef PUFFS_KFLAG_CACHE_FS_TTL
static void perfuse_newinfo_setttl(struct puffs_newinfo *,
    struct puffs_node *, struct fuse_entry_out *, struct fuse_attr_out *);
#endif /* PUFFS_KFLAG_CACHE_FS_TTL */
static int xchg_msg(struct puffs_usermount *, puffs_cookie_t,
    perfuse_msg_t *, size_t, enum perfuse_xchg_pb_reply);
static int mode_access(puffs_cookie_t, const struct puffs_cred *, mode_t);
static int sticky_access(puffs_cookie_t, struct puffs_node *,
    const struct puffs_cred *);
static void fuse_attr_to_vap(struct perfuse_state *,
    struct vattr *, struct fuse_attr *);
static int node_lookup_common(struct puffs_usermount *, puffs_cookie_t,
    struct puffs_newinfo *, const char *, const struct puffs_cred *,
    struct puffs_node **);
static int node_mk_common(struct puffs_usermount *, puffs_cookie_t,
    struct puffs_newinfo *, const struct puffs_cn *pcn, perfuse_msg_t *);
static uint64_t readdir_last_cookie(struct fuse_dirent *, size_t);
static ssize_t fuse_to_dirent(struct puffs_usermount *, puffs_cookie_t,
    struct fuse_dirent *, size_t);
static void readdir_buffered(puffs_cookie_t, struct dirent *, off_t *,
    size_t *);
static void node_ref(puffs_cookie_t);
static void node_rele(puffs_cookie_t);
static void requeue_request(struct puffs_usermount *,
    puffs_cookie_t opc, enum perfuse_qtype);
static int dequeue_requests(puffs_cookie_t opc, enum perfuse_qtype, int);
/*
 * Value for the last argument of dequeue_requests(): the running count
 * never equals 0 after being incremented, so no limit is applied.
 */
#define DEQUEUE_ALL 0
78 
/*
 *  From <sys/vnode>, inside #ifdef _KERNEL section
 *
 *  IO_SYNC refers to IO_DSYNC, which is only defined on the next line.
 *  This is fine: a macro body is expanded where the macro is used,
 *  not where it is defined.
 */
#define IO_SYNC		(0x40|IO_DSYNC)
#define IO_DSYNC	0x00200
#define IO_DIRECT	0x02000

/*
 *  From <fcntl>, inside #ifdef _KERNEL section
 */
#define F_WAIT		0x010
#define F_FLOCK		0x020
/* Subtracts one from its argument; the argument is evaluated once. */
#define OFLAGS(fflags)  ((fflags) - 1)
92 
/*
 * Borrowed from src/sys/kern/vfs_subr.c and src/sys/sys/vnode.h
 *
 * iftovt_tab maps the file type bits of a mode (see IFTOVT below)
 * to an enum vtype; vttoif_tab maps a vtype value, used as an index
 * (see VTTOIF below), to S_IF* file type bits.
 */
const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
        S_IFSOCK, S_IFIFO, S_IFMT,
};

/* Index iftovt_tab with the S_IFMT bits of mode, shifted down 12 bits */
#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
/* Index vttoif_tab with indx converted to int; no bounds check is done */
#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
107 
#if 0
/*
 * Debugging aid (currently compiled out): print the name, cookie,
 * FUSE nodeid and inode number of the node behind opc, prefixed by
 * the caller-supplied function name.
 */
static void
print_node(const char *func, puffs_cookie_t opc)
{
	struct puffs_node *pn;
	struct perfuse_node_data *pnd;
	struct vattr *vap;

	pn = (struct puffs_node *)opc;
	pnd = PERFUSE_NODE_DATA(opc);
	vap = &pn->pn_va;

	printf("%s: \"%s\", opc = %p, nodeid = 0x%"PRIx64" ino = %"PRIu64"\n",
	       func, pnd->pnd_name, opc, pnd->pnd_nodeid, vap->va_fileid);

	return;
}
#endif /* 0 */
126 
127 int
128 perfuse_node_close_common(struct puffs_usermount *pu, puffs_cookie_t opc,
129 	int mode)
130 {
131 	struct perfuse_state *ps;
132 	perfuse_msg_t *pm;
133 	int op;
134 	uint64_t fh;
135 	struct fuse_release_in *fri;
136 	struct perfuse_node_data *pnd;
137 	struct puffs_node *pn;
138 	int error;
139 
140 	ps = puffs_getspecific(pu);
141 	pn = (struct puffs_node *)opc;
142 	pnd = PERFUSE_NODE_DATA(pn);
143 
144 	if (puffs_pn_getvap(pn)->va_type == VDIR) {
145 		op = FUSE_RELEASEDIR;
146 		mode = FREAD;
147 	} else {
148 		op = FUSE_RELEASE;
149 	}
150 
151 	/*
152 	 * Destroy the filehandle before sending the
153 	 * request to the FUSE filesystem, otherwise
154 	 * we may get a second close() while we wait
155 	 * for the reply, and we would end up closing
156 	 * the same fh twice instead of closng both.
157 	 */
158 	fh = perfuse_get_fh(opc, mode);
159 	perfuse_destroy_fh(pn, fh);
160 
161 	/*
162 	 * release_flags may be set to FUSE_RELEASE_FLUSH
163 	 * to flush locks. lock_owner must be set in that case
164 	 *
165 	 * ps_new_msg() is called with NULL creds, which will
166 	 * be interpreted as FUSE superuser. We come here from the
167 	 * inactive method, which provides no creds, but obviously
168 	 * runs with kernel privilege.
169 	 */
170 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*fri), NULL);
171 	fri = GET_INPAYLOAD(ps, pm, fuse_release_in);
172 	fri->fh = fh;
173 	fri->flags = 0;
174 	fri->release_flags = 0;
175 	fri->lock_owner = pnd->pnd_lock_owner;
176 	fri->flags = (fri->lock_owner != 0) ? FUSE_RELEASE_FLUSH : 0;
177 
178 #ifdef PERFUSE_DEBUG
179 	if (perfuse_diagflags & PDF_FH)
180 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
181 			 __func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
182 #endif
183 
184 	if ((error = xchg_msg(pu, opc, pm,
185 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
186 		DERRX(EX_SOFTWARE, "%s: freed fh = 0x%"PRIx64" but filesystem "
187 		      "returned error = %d", __func__, fh, error);
188 
189 	ps->ps_destroy_msg(pm);
190 
191 	return 0;
192 }
193 
194 static int
195 xchg_msg(struct puffs_usermount *pu, puffs_cookie_t opc, perfuse_msg_t *pm,
196 	size_t len, enum perfuse_xchg_pb_reply wait)
197 {
198 	struct perfuse_state *ps;
199 	struct perfuse_node_data *pnd;
200 	struct perfuse_trace *pt = NULL;
201 	int error;
202 
203 	ps = puffs_getspecific(pu);
204 	pnd = NULL;
205 	if ((struct puffs_node *)opc != NULL)
206 		pnd = PERFUSE_NODE_DATA(opc);
207 
208 #ifdef PERFUSE_DEBUG
209 	if ((perfuse_diagflags & PDF_FILENAME) && (opc != 0))
210 		DPRINTF("file = \"%s\", ino = %"PRIu64" flags = 0x%x\n",
211 			perfuse_node_path(ps, opc),
212 			((struct puffs_node *)opc)->pn_va.va_fileid,
213 			PERFUSE_NODE_DATA(opc)->pnd_flags);
214 #endif
215 	ps->ps_xchgcount++;
216 	if (pnd)
217 		pnd->pnd_inxchg++;
218 
219 	/*
220 	 * Record FUSE call start if requested
221 	 */
222 	if (perfuse_diagflags & PDF_TRACE)
223 		pt = perfuse_trace_begin(ps, opc, pm);
224 
225 	/*
226 	 * Do actual FUSE exchange
227 	 */
228 	if ((error = ps->ps_xchg_msg(pu, pm, len, wait)) != 0)
229 		ps->ps_destroy_msg(pm);
230 
231 	/*
232 	 * Record FUSE call end if requested
233 	 */
234 	if (pt != NULL)
235 		perfuse_trace_end(ps, pt, error);
236 
237 	ps->ps_xchgcount--;
238 	if (pnd) {
239 		pnd->pnd_inxchg--;
240 		(void)dequeue_requests(opc, PCQ_AFTERXCHG, DEQUEUE_ALL);
241 	}
242 
243 	return error;
244 }
245 
246 static int
247 mode_access(puffs_cookie_t opc, const struct puffs_cred *pcr, mode_t mode)
248 {
249 	struct puffs_node *pn;
250 	struct vattr *va;
251 
252 	/*
253 	 * pcr is NULL for self open through fsync or readdir.
254 	 * In both case, access control is useless, as it was
255 	 * done before, at open time.
256 	 */
257 	if (pcr == NULL)
258 		return 0;
259 
260 	pn = (struct puffs_node *)opc;
261 	va = puffs_pn_getvap(pn);
262 	return puffs_access(va->va_type, va->va_mode,
263 			    va->va_uid, va->va_gid,
264 			    mode, pcr);
265 }
266 
267 static int
268 sticky_access(puffs_cookie_t opc, struct puffs_node *targ,
269 	      const struct puffs_cred *pcr)
270 {
271 	uid_t uid;
272 	int sticky, owner;
273 
274 	/*
275 	 * This covers the case where the kernel requests a DELETE
276 	 * or RENAME on its own, and where puffs_cred_getuid would
277 	 * return -1. While such a situation should not happen,
278 	 * we allow it here.
279 	 *
280 	 * This also allows root to tamper with other users' files
281 	 * that have the sticky bit.
282 	 */
283 	if (puffs_cred_isjuggernaut(pcr))
284 		return 0;
285 
286 	if (puffs_cred_getuid(pcr, &uid) != 0)
287 		DERRX(EX_SOFTWARE, "puffs_cred_getuid fails in %s", __func__);
288 
289 	sticky = puffs_pn_getvap(opc)->va_mode & S_ISTXT;
290 	owner = puffs_pn_getvap(targ)->va_uid == uid;
291 
292 	if (sticky && !owner)
293 		return EACCES;
294 
295 	return 0;
296 }
297 
298 
299 static void
300 fuse_attr_to_vap(struct perfuse_state *ps, struct vattr *vap,
301 	struct fuse_attr *fa)
302 {
303 	vap->va_type = IFTOVT(fa->mode);
304 	vap->va_mode = fa->mode & ALLPERMS;
305 	vap->va_nlink = fa->nlink;
306 	vap->va_uid = fa->uid;
307 	vap->va_gid = fa->gid;
308 	vap->va_fsid = (long)ps->ps_fsid;
309 	vap->va_fileid = fa->ino;
310 	vap->va_size = fa->size;
311 	vap->va_blocksize = fa->blksize;
312 	vap->va_atime.tv_sec = (time_t)fa->atime;
313 	vap->va_atime.tv_nsec = (long) fa->atimensec;
314 	vap->va_mtime.tv_sec = (time_t)fa->mtime;
315 	vap->va_mtime.tv_nsec = (long)fa->mtimensec;
316 	vap->va_ctime.tv_sec = (time_t)fa->ctime;
317 	vap->va_ctime.tv_nsec = (long)fa->ctimensec;
318 	vap->va_birthtime.tv_sec = 0;
319 	vap->va_birthtime.tv_nsec = 0;
320 	vap->va_gen = 0;
321 	vap->va_flags = 0;
322 	vap->va_rdev = fa->rdev;
323 	vap->va_bytes = fa->size;
324 	vap->va_filerev = (u_quad_t)PUFFS_VNOVAL;
325 	vap->va_vaflags = 0;
326 
327 	if (vap->va_blocksize == 0)
328 		vap->va_blocksize = DEV_BSIZE;
329 
330 	if (vap->va_size == (size_t)PUFFS_VNOVAL) /* XXX */
331 		vap->va_size = 0;
332 
333 	return;
334 }
335 
336 #ifdef PUFFS_KFLAG_CACHE_FS_TTL
337 static void
338 perfuse_newinfo_setttl(struct puffs_newinfo *pni,
339     struct puffs_node *pn, struct fuse_entry_out *feo,
340     struct fuse_attr_out *fao)
341 {
342 #ifdef PERFUSE_DEBUG
343 	if ((feo == NULL) && (fao == NULL))
344 		DERRX(EX_SOFTWARE, "%s: feo and fao NULL", __func__);
345 
346 	if ((feo != NULL) && (fao != NULL))
347 		DERRX(EX_SOFTWARE, "%s: feo and fao != NULL", __func__);
348 #endif /* PERFUSE_DEBUG */
349 
350 	if (fao != NULL) {
351 		struct timespec va_ttl;
352 
353 		va_ttl.tv_sec = fao->attr_valid;
354 		va_ttl.tv_nsec = fao->attr_valid_nsec;
355 
356 		puffs_newinfo_setvattl(pni, &va_ttl);
357 	}
358 
359 	if (feo != NULL) {
360 		struct timespec va_ttl;
361 		struct timespec cn_ttl;
362 		struct timespec now;
363 		struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(pn);
364 
365 		va_ttl.tv_sec = feo->attr_valid;
366 		va_ttl.tv_nsec = feo->attr_valid_nsec;
367 		cn_ttl.tv_sec = feo->entry_valid;
368 		cn_ttl.tv_nsec = feo->entry_valid_nsec;
369 
370 		puffs_newinfo_setvattl(pni, &va_ttl);
371 		puffs_newinfo_setcnttl(pni, &cn_ttl);
372 
373 		if (clock_gettime(CLOCK_REALTIME, &now) != 0)
374 			DERR(EX_OSERR, "clock_gettime failed");
375 
376                 timespecadd(&now, &cn_ttl, &pnd->pnd_cn_expire);
377 	}
378 
379 	return;
380 }
381 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
382 
/*
 * Look up name path in directory opc by sending FUSE_LOOKUP.
 *
 * pu:   puffs mount
 * opc:  cookie of the parent directory
 * pni:  puffs newinfo to fill with the result, or NULL to skip that
 * path: name to look up inside opc
 * pcr:  credentials passed to ps_new_msg() for the request
 * pnp:  must not be NULL; receives the resulting puffs node
 *
 * Returns 0 on success, ESTALE if the parent is flagged PND_REMOVED,
 * ENOENT if the filesystem answered with inode number 0, or the
 * error returned by xchg_msg().
 *
 * On success the node's pnd_fuse_nlookup counter is incremented.
 */
static int
node_lookup_common(struct puffs_usermount *pu, puffs_cookie_t opc,
	struct puffs_newinfo *pni, const char *path,
	const struct puffs_cred *pcr, struct puffs_node **pnp)
{
	struct perfuse_state *ps;
	struct perfuse_node_data *oldpnd;
	perfuse_msg_t *pm;
	struct fuse_entry_out *feo;
	struct puffs_node *pn;
	size_t len;
	int error;

	/*
	 * Prevent further lookups if the parent was removed
	 */
	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
		return ESTALE;

	if (pnp == NULL)
		DERRX(EX_SOFTWARE, "pnp must be != NULL");

	ps = puffs_getspecific(pu);

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_FILENAME)
		DPRINTF("%s: opc = %p, file = \"%s\" looking up \"%s\"\n",
			__func__, (void *)opc,
			perfuse_node_path(ps, opc), path);

	if (strcmp(path, ".") == 0)
		DERRX(EX_SOFTWARE, "unexpected dot-lookup");

	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_RECLAIMED)
		DERRX(EX_SOFTWARE,
		      "looking up reclaimed node opc = %p, name = \"%s\"",
		      opc, path);

	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_INVALID)
		DERRX(EX_SOFTWARE,
		      "looking up freed node opc = %p, name = \"%s\"",
		      opc, path);
#endif /* PERFUSE_DEBUG */

	/* The request payload is the name, including its terminating NUL */
	len = strlen(path) + 1;
	pm = ps->ps_new_msg(pu, opc, FUSE_LOOKUP, len, pcr);
	(void)strlcpy(_GET_INPAYLOAD(ps, pm, char *), path, len);

	/* On error xchg_msg() has already destroyed pm */
	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
		return error;

	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);

	/*
	 * Starting with ABI 7.4, inode number 0 means ENOENT,
	 * with entry_valid / entry_valid_nsec giving negative
	 * cache timeout (which we do not implement yet).
	 */
	if (feo->attr.ino == 0) {
		ps->ps_destroy_msg(pm);
		return ENOENT;
	}

	/*
	 * Check for a known node, not reclaimed, with another name.
	 * It may have been moved, or we can lookup ../
	 */
	if (((oldpnd = perfuse_node_bynodeid(ps, feo->nodeid)) != NULL) &&
	    !(oldpnd->pnd_flags & PND_RECLAIMED)) {
		/*
		 * Save the new node name if not ..
		 * (len includes the NUL, so this is an exact match test)
		 */
		if (strncmp(path, "..", len) != 0)
			(void)strlcpy(oldpnd->pnd_name,
				      path, MAXPATHLEN);
		pn = oldpnd->pnd_pn;

	} else {
		/* Unknown (or reclaimed) nodeid: create and cache a new node */
		pn = perfuse_new_pn(pu, path, opc);
		PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
		perfuse_node_cache(ps, pn);
	}

#ifdef PERFUSE_DEBUG
	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_RECLAIMED)
		DERRX(EX_SOFTWARE,
		      "reclaimed in lookup opc = %p, name = \"%s\", ck = %p",
		      opc, path, pn);

	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_INVALID)
		DERRX(EX_SOFTWARE,
		      "freed in lookup opc = %p, name = \"%s\", ck = %p",
		      opc, path, pn);
#endif /* PERFUSE_DEBUG */

	/* Refresh the cached attributes from the reply */
	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
	pn->pn_va.va_gen = (u_long)(feo->generation);
	PERFUSE_NODE_DATA(pn)->pnd_fuse_nlookup++;

	*pnp = pn;

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_FILENAME)
		DPRINTF("%s: opc = %p, looked up opc = %p, "
			"nodeid = 0x%"PRIx64" file = \"%s\"\n", __func__,
			(void *)opc, pn, feo->nodeid, path);
#endif

	if (pni != NULL) {
#ifdef PUFFS_KFLAG_CACHE_FS_TTL
		puffs_newinfo_setva(pni, &pn->pn_va);
		perfuse_newinfo_setttl(pni, pn, feo, NULL);
#endif /* PUFFS_KFLAG_CACHE_FS_TTL */
		puffs_newinfo_setcookie(pni, pn);
		puffs_newinfo_setvtype(pni, pn->pn_va.va_type);
		puffs_newinfo_setsize(pni, (voff_t)pn->pn_va.va_size);
		puffs_newinfo_setrdev(pni, pn->pn_va.va_rdev);
	}

	/* The node was looked up again: clear its leak mark and accounting */
	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_NODELEAK) {
		PERFUSE_NODE_DATA(pn)->pnd_flags &= ~PND_NODELEAK;
		ps->ps_nodeleakcount--;
	}

	ps->ps_destroy_msg(pm);

	return 0;
}
511 
512 
513 /*
514  * Common code for methods that create objects:
515  * perfuse_node_mkdir
516  * perfuse_node_mknod
517  * perfuse_node_symlink
518  */
519 static int
520 node_mk_common(struct puffs_usermount *pu, puffs_cookie_t opc,
521 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
522 	perfuse_msg_t *pm)
523 {
524 	struct perfuse_state *ps;
525 	struct puffs_node *pn;
526 	struct fuse_entry_out *feo;
527 	int error;
528 
529 	ps =  puffs_getspecific(pu);
530 
531 	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
532 		return error;
533 
534 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
535 	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
536 		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
537 
538 	pn = perfuse_new_pn(pu, pcn->pcn_name, opc);
539 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
540 	PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
541 	perfuse_node_cache(ps, pn);
542 
543 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
544 	pn->pn_va.va_gen = (u_long)(feo->generation);
545 
546 	puffs_newinfo_setcookie(pni, pn);
547 #ifdef PUFFS_KFLAG_CACHE_FS_TTL
548 	puffs_newinfo_setva(pni, &pn->pn_va);
549 	perfuse_newinfo_setttl(pni, pn, feo, NULL);
550 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
551 
552 
553 #ifdef PERFUSE_DEBUG
554 	if (perfuse_diagflags & PDF_FILENAME)
555 		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
556 			"nodeid = 0x%"PRIx64"\n",
557 			__func__, (void *)pn, pcn->pcn_name,
558 			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid);
559 #endif
560 	ps->ps_destroy_msg(pm);
561 
562 	/* Parents is now dirty */
563 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
564 
565 	return 0;
566 }
567 
568 static uint64_t
569 readdir_last_cookie(struct fuse_dirent *fd, size_t fd_len)
570 {
571 	size_t len;
572 	size_t seen = 0;
573 	char *ndp;
574 
575 	do {
576 		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
577 		seen += len;
578 
579 		if (seen >= fd_len)
580 			break;
581 
582 		ndp = (char *)(void *)fd + (size_t)len;
583 		fd = (struct fuse_dirent *)(void *)ndp;
584 	} while (1 /* CONSTCOND */);
585 
586 	return fd->off;
587 }
588 
589 static ssize_t
590 fuse_to_dirent(struct puffs_usermount *pu, puffs_cookie_t opc,
591 	struct fuse_dirent *fd, size_t fd_len)
592 {
593 	struct dirent *dents;
594 	size_t dents_len;
595 	ssize_t written;
596 	uint64_t fd_offset;
597 	struct fuse_dirent *fd_base;
598 	size_t len;
599 
600 	fd_base = fd;
601 	fd_offset = 0;
602 	written = 0;
603 	dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
604 	dents_len = (size_t)PERFUSE_NODE_DATA(opc)->pnd_dirent_len;
605 
606 	do {
607 		char *ndp;
608 		size_t reclen;
609 
610 		reclen = _DIRENT_RECLEN(dents, fd->namelen);
611 
612 		/*
613 		 * Check we do not overflow the output buffer
614 		 * struct fuse_dirent is bigger than struct dirent,
615 		 * so we should always use fd_len and never reallocate
616 		 * later.
617 		 * If we have to reallocate,try to double the buffer
618 		 * each time so that we do not have to do it too often.
619 		 */
620 		if (written + reclen > dents_len) {
621 			if (dents_len == 0)
622 				dents_len = fd_len;
623 			else
624 				dents_len =
625 				   MAX(2 * dents_len, written + reclen);
626 
627 			dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
628 			if ((dents = realloc(dents, dents_len)) == NULL)
629 				DERR(EX_OSERR, "%s: malloc failed", __func__);
630 
631 			PERFUSE_NODE_DATA(opc)->pnd_dirent = dents;
632 			PERFUSE_NODE_DATA(opc)->pnd_dirent_len = dents_len;
633 
634 			/*
635 			 * (void *) for delint
636 			 */
637 			ndp = (char *)(void *)dents + written;
638 			dents = (struct dirent *)(void *)ndp;
639 		}
640 
641 		/*
642 		 * Filesystem was mounted without -o use_ino
643 		 * Perform a lookup to find it.
644 		 */
645 		if (fd->ino == PERFUSE_UNKNOWN_INO) {
646 			struct puffs_node *pn;
647 			struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
648 
649 			/*
650 			 * Avoid breaking out of fs
651 			 * by lookup to .. on root
652 			 */
653 			if ((strcmp(fd->name, "..") == 0) &&
654 			    (pnd->pnd_nodeid == FUSE_ROOT_ID)) {
655 				fd->ino = FUSE_ROOT_ID;
656 			} else {
657 				if (node_lookup_common(pu, opc, NULL, fd->name,
658 						       NULL, &pn) != 0) {
659 					DWARNX("node_lookup_common failed");
660 				} else {
661 					fd->ino = pn->pn_va.va_fileid;
662 					(void)perfuse_node_reclaim(pu, pn);
663 				}
664 			}
665 		}
666 
667 		dents->d_fileno = fd->ino;
668 		dents->d_reclen = (unsigned short)reclen;
669 		dents->d_namlen = fd->namelen;
670 		dents->d_type = fd->type;
671 		strlcpy(dents->d_name, fd->name, fd->namelen + 1);
672 
673 #ifdef PERFUSE_DEBUG
674 		if (perfuse_diagflags & PDF_READDIR)
675 			DPRINTF("%s: translated \"%s\" ino = %"PRIu64"\n",
676 				__func__, dents->d_name, dents->d_fileno);
677 #endif
678 
679 		dents = _DIRENT_NEXT(dents);
680 		written += reclen;
681 
682 		/*
683 		 * Move to the next record.
684 		 * fd->off is not the offset, it is an opaque cookie
685 		 * given by the filesystem to keep state across multiple
686 		 * readdir() operation.
687 		 * Use record alignement instead.
688 		 */
689 		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
690 #ifdef PERFUSE_DEBUG
691 		if (perfuse_diagflags & PDF_READDIR)
692 			DPRINTF("%s: record at %"PRId64"/0x%"PRIx64" "
693 				"length = %zd/0x%zx. "
694 				"next record at %"PRId64"/0x%"PRIx64" "
695 				"max %zd/0x%zx\n",
696 				__func__, fd_offset, fd_offset, len, len,
697 				fd_offset + len, fd_offset + len,
698 				fd_len, fd_len);
699 #endif
700 		fd_offset += len;
701 
702 		/*
703 		 * Check if next record is still within the packet
704 		 * If it is not, we reached the end of the buffer.
705 		 */
706 		if (fd_offset >= fd_len)
707 			break;
708 
709 		/*
710 		 * (void *) for delint
711 		 */
712 		ndp = (char *)(void *)fd_base + (size_t)fd_offset;
713 		fd = (struct fuse_dirent *)(void *)ndp;
714 
715 	} while (1 /* CONSTCOND */);
716 
717 	/*
718 	 * Adjust the dirent output length
719 	 */
720 	if (written != -1)
721 		PERFUSE_NODE_DATA(opc)->pnd_dirent_len = written;
722 
723 	return written;
724 }
725 
726 static void
727 readdir_buffered(puffs_cookie_t opc, struct dirent *dent, off_t *readoff,
728 	size_t *reslen)
729 {
730 	struct dirent *fromdent;
731 	struct perfuse_node_data *pnd;
732 	char *ndp;
733 
734 	pnd = PERFUSE_NODE_DATA(opc);
735 
736 	while (*readoff < pnd->pnd_dirent_len) {
737 		/*
738 		 * (void *) for delint
739 		 */
740 		ndp = (char *)(void *)pnd->pnd_dirent + (size_t)*readoff;
741 		fromdent = (struct dirent *)(void *)ndp;
742 
743 		if (*reslen < _DIRENT_SIZE(fromdent))
744 			break;
745 
746 		memcpy(dent, fromdent, _DIRENT_SIZE(fromdent));
747 		*readoff += _DIRENT_SIZE(fromdent);
748 		*reslen -= _DIRENT_SIZE(fromdent);
749 
750 		dent = _DIRENT_NEXT(dent);
751 	}
752 
753 #ifdef PERFUSE_DEBUG
754 	if (perfuse_diagflags & PDF_READDIR)
755 		DPRINTF("%s: readoff = %"PRId64",  "
756 			"pnd->pnd_dirent_len = %"PRId64"\n",
757 			__func__, *readoff, pnd->pnd_dirent_len);
758 #endif
759 	if (*readoff >=  pnd->pnd_dirent_len) {
760 		free(pnd->pnd_dirent);
761 		pnd->pnd_dirent = NULL;
762 		pnd->pnd_dirent_len = 0;
763 	}
764 
765 	return;
766 }
767 
768 
769 static void
770 node_ref(puffs_cookie_t opc)
771 {
772 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
773 
774 #ifdef PERFUSE_DEBUG
775 	if (pnd->pnd_flags & PND_INVALID)
776 		DERRX(EX_SOFTWARE, "Use of freed node opc = %p", opc);
777 #endif /* PERFUSE_DEBUG */
778 
779 	pnd->pnd_ref++;
780 	return;
781 }
782 
783 static void
784 node_rele(puffs_cookie_t opc)
785 {
786 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
787 
788 #ifdef PERFUSE_DEBUG
789 	if (pnd->pnd_flags & PND_INVALID)
790 		DERRX(EX_SOFTWARE, "Use of freed node opc = %p", opc);
791 #endif /* PERFUSE_DEBUG */
792 
793 	pnd->pnd_ref--;
794 
795 	if (pnd->pnd_ref == 0)
796 		(void)dequeue_requests(opc, PCQ_REF, DEQUEUE_ALL);
797 
798 	return;
799 }
800 
/*
 * Park the current request on node opc until dequeue_requests()
 * schedules it again.
 *
 * A queue entry of the given type, holding the current puffs
 * continuation, is appended to the node's pnd_pcq list; the
 * continuation then yields. The entry is removed from the list
 * once execution continues past the yield.
 *
 * The queue entry lives on this function's stack, so it is only
 * valid between the insert and the remove below.
 */
static void
requeue_request(struct puffs_usermount *pu, puffs_cookie_t opc,
	enum perfuse_qtype type)
{
	struct perfuse_cc_queue pcq;
	struct perfuse_node_data *pnd;
#ifdef PERFUSE_DEBUG
	struct perfuse_state *ps;

	ps = perfuse_getspecific(pu);
#endif

	pnd = PERFUSE_NODE_DATA(opc);
	pcq.pcq_type = type;
	pcq.pcq_cc = puffs_cc_getcc(pu);
	TAILQ_INSERT_TAIL(&pnd->pnd_pcq, &pcq, pcq_next);

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_REQUEUE)
		DPRINTF("%s: REQUEUE opc = %p, pcc = %p (%s)\n",
		        __func__, (void *)opc, pcq.pcq_cc,
			perfuse_qtypestr[type]);
#endif

	/* Execution stops here until the continuation is scheduled again */
	puffs_cc_yield(pcq.pcq_cc);
	TAILQ_REMOVE(&pnd->pnd_pcq, &pcq, pcq_next);

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_REQUEUE)
		DPRINTF("%s: RESUME opc = %p, pcc = %p (%s)\n",
		        __func__, (void *)opc, pcq.pcq_cc,
			perfuse_qtypestr[type]);
#endif

	return;
}
837 
838 static int
839 dequeue_requests(puffs_cookie_t opc, enum perfuse_qtype type, int max)
840 {
841 	struct perfuse_cc_queue *pcq;
842 	struct perfuse_node_data *pnd;
843 	int dequeued;
844 
845 	pnd = PERFUSE_NODE_DATA(opc);
846 	dequeued = 0;
847 	TAILQ_FOREACH(pcq, &pnd->pnd_pcq, pcq_next) {
848 		if (pcq->pcq_type != type)
849 			continue;
850 
851 #ifdef PERFUSE_DEBUG
852 		if (perfuse_diagflags & PDF_REQUEUE)
853 			DPRINTF("%s: SCHEDULE opc = %p, pcc = %p (%s)\n",
854 				__func__, (void *)opc, pcq->pcq_cc,
855 				 perfuse_qtypestr[type]);
856 #endif
857 		puffs_cc_schedule(pcq->pcq_cc);
858 
859 		if (++dequeued == max)
860 			break;
861 	}
862 
863 #ifdef PERFUSE_DEBUG
864 	if (perfuse_diagflags & PDF_REQUEUE)
865 		DPRINTF("%s: DONE  opc = %p\n", __func__, (void *)opc);
866 #endif
867 
868 	return dequeued;
869 }
870 
871 void
872 perfuse_fs_init(struct puffs_usermount *pu)
873 {
874 	struct perfuse_state *ps;
875 	perfuse_msg_t *pm;
876 	struct fuse_init_in *fii;
877 	struct fuse_init_out *fio;
878 	int error;
879 
880 	ps = puffs_getspecific(pu);
881 
882         if (puffs_mount(pu, ps->ps_target, ps->ps_mountflags, ps->ps_root) != 0)
883                 DERR(EX_OSERR, "%s: puffs_mount failed", __func__);
884 
885 	/*
886 	 * Linux 2.6.34.1 sends theses flags:
887 	 * FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC
888 	 * FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK
889 	 *
890 	 * Linux also sets max_readahead at 32 pages (128 kB)
891 	 *
892 	 * ps_new_msg() is called with NULL creds, which will
893 	 * be interpreted as FUSE superuser.
894 	 */
895 	pm = ps->ps_new_msg(pu, 0, FUSE_INIT, sizeof(*fii), NULL);
896 	fii = GET_INPAYLOAD(ps, pm, fuse_init_in);
897 	fii->major = FUSE_KERNEL_VERSION;
898 	fii->minor = FUSE_KERNEL_MINOR_VERSION;
899 	fii->max_readahead = (unsigned int)(32 * sysconf(_SC_PAGESIZE));
900 	fii->flags = (FUSE_ASYNC_READ|FUSE_POSIX_LOCKS|FUSE_ATOMIC_O_TRUNC);
901 
902 	if ((error = xchg_msg(pu, 0, pm, sizeof(*fio), wait_reply)) != 0)
903 		DERRX(EX_SOFTWARE, "init message exchange failed (%d)", error);
904 
905 	fio = GET_OUTPAYLOAD(ps, pm, fuse_init_out);
906 	ps->ps_max_readahead = fio->max_readahead;
907 	ps->ps_max_write = fio->max_write;
908 
909 	ps->ps_destroy_msg(pm);
910 
911 	return;
912 }
913 
914 int
915 perfuse_fs_unmount(struct puffs_usermount *pu, int flags)
916 {
917 	perfuse_msg_t *pm;
918 	struct perfuse_state *ps;
919 	puffs_cookie_t opc;
920 	int error;
921 
922 	ps = puffs_getspecific(pu);
923 	opc = (puffs_cookie_t)puffs_getroot(pu);
924 
925 	/*
926 	 * ps_new_msg() is called with NULL creds, which will
927 	 * be interpreted as FUSE superuser.
928 	 */
929 	pm = ps->ps_new_msg(pu, opc, FUSE_DESTROY, 0, NULL);
930 
931 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0){
932 		DWARN("unmount %s", ps->ps_target);
933 		if (!(flags & MNT_FORCE))
934 			return error;
935 		else
936 			error = 0;
937 	} else {
938 		ps->ps_destroy_msg(pm);
939 	}
940 
941 	ps->ps_umount(pu);
942 
943 	if (perfuse_diagflags & PDF_MISC)
944 		DPRINTF("%s unmounted, exit\n", ps->ps_target);
945 
946 	return 0;
947 }
948 
949 int
950 perfuse_fs_statvfs(struct puffs_usermount *pu, struct statvfs *svfsb)
951 {
952 	struct perfuse_state *ps;
953 	perfuse_msg_t *pm;
954 	puffs_cookie_t opc;
955 	struct fuse_statfs_out *fso;
956 	int error;
957 
958 	ps = puffs_getspecific(pu);
959 	opc = (puffs_cookie_t)puffs_getroot(pu);
960 
961 	/*
962 	 * ps_new_msg() is called with NULL creds, which will
963 	 * be interpreted as FUSE superuser.
964 	 */
965 	pm = ps->ps_new_msg(pu, opc, FUSE_STATFS, 0, NULL);
966 
967 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fso), wait_reply)) != 0)
968 		return error;
969 
970 	fso = GET_OUTPAYLOAD(ps, pm, fuse_statfs_out);
971 	svfsb->f_flag = ps->ps_mountflags;
972 	svfsb->f_bsize = fso->st.bsize;
973 	svfsb->f_frsize = fso->st.frsize;
974 	svfsb->f_iosize = ((struct puffs_node *)opc)->pn_va.va_blocksize;
975 	svfsb->f_blocks = fso->st.blocks;
976 	svfsb->f_bfree = fso->st.bfree;
977 	svfsb->f_bavail = fso->st.bavail;
978 	svfsb->f_bresvd = fso->st.bfree - fso->st.bavail;
979 	svfsb->f_files = fso->st.files;
980 	svfsb->f_ffree = fso->st.ffree;
981 	svfsb->f_favail = fso->st.ffree;/* files not reserved for root */
982 	svfsb->f_fresvd = 0;		/* files reserved for root */
983 
984 	svfsb->f_syncreads = ps->ps_syncreads;
985 	svfsb->f_syncwrites = ps->ps_syncwrites;
986 
987 	svfsb->f_asyncreads = ps->ps_asyncreads;
988 	svfsb->f_asyncwrites = ps->ps_asyncwrites;
989 
990 	(void)memcpy(&svfsb->f_fsidx, &ps->ps_fsid, sizeof(ps->ps_fsid));
991 	svfsb->f_fsid = (unsigned long)ps->ps_fsid;
992 	svfsb->f_namemax = MAXPATHLEN;	/* XXX */
993 	svfsb->f_owner = ps->ps_owner_uid;
994 
995 	(void)strlcpy(svfsb->f_mntonname, ps->ps_target, _VFS_NAMELEN);
996 
997 	if (ps->ps_filesystemtype != NULL)
998 		(void)strlcpy(svfsb->f_fstypename,
999 			      ps->ps_filesystemtype, _VFS_NAMELEN);
1000 	else
1001 		(void)strlcpy(svfsb->f_fstypename, "fuse", _VFS_NAMELEN);
1002 
1003 	if (ps->ps_source != NULL)
1004 		strlcpy(svfsb->f_mntfromname, ps->ps_source, _VFS_NAMELEN);
1005 	else
1006 		strlcpy(svfsb->f_mntfromname, _PATH_FUSE, _VFS_NAMELEN);
1007 
1008 	ps->ps_destroy_msg(pm);
1009 
1010 	return 0;
1011 }
1012 
/*
 * Filesystem-wide sync. FUSE does not seem to offer a matching
 * operation, so the work is handed to the puffs no-op handler
 * (this callback could arguably not be registered at all).
 */
int
perfuse_fs_sync(struct puffs_usermount *pu, int waitfor,
	const struct puffs_cred *pcr)
{
	int rc;

	rc = puffs_fsnop_sync(pu, waitfor, pcr);

	return rc;
}
1023 
/*
 * File handle to node translation is not implemented: any call is
 * reported as a fatal error through DERRX(). All arguments are unused.
 */
/* ARGSUSED0 */
int
perfuse_fs_fhtonode(struct puffs_usermount *pu, void *fid, size_t fidsize,
	struct puffs_newinfo *pni)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
	return 0;
}
1032 
/*
 * Node to file handle translation is not implemented: any call is
 * reported as a fatal error through DERRX(). All arguments are unused.
 */
/* ARGSUSED0 */
int
perfuse_fs_nodetofh(struct puffs_usermount *pu, puffs_cookie_t cookie,
	void *fid, size_t *fidsize)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
	return 0;
}
1041 
#if 0
/*
 * Extended attribute control stub, currently compiled out.
 * NOTE(review): it is declared void yet ends with "return 0";
 * that must be reconciled before this code is ever enabled.
 */
/* ARGSUSED0 */
void
perfuse_fs_extattrctl(struct puffs_usermount *pu, int cmd,
	puffs_cookie_t *cookie, int flags, int namespace, const char *attrname)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
	return 0;
}
#endif /* 0 */
1052 
/*
 * Suspend callback: intentionally does nothing. Both arguments
 * are unused.
 */
/* ARGSUSED0 */
void
perfuse_fs_suspend(struct puffs_usermount *pu, int status)
{
	return;
}
1059 
1060 
1061 int
1062 perfuse_node_lookup(struct puffs_usermount *pu, puffs_cookie_t opc,
1063 	struct puffs_newinfo *pni, const struct puffs_cn *pcn)
1064 {
1065 	struct perfuse_state *ps;
1066 	struct puffs_node *pn;
1067 	mode_t mode;
1068 	int error;
1069 
1070 	ps = puffs_getspecific(pu);
1071 	node_ref(opc);
1072 
1073 	/*
1074 	 * Check permissions
1075 	 */
1076 	switch(pcn->pcn_nameiop) {
1077 	case NAMEI_DELETE: /* FALLTHROUGH */
1078 	case NAMEI_RENAME: /* FALLTHROUGH */
1079 	case NAMEI_CREATE:
1080 		if (pcn->pcn_flags & NAMEI_ISLASTCN)
1081 			mode = PUFFS_VEXEC|PUFFS_VWRITE;
1082 		else
1083 			mode = PUFFS_VEXEC;
1084 		break;
1085 	case NAMEI_LOOKUP: /* FALLTHROUGH */
1086 	default:
1087 		mode = PUFFS_VEXEC;
1088 		break;
1089 	}
1090 
1091 	if ((error = mode_access(opc, pcn->pcn_cred, mode)) != 0)
1092 		goto out;
1093 
1094 	error = node_lookup_common(pu, (puffs_cookie_t)opc, pni,
1095 				   pcn->pcn_name, pcn->pcn_cred, &pn);
1096 
1097 	if (error != 0)
1098 		goto out;
1099 
1100 	/*
1101 	 * Kernel would kill us if the filesystem returned the parent
1102 	 * itself. If we want to live, hide that!
1103 	 */
1104 	if ((opc == (puffs_cookie_t)pn) && (strcmp(pcn->pcn_name, ".") != 0)) {
1105 		DERRX(EX_SOFTWARE, "lookup \"%s\" in \"%s\" returned parent",
1106 		      pcn->pcn_name, perfuse_node_path(ps, opc));
1107 		/* NOTREACHED */
1108 		error = ESTALE;
1109 		goto out;
1110 	}
1111 
1112 	/*
1113 	 * Removed node
1114 	 */
1115 	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_REMOVED) {
1116 		error = ENOENT;
1117 		goto out;
1118 	}
1119 
1120 	/*
1121 	 * Check for sticky bit. Unfortunately there is no way to
1122 	 * do this before creating the puffs_node, since we require
1123 	 * this operation to get the node owner.
1124 	 */
1125 	switch (pcn->pcn_nameiop) {
1126 	case NAMEI_DELETE: /* FALLTHROUGH */
1127 	case NAMEI_RENAME:
1128 		error = sticky_access(opc, pn, pcn->pcn_cred);
1129 		if (error != 0) {
1130 			(void)perfuse_node_reclaim(pu, pn);
1131 			goto out;
1132 		}
1133 		break;
1134 	default:
1135 		break;
1136 	}
1137 
1138 	PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
1139 
1140 	error = 0;
1141 
1142 out:
1143 	node_rele(opc);
1144 	return error;
1145 }
1146 
1147 int
1148 perfuse_node_create(struct puffs_usermount *pu, puffs_cookie_t opc,
1149 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1150 	const struct vattr *vap)
1151 {
1152 	perfuse_msg_t *pm;
1153 	struct perfuse_state *ps;
1154 	struct fuse_create_in *fci;
1155 	struct fuse_entry_out *feo;
1156 	struct fuse_open_out *foo;
1157 	struct puffs_node *pn;
1158 	const char *name;
1159 	size_t namelen;
1160 	size_t len;
1161 	int error;
1162 
1163 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1164 		return ENOENT;
1165 
1166 	node_ref(opc);
1167 
1168 	/*
1169 	 * If create is unimplemented: Check that it does not
1170 	 * already exists, and if not, do mknod and open
1171 	 */
1172 	ps = puffs_getspecific(pu);
1173 	if (ps->ps_flags & PS_NO_CREAT) {
1174 		error = node_lookup_common(pu, opc, NULL, pcn->pcn_name,
1175 					   pcn->pcn_cred, &pn);
1176 		if (error == 0)	{
1177 			(void)perfuse_node_reclaim(pu, pn);
1178 			error = EEXIST;
1179 			goto out;
1180 		}
1181 
1182 		error = perfuse_node_mknod(pu, opc, pni, pcn, vap);
1183 		if (error != 0)
1184 			goto out;
1185 
1186 		error = node_lookup_common(pu, opc, NULL, pcn->pcn_name,
1187 					   pcn->pcn_cred, &pn);
1188 		if (error != 0)
1189 			goto out;
1190 
1191 		/*
1192 		 * FUSE does the open at create time, while
1193 		 * NetBSD will open in a subsequent operation.
1194 		 * We need to open now, in order to retain FUSE
1195 		 * semantics. The calling process will not get
1196 		 * a file descriptor before the kernel sends
1197 		 * the open operation.
1198 		 */
1199 		error = perfuse_node_open(pu, (puffs_cookie_t)pn,
1200 					  FWRITE, pcn->pcn_cred);
1201 		goto out;
1202 	}
1203 
1204 	name = pcn->pcn_name;
1205 	namelen = pcn->pcn_namelen + 1;
1206 	len = sizeof(*fci) + namelen;
1207 
1208 	/*
1209 	 * flags should use O_WRONLY instead of O_RDWR, but it
1210 	 * breaks when the caller tries to read from file.
1211 	 *
1212 	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1213 	 */
1214 	pm = ps->ps_new_msg(pu, opc, FUSE_CREATE, len, pcn->pcn_cred);
1215 	fci = GET_INPAYLOAD(ps, pm, fuse_create_in);
1216 	fci->flags = O_CREAT | O_TRUNC | O_RDWR;
1217 	fci->mode = vap->va_mode | VTTOIF(vap->va_type);
1218 	fci->umask = 0; 	/* Seems unused by libfuse */
1219 	(void)strlcpy((char*)(void *)(fci + 1), name, namelen);
1220 
1221 	len = sizeof(*feo) + sizeof(*foo);
1222 	if ((error = xchg_msg(pu, opc, pm, len, wait_reply)) != 0) {
1223 		/*
1224 		 * create is unimplmented, remember it for later,
1225 		 * and start over using mknod and open instead.
1226 		 */
1227 		if (error == ENOSYS) {
1228 			ps->ps_flags |= PS_NO_CREAT;
1229 			error = perfuse_node_create(pu, opc, pni, pcn, vap);
1230 		}
1231 
1232 		goto out;
1233 	}
1234 
1235 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
1236 	foo = (struct fuse_open_out *)(void *)(feo + 1);
1237 	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
1238 		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
1239 
1240 	/*
1241 	 * Save the file handle and inode in node private data
1242 	 * so that we can reuse it later
1243 	 */
1244 	pn = perfuse_new_pn(pu, name, opc);
1245 	perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE);
1246 	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
1247 	PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
1248 	perfuse_node_cache(ps, pn);
1249 
1250 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
1251 	pn->pn_va.va_gen = (u_long)(feo->generation);
1252 
1253 	puffs_newinfo_setcookie(pni, pn);
1254 #ifdef PUFFS_KFLAG_CACHE_FS_TTL
1255 	puffs_newinfo_setva(pni, &pn->pn_va);
1256 	perfuse_newinfo_setttl(pni, pn, feo, NULL);
1257 #endif /* PUFFS_KFLAG_CACHE_FS_TTL */
1258 
1259 #ifdef PERFUSE_DEBUG
1260 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1261 		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
1262 			"nodeid = 0x%"PRIx64", wfh = 0x%"PRIx64"\n",
1263 			__func__, (void *)pn, pcn->pcn_name,
1264 			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid,
1265 			foo->fh);
1266 #endif
1267 
1268 	ps->ps_destroy_msg(pm);
1269 	error = 0;
1270 
1271 out:
1272 	node_rele(opc);
1273 	return error;
1274 }
1275 
1276 
1277 int
1278 perfuse_node_mknod(struct puffs_usermount *pu, puffs_cookie_t opc,
1279 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1280 	const struct vattr *vap)
1281 {
1282 	struct perfuse_state *ps;
1283 	perfuse_msg_t *pm;
1284 	struct fuse_mknod_in *fmi;
1285 	const char* path;
1286 	size_t len;
1287 	int error;
1288 
1289 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1290 		return ENOENT;
1291 
1292 	node_ref(opc);
1293 
1294 	/*
1295 	 * Only superuser can mknod objects other than
1296 	 * directories, files, socks, fifo and links.
1297 	 *
1298 	 * Create an object require -WX permission in the parent directory
1299 	 */
1300 	switch (vap->va_type) {
1301 	case VDIR:	/* FALLTHROUGH */
1302 	case VREG:	/* FALLTHROUGH */
1303 	case VFIFO:	/* FALLTHROUGH */
1304 	case VSOCK:
1305 		break;
1306 	default:	/* VNON, VBLK, VCHR, VBAD */
1307 		if (!puffs_cred_isjuggernaut(pcn->pcn_cred)) {
1308 			error = EACCES;
1309 			goto out;
1310 		}
1311 		break;
1312 	}
1313 
1314 
1315 	ps = puffs_getspecific(pu);
1316 	path = pcn->pcn_name;
1317 	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
1318 
1319 	/*
1320 	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1321 	 */
1322 	pm = ps->ps_new_msg(pu, opc, FUSE_MKNOD, len, pcn->pcn_cred);
1323 	fmi = GET_INPAYLOAD(ps, pm, fuse_mknod_in);
1324 	fmi->mode = vap->va_mode | VTTOIF(vap->va_type);
1325 	fmi->rdev = (uint32_t)vap->va_rdev;
1326 	fmi->umask = 0; 	/* Seems unused bu libfuse */
1327 	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
1328 
1329 	error = node_mk_common(pu, opc, pni, pcn, pm);
1330 
1331 out:
1332 	node_rele(opc);
1333 	return error;
1334 }
1335 
1336 
1337 int
1338 perfuse_node_open(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1339 	const struct puffs_cred *pcr)
1340 {
1341 	struct perfuse_state *ps;
1342 	struct perfuse_node_data *pnd;
1343 	perfuse_msg_t *pm;
1344 	mode_t fmode;
1345 	int op;
1346 	struct fuse_open_in *foi;
1347 	struct fuse_open_out *foo;
1348 	struct puffs_node *pn;
1349 	int error;
1350 
1351 	ps = puffs_getspecific(pu);
1352 	pn = (struct puffs_node *)opc;
1353 	pnd = PERFUSE_NODE_DATA(opc);
1354 	error = 0;
1355 
1356 	if (pnd->pnd_flags & PND_REMOVED)
1357 		return ENOENT;
1358 
1359 	node_ref(opc);
1360 
1361 	if (puffs_pn_getvap(pn)->va_type == VDIR)
1362 		op = FUSE_OPENDIR;
1363 	else
1364 		op = FUSE_OPEN;
1365 
1366 	/*
1367 	 * libfuse docs says
1368 	 * - O_CREAT and O_EXCL should never be set.
1369 	 * - O_TRUNC may be used if mount option atomic_o_trunc is used XXX
1370 	 *
1371 	 * O_APPEND makes no sense since FUSE always sends
1372 	 * the file offset for write operations. If the
1373 	 * filesystem uses pwrite(), O_APPEND would cause
1374 	 * the offset to be ignored and cause file corruption.
1375 	 */
1376 	mode &= ~(O_CREAT|O_EXCL|O_APPEND);
1377 
1378 	/*
1379 	 * Do not open twice, and do not reopen for reading
1380 	 * if we already have write handle.
1381 	 */
1382 	switch (mode & (FREAD|FWRITE)) {
1383 	case FREAD:
1384 		if (pnd->pnd_flags & (PND_RFH|PND_WFH))
1385 			goto out;
1386 		break;
1387 	case FWRITE:
1388 		if (pnd->pnd_flags & PND_WFH)
1389 			goto out;
1390 		break;
1391 	case FREAD|FWRITE:
1392 		if (pnd->pnd_flags & PND_WFH)
1393 			goto out;
1394 
1395 		/*
1396 		 * Corner case: if already open for reading (PND_RFH)
1397 		 * and re-opening FREAD|FWRITE, we need to reopen,
1398 		 * but only for writing. Note the change on mode
1399 		 * will only affect perfuse_new_fh()
1400 		 */
1401 		if (pnd->pnd_flags & PND_RFH)
1402 			mode &= ~FREAD;
1403 		break;
1404 	default:
1405 		DWARNX("open without either FREAD nor FWRITE");
1406 		error = EPERM;
1407 		goto out;
1408 	}
1409 
1410 	/*
1411 	 * Queue open on a node so that we do not open
1412 	 * twice. This would be better with read and
1413 	 * write distinguished.
1414 	 */
1415 	while (pnd->pnd_flags & PND_INOPEN)
1416 		requeue_request(pu, opc, PCQ_OPEN);
1417 	pnd->pnd_flags |= PND_INOPEN;
1418 
1419 	/*
1420 	 * Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE
1421 	 * to O_RDONLY/O_WRONLY while perserving the other options.
1422 	 */
1423 	fmode = mode & ~(FREAD|FWRITE);
1424 	fmode |= (mode & FWRITE) ? O_RDWR : O_RDONLY;
1425 
1426 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*foi), pcr);
1427 	foi = GET_INPAYLOAD(ps, pm, fuse_open_in);
1428 	foi->flags = fmode;
1429 	foi->unused = 0;
1430 
1431 	if ((error = xchg_msg(pu, opc, pm, sizeof(*foo), wait_reply)) != 0)
1432 		goto out;
1433 
1434 	foo = GET_OUTPAYLOAD(ps, pm, fuse_open_out);
1435 
1436 	/*
1437 	 * Save the file handle in node private data
1438 	 * so that we can reuse it later
1439 	 */
1440 	perfuse_new_fh(opc, foo->fh, mode);
1441 
1442 #ifdef PERFUSE_DEBUG
1443 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1444 		DPRINTF("%s: opc = %p, file = \"%s\", "
1445 			"nodeid = 0x%"PRIx64", %s%sfh = 0x%"PRIx64"\n",
1446 			__func__, (void *)opc, perfuse_node_path(ps, opc),
1447 			pnd->pnd_nodeid, mode & FREAD ? "r" : "",
1448 			mode & FWRITE ? "w" : "", foo->fh);
1449 #endif
1450 
1451 	ps->ps_destroy_msg(pm);
1452 out:
1453 
1454 	pnd->pnd_flags &= ~PND_INOPEN;
1455 	(void)dequeue_requests(opc, PCQ_OPEN, DEQUEUE_ALL);
1456 
1457 	node_rele(opc);
1458 	return error;
1459 }
1460 
1461 /* ARGSUSED0 */
1462 int
1463 perfuse_node_close(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1464 	const struct puffs_cred *pcr)
1465 {
1466 	struct perfuse_node_data *pnd;
1467 
1468 	pnd = PERFUSE_NODE_DATA(opc);
1469 
1470 	if (!(pnd->pnd_flags & PND_OPEN))
1471 		return EBADF;
1472 
1473 	/*
1474 	 * Actual close is postponed at inactive time.
1475 	 */
1476 	return 0;
1477 }
1478 
1479 int
1480 perfuse_node_access(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1481 	const struct puffs_cred *pcr)
1482 {
1483 	perfuse_msg_t *pm;
1484 	struct perfuse_state *ps;
1485 	struct fuse_access_in *fai;
1486 	int error;
1487 
1488 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1489 		return ENOENT;
1490 
1491 	node_ref(opc);
1492 
1493 	/*
1494 	 * If we previously detected the filesystem does not
1495 	 * implement access(), short-circuit the call and skip
1496 	 * to libpuffs access() emulation.
1497 	 */
1498 	ps = puffs_getspecific(pu);
1499 	if (ps->ps_flags & PS_NO_ACCESS) {
1500 		const struct vattr *vap;
1501 
1502 		vap = puffs_pn_getvap((struct puffs_node *)opc);
1503 
1504 		error = puffs_access(IFTOVT(vap->va_mode),
1505 				     vap->va_mode & ACCESSPERMS,
1506 				     vap->va_uid, vap->va_gid,
1507 				     (mode_t)mode, pcr);
1508 		goto out;
1509 	}
1510 
1511 	/*
1512 	 * Plain access call
1513 	 */
1514 	pm = ps->ps_new_msg(pu, opc, FUSE_ACCESS, sizeof(*fai), pcr);
1515 	fai = GET_INPAYLOAD(ps, pm, fuse_access_in);
1516 	fai->mask = 0;
1517 	fai->mask |= (mode & PUFFS_VREAD) ? R_OK : 0;
1518 	fai->mask |= (mode & PUFFS_VWRITE) ? W_OK : 0;
1519 	fai->mask |= (mode & PUFFS_VEXEC) ? X_OK : 0;
1520 
1521 	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
1522 
1523 	ps->ps_destroy_msg(pm);
1524 
1525 	/*
1526 	 * If unimplemented, start over with emulation
1527 	 */
1528 	if (error == ENOSYS) {
1529 		ps->ps_flags |= PS_NO_ACCESS;
1530 		error = perfuse_node_access(pu, opc, mode, pcr);
1531 	}
1532 
1533 out:
1534 	node_rele(opc);
1535 	return error;
1536 }
1537 
1538 int
1539 perfuse_node_getattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1540 	struct vattr *vap, const struct puffs_cred *pcr)
1541 {
1542 	return perfuse_node_getattr_ttl(pu, opc, vap, pcr, NULL);
1543 }
1544 
1545 int
1546 perfuse_node_getattr_ttl(struct puffs_usermount *pu, puffs_cookie_t opc,
1547 	struct vattr *vap, const struct puffs_cred *pcr,
1548 	struct timespec *va_ttl)
1549 {
1550 	perfuse_msg_t *pm = NULL;
1551 	struct perfuse_state *ps;
1552 	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
1553 	struct fuse_getattr_in *fgi;
1554 	struct fuse_attr_out *fao;
1555 	int error = 0;
1556 
1557 	if (pnd->pnd_flags & PND_REMOVED)
1558 		return ENOENT;
1559 
1560 	node_ref(opc);
1561 
1562 	/*
1563 	 * Serialize size access, see comment in perfuse_node_setattr().
1564 	 */
1565 	while (pnd->pnd_flags & PND_INRESIZE)
1566 		requeue_request(pu, opc, PCQ_RESIZE);
1567 	pnd->pnd_flags |= PND_INRESIZE;
1568 
1569 	ps = puffs_getspecific(pu);
1570 
1571 	/*
1572 	 * FUSE_GETATTR_FH must be set in fgi->flags
1573 	 * if we use for fgi->fh
1574 	 */
1575 	pm = ps->ps_new_msg(pu, opc, FUSE_GETATTR, sizeof(*fgi), pcr);
1576 	fgi = GET_INPAYLOAD(ps, pm, fuse_getattr_in);
1577 	fgi->getattr_flags = 0;
1578 	fgi->dummy = 0;
1579 	fgi->fh = 0;
1580 
1581 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_OPEN) {
1582 		fgi->fh = perfuse_get_fh(opc, FREAD);
1583 		fgi->getattr_flags |= FUSE_GETATTR_FH;
1584 	}
1585 
1586 #ifdef PERFUSE_DEBUG
1587 	if (perfuse_diagflags & PDF_RESIZE)
1588 		DPRINTF(">> %s %p %" PRIu64 "\n", __func__, (void *)opc,
1589 		    vap->va_size);
1590 #endif
1591 
1592 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
1593 		goto out;
1594 
1595 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1596 
1597 #ifdef PERFUSE_DEBUG
1598 	if (perfuse_diagflags & PDF_RESIZE)
1599 		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1600 		    (void *)opc, vap->va_size, fao->attr.size);
1601 #endif
1602 
1603 	/*
1604 	 * We set birthtime, flags, filerev,vaflags to 0.
1605 	 * This seems the best bet, since the information is
1606 	 * not available from filesystem.
1607 	 */
1608 	fuse_attr_to_vap(ps, vap, &fao->attr);
1609 
1610 	if (va_ttl != NULL) {
1611 		va_ttl->tv_sec = fao->attr_valid;
1612 		va_ttl->tv_nsec = fao->attr_valid_nsec;
1613 	}
1614 
1615 	ps->ps_destroy_msg(pm);
1616 	error = 0;
1617 out:
1618 
1619 	pnd->pnd_flags &= ~PND_INRESIZE;
1620 	(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
1621 
1622 	node_rele(opc);
1623 	return error;
1624 }
1625 
1626 int
1627 perfuse_node_setattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1628 	const struct vattr *vap, const struct puffs_cred *pcr)
1629 {
1630 	return perfuse_node_setattr_ttl(pu, opc,
1631 					__UNCONST(vap), pcr, NULL, 0);
1632 }
1633 
1634 int
1635 perfuse_node_setattr_ttl(struct puffs_usermount *pu, puffs_cookie_t opc,
1636 	struct vattr *vap, const struct puffs_cred *pcr,
1637 	struct timespec *va_ttl, int xflag)
1638 {
1639 	perfuse_msg_t *pm;
1640 	uint64_t fh;
1641 	struct perfuse_state *ps;
1642 	struct perfuse_node_data *pnd;
1643 	struct fuse_setattr_in *fsi;
1644 	struct fuse_attr_out *fao;
1645 	struct vattr *old_va;
1646 	enum perfuse_xchg_pb_reply reply;
1647 	int error;
1648 #ifdef PERFUSE_DEBUG
1649 	struct vattr *old_vap;
1650 	int resize_debug = 0;
1651 #endif
1652 	ps = puffs_getspecific(pu);
1653 	pnd = PERFUSE_NODE_DATA(opc);
1654 
1655 	/*
1656 	 * The only operation we can do once the file is removed
1657 	 * is to resize it, and we can do it only if it is open.
1658 	 * Do not even send the operation to the filesystem: the
1659 	 * file is not there anymore.
1660 	 */
1661 	if (pnd->pnd_flags & PND_REMOVED) {
1662 		if (!(pnd->pnd_flags & PND_OPEN))
1663 			return ENOENT;
1664 
1665 		return 0;
1666 	}
1667 
1668 	old_va = puffs_pn_getvap((struct puffs_node *)opc);
1669 
1670 	/*
1671 	 * Check for permission to change size
1672 	 * It is always allowed if we already have a write file handle
1673 	 */
1674 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1675 	    !(pnd->pnd_flags & PND_WFH) &&
1676 	    (error = mode_access(opc, pcr, PUFFS_VWRITE)) != 0)
1677 		return error;
1678 
1679 	/*
1680 	 * Check for permission to change dates
1681 	 */
1682 	if (((vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1683 	     (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL)) &&
1684 	    (puffs_access_times(old_va->va_uid, old_va->va_gid,
1685 				old_va->va_mode, 0, pcr) != 0))
1686 		return EACCES;
1687 
1688 	/*
1689 	 * Check for permission to change owner and group
1690 	 */
1691 	if (((vap->va_uid != (uid_t)PUFFS_VNOVAL) ||
1692 	     (vap->va_gid != (gid_t)PUFFS_VNOVAL)) &&
1693 	    (puffs_access_chown(old_va->va_uid, old_va->va_gid,
1694 				vap->va_uid, vap->va_gid, pcr)) != 0)
1695 		return EACCES;
1696 
1697 	/*
1698 	 * Check for permission to change permissions
1699 	 */
1700 	if ((vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
1701 	    (puffs_access_chmod(old_va->va_uid, old_va->va_gid,
1702 				old_va->va_type, vap->va_mode, pcr)) != 0)
1703 		return EACCES;
1704 
1705 	node_ref(opc);
1706 
1707 	if (pnd->pnd_flags & PND_WFH)
1708 		fh = perfuse_get_fh(opc, FWRITE);
1709 	else
1710 		fh = FUSE_UNKNOWN_FH;
1711 
1712 	/*
1713 	 * fchmod() sets mode and fh, and it may carry
1714 	 * a resize as well. That may break if the
1715 	 * filesystem does chmod then resize, and fails
1716 	 * because it does not have permission anymore.
1717 	 * We work this around by splitting into two setattr.
1718 	 */
1719 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1720 	    (vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
1721 	    (fh != FUSE_UNKNOWN_FH)) {
1722 		struct vattr resize_va;
1723 
1724 		(void)memcpy(&resize_va, vap, sizeof(resize_va));
1725 		resize_va.va_mode = (mode_t)PUFFS_VNOVAL;
1726 		if ((error = perfuse_node_setattr_ttl(pu, opc, &resize_va,
1727 						      pcr, va_ttl, xflag)) != 0)
1728 			goto out2;
1729 
1730 		vap->va_size = (u_quad_t)PUFFS_VNOVAL;
1731 	}
1732 
1733 	pm = ps->ps_new_msg(pu, opc, FUSE_SETATTR, sizeof(*fsi), pcr);
1734 	fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
1735 	fsi->valid = 0;
1736 
1737 	/*
1738 	 * Get a fh if the node is open for writing
1739 	 */
1740 	if (fh != FUSE_UNKNOWN_FH) {
1741 		fsi->fh = fh;
1742 		fsi->valid |= FUSE_FATTR_FH;
1743 	}
1744 
1745 
1746 	if (vap->va_size != (u_quad_t)PUFFS_VNOVAL) {
1747 		fsi->size = vap->va_size;
1748 		fsi->valid |= FUSE_FATTR_SIZE;
1749 
1750 		/*
1751 		 * Serialize anything that can touch file size
1752 		 * to avoid reordered GETATTR and SETATTR.
1753 		 * Out of order SETATTR can report stale size,
1754 		 * which will cause the kernel to truncate the file.
1755 		 * XXX Probably useless now we have a lock on GETATTR
1756 		 */
1757 		while (pnd->pnd_flags & PND_INRESIZE)
1758 			requeue_request(pu, opc, PCQ_RESIZE);
1759 		pnd->pnd_flags |= PND_INRESIZE;
1760 	}
1761 
1762 	/*
1763  	 * Setting mtime without atime or vice versa leads to
1764 	 * dates being reset to Epoch on glusterfs. If one
1765 	 * is missing, use the old value.
1766  	 */
1767 	if ((vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1768 	    (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL)) {
1769 
1770 		if (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) {
1771 			fsi->atime = vap->va_atime.tv_sec;
1772 			fsi->atimensec = (uint32_t)vap->va_atime.tv_nsec;
1773 		} else {
1774 			fsi->atime = old_va->va_atime.tv_sec;
1775 			fsi->atimensec = (uint32_t)old_va->va_atime.tv_nsec;
1776 		}
1777 
1778 		if (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) {
1779 			fsi->mtime = vap->va_mtime.tv_sec;
1780 			fsi->mtimensec = (uint32_t)vap->va_mtime.tv_nsec;
1781 		} else {
1782 			fsi->mtime = old_va->va_mtime.tv_sec;
1783 			fsi->mtimensec = (uint32_t)old_va->va_mtime.tv_nsec;
1784 		}
1785 
1786 		fsi->valid |= (FUSE_FATTR_MTIME|FUSE_FATTR_ATIME);
1787 	}
1788 
1789 	if (vap->va_mode != (mode_t)PUFFS_VNOVAL) {
1790 		fsi->mode = vap->va_mode;
1791 		fsi->valid |= FUSE_FATTR_MODE;
1792 	}
1793 
1794 	if (vap->va_uid != (uid_t)PUFFS_VNOVAL) {
1795 		fsi->uid = vap->va_uid;
1796 		fsi->valid |= FUSE_FATTR_UID;
1797 	}
1798 
1799 	if (vap->va_gid != (gid_t)PUFFS_VNOVAL) {
1800 		fsi->gid = vap->va_gid;
1801 		fsi->valid |= FUSE_FATTR_GID;
1802 	}
1803 
1804 	if (pnd->pnd_lock_owner != 0) {
1805 		fsi->lock_owner = pnd->pnd_lock_owner;
1806 		fsi->valid |= FUSE_FATTR_LOCKOWNER;
1807 	}
1808 
1809 	/*
1810 	 * ftruncate() sends only va_size, and metadata cache
1811 	 * flush adds va_atime and va_mtime. Some FUSE
1812 	 * filesystems will attempt to detect ftruncate by
1813 	 * checking for FATTR_SIZE being set without
1814 	 * FATTR_UID|FATTR_GID|FATTR_ATIME|FATTR_MTIME|FATTR_MODE
1815 	 *
1816 	 * Try to adapt and remove FATTR_ATIME|FATTR_MTIME
1817 	 * if we suspect a ftruncate().
1818 	 */
1819 	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1820 	    ((vap->va_mode == (mode_t)PUFFS_VNOVAL) &&
1821 	     (vap->va_uid == (uid_t)PUFFS_VNOVAL) &&
1822 	     (vap->va_gid == (gid_t)PUFFS_VNOVAL))) {
1823 		fsi->atime = 0;
1824 		fsi->atimensec = 0;
1825 		fsi->mtime = 0;
1826 		fsi->mtimensec = 0;
1827 		fsi->valid &= ~(FUSE_FATTR_ATIME|FUSE_FATTR_MTIME);
1828 	}
1829 
1830 	/*
1831 	 * If nothing remain, discard the operation.
1832 	 */
1833 	if (!(fsi->valid & (FUSE_FATTR_SIZE|FUSE_FATTR_ATIME|FUSE_FATTR_MTIME|
1834 			    FUSE_FATTR_MODE|FUSE_FATTR_UID|FUSE_FATTR_GID))) {
1835 		error = 0;
1836 		ps->ps_destroy_msg(pm);
1837 		goto out;
1838 	}
1839 
1840 #ifdef PERFUSE_DEBUG
1841 	old_vap = puffs_pn_getvap((struct puffs_node *)opc);
1842 
1843 	if ((perfuse_diagflags & PDF_RESIZE) &&
1844 	    (old_vap->va_size != (u_quad_t)PUFFS_VNOVAL)) {
1845 		resize_debug = 1;
1846 
1847 		DPRINTF(">> %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1848 		    (void *)opc,
1849 		    puffs_pn_getvap((struct puffs_node *)opc)->va_size,
1850 		    fsi->size);
1851 	}
1852 #endif
1853 
1854 	/*
1855 	 * Do not honour FAF when changing size. How do
1856 	 * you want such a thing to work?
1857 	 */
1858 	reply = wait_reply;
1859 #ifdef PUFFS_SETATTR_FAF
1860 	if ((xflag & PUFFS_SETATTR_FAF) && !(fsi->valid & FUSE_FATTR_SIZE))
1861 		reply = no_reply;
1862 #endif
1863 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), reply)) != 0)
1864 		goto out;
1865 
1866 	if (reply == no_reply)
1867 		goto out;
1868 
1869 	/*
1870 	 * Copy back the new values
1871 	 */
1872 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1873 
1874 #ifdef PERFUSE_DEBUG
1875 	if (resize_debug)
1876 		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1877 		    (void *)opc, old_vap->va_size, fao->attr.size);
1878 #endif
1879 
1880 	fuse_attr_to_vap(ps, old_va, &fao->attr);
1881 
1882 	if (va_ttl != NULL) {
1883 		va_ttl->tv_sec = fao->attr_valid;
1884 		va_ttl->tv_nsec = fao->attr_valid_nsec;
1885 		(void)memcpy(vap, old_va, sizeof(*vap));
1886 	}
1887 
1888 	ps->ps_destroy_msg(pm);
1889 	error = 0;
1890 
1891 out:
1892 	if (pnd->pnd_flags & PND_INRESIZE) {
1893 		pnd->pnd_flags &= ~PND_INRESIZE;
1894 		(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
1895 	}
1896 
1897 out2:
1898 	node_rele(opc);
1899 	return error;
1900 }
1901 
1902 int
1903 perfuse_node_poll(struct puffs_usermount *pu, puffs_cookie_t opc, int *events)
1904 {
1905 	struct perfuse_state *ps;
1906 	perfuse_msg_t *pm;
1907 	struct fuse_poll_in *fpi;
1908 	struct fuse_poll_out *fpo;
1909 	int error;
1910 
1911 	node_ref(opc);
1912 	ps = puffs_getspecific(pu);
1913 	/*
1914 	 * kh is set if FUSE_POLL_SCHEDULE_NOTIFY is set.
1915 	 *
1916 	 * XXX ps_new_msg() is called with NULL creds, which will
1917 	 * be interpreted as FUSE superuser. We have no way to
1918 	 * know the requesting process' credential, but since poll
1919 	 * is supposed to operate on a file that has been open,
1920 	 * permission should have already been checked at open time.
1921 	 * That still may breaks on filesystems that provides odd
1922 	 * semantics.
1923  	 */
1924 	pm = ps->ps_new_msg(pu, opc, FUSE_POLL, sizeof(*fpi), NULL);
1925 	fpi = GET_INPAYLOAD(ps, pm, fuse_poll_in);
1926 	fpi->fh = perfuse_get_fh(opc, FREAD);
1927 	fpi->kh = 0;
1928 	fpi->flags = 0;
1929 
1930 #ifdef PERFUSE_DEBUG
1931 	if (perfuse_diagflags & PDF_FH)
1932 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
1933 			"fh = 0x%"PRIx64"\n", __func__, (void *)opc,
1934 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fpi->fh);
1935 #endif
1936 	if ((error = xchg_msg(pu, opc, pm, sizeof(*fpo), wait_reply)) != 0)
1937 		goto out;
1938 
1939 	fpo = GET_OUTPAYLOAD(ps, pm, fuse_poll_out);
1940 	*events = fpo->revents;
1941 
1942 	ps->ps_destroy_msg(pm);
1943 	error = 0;
1944 
1945 out:
1946 	node_rele(opc);
1947 	return error;
1948 }
1949 
1950 /* ARGSUSED0 */
1951 int
1952 perfuse_node_mmap(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1953 	const struct puffs_cred *pcr)
1954 {
1955 	/*
1956 	 * Not implemented anymore in libfuse
1957 	 */
1958 	return ENOSYS;
1959 }
1960 
1961 /* ARGSUSED2 */
1962 int
1963 perfuse_node_fsync(struct puffs_usermount *pu, puffs_cookie_t opc,
1964 	const struct puffs_cred *pcr, int flags, off_t offlo, off_t offhi)
1965 {
1966 	int op;
1967 	perfuse_msg_t *pm;
1968 	struct perfuse_state *ps;
1969 	struct perfuse_node_data *pnd;
1970 	struct fuse_fsync_in *ffi;
1971 	uint64_t fh;
1972 	int error = 0;
1973 
1974 	pm = NULL;
1975 	ps = puffs_getspecific(pu);
1976 	pnd = PERFUSE_NODE_DATA(opc);
1977 
1978 	/*
1979 	 * No need to sync a removed node
1980 	 */
1981 	if (pnd->pnd_flags & PND_REMOVED)
1982 		return 0;
1983 
1984 	/*
1985 	 * We do not sync closed files. They have been
1986 	 * sync at inactive time already.
1987 	 */
1988 	if (!(pnd->pnd_flags & PND_OPEN))
1989 		return 0;
1990 
1991 	node_ref(opc);
1992 
1993 	if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR)
1994 		op = FUSE_FSYNCDIR;
1995 	else 		/* VREG but also other types such as VLNK */
1996 		op = FUSE_FSYNC;
1997 
1998 	/*
1999 	 * Do not sync if there are no change to sync
2000 	 * XXX remove that test on files if we implement mmap
2001 	 */
2002 #ifdef PERFUSE_DEBUG
2003 	if (perfuse_diagflags & PDF_SYNC)
2004 		DPRINTF("%s: TEST opc = %p, file = \"%s\" is %sdirty\n",
2005 			__func__, (void*)opc, perfuse_node_path(ps, opc),
2006 			pnd->pnd_flags & PND_DIRTY ? "" : "not ");
2007 #endif
2008 	if (!(pnd->pnd_flags & PND_DIRTY))
2009 		goto out;
2010 
2011 	/*
2012 	 * It seems NetBSD can call fsync without open first
2013 	 * glusterfs complain in such a situation:
2014 	 * "FSYNC() ERR => -1 (Invalid argument)"
2015 	 * The file will be closed at inactive time.
2016 	 *
2017 	 * We open the directory for reading in order to sync.
2018 	 * This sounds rather counterintuitive, but it works.
2019 	 */
2020 	if (!(pnd->pnd_flags & PND_WFH)) {
2021 		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
2022 			goto out;
2023 	}
2024 
2025 	if (op == FUSE_FSYNCDIR)
2026 		fh = perfuse_get_fh(opc, FREAD);
2027 	else
2028 		fh = perfuse_get_fh(opc, FWRITE);
2029 
2030 	/*
2031 	 * If fsync_flags  is set, meta data should not be flushed.
2032 	 */
2033 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*ffi), pcr);
2034 	ffi = GET_INPAYLOAD(ps, pm, fuse_fsync_in);
2035 	ffi->fh = fh;
2036 	ffi->fsync_flags = (flags & FFILESYNC) ? 0 : 1;
2037 
2038 #ifdef PERFUSE_DEBUG
2039 	if (perfuse_diagflags & PDF_FH)
2040 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
2041 			__func__, (void *)opc,
2042 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, ffi->fh);
2043 #endif
2044 
2045 	if ((error = xchg_msg(pu, opc, pm,
2046 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
2047 		goto out;
2048 
2049 	/*
2050 	 * No reply beyond fuse_out_header: nothing to do on success
2051 	 * just clear the dirty flag
2052 	 */
2053 	pnd->pnd_flags &= ~PND_DIRTY;
2054 
2055 #ifdef PERFUSE_DEBUG
2056 	if (perfuse_diagflags & PDF_SYNC)
2057 		DPRINTF("%s: CLEAR opc = %p, file = \"%s\"\n",
2058 			__func__, (void*)opc, perfuse_node_path(ps, opc));
2059 #endif
2060 
2061 	ps->ps_destroy_msg(pm);
2062 	error = 0;
2063 
2064 out:
2065 	/*
2066 	 * ENOSYS is not returned to kernel,
2067 	 */
2068 	if (error == ENOSYS)
2069 		error = 0;
2070 
2071 	node_rele(opc);
2072 	return error;
2073 }
2074 
2075 /* ARGSUSED0 */
2076 int
2077 perfuse_node_seek(struct puffs_usermount *pu, puffs_cookie_t opc,
2078 	off_t oldoff, off_t newoff, const struct puffs_cred *pcr)
2079 {
2080 	return 0;
2081 }
2082 
2083 int
2084 perfuse_node_remove(struct puffs_usermount *pu, puffs_cookie_t opc,
2085 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2086 {
2087 	struct perfuse_state *ps;
2088 	struct perfuse_node_data *pnd;
2089 	perfuse_msg_t *pm;
2090 	char *path;
2091 	const char *name;
2092 	size_t len;
2093 	int error;
2094 
2095 	pnd = PERFUSE_NODE_DATA(opc);
2096 
2097 	if ((pnd->pnd_flags & PND_REMOVED) ||
2098 	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2099 		return ENOENT;
2100 
2101 #ifdef PERFUSE_DEBUG
2102 	if (targ == NULL)
2103 		DERRX(EX_SOFTWARE, "%s: targ is NULL", __func__);
2104 
2105 	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
2106 		DPRINTF("%s: opc = %p, remove opc = %p, file = \"%s\"\n",
2107 			__func__, (void *)opc, (void *)targ, pcn->pcn_name);
2108 #endif
2109 	node_ref(opc);
2110 	node_ref(targ);
2111 
2112 	/*
2113 	 * Await for all operations on the deleted node to drain,
2114 	 * as the filesystem may be confused to have it deleted
2115 	 * during a getattr
2116 	 */
2117 	while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
2118 		requeue_request(pu, targ, PCQ_AFTERXCHG);
2119 
2120 	ps = puffs_getspecific(pu);
2121 	pnd = PERFUSE_NODE_DATA(opc);
2122 	name = pcn->pcn_name;
2123 	len = pcn->pcn_namelen + 1;
2124 
2125 	pm = ps->ps_new_msg(pu, opc, FUSE_UNLINK, len, pcn->pcn_cred);
2126 	path = _GET_INPAYLOAD(ps, pm, char *);
2127 	(void)strlcpy(path, name, len);
2128 
2129 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2130 		goto out;
2131 
2132 	perfuse_cache_flush(targ);
2133 	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2134 
2135 	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2136 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2137 
2138 	/*
2139 	 * The parent directory needs a sync
2140 	 */
2141 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2142 
2143 #ifdef PERFUSE_DEBUG
2144 	if (perfuse_diagflags & PDF_FILENAME)
2145 		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2146 			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2147 			pcn->pcn_name);
2148 #endif
2149 	ps->ps_destroy_msg(pm);
2150 	error = 0;
2151 
2152 out:
2153 	node_rele(opc);
2154 	node_rele(targ);
2155 	return error;
2156 }
2157 
2158 int
2159 perfuse_node_link(struct puffs_usermount *pu, puffs_cookie_t opc,
2160 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2161 {
2162 	struct perfuse_state *ps;
2163 	perfuse_msg_t *pm;
2164 	const char *name;
2165 	size_t len;
2166 	struct puffs_node *pn;
2167 	struct fuse_link_in *fli;
2168 	int error;
2169 
2170 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2171 		return ENOENT;
2172 
2173 	node_ref(opc);
2174 	node_ref(targ);
2175 	ps = puffs_getspecific(pu);
2176 	pn = (struct puffs_node *)targ;
2177 	name = pcn->pcn_name;
2178 	len =  sizeof(*fli) + pcn->pcn_namelen + 1;
2179 
2180 	pm = ps->ps_new_msg(pu, opc, FUSE_LINK, len, pcn->pcn_cred);
2181 	fli = GET_INPAYLOAD(ps, pm, fuse_link_in);
2182 	fli->oldnodeid = PERFUSE_NODE_DATA(pn)->pnd_nodeid;
2183 	(void)strlcpy((char *)(void *)(fli + 1), name, len - sizeof(*fli));
2184 
2185 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2186 		goto out;
2187 
2188 	ps->ps_destroy_msg(pm);
2189 	error = 0;
2190 
2191 out:
2192 	node_rele(opc);
2193 	node_rele(targ);
2194 	return error;
2195 }
2196 
2197 int
2198 perfuse_node_rename(struct puffs_usermount *pu, puffs_cookie_t opc,
2199 	puffs_cookie_t src, const struct puffs_cn *pcn_src,
2200 	puffs_cookie_t targ_dir, puffs_cookie_t targ,
2201 	const struct puffs_cn *pcn_targ)
2202 {
2203 	struct perfuse_state *ps;
2204 	struct perfuse_node_data *dstdir_pnd;
2205 	perfuse_msg_t *pm;
2206 	struct fuse_rename_in *fri;
2207 	const char *newname;
2208 	const char *oldname;
2209 	char *np;
2210 	int error;
2211 	size_t len;
2212 	size_t newname_len;
2213 	size_t oldname_len;
2214 
2215 	if ((PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED) ||
2216 	    (PERFUSE_NODE_DATA(src)->pnd_flags & PND_REMOVED) ||
2217 	    (PERFUSE_NODE_DATA(targ_dir)->pnd_flags & PND_REMOVED))
2218 		return ENOENT;
2219 
2220 	node_ref(opc);
2221 	node_ref(src);
2222 
2223 	/*
2224 	 * Await for all operations on the deleted node to drain,
2225 	 * as the filesystem may be confused to have it deleted
2226 	 * during a getattr
2227 	 */
2228 	if ((struct puffs_node *)targ != NULL) {
2229 		node_ref(targ);
2230 		while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
2231 			requeue_request(pu, targ, PCQ_AFTERXCHG);
2232 	} else {
2233 		while (PERFUSE_NODE_DATA(src)->pnd_inxchg)
2234 			requeue_request(pu, src, PCQ_AFTERXCHG);
2235 	}
2236 
2237 	ps = puffs_getspecific(pu);
2238 	newname =  pcn_targ->pcn_name;
2239 	newname_len = pcn_targ->pcn_namelen + 1;
2240 	oldname =  pcn_src->pcn_name;
2241 	oldname_len = pcn_src->pcn_namelen + 1;
2242 
2243 	len = sizeof(*fri) + oldname_len + newname_len;
2244 	pm = ps->ps_new_msg(pu, opc, FUSE_RENAME, len, pcn_targ->pcn_cred);
2245 	fri = GET_INPAYLOAD(ps, pm, fuse_rename_in);
2246 	fri->newdir = PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid;
2247 	np = (char *)(void *)(fri + 1);
2248 	(void)strlcpy(np, oldname, oldname_len);
2249 	np += oldname_len;
2250 	(void)strlcpy(np, newname, newname_len);
2251 
2252 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2253 		goto out;
2254 
2255 
2256 	/*
2257 	 * Record new parent nodeid
2258 	 */
2259 	dstdir_pnd = PERFUSE_NODE_DATA(targ_dir);
2260 	PERFUSE_NODE_DATA(src)->pnd_parent_nodeid = dstdir_pnd->pnd_nodeid;
2261 
2262 	if (opc != targ_dir)
2263 		dstdir_pnd->pnd_flags |= PND_DIRTY;
2264 
2265 	if (strcmp(newname, "..") != 0)
2266 		(void)strlcpy(PERFUSE_NODE_DATA(src)->pnd_name,
2267 		    newname, MAXPATHLEN);
2268 	else
2269 		PERFUSE_NODE_DATA(src)->pnd_name[0] = 0; /* forget name */
2270 
2271 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2272 
2273 	if ((struct puffs_node *)targ != NULL) {
2274 		perfuse_cache_flush(targ);
2275 		PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2276 	}
2277 
2278 #ifdef PERFUSE_DEBUG
2279 	if (perfuse_diagflags & PDF_FILENAME)
2280 		DPRINTF("%s: nodeid = 0x%"PRIx64" file = \"%s\" renamed \"%s\" "
2281 			"nodeid = 0x%"PRIx64" -> nodeid = 0x%"PRIx64" \"%s\"\n",
2282 	 		__func__, PERFUSE_NODE_DATA(src)->pnd_nodeid,
2283 			pcn_src->pcn_name, pcn_targ->pcn_name,
2284 			PERFUSE_NODE_DATA(opc)->pnd_nodeid,
2285 			PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid,
2286 			perfuse_node_path(ps, targ_dir));
2287 #endif
2288 
2289 	ps->ps_destroy_msg(pm);
2290 	error = 0;
2291 
2292 out:
2293 	node_rele(opc);
2294 	node_rele(src);
2295 	if ((struct puffs_node *)targ != NULL)
2296 		node_rele(targ);
2297 
2298 	return error;
2299 }
2300 
2301 int
2302 perfuse_node_mkdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2303 	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
2304 	const struct vattr *vap)
2305 {
2306 	struct perfuse_state *ps;
2307 	perfuse_msg_t *pm;
2308 	struct fuse_mkdir_in *fmi;
2309 	const char *path;
2310 	size_t len;
2311 	int error;
2312 
2313 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2314 		return ENOENT;
2315 
2316 	node_ref(opc);
2317 	ps = puffs_getspecific(pu);
2318 	path = pcn->pcn_name;
2319 	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
2320 
2321 	pm = ps->ps_new_msg(pu, opc, FUSE_MKDIR, len, pcn->pcn_cred);
2322 	fmi = GET_INPAYLOAD(ps, pm, fuse_mkdir_in);
2323 	fmi->mode = vap->va_mode;
2324 	fmi->umask = 0; 	/* Seems unused by libfuse? */
2325 	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
2326 
2327 	error = node_mk_common(pu, opc, pni, pcn, pm);
2328 
2329 	node_rele(opc);
2330 	return error;
2331 }
2332 
2333 
2334 int
2335 perfuse_node_rmdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2336 	puffs_cookie_t targ, const struct puffs_cn *pcn)
2337 {
2338 	struct perfuse_state *ps;
2339 	struct perfuse_node_data *pnd;
2340 	perfuse_msg_t *pm;
2341 	char *path;
2342 	const char *name;
2343 	size_t len;
2344 	int error;
2345 
2346 	pnd = PERFUSE_NODE_DATA(opc);
2347 
2348 	if ((pnd->pnd_flags & PND_REMOVED) ||
2349 	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2350 		return ENOENT;
2351 
2352 	node_ref(opc);
2353 	node_ref(targ);
2354 
2355 	/*
2356 	 * Await for all operations on the deleted node to drain,
2357 	 * as the filesystem may be confused to have it deleted
2358 	 * during a getattr
2359 	 */
2360 	while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
2361 		requeue_request(pu, targ, PCQ_AFTERXCHG);
2362 
2363 	ps = puffs_getspecific(pu);
2364 	name = pcn->pcn_name;
2365 	len = pcn->pcn_namelen + 1;
2366 
2367 	pm = ps->ps_new_msg(pu, opc, FUSE_RMDIR, len, pcn->pcn_cred);
2368 	path = _GET_INPAYLOAD(ps, pm, char *);
2369 	(void)strlcpy(path, name, len);
2370 
2371 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2372 		goto out;
2373 
2374 	perfuse_cache_flush(targ);
2375 	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2376 
2377 	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2378 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2379 
2380 	/*
2381 	 * The parent directory needs a sync
2382 	 */
2383 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2384 
2385 #ifdef PERFUSE_DEBUG
2386 	if (perfuse_diagflags & PDF_FILENAME)
2387 		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2388 			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2389 			perfuse_node_path(ps, targ));
2390 #endif
2391 	ps->ps_destroy_msg(pm);
2392 	error = 0;
2393 
2394 out:
2395 	node_rele(opc);
2396 	node_rele(targ);
2397 	return error;
2398 }
2399 
2400 /* vap is unused */
2401 /* ARGSUSED4 */
2402 int
2403 perfuse_node_symlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2404 	struct puffs_newinfo *pni, const struct puffs_cn *pcn_src,
2405 	const struct vattr *vap, const char *link_target)
2406 {
2407 	struct perfuse_state *ps;
2408 	perfuse_msg_t *pm;
2409 	char *np;
2410 	const char *path;
2411 	size_t path_len;
2412 	size_t linkname_len;
2413 	size_t len;
2414 	int error;
2415 
2416 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2417 		return ENOENT;
2418 
2419 	node_ref(opc);
2420 	ps = puffs_getspecific(pu);
2421 	path = pcn_src->pcn_name;
2422 	path_len = pcn_src->pcn_namelen + 1;
2423 	linkname_len = strlen(link_target) + 1;
2424 	len = path_len + linkname_len;
2425 
2426 	pm = ps->ps_new_msg(pu, opc, FUSE_SYMLINK, len, pcn_src->pcn_cred);
2427 	np = _GET_INPAYLOAD(ps, pm, char *);
2428 	(void)strlcpy(np, path, path_len);
2429 	np += path_len;
2430 	(void)strlcpy(np, link_target, linkname_len);
2431 
2432 	error = node_mk_common(pu, opc, pni, pcn_src, pm);
2433 
2434 	node_rele(opc);
2435 	return error;
2436 }
2437 
2438 /* ARGSUSED4 */
2439 int
2440 perfuse_node_readdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2441 	struct dirent *dent, off_t *readoff, size_t *reslen,
2442 	const struct puffs_cred *pcr, int *eofflag, off_t *cookies,
2443 	size_t *ncookies)
2444 {
2445 	perfuse_msg_t *pm;
2446 	uint64_t fh;
2447 	struct perfuse_state *ps;
2448 	struct perfuse_node_data *pnd;
2449 	struct fuse_read_in *fri;
2450 	struct fuse_out_header *foh;
2451 	struct fuse_dirent *fd;
2452 	size_t foh_len;
2453 	int error;
2454 	size_t fd_maxlen;
2455 
2456 	error = 0;
2457 	node_ref(opc);
2458 	ps = puffs_getspecific(pu);
2459 
2460 	/*
2461 	 * readdir state is kept at node level, and several readdir
2462 	 * requests can be issued at the same time on the same node.
2463 	 * We need to queue requests so that only one is in readdir
2464 	 * code at the same time.
2465 	 */
2466 	pnd = PERFUSE_NODE_DATA(opc);
2467 	while (pnd->pnd_flags & PND_INREADDIR)
2468 		requeue_request(pu, opc, PCQ_READDIR);
2469 	pnd->pnd_flags |= PND_INREADDIR;
2470 
2471 #ifdef PERFUSE_DEBUG
2472 	if (perfuse_diagflags & PDF_READDIR)
2473 		DPRINTF("%s: READDIR opc = %p enter critical section\n",
2474 			__func__, (void *)opc);
2475 #endif
2476 	/*
2477 	 * Re-initialize pnd->pnd_fd_cookie on the first readdir for a node
2478 	 */
2479 	if (*readoff == 0)
2480 		pnd->pnd_fd_cookie = 0;
2481 
2482 	/*
2483 	 * Do we already have the data bufered?
2484 	 */
2485 	if (pnd->pnd_dirent != NULL)
2486 		goto out;
2487 	pnd->pnd_dirent_len = 0;
2488 
2489 	/*
2490 	 * It seems NetBSD can call readdir without open first
2491 	 * libfuse will crash if it is done that way, hence open first.
2492 	 */
2493 	if (!(pnd->pnd_flags & PND_OPEN)) {
2494 		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
2495 			goto out;
2496 	}
2497 
2498 	fh = perfuse_get_fh(opc, FREAD);
2499 
2500 #ifdef PERFUSE_DEBUG
2501 	if (perfuse_diagflags & PDF_FH)
2502 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
2503 			"rfh = 0x%"PRIx64"\n", __func__, (void *)opc,
2504 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fh);
2505 #endif
2506 
2507 	pnd->pnd_all_fd = NULL;
2508 	pnd->pnd_all_fd_len = 0;
2509 	fd_maxlen = ps->ps_max_readahead - sizeof(*foh);
2510 
2511 	do {
2512 		size_t fd_len;
2513 		char *afdp;
2514 
2515 		pm = ps->ps_new_msg(pu, opc, FUSE_READDIR, sizeof(*fri), pcr);
2516 
2517 		/*
2518 		 * read_flags, lock_owner and flags are unused in libfuse
2519 		 */
2520 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
2521 		fri->fh = fh;
2522 		fri->offset = pnd->pnd_fd_cookie;
2523 		fri->size = (uint32_t)fd_maxlen;
2524 		fri->read_flags = 0;
2525 		fri->lock_owner = 0;
2526 		fri->flags = 0;
2527 
2528 		if ((error = xchg_msg(pu, opc, pm,
2529 				      UNSPEC_REPLY_LEN, wait_reply)) != 0)
2530 			goto out;
2531 
2532 		/*
2533 		 * There are many puffs_framebufs calls later,
2534 		 * therefore foh will not be valid for a long time.
2535 		 * Just get the length and forget it.
2536 		 */
2537 		foh = GET_OUTHDR(ps, pm);
2538 		foh_len = foh->len;
2539 
2540 		/*
2541 		 * Empty read: we reached the end of the buffer.
2542 		 */
2543 		if (foh_len == sizeof(*foh)) {
2544 			ps->ps_destroy_msg(pm);
2545 			*eofflag = 1;
2546 			break;
2547 		}
2548 
2549 		/*
2550 		 * Check for corrupted message.
2551 		 */
2552 		if (foh_len < sizeof(*foh) + sizeof(*fd)) {
2553 			ps->ps_destroy_msg(pm);
2554 			DWARNX("readdir reply too short");
2555 			error = EIO;
2556 			goto out;
2557 		}
2558 
2559 
2560 		fd = GET_OUTPAYLOAD(ps, pm, fuse_dirent);
2561 		fd_len = foh_len - sizeof(*foh);
2562 
2563 		pnd->pnd_all_fd = realloc(pnd->pnd_all_fd,
2564 					  pnd->pnd_all_fd_len + fd_len);
2565 		if (pnd->pnd_all_fd  == NULL)
2566 			DERR(EX_OSERR, "%s: malloc failed", __func__);
2567 
2568 		afdp = (char *)(void *)pnd->pnd_all_fd + pnd->pnd_all_fd_len;
2569 		(void)memcpy(afdp, fd, fd_len);
2570 
2571 		pnd->pnd_all_fd_len += fd_len;
2572 
2573 		/*
2574 		 * The fd->off field is used as a cookie for
2575 		 * resuming the next readdir() where this one was left.
2576 	 	 */
2577 		pnd->pnd_fd_cookie = readdir_last_cookie(fd, fd_len);
2578 
2579 		ps->ps_destroy_msg(pm);
2580 	} while (1 /* CONSTCOND */);
2581 
2582 	if (pnd->pnd_all_fd != NULL) {
2583 		if (fuse_to_dirent(pu, opc, pnd->pnd_all_fd,
2584 				   pnd->pnd_all_fd_len) == -1)
2585 			error = EIO;
2586 	}
2587 
2588 out:
2589 	if (pnd->pnd_all_fd != NULL) {
2590 		free(pnd->pnd_all_fd);
2591 		pnd->pnd_all_fd = NULL;
2592 		pnd->pnd_all_fd_len = 0;
2593 	}
2594 
2595 	if (error == 0)
2596 		readdir_buffered(opc, dent, readoff, reslen);
2597 
2598 	/*
2599 	 * Schedule queued readdir requests
2600 	 */
2601 	pnd->pnd_flags &= ~PND_INREADDIR;
2602 	(void)dequeue_requests(opc, PCQ_READDIR, DEQUEUE_ALL);
2603 
2604 #ifdef PERFUSE_DEBUG
2605 	if (perfuse_diagflags & PDF_READDIR)
2606 		DPRINTF("%s: READDIR opc = %p exit critical section\n",
2607 			__func__, (void *)opc);
2608 #endif
2609 
2610 	node_rele(opc);
2611 	return error;
2612 }
2613 
2614 int
2615 perfuse_node_readlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2616 	const struct puffs_cred *pcr, char *linkname, size_t *linklen)
2617 {
2618 	struct perfuse_state *ps;
2619 	perfuse_msg_t *pm;
2620 	int error;
2621 	size_t len;
2622 	struct fuse_out_header *foh;
2623 
2624 	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2625 		return ENOENT;
2626 
2627 	node_ref(opc);
2628 	ps = puffs_getspecific(pu);
2629 
2630 	pm = ps->ps_new_msg(pu, opc, FUSE_READLINK, 0, pcr);
2631 
2632 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2633 		goto out;
2634 
2635 	foh = GET_OUTHDR(ps, pm);
2636 	len = foh->len - sizeof(*foh);
2637 	if (len > *linklen)
2638 		DERRX(EX_PROTOCOL, "path len = %zd too long", len);
2639 	if (len == 0)
2640 		DERRX(EX_PROTOCOL, "path len = %zd too short", len);
2641 
2642 	/*
2643 	 * FUSE filesystems return a NUL terminated string, we
2644 	 * do not want to trailing \0
2645 	 */
2646 	*linklen = len - 1;
2647 	(void)memcpy(linkname, _GET_OUTPAYLOAD(ps, pm, char *), len);
2648 
2649 	ps->ps_destroy_msg(pm);
2650 	error = 0;
2651 
2652 out:
2653 	node_rele(opc);
2654 	return error;
2655 }
2656 
2657 int
2658 perfuse_node_reclaim(struct puffs_usermount *pu, puffs_cookie_t opc)
2659 {
2660 	struct perfuse_state *ps;
2661 	perfuse_msg_t *pm;
2662 	struct perfuse_node_data *pnd;
2663 	struct fuse_forget_in *ffi;
2664 	int nlookup;
2665 	struct timespec now;
2666 
2667 	if (opc == 0)
2668 		return 0;
2669 
2670 	ps = puffs_getspecific(pu);
2671 	pnd = PERFUSE_NODE_DATA(opc);
2672 
2673 	/*
2674 	 * Never forget the root.
2675 	 */
2676 	if (pnd->pnd_nodeid == FUSE_ROOT_ID)
2677 		return 0;
2678 
2679 	/*
2680 	 * There is a race condition between reclaim and lookup.
2681 	 * When looking up an already known node, the kernel cannot
2682 	 * hold a reference on the result until it gets the PUFFS
2683 	 * reply. It mayy therefore reclaim the node after the
2684 	 * userland looked it up, and before it gets the reply.
2685 	 * On rely, the kernel re-creates the node, but at that
2686 	 * time the node has been reclaimed in userland.
2687 	 *
2688 	 * In order to avoid this, we refuse reclaiming nodes that
2689 	 * are too young since the last lookup - and that we do
2690 	 * not have removed on our own, of course.
2691 	 */
2692 	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
2693 		DERR(EX_OSERR, "clock_gettime failed");
2694 
2695 	if (timespeccmp(&pnd->pnd_cn_expire, &now, >) &&
2696 	    !(pnd->pnd_flags & PND_REMOVED)) {
2697 		if (!(pnd->pnd_flags & PND_NODELEAK)) {
2698 			ps->ps_nodeleakcount++;
2699 			pnd->pnd_flags |= PND_NODELEAK;
2700 		}
2701 		DWARNX("possible leaked node:: opc = %p \"%s\"",
2702 		       opc, pnd->pnd_name);
2703 		return 0;
2704 	}
2705 
2706 	node_ref(opc);
2707 	pnd->pnd_flags |= PND_RECLAIMED;
2708 	pnd->pnd_puffs_nlookup--;
2709 	nlookup = pnd->pnd_puffs_nlookup;
2710 
2711 #ifdef PERFUSE_DEBUG
2712 	if (perfuse_diagflags & PDF_RECLAIM)
2713 		DPRINTF("%s (nodeid %"PRId64") reclaimed\n",
2714 			perfuse_node_path(ps, opc), pnd->pnd_nodeid);
2715 #endif
2716 
2717 #ifdef PERFUSE_DEBUG
2718 	if (perfuse_diagflags & PDF_RECLAIM)
2719 		DPRINTF("%s (nodeid %"PRId64") is %sreclaimed, nlookup = %d "
2720 			"%s%s%s%s, pending ops:%s%s%s\n",
2721 		        perfuse_node_path(ps, opc), pnd->pnd_nodeid,
2722 		        pnd->pnd_flags & PND_RECLAIMED ? "" : "not ",
2723 			pnd->pnd_puffs_nlookup,
2724 			pnd->pnd_flags & PND_OPEN ? "open " : "not open",
2725 			pnd->pnd_flags & PND_RFH ? "r" : "",
2726 			pnd->pnd_flags & PND_WFH ? "w" : "",
2727 			pnd->pnd_flags & PND_BUSY ? "" : " none",
2728 			pnd->pnd_flags & PND_INREADDIR ? " readdir" : "",
2729 			pnd->pnd_flags & PND_INWRITE ? " write" : "",
2730 			pnd->pnd_flags & PND_INOPEN ? " open" : "");
2731 #endif
2732 	/*
2733 	 * Make sure it is not looked up again
2734 	 */
2735 	if (!(pnd->pnd_flags & PND_REMOVED))
2736 		perfuse_cache_flush(opc);
2737 
2738 	/*
2739 	 * Purge any activity on the node, while checking
2740 	 * that it remains eligible for a reclaim.
2741 	 */
2742 	while (pnd->pnd_ref > 1)
2743 		requeue_request(pu, opc, PCQ_REF);
2744 
2745 	/*
2746 	 * reclaim cancel?
2747 	 */
2748 	if (pnd->pnd_puffs_nlookup > nlookup) {
2749 		pnd->pnd_flags &= ~PND_RECLAIMED;
2750 		perfuse_node_cache(ps, opc);
2751 		node_rele(opc);
2752 		return 0;
2753 	}
2754 
2755 
2756 #ifdef PERFUSE_DEBUG
2757 	if ((pnd->pnd_flags & PND_OPEN) ||
2758 	       !TAILQ_EMPTY(&pnd->pnd_pcq))
2759 		DERRX(EX_SOFTWARE, "%s: opc = %p \"%s\": still open",
2760 		      __func__, opc, pnd->pnd_name);
2761 
2762 	if ((pnd->pnd_flags & PND_BUSY) ||
2763 	       !TAILQ_EMPTY(&pnd->pnd_pcq))
2764 		DERRX(EX_SOFTWARE, "%s: opc = %p: queued operations",
2765 		      __func__, opc);
2766 
2767 	if (pnd->pnd_inxchg != 0)
2768 		DERRX(EX_SOFTWARE, "%s: opc = %p: ongoing operations",
2769 		      __func__, opc);
2770 #endif
2771 
2772 	/*
2773 	 * Send the FORGET message
2774 	 *
2775 	 * ps_new_msg() is called with NULL creds, which will
2776 	 * be interpreted as FUSE superuser. This is obviously
2777 	 * fine since we operate with kernel creds here.
2778 	 */
2779 	pm = ps->ps_new_msg(pu, opc, FUSE_FORGET,
2780 		      sizeof(*ffi), NULL);
2781 	ffi = GET_INPAYLOAD(ps, pm, fuse_forget_in);
2782 	ffi->nlookup = pnd->pnd_fuse_nlookup;
2783 
2784 	/*
2785 	 * No reply is expected, pm is freed in xchg_msg
2786 	 */
2787 	(void)xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, no_reply);
2788 
2789 	perfuse_destroy_pn(pu, opc);
2790 
2791 	return 0;
2792 }
2793 
2794 int
2795 perfuse_node_inactive(struct puffs_usermount *pu, puffs_cookie_t opc)
2796 {
2797 	struct perfuse_node_data *pnd;
2798 	int error;
2799 
2800 	if (opc == 0)
2801 		return 0;
2802 
2803 	node_ref(opc);
2804 	pnd = PERFUSE_NODE_DATA(opc);
2805 
2806 	if (!(pnd->pnd_flags & (PND_OPEN|PND_REMOVED)))
2807 		goto out;
2808 
2809 	/*
2810 	 * Make sure all operation are finished
2811 	 * There can be an ongoing write. Other
2812 	 * operation wait for all data before
2813 	 * the close/inactive.
2814 	 */
2815 	while (pnd->pnd_flags & PND_INWRITE)
2816 		requeue_request(pu, opc, PCQ_AFTERWRITE);
2817 
2818 	/*
2819 	 * The inactive operation may be cancelled,
2820 	 * If no open is in progress, set PND_INOPEN
2821 	 * so that a new open will be queued.
2822 	 */
2823 	if (pnd->pnd_flags & PND_INOPEN)
2824 		goto out;
2825 
2826 	pnd->pnd_flags |= PND_INOPEN;
2827 
2828 	/*
2829 	 * Sync data
2830 	 */
2831 	if (pnd->pnd_flags & PND_DIRTY) {
2832 		if ((error = perfuse_node_fsync(pu, opc, NULL, 0, 0, 0)) != 0)
2833 			DWARN("%s: perfuse_node_fsync failed error = %d",
2834 			      __func__, error);
2835 	}
2836 
2837 
2838 	/*
2839 	 * Close handles
2840 	 */
2841 	if (pnd->pnd_flags & PND_WFH) {
2842 		if ((error = perfuse_node_close_common(pu, opc, FWRITE)) != 0)
2843 			DWARN("%s: close write FH failed error = %d",
2844 			      __func__, error);
2845 	}
2846 
2847 	if (pnd->pnd_flags & PND_RFH) {
2848 		if ((error = perfuse_node_close_common(pu, opc, FREAD)) != 0)
2849 			DWARN("%s: close read FH failed error = %d",
2850 			      __func__, error);
2851 	}
2852 
2853 	/*
2854 	 * This will cause a reclaim to be sent
2855 	 */
2856 	if (pnd->pnd_flags & PND_REMOVED)
2857 		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1);
2858 
2859 	/*
2860 	 * Schedule awaiting operations
2861 	 */
2862 	pnd->pnd_flags &= ~PND_INOPEN;
2863 	(void)dequeue_requests(opc, PCQ_OPEN, DEQUEUE_ALL);
2864 
2865 	/*
2866 	 * errors are ignored, since the kernel ignores the return code.
2867 	 */
2868 out:
2869 	node_rele(opc);
2870 	return 0;
2871 }
2872 
2873 
2874 /* ARGSUSED0 */
2875 int
2876 perfuse_node_print(struct puffs_usermount *pu, puffs_cookie_t opc)
2877 {
2878 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
2879 	return 0;
2880 }
2881 
2882 /* ARGSUSED0 */
2883 int
2884 perfuse_node_pathconf(struct puffs_usermount *pu, puffs_cookie_t opc,
2885 	int name, int *retval)
2886 {
2887 	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
2888 	return 0;
2889 }
2890 
2891 int
2892 perfuse_node_advlock(struct puffs_usermount *pu, puffs_cookie_t opc,
2893 	void *id, int op, struct flock *fl, int flags)
2894 {
2895 	struct perfuse_state *ps;
2896 	int fop;
2897 	perfuse_msg_t *pm;
2898 	uint64_t fh;
2899 	struct fuse_lk_in *fli;
2900 	struct fuse_out_header *foh;
2901 	struct fuse_lk_out *flo;
2902 	uint32_t owner;
2903 	size_t len;
2904 	int error;
2905 
2906 	node_ref(opc);
2907 
2908 	/*
2909 	 * Make sure we do have a filehandle, as the FUSE filesystem
2910 	 * expect one. E.g.: if we provide none, GlusterFS logs an error
2911 	 * "0-glusterfs-fuse: xl is NULL"
2912 	 *
2913 	 * We need the read file handle if the file is open read only,
2914 	 * in order to support shared locks on read-only files.
2915 	 * NB: The kernel always sends advlock for read-only
2916 	 * files at exit time when the process used lock, see
2917 	 * sys_exit -> exit1 -> fd_free -> fd_close -> VOP_ADVLOCK
2918 	 */
2919 	if ((fh = perfuse_get_fh(opc, FREAD)) == FUSE_UNKNOWN_FH) {
2920 		error = EBADF;
2921 		goto out;
2922 	}
2923 
2924 	ps = puffs_getspecific(pu);
2925 
2926 	if (op == F_GETLK)
2927 		fop = FUSE_GETLK;
2928 	else
2929 		fop = (flags & F_WAIT) ? FUSE_SETLKW : FUSE_SETLK;
2930 
2931 	/*
2932 	 * XXX ps_new_msg() is called with NULL creds, which will
2933 	 * be interpreted as FUSE superuser. We have no way to
2934 	 * know the requesting process' credential, but since advlock()
2935 	 * is supposed to operate on a file that has been open(),
2936 	 * permission should have already been checked at open() time.
2937 	 */
2938 	pm = ps->ps_new_msg(pu, opc, fop, sizeof(*fli), NULL);
2939 	fli = GET_INPAYLOAD(ps, pm, fuse_lk_in);
2940 	fli->fh = fh;
2941 	fli->owner = (uint64_t)(vaddr_t)id;
2942 	fli->lk.start = fl->l_start;
2943 	fli->lk.end = fl->l_start + fl->l_len;
2944 	fli->lk.type = fl->l_type;
2945 	fli->lk.pid = fl->l_pid;
2946 	fli->lk_flags = (flags & F_FLOCK) ? FUSE_LK_FLOCK : 0;
2947 
2948 	owner = (uint32_t)(vaddr_t)id;
2949 
2950 #ifdef PERFUSE_DEBUG
2951 	if (perfuse_diagflags & PDF_FH)
2952 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
2953 			__func__, (void *)opc,
2954 			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fli->fh);
2955 #endif
2956 
2957 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2958 		goto out;
2959 
2960 	foh = GET_OUTHDR(ps, pm);
2961 	len = foh->len - sizeof(*foh);
2962 
2963 	/*
2964 	 * Save or clear the lock
2965 	 */
2966 	switch (op) {
2967 	case F_GETLK:
2968 		if (len != sizeof(*flo))
2969 			DERRX(EX_SOFTWARE,
2970 			      "%s: Unexpected lock reply len %zd",
2971 			      __func__, len);
2972 
2973 		flo = GET_OUTPAYLOAD(ps, pm, fuse_lk_out);
2974 		fl->l_start = flo->lk.start;
2975 		fl->l_len = flo->lk.end - flo->lk.start;
2976 		fl->l_pid = flo->lk.pid;
2977 		fl->l_type = flo->lk.type;
2978 		fl->l_whence = SEEK_SET;	/* libfuse hardcodes it */
2979 
2980 		PERFUSE_NODE_DATA(opc)->pnd_lock_owner = flo->lk.pid;
2981 		break;
2982 	case F_UNLCK:
2983 		owner = 0;
2984 		/* FALLTHROUGH */
2985 	case F_SETLK:
2986 		/* FALLTHROUGH */
2987 	case F_SETLKW:
2988 		if (error != 0)
2989 			PERFUSE_NODE_DATA(opc)->pnd_lock_owner = owner;
2990 
2991 		if (len != 0)
2992 			DERRX(EX_SOFTWARE,
2993 			      "%s: Unexpected unlock reply len %zd",
2994 			      __func__, len);
2995 
2996 		break;
2997 	default:
2998 		DERRX(EX_SOFTWARE, "%s: Unexpected op %d", __func__, op);
2999 		break;
3000 	}
3001 
3002 	ps->ps_destroy_msg(pm);
3003 	error = 0;
3004 
3005 out:
3006 	node_rele(opc);
3007 	return error;
3008 }
3009 
3010 int
3011 perfuse_node_read(struct puffs_usermount *pu, puffs_cookie_t opc, uint8_t *buf,
3012 	off_t offset, size_t *resid, const struct puffs_cred *pcr, int ioflag)
3013 {
3014 	struct perfuse_state *ps;
3015 	struct perfuse_node_data *pnd;
3016 	const struct vattr *vap;
3017 	perfuse_msg_t *pm;
3018 	struct fuse_read_in *fri;
3019 	struct fuse_out_header *foh;
3020 	size_t readen;
3021 	int error;
3022 
3023 	ps = puffs_getspecific(pu);
3024 	pnd = PERFUSE_NODE_DATA(opc);
3025 	vap = puffs_pn_getvap((struct puffs_node *)opc);
3026 
3027 	/*
3028 	 * NetBSD turns that into a getdents(2) output
3029 	 * We just do a EISDIR as this feature is of little use.
3030 	 */
3031 	if (vap->va_type == VDIR)
3032 		return EISDIR;
3033 
3034 	if ((u_quad_t)offset + *resid > vap->va_size)
3035 		DWARNX("%s %p read %lld@%zu beyond EOF %" PRIu64 "\n",
3036 		       __func__, (void *)opc, (long long)offset,
3037 		       *resid, vap->va_size);
3038 
3039 	do {
3040 		size_t max_read;
3041 
3042 		max_read = ps->ps_max_readahead - sizeof(*foh);
3043 		/*
3044 		 * flags may be set to FUSE_READ_LOCKOWNER
3045 		 * if lock_owner is provided.
3046 		 */
3047 		pm = ps->ps_new_msg(pu, opc, FUSE_READ, sizeof(*fri), pcr);
3048 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
3049 		fri->fh = perfuse_get_fh(opc, FREAD);
3050 		fri->offset = offset;
3051 		fri->size = (uint32_t)MIN(*resid, max_read);
3052 		fri->read_flags = 0; /* XXX Unused by libfuse? */
3053 		fri->lock_owner = pnd->pnd_lock_owner;
3054 		fri->flags = 0;
3055 		fri->flags |= (fri->lock_owner != 0) ? FUSE_READ_LOCKOWNER : 0;
3056 
3057 #ifdef PERFUSE_DEBUG
3058 	if (perfuse_diagflags & PDF_FH)
3059 		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
3060 			__func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
3061 #endif
3062 		error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply);
3063 		if (error  != 0)
3064 			return error;
3065 
3066 		foh = GET_OUTHDR(ps, pm);
3067 		readen = foh->len - sizeof(*foh);
3068 
3069 #ifdef PERFUSE_DEBUG
3070 		if (readen > *resid)
3071 			DERRX(EX_SOFTWARE, "%s: Unexpected big read %zd",
3072 			      __func__, readen);
3073 #endif
3074 
3075 		(void)memcpy(buf,  _GET_OUTPAYLOAD(ps, pm, char *), readen);
3076 
3077 		buf += readen;
3078 		offset += readen;
3079 		*resid -= readen;
3080 
3081 		ps->ps_destroy_msg(pm);
3082 	} while ((*resid != 0) && (readen != 0));
3083 
3084 	if (ioflag & (IO_SYNC|IO_DSYNC))
3085 		ps->ps_syncreads++;
3086 	else
3087 		ps->ps_asyncreads++;
3088 
3089 	return 0;
3090 }
3091 
3092 int
3093 perfuse_node_write(struct puffs_usermount *pu, puffs_cookie_t opc,
3094 	uint8_t *buf, off_t offset, size_t *resid,
3095 	const struct puffs_cred *pcr, int ioflag)
3096 {
3097 	return perfuse_node_write2(pu, opc, buf, offset, resid, pcr, ioflag, 0);
3098 }
3099 
3100 /* ARGSUSED7 */
3101 int
3102 perfuse_node_write2(struct puffs_usermount *pu, puffs_cookie_t opc,
3103 	uint8_t *buf, off_t offset, size_t *resid,
3104 	const struct puffs_cred *pcr, int ioflag, int xflag)
3105 {
3106 	struct perfuse_state *ps;
3107 	struct perfuse_node_data *pnd;
3108 	struct vattr *vap;
3109 	perfuse_msg_t *pm;
3110 	struct fuse_write_in *fwi;
3111 	struct fuse_write_out *fwo;
3112 	size_t data_len;
3113 	size_t payload_len;
3114 	size_t written;
3115 	int inresize;
3116 	int error;
3117 
3118 	ps = puffs_getspecific(pu);
3119 	pnd = PERFUSE_NODE_DATA(opc);
3120 	vap = puffs_pn_getvap((struct puffs_node *)opc);
3121 	written = 0;
3122 	inresize = 0;
3123 	error = 0;
3124 
3125 	if (vap->va_type == VDIR)
3126 		return EISDIR;
3127 
3128 	node_ref(opc);
3129 
3130 	/*
3131 	 * We need to queue write requests in order to avoid
3132 	 * dequeueing PCQ_AFTERWRITE when there are pending writes.
3133 	 */
3134 	while (pnd->pnd_flags & PND_INWRITE)
3135 		requeue_request(pu, opc, PCQ_WRITE);
3136 	pnd->pnd_flags |= PND_INWRITE;
3137 
3138 	/*
3139 	 * Serialize size access, see comment in perfuse_node_setattr().
3140 	 */
3141 	if ((u_quad_t)offset + *resid > vap->va_size) {
3142 		while (pnd->pnd_flags & PND_INRESIZE)
3143 			requeue_request(pu, opc, PCQ_RESIZE);
3144 		pnd->pnd_flags |= PND_INRESIZE;
3145 		inresize = 1;
3146 	}
3147 
3148 	/*
3149 	 * append flag: re-read the file size so that
3150 	 * we get the latest value.
3151 	 */
3152 	if (ioflag & PUFFS_IO_APPEND) {
3153 		DWARNX("%s: PUFFS_IO_APPEND set, untested code", __func__);
3154 
3155 		if ((error = perfuse_node_getattr(pu, opc, vap, pcr)) != 0)
3156 			goto out;
3157 
3158 		offset = vap->va_size;
3159 	}
3160 
3161 #ifdef PERFUSE_DEBUG
3162 	if (perfuse_diagflags & PDF_RESIZE)
3163 		DPRINTF(">> %s %p %" PRIu64 "\n", __func__,
3164 			(void *)opc, vap->va_size);
3165 #endif
3166 
3167 	do {
3168 		size_t max_write;
3169 		/*
3170 		 * There is a writepage flag when data
3171 		 * is aligned to page size. Use it for
3172 		 * everything but the data after the last
3173 		 * page boundary.
3174 		 */
3175 		max_write = ps->ps_max_write - sizeof(*fwi);
3176 
3177 		data_len = MIN(*resid, max_write);
3178 		if (data_len > (size_t)sysconf(_SC_PAGESIZE))
3179 			data_len = data_len & ~(sysconf(_SC_PAGESIZE) - 1);
3180 
3181 		payload_len = data_len + sizeof(*fwi);
3182 
3183 		/*
3184 		 * flags may be set to FUSE_WRITE_CACHE (XXX usage?)
3185 		 * or FUSE_WRITE_LOCKOWNER, if lock_owner is provided.
3186 		 * write_flags is set to 1 for writepage.
3187 		 */
3188 		pm = ps->ps_new_msg(pu, opc, FUSE_WRITE, payload_len, pcr);
3189 		fwi = GET_INPAYLOAD(ps, pm, fuse_write_in);
3190 		fwi->fh = perfuse_get_fh(opc, FWRITE);
3191 		fwi->offset = offset;
3192 		fwi->size = (uint32_t)data_len;
3193 		fwi->write_flags = (fwi->size % sysconf(_SC_PAGESIZE)) ? 0 : 1;
3194 		fwi->lock_owner = pnd->pnd_lock_owner;
3195 		fwi->flags = 0;
3196 		fwi->flags |= (fwi->lock_owner != 0) ? FUSE_WRITE_LOCKOWNER : 0;
3197 		fwi->flags |= (ioflag & IO_DIRECT) ? 0 : FUSE_WRITE_CACHE;
3198 		(void)memcpy((fwi + 1), buf, data_len);
3199 
3200 
3201 #ifdef PERFUSE_DEBUG
3202 		if (perfuse_diagflags & PDF_FH)
3203 			DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
3204 				"fh = 0x%"PRIx64"\n", __func__,
3205 				(void *)opc, pnd->pnd_nodeid, fwi->fh);
3206 #endif
3207 		if ((error = xchg_msg(pu, opc, pm,
3208 				      sizeof(*fwo), wait_reply)) != 0)
3209 			goto out;
3210 
3211 		fwo = GET_OUTPAYLOAD(ps, pm, fuse_write_out);
3212 		written = fwo->size;
3213 		ps->ps_destroy_msg(pm);
3214 
3215 #ifdef PERFUSE_DEBUG
3216 		if (written > *resid)
3217 			DERRX(EX_SOFTWARE, "%s: Unexpected big write %zd",
3218 			      __func__, written);
3219 #endif
3220 		*resid -= written;
3221 		offset += written;
3222 		buf += written;
3223 
3224 	} while (*resid != 0);
3225 
3226 	/*
3227 	 * puffs_ops(3) says
3228 	 *  "everything must be written or an error will be generated"
3229 	 */
3230 	if (*resid != 0)
3231 		error = EFBIG;
3232 
3233 #ifdef PERFUSE_DEBUG
3234 	if (perfuse_diagflags & PDF_RESIZE) {
3235 		if (offset > (off_t)vap->va_size)
3236 			DPRINTF("<< %s %p %" PRIu64 " -> %lld\n", __func__,
3237 				(void *)opc, vap->va_size, (long long)offset);
3238 		else
3239 			DPRINTF("<< %s %p \n", __func__, (void *)opc);
3240 	}
3241 #endif
3242 
3243 	/*
3244 	 * Update file size if we wrote beyond the end
3245 	 */
3246 	if (offset > (off_t)vap->va_size)
3247 		vap->va_size = offset;
3248 
3249 	if (inresize) {
3250 #ifdef PERFUSE_DEBUG
3251 		if (!(pnd->pnd_flags & PND_INRESIZE))
3252 			DERRX(EX_SOFTWARE, "file write grow without resize");
3253 #endif
3254 		pnd->pnd_flags &= ~PND_INRESIZE;
3255 		(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
3256 	}
3257 
3258 
3259 	/*
3260 	 * Statistics
3261 	 */
3262 	if (ioflag & (IO_SYNC|IO_DSYNC))
3263 		ps->ps_syncwrites++;
3264 	else
3265 		ps->ps_asyncwrites++;
3266 
3267 	/*
3268 	 * Remember to sync the file
3269 	 */
3270 	pnd->pnd_flags |= PND_DIRTY;
3271 
3272 #ifdef PERFUSE_DEBUG
3273 	if (perfuse_diagflags & PDF_SYNC)
3274 		DPRINTF("%s: DIRTY opc = %p, file = \"%s\"\n",
3275 			__func__, (void*)opc, perfuse_node_path(ps, opc));
3276 #endif
3277 
3278 out:
3279 	/*
3280 	 * VOP_PUTPAGE causes FAF write where kernel does not
3281 	 * check operation result. At least warn if it failed.
3282 	 */
3283 #ifdef PUFFS_WRITE_FAF
3284 	if (error && (xflag & PUFFS_WRITE_FAF))
3285 		DWARN("Data loss caused by FAF write failed on \"%s\"",
3286 		      pnd->pnd_name);
3287 #endif /* PUFFS_WRITE_FAF */
3288 
3289 	/*
3290 	 * If there are no more queued write, we can resume
3291 	 * an operation awaiting write completion.
3292 	 */
3293 	pnd->pnd_flags &= ~PND_INWRITE;
3294 	if (dequeue_requests(opc, PCQ_WRITE, 1) == 0)
3295 		(void)dequeue_requests(opc, PCQ_AFTERWRITE, DEQUEUE_ALL);
3296 
3297 	node_rele(opc);
3298 	return error;
3299 }
3300 
3301 /* ARGSUSED0 */
3302 void
3303 perfuse_cache_write(struct puffs_usermount *pu, puffs_cookie_t opc, size_t size,
3304 	struct puffs_cacherun *runs)
3305 {
3306 	return;
3307 }
3308 
3309 /* ARGSUSED4 */
3310 int
3311 perfuse_node_getextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3312 	int attrns, const char *attrname, size_t *attrsize, uint8_t *attr,
3313 	size_t *resid, const struct puffs_cred *pcr)
3314 {
3315 	struct perfuse_state *ps;
3316 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3317 	perfuse_msg_t *pm;
3318 	struct fuse_getxattr_in *fgi;
3319 	struct fuse_getxattr_out *fgo;
3320 	struct fuse_out_header *foh;
3321 	size_t attrnamelen;
3322 	size_t len;
3323 	char *np;
3324 	int error;
3325 
3326 	node_ref(opc);
3327 	ps = puffs_getspecific(pu);
3328 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3329 	attrnamelen = strlen(attrname) + 1;
3330 	len = sizeof(*fgi) + attrnamelen;
3331 
3332 	pm = ps->ps_new_msg(pu, opc, FUSE_GETXATTR, len, pcr);
3333 	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3334 	fgi->size = (unsigned int)((resid != NULL) ? *resid : 0);
3335 	np = (char *)(void *)(fgi + 1);
3336 	(void)strlcpy(np, attrname, attrnamelen);
3337 
3338 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3339 		goto out;
3340 
3341 	/*
3342 	 * We just get fuse_getattr_out with list size if we requested
3343 	 * a null size.
3344 	 */
3345 	if (resid == NULL) {
3346 		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3347 
3348 		if (attrsize != NULL)
3349 			*attrsize = fgo->size;
3350 
3351 		ps->ps_destroy_msg(pm);
3352 		error = 0;
3353 		goto out;
3354 	}
3355 
3356 	/*
3357 	 * And with a non null requested size, we get the list just
3358 	 * after the header
3359 	 */
3360 	foh = GET_OUTHDR(ps, pm);
3361 	np = (char *)(void *)(foh + 1);
3362 
3363 	if (resid != NULL) {
3364 		len = MAX(foh->len - sizeof(*foh), *resid);
3365 		(void)memcpy(attr, np, len);
3366 		*resid -= len;
3367 	}
3368 
3369 	ps->ps_destroy_msg(pm);
3370 	error = 0;
3371 
3372 out:
3373 	node_rele(opc);
3374 	return error;
3375 }
3376 
3377 int
3378 perfuse_node_setextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3379 	int attrns, const char *attrname, uint8_t *attr, size_t *resid,
3380 	const struct puffs_cred *pcr)
3381 {
3382 	struct perfuse_state *ps;
3383 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3384 	perfuse_msg_t *pm;
3385 	struct fuse_setxattr_in *fsi;
3386 	size_t attrnamelen;
3387 	size_t len;
3388 	char *np;
3389 	int error;
3390 
3391 	node_ref(opc);
3392 	ps = puffs_getspecific(pu);
3393 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3394 	attrnamelen = strlen(attrname) + 1;
3395 	len = sizeof(*fsi) + attrnamelen + *resid;
3396 
3397 	pm = ps->ps_new_msg(pu, opc, FUSE_SETXATTR, len, pcr);
3398 	fsi = GET_INPAYLOAD(ps, pm, fuse_setxattr_in);
3399 	fsi->size = (unsigned int)*resid;
3400 	fsi->flags = 0;
3401 	np = (char *)(void *)(fsi + 1);
3402 	(void)strlcpy(np, attrname, attrnamelen);
3403 	np += attrnamelen;
3404 	(void)memcpy(np, (char *)attr, *resid);
3405 
3406 	if ((error = xchg_msg(pu, opc, pm,
3407 			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
3408 		goto out;
3409 
3410 	ps->ps_destroy_msg(pm);
3411 	*resid = 0;
3412 	error = 0;
3413 
3414 out:
3415 	node_rele(opc);
3416 	return error;
3417 }
3418 
3419 /* ARGSUSED2 */
3420 int
3421 perfuse_node_listextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3422 	int attrns, size_t *attrsize, uint8_t *attrs, size_t *resid, int flag,
3423 	const struct puffs_cred *pcr)
3424 {
3425 	struct perfuse_state *ps;
3426 	perfuse_msg_t *pm;
3427 	struct fuse_getxattr_in *fgi;
3428 	struct fuse_getxattr_out *fgo;
3429 	struct fuse_out_header *foh;
3430 	char *np;
3431 	size_t len, puffs_len;
3432 	int error;
3433 
3434 	node_ref(opc);
3435 
3436 	ps = puffs_getspecific(pu);
3437 	len = sizeof(*fgi);
3438 
3439 	pm = ps->ps_new_msg(pu, opc, FUSE_LISTXATTR, len, pcr);
3440 	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3441 	if (resid != NULL)
3442 		fgi->size = (unsigned int)*resid;
3443 	else
3444 		fgi->size = 0;
3445 
3446 	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3447 		goto out;
3448 
3449 	/*
3450 	 * We just get fuse_getattr_out with list size if we requested
3451 	 * a null size.
3452 	 */
3453 	if (resid == NULL) {
3454 		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3455 
3456 		if (attrsize != NULL)
3457 			*attrsize = fgo->size;
3458 
3459 		ps->ps_destroy_msg(pm);
3460 
3461 		error = 0;
3462 		goto out;
3463 	}
3464 
3465 	/*
3466 	 * And with a non null requested size, we get the list just
3467 	 * after the header
3468 	 */
3469 	foh = GET_OUTHDR(ps, pm);
3470 	np = (char *)(void *)(foh + 1);
3471 	puffs_len = foh->len - sizeof(*foh);
3472 
3473 	if (attrs != NULL) {
3474 #ifdef PUFFS_EXTATTR_LIST_LENPREFIX
3475 		/*
3476 		 * Convert the FUSE reply to length prefixed strings
3477 		 * if this is what the kernel wants.
3478 		 */
3479 		if (flag & PUFFS_EXTATTR_LIST_LENPREFIX) {
3480 			size_t i, attrlen;
3481 
3482 			for (i = 0; i < puffs_len; i += attrlen + 1) {
3483 				attrlen = strlen(np + i);
3484 				(void)memmove(np + i + 1, np + i, attrlen);
3485 				*(np + i) = (uint8_t)attrlen;
3486 			}
3487 		}
3488 #endif /* PUFFS_EXTATTR_LIST_LENPREFIX */
3489 		(void)memcpy(attrs, np, puffs_len);
3490 		*resid -= puffs_len;
3491 	}
3492 
3493 	if (attrsize != NULL)
3494 		*attrsize = puffs_len;
3495 
3496 	ps->ps_destroy_msg(pm);
3497 	error = 0;
3498 
3499 out:
3500 	node_rele(opc);
3501 	return error;
3502 }
3503 
3504 int
3505 perfuse_node_deleteextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3506 	int attrns, const char *attrname, const struct puffs_cred *pcr)
3507 {
3508 	struct perfuse_state *ps;
3509 	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3510 	perfuse_msg_t *pm;
3511 	size_t attrnamelen;
3512 	char *np;
3513 	int error;
3514 
3515 	node_ref(opc);
3516 
3517 	ps = puffs_getspecific(pu);
3518 	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3519 	attrnamelen = strlen(attrname) + 1;
3520 
3521 	pm = ps->ps_new_msg(pu, opc, FUSE_REMOVEXATTR, attrnamelen, pcr);
3522 	np = _GET_INPAYLOAD(ps, pm, char *);
3523 	(void)strlcpy(np, attrname, attrnamelen);
3524 
3525 	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
3526 
3527 	ps->ps_destroy_msg(pm);
3528 
3529 	node_rele(opc);
3530 	return error;
3531 }
3532