xref: /netbsd-src/share/examples/puffs/pgfs/pgfs_puffs.c (revision b757af438b42b93f8c6571f026d8b8ef3eaf5fc9)
1 /*	$NetBSD: pgfs_puffs.c,v 1.1 2011/10/12 01:05:00 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c)2010,2011 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * puffs node ops and fs ops.
31  */
32 
33 #include <sys/cdefs.h>
34 #ifndef lint
35 __RCSID("$NetBSD: pgfs_puffs.c,v 1.1 2011/10/12 01:05:00 yamt Exp $");
36 #endif /* not lint */
37 
38 #include <assert.h>
39 #include <err.h>
40 #include <errno.h>
41 #include <puffs.h>
42 #include <inttypes.h>
43 #include <stdarg.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <util.h>
49 
50 #include <libpq-fe.h>
51 #include <libpq/libpq-fs.h>	/* INV_* */
52 
53 #include "pgfs.h"
54 #include "pgfs_db.h"
55 #include "pgfs_subs.h"
56 #include "pgfs_debug.h"
57 
58 static fileid_t
59 cookie_to_fileid(puffs_cookie_t cookie)
60 {
61 
62 	return (fileid_t)(uintptr_t)cookie;
63 }
64 
65 static puffs_cookie_t
66 fileid_to_cookie(fileid_t id)
67 {
68 	puffs_cookie_t cookie = (puffs_cookie_t)(uintptr_t)id;
69 
70 	/* XXX not true for 32-bit ports */
71 	assert(cookie_to_fileid(cookie) == id);
72 	return cookie;
73 }
74 
75 puffs_cookie_t
76 pgfs_root_cookie(void)
77 {
78 
79 	return fileid_to_cookie(PGFS_ROOT_FILEID);
80 }
81 
82 int
83 pgfs_node_getattr(struct puffs_usermount *pu, puffs_cookie_t opc,
84     struct vattr *va, const struct puffs_cred *pcr)
85 {
86 	struct Xconn *xc;
87 	struct fileid_lock_handle *lock;
88 	fileid_t fileid = cookie_to_fileid(opc);
89 	int error;
90 
91 	DPRINTF("%llu\n", fileid);
92 	lock = fileid_lock(fileid, puffs_cc_getcc(pu));
93 retry:
94 	xc = begin_readonly(pu);
95 	error = getattr(xc, fileid, va, GETATTR_ALL);
96 	if (error != 0) {
97 		goto got_error;
98 	}
99 	error = commit(xc);
100 	if (error != 0) {
101 		goto got_error;
102 	}
103 	goto done;
104 got_error:
105 	rollback(xc);
106 	if (error == EAGAIN) {
107 		goto retry;
108 	}
109 done:
110 	fileid_unlock(lock);
111 	return error;
112 }
113 
114 #define	PGFS_DIRCOOKIE_DOT	0	/* . entry */
115 #define	PGFS_DIRCOOKIE_DOTDOT	1	/* .. entry */
116 #define	PGFS_DIRCOOKIE_EOD	2	/* end of directory */
117 
118 int
119 pgfs_node_readdir(struct puffs_usermount *pu, puffs_cookie_t opc,
120     struct dirent *dent, off_t *readoff, size_t *reslen,
121     const struct puffs_cred *pcr, int *eofflag, off_t *cookies,
122     size_t *ncookies)
123 {
124 	fileid_t parent_fileid;
125 	fileid_t child_fileid;
126 	uint64_t cookie;
127 	uint64_t nextcookie;
128 	uint64_t offset;
129 	struct Xconn *xc = NULL;
130 	static const Oid types[] = {
131 		TEXTOID,	/* name */
132 		INT8OID,	/* cookie */
133 		INT8OID,	/* nextcookie */
134 		INT8OID,	/* child_fileid */
135 	};
136 	const char *name;
137 	char *nametofree = NULL;
138 	struct fetchstatus s;
139 	int error;
140 	bool fetching;
141 	bool bufferfull;
142 
143 	parent_fileid = cookie_to_fileid(opc);
144 	offset = *readoff;
145 	DPRINTF("%llu %" PRIu64 "\n", parent_fileid, offset);
146 	*ncookies = 0;
147 	fetching = false;
148 next:
149 	if (offset == PGFS_DIRCOOKIE_DOT) {
150 		name = ".";
151 		child_fileid = parent_fileid;
152 		cookie = offset;
153 		nextcookie = PGFS_DIRCOOKIE_DOTDOT;
154 		goto store_and_next;
155 	}
156 	if (offset == PGFS_DIRCOOKIE_DOTDOT) {
157 		if (parent_fileid != PGFS_ROOT_FILEID) {
158 			if (xc == NULL) {
159 				xc = begin(pu);
160 			}
161 			error = lookupp(xc, parent_fileid, &child_fileid);
162 			if (error != 0) {
163 				rollback(xc);
164 				return error;
165 			}
166 		} else {
167 			child_fileid = parent_fileid;
168 		}
169 		name = "..";
170 		cookie = offset;
171 		nextcookie = PGFS_DIRCOOKIE_EOD + 1;
172 		goto store_and_next;
173 	}
174 	if (offset == PGFS_DIRCOOKIE_EOD) {
175 		*eofflag = 1;
176 		goto done;
177 	}
178 	/* offset > PGFS_DIRCOOKIE_EOD; normal entries */
179 	if (xc == NULL) {
180 		xc = begin(pu);
181 	}
182 	if (!fetching) {
183 		static struct cmd *c;
184 
185 		/*
186 		 * a simpler query like "ORDER BY name OFFSET :offset - 3"
187 		 * would work well for most of cases.  however, it doesn't for
188 		 * applications which expect readdir cookies are kept valid
189 		 * even after unlink of other entries in the directory.
190 		 * eg. cvs, bonnie++
191 		 *
192 		 * 2::int8 == PGFS_DIRCOOKIE_EOD
193 		 */
194 		CREATECMD(c,
195 			"SELECT name, cookie, "
196 			"lead(cookie, 1, 2::int8) OVER (ORDER BY cookie), "
197 			"child_fileid "
198 			"FROM dirent "
199 			"WHERE parent_fileid = $1 "
200 			"AND cookie >= $2 "
201 			"ORDER BY cookie", INT8OID, INT8OID);
202 		error = sendcmd(xc, c, parent_fileid, offset);
203 		if (error != 0) {
204 			rollback(xc);
205 			return error;
206 		}
207 		fetching = true;
208 		fetchinit(&s, xc);
209 	}
210 	/*
211 	 * fetch and process an entry
212 	 */
213 	error = FETCHNEXT(&s, types, &nametofree, &cookie, &nextcookie,
214 	    &child_fileid);
215 	if (error == ENOENT) {
216 		DPRINTF("ENOENT\n");
217 		if (offset == PGFS_DIRCOOKIE_EOD + 1) {
218 			DPRINTF("empty directory\n");
219 			*eofflag = 1;
220 			goto done;
221 		}
222 		fetchdone(&s);
223 		rollback(xc);
224 		return EINVAL;
225 	}
226 	if (error != 0) {
227 		DPRINTF("error %d\n", error);
228 		fetchdone(&s);
229 		rollback(xc);
230 		return error;
231 	}
232 	if (offset != cookie && offset != PGFS_DIRCOOKIE_EOD + 1) {
233 		free(nametofree);
234 		fetchdone(&s);
235 		rollback(xc);
236 		return EINVAL;
237 	}
238 	name = nametofree;
239 store_and_next:
240 	/*
241 	 * store an entry and continue processing unless the result buffer
242 	 * is full.
243 	 */
244 	bufferfull = !puffs_nextdent(&dent, name, child_fileid, DT_UNKNOWN,
245 	    reslen);
246 	free(nametofree);
247 	nametofree = NULL;
248 	if (bufferfull) {
249 		*eofflag = 0;
250 		goto done;
251 	}
252 	PUFFS_STORE_DCOOKIE(cookies, ncookies, cookie);
253 	offset = nextcookie;
254 	*readoff = offset;
255 	goto next;
256 done:
257 	/*
258 	 * cleanup and update atime of the directory.
259 	 */
260 	assert(nametofree == NULL);
261 	if (fetching) {
262 		fetchdone(&s);
263 		fetching = false;
264 	}
265 	if (xc == NULL) {
266 retry:
267 		xc = begin(pu);
268 	}
269 	error = update_atime(xc, parent_fileid);
270 	if (error != 0) {
271 		goto got_error;
272 	}
273 	error = commit(xc);
274 	if (error != 0) {
275 		goto got_error;
276 	}
277 	return 0;
278 got_error:
279 	rollback(xc);
280 	if (error == EAGAIN) {
281 		goto retry;
282 	}
283 	return error;
284 }
285 
286 int
287 pgfs_node_lookup(struct puffs_usermount *pu, puffs_cookie_t opc,
288     struct puffs_newinfo *pni, const struct puffs_cn *pcn)
289 {
290 	struct vattr dva;
291 	struct vattr cva;
292 	struct puffs_cred * const pcr = pcn->pcn_cred;
293 	fileid_t parent_fileid;
294 	const char *name;
295 	fileid_t child_fileid;
296 	struct Xconn *xc;
297 	mode_t access_mode;
298 	int error;
299 	int saved_error;
300 
301 	parent_fileid = cookie_to_fileid(opc);
302 	name = pcn->pcn_name;
303 	DPRINTF("%llu %s\n", parent_fileid, name);
304 	assert(strcmp(name, ".")); /* . is handled by framework */
305 retry:
306 	xc = begin_readonly(pu);
307 	error = getattr(xc, parent_fileid, &dva,
308 	    GETATTR_TYPE|GETATTR_MODE|GETATTR_UID|GETATTR_GID);
309 	if (error != 0) {
310 		goto got_error;
311 	}
312 	access_mode = PUFFS_VEXEC;
313 	if ((pcn->pcn_flags & NAMEI_ISLASTCN) != 0 &&
314 	    pcn->pcn_nameiop != NAMEI_LOOKUP) {
315 		access_mode |= PUFFS_VWRITE;
316 	}
317 	error = puffs_access(dva.va_type, dva.va_mode, dva.va_uid, dva.va_gid,
318 	    access_mode, pcr);
319 	if (error != 0) {
320 		goto commit_and_return;
321 	}
322 	if (!strcmp(name, "..")) {
323 		error = lookupp(xc, parent_fileid, &child_fileid);
324 		if (error != 0) {
325 			goto got_error;
326 		}
327 	} else {
328 		static struct cmd *c;
329 		static const Oid types[] = { INT8OID, };
330 		struct fetchstatus s;
331 
332 		CREATECMD(c, "SELECT child_fileid "
333 			"FROM dirent "
334 			"WHERE parent_fileid = $1 AND name = $2;",
335 			INT8OID, TEXTOID);
336 		error = sendcmd(xc, c, parent_fileid, name);
337 		if (error != 0) {
338 			DPRINTF("sendcmd %d\n", error);
339 			goto got_error;
340 		}
341 		fetchinit(&s, xc);
342 		error = FETCHNEXT(&s, types, &child_fileid);
343 		fetchdone(&s);
344 		if (error == ENOENT) {
345 			goto commit_and_return;
346 		}
347 		if (error != 0) {
348 			goto got_error;
349 		}
350 	}
351 	error = getattr(xc, child_fileid, &cva, GETATTR_TYPE|GETATTR_SIZE);
352 	if (error != 0) {
353 		goto got_error;
354 	}
355 	error = commit(xc);
356 	if (error != 0) {
357 		goto got_error;
358 	}
359 	puffs_newinfo_setcookie(pni, fileid_to_cookie(child_fileid));
360 	puffs_newinfo_setvtype(pni, cva.va_type);
361 	puffs_newinfo_setsize(pni, cva.va_size);
362 	return 0;
363 got_error:
364 	rollback(xc);
365 	if (error == EAGAIN) {
366 		goto retry;
367 	}
368 	return error;
369 commit_and_return:
370 	saved_error = error;
371 	error = commit(xc);
372 	if (error != 0) {
373 		goto got_error;
374 	}
375 	return saved_error;
376 }
377 
378 int
379 pgfs_node_mkdir(struct puffs_usermount *pu, puffs_cookie_t opc,
380     struct puffs_newinfo *pni, const struct puffs_cn *pcn,
381     const struct vattr *va)
382 {
383 	struct Xconn *xc;
384 	fileid_t parent_fileid = cookie_to_fileid(opc);
385 	fileid_t new_fileid;
386 	struct puffs_cred * const pcr = pcn->pcn_cred;
387 	uid_t uid;
388 	gid_t gid;
389 	int error;
390 
391 	DPRINTF("%llu %s\n", parent_fileid, pcn->pcn_name);
392 	if (puffs_cred_getuid(pcr, &uid) == -1 ||
393 	    puffs_cred_getgid(pcr, &gid) == -1) {
394 		return errno;
395 	}
396 retry:
397 	xc = begin(pu);
398 	error = mklinkfile(xc, parent_fileid, pcn->pcn_name, VDIR,
399 	    va->va_mode, uid, gid, &new_fileid);
400 	if (error == 0) {
401 		error = update_nlink(xc, parent_fileid, 1);
402 	}
403 	if (error != 0) {
404 		goto got_error;
405 	}
406 	error = commit(xc);
407 	if (error != 0) {
408 		goto got_error;
409 	}
410 	puffs_newinfo_setcookie(pni, fileid_to_cookie(new_fileid));
411 	return 0;
412 got_error:
413 	rollback(xc);
414 	if (error == EAGAIN) {
415 		goto retry;
416 	}
417 	return error;
418 }
419 
420 int
421 pgfs_node_create(struct puffs_usermount *pu, puffs_cookie_t opc,
422     struct puffs_newinfo *pni, const struct puffs_cn *pcn,
423     const struct vattr *va)
424 {
425 	struct Xconn *xc;
426 	fileid_t parent_fileid = cookie_to_fileid(opc);
427 	fileid_t new_fileid;
428 	struct puffs_cred * const pcr = pcn->pcn_cred;
429 	uid_t uid;
430 	gid_t gid;
431 	int error;
432 
433 	DPRINTF("%llu %s\n", parent_fileid, pcn->pcn_name);
434 	if (puffs_cred_getuid(pcr, &uid) == -1 ||
435 	    puffs_cred_getgid(pcr, &gid) == -1) {
436 		return errno;
437 	}
438 retry:
439 	xc = begin(pu);
440 	error = mklinkfile_lo(xc, parent_fileid, pcn->pcn_name, VREG,
441 	    va->va_mode,
442 	    uid, gid, &new_fileid, NULL);
443 	if (error != 0) {
444 		goto got_error;
445 	}
446 	error = commit(xc);
447 	if (error != 0) {
448 		goto got_error;
449 	}
450 	puffs_newinfo_setcookie(pni, fileid_to_cookie(new_fileid));
451 	return 0;
452 got_error:
453 	rollback(xc);
454 	if (error == EAGAIN) {
455 		goto retry;
456 	}
457 	return error;
458 }
459 
460 int
461 pgfs_node_write(struct puffs_usermount *pu, puffs_cookie_t opc,
462     uint8_t *buf, off_t offset, size_t *resid,
463     const struct puffs_cred *pcr, int ioflags)
464 {
465 	struct Xconn *xc;
466 	struct fileid_lock_handle *lock;
467 	fileid_t fileid = cookie_to_fileid(opc);
468 	size_t resultlen;
469 	int fd;
470 	int error;
471 
472 	if ((ioflags & PUFFS_IO_APPEND) != 0) {
473 		DPRINTF("%llu append sz %zu\n", fileid, *resid);
474 	} else {
475 		DPRINTF("%llu off %" PRIu64 " sz %zu\n", fileid,
476 		    (uint64_t)offset, *resid);
477 	}
478 	lock = fileid_lock(fileid, puffs_cc_getcc(pu));
479 retry:
480 	xc = begin(pu);
481 	error = update_mctime(xc, fileid);
482 	if (error != 0) {
483 		goto got_error;
484 	}
485 	error = lo_open_by_fileid(xc, fileid, INV_WRITE, &fd);
486 	if (error != 0) {
487 		goto got_error;
488 	}
489 	if ((ioflags & PUFFS_IO_APPEND) != 0) {
490 		int32_t off;
491 
492 		error = my_lo_lseek(xc, fd, 0, SEEK_END, &off);
493 		if (error != 0) {
494 			goto got_error;
495 		}
496 		offset = off;
497 	}
498 	if (offset < 0) {			/* negative offset */
499 		error = EINVAL;
500 		goto got_error;
501 	}
502 	if ((uint64_t)(INT64_MAX - offset) < *resid ||	/* int64 overflow */
503 	    INT_MAX < offset + *resid) {	/* our max filesize */
504 		error = EFBIG;
505 		goto got_error;
506 	}
507 	if ((ioflags & PUFFS_IO_APPEND) == 0) {
508 		error = my_lo_lseek(xc, fd, offset, SEEK_SET, NULL);
509 		if (error != 0) {
510 			goto got_error;
511 		}
512 	}
513 	error = my_lo_write(xc, fd, (const char *)buf, *resid, &resultlen);
514 	if (error != 0) {
515 		goto got_error;
516 	}
517 	assert(*resid >= resultlen);
518 	error = commit(xc);
519 	if (error != 0) {
520 		goto got_error;
521 	}
522 	*resid -= resultlen;
523 	DPRINTF("resid %zu\n", *resid);
524 	goto done;
525 got_error:
526 	rollback(xc);
527 	if (error == EAGAIN) {
528 		goto retry;
529 	}
530 done:
531 	fileid_unlock(lock);
532 	return error;
533 }
534 
535 int
536 pgfs_node_read(struct puffs_usermount *pu, puffs_cookie_t opc,
537     uint8_t *buf, off_t offset, size_t *resid,
538     const struct puffs_cred *pcr, int ioflags)
539 {
540 	struct Xconn *xc;
541 	fileid_t fileid = cookie_to_fileid(opc);
542 	size_t resultlen;
543 	int fd;
544 	int error;
545 
546 	DPRINTF("%llu off %" PRIu64 " sz %zu\n",
547 	    fileid, (uint64_t)offset, *resid);
548 retry:
549 	xc = begin(pu);
550 	/*
551 	 * try to update atime first as it's prune to conflict with other
552 	 * transactions.  eg. read-ahead requests can conflict each other.
553 	 * we don't want to retry my_lo_read as it's expensive.
554 	 *
555 	 * XXX probably worth to implement noatime mount option.
556 	 */
557 	error = update_atime(xc, fileid);
558 	if (error != 0) {
559 		goto got_error;
560 	}
561 	error = lo_open_by_fileid(xc, fileid, INV_READ, &fd);
562 	if (error != 0) {
563 		goto got_error;
564 	}
565 	error = my_lo_lseek(xc, fd, offset, SEEK_SET, NULL);
566 	if (error != 0) {
567 		goto got_error;
568 	}
569 	error = my_lo_read(xc, fd, buf, *resid, &resultlen);
570 	if (error != 0) {
571 		goto got_error;
572 	}
573 	assert(*resid >= resultlen);
574 	error = commit(xc);
575 	if (error != 0) {
576 		goto got_error;
577 	}
578 	*resid -= resultlen;
579 	return 0;
580 got_error:
581 	rollback(xc);
582 	if (error == EAGAIN) {
583 		goto retry;
584 	}
585 	return error;
586 }
587 
588 int
589 pgfs_node_link(struct puffs_usermount *pu, puffs_cookie_t dir_opc,
590     puffs_cookie_t targ_opc, const struct puffs_cn *pcn)
591 {
592 	struct Xconn *xc;
593 	fileid_t dir_fileid = cookie_to_fileid(dir_opc);
594 	fileid_t targ_fileid = cookie_to_fileid(targ_opc);
595 	struct vattr va;
596 	int error;
597 
598 	DPRINTF("%llu %llu %s\n", dir_fileid, targ_fileid, pcn->pcn_name);
599 retry:
600 	xc = begin(pu);
601 	error = getattr(xc, targ_fileid, &va, GETATTR_TYPE);
602 	if (error != 0) {
603 		goto got_error;
604 	}
605 	if (va.va_type == VDIR) {
606 		error = EPERM;
607 		goto got_error;
608 	}
609 	error = linkfile(xc, dir_fileid, pcn->pcn_name, targ_fileid);
610 	if (error != 0) {
611 		goto got_error;
612 	}
613 	error = update_ctime(xc, targ_fileid);
614 	if (error != 0) {
615 		goto got_error;
616 	}
617 	error = commit(xc);
618 	if (error != 0) {
619 		goto got_error;
620 	}
621 	return 0;
622 got_error:
623 	rollback(xc);
624 	if (error == EAGAIN) {
625 		goto retry;
626 	}
627 	return error;
628 }
629 
630 int
631 pgfs_node_remove(struct puffs_usermount *pu, puffs_cookie_t opc,
632     puffs_cookie_t targ, const struct puffs_cn *pcn)
633 {
634 	struct Xconn *xc;
635 	fileid_t fileid = cookie_to_fileid(opc);
636 	fileid_t targ_fileid = cookie_to_fileid(targ);
637 	struct vattr va;
638 	int error;
639 
640 retry:
641 	xc = begin(pu);
642 	error = getattr(xc, targ_fileid, &va, GETATTR_TYPE);
643 	if (error != 0) {
644 		goto got_error;
645 	}
646 	if (va.va_type == VDIR) {
647 		error = EPERM;
648 		goto got_error;
649 	}
650 	error = unlinkfile(xc, fileid, pcn->pcn_name, targ_fileid);
651 	if (error != 0) {
652 		goto got_error;
653 	}
654 	error = commit(xc);
655 	if (error != 0) {
656 		goto got_error;
657 	}
658 	return 0;
659 got_error:
660 	rollback(xc);
661 	if (error == EAGAIN) {
662 		goto retry;
663 	}
664 	return error;
665 }
666 
667 int
668 pgfs_node_rmdir(struct puffs_usermount *pu, puffs_cookie_t opc,
669     puffs_cookie_t targ, const struct puffs_cn *pcn)
670 {
671 	struct Xconn *xc;
672 	fileid_t parent_fileid = cookie_to_fileid(opc);
673 	fileid_t targ_fileid = cookie_to_fileid(targ);
674 	struct vattr va;
675 	bool empty;
676 	int error;
677 
678 retry:
679 	xc = begin(pu);
680 	error = getattr(xc, targ_fileid, &va, GETATTR_TYPE);
681 	if (error != 0) {
682 		goto got_error;
683 	}
684 	if (va.va_type != VDIR) {
685 		error = ENOTDIR;
686 		goto got_error;
687 	}
688 	error = isempty(xc, targ_fileid, &empty);
689 	if (error != 0) {
690 		goto got_error;
691 	}
692 	if (!empty) {
693 		error = ENOTEMPTY;
694 		goto got_error;
695 	}
696 	error = unlinkfile(xc, parent_fileid, pcn->pcn_name, targ_fileid);
697 	if (error == 0) {
698 		error = update_nlink(xc, parent_fileid, -1);
699 	}
700 	if (error != 0) {
701 		goto got_error;
702 	}
703 	error = commit(xc);
704 	if (error != 0) {
705 		goto got_error;
706 	}
707 	return 0;
708 got_error:
709 	rollback(xc);
710 	if (error == EAGAIN) {
711 		goto retry;
712 	}
713 	return error;
714 }
715 
716 int
717 pgfs_node_inactive(struct puffs_usermount *pu, puffs_cookie_t opc)
718 {
719 	struct Xconn *xc;
720 	fileid_t fileid = cookie_to_fileid(opc);
721 	struct vattr va;
722 	int error;
723 
724 	/*
725 	 * XXX
726 	 * probably this should be handed to the separate "reaper" context
727 	 * because lo_unlink() can be too expensive to execute synchronously.
728 	 * however, the puffs_cc API doesn't provide a way to create a worker
729 	 * context.
730 	 */
731 
732 	DPRINTF("%llu\n", fileid);
733 retry:
734 	xc = begin(pu);
735 	error = getattr(xc, fileid, &va, GETATTR_NLINK|GETATTR_TYPE);
736 	if (error != 0) {
737 		DPRINTF("%llu GETATTR fail\n", fileid);
738 		goto got_error;
739 	}
740 	if (va.va_nlink == 0) {
741 		DPRINTF("%llu nlink=0\n", fileid);
742 		error = cleanupfile(xc, fileid, &va);
743 		if (error != 0) {
744 			goto got_error;
745 		}
746 	}
747 	error = commit(xc);
748 	if (error != 0) {
749 		goto got_error;
750 	}
751 	return 0;
752 got_error:
753 	rollback(xc);
754 	if (error == EAGAIN) {
755 		goto retry;
756 	}
757 	return error;
758 }
759 
760 int
761 pgfs_node_setattr(struct puffs_usermount *pu, puffs_cookie_t opc,
762     const struct vattr *va, const struct puffs_cred *pcr)
763 {
764 	struct Xconn *xc;
765 	struct fileid_lock_handle *lock;
766 	fileid_t fileid = cookie_to_fileid(opc);
767 	struct vattr ova;
768 	unsigned int attrs;
769 	int error;
770 
771 	DPRINTF("%llu\n", fileid);
772 	if (va->va_flags != (u_long)PUFFS_VNOVAL) {
773 		return EOPNOTSUPP;
774 	}
775 	attrs = 0;
776 	if (va->va_uid != (uid_t)PUFFS_VNOVAL ||
777 	    va->va_gid != (gid_t)PUFFS_VNOVAL) {
778 		attrs |= GETATTR_UID|GETATTR_GID|GETATTR_MODE;
779 	}
780 	if (va->va_mode != (mode_t)PUFFS_VNOVAL) {
781 		attrs |= GETATTR_TYPE|GETATTR_UID|GETATTR_GID;
782 	}
783 	if (va->va_atime.tv_sec != PUFFS_VNOVAL ||
784 	    va->va_mtime.tv_sec != PUFFS_VNOVAL ||
785 	    va->va_ctime.tv_sec != PUFFS_VNOVAL) {
786 		attrs |= GETATTR_UID|GETATTR_GID|GETATTR_MODE;
787 	}
788 	lock = fileid_lock(fileid, puffs_cc_getcc(pu));
789 retry:
790 	xc = begin(pu);
791 	error = getattr(xc, fileid, &ova, attrs);
792 	if (error != 0) {
793 		goto got_error;
794 	}
795 	if (va->va_uid != (uid_t)PUFFS_VNOVAL ||
796 	    va->va_gid != (gid_t)PUFFS_VNOVAL) {
797 		static struct cmd *c;
798 		uint64_t newuid =
799 		    va->va_uid != (uid_t)PUFFS_VNOVAL ? va->va_uid : ova.va_uid;
800 		uint64_t newgid =
801 		    va->va_gid != (gid_t)PUFFS_VNOVAL ? va->va_gid : ova.va_gid;
802 
803 		error = puffs_access_chown(ova.va_uid, ova.va_gid,
804 		    newuid, newgid, pcr);
805 		if (error != 0) {
806 			goto got_error;
807 		}
808 		CREATECMD(c,
809 			"UPDATE file "
810 			"SET uid = $1, gid = $2 "
811 			"WHERE fileid = $3", INT8OID, INT8OID, INT8OID);
812 		error = simplecmd(xc, c, newuid, newgid, fileid);
813 		if (error != 0) {
814 			goto got_error;
815 		}
816 		ova.va_uid = newuid;
817 		ova.va_gid = newgid;
818 	}
819 	if (va->va_mode != (mode_t)PUFFS_VNOVAL) {
820 		static struct cmd *c;
821 		uint64_t newmode = va->va_mode;
822 
823 		error = puffs_access_chmod(ova.va_uid, ova.va_gid, ova.va_type,
824 		    newmode, pcr);
825 		if (error != 0) {
826 			goto got_error;
827 		}
828 		CREATECMD(c,
829 			"UPDATE file "
830 			"SET mode = $1 "
831 			"WHERE fileid = $2", INT8OID, INT8OID);
832 		error = simplecmd(xc, c, newmode, fileid);
833 		if (error != 0) {
834 			goto got_error;
835 		}
836 		ova.va_mode = newmode;
837 	}
838 	if (va->va_atime.tv_sec != PUFFS_VNOVAL ||
839 	    va->va_mtime.tv_sec != PUFFS_VNOVAL ||
840 	    va->va_ctime.tv_sec != PUFFS_VNOVAL ||
841 	    va->va_birthtime.tv_sec != PUFFS_VNOVAL) {
842 		error = puffs_access_times(ova.va_uid, ova.va_gid, ova.va_mode,
843 		    (va->va_vaflags & VA_UTIMES_NULL) != 0, pcr);
844 		if (error != 0) {
845 			goto got_error;
846 		}
847 		if (va->va_atime.tv_sec != PUFFS_VNOVAL) {
848 			static struct cmd *c;
849 			char *ts;
850 
851 			error = timespec_to_pgtimestamp(&va->va_atime, &ts);
852 			if (error != 0) {
853 				goto got_error;
854 			}
855 			CREATECMD(c,
856 				"UPDATE file "
857 				"SET atime = $1 "
858 				"WHERE fileid = $2", TIMESTAMPTZOID, INT8OID);
859 			error = simplecmd(xc, c, ts, fileid);
860 			free(ts);
861 			if (error != 0) {
862 				goto got_error;
863 			}
864 		}
865 		if (va->va_mtime.tv_sec != PUFFS_VNOVAL) {
866 			static struct cmd *c;
867 			char *ts;
868 
869 			error = timespec_to_pgtimestamp(&va->va_mtime, &ts);
870 			if (error != 0) {
871 				goto got_error;
872 			}
873 			CREATECMD(c,
874 				"UPDATE file "
875 				"SET mtime = $1 "
876 				"WHERE fileid = $2", TIMESTAMPTZOID, INT8OID);
877 			error = simplecmd(xc, c, ts, fileid);
878 			free(ts);
879 			if (error != 0) {
880 				goto got_error;
881 			}
882 		}
883 		if (va->va_ctime.tv_sec != PUFFS_VNOVAL) {
884 			static struct cmd *c;
885 			char *ts;
886 
887 			error = timespec_to_pgtimestamp(&va->va_ctime, &ts);
888 			if (error != 0) {
889 				goto got_error;
890 			}
891 			CREATECMD(c,
892 				"UPDATE file "
893 				"SET ctime = $1 "
894 				"WHERE fileid = $2", TIMESTAMPTZOID, INT8OID);
895 			error = simplecmd(xc, c, ts, fileid);
896 			free(ts);
897 			if (error != 0) {
898 				goto got_error;
899 			}
900 		}
901 		if (va->va_birthtime.tv_sec != PUFFS_VNOVAL) {
902 			static struct cmd *c;
903 			char *ts;
904 
905 			error = timespec_to_pgtimestamp(&va->va_birthtime, &ts);
906 			if (error != 0) {
907 				goto got_error;
908 			}
909 			CREATECMD(c,
910 				"UPDATE file "
911 				"SET btime = $1 "
912 				"WHERE fileid = $2", TIMESTAMPTZOID, INT8OID);
913 			error = simplecmd(xc, c, ts, fileid);
914 			free(ts);
915 			if (error != 0) {
916 				goto got_error;
917 			}
918 		}
919 	}
920 	if (va->va_size != (uint64_t)PUFFS_VNOVAL) {
921 		int fd;
922 
923 		if (va->va_size > INT_MAX) {
924 			error = EFBIG;
925 			goto got_error;
926 		}
927 		error = lo_open_by_fileid(xc, fileid, INV_READ|INV_WRITE, &fd);
928 		if (error != 0) {
929 			goto got_error;
930 		}
931 		error = my_lo_truncate(xc, fd, va->va_size);
932 		if (error != 0) {
933 			goto got_error;
934 		}
935 		error = my_lo_close(xc, fd);
936 		if (error != 0) {
937 			goto got_error;
938 		}
939 	}
940 	error = commit(xc);
941 	if (error != 0) {
942 		goto got_error;
943 	}
944 	goto done;
945 got_error:
946 	rollback(xc);
947 	if (error == EAGAIN) {
948 		goto retry;
949 	}
950 done:
951 	fileid_unlock(lock);
952 	return error;
953 }
954 
955 int
956 pgfs_node_rename(struct puffs_usermount *pu, puffs_cookie_t src_dir,
957     puffs_cookie_t src, const struct puffs_cn *pcn_src,
958     puffs_cookie_t targ_dir, puffs_cookie_t targ,
959     const struct puffs_cn *pcn_targ)
960 {
961 	struct Xconn *xc;
962 	fileid_t fileid_src_dir = cookie_to_fileid(src_dir);
963 	fileid_t fileid_src = cookie_to_fileid(src);
964 	fileid_t fileid_targ_dir = cookie_to_fileid(targ_dir);
965 	fileid_t fileid_targ = cookie_to_fileid(targ);
966 	struct vattr va_src;
967 	struct vattr va_targ;
968 	int error;
969 
970 	DPRINTF("%llu %llu %llu %llu\n", fileid_src_dir, fileid_src,
971 	    fileid_targ_dir, fileid_targ);
972 retry:
973 	xc = begin(pu);
974 	error = getattr(xc, fileid_src, &va_src, GETATTR_TYPE);
975 	if (error != 0) {
976 		goto got_error;
977 	}
978 	if (va_src.va_type == VDIR) {
979 		error = check_path(xc, fileid_src, fileid_targ_dir);
980 		if (error != 0) {
981 			goto got_error;
982 		}
983 	}
984 	if (fileid_targ != 0) {
985 		error = getattr(xc, fileid_targ, &va_targ,
986 		    GETATTR_TYPE|GETATTR_NLINK);
987 		if (error != 0) {
988 			goto got_error;
989 		}
990 		if (va_src.va_type == VDIR) {
991 			if (va_targ.va_type != VDIR) {
992 				error = ENOTDIR;
993 				goto got_error;
994 			}
995 			if (va_targ.va_nlink != 2) {
996 				error = ENOTEMPTY;
997 				goto got_error;
998 			}
999 		} else if (va_targ.va_type == VDIR) {
1000 			error = EISDIR;
1001 			goto got_error;
1002 		}
1003 		error = unlinkfile(xc, fileid_targ_dir, pcn_targ->pcn_name,
1004 		    fileid_targ);
1005 		if (error == 0 && va_targ.va_type == VDIR) {
1006 			error = update_nlink(xc, fileid_targ_dir, -1);
1007 		}
1008 		if (error != 0) {
1009 			goto got_error;
1010 		}
1011 	}
1012 	error = linkfile(xc, fileid_targ_dir, pcn_targ->pcn_name, fileid_src);
1013 	if (error == 0 && va_src.va_type == VDIR) {
1014 		error = update_nlink(xc, fileid_targ_dir, 1);
1015 	}
1016 	if (error != 0) {
1017 		goto got_error;
1018 	}
1019 	/* XXX ctime? */
1020 	error = unlinkfile(xc, fileid_src_dir, pcn_src->pcn_name, fileid_src);
1021 	if (error == 0 && va_src.va_type == VDIR) {
1022 		error = update_nlink(xc, fileid_src_dir, -1);
1023 	}
1024 	if (error != 0) {
1025 		goto got_error;
1026 	}
1027 	error = commit(xc);
1028 	if (error != 0) {
1029 		goto got_error;
1030 	}
1031 	return 0;
1032 got_error:
1033 	rollback(xc);
1034 	if (error == EAGAIN) {
1035 		goto retry;
1036 	}
1037 	return error;
1038 }
1039 
1040 int
1041 pgfs_node_symlink(struct puffs_usermount *pu, puffs_cookie_t opc,
1042     struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1043     const struct vattr *va, const char *target)
1044 {
1045 	struct Xconn *xc;
1046 	struct puffs_cred *pcr = pcn->pcn_cred;
1047 	fileid_t parent_fileid = cookie_to_fileid(opc);
1048 	fileid_t new_fileid;
1049 	size_t resultlen;
1050 	size_t targetlen;
1051 	uid_t uid;
1052 	gid_t gid;
1053 	int loid;
1054 	int fd;
1055 	int error;
1056 
1057 	DPRINTF("%llu %s %s\n", parent_fileid, pcn->pcn_name, target);
1058 	if (puffs_cred_getuid(pcr, &uid) == -1 ||
1059 	    puffs_cred_getgid(pcr, &gid) == -1) {
1060 		return errno;
1061 	}
1062 retry:
1063 	xc = begin(pu);
1064 	error = mklinkfile_lo(xc, parent_fileid, pcn->pcn_name, VLNK,
1065 	    va->va_mode, uid, gid, &new_fileid, &loid);
1066 	if (error != 0) {
1067 		goto got_error;
1068 	}
1069 	error = my_lo_open(xc, loid, INV_WRITE, &fd);
1070 	if (error != 0) {
1071 		goto got_error;
1072 	}
1073 	targetlen = strlen(target);
1074 	error = my_lo_write(xc, fd, target, targetlen, &resultlen);
1075 	if (error != 0) {
1076 		goto got_error;
1077 	}
1078 	if (resultlen != targetlen) {
1079 		error = ENOSPC; /* XXX */
1080 		goto got_error;
1081 	}
1082 	error = commit(xc);
1083 	if (error != 0) {
1084 		goto got_error;
1085 	}
1086 	puffs_newinfo_setcookie(pni, fileid_to_cookie(new_fileid));
1087 	return 0;
1088 got_error:
1089 	rollback(xc);
1090 	if (error == EAGAIN) {
1091 		goto retry;
1092 	}
1093 	return error;
1094 }
1095 
1096 int
1097 pgfs_node_readlink(struct puffs_usermount *pu, puffs_cookie_t opc,
1098     const struct puffs_cred *pcr, char *buf, size_t *buflenp)
1099 {
1100 	fileid_t fileid = cookie_to_fileid(opc);
1101 	struct Xconn *xc;
1102 	size_t resultlen;
1103 	int fd;
1104 	int error;
1105 
1106 	DPRINTF("%llu\n", fileid);
1107 	xc = begin_readonly(pu);
1108 	error = lo_open_by_fileid(xc, fileid, INV_READ, &fd);
1109 	if (error != 0) {
1110 		rollback(xc);
1111 		return error;
1112 	}
1113 	error = my_lo_read(xc, fd, buf, *buflenp, &resultlen);
1114 	if (error != 0) {
1115 		rollback(xc);
1116 		return error;
1117 	}
1118 	assert(resultlen <= *buflenp);
1119 	error = commit(xc);
1120 	if (error != 0) {
1121 		return error;
1122 	}
1123 	*buflenp = resultlen;
1124 	return 0;
1125 }
1126 
1127 int
1128 pgfs_node_access(struct puffs_usermount *pu, puffs_cookie_t opc,
1129     int mode, const struct puffs_cred *pcr)
1130 {
1131 	struct Xconn *xc;
1132 	fileid_t fileid = cookie_to_fileid(opc);
1133 	struct vattr va;
1134 	int error;
1135 
1136 	DPRINTF("%llu\n", fileid);
1137 retry:
1138 	xc = begin_readonly(pu);
1139 	error = getattr(xc, fileid, &va,
1140 	    GETATTR_TYPE|GETATTR_MODE|GETATTR_UID|GETATTR_GID);
1141 	if (error != 0) {
1142 		goto got_error;
1143 	}
1144 	error = commit(xc);
1145 	if (error != 0) {
1146 		goto got_error;
1147 	}
1148 	return puffs_access(va.va_type, va.va_mode, va.va_uid, va.va_gid, mode,
1149 	    pcr);
1150 got_error:
1151 	rollback(xc);
1152 	if (error == EAGAIN) {
1153 		goto retry;
1154 	}
1155 	return error;
1156 }
1157 
1158 int
1159 pgfs_node_fsync(struct puffs_usermount *pu, puffs_cookie_t opc,
1160     const struct puffs_cred *pcr, int flags, off_t offlo, off_t offhi)
1161 {
1162 	fileid_t fileid = cookie_to_fileid(opc);
1163 
1164 	DPRINTF("%llu\n", fileid);
1165 	return flush_xacts(pu);
1166 }
1167 
1168 int
1169 pgfs_fs_statvfs(struct puffs_usermount *pu, struct statvfs *sbp)
1170 {
1171 	struct Xconn *xc;
1172 	uint64_t nfiles;
1173 	uint64_t bytes;
1174 	uint64_t lo_bytes;
1175 	static struct cmd *c_nfiles;
1176 	static struct cmd *c_bytes;
1177 	static struct cmd *c_lobytes;
1178 	static const Oid types[] = { INT8OID, };
1179 	struct fetchstatus s;
1180 	int error;
1181 
1182 retry:
1183 	xc = begin_readonly(pu);
1184 	/*
1185 	 * use an estimate which we can retrieve quickly, instead of
1186 	 * "SELECT count(*) from file".
1187 	 */
1188 	CREATECMD_NOPARAM(c_nfiles,
1189 		"SELECT reltuples::int8 "
1190 		"FROM pg_class c LEFT JOIN pg_namespace n "
1191 		"ON (n.oid=c.relnamespace) "
1192 		"WHERE n.nspname = 'pgfs' AND c.relname = 'file'");
1193 	CREATECMD_NOPARAM(c_bytes,
1194 		"SELECT sum(pg_total_relation_size(c.oid))::int8 "
1195 		"FROM pg_class c LEFT JOIN pg_namespace n "
1196 		"ON (n.oid=c.relnamespace) "
1197 		"WHERE n.nspname = 'pgfs'");
1198 	/*
1199 	 * the following is not correct if someone else is using large objects
1200 	 * in the same database.  we don't bother to join with datafork it as
1201 	 * it's too expensive for the little benefit.
1202 	 */
1203 	CREATECMD_NOPARAM(c_lobytes,
1204 		"SELECT pg_total_relation_size('pg_largeobject')::int8");
1205 	error = sendcmd(xc, c_nfiles);
1206 	if (error != 0) {
1207 		goto got_error;
1208 	}
1209 	fetchinit(&s, xc);
1210 	error = FETCHNEXT(&s, types, &nfiles);
1211 	fetchdone(&s);
1212 	if (error != 0) {
1213 		goto got_error;
1214 	}
1215 	error = sendcmd(xc, c_bytes);
1216 	if (error != 0) {
1217 		goto got_error;
1218 	}
1219 	fetchinit(&s, xc);
1220 	error = FETCHNEXT(&s, types, &bytes);
1221 	fetchdone(&s);
1222 	if (error != 0) {
1223 		goto got_error;
1224 	}
1225 	error = sendcmd(xc, c_lobytes);
1226 	if (error != 0) {
1227 		goto got_error;
1228 	}
1229 	fetchinit(&s, xc);
1230 	error = FETCHNEXT(&s, types, &lo_bytes);
1231 	fetchdone(&s);
1232 	if (error != 0) {
1233 		goto got_error;
1234 	}
1235 	error = commit(xc);
1236 	if (error != 0) {
1237 		goto got_error;
1238 	}
1239 	/*
1240 	 * XXX fill f_blocks and f_files with meaningless large values.
1241 	 * there are no easy way to provide meaningful values for them
1242 	 * esp. with tablespaces.
1243 	 */
1244 	sbp->f_bsize = LOBLKSIZE;
1245 	sbp->f_frsize = LOBLKSIZE;
1246 	sbp->f_blocks = INT64_MAX / 100 / sbp->f_frsize;
1247 	sbp->f_bfree = sbp->f_blocks - howmany(bytes + lo_bytes, sbp->f_frsize);
1248 	sbp->f_bavail = sbp->f_bfree;
1249 	sbp->f_bresvd = 0;
1250 	sbp->f_files = INT_MAX;
1251 	sbp->f_ffree = sbp->f_files - nfiles;
1252 	sbp->f_favail = sbp->f_ffree;
1253 	sbp->f_fresvd = 0;
1254 	return 0;
1255 got_error:
1256 	rollback(xc);
1257 	if (error == EAGAIN) {
1258 		goto retry;
1259 	}
1260 	return error;
1261 }
1262