xref: /netbsd-src/lib/libukfs/ukfs.c (revision abb0f93cd77b67f080613360c65701f85e5f5cfe)
1 /*	$NetBSD: ukfs.c,v 1.46 2009/12/12 00:46:04 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Finnish Cultural Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * This library enables access to files systems directly without
33  * involving system calls.
34  */
35 
36 #ifdef __linux__
37 #define _XOPEN_SOURCE 500
38 #define _BSD_SOURCE
39 #define _FILE_OFFSET_BITS 64
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/queue.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/mount.h>
47 
48 #include <assert.h>
49 #include <dirent.h>
50 #include <dlfcn.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <pthread.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <stdint.h>
60 
61 #include <rump/ukfs.h>
62 
63 #include <rump/rump.h>
64 #include <rump/rump_syscalls.h>
65 #include <rump/rumpuser.h>
66 
67 #include "ukfs_int_disklabel.h"
68 
69 #define UKFS_MODE_DEFAULT 0555
70 
71 struct ukfs {
72 	struct mount *ukfs_mp;
73 	struct vnode *ukfs_rvp;
74 	void *ukfs_specific;
75 
76 	pthread_spinlock_t ukfs_spin;
77 	pid_t ukfs_nextpid;
78 	struct vnode *ukfs_cdir;
79 	int ukfs_devfd;
80 	char *ukfs_devpath;
81 	char *ukfs_mountpath;
82 	struct ukfs_part *ukfs_part;
83 };
84 
85 static int builddirs(const char *, mode_t,
86     int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *);
87 
88 struct mount *
89 ukfs_getmp(struct ukfs *ukfs)
90 {
91 
92 	return ukfs->ukfs_mp;
93 }
94 
95 struct vnode *
96 ukfs_getrvp(struct ukfs *ukfs)
97 {
98 	struct vnode *rvp;
99 
100 	rvp = ukfs->ukfs_rvp;
101 	rump_pub_vp_incref(rvp);
102 
103 	return rvp;
104 }
105 
106 void
107 ukfs_setspecific(struct ukfs *ukfs, void *priv)
108 {
109 
110 	ukfs->ukfs_specific = priv;
111 }
112 
113 void *
114 ukfs_getspecific(struct ukfs *ukfs)
115 {
116 
117 	return ukfs->ukfs_specific;
118 }
119 
120 #ifdef DONT_WANT_PTHREAD_LINKAGE
121 #define pthread_spin_lock(a)
122 #define pthread_spin_unlock(a)
123 #define pthread_spin_init(a,b)
124 #define pthread_spin_destroy(a)
125 #endif
126 
127 static pid_t
128 nextpid(struct ukfs *ukfs)
129 {
130 	pid_t npid;
131 
132 	pthread_spin_lock(&ukfs->ukfs_spin);
133 	if (ukfs->ukfs_nextpid == 0)
134 		ukfs->ukfs_nextpid++;
135 	npid = ukfs->ukfs_nextpid++;
136 	pthread_spin_unlock(&ukfs->ukfs_spin);
137 
138 	return npid;
139 }
140 
141 static void
142 precall(struct ukfs *ukfs)
143 {
144 	struct vnode *rvp, *cvp;
145 
146 	rump_pub_lwp_alloc_and_switch(nextpid(ukfs), 1);
147 	rvp = ukfs_getrvp(ukfs);
148 	pthread_spin_lock(&ukfs->ukfs_spin);
149 	cvp = ukfs->ukfs_cdir;
150 	pthread_spin_unlock(&ukfs->ukfs_spin);
151 	rump_pub_rcvp_set(rvp, cvp); /* takes refs */
152 	rump_pub_vp_rele(rvp);
153 }
154 
155 static void
156 postcall(struct ukfs *ukfs)
157 {
158 	struct vnode *rvp;
159 
160 	rvp = ukfs_getrvp(ukfs);
161 	rump_pub_rcvp_set(NULL, rvp);
162 	rump_pub_vp_rele(rvp);
163 	rump_pub_lwp_release(rump_pub_lwp_curlwp());
164 }
165 
166 struct ukfs_part {
167 	int part_type;
168 	char part_labelchar;
169 	off_t part_devoff;
170 	off_t part_devsize;
171 };
172 
173 enum ukfs_parttype { UKFS_PART_NONE, UKFS_PART_DISKLABEL, UKFS_PART_OFFSET };
174 
175 static struct ukfs_part ukfs__part_none = {
176 	.part_type = UKFS_PART_NONE,
177 	.part_devoff = 0,
178 	.part_devsize = RUMP_ETFS_SIZE_ENDOFF,
179 };
180 static struct ukfs_part ukfs__part_na;
181 struct ukfs_part *ukfs_part_none = &ukfs__part_none;
182 struct ukfs_part *ukfs_part_na = &ukfs__part_na;
183 
184 #define PART2LOCKSIZE(len) ((len) == RUMP_ETFS_SIZE_ENDOFF ? 0 : (len))
185 
186 int
187 _ukfs_init(int version)
188 {
189 	int rv;
190 
191 	if (version != UKFS_VERSION) {
192 		printf("incompatible ukfs version, %d vs. %d\n",
193 		    version, UKFS_VERSION);
194 		errno = EPROGMISMATCH;
195 		return -1;
196 	}
197 
198 	if ((rv = rump_init()) != 0) {
199 		errno = rv;
200 		return -1;
201 	}
202 
203 	return 0;
204 }
205 
206 /*ARGSUSED*/
207 static int
208 rumpmkdir(struct ukfs *dummy, const char *path, mode_t mode)
209 {
210 
211 	return rump_sys_mkdir(path, mode);
212 }
213 
214 int
215 ukfs_part_probe(char *devpath, struct ukfs_part **partp)
216 {
217 	struct ukfs_part *part;
218 	char *p;
219 	int error = 0;
220 	int devfd = -1;
221 
222 	if ((p = strstr(devpath, UKFS_PARTITION_SCANMAGIC)) != NULL) {
223 		fprintf(stderr, "ukfs: %%PART is deprecated.  use "
224 		    "%%DISKLABEL instead\n");
225 		errno = ENODEV;
226 		return -1;
227 	}
228 
229 	part = malloc(sizeof(*part));
230 	if (part == NULL) {
231 		errno = ENOMEM;
232 		return -1;
233 	}
234 	part->part_type = UKFS_PART_NONE;
235 
236 	/*
237 	 * Check for magic in pathname:
238 	 *   disklabel: /regularpath%DISKLABEL:labelchar%\0
239 	 *     offsets: /regularpath%OFFSET:start,end%\0
240 	 */
241 #define MAGICADJ_DISKLABEL(p, n) (p+sizeof(UKFS_DISKLABEL_SCANMAGIC)-1+n)
242 	if ((p = strstr(devpath, UKFS_DISKLABEL_SCANMAGIC)) != NULL
243 	    && strlen(p) == UKFS_DISKLABEL_MAGICLEN
244 	    && *(MAGICADJ_DISKLABEL(p,1)) == '%') {
245 		if (*(MAGICADJ_DISKLABEL(p,0)) >= 'a' &&
246 		    *(MAGICADJ_DISKLABEL(p,0)) < 'a' + UKFS_MAXPARTITIONS) {
247 			struct ukfs__disklabel dl;
248 			struct ukfs__partition *pp;
249 			char buf[65536];
250 			char labelchar = *(MAGICADJ_DISKLABEL(p,0));
251 			int partition = labelchar - 'a';
252 
253 			*p = '\0';
254 			devfd = open(devpath, O_RDONLY);
255 			if (devfd == -1) {
256 				error = errno;
257 				goto out;
258 			}
259 
260 			/* Locate the disklabel and find the partition. */
261 			if (pread(devfd, buf, sizeof(buf), 0) == -1) {
262 				error = errno;
263 				goto out;
264 			}
265 
266 			if (ukfs__disklabel_scan(&dl, buf, sizeof(buf)) != 0) {
267 				error = ENOENT;
268 				goto out;
269 			}
270 
271 			if (dl.d_npartitions < partition) {
272 				error = ENOENT;
273 				goto out;
274 			}
275 
276 			pp = &dl.d_partitions[partition];
277 			part->part_type = UKFS_PART_DISKLABEL;
278 			part->part_labelchar = labelchar;
279 			part->part_devoff = pp->p_offset << DEV_BSHIFT;
280 			part->part_devsize = pp->p_size << DEV_BSHIFT;
281 		} else {
282 			error = EINVAL;
283 		}
284 #define MAGICADJ_OFFSET(p, n) (p+sizeof(UKFS_OFFSET_SCANMAGIC)-1+n)
285 	} else if (((p = strstr(devpath, UKFS_OFFSET_SCANMAGIC)) != NULL)
286 	    && (strlen(p) >= UKFS_OFFSET_MINLEN)) {
287 		char *comma, *pers, *ep, *nptr;
288 		u_quad_t val;
289 
290 		comma = strchr(p, ',');
291 		if (comma == NULL) {
292 			error = EINVAL;
293 			goto out;
294 		}
295 		pers = strchr(comma, '%');
296 		if (pers == NULL) {
297 			error = EINVAL;
298 			goto out;
299 		}
300 		*comma = '\0';
301 		*pers = '\0';
302 		*p = '\0';
303 
304 		nptr = MAGICADJ_OFFSET(p,0);
305 		/* check if string is negative */
306 		if (*nptr == '-') {
307 			error = ERANGE;
308 			goto out;
309 		}
310 		val = strtouq(nptr, &ep, 10);
311 		if (val == UQUAD_MAX) {
312 			error = ERANGE;
313 			goto out;
314 		}
315 		if (*ep != '\0') {
316 			error = EADDRNOTAVAIL; /* creative ;) */
317 			goto out;
318 		}
319 		part->part_devoff = val;
320 
321 		/* omstart */
322 
323 		nptr = comma+1;
324 		/* check if string is negative */
325 		if (*nptr == '-') {
326 			error = ERANGE;
327 			goto out;
328 		}
329 		val = strtouq(nptr, &ep, 10);
330 		if (val == UQUAD_MAX) {
331 			error = ERANGE;
332 			goto out;
333 		}
334 		if (*ep != '\0') {
335 			error = EADDRNOTAVAIL; /* creative ;) */
336 			goto out;
337 		}
338 		part->part_devsize = val;
339 		part->part_type = UKFS_PART_OFFSET;
340 	} else {
341 		free(part);
342 		part = ukfs_part_none;
343 	}
344 
345  out:
346 	if (devfd != -1)
347 		close(devfd);
348 	if (error) {
349 		free(part);
350 		errno = error;
351 	} else {
352 		*partp = part;
353 	}
354 
355 	return error ? -1 : 0;
356 }
357 
358 int
359 ukfs_part_tostring(struct ukfs_part *part, char *str, size_t strsize)
360 {
361 	int rv;
362 
363 	*str = '\0';
364 	/* "pseudo" values */
365 	if (part == ukfs_part_na) {
366 		errno = EINVAL;
367 		return -1;
368 	}
369 	if (part == ukfs_part_none)
370 		return 0;
371 
372 	rv = 0;
373 	switch (part->part_type) {
374 	case UKFS_PART_NONE:
375 		break;
376 
377 	case UKFS_PART_DISKLABEL:
378 		snprintf(str, strsize, "%%DISKLABEL:%c%%",part->part_labelchar);
379 		rv = 1;
380 		break;
381 
382 	case UKFS_PART_OFFSET:
383 		snprintf(str, strsize, "[%llu,%llu]",
384 		    (unsigned long long)part->part_devoff,
385 		    (unsigned long long)(part->part_devoff+part->part_devsize));
386 		rv = 1;
387 		break;
388 	}
389 
390 	return rv;
391 }
392 
393 static void
394 unlockdev(int fd, struct ukfs_part *part)
395 {
396 	struct flock flarg;
397 
398 	memset(&flarg, 0, sizeof(flarg));
399 	flarg.l_type = F_UNLCK;
400 	flarg.l_whence = SEEK_SET;
401 	flarg.l_start = part->part_devoff;
402 	flarg.l_len = PART2LOCKSIZE(part->part_devsize);
403 	if (fcntl(fd, F_SETLK, &flarg) == -1)
404 		warn("ukfs: cannot unlock device file");
405 }
406 
407 /*
408  * Open the disk file and flock it.  Also, if we are operation on
409  * an embedded partition, find the partition offset and size from
410  * the disklabel.
411  *
412  * We hard-fail only in two cases:
413  *  1) we failed to get the partition info out (don't know what offset
414  *     to mount from)
415  *  2) we failed to flock the source device (i.e. fcntl() fails,
416  *     not e.g. open() before it)
417  *
418  * Otherwise we let the code proceed to mount and let the file system
419  * throw the proper error.  The only questionable bit is that if we
420  * soft-fail before flock and mount does succeed...
421  *
422  * Returns: -1 error (errno reports error code)
423  *           0 success
424  *
425  * dfdp: -1  device is not open
426  *        n  device is open
427  */
428 static int
429 process_diskdevice(const char *devpath, struct ukfs_part *part, int rdonly,
430 	int *dfdp)
431 {
432 	struct stat sb;
433 	int rv = 0, devfd;
434 
435 	/* defaults */
436 	*dfdp = -1;
437 
438 	devfd = open(devpath, rdonly ? O_RDONLY : O_RDWR);
439 	if (devfd == -1) {
440 		rv = errno;
441 		goto out;
442 	}
443 
444 	if (fstat(devfd, &sb) == -1) {
445 		rv = errno;
446 		goto out;
447 	}
448 
449 	/*
450 	 * We do this only for non-block device since the
451 	 * (NetBSD) kernel allows block device open only once.
452 	 * We also need to close the device for fairly obvious reasons.
453 	 */
454 	if (!S_ISBLK(sb.st_mode)) {
455 		struct flock flarg;
456 
457 		memset(&flarg, 0, sizeof(flarg));
458 		flarg.l_type = rdonly ? F_RDLCK : F_WRLCK;
459 		flarg.l_whence = SEEK_SET;
460 		flarg.l_start = part->part_devoff;
461 		flarg.l_len = PART2LOCKSIZE(part->part_devsize);
462 		if (fcntl(devfd, F_SETLK, &flarg) == -1) {
463 			pid_t holder;
464 			int sverrno;
465 
466 			sverrno = errno;
467 			if (fcntl(devfd, F_GETLK, &flarg) != 1)
468 				holder = flarg.l_pid;
469 			else
470 				holder = -1;
471 			warnx("ukfs_mount: cannot lock device.  held by pid %d",
472 			    holder);
473 			rv = sverrno;
474 			goto out;
475 		}
476 	} else {
477 		close(devfd);
478 		devfd = -1;
479 	}
480 	*dfdp = devfd;
481 
482  out:
483 	if (rv) {
484 		if (devfd != -1)
485 			close(devfd);
486 	}
487 
488 	return rv;
489 }
490 
491 static struct ukfs *
492 doukfsmount(const char *vfsname, const char *devpath, struct ukfs_part *part,
493 	const char *mountpath, int mntflags, void *arg, size_t alen)
494 {
495 	struct ukfs *fs = NULL;
496 	int rv = 0, devfd = -1;
497 	int mounted = 0;
498 	int regged = 0;
499 
500 	if (part != ukfs_part_na) {
501 		if ((rv = process_diskdevice(devpath, part,
502 		    mntflags & MNT_RDONLY, &devfd)) != 0)
503 			goto out;
504 	}
505 
506 	fs = malloc(sizeof(struct ukfs));
507 	if (fs == NULL) {
508 		rv = ENOMEM;
509 		goto out;
510 	}
511 	memset(fs, 0, sizeof(struct ukfs));
512 
513 	/* create our mountpoint.  this is never removed. */
514 	if (builddirs(mountpath, 0777, rumpmkdir, NULL) == -1) {
515 		if (errno != EEXIST) {
516 			rv = errno;
517 			goto out;
518 		}
519 	}
520 
521 	if (part != ukfs_part_na) {
522 		/* LINTED */
523 		rv = rump_pub_etfs_register_withsize(devpath, devpath,
524 		    RUMP_ETFS_BLK, part->part_devoff, part->part_devsize);
525 		if (rv) {
526 			goto out;
527 		}
528 		regged = 1;
529 	}
530 
531 	rv = rump_sys_mount(vfsname, mountpath, mntflags, arg, alen);
532 	if (rv) {
533 		rv = errno;
534 		goto out;
535 	}
536 	mounted = 1;
537 	rv = rump_pub_vfs_getmp(mountpath, &fs->ukfs_mp);
538 	if (rv) {
539 		goto out;
540 	}
541 	rv = rump_pub_vfs_root(fs->ukfs_mp, &fs->ukfs_rvp, 0);
542 	if (rv) {
543 		goto out;
544 	}
545 
546 	if (regged) {
547 		fs->ukfs_devpath = strdup(devpath);
548 	}
549 	fs->ukfs_mountpath = strdup(mountpath);
550 	fs->ukfs_cdir = ukfs_getrvp(fs);
551 	pthread_spin_init(&fs->ukfs_spin, PTHREAD_PROCESS_SHARED);
552 	fs->ukfs_devfd = devfd;
553 	fs->ukfs_part = part;
554 	assert(rv == 0);
555 
556  out:
557 	if (rv) {
558 		if (fs) {
559 			if (fs->ukfs_rvp)
560 				rump_pub_vp_rele(fs->ukfs_rvp);
561 			free(fs);
562 			fs = NULL;
563 		}
564 		if (mounted)
565 			rump_sys_unmount(mountpath, MNT_FORCE);
566 		if (regged)
567 			rump_pub_etfs_remove(devpath);
568 		if (devfd != -1) {
569 			unlockdev(devfd, part);
570 			close(devfd);
571 		}
572 		ukfs_part_release(part);
573 		errno = rv;
574 	}
575 
576 	return fs;
577 }
578 
579 struct ukfs *
580 ukfs_mount(const char *vfsname, const char *devpath,
581 	const char *mountpath, int mntflags, void *arg, size_t alen)
582 {
583 
584 	return doukfsmount(vfsname, devpath, ukfs_part_na,
585 	    mountpath, mntflags, arg, alen);
586 }
587 
588 struct ukfs *
589 ukfs_mount_disk(const char *vfsname, const char *devpath,
590 	struct ukfs_part *part, const char *mountpath, int mntflags,
591 	void *arg, size_t alen)
592 {
593 
594 	return doukfsmount(vfsname, devpath, part,
595 	    mountpath, mntflags, arg, alen);
596 }
597 
598 int
599 ukfs_release(struct ukfs *fs, int flags)
600 {
601 
602 	if ((flags & UKFS_RELFLAG_NOUNMOUNT) == 0) {
603 		int rv, mntflag, error;
604 
605 		ukfs_chdir(fs, "/");
606 		mntflag = 0;
607 		if (flags & UKFS_RELFLAG_FORCE)
608 			mntflag = MNT_FORCE;
609 		rump_pub_lwp_alloc_and_switch(nextpid(fs), 1);
610 		rump_pub_vp_rele(fs->ukfs_rvp);
611 		fs->ukfs_rvp = NULL;
612 		rv = rump_sys_unmount(fs->ukfs_mountpath, mntflag);
613 		if (rv == -1) {
614 			error = errno;
615 			rump_pub_vfs_root(fs->ukfs_mp, &fs->ukfs_rvp, 0);
616 			rump_pub_lwp_release(rump_pub_lwp_curlwp());
617 			ukfs_chdir(fs, fs->ukfs_mountpath);
618 			errno = error;
619 			return -1;
620 		}
621 		rump_pub_lwp_release(rump_pub_lwp_curlwp());
622 	}
623 
624 	ukfs_part_release(fs->ukfs_part);
625 	if (fs->ukfs_devpath) {
626 		rump_pub_etfs_remove(fs->ukfs_devpath);
627 		free(fs->ukfs_devpath);
628 	}
629 	free(fs->ukfs_mountpath);
630 
631 	pthread_spin_destroy(&fs->ukfs_spin);
632 	if (fs->ukfs_devfd != -1) {
633 		unlockdev(fs->ukfs_devfd, fs->ukfs_part);
634 		close(fs->ukfs_devfd);
635 	}
636 	free(fs);
637 
638 	return 0;
639 }
640 
641 void
642 ukfs_part_release(struct ukfs_part *part)
643 {
644 
645 	if (part != ukfs_part_none && part != ukfs_part_na)
646 		free(part);
647 }
648 
649 #define STDCALL(ukfs, thecall)						\
650 	int rv = 0;							\
651 									\
652 	precall(ukfs);							\
653 	rv = thecall;							\
654 	postcall(ukfs);							\
655 	return rv;
656 
657 int
658 ukfs_opendir(struct ukfs *ukfs, const char *dirname, struct ukfs_dircookie **c)
659 {
660 	struct vnode *vp;
661 	int rv;
662 
663 	precall(ukfs);
664 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
665 	    NULL, &vp, NULL);
666 	postcall(ukfs);
667 
668 	if (rv == 0) {
669 		RUMP_VOP_UNLOCK(vp, 0);
670 	} else {
671 		errno = rv;
672 		rv = -1;
673 	}
674 
675 	/*LINTED*/
676 	*c = (struct ukfs_dircookie *)vp;
677 	return rv;
678 }
679 
680 static int
681 getmydents(struct vnode *vp, off_t *off, uint8_t *buf, size_t bufsize)
682 {
683 	struct uio *uio;
684 	size_t resid;
685 	int rv, eofflag;
686 	kauth_cred_t cred;
687 
688 	uio = rump_pub_uio_setup(buf, bufsize, *off, RUMPUIO_READ);
689 	cred = rump_pub_cred_suserget();
690 	rv = RUMP_VOP_READDIR(vp, uio, cred, &eofflag, NULL, NULL);
691 	rump_pub_cred_put(cred);
692 	RUMP_VOP_UNLOCK(vp, 0);
693 	*off = rump_pub_uio_getoff(uio);
694 	resid = rump_pub_uio_free(uio);
695 
696 	if (rv) {
697 		errno = rv;
698 		return -1;
699 	}
700 
701 	/* LINTED: not totally correct return type, but follows syscall */
702 	return bufsize - resid;
703 }
704 
705 /*ARGSUSED*/
706 int
707 ukfs_getdents_cookie(struct ukfs *ukfs, struct ukfs_dircookie *c, off_t *off,
708 	uint8_t *buf, size_t bufsize)
709 {
710 	/*LINTED*/
711 	struct vnode *vp = (struct vnode *)c;
712 
713 	RUMP_VOP_LOCK(vp, RUMP_LK_SHARED);
714 	return getmydents(vp, off, buf, bufsize);
715 }
716 
717 int
718 ukfs_getdents(struct ukfs *ukfs, const char *dirname, off_t *off,
719 	uint8_t *buf, size_t bufsize)
720 {
721 	struct vnode *vp;
722 	int rv;
723 
724 	precall(ukfs);
725 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
726 	    NULL, &vp, NULL);
727 	postcall(ukfs);
728 	if (rv) {
729 		errno = rv;
730 		return -1;
731 	}
732 
733 	rv = getmydents(vp, off, buf, bufsize);
734 	rump_pub_vp_rele(vp);
735 	return rv;
736 }
737 
738 /*ARGSUSED*/
739 int
740 ukfs_closedir(struct ukfs *ukfs, struct ukfs_dircookie *c)
741 {
742 
743 	/*LINTED*/
744 	rump_pub_vp_rele((struct vnode *)c);
745 	return 0;
746 }
747 
748 int
749 ukfs_open(struct ukfs *ukfs, const char *filename, int flags)
750 {
751 	int fd;
752 
753 	precall(ukfs);
754 	fd = rump_sys_open(filename, flags, 0);
755 	postcall(ukfs);
756 	if (fd == -1)
757 		return -1;
758 
759 	return fd;
760 }
761 
762 ssize_t
763 ukfs_read(struct ukfs *ukfs, const char *filename, off_t off,
764 	uint8_t *buf, size_t bufsize)
765 {
766 	int fd;
767 	ssize_t xfer = -1; /* XXXgcc */
768 
769 	precall(ukfs);
770 	fd = rump_sys_open(filename, RUMP_O_RDONLY, 0);
771 	if (fd == -1)
772 		goto out;
773 
774 	xfer = rump_sys_pread(fd, buf, bufsize, off);
775 	rump_sys_close(fd);
776 
777  out:
778 	postcall(ukfs);
779 	if (fd == -1) {
780 		return -1;
781 	}
782 	return xfer;
783 }
784 
785 /*ARGSUSED*/
786 ssize_t
787 ukfs_read_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen)
788 {
789 
790 	return rump_sys_pread(fd, buf, buflen, off);
791 }
792 
793 ssize_t
794 ukfs_write(struct ukfs *ukfs, const char *filename, off_t off,
795 	uint8_t *buf, size_t bufsize)
796 {
797 	int fd;
798 	ssize_t xfer = -1; /* XXXgcc */
799 
800 	precall(ukfs);
801 	fd = rump_sys_open(filename, RUMP_O_WRONLY, 0);
802 	if (fd == -1)
803 		goto out;
804 
805 	/* write and commit */
806 	xfer = rump_sys_pwrite(fd, buf, bufsize, off);
807 	if (xfer > 0)
808 		rump_sys_fsync(fd);
809 
810 	rump_sys_close(fd);
811 
812  out:
813 	postcall(ukfs);
814 	if (fd == -1) {
815 		return -1;
816 	}
817 	return xfer;
818 }
819 
820 /*ARGSUSED*/
821 ssize_t
822 ukfs_write_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen,
823 	int dosync)
824 {
825 	ssize_t xfer;
826 
827 	xfer = rump_sys_pwrite(fd, buf, buflen, off);
828 	if (xfer > 0 && dosync)
829 		rump_sys_fsync(fd);
830 
831 	return xfer;
832 }
833 
834 /*ARGSUSED*/
835 int
836 ukfs_close(struct ukfs *ukfs, int fd)
837 {
838 
839 	rump_sys_close(fd);
840 	return 0;
841 }
842 
843 int
844 ukfs_create(struct ukfs *ukfs, const char *filename, mode_t mode)
845 {
846 	int fd;
847 
848 	precall(ukfs);
849 	fd = rump_sys_open(filename, RUMP_O_WRONLY | RUMP_O_CREAT, mode);
850 	if (fd == -1)
851 		return -1;
852 	rump_sys_close(fd);
853 
854 	postcall(ukfs);
855 	return 0;
856 }
857 
858 int
859 ukfs_mknod(struct ukfs *ukfs, const char *path, mode_t mode, dev_t dev)
860 {
861 
862 	STDCALL(ukfs, rump_sys_mknod(path, mode, dev));
863 }
864 
865 int
866 ukfs_mkfifo(struct ukfs *ukfs, const char *path, mode_t mode)
867 {
868 
869 	STDCALL(ukfs, rump_sys_mkfifo(path, mode));
870 }
871 
872 int
873 ukfs_mkdir(struct ukfs *ukfs, const char *filename, mode_t mode)
874 {
875 
876 	STDCALL(ukfs, rump_sys_mkdir(filename, mode));
877 }
878 
879 int
880 ukfs_remove(struct ukfs *ukfs, const char *filename)
881 {
882 
883 	STDCALL(ukfs, rump_sys_unlink(filename));
884 }
885 
886 int
887 ukfs_rmdir(struct ukfs *ukfs, const char *filename)
888 {
889 
890 	STDCALL(ukfs, rump_sys_rmdir(filename));
891 }
892 
893 int
894 ukfs_link(struct ukfs *ukfs, const char *filename, const char *f_create)
895 {
896 
897 	STDCALL(ukfs, rump_sys_link(filename, f_create));
898 }
899 
900 int
901 ukfs_symlink(struct ukfs *ukfs, const char *filename, const char *linkname)
902 {
903 
904 	STDCALL(ukfs, rump_sys_symlink(filename, linkname));
905 }
906 
907 ssize_t
908 ukfs_readlink(struct ukfs *ukfs, const char *filename,
909 	char *linkbuf, size_t buflen)
910 {
911 	ssize_t rv;
912 
913 	precall(ukfs);
914 	rv = rump_sys_readlink(filename, linkbuf, buflen);
915 	postcall(ukfs);
916 	return rv;
917 }
918 
919 int
920 ukfs_rename(struct ukfs *ukfs, const char *from, const char *to)
921 {
922 
923 	STDCALL(ukfs, rump_sys_rename(from, to));
924 }
925 
926 int
927 ukfs_chdir(struct ukfs *ukfs, const char *path)
928 {
929 	struct vnode *newvp, *oldvp;
930 	int rv;
931 
932 	precall(ukfs);
933 	rv = rump_sys_chdir(path);
934 	if (rv == -1)
935 		goto out;
936 
937 	newvp = rump_pub_cdir_get();
938 	pthread_spin_lock(&ukfs->ukfs_spin);
939 	oldvp = ukfs->ukfs_cdir;
940 	ukfs->ukfs_cdir = newvp;
941 	pthread_spin_unlock(&ukfs->ukfs_spin);
942 	if (oldvp)
943 		rump_pub_vp_rele(oldvp);
944 
945  out:
946 	postcall(ukfs);
947 	return rv;
948 }
949 
950 /*
951  * If we want to use post-time_t file systems on pre-time_t hosts,
952  * we must translate the stat structure.  Since we don't currently
953  * have a general method for making compat calls in rump, special-case
954  * this one.
955  *
956  * Note that this does not allow making system calls to older rump
957  * kernels from newer hosts.
958  */
959 #define VERS_TIMECHANGE 599000700
960 
961 static int
962 needcompat(void)
963 {
964 
965 #ifdef __NetBSD__
966 	/*LINTED*/
967 	return __NetBSD_Version__ < VERS_TIMECHANGE
968 	    && rump_pub_getversion() >= VERS_TIMECHANGE;
969 #else
970 	return 0;
971 #endif
972 }
973 
974 int
975 ukfs_stat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
976 {
977 	int rv;
978 
979 	precall(ukfs);
980 	if (needcompat())
981 		rv = rump_pub_sys___stat30(filename, file_stat);
982 	else
983 		rv = rump_sys_stat(filename, file_stat);
984 	postcall(ukfs);
985 
986 	return rv;
987 }
988 
989 int
990 ukfs_lstat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
991 {
992 	int rv;
993 
994 	precall(ukfs);
995 	if (needcompat())
996 		rv = rump_pub_sys___lstat30(filename, file_stat);
997 	else
998 		rv = rump_sys_lstat(filename, file_stat);
999 	postcall(ukfs);
1000 
1001 	return rv;
1002 }
1003 
1004 int
1005 ukfs_chmod(struct ukfs *ukfs, const char *filename, mode_t mode)
1006 {
1007 
1008 	STDCALL(ukfs, rump_sys_chmod(filename, mode));
1009 }
1010 
1011 int
1012 ukfs_lchmod(struct ukfs *ukfs, const char *filename, mode_t mode)
1013 {
1014 
1015 	STDCALL(ukfs, rump_sys_lchmod(filename, mode));
1016 }
1017 
1018 int
1019 ukfs_chown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
1020 {
1021 
1022 	STDCALL(ukfs, rump_sys_chown(filename, uid, gid));
1023 }
1024 
1025 int
1026 ukfs_lchown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
1027 {
1028 
1029 	STDCALL(ukfs, rump_sys_lchown(filename, uid, gid));
1030 }
1031 
1032 int
1033 ukfs_chflags(struct ukfs *ukfs, const char *filename, u_long flags)
1034 {
1035 
1036 	STDCALL(ukfs, rump_sys_chflags(filename, flags));
1037 }
1038 
1039 int
1040 ukfs_lchflags(struct ukfs *ukfs, const char *filename, u_long flags)
1041 {
1042 
1043 	STDCALL(ukfs, rump_sys_lchflags(filename, flags));
1044 }
1045 
1046 int
1047 ukfs_utimes(struct ukfs *ukfs, const char *filename, const struct timeval *tptr)
1048 {
1049 
1050 	STDCALL(ukfs, rump_sys_utimes(filename, tptr));
1051 }
1052 
1053 int
1054 ukfs_lutimes(struct ukfs *ukfs, const char *filename,
1055 	      const struct timeval *tptr)
1056 {
1057 
1058 	STDCALL(ukfs, rump_sys_lutimes(filename, tptr));
1059 }
1060 
1061 /*
1062  * Dynamic module support
1063  */
1064 
1065 /* load one library */
1066 
1067 /*
1068  * XXX: the dlerror stuff isn't really threadsafe, but then again I
1069  * can't protect against other threads calling dl*() outside of ukfs,
1070  * so just live with it being flimsy
1071  */
1072 int
1073 ukfs_modload(const char *fname)
1074 {
1075 	void *handle;
1076 	struct modinfo **mi;
1077 	int error;
1078 
1079 	handle = dlopen(fname, RTLD_LAZY|RTLD_GLOBAL);
1080 	if (handle == NULL) {
1081 		const char *dlmsg = dlerror();
1082 		if (strstr(dlmsg, "Undefined symbol"))
1083 			return 0;
1084 		warnx("dlopen %s failed: %s\n", fname, dlmsg);
1085 		/* XXXerrno */
1086 		return -1;
1087 	}
1088 
1089 	mi = dlsym(handle, "__start_link_set_modules");
1090 	if (mi) {
1091 		error = rump_pub_module_init(*mi, NULL);
1092 		if (error)
1093 			goto errclose;
1094 		return 1;
1095 	}
1096 	error = EINVAL;
1097 
1098  errclose:
1099 	dlclose(handle);
1100 	errno = error;
1101 	return -1;
1102 }
1103 
1104 struct loadfail {
1105 	char *pname;
1106 
1107 	LIST_ENTRY(loadfail) entries;
1108 };
1109 
1110 #define RUMPFSMOD_PREFIX "librumpfs_"
1111 #define RUMPFSMOD_SUFFIX ".so"
1112 
1113 int
1114 ukfs_modload_dir(const char *dir)
1115 {
1116 	char nbuf[MAXPATHLEN+1], *p;
1117 	struct dirent entry, *result;
1118 	DIR *libdir;
1119 	struct loadfail *lf, *nlf;
1120 	int error, nloaded = 0, redo;
1121 	LIST_HEAD(, loadfail) lfs;
1122 
1123 	libdir = opendir(dir);
1124 	if (libdir == NULL)
1125 		return -1;
1126 
1127 	LIST_INIT(&lfs);
1128 	for (;;) {
1129 		if ((error = readdir_r(libdir, &entry, &result)) != 0)
1130 			break;
1131 		if (!result)
1132 			break;
1133 		if (strncmp(result->d_name, RUMPFSMOD_PREFIX,
1134 		    strlen(RUMPFSMOD_PREFIX)) != 0)
1135 			continue;
1136 		if (((p = strstr(result->d_name, RUMPFSMOD_SUFFIX)) == NULL)
1137 		    || strlen(p) != strlen(RUMPFSMOD_SUFFIX))
1138 			continue;
1139 		strlcpy(nbuf, dir, sizeof(nbuf));
1140 		strlcat(nbuf, "/", sizeof(nbuf));
1141 		strlcat(nbuf, result->d_name, sizeof(nbuf));
1142 		switch (ukfs_modload(nbuf)) {
1143 		case 0:
1144 			lf = malloc(sizeof(*lf));
1145 			if (lf == NULL) {
1146 				error = ENOMEM;
1147 				break;
1148 			}
1149 			lf->pname = strdup(nbuf);
1150 			if (lf->pname == NULL) {
1151 				free(lf);
1152 				error = ENOMEM;
1153 				break;
1154 			}
1155 			LIST_INSERT_HEAD(&lfs, lf, entries);
1156 			break;
1157 		case 1:
1158 			nloaded++;
1159 			break;
1160 		default:
1161 			/* ignore errors */
1162 			break;
1163 		}
1164 	}
1165 	closedir(libdir);
1166 	if (error && nloaded != 0)
1167 		error = 0;
1168 
1169 	/*
1170 	 * El-cheapo dependency calculator.  Just try to load the
1171 	 * modules n times in a loop
1172 	 */
1173 	for (redo = 1; redo;) {
1174 		redo = 0;
1175 		nlf = LIST_FIRST(&lfs);
1176 		while ((lf = nlf) != NULL) {
1177 			nlf = LIST_NEXT(lf, entries);
1178 			if (ukfs_modload(lf->pname) == 1) {
1179 				nloaded++;
1180 				redo = 1;
1181 				LIST_REMOVE(lf, entries);
1182 				free(lf->pname);
1183 				free(lf);
1184 			}
1185 		}
1186 	}
1187 
1188 	while ((lf = LIST_FIRST(&lfs)) != NULL) {
1189 		LIST_REMOVE(lf, entries);
1190 		free(lf->pname);
1191 		free(lf);
1192 	}
1193 
1194 	if (error && nloaded == 0) {
1195 		errno = error;
1196 		return -1;
1197 	}
1198 
1199 	return nloaded;
1200 }
1201 
1202 /* XXX: this code uses definitions from NetBSD, needs rumpdefs */
1203 ssize_t
1204 ukfs_vfstypes(char *buf, size_t buflen)
1205 {
1206 	int mib[3];
1207 	struct sysctlnode q, ans[128];
1208 	size_t alen;
1209 	int i;
1210 
1211 	mib[0] = CTL_VFS;
1212 	mib[1] = VFS_GENERIC;
1213 	mib[2] = CTL_QUERY;
1214 	alen = sizeof(ans);
1215 
1216 	memset(&q, 0, sizeof(q));
1217 	q.sysctl_flags = SYSCTL_VERSION;
1218 
1219 	if (rump_sys___sysctl(mib, 3, ans, &alen, &q, sizeof(q)) == -1) {
1220 		return -1;
1221 	}
1222 
1223 	for (i = 0; i < alen/sizeof(ans[0]); i++)
1224 		if (strcmp("fstypes", ans[i].sysctl_name) == 0)
1225 			break;
1226 	if (i == alen/sizeof(ans[0])) {
1227 		errno = ENXIO;
1228 		return -1;
1229 	}
1230 
1231 	mib[0] = CTL_VFS;
1232 	mib[1] = VFS_GENERIC;
1233 	mib[2] = ans[i].sysctl_num;
1234 
1235 	if (rump_sys___sysctl(mib, 3, buf, &buflen, NULL, 0) == -1) {
1236 		return -1;
1237 	}
1238 
1239 	return buflen;
1240 }
1241 
1242 /*
1243  * Utilities
1244  */
1245 static int
1246 builddirs(const char *pathname, mode_t mode,
1247 	int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs)
1248 {
1249 	char *f1, *f2;
1250 	int rv;
1251 	mode_t mask;
1252 	bool end;
1253 
1254 	/*ukfs_umask((mask = ukfs_umask(0)));*/
1255 	umask((mask = umask(0)));
1256 
1257 	f1 = f2 = strdup(pathname);
1258 	if (f1 == NULL) {
1259 		errno = ENOMEM;
1260 		return -1;
1261 	}
1262 
1263 	end = false;
1264 	for (;;) {
1265 		/* find next component */
1266 		f2 += strspn(f2, "/");
1267 		f2 += strcspn(f2, "/");
1268 		if (*f2 == '\0')
1269 			end = true;
1270 		else
1271 			*f2 = '\0';
1272 
1273 		rv = mkdirfn(fs, f1, mode & ~mask);
1274 		if (errno == EEXIST)
1275 			rv = 0;
1276 
1277 		if (rv == -1 || *f2 != '\0' || end)
1278 			break;
1279 
1280 		*f2 = '/';
1281 	}
1282 
1283 	free(f1);
1284 
1285 	return rv;
1286 }
1287 
1288 int
1289 ukfs_util_builddirs(struct ukfs *ukfs, const char *pathname, mode_t mode)
1290 {
1291 
1292 	return builddirs(pathname, mode, ukfs_mkdir, ukfs);
1293 }
1294