xref: /netbsd-src/lib/libukfs/ukfs.c (revision 09afef20633f5fe63d92dfe43ee3a9380dc06883)
1 /*	$NetBSD: ukfs.c,v 1.43 2009/12/03 14:23:49 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Finnish Cultural Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
 * This library enables access to file systems directly without
33  * involving system calls.
34  */
35 
36 #ifdef __linux__
37 #define _XOPEN_SOURCE 500
38 #define _BSD_SOURCE
39 #define _FILE_OFFSET_BITS 64
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/queue.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/mount.h>
47 
48 #include <assert.h>
49 #include <dirent.h>
50 #include <dlfcn.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <pthread.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <stdint.h>
60 
61 #include <rump/ukfs.h>
62 
63 #include <rump/rump.h>
64 #include <rump/rump_syscalls.h>
65 #include <rump/rumpuser.h>
66 
67 #include "ukfs_int_disklabel.h"
68 
69 #define UKFS_MODE_DEFAULT 0555
70 
/*
 * Per-mount state.  One of these is allocated by doukfsmount() and
 * freed by ukfs_release().
 */
struct ukfs {
	struct mount *ukfs_mp;		/* mount, filled in by rump_pub_vfs_getmp() */
	struct vnode *ukfs_rvp;		/* root vnode, filled in by rump_pub_vfs_root() */
	void *ukfs_specific;		/* caller data, see ukfs_{set,get}specific() */

	pthread_spinlock_t ukfs_spin;	/* taken around ukfs_nextpid and ukfs_cdir */
	pid_t ukfs_nextpid;		/* next value handed out by nextpid() */
	struct vnode *ukfs_cdir;	/* current directory, updated by ukfs_chdir() */
	int ukfs_devfd;			/* host fd of the backing device, -1 if none */
	char *ukfs_devpath;		/* strdup'd etfs key, NULL if not registered */
	char *ukfs_mountpath;		/* strdup'd mountpoint path */
};
83 
84 static int builddirs(const char *, mode_t,
85     int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *);
86 
87 struct mount *
88 ukfs_getmp(struct ukfs *ukfs)
89 {
90 
91 	return ukfs->ukfs_mp;
92 }
93 
94 struct vnode *
95 ukfs_getrvp(struct ukfs *ukfs)
96 {
97 	struct vnode *rvp;
98 
99 	rvp = ukfs->ukfs_rvp;
100 	rump_pub_vp_incref(rvp);
101 
102 	return rvp;
103 }
104 
105 void
106 ukfs_setspecific(struct ukfs *ukfs, void *priv)
107 {
108 
109 	ukfs->ukfs_specific = priv;
110 }
111 
112 void *
113 ukfs_getspecific(struct ukfs *ukfs)
114 {
115 
116 	return ukfs->ukfs_specific;
117 }
118 
/*
 * When DONT_WANT_PTHREAD_LINKAGE is defined, the spinlock operations
 * used in this file are replaced by macros that expand to nothing, so
 * no locking is performed at all.
 */
#ifdef DONT_WANT_PTHREAD_LINKAGE
#define pthread_spin_lock(a)
#define pthread_spin_unlock(a)
#define pthread_spin_init(a,b)
#define pthread_spin_destroy(a)
#endif
125 
126 static pid_t
127 nextpid(struct ukfs *ukfs)
128 {
129 	pid_t npid;
130 
131 	pthread_spin_lock(&ukfs->ukfs_spin);
132 	if (ukfs->ukfs_nextpid == 0)
133 		ukfs->ukfs_nextpid++;
134 	npid = ukfs->ukfs_nextpid++;
135 	pthread_spin_unlock(&ukfs->ukfs_spin);
136 
137 	return npid;
138 }
139 
140 static void
141 precall(struct ukfs *ukfs)
142 {
143 	struct vnode *rvp, *cvp;
144 
145 	rump_pub_lwp_alloc_and_switch(nextpid(ukfs), 1);
146 	rvp = ukfs_getrvp(ukfs);
147 	pthread_spin_lock(&ukfs->ukfs_spin);
148 	cvp = ukfs->ukfs_cdir;
149 	pthread_spin_unlock(&ukfs->ukfs_spin);
150 	rump_pub_rcvp_set(rvp, cvp); /* takes refs */
151 	rump_pub_vp_rele(rvp);
152 }
153 
154 static void
155 postcall(struct ukfs *ukfs)
156 {
157 	struct vnode *rvp;
158 
159 	rvp = ukfs_getrvp(ukfs);
160 	rump_pub_rcvp_set(NULL, rvp);
161 	rump_pub_vp_rele(rvp);
162 	rump_pub_lwp_release(rump_pub_lwp_curlwp());
163 }
164 
/*
 * Description of where a file system lives inside a device or image.
 * Instances are produced by ukfs_part_probe() and freed by
 * ukfs_part_release().
 */
struct ukfs_part {
	int part_type;		/* one of enum ukfs_parttype */
	char part_labelchar;	/* disklabel partition letter (DISKLABEL type) */
	off_t part_devoff;	/* start offset passed to etfs registration */
	off_t part_devsize;	/* size passed to etfs registration */
};
171 
/* Values for ukfs_part.part_type: no partition, disklabel entry, raw offset. */
enum ukfs_parttype { UKFS_PART_NONE, UKFS_PART_DISKLABEL, UKFS_PART_OFFSET };
173 
/*
 * Storage for the two "pseudo" partitions and the public pointers to
 * them.  Code in this file recognizes them by pointer comparison:
 *   ukfs_part_none - no partition magic; offset 0 and size
 *                    RUMP_ETFS_SIZE_ENDOFF
 *   ukfs_part_na   - passed by ukfs_mount(); doukfsmount() then skips
 *                    device processing and etfs registration
 * The pointers are assigned by ukfs_initparts().
 */
static struct ukfs_part ukfs__part_none = {
	.part_type = UKFS_PART_NONE,
	.part_devoff = 0,
	.part_devsize = RUMP_ETFS_SIZE_ENDOFF,
};
static struct ukfs_part ukfs__part_na;
struct ukfs_part *ukfs_part_none;
struct ukfs_part *ukfs_part_na;
182 
183 static void
184 ukfs_initparts(void)
185 {
186 
187 	ukfs_part_none = &ukfs__part_none;
188 	ukfs_part_na = &ukfs__part_na;
189 }
190 
191 int
192 _ukfs_init(int version)
193 {
194 	int rv;
195 
196 	if (version != UKFS_VERSION) {
197 		printf("incompatible ukfs version, %d vs. %d\n",
198 		    version, UKFS_VERSION);
199 		errno = EPROGMISMATCH;
200 		return -1;
201 	}
202 
203 	ukfs_initparts();
204 	if ((rv = rump_init()) != 0) {
205 		errno = rv;
206 		return -1;
207 	}
208 
209 	return 0;
210 }
211 
/*
 * mkdir callback for builddirs() which creates the directory with
 * rump_sys_mkdir().  The ukfs argument is ignored.
 */
/*ARGSUSED*/
static int
rumpmkdir(struct ukfs *dummy, const char *path, mode_t mode)
{
	int rv;

	rv = rump_sys_mkdir(path, mode);
	return rv;
}
219 
220 int
221 ukfs_part_probe(char *devpath, struct ukfs_part **partp)
222 {
223 	struct ukfs_part *part;
224 	char *p;
225 	int error = 0;
226 	int devfd = -1;
227 
228 	ukfs_initparts();
229 	if ((p = strstr(devpath, UKFS_PARTITION_SCANMAGIC)) != NULL) {
230 		fprintf(stderr, "ukfs: %%PART is deprecated.  use "
231 		    "%%DISKLABEL instead\n");
232 		errno = ENODEV;
233 		return -1;
234 	}
235 
236 	part = malloc(sizeof(*part));
237 	if (part == NULL) {
238 		errno = ENOMEM;
239 		return -1;
240 	}
241 	part->part_type = UKFS_PART_NONE;
242 
243 	/*
244 	 * Check for magic in pathname:
245 	 *   disklabel: /regularpath%DISKLABEL:labelchar%\0
246 	 *     offsets: /regularpath%OFFSET:start,end%\0
247 	 */
248 #define MAGICADJ_DISKLABEL(p, n) (p+sizeof(UKFS_DISKLABEL_SCANMAGIC)-1+n)
249 	if ((p = strstr(devpath, UKFS_DISKLABEL_SCANMAGIC)) != NULL
250 	    && strlen(p) == UKFS_DISKLABEL_MAGICLEN
251 	    && *(MAGICADJ_DISKLABEL(p,1)) == '%') {
252 		if (*(MAGICADJ_DISKLABEL(p,0)) >= 'a' &&
253 		    *(MAGICADJ_DISKLABEL(p,0)) < 'a' + UKFS_MAXPARTITIONS) {
254 			struct ukfs__disklabel dl;
255 			struct ukfs__partition *pp;
256 			char buf[65536];
257 			char labelchar = *(MAGICADJ_DISKLABEL(p,0));
258 			int partition = labelchar - 'a';
259 
260 			*p = '\0';
261 			devfd = open(devpath, O_RDONLY);
262 			if (devfd == -1) {
263 				error = errno;
264 				goto out;
265 			}
266 
267 			/* Locate the disklabel and find the partition. */
268 			if (pread(devfd, buf, sizeof(buf), 0) == -1) {
269 				error = errno;
270 				goto out;
271 			}
272 
273 			if (ukfs__disklabel_scan(&dl, buf, sizeof(buf)) != 0) {
274 				error = ENOENT;
275 				goto out;
276 			}
277 
278 			if (dl.d_npartitions < partition) {
279 				error = ENOENT;
280 				goto out;
281 			}
282 
283 			pp = &dl.d_partitions[partition];
284 			part->part_type = UKFS_PART_DISKLABEL;
285 			part->part_labelchar = labelchar;
286 			part->part_devoff = pp->p_offset << DEV_BSHIFT;
287 			part->part_devsize = pp->p_size << DEV_BSHIFT;
288 		} else {
289 			error = EINVAL;
290 		}
291 #define MAGICADJ_OFFSET(p, n) (p+sizeof(UKFS_OFFSET_SCANMAGIC)-1+n)
292 	} else if (((p = strstr(devpath, UKFS_OFFSET_SCANMAGIC)) != NULL)
293 	    && (strlen(p) >= UKFS_OFFSET_MINLEN)) {
294 		char *comma, *pers, *ep, *nptr;
295 		u_quad_t val;
296 
297 		comma = strchr(p, ',');
298 		if (comma == NULL) {
299 			error = EINVAL;
300 			goto out;
301 		}
302 		pers = strchr(comma, '%');
303 		if (pers == NULL) {
304 			error = EINVAL;
305 			goto out;
306 		}
307 		*comma = '\0';
308 		*pers = '\0';
309 		*p = '\0';
310 
311 		nptr = MAGICADJ_OFFSET(p,0);
312 		/* check if string is negative */
313 		if (*nptr == '-') {
314 			error = ERANGE;
315 			goto out;
316 		}
317 		val = strtouq(nptr, &ep, 10);
318 		if (val == UQUAD_MAX) {
319 			error = ERANGE;
320 			goto out;
321 		}
322 		if (*ep != '\0') {
323 			error = EADDRNOTAVAIL; /* creative ;) */
324 			goto out;
325 		}
326 		part->part_devoff = val;
327 
328 		/* omstart */
329 
330 		nptr = comma+1;
331 		/* check if string is negative */
332 		if (*nptr == '-') {
333 			error = ERANGE;
334 			goto out;
335 		}
336 		val = strtouq(nptr, &ep, 10);
337 		if (val == UQUAD_MAX) {
338 			error = ERANGE;
339 			goto out;
340 		}
341 		if (*ep != '\0') {
342 			error = EADDRNOTAVAIL; /* creative ;) */
343 			goto out;
344 		}
345 		part->part_devsize = val;
346 		part->part_type = UKFS_PART_OFFSET;
347 	} else {
348 		free(part);
349 		part = ukfs_part_none;
350 	}
351 
352  out:
353 	if (devfd != -1)
354 		close(devfd);
355 	if (error) {
356 		free(part);
357 		errno = error;
358 	} else {
359 		*partp = part;
360 	}
361 
362 	return error ? -1 : 0;
363 }
364 
365 int
366 ukfs_part_tostring(struct ukfs_part *part, char *str, size_t strsize)
367 {
368 	int rv;
369 
370 	*str = '\0';
371 	/* "pseudo" values */
372 	if (part == ukfs_part_na) {
373 		errno = EINVAL;
374 		return -1;
375 	}
376 	if (part == ukfs_part_none)
377 		return 0;
378 
379 	rv = 0;
380 	switch (part->part_type) {
381 	case UKFS_PART_NONE:
382 		break;
383 
384 	case UKFS_PART_DISKLABEL:
385 		snprintf(str, strsize, "%%DISKLABEL:%c%%",part->part_labelchar);
386 		rv = 1;
387 		break;
388 
389 	case UKFS_PART_OFFSET:
390 		snprintf(str, strsize, "[%llu,%llu]",
391 		    (unsigned long long)part->part_devoff,
392 		    (unsigned long long)(part->part_devoff+part->part_devsize));
393 		rv = 1;
394 		break;
395 	}
396 
397 	return rv;
398 }
399 
/*
 * Open the disk file and flock it.
 *
 * The device is opened read-only or read-write depending on rdonly.
 * If it is not a block device, a non-blocking shared (rdonly) or
 * exclusive flock() is taken and the descriptor is kept open.
 * Block devices are closed again and not locked.
 *
 * The part argument is not used by this function.
 *
 * Returns: 0 on success
 *          otherwise the errno value of the operation that failed
 *          (open, fstat or flock); the descriptor is closed in that case
 *
 * dfdp: -1  device is not open
 *        n  device is open
 */
static int
process_diskdevice(const char *devpath, struct ukfs_part *part, int rdonly,
	int *dfdp)
{
	struct stat sb;
	int rv = 0, devfd;

	/* defaults */
	*dfdp = -1;

	devfd = open(devpath, rdonly ? O_RDONLY : O_RDWR);
	if (devfd == -1) {
		rv = errno;
		goto out;
	}

	if (fstat(devfd, &sb) == -1) {
		rv = errno;
		goto out;
	}

	/*
	 * We do this only for non-block device since the
	 * (NetBSD) kernel allows block device open only once.
	 * We also need to close the device for fairly obvious reasons.
	 */
	if (!S_ISBLK(sb.st_mode)) {
		if (flock(devfd, LOCK_NB | (rdonly ? LOCK_SH:LOCK_EX)) == -1) {
			/* save errno first: warnx() may overwrite it */
			rv = errno;
			warnx("ukfs_mount: cannot get %s lock on "
			    "device", rdonly ? "shared" : "exclusive");
			goto out;
		}
	} else {
		close(devfd);
		devfd = -1;
	}
	*dfdp = devfd;

 out:
	if (rv && devfd != -1)
		close(devfd);

	return rv;
}
468 
469 static struct ukfs *
470 doukfsmount(const char *vfsname, const char *devpath, struct ukfs_part *part,
471 	const char *mountpath, int mntflags, void *arg, size_t alen)
472 {
473 	struct ukfs *fs = NULL;
474 	int rv = 0, devfd = -1;
475 	int mounted = 0;
476 	int regged = 0;
477 
478 	if (part != ukfs_part_na) {
479 		if ((rv = process_diskdevice(devpath, part,
480 		    mntflags & MNT_RDONLY, &devfd)) != 0)
481 			goto out;
482 	}
483 
484 	fs = malloc(sizeof(struct ukfs));
485 	if (fs == NULL) {
486 		rv = ENOMEM;
487 		goto out;
488 	}
489 	memset(fs, 0, sizeof(struct ukfs));
490 
491 	/* create our mountpoint.  this is never removed. */
492 	if (builddirs(mountpath, 0777, rumpmkdir, NULL) == -1) {
493 		if (errno != EEXIST) {
494 			rv = errno;
495 			goto out;
496 		}
497 	}
498 
499 	if (part != ukfs_part_na) {
500 		/* LINTED */
501 		rv = rump_pub_etfs_register_withsize(devpath, devpath,
502 		    RUMP_ETFS_BLK, part->part_devoff, part->part_devsize);
503 		if (rv) {
504 			goto out;
505 		}
506 		regged = 1;
507 	}
508 
509 	rv = rump_sys_mount(vfsname, mountpath, mntflags, arg, alen);
510 	if (rv) {
511 		rv = errno;
512 		goto out;
513 	}
514 	mounted = 1;
515 	rv = rump_pub_vfs_getmp(mountpath, &fs->ukfs_mp);
516 	if (rv) {
517 		goto out;
518 	}
519 	rv = rump_pub_vfs_root(fs->ukfs_mp, &fs->ukfs_rvp, 0);
520 	if (rv) {
521 		goto out;
522 	}
523 
524 	if (regged) {
525 		fs->ukfs_devpath = strdup(devpath);
526 	}
527 	fs->ukfs_mountpath = strdup(mountpath);
528 	fs->ukfs_cdir = ukfs_getrvp(fs);
529 	pthread_spin_init(&fs->ukfs_spin, PTHREAD_PROCESS_SHARED);
530 	fs->ukfs_devfd = devfd;
531 	assert(rv == 0);
532 
533  out:
534 	ukfs_part_release(part);
535 	if (rv) {
536 		if (fs) {
537 			if (fs->ukfs_rvp)
538 				rump_pub_vp_rele(fs->ukfs_rvp);
539 			free(fs);
540 			fs = NULL;
541 		}
542 		if (mounted)
543 			rump_sys_unmount(mountpath, MNT_FORCE);
544 		if (regged)
545 			rump_pub_etfs_remove(devpath);
546 		if (devfd != -1) {
547 			flock(devfd, LOCK_UN);
548 			close(devfd);
549 		}
550 		errno = rv;
551 	}
552 
553 	return fs;
554 }
555 
556 struct ukfs *
557 ukfs_mount(const char *vfsname, const char *devpath,
558 	const char *mountpath, int mntflags, void *arg, size_t alen)
559 {
560 
561 	return doukfsmount(vfsname, devpath, ukfs_part_na,
562 	    mountpath, mntflags, arg, alen);
563 }
564 
/*
 * Mount a file system from the given partition of a device or image.
 * All arguments are passed straight on to doukfsmount().
 */
struct ukfs *
ukfs_mount_disk(const char *vfsname, const char *devpath,
	struct ukfs_part *part, const char *mountpath, int mntflags,
	void *arg, size_t alen)
{
	struct ukfs *fs;

	fs = doukfsmount(vfsname, devpath, part, mountpath, mntflags,
	    arg, alen);
	return fs;
}
574 
575 int
576 ukfs_release(struct ukfs *fs, int flags)
577 {
578 
579 	if ((flags & UKFS_RELFLAG_NOUNMOUNT) == 0) {
580 		int rv, mntflag, error;
581 
582 		ukfs_chdir(fs, "/");
583 		mntflag = 0;
584 		if (flags & UKFS_RELFLAG_FORCE)
585 			mntflag = MNT_FORCE;
586 		rump_pub_lwp_alloc_and_switch(nextpid(fs), 1);
587 		rump_pub_vp_rele(fs->ukfs_rvp);
588 		fs->ukfs_rvp = NULL;
589 		rv = rump_sys_unmount(fs->ukfs_mountpath, mntflag);
590 		if (rv == -1) {
591 			error = errno;
592 			rump_pub_vfs_root(fs->ukfs_mp, &fs->ukfs_rvp, 0);
593 			rump_pub_lwp_release(rump_pub_lwp_curlwp());
594 			ukfs_chdir(fs, fs->ukfs_mountpath);
595 			errno = error;
596 			return -1;
597 		}
598 		rump_pub_lwp_release(rump_pub_lwp_curlwp());
599 	}
600 
601 	if (fs->ukfs_devpath) {
602 		rump_pub_etfs_remove(fs->ukfs_devpath);
603 		free(fs->ukfs_devpath);
604 	}
605 	free(fs->ukfs_mountpath);
606 
607 	pthread_spin_destroy(&fs->ukfs_spin);
608 	if (fs->ukfs_devfd != -1) {
609 		flock(fs->ukfs_devfd, LOCK_UN);
610 		close(fs->ukfs_devfd);
611 	}
612 	free(fs);
613 
614 	return 0;
615 }
616 
617 void
618 ukfs_part_release(struct ukfs_part *part)
619 {
620 
621 	if (part != ukfs_part_none && part != ukfs_part_na)
622 		free(part);
623 }
624 
/*
 * Body of a simple wrapper: evaluate "thecall" between precall() and
 * postcall() and return its int result.  The expansion declares a
 * local "rv" and ends with a return statement, so it is used as the
 * complete body of a function returning int.
 */
#define STDCALL(ukfs, thecall)						\
	int rv = 0;							\
									\
	precall(ukfs);							\
	rv = thecall;							\
	postcall(ukfs);							\
	return rv;
632 
633 int
634 ukfs_opendir(struct ukfs *ukfs, const char *dirname, struct ukfs_dircookie **c)
635 {
636 	struct vnode *vp;
637 	int rv;
638 
639 	precall(ukfs);
640 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
641 	    NULL, &vp, NULL);
642 	postcall(ukfs);
643 
644 	if (rv == 0) {
645 		RUMP_VOP_UNLOCK(vp, 0);
646 	} else {
647 		errno = rv;
648 		rv = -1;
649 	}
650 
651 	/*LINTED*/
652 	*c = (struct ukfs_dircookie *)vp;
653 	return rv;
654 }
655 
656 static int
657 getmydents(struct vnode *vp, off_t *off, uint8_t *buf, size_t bufsize)
658 {
659 	struct uio *uio;
660 	size_t resid;
661 	int rv, eofflag;
662 	kauth_cred_t cred;
663 
664 	uio = rump_pub_uio_setup(buf, bufsize, *off, RUMPUIO_READ);
665 	cred = rump_pub_cred_suserget();
666 	rv = RUMP_VOP_READDIR(vp, uio, cred, &eofflag, NULL, NULL);
667 	rump_pub_cred_put(cred);
668 	RUMP_VOP_UNLOCK(vp, 0);
669 	*off = rump_pub_uio_getoff(uio);
670 	resid = rump_pub_uio_free(uio);
671 
672 	if (rv) {
673 		errno = rv;
674 		return -1;
675 	}
676 
677 	/* LINTED: not totally correct return type, but follows syscall */
678 	return bufsize - resid;
679 }
680 
681 /*ARGSUSED*/
682 int
683 ukfs_getdents_cookie(struct ukfs *ukfs, struct ukfs_dircookie *c, off_t *off,
684 	uint8_t *buf, size_t bufsize)
685 {
686 	/*LINTED*/
687 	struct vnode *vp = (struct vnode *)c;
688 
689 	RUMP_VOP_LOCK(vp, RUMP_LK_SHARED);
690 	return getmydents(vp, off, buf, bufsize);
691 }
692 
693 int
694 ukfs_getdents(struct ukfs *ukfs, const char *dirname, off_t *off,
695 	uint8_t *buf, size_t bufsize)
696 {
697 	struct vnode *vp;
698 	int rv;
699 
700 	precall(ukfs);
701 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
702 	    NULL, &vp, NULL);
703 	postcall(ukfs);
704 	if (rv) {
705 		errno = rv;
706 		return -1;
707 	}
708 
709 	rv = getmydents(vp, off, buf, bufsize);
710 	rump_pub_vp_rele(vp);
711 	return rv;
712 }
713 
/*
 * Release a directory cookie obtained from ukfs_opendir() by dropping
 * the vnode reference.  Always returns 0.
 */
/*ARGSUSED*/
int
ukfs_closedir(struct ukfs *ukfs, struct ukfs_dircookie *c)
{
	struct vnode *dvp;

	/*LINTED*/
	dvp = (struct vnode *)c;
	rump_pub_vp_rele(dvp);

	return 0;
}
723 
/*
 * Open a file with rump_sys_open() using mode 0.
 * Returns the descriptor, or -1 on failure.
 */
int
ukfs_open(struct ukfs *ukfs, const char *filename, int flags)
{
	int fd;

	precall(ukfs);
	fd = rump_sys_open(filename, flags, 0);
	postcall(ukfs);

	/* fd is already -1 on failure */
	return fd;
}
737 
738 ssize_t
739 ukfs_read(struct ukfs *ukfs, const char *filename, off_t off,
740 	uint8_t *buf, size_t bufsize)
741 {
742 	int fd;
743 	ssize_t xfer = -1; /* XXXgcc */
744 
745 	precall(ukfs);
746 	fd = rump_sys_open(filename, RUMP_O_RDONLY, 0);
747 	if (fd == -1)
748 		goto out;
749 
750 	xfer = rump_sys_pread(fd, buf, bufsize, off);
751 	rump_sys_close(fd);
752 
753  out:
754 	postcall(ukfs);
755 	if (fd == -1) {
756 		return -1;
757 	}
758 	return xfer;
759 }
760 
/*
 * Positional read on an already open descriptor.  The ukfs argument
 * is not used.  Returns the result of rump_sys_pread().
 */
/*ARGSUSED*/
ssize_t
ukfs_read_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen)
{
	ssize_t nread;

	nread = rump_sys_pread(fd, buf, buflen, off);
	return nread;
}
768 
769 ssize_t
770 ukfs_write(struct ukfs *ukfs, const char *filename, off_t off,
771 	uint8_t *buf, size_t bufsize)
772 {
773 	int fd;
774 	ssize_t xfer = -1; /* XXXgcc */
775 
776 	precall(ukfs);
777 	fd = rump_sys_open(filename, RUMP_O_WRONLY, 0);
778 	if (fd == -1)
779 		goto out;
780 
781 	/* write and commit */
782 	xfer = rump_sys_pwrite(fd, buf, bufsize, off);
783 	if (xfer > 0)
784 		rump_sys_fsync(fd);
785 
786 	rump_sys_close(fd);
787 
788  out:
789 	postcall(ukfs);
790 	if (fd == -1) {
791 		return -1;
792 	}
793 	return xfer;
794 }
795 
/*
 * Positional write on an already open descriptor.  If dosync is set
 * and something was written, the descriptor is fsync'ed.  The ukfs
 * argument is not used.  Returns the result of rump_sys_pwrite().
 */
/*ARGSUSED*/
ssize_t
ukfs_write_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen,
	int dosync)
{
	ssize_t nwritten = rump_sys_pwrite(fd, buf, buflen, off);

	if (dosync && nwritten > 0)
		rump_sys_fsync(fd);

	return nwritten;
}
809 
/*
 * Close a descriptor with rump_sys_close().  The result of the close
 * is not reported; this always returns 0.
 */
/*ARGSUSED*/
int
ukfs_close(struct ukfs *ukfs, int fd)
{

	(void)rump_sys_close(fd);

	return 0;
}
818 
819 int
820 ukfs_create(struct ukfs *ukfs, const char *filename, mode_t mode)
821 {
822 	int fd;
823 
824 	precall(ukfs);
825 	fd = rump_sys_open(filename, RUMP_O_WRONLY | RUMP_O_CREAT, mode);
826 	if (fd == -1)
827 		return -1;
828 	rump_sys_close(fd);
829 
830 	postcall(ukfs);
831 	return 0;
832 }
833 
/* Create a device node; returns the result of rump_sys_mknod(). */
int
ukfs_mknod(struct ukfs *ukfs, const char *path, mode_t mode, dev_t dev)
{
	int error;

	precall(ukfs);
	error = rump_sys_mknod(path, mode, dev);
	postcall(ukfs);

	return error;
}
840 
/* Create a fifo; returns the result of rump_sys_mkfifo(). */
int
ukfs_mkfifo(struct ukfs *ukfs, const char *path, mode_t mode)
{
	int error;

	precall(ukfs);
	error = rump_sys_mkfifo(path, mode);
	postcall(ukfs);

	return error;
}
847 
/* Create a directory; returns the result of rump_sys_mkdir(). */
int
ukfs_mkdir(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int error;

	precall(ukfs);
	error = rump_sys_mkdir(filename, mode);
	postcall(ukfs);

	return error;
}
854 
/* Remove a file; returns the result of rump_sys_unlink(). */
int
ukfs_remove(struct ukfs *ukfs, const char *filename)
{
	int error;

	precall(ukfs);
	error = rump_sys_unlink(filename);
	postcall(ukfs);

	return error;
}
861 
/* Remove a directory; returns the result of rump_sys_rmdir(). */
int
ukfs_rmdir(struct ukfs *ukfs, const char *filename)
{
	int error;

	precall(ukfs);
	error = rump_sys_rmdir(filename);
	postcall(ukfs);

	return error;
}
868 
/*
 * Create a hard link; the arguments are passed to rump_sys_link() in
 * the order (filename, f_create) and its result is returned.
 */
int
ukfs_link(struct ukfs *ukfs, const char *filename, const char *f_create)
{
	int error;

	precall(ukfs);
	error = rump_sys_link(filename, f_create);
	postcall(ukfs);

	return error;
}
875 
/*
 * Create a symbolic link; the arguments are passed to
 * rump_sys_symlink() in the order (filename, linkname) and its result
 * is returned.
 */
int
ukfs_symlink(struct ukfs *ukfs, const char *filename, const char *linkname)
{
	int error;

	precall(ukfs);
	error = rump_sys_symlink(filename, linkname);
	postcall(ukfs);

	return error;
}
882 
/*
 * Read the target of a symbolic link into linkbuf.
 * Returns the result of rump_sys_readlink().
 */
ssize_t
ukfs_readlink(struct ukfs *ukfs, const char *filename,
	char *linkbuf, size_t buflen)
{
	ssize_t len;

	precall(ukfs);
	len = rump_sys_readlink(filename, linkbuf, buflen);
	postcall(ukfs);

	return len;
}
894 
/* Rename a file; returns the result of rump_sys_rename(). */
int
ukfs_rename(struct ukfs *ukfs, const char *from, const char *to)
{
	int error;

	precall(ukfs);
	error = rump_sys_rename(from, to);
	postcall(ukfs);

	return error;
}
901 
902 int
903 ukfs_chdir(struct ukfs *ukfs, const char *path)
904 {
905 	struct vnode *newvp, *oldvp;
906 	int rv;
907 
908 	precall(ukfs);
909 	rv = rump_sys_chdir(path);
910 	if (rv == -1)
911 		goto out;
912 
913 	newvp = rump_pub_cdir_get();
914 	pthread_spin_lock(&ukfs->ukfs_spin);
915 	oldvp = ukfs->ukfs_cdir;
916 	ukfs->ukfs_cdir = newvp;
917 	pthread_spin_unlock(&ukfs->ukfs_spin);
918 	if (oldvp)
919 		rump_pub_vp_rele(oldvp);
920 
921  out:
922 	postcall(ukfs);
923 	return rv;
924 }
925 
926 /*
927  * If we want to use post-time_t file systems on pre-time_t hosts,
928  * we must translate the stat structure.  Since we don't currently
929  * have a general method for making compat calls in rump, special-case
930  * this one.
931  *
932  * Note that this does not allow making system calls to older rump
933  * kernels from newer hosts.
934  */
935 #define VERS_TIMECHANGE 599000700
936 
/*
 * Report whether stat calls must go through the compat entry points:
 * true only on a NetBSD host older than VERS_TIMECHANGE talking to a
 * rump kernel at or past it.  Always 0 on other hosts.
 */
static int
needcompat(void)
{

#ifdef __NetBSD__
	/*LINTED*/
	if (!(__NetBSD_Version__ < VERS_TIMECHANGE))
		return 0;
	return rump_pub_getversion() >= VERS_TIMECHANGE;
#else
	return 0;
#endif
}
949 
/*
 * stat a file, using the __stat30 compat entry point when
 * needcompat() says so.  Returns the result of the stat call.
 */
int
ukfs_stat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{
	int rv;

	precall(ukfs);
	rv = needcompat()
	    ? rump_pub_sys___stat30(filename, file_stat)
	    : rump_sys_stat(filename, file_stat);
	postcall(ukfs);

	return rv;
}
964 
/*
 * lstat a file, using the __lstat30 compat entry point when
 * needcompat() says so.  Returns the result of the lstat call.
 */
int
ukfs_lstat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{
	int rv;

	precall(ukfs);
	rv = needcompat()
	    ? rump_pub_sys___lstat30(filename, file_stat)
	    : rump_sys_lstat(filename, file_stat);
	postcall(ukfs);

	return rv;
}
979 
/* Change file mode; returns the result of rump_sys_chmod(). */
int
ukfs_chmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int error;

	precall(ukfs);
	error = rump_sys_chmod(filename, mode);
	postcall(ukfs);

	return error;
}
986 
/* Change file mode via rump_sys_lchmod(); returns its result. */
int
ukfs_lchmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int error;

	precall(ukfs);
	error = rump_sys_lchmod(filename, mode);
	postcall(ukfs);

	return error;
}
993 
/* Change file ownership; returns the result of rump_sys_chown(). */
int
ukfs_chown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int error;

	precall(ukfs);
	error = rump_sys_chown(filename, uid, gid);
	postcall(ukfs);

	return error;
}
1000 
/* Change file ownership via rump_sys_lchown(); returns its result. */
int
ukfs_lchown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int error;

	precall(ukfs);
	error = rump_sys_lchown(filename, uid, gid);
	postcall(ukfs);

	return error;
}
1007 
1008 int
1009 ukfs_chflags(struct ukfs *ukfs, const char *filename, u_long flags)
1010 {
1011 
1012 	STDCALL(ukfs, rump_sys_chflags(filename, flags));
1013 }
1014 
1015 int
1016 ukfs_lchflags(struct ukfs *ukfs, const char *filename, u_long flags)
1017 {
1018 
1019 	STDCALL(ukfs, rump_sys_lchflags(filename, flags));
1020 }
1021 
/* Set file times; returns the result of rump_sys_utimes(). */
int
ukfs_utimes(struct ukfs *ukfs, const char *filename, const struct timeval *tptr)
{
	int error;

	precall(ukfs);
	error = rump_sys_utimes(filename, tptr);
	postcall(ukfs);

	return error;
}
1028 
/* Set file times via rump_sys_lutimes(); returns its result. */
int
ukfs_lutimes(struct ukfs *ukfs, const char *filename,
	      const struct timeval *tptr)
{
	int error;

	precall(ukfs);
	error = rump_sys_lutimes(filename, tptr);
	postcall(ukfs);

	return error;
}
1036 
1037 /*
1038  * Dynamic module support
1039  */
1040 
1041 /* load one library */
1042 
1043 /*
1044  * XXX: the dlerror stuff isn't really threadsafe, but then again I
1045  * can't protect against other threads calling dl*() outside of ukfs,
1046  * so just live with it being flimsy
1047  */
/*
 * Load one module library and initialize the module found at
 * "__start_link_set_modules" in it.
 *
 * Returns  1 if the module was loaded and initialized,
 *          0 if dlopen() failed with an "Undefined symbol" message
 *            (ukfs_modload_dir() retries such libraries later),
 *         -1 on any other failure.  errno is set for failures after
 *            a successful dlopen(), but not for dlopen() failure.
 */
int
ukfs_modload(const char *fname)
{
	void *handle;
	struct modinfo **mi;
	int error;

	handle = dlopen(fname, RTLD_LAZY|RTLD_GLOBAL);
	if (handle == NULL) {
		const char *dlmsg = dlerror();

		/*
		 * dlerror() returns NULL if no error is pending, e.g.
		 * when another thread already consumed the message.
		 */
		if (dlmsg == NULL)
			dlmsg = "unknown error";
		if (strstr(dlmsg, "Undefined symbol"))
			return 0;
		/* warnx() appends the newline itself */
		warnx("dlopen %s failed: %s", fname, dlmsg);
		/* XXXerrno */
		return -1;
	}

	mi = dlsym(handle, "__start_link_set_modules");
	if (mi) {
		error = rump_pub_module_init(*mi, NULL);
		if (error)
			goto errclose;
		/* the handle stays open for as long as the module is in use */
		return 1;
	}
	error = EINVAL;

 errclose:
	dlclose(handle);
	errno = error;
	return -1;
}
1079 
/*
 * List entry used by ukfs_modload_dir() to remember a library for
 * which ukfs_modload() returned 0, so that loading can be retried.
 */
struct loadfail {
	char *pname;			/* strdup'd path of the library */

	LIST_ENTRY(loadfail) entries;	/* linkage on the retry list */
};
1085 
1086 #define RUMPFSMOD_PREFIX "librumpfs_"
1087 #define RUMPFSMOD_SUFFIX ".so"
1088 
1089 int
1090 ukfs_modload_dir(const char *dir)
1091 {
1092 	char nbuf[MAXPATHLEN+1], *p;
1093 	struct dirent entry, *result;
1094 	DIR *libdir;
1095 	struct loadfail *lf, *nlf;
1096 	int error, nloaded = 0, redo;
1097 	LIST_HEAD(, loadfail) lfs;
1098 
1099 	libdir = opendir(dir);
1100 	if (libdir == NULL)
1101 		return -1;
1102 
1103 	LIST_INIT(&lfs);
1104 	for (;;) {
1105 		if ((error = readdir_r(libdir, &entry, &result)) != 0)
1106 			break;
1107 		if (!result)
1108 			break;
1109 		if (strncmp(result->d_name, RUMPFSMOD_PREFIX,
1110 		    strlen(RUMPFSMOD_PREFIX)) != 0)
1111 			continue;
1112 		if (((p = strstr(result->d_name, RUMPFSMOD_SUFFIX)) == NULL)
1113 		    || strlen(p) != strlen(RUMPFSMOD_SUFFIX))
1114 			continue;
1115 		strlcpy(nbuf, dir, sizeof(nbuf));
1116 		strlcat(nbuf, "/", sizeof(nbuf));
1117 		strlcat(nbuf, result->d_name, sizeof(nbuf));
1118 		switch (ukfs_modload(nbuf)) {
1119 		case 0:
1120 			lf = malloc(sizeof(*lf));
1121 			if (lf == NULL) {
1122 				error = ENOMEM;
1123 				break;
1124 			}
1125 			lf->pname = strdup(nbuf);
1126 			if (lf->pname == NULL) {
1127 				free(lf);
1128 				error = ENOMEM;
1129 				break;
1130 			}
1131 			LIST_INSERT_HEAD(&lfs, lf, entries);
1132 			break;
1133 		case 1:
1134 			nloaded++;
1135 			break;
1136 		default:
1137 			/* ignore errors */
1138 			break;
1139 		}
1140 	}
1141 	closedir(libdir);
1142 	if (error && nloaded != 0)
1143 		error = 0;
1144 
1145 	/*
1146 	 * El-cheapo dependency calculator.  Just try to load the
1147 	 * modules n times in a loop
1148 	 */
1149 	for (redo = 1; redo;) {
1150 		redo = 0;
1151 		nlf = LIST_FIRST(&lfs);
1152 		while ((lf = nlf) != NULL) {
1153 			nlf = LIST_NEXT(lf, entries);
1154 			if (ukfs_modload(lf->pname) == 1) {
1155 				nloaded++;
1156 				redo = 1;
1157 				LIST_REMOVE(lf, entries);
1158 				free(lf->pname);
1159 				free(lf);
1160 			}
1161 		}
1162 	}
1163 
1164 	while ((lf = LIST_FIRST(&lfs)) != NULL) {
1165 		LIST_REMOVE(lf, entries);
1166 		free(lf->pname);
1167 		free(lf);
1168 	}
1169 
1170 	if (error && nloaded == 0) {
1171 		errno = error;
1172 		return -1;
1173 	}
1174 
1175 	return nloaded;
1176 }
1177 
1178 /* XXX: this code uses definitions from NetBSD, needs rumpdefs */
1179 ssize_t
1180 ukfs_vfstypes(char *buf, size_t buflen)
1181 {
1182 	int mib[3];
1183 	struct sysctlnode q, ans[128];
1184 	size_t alen;
1185 	int i;
1186 
1187 	mib[0] = CTL_VFS;
1188 	mib[1] = VFS_GENERIC;
1189 	mib[2] = CTL_QUERY;
1190 	alen = sizeof(ans);
1191 
1192 	memset(&q, 0, sizeof(q));
1193 	q.sysctl_flags = SYSCTL_VERSION;
1194 
1195 	if (rump_sys___sysctl(mib, 3, ans, &alen, &q, sizeof(q)) == -1) {
1196 		return -1;
1197 	}
1198 
1199 	for (i = 0; i < alen/sizeof(ans[0]); i++)
1200 		if (strcmp("fstypes", ans[i].sysctl_name) == 0)
1201 			break;
1202 	if (i == alen/sizeof(ans[0])) {
1203 		errno = ENXIO;
1204 		return -1;
1205 	}
1206 
1207 	mib[0] = CTL_VFS;
1208 	mib[1] = VFS_GENERIC;
1209 	mib[2] = ans[i].sysctl_num;
1210 
1211 	if (rump_sys___sysctl(mib, 3, buf, &buflen, NULL, 0) == -1) {
1212 		return -1;
1213 	}
1214 
1215 	return buflen;
1216 }
1217 
1218 /*
1219  * Utilities
1220  */
/*
 * Create every directory component of pathname, mkdir -p style,
 * using mkdirfn(fs, path, mode) for each prefix of the path.  The
 * process umask is applied to mode.  Components that already exist
 * (mkdirfn fails with EEXIST) are not an error.
 *
 * Returns 0 on success, -1 on failure with errno from the failing
 * mkdirfn call (or ENOMEM).
 */
static int
builddirs(const char *pathname, mode_t mode,
	int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs)
{
	char *path, *sep;
	int rv, last;
	mode_t mask;

	/*ukfs_umask((mask = ukfs_umask(0)));*/
	/* read the current umask; umask() can only be read by setting it */
	mask = umask(0);
	umask(mask);

	path = sep = strdup(pathname);
	if (path == NULL) {
		errno = ENOMEM;
		return -1;
	}

	for (;;) {
		/* find next component */
		sep += strspn(sep, "/");
		sep += strcspn(sep, "/");
		last = (*sep == '\0');
		/* terminate the prefix here (a no-op for the last one) */
		*sep = '\0';

		rv = mkdirfn(fs, path, mode & ~mask);
		/* only consult errno if the call actually failed */
		if (rv == -1 && errno == EEXIST)
			rv = 0;

		if (rv == -1 || last)
			break;

		*sep = '/';
	}

	free(path);

	return rv;
}
1263 
1264 int
1265 ukfs_util_builddirs(struct ukfs *ukfs, const char *pathname, mode_t mode)
1266 {
1267 
1268 	return builddirs(pathname, mode, ukfs_mkdir, ukfs);
1269 }
1270