xref: /netbsd-src/lib/libukfs/ukfs.c (revision c2f76ff004a2cb67efe5b12d97bd3ef7fe89e18d)
1 /*	$NetBSD: ukfs.c,v 1.56 2011/01/02 13:01:45 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Finnish Cultural Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
 * This library enables access to file systems directly without
33  * involving system calls.
34  */
35 
36 #ifdef __linux__
37 #define _XOPEN_SOURCE 500
38 #define _BSD_SOURCE
39 #define _FILE_OFFSET_BITS 64
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/queue.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/mount.h>
47 
48 #include <assert.h>
49 #include <dirent.h>
50 #include <dlfcn.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <pthread.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <stdint.h>
60 
61 #include <rump/ukfs.h>
62 
63 #include <rump/rump.h>
64 #include <rump/rump_syscalls.h>
65 
66 #include "ukfs_int_disklabel.h"
67 
68 #define UKFS_MODE_DEFAULT 0555
69 
70 struct ukfs {
71 	pthread_spinlock_t ukfs_spin;
72 
73 	struct mount *ukfs_mp;
74 	struct lwp *ukfs_lwp;
75 	void *ukfs_specific;
76 
77 	int ukfs_devfd;
78 
79 	char *ukfs_devpath;
80 	char *ukfs_mountpath;
81 	char *ukfs_cwd;
82 
83 	struct ukfs_part *ukfs_part;
84 };
85 
/* forward declaration, the definition is at the end of this file */
static int builddirs(const char *pathname, mode_t mode,
    int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs);
88 
89 struct mount *
90 ukfs_getmp(struct ukfs *ukfs)
91 {
92 
93 	return ukfs->ukfs_mp;
94 }
95 
96 void
97 ukfs_setspecific(struct ukfs *ukfs, void *priv)
98 {
99 
100 	ukfs->ukfs_specific = priv;
101 }
102 
103 void *
104 ukfs_getspecific(struct ukfs *ukfs)
105 {
106 
107 	return ukfs->ukfs_specific;
108 }
109 
/*
 * When pthread linkage is not wanted the spinlock calls are stubbed
 * out.  The stubs must still be expressions yielding an int, since
 * callers test the result of pthread_spin_init(); a macro expanding
 * to nothing would turn such a test into a syntax error.  The
 * arguments are not evaluated.
 */
#ifdef DONT_WANT_PTHREAD_LINKAGE
static inline int
ukfs__nospin(void)
{

	return 0;
}
#define pthread_spin_lock(a)		ukfs__nospin()
#define pthread_spin_unlock(a)		ukfs__nospin()
#define pthread_spin_init(a,b)		ukfs__nospin()
#define pthread_spin_destroy(a)		ukfs__nospin()
#endif
116 
117 static int
118 precall(struct ukfs *ukfs, struct lwp **curlwp)
119 {
120 
121 	/* save previous.  ensure start from pristine context */
122 	*curlwp = rump_pub_lwproc_curlwp();
123 	if (*curlwp)
124 		rump_pub_lwproc_switch(ukfs->ukfs_lwp);
125 	rump_pub_lwproc_rfork(RUMP_RFCFDG);
126 
127 	if (rump_sys_chroot(ukfs->ukfs_mountpath) == -1)
128 		return errno;
129 	if (rump_sys_chdir(ukfs->ukfs_cwd) == -1)
130 		return errno;
131 
132 	return 0;
133 }
134 
135 static void
136 postcall(struct lwp *curlwp)
137 {
138 
139 	rump_pub_lwproc_releaselwp();
140 	if (curlwp)
141 		rump_pub_lwproc_switch(curlwp);
142 }
143 
/*
 * PRECALL() declares ukfs_curlwp and runs precall() on the variable
 * named "ukfs" of the enclosing function.  If that fails, errno is set
 * and the enclosing function returns -1.  Because it contains a
 * declaration it has to be used at statement level.
 */
#define PRECALL()							\
struct lwp *ukfs_curlwp;						\
do {									\
	int ukfs_rv = precall(ukfs, &ukfs_curlwp);			\
	if (ukfs_rv != 0) {						\
		errno = ukfs_rv;					\
		return -1;						\
	}								\
} while (/*CONSTCOND*/0)

/* POSTCALL() undoes PRECALL(); note that it carries its own ';' */
#define POSTCALL() postcall(ukfs_curlwp);
155 
156 struct ukfs_part {
157 	pthread_spinlock_t part_lck;
158 	int part_refcount;
159 
160 	int part_type;
161 	char part_labelchar;
162 	off_t part_devoff;
163 	off_t part_devsize;
164 };
165 
166 enum ukfs_parttype { UKFS_PART_NONE, UKFS_PART_DISKLABEL, UKFS_PART_OFFSET };
167 
168 static struct ukfs_part ukfs__part_none = {
169 	.part_type = UKFS_PART_NONE,
170 	.part_devoff = 0,
171 	.part_devsize = RUMP_ETFS_SIZE_ENDOFF,
172 };
173 static struct ukfs_part ukfs__part_na;
174 struct ukfs_part *ukfs_part_none = &ukfs__part_none;
175 struct ukfs_part *ukfs_part_na = &ukfs__part_na;
176 
177 #define PART2LOCKSIZE(len) ((len) == RUMP_ETFS_SIZE_ENDOFF ? 0 : (len))
178 
179 int
180 _ukfs_init(int version)
181 {
182 	int rv;
183 
184 	if (version != UKFS_VERSION) {
185 		printf("incompatible ukfs version, %d vs. %d\n",
186 		    version, UKFS_VERSION);
187 		errno = EPROGMISMATCH;
188 		return -1;
189 	}
190 
191 	if ((rv = rump_init()) != 0) {
192 		errno = rv;
193 		return -1;
194 	}
195 
196 	return 0;
197 }
198 
/*
 * mkdir callback for builddirs() which operates on the rump kernel
 * namespace directly.  The handle argument is ignored.
 */
/*ARGSUSED*/
static int
rumpmkdir(struct ukfs *dummy, const char *path, mode_t mode)
{
	int rv;

	rv = rump_sys_mkdir(path, mode);
	return rv;
}
206 
207 int
208 ukfs_part_probe(char *devpath, struct ukfs_part **partp)
209 {
210 	struct ukfs_part *part;
211 	char *p;
212 	int error = 0;
213 	int devfd = -1;
214 
215 	if ((p = strstr(devpath, UKFS_PARTITION_SCANMAGIC)) != NULL) {
216 		fprintf(stderr, "ukfs: %%PART is deprecated.  use "
217 		    "%%DISKLABEL instead\n");
218 		errno = ENODEV;
219 		return -1;
220 	}
221 
222 	part = malloc(sizeof(*part));
223 	if (part == NULL) {
224 		errno = ENOMEM;
225 		return -1;
226 	}
227 	if (pthread_spin_init(&part->part_lck, PTHREAD_PROCESS_PRIVATE) == -1) {
228 		error = errno;
229 		free(part);
230 		errno = error;
231 		return -1;
232 	}
233 	part->part_type = UKFS_PART_NONE;
234 	part->part_refcount = 1;
235 
236 	/*
237 	 * Check for magic in pathname:
238 	 *   disklabel: /regularpath%DISKLABEL:labelchar%\0
239 	 *     offsets: /regularpath%OFFSET:start,end%\0
240 	 */
241 #define MAGICADJ_DISKLABEL(p, n) (p+sizeof(UKFS_DISKLABEL_SCANMAGIC)-1+n)
242 	if ((p = strstr(devpath, UKFS_DISKLABEL_SCANMAGIC)) != NULL
243 	    && strlen(p) == UKFS_DISKLABEL_MAGICLEN
244 	    && *(MAGICADJ_DISKLABEL(p,1)) == '%') {
245 		if (*(MAGICADJ_DISKLABEL(p,0)) >= 'a' &&
246 		    *(MAGICADJ_DISKLABEL(p,0)) < 'a' + UKFS_MAXPARTITIONS) {
247 			struct ukfs__disklabel dl;
248 			struct ukfs__partition *pp;
249 			char buf[65536];
250 			char labelchar = *(MAGICADJ_DISKLABEL(p,0));
251 			int partition = labelchar - 'a';
252 
253 			*p = '\0';
254 			devfd = open(devpath, O_RDONLY);
255 			if (devfd == -1) {
256 				error = errno;
257 				goto out;
258 			}
259 
260 			/* Locate the disklabel and find the partition. */
261 			if (pread(devfd, buf, sizeof(buf), 0) == -1) {
262 				error = errno;
263 				goto out;
264 			}
265 
266 			if (ukfs__disklabel_scan(&dl, buf, sizeof(buf)) != 0) {
267 				error = ENOENT;
268 				goto out;
269 			}
270 
271 			if (dl.d_npartitions < partition) {
272 				error = ENOENT;
273 				goto out;
274 			}
275 
276 			pp = &dl.d_partitions[partition];
277 			part->part_type = UKFS_PART_DISKLABEL;
278 			part->part_labelchar = labelchar;
279 			part->part_devoff = pp->p_offset << DEV_BSHIFT;
280 			part->part_devsize = pp->p_size << DEV_BSHIFT;
281 		} else {
282 			error = EINVAL;
283 		}
284 #define MAGICADJ_OFFSET(p, n) (p+sizeof(UKFS_OFFSET_SCANMAGIC)-1+n)
285 	} else if (((p = strstr(devpath, UKFS_OFFSET_SCANMAGIC)) != NULL)
286 	    && (strlen(p) >= UKFS_OFFSET_MINLEN)) {
287 		char *comma, *pers, *ep, *nptr;
288 		u_quad_t val;
289 
290 		comma = strchr(p, ',');
291 		if (comma == NULL) {
292 			error = EINVAL;
293 			goto out;
294 		}
295 		pers = strchr(comma, '%');
296 		if (pers == NULL) {
297 			error = EINVAL;
298 			goto out;
299 		}
300 		*comma = '\0';
301 		*pers = '\0';
302 		*p = '\0';
303 
304 		nptr = MAGICADJ_OFFSET(p,0);
305 		/* check if string is negative */
306 		if (*nptr == '-') {
307 			error = ERANGE;
308 			goto out;
309 		}
310 		val = strtouq(nptr, &ep, 10);
311 		if (val == UQUAD_MAX) {
312 			error = ERANGE;
313 			goto out;
314 		}
315 		if (*ep != '\0') {
316 			error = EADDRNOTAVAIL; /* creative ;) */
317 			goto out;
318 		}
319 		part->part_devoff = val;
320 
321 		/* omstart */
322 
323 		nptr = comma+1;
324 		/* check if string is negative */
325 		if (*nptr == '-') {
326 			error = ERANGE;
327 			goto out;
328 		}
329 		val = strtouq(nptr, &ep, 10);
330 		if (val == UQUAD_MAX) {
331 			error = ERANGE;
332 			goto out;
333 		}
334 		if (*ep != '\0') {
335 			error = EADDRNOTAVAIL; /* creative ;) */
336 			goto out;
337 		}
338 		part->part_devsize = val;
339 		part->part_type = UKFS_PART_OFFSET;
340 	} else {
341 		ukfs_part_release(part);
342 		part = ukfs_part_none;
343 	}
344 
345  out:
346 	if (devfd != -1)
347 		close(devfd);
348 	if (error) {
349 		free(part);
350 		errno = error;
351 	} else {
352 		*partp = part;
353 	}
354 
355 	return error ? -1 : 0;
356 }
357 
358 int
359 ukfs_part_tostring(struct ukfs_part *part, char *str, size_t strsize)
360 {
361 	int rv;
362 
363 	*str = '\0';
364 	/* "pseudo" values */
365 	if (part == ukfs_part_na) {
366 		errno = EINVAL;
367 		return -1;
368 	}
369 	if (part == ukfs_part_none)
370 		return 0;
371 
372 	rv = 0;
373 	switch (part->part_type) {
374 	case UKFS_PART_NONE:
375 		break;
376 
377 	case UKFS_PART_DISKLABEL:
378 		snprintf(str, strsize, "%%DISKLABEL:%c%%",part->part_labelchar);
379 		rv = 1;
380 		break;
381 
382 	case UKFS_PART_OFFSET:
383 		snprintf(str, strsize, "[%llu,%llu]",
384 		    (unsigned long long)part->part_devoff,
385 		    (unsigned long long)(part->part_devoff+part->part_devsize));
386 		rv = 1;
387 		break;
388 	}
389 
390 	return rv;
391 }
392 
393 static void
394 unlockdev(int fd, struct ukfs_part *part)
395 {
396 	struct flock flarg;
397 
398 	if (part == ukfs_part_na)
399 		return;
400 
401 	memset(&flarg, 0, sizeof(flarg));
402 	flarg.l_type = F_UNLCK;
403 	flarg.l_whence = SEEK_SET;
404 	flarg.l_start = part->part_devoff;
405 	flarg.l_len = PART2LOCKSIZE(part->part_devsize);
406 	if (fcntl(fd, F_SETLK, &flarg) == -1)
407 		warn("ukfs: cannot unlock device file");
408 }
409 
410 /*
411  * Open the disk file and flock it.  Also, if we are operation on
412  * an embedded partition, find the partition offset and size from
413  * the disklabel.
414  *
415  * We hard-fail only in two cases:
416  *  1) we failed to get the partition info out (don't know what offset
417  *     to mount from)
418  *  2) we failed to flock the source device (i.e. fcntl() fails,
419  *     not e.g. open() before it)
420  *
421  * Otherwise we let the code proceed to mount and let the file system
422  * throw the proper error.  The only questionable bit is that if we
423  * soft-fail before flock and mount does succeed...
424  *
425  * Returns: -1 error (errno reports error code)
426  *           0 success
427  *
428  * dfdp: -1  device is not open
429  *        n  device is open
430  */
431 static int
432 process_diskdevice(const char *devpath, struct ukfs_part *part, int rdonly,
433 	int *dfdp)
434 {
435 	struct stat sb;
436 	int rv = 0, devfd;
437 
438 	/* defaults */
439 	*dfdp = -1;
440 
441 	devfd = open(devpath, rdonly ? O_RDONLY : O_RDWR);
442 	if (devfd == -1) {
443 		rv = errno;
444 		goto out;
445 	}
446 
447 	if (fstat(devfd, &sb) == -1) {
448 		rv = errno;
449 		goto out;
450 	}
451 
452 	/*
453 	 * We do this only for non-block device since the
454 	 * (NetBSD) kernel allows block device open only once.
455 	 * We also need to close the device for fairly obvious reasons.
456 	 */
457 	if (!S_ISBLK(sb.st_mode)) {
458 		struct flock flarg;
459 
460 		memset(&flarg, 0, sizeof(flarg));
461 		flarg.l_type = rdonly ? F_RDLCK : F_WRLCK;
462 		flarg.l_whence = SEEK_SET;
463 		flarg.l_start = part->part_devoff;
464 		flarg.l_len = PART2LOCKSIZE(part->part_devsize);
465 		if (fcntl(devfd, F_SETLK, &flarg) == -1) {
466 			pid_t holder;
467 			int sverrno;
468 
469 			sverrno = errno;
470 			if (fcntl(devfd, F_GETLK, &flarg) != 1)
471 				holder = flarg.l_pid;
472 			else
473 				holder = -1;
474 			warnx("ukfs_mount: cannot lock device.  held by pid %d",
475 			    holder);
476 			rv = sverrno;
477 			goto out;
478 		}
479 	} else {
480 		close(devfd);
481 		devfd = -1;
482 	}
483 	*dfdp = devfd;
484 
485  out:
486 	if (rv) {
487 		if (devfd != -1)
488 			close(devfd);
489 	}
490 
491 	return rv;
492 }
493 
/*
 * Arguments for rump_sys_mount(), bundled up so that they can be
 * passed to mfs_mounter() through pthread_create().
 */
struct mountinfo {
	const char *mi_vfsname;		/* file system type */
	const char *mi_mountpath;	/* where to mount */
	int mi_mntflags;		/* mount flags */
	void *mi_arg;			/* file system specific args */
	size_t mi_alen;			/* size of the above */
	int *mi_error;			/* not used in this file */
};
502 static void *
503 mfs_mounter(void *arg)
504 {
505 	struct mountinfo *mi = arg;
506 	int rv;
507 
508 	rv = rump_sys_mount(mi->mi_vfsname, mi->mi_mountpath, mi->mi_mntflags,
509 	    mi->mi_arg, mi->mi_alen);
510 	if (rv) {
511 		warn("mfs mount failed.  fix me.");
512 		abort(); /* XXX */
513 	}
514 
515 	return NULL;
516 }
517 
518 static struct ukfs *
519 doukfsmount(const char *vfsname, const char *devpath, struct ukfs_part *part,
520 	const char *mountpath, int mntflags, void *arg, size_t alen)
521 {
522 	struct ukfs *fs = NULL;
523 	struct lwp *curlwp;
524 	int rv = 0, devfd = -1;
525 	int mounted = 0;
526 	int regged = 0;
527 
528 	pthread_spin_lock(&part->part_lck);
529 	part->part_refcount++;
530 	pthread_spin_unlock(&part->part_lck);
531 	if (part != ukfs_part_na) {
532 		if ((rv = process_diskdevice(devpath, part,
533 		    mntflags & MNT_RDONLY, &devfd)) != 0)
534 			goto out;
535 	}
536 
537 	fs = malloc(sizeof(struct ukfs));
538 	if (fs == NULL) {
539 		rv = ENOMEM;
540 		goto out;
541 	}
542 	memset(fs, 0, sizeof(struct ukfs));
543 
544 	/* create our mountpoint.  this is never removed. */
545 	if (builddirs(mountpath, 0777, rumpmkdir, NULL) == -1) {
546 		if (errno != EEXIST) {
547 			rv = errno;
548 			goto out;
549 		}
550 	}
551 
552 	if (part != ukfs_part_na) {
553 		/* LINTED */
554 		rv = rump_pub_etfs_register_withsize(devpath, devpath,
555 		    RUMP_ETFS_BLK, part->part_devoff, part->part_devsize);
556 		if (rv) {
557 			goto out;
558 		}
559 		regged = 1;
560 	}
561 
562 	/*
563 	 * MFS is special since mount(2) doesn't return.  Hence, we
564 	 * create a thread here.  Could fix mfs to return, but there's
565 	 * too much history for me to bother.
566 	 */
567 	if (strcmp(vfsname, MOUNT_MFS) == 0) {
568 		pthread_t pt;
569 		struct mountinfo mi;
570 		int i;
571 
572 		mi.mi_vfsname = vfsname;
573 		mi.mi_mountpath = mountpath;
574 		mi.mi_mntflags = mntflags;
575 		mi.mi_arg = arg;
576 		mi.mi_alen = alen;
577 
578 		if (pthread_create(&pt, NULL, mfs_mounter, &mi) == -1) {
579 			rv = errno;
580 			goto out;
581 		}
582 
583 		for (i = 0;i < 100000; i++) {
584 			struct statvfs svfsb;
585 
586 			rv = rump_sys_statvfs1(mountpath, &svfsb, ST_WAIT);
587 			if (rv == -1) {
588 				rv = errno;
589 				goto out;
590 			}
591 
592 			if (strcmp(svfsb.f_mntonname, mountpath) == 0 &&
593 			    strcmp(svfsb.f_fstypename, MOUNT_MFS) == 0) {
594 				break;
595 			}
596 			usleep(1);
597 		}
598 	} else {
599 		rv = rump_sys_mount(vfsname, mountpath, mntflags, arg, alen);
600 		if (rv) {
601 			rv = errno;
602 			goto out;
603 		}
604 	}
605 
606 	mounted = 1;
607 	rv = rump_pub_vfs_getmp(mountpath, &fs->ukfs_mp);
608 	if (rv) {
609 		goto out;
610 	}
611 
612 	if (regged) {
613 		fs->ukfs_devpath = strdup(devpath);
614 	}
615 	fs->ukfs_mountpath = strdup(mountpath);
616 	pthread_spin_init(&fs->ukfs_spin, PTHREAD_PROCESS_SHARED);
617 	fs->ukfs_devfd = devfd;
618 	fs->ukfs_part = part;
619 	assert(rv == 0);
620 
621 	curlwp = rump_pub_lwproc_curlwp();
622 	rump_pub_lwproc_newlwp(0);
623 	fs->ukfs_lwp = rump_pub_lwproc_curlwp();
624 	fs->ukfs_cwd = strdup("/");
625 	rump_pub_lwproc_switch(curlwp);
626 
627  out:
628 	if (rv) {
629 		if (fs) {
630 			free(fs);
631 			fs = NULL;
632 		}
633 		if (mounted)
634 			rump_sys_unmount(mountpath, MNT_FORCE);
635 		if (regged)
636 			rump_pub_etfs_remove(devpath);
637 		if (devfd != -1) {
638 			unlockdev(devfd, part);
639 			close(devfd);
640 		}
641 		ukfs_part_release(part);
642 		errno = rv;
643 	}
644 
645 	return fs;
646 }
647 
648 struct ukfs *
649 ukfs_mount(const char *vfsname, const char *devpath,
650 	const char *mountpath, int mntflags, void *arg, size_t alen)
651 {
652 
653 	return doukfsmount(vfsname, devpath, ukfs_part_na,
654 	    mountpath, mntflags, arg, alen);
655 }
656 
/*
 * Mount a file system from the given partition of a disk device or
 * image.  Returns the new handle, or NULL with errno set.
 */
struct ukfs *
ukfs_mount_disk(const char *vfsname, const char *devpath,
	struct ukfs_part *part, const char *mountpath, int mntflags,
	void *arg, size_t alen)
{
	struct ukfs *fs;

	fs = doukfsmount(vfsname, devpath, part,
	    mountpath, mntflags, arg, alen);
	return fs;
}
666 
667 int
668 ukfs_release(struct ukfs *fs, int flags)
669 {
670 	struct lwp *curlwp = rump_pub_lwproc_curlwp();
671 
672 	/* get root lwp */
673 	rump_pub_lwproc_switch(fs->ukfs_lwp);
674 	rump_pub_lwproc_rfork(RUMP_RFCFDG);
675 
676 	if ((flags & UKFS_RELFLAG_NOUNMOUNT) == 0) {
677 		int rv, mntflag, error;
678 
679 		mntflag = 0;
680 		if (flags & UKFS_RELFLAG_FORCE)
681 			mntflag = MNT_FORCE;
682 
683 		rv = rump_sys_unmount(fs->ukfs_mountpath, mntflag);
684 		if (rv == -1) {
685 			error = errno;
686 			rump_pub_lwproc_releaselwp();
687 			if (curlwp)
688 				rump_pub_lwproc_switch(curlwp);
689 			errno = error;
690 			return -1;
691 		}
692 	}
693 
694 	if (fs->ukfs_devpath) {
695 		rump_pub_etfs_remove(fs->ukfs_devpath);
696 		free(fs->ukfs_devpath);
697 	}
698 	free(fs->ukfs_mountpath);
699 	free(fs->ukfs_cwd);
700 
701 	/* release this routine's lwp and ukfs base lwp */
702 	rump_pub_lwproc_releaselwp();
703 	rump_pub_lwproc_switch(fs->ukfs_lwp);
704 	rump_pub_lwproc_releaselwp();
705 
706 	pthread_spin_destroy(&fs->ukfs_spin);
707 	if (fs->ukfs_devfd != -1) {
708 		unlockdev(fs->ukfs_devfd, fs->ukfs_part);
709 		close(fs->ukfs_devfd);
710 	}
711 	ukfs_part_release(fs->ukfs_part);
712 	free(fs);
713 
714 	if (curlwp)
715 		rump_pub_lwproc_switch(curlwp);
716 
717 	return 0;
718 }
719 
720 void
721 ukfs_part_release(struct ukfs_part *part)
722 {
723 	int release;
724 
725 	if (part != ukfs_part_none && part != ukfs_part_na) {
726 		pthread_spin_lock(&part->part_lck);
727 		release = --part->part_refcount == 0;
728 		pthread_spin_unlock(&part->part_lck);
729 		if (release) {
730 			pthread_spin_destroy(&part->part_lck);
731 			free(part);
732 		}
733 	}
734 }
735 
/*
 * Complete function body for simple wrappers: enter the mount's
 * context, evaluate "thecall", leave the context and return the
 * result.  Like PRECALL() it relies on the handle variable of the
 * enclosing function being named "ukfs".
 */
#define STDCALL(ukfs, thecall)						\
	int rv = 0;							\
									\
	PRECALL();							\
	rv = (thecall);							\
	POSTCALL();							\
	return rv;
743 
744 int
745 ukfs_opendir(struct ukfs *ukfs, const char *dirname, struct ukfs_dircookie **c)
746 {
747 	struct vnode *vp;
748 	int rv;
749 
750 	PRECALL();
751 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
752 	    NULL, &vp, NULL);
753 	POSTCALL();
754 
755 	if (rv == 0) {
756 		RUMP_VOP_UNLOCK(vp);
757 	} else {
758 		errno = rv;
759 		rv = -1;
760 	}
761 
762 	/*LINTED*/
763 	*c = (struct ukfs_dircookie *)vp;
764 	return rv;
765 }
766 
767 static int
768 getmydents(struct vnode *vp, off_t *off, uint8_t *buf, size_t bufsize)
769 {
770 	struct uio *uio;
771 	size_t resid;
772 	int rv, eofflag;
773 	struct kauth_cred *cred;
774 
775 	uio = rump_pub_uio_setup(buf, bufsize, *off, RUMPUIO_READ);
776 	cred = rump_pub_cred_create(0, 0, 0, NULL);
777 	rv = RUMP_VOP_READDIR(vp, uio, cred, &eofflag, NULL, NULL);
778 	rump_pub_cred_put(cred);
779 	RUMP_VOP_UNLOCK(vp);
780 	*off = rump_pub_uio_getoff(uio);
781 	resid = rump_pub_uio_free(uio);
782 
783 	if (rv) {
784 		errno = rv;
785 		return -1;
786 	}
787 
788 	/* LINTED: not totally correct return type, but follows syscall */
789 	return bufsize - resid;
790 }
791 
792 /*ARGSUSED*/
793 int
794 ukfs_getdents_cookie(struct ukfs *ukfs, struct ukfs_dircookie *c, off_t *off,
795 	uint8_t *buf, size_t bufsize)
796 {
797 	/*LINTED*/
798 	struct vnode *vp = (struct vnode *)c;
799 
800 	RUMP_VOP_LOCK(vp, RUMP_LK_SHARED);
801 	return getmydents(vp, off, buf, bufsize);
802 }
803 
804 int
805 ukfs_getdents(struct ukfs *ukfs, const char *dirname, off_t *off,
806 	uint8_t *buf, size_t bufsize)
807 {
808 	struct vnode *vp;
809 	int rv;
810 
811 	PRECALL();
812 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
813 	    NULL, &vp, NULL);
814 	if (rv) {
815 		POSTCALL();
816 		errno = rv;
817 		return -1;
818 	}
819 
820 	rv = getmydents(vp, off, buf, bufsize);
821 	rump_pub_vp_rele(vp);
822 	POSTCALL();
823 	return rv;
824 }
825 
/*
 * Give back a cookie obtained from ukfs_opendir() by releasing the
 * vnode behind it.  Always returns 0.
 */
/*ARGSUSED*/
int
ukfs_closedir(struct ukfs *ukfs, struct ukfs_dircookie *c)
{
	/*LINTED*/
	struct vnode *dirvp = (struct vnode *)c;

	rump_pub_vp_rele(dirvp);
	return 0;
}
835 
/*
 * Open a file with rump_sys_open() inside the mount's context.
 * Returns the descriptor, or -1 if the open failed.
 */
int
ukfs_open(struct ukfs *ukfs, const char *filename, int flags)
{
	int fd;

	PRECALL();
	fd = rump_sys_open(filename, flags, 0);
	POSTCALL();

	/* fd is -1 when the open failed, pass it on as is */
	return fd;
}
849 
850 ssize_t
851 ukfs_read(struct ukfs *ukfs, const char *filename, off_t off,
852 	uint8_t *buf, size_t bufsize)
853 {
854 	int fd;
855 	ssize_t xfer = -1; /* XXXgcc */
856 
857 	PRECALL();
858 	fd = rump_sys_open(filename, RUMP_O_RDONLY, 0);
859 	if (fd == -1)
860 		goto out;
861 
862 	xfer = rump_sys_pread(fd, buf, bufsize, off);
863 	rump_sys_close(fd);
864 
865  out:
866 	POSTCALL();
867 	if (fd == -1) {
868 		return -1;
869 	}
870 	return xfer;
871 }
872 
/*
 * Read from an already open descriptor at the given offset.  The
 * result of rump_sys_pread() is returned unchanged.
 */
/*ARGSUSED*/
ssize_t
ukfs_read_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen)
{
	ssize_t nread;

	nread = rump_sys_pread(fd, buf, buflen, off);
	return nread;
}
880 
881 ssize_t
882 ukfs_write(struct ukfs *ukfs, const char *filename, off_t off,
883 	uint8_t *buf, size_t bufsize)
884 {
885 	int fd;
886 	ssize_t xfer = -1; /* XXXgcc */
887 
888 	PRECALL();
889 	fd = rump_sys_open(filename, RUMP_O_WRONLY, 0);
890 	if (fd == -1)
891 		goto out;
892 
893 	/* write and commit */
894 	xfer = rump_sys_pwrite(fd, buf, bufsize, off);
895 	if (xfer > 0)
896 		rump_sys_fsync(fd);
897 
898 	rump_sys_close(fd);
899 
900  out:
901 	POSTCALL();
902 	if (fd == -1) {
903 		return -1;
904 	}
905 	return xfer;
906 }
907 
/*
 * Write to an already open descriptor at the given offset.  If
 * dosync is set and something was written, the descriptor is
 * fsync'd.  The result of rump_sys_pwrite() is returned.
 */
/*ARGSUSED*/
ssize_t
ukfs_write_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen,
	int dosync)
{
	ssize_t nwritten = rump_sys_pwrite(fd, buf, buflen, off);

	if (dosync && nwritten > 0)
		rump_sys_fsync(fd);

	return nwritten;
}
921 
/*
 * Close a descriptor.  The result of rump_sys_close() is discarded
 * and 0 is always returned.
 */
/*ARGSUSED*/
int
ukfs_close(struct ukfs *ukfs, int fd)
{

	(void)rump_sys_close(fd);
	return 0;
}
930 
931 int
932 ukfs_create(struct ukfs *ukfs, const char *filename, mode_t mode)
933 {
934 	int fd;
935 
936 	PRECALL();
937 	fd = rump_sys_open(filename, RUMP_O_WRONLY | RUMP_O_CREAT, mode);
938 	if (fd == -1)
939 		return -1;
940 	rump_sys_close(fd);
941 
942 	POSTCALL();
943 	return 0;
944 }
945 
/*
 * Call rump_sys_mknod() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_mknod(struct ukfs *ukfs, const char *path, mode_t mode, dev_t dev)
{
	int res = 0;

	PRECALL();
	res = rump_sys_mknod(path, mode, dev);
	POSTCALL();

	return res;
}
952 
/*
 * Call rump_sys_mkfifo() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_mkfifo(struct ukfs *ukfs, const char *path, mode_t mode)
{
	int res = 0;

	PRECALL();
	res = rump_sys_mkfifo(path, mode);
	POSTCALL();

	return res;
}
959 
/*
 * Call rump_sys_mkdir() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_mkdir(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int res = 0;

	PRECALL();
	res = rump_sys_mkdir(filename, mode);
	POSTCALL();

	return res;
}
966 
/*
 * Call rump_sys_unlink() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_remove(struct ukfs *ukfs, const char *filename)
{
	int res = 0;

	PRECALL();
	res = rump_sys_unlink(filename);
	POSTCALL();

	return res;
}
973 
/*
 * Call rump_sys_rmdir() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_rmdir(struct ukfs *ukfs, const char *filename)
{
	int res = 0;

	PRECALL();
	res = rump_sys_rmdir(filename);
	POSTCALL();

	return res;
}
980 
/*
 * Call rump_sys_link(filename, f_create) inside the mount's context
 * and return its result.  Returns -1 with errno set if the context
 * cannot be entered.
 */
int
ukfs_link(struct ukfs *ukfs, const char *filename, const char *f_create)
{
	int res = 0;

	PRECALL();
	res = rump_sys_link(filename, f_create);
	POSTCALL();

	return res;
}
987 
/*
 * Call rump_sys_symlink(filename, linkname) inside the mount's
 * context and return its result.  Returns -1 with errno set if the
 * context cannot be entered.
 */
int
ukfs_symlink(struct ukfs *ukfs, const char *filename, const char *linkname)
{
	int res = 0;

	PRECALL();
	res = rump_sys_symlink(filename, linkname);
	POSTCALL();

	return res;
}
994 
/*
 * Call rump_sys_readlink() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
ssize_t
ukfs_readlink(struct ukfs *ukfs, const char *filename,
	char *linkbuf, size_t buflen)
{
	ssize_t linklen;

	PRECALL();
	linklen = rump_sys_readlink(filename, linkbuf, buflen);
	POSTCALL();

	return linklen;
}
1006 
/*
 * Call rump_sys_rename() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_rename(struct ukfs *ukfs, const char *from, const char *to)
{
	int res = 0;

	PRECALL();
	res = rump_sys_rename(from, to);
	POSTCALL();

	return res;
}
1013 
1014 int
1015 ukfs_chdir(struct ukfs *ukfs, const char *path)
1016 {
1017 	char *newpath, *oldpath;
1018 	int rv;
1019 
1020 	PRECALL();
1021 	rv = rump_sys_chdir(path);
1022 	if (rv == -1)
1023 		goto out;
1024 
1025 	newpath = malloc(MAXPATHLEN);
1026 	if (rump_sys___getcwd(newpath, MAXPATHLEN) == -1) {
1027 		goto out;
1028 	}
1029 
1030 	pthread_spin_lock(&ukfs->ukfs_spin);
1031 	oldpath = ukfs->ukfs_cwd;
1032 	ukfs->ukfs_cwd = newpath;
1033 	pthread_spin_unlock(&ukfs->ukfs_spin);
1034 	free(oldpath);
1035 
1036  out:
1037 	POSTCALL();
1038 	return rv;
1039 }
1040 
/*
 * Call rump_sys_stat() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_stat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{

	STDCALL(ukfs, rump_sys_stat(filename, file_stat));
}
1052 
/*
 * Call rump_sys_lstat() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_lstat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{

	STDCALL(ukfs, rump_sys_lstat(filename, file_stat));
}
1064 
/*
 * Call rump_sys_chmod() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_chmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int res = 0;

	PRECALL();
	res = rump_sys_chmod(filename, mode);
	POSTCALL();

	return res;
}
1071 
/*
 * Call rump_sys_lchmod() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_lchmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int res = 0;

	PRECALL();
	res = rump_sys_lchmod(filename, mode);
	POSTCALL();

	return res;
}
1078 
/*
 * Call rump_sys_chown() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_chown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int res = 0;

	PRECALL();
	res = rump_sys_chown(filename, uid, gid);
	POSTCALL();

	return res;
}
1085 
/*
 * Call rump_sys_lchown() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_lchown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int res = 0;

	PRECALL();
	res = rump_sys_lchown(filename, uid, gid);
	POSTCALL();

	return res;
}
1092 
1093 int
1094 ukfs_chflags(struct ukfs *ukfs, const char *filename, u_long flags)
1095 {
1096 
1097 	STDCALL(ukfs, rump_sys_chflags(filename, flags));
1098 }
1099 
1100 int
1101 ukfs_lchflags(struct ukfs *ukfs, const char *filename, u_long flags)
1102 {
1103 
1104 	STDCALL(ukfs, rump_sys_lchflags(filename, flags));
1105 }
1106 
/*
 * Call rump_sys_utimes() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_utimes(struct ukfs *ukfs, const char *filename, const struct timeval *tptr)
{
	int res = 0;

	PRECALL();
	res = rump_sys_utimes(filename, tptr);
	POSTCALL();

	return res;
}
1113 
/*
 * Call rump_sys_lutimes() inside the mount's context and return its
 * result.  Returns -1 with errno set if the context cannot be entered.
 */
int
ukfs_lutimes(struct ukfs *ukfs, const char *filename,
	      const struct timeval *tptr)
{
	int res = 0;

	PRECALL();
	res = rump_sys_lutimes(filename, tptr);
	POSTCALL();

	return res;
}
1121 
1122 /*
1123  * Dynamic module support
1124  */
1125 
/*
 * Load one library and initialize the modules in its "modules" link
 * set.
 *
 * Returns:  1  library loaded and modules initialized
 *           0  dlopen() failed due to an undefined symbol; the caller
 *              may want to retry after loading other libraries
 *          -1  failure.  errno is set, except when dlopen() itself
 *              failed (XXX)
 *
 * XXX: the dlerror stuff isn't really threadsafe, but then again I
 * can't protect against other threads calling dl*() outside of ukfs,
 * so just live with it being flimsy
 */
int
ukfs_modload(const char *fname)
{
	void *handle;
	const struct modinfo *const *mi_start, *const *mi_end;
	int error;

	handle = dlopen(fname, RTLD_LAZY|RTLD_GLOBAL);
	if (handle == NULL) {
		const char *dlmsg = dlerror();

		/* dlerror() returns NULL if there is nothing to report */
		if (dlmsg == NULL)
			dlmsg = "unknown error";
		if (strstr(dlmsg, "Undefined symbol"))
			return 0;
		/* warnx() supplies the newline itself */
		warnx("dlopen %s failed: %s", fname, dlmsg);
		/* XXXerrno */
		return -1;
	}

	mi_start = dlsym(handle, "__start_link_set_modules");
	mi_end = dlsym(handle, "__stop_link_set_modules");
	if (mi_start && mi_end) {
		error = rump_pub_module_init(mi_start,
		    (size_t)(mi_end-mi_start));
		if (error)
			goto errclose;
		return 1;
	}
	/* no module link set in this library */
	error = EINVAL;

 errclose:
	dlclose(handle);
	errno = error;
	return -1;
}
1166 
/*
 * List entry for a library whose load attempt is to be retried by
 * ukfs_modload_dir().
 */
struct loadfail {
	char *pname;			/* path of the library */

	LIST_ENTRY(loadfail) entries;
};

/* file names considered by ukfs_modload_dir(): librumpfs_*.so */
#define RUMPFSMOD_PREFIX "librumpfs_"
#define RUMPFSMOD_SUFFIX ".so"
1176 int
1177 ukfs_modload_dir(const char *dir)
1178 {
1179 	char nbuf[MAXPATHLEN+1], *p;
1180 	struct dirent entry, *result;
1181 	DIR *libdir;
1182 	struct loadfail *lf, *nlf;
1183 	int error, nloaded = 0, redo;
1184 	LIST_HEAD(, loadfail) lfs;
1185 
1186 	libdir = opendir(dir);
1187 	if (libdir == NULL)
1188 		return -1;
1189 
1190 	LIST_INIT(&lfs);
1191 	for (;;) {
1192 		if ((error = readdir_r(libdir, &entry, &result)) != 0)
1193 			break;
1194 		if (!result)
1195 			break;
1196 		if (strncmp(result->d_name, RUMPFSMOD_PREFIX,
1197 		    strlen(RUMPFSMOD_PREFIX)) != 0)
1198 			continue;
1199 		if (((p = strstr(result->d_name, RUMPFSMOD_SUFFIX)) == NULL)
1200 		    || strlen(p) != strlen(RUMPFSMOD_SUFFIX))
1201 			continue;
1202 		strlcpy(nbuf, dir, sizeof(nbuf));
1203 		strlcat(nbuf, "/", sizeof(nbuf));
1204 		strlcat(nbuf, result->d_name, sizeof(nbuf));
1205 		switch (ukfs_modload(nbuf)) {
1206 		case 0:
1207 			lf = malloc(sizeof(*lf));
1208 			if (lf == NULL) {
1209 				error = ENOMEM;
1210 				break;
1211 			}
1212 			lf->pname = strdup(nbuf);
1213 			if (lf->pname == NULL) {
1214 				free(lf);
1215 				error = ENOMEM;
1216 				break;
1217 			}
1218 			LIST_INSERT_HEAD(&lfs, lf, entries);
1219 			break;
1220 		case 1:
1221 			nloaded++;
1222 			break;
1223 		default:
1224 			/* ignore errors */
1225 			break;
1226 		}
1227 	}
1228 	closedir(libdir);
1229 	if (error && nloaded != 0)
1230 		error = 0;
1231 
1232 	/*
1233 	 * El-cheapo dependency calculator.  Just try to load the
1234 	 * modules n times in a loop
1235 	 */
1236 	for (redo = 1; redo;) {
1237 		redo = 0;
1238 		nlf = LIST_FIRST(&lfs);
1239 		while ((lf = nlf) != NULL) {
1240 			nlf = LIST_NEXT(lf, entries);
1241 			if (ukfs_modload(lf->pname) == 1) {
1242 				nloaded++;
1243 				redo = 1;
1244 				LIST_REMOVE(lf, entries);
1245 				free(lf->pname);
1246 				free(lf);
1247 			}
1248 		}
1249 	}
1250 
1251 	while ((lf = LIST_FIRST(&lfs)) != NULL) {
1252 		LIST_REMOVE(lf, entries);
1253 		free(lf->pname);
1254 		free(lf);
1255 	}
1256 
1257 	if (error && nloaded == 0) {
1258 		errno = error;
1259 		return -1;
1260 	}
1261 
1262 	return nloaded;
1263 }
1264 
1265 /* XXX: this code uses definitions from NetBSD, needs rumpdefs */
1266 ssize_t
1267 ukfs_vfstypes(char *buf, size_t buflen)
1268 {
1269 	int mib[3];
1270 	struct sysctlnode q, ans[128];
1271 	size_t alen;
1272 	int i;
1273 
1274 	mib[0] = CTL_VFS;
1275 	mib[1] = VFS_GENERIC;
1276 	mib[2] = CTL_QUERY;
1277 	alen = sizeof(ans);
1278 
1279 	memset(&q, 0, sizeof(q));
1280 	q.sysctl_flags = SYSCTL_VERSION;
1281 
1282 	if (rump_sys___sysctl(mib, 3, ans, &alen, &q, sizeof(q)) == -1) {
1283 		return -1;
1284 	}
1285 
1286 	for (i = 0; i < alen/sizeof(ans[0]); i++)
1287 		if (strcmp("fstypes", ans[i].sysctl_name) == 0)
1288 			break;
1289 	if (i == alen/sizeof(ans[0])) {
1290 		errno = ENXIO;
1291 		return -1;
1292 	}
1293 
1294 	mib[0] = CTL_VFS;
1295 	mib[1] = VFS_GENERIC;
1296 	mib[2] = ans[i].sysctl_num;
1297 
1298 	if (rump_sys___sysctl(mib, 3, buf, &buflen, NULL, 0) == -1) {
1299 		return -1;
1300 	}
1301 
1302 	return buflen;
1303 }
1304 
1305 /*
1306  * Utilities
1307  */
/*
 * Create a directory and all missing directories leading up to it,
 * "mkdir -p" style.
 *
 * Each prefix of pathname is passed in turn to mkdirfn along with fs.
 * A component which already exists is not an error.  The mode given
 * to mkdirfn is "mode" with the calling process' umask applied.
 *
 * Returns 0 on success.  If mkdirfn fails for some other reason than
 * EEXIST, -1 is returned and errno is as mkdirfn left it.
 */
static int
builddirs(const char *pathname, mode_t mode,
	int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs)
{
	char *f1, *f2;
	int rv, sverrno;
	mode_t mask;
	bool end;

	/*ukfs_umask((mask = ukfs_umask(0)));*/
	/* umask can only be read by setting it, so put it right back */
	umask((mask = umask(0)));

	f1 = f2 = strdup(pathname);
	if (f1 == NULL) {
		errno = ENOMEM;
		return -1;
	}

	end = false;
	for (;;) {
		/* find next component */
		f2 += strspn(f2, "/");
		f2 += strcspn(f2, "/");
		if (*f2 == '\0')
			end = true;
		else
			*f2 = '\0';	/* cut the path here for now */

		rv = mkdirfn(fs, f1, mode & ~mask);
		/* errno means something only if the call failed */
		if (rv == -1 && errno == EEXIST)
			rv = 0;

		if (rv == -1 || end)
			break;

		/* undo the cut and move on to the next component */
		*f2 = '/';
	}

	/* do not let free() disturb the errno reported to the caller */
	sverrno = errno;
	free(f1);
	if (rv == -1)
		errno = sverrno;

	return rv;
}
1350 
1351 int
1352 ukfs_util_builddirs(struct ukfs *ukfs, const char *pathname, mode_t mode)
1353 {
1354 
1355 	return builddirs(pathname, mode, ukfs_mkdir, ukfs);
1356 }
1357