xref: /netbsd-src/lib/libukfs/ukfs.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: ukfs.c,v 1.54 2010/09/07 17:16:18 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Finnish Cultural Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
/*
 * This library enables access to file systems directly without
 * involving system calls.
 */
35 
36 #ifdef __linux__
37 #define _XOPEN_SOURCE 500
38 #define _BSD_SOURCE
39 #define _FILE_OFFSET_BITS 64
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/queue.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/mount.h>
47 
48 #include <assert.h>
49 #include <dirent.h>
50 #include <dlfcn.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <pthread.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <stdint.h>
60 
61 #include <rump/ukfs.h>
62 
63 #include <rump/rump.h>
64 #include <rump/rump_syscalls.h>
65 #include <rump/rumpuser.h>
66 
67 #include "ukfs_int_disklabel.h"
68 
69 #define UKFS_MODE_DEFAULT 0555
70 
/*
 * Per-mount handle returned by the mount routines in this file and
 * passed to every ukfs_*() operation.
 */
struct ukfs {
	/* held by ukfs_chdir() while it swaps ukfs_cwd */
	pthread_spinlock_t ukfs_spin;

	/* mount point looked up with rump_pub_vfs_getmp() at mount time */
	struct mount *ukfs_mp;
	/* base lwp created at mount time; calls switch to it before forking */
	struct lwp *ukfs_lwp;
	/* opaque caller data, see ukfs_setspecific()/ukfs_getspecific() */
	void *ukfs_specific;

	/* host descriptor of the backing device file, or -1 if none is kept */
	int ukfs_devfd;

	/* copy of the etfs key; NULL when no device was registered */
	char *ukfs_devpath;
	/* copy of the mountpoint path; used as chroot target by precall() */
	char *ukfs_mountpath;
	/* heap-allocated working directory, restored by precall() */
	char *ukfs_cwd;

	/* partition reference taken at mount, dropped in ukfs_release() */
	struct ukfs_part *ukfs_part;
};
86 
/*
 * Forward declaration: create every component of a path using the
 * supplied mkdir callback.  Defined near the end of this file.
 */
static int builddirs(const char *, mode_t,
    int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *);
89 
90 struct mount *
91 ukfs_getmp(struct ukfs *ukfs)
92 {
93 
94 	return ukfs->ukfs_mp;
95 }
96 
97 void
98 ukfs_setspecific(struct ukfs *ukfs, void *priv)
99 {
100 
101 	ukfs->ukfs_specific = priv;
102 }
103 
104 void *
105 ukfs_getspecific(struct ukfs *ukfs)
106 {
107 
108 	return ukfs->ukfs_specific;
109 }
110 
/*
 * Stub out the spinlock primitives when pthread linkage is not wanted.
 *
 * The stubs must remain valid C wherever the real calls appear:
 * pthread_spin_init() is used inside an expression in this file, so it
 * expands to a "success" value rather than to nothing.  The remaining
 * primitives are only used as statements and expand to a void no-op.
 */
#ifdef DONT_WANT_PTHREAD_LINKAGE
#define pthread_spin_lock(a)	((void)0)
#define pthread_spin_unlock(a)	((void)0)
#define pthread_spin_init(a,b)	(0)
#define pthread_spin_destroy(a)	((void)0)
#endif
117 
118 static int
119 precall(struct ukfs *ukfs, struct lwp **curlwp)
120 {
121 
122 	/* save previous.  ensure start from pristine context */
123 	*curlwp = rump_pub_lwproc_curlwp();
124 	if (*curlwp)
125 		rump_pub_lwproc_switch(ukfs->ukfs_lwp);
126 	rump_pub_lwproc_newproc();
127 
128 	if (rump_sys_chroot(ukfs->ukfs_mountpath) == -1)
129 		return errno;
130 	if (rump_sys_chdir(ukfs->ukfs_cwd) == -1)
131 		return errno;
132 
133 	return 0;
134 }
135 
/*
 * Leave the context set up by precall(): release the current lwp and,
 * if the caller had one before, switch back to it.
 */
static void
postcall(struct lwp *curlwp)
{
	rump_pub_lwproc_releaselwp();

	if (curlwp != NULL) {
		rump_pub_lwproc_switch(curlwp);
	}
}
144 
/*
 * PRECALL() declares the variable `ukfs_curlwp' in the enclosing scope
 * and runs precall() on the identifier `ukfs', which must be visible at
 * the expansion site.  If precall() reports an error, errno is set to
 * that value and the *enclosing function* returns -1.
 *
 * POSTCALL() hands the saved `ukfs_curlwp' to postcall().  The macro
 * already ends in a semicolon.
 */
#define PRECALL()							\
struct lwp *ukfs_curlwp;						\
do {									\
	int ukfs_rv;							\
	if ((ukfs_rv = precall(ukfs, &ukfs_curlwp)) != 0) {		\
		errno = ukfs_rv;					\
		return -1;						\
	}								\
} while (/*CONSTCOND*/0)

#define POSTCALL() postcall(ukfs_curlwp);
156 
/*
 * Description of the region of a device file that a mount uses.
 * Dynamically allocated instances are reference counted.
 */
struct ukfs_part {
	/* protects part_refcount */
	pthread_spinlock_t part_lck;
	/* starts at 1 in ukfs_part_probe(); freed when it drops to 0 */
	int part_refcount;

	/* one of enum ukfs_parttype */
	int part_type;
	/* disklabel partition letter; set only for UKFS_PART_DISKLABEL */
	char part_labelchar;
	/* byte offset of the region within the device file */
	off_t part_devoff;
	/* region size in bytes, or RUMP_ETFS_SIZE_ENDOFF */
	off_t part_devsize;
};
166 
/* How a partition was specified; stored in ukfs_part.part_type. */
enum ukfs_parttype { UKFS_PART_NONE, UKFS_PART_DISKLABEL, UKFS_PART_OFFSET };

/*
 * Statically allocated pseudo partitions.  They are compared by address
 * and are never reference counted or freed (see ukfs_part_release()).
 *
 * ukfs__part_none: starts at offset 0 with size RUMP_ETFS_SIZE_ENDOFF
 *                  (presumably "up to the end of the device").
 * ukfs__part_na:   used by ukfs_mount(); the mount code skips all device
 *                  file handling for it.
 */
static struct ukfs_part ukfs__part_none = {
	.part_type = UKFS_PART_NONE,
	.part_devoff = 0,
	.part_devsize = RUMP_ETFS_SIZE_ENDOFF,
};
static struct ukfs_part ukfs__part_na;
struct ukfs_part *ukfs_part_none = &ukfs__part_none;
struct ukfs_part *ukfs_part_na = &ukfs__part_na;

/*
 * Convert a partition size to an fcntl() lock length: the ENDOFF marker
 * becomes 0, any other size is used unchanged.
 */
#define PART2LOCKSIZE(len) ((len) == RUMP_ETFS_SIZE_ENDOFF ? 0 : (len))
179 
180 int
181 _ukfs_init(int version)
182 {
183 	int rv;
184 
185 	if (version != UKFS_VERSION) {
186 		printf("incompatible ukfs version, %d vs. %d\n",
187 		    version, UKFS_VERSION);
188 		errno = EPROGMISMATCH;
189 		return -1;
190 	}
191 
192 	if ((rv = rump_init()) != 0) {
193 		errno = rv;
194 		return -1;
195 	}
196 
197 	return 0;
198 }
199 
/*
 * mkdir callback for builddirs() that works directly on the rump
 * namespace; the ukfs handle argument is ignored.
 */
/*ARGSUSED*/
static int
rumpmkdir(struct ukfs *dummy, const char *path, mode_t mode)
{
	int rv = rump_sys_mkdir(path, mode);

	return rv;
}
207 
208 int
209 ukfs_part_probe(char *devpath, struct ukfs_part **partp)
210 {
211 	struct ukfs_part *part;
212 	char *p;
213 	int error = 0;
214 	int devfd = -1;
215 
216 	if ((p = strstr(devpath, UKFS_PARTITION_SCANMAGIC)) != NULL) {
217 		fprintf(stderr, "ukfs: %%PART is deprecated.  use "
218 		    "%%DISKLABEL instead\n");
219 		errno = ENODEV;
220 		return -1;
221 	}
222 
223 	part = malloc(sizeof(*part));
224 	if (part == NULL) {
225 		errno = ENOMEM;
226 		return -1;
227 	}
228 	if (pthread_spin_init(&part->part_lck, PTHREAD_PROCESS_PRIVATE) == -1) {
229 		error = errno;
230 		free(part);
231 		errno = error;
232 		return -1;
233 	}
234 	part->part_type = UKFS_PART_NONE;
235 	part->part_refcount = 1;
236 
237 	/*
238 	 * Check for magic in pathname:
239 	 *   disklabel: /regularpath%DISKLABEL:labelchar%\0
240 	 *     offsets: /regularpath%OFFSET:start,end%\0
241 	 */
242 #define MAGICADJ_DISKLABEL(p, n) (p+sizeof(UKFS_DISKLABEL_SCANMAGIC)-1+n)
243 	if ((p = strstr(devpath, UKFS_DISKLABEL_SCANMAGIC)) != NULL
244 	    && strlen(p) == UKFS_DISKLABEL_MAGICLEN
245 	    && *(MAGICADJ_DISKLABEL(p,1)) == '%') {
246 		if (*(MAGICADJ_DISKLABEL(p,0)) >= 'a' &&
247 		    *(MAGICADJ_DISKLABEL(p,0)) < 'a' + UKFS_MAXPARTITIONS) {
248 			struct ukfs__disklabel dl;
249 			struct ukfs__partition *pp;
250 			char buf[65536];
251 			char labelchar = *(MAGICADJ_DISKLABEL(p,0));
252 			int partition = labelchar - 'a';
253 
254 			*p = '\0';
255 			devfd = open(devpath, O_RDONLY);
256 			if (devfd == -1) {
257 				error = errno;
258 				goto out;
259 			}
260 
261 			/* Locate the disklabel and find the partition. */
262 			if (pread(devfd, buf, sizeof(buf), 0) == -1) {
263 				error = errno;
264 				goto out;
265 			}
266 
267 			if (ukfs__disklabel_scan(&dl, buf, sizeof(buf)) != 0) {
268 				error = ENOENT;
269 				goto out;
270 			}
271 
272 			if (dl.d_npartitions < partition) {
273 				error = ENOENT;
274 				goto out;
275 			}
276 
277 			pp = &dl.d_partitions[partition];
278 			part->part_type = UKFS_PART_DISKLABEL;
279 			part->part_labelchar = labelchar;
280 			part->part_devoff = pp->p_offset << DEV_BSHIFT;
281 			part->part_devsize = pp->p_size << DEV_BSHIFT;
282 		} else {
283 			error = EINVAL;
284 		}
285 #define MAGICADJ_OFFSET(p, n) (p+sizeof(UKFS_OFFSET_SCANMAGIC)-1+n)
286 	} else if (((p = strstr(devpath, UKFS_OFFSET_SCANMAGIC)) != NULL)
287 	    && (strlen(p) >= UKFS_OFFSET_MINLEN)) {
288 		char *comma, *pers, *ep, *nptr;
289 		u_quad_t val;
290 
291 		comma = strchr(p, ',');
292 		if (comma == NULL) {
293 			error = EINVAL;
294 			goto out;
295 		}
296 		pers = strchr(comma, '%');
297 		if (pers == NULL) {
298 			error = EINVAL;
299 			goto out;
300 		}
301 		*comma = '\0';
302 		*pers = '\0';
303 		*p = '\0';
304 
305 		nptr = MAGICADJ_OFFSET(p,0);
306 		/* check if string is negative */
307 		if (*nptr == '-') {
308 			error = ERANGE;
309 			goto out;
310 		}
311 		val = strtouq(nptr, &ep, 10);
312 		if (val == UQUAD_MAX) {
313 			error = ERANGE;
314 			goto out;
315 		}
316 		if (*ep != '\0') {
317 			error = EADDRNOTAVAIL; /* creative ;) */
318 			goto out;
319 		}
320 		part->part_devoff = val;
321 
322 		/* omstart */
323 
324 		nptr = comma+1;
325 		/* check if string is negative */
326 		if (*nptr == '-') {
327 			error = ERANGE;
328 			goto out;
329 		}
330 		val = strtouq(nptr, &ep, 10);
331 		if (val == UQUAD_MAX) {
332 			error = ERANGE;
333 			goto out;
334 		}
335 		if (*ep != '\0') {
336 			error = EADDRNOTAVAIL; /* creative ;) */
337 			goto out;
338 		}
339 		part->part_devsize = val;
340 		part->part_type = UKFS_PART_OFFSET;
341 	} else {
342 		ukfs_part_release(part);
343 		part = ukfs_part_none;
344 	}
345 
346  out:
347 	if (devfd != -1)
348 		close(devfd);
349 	if (error) {
350 		free(part);
351 		errno = error;
352 	} else {
353 		*partp = part;
354 	}
355 
356 	return error ? -1 : 0;
357 }
358 
359 int
360 ukfs_part_tostring(struct ukfs_part *part, char *str, size_t strsize)
361 {
362 	int rv;
363 
364 	*str = '\0';
365 	/* "pseudo" values */
366 	if (part == ukfs_part_na) {
367 		errno = EINVAL;
368 		return -1;
369 	}
370 	if (part == ukfs_part_none)
371 		return 0;
372 
373 	rv = 0;
374 	switch (part->part_type) {
375 	case UKFS_PART_NONE:
376 		break;
377 
378 	case UKFS_PART_DISKLABEL:
379 		snprintf(str, strsize, "%%DISKLABEL:%c%%",part->part_labelchar);
380 		rv = 1;
381 		break;
382 
383 	case UKFS_PART_OFFSET:
384 		snprintf(str, strsize, "[%llu,%llu]",
385 		    (unsigned long long)part->part_devoff,
386 		    (unsigned long long)(part->part_devoff+part->part_devsize));
387 		rv = 1;
388 		break;
389 	}
390 
391 	return rv;
392 }
393 
394 static void
395 unlockdev(int fd, struct ukfs_part *part)
396 {
397 	struct flock flarg;
398 
399 	if (part == ukfs_part_na)
400 		return;
401 
402 	memset(&flarg, 0, sizeof(flarg));
403 	flarg.l_type = F_UNLCK;
404 	flarg.l_whence = SEEK_SET;
405 	flarg.l_start = part->part_devoff;
406 	flarg.l_len = PART2LOCKSIZE(part->part_devsize);
407 	if (fcntl(fd, F_SETLK, &flarg) == -1)
408 		warn("ukfs: cannot unlock device file");
409 }
410 
411 /*
412  * Open the disk file and flock it.  Also, if we are operation on
413  * an embedded partition, find the partition offset and size from
414  * the disklabel.
415  *
416  * We hard-fail only in two cases:
417  *  1) we failed to get the partition info out (don't know what offset
418  *     to mount from)
419  *  2) we failed to flock the source device (i.e. fcntl() fails,
420  *     not e.g. open() before it)
421  *
422  * Otherwise we let the code proceed to mount and let the file system
423  * throw the proper error.  The only questionable bit is that if we
424  * soft-fail before flock and mount does succeed...
425  *
426  * Returns: -1 error (errno reports error code)
427  *           0 success
428  *
429  * dfdp: -1  device is not open
430  *        n  device is open
431  */
432 static int
433 process_diskdevice(const char *devpath, struct ukfs_part *part, int rdonly,
434 	int *dfdp)
435 {
436 	struct stat sb;
437 	int rv = 0, devfd;
438 
439 	/* defaults */
440 	*dfdp = -1;
441 
442 	devfd = open(devpath, rdonly ? O_RDONLY : O_RDWR);
443 	if (devfd == -1) {
444 		rv = errno;
445 		goto out;
446 	}
447 
448 	if (fstat(devfd, &sb) == -1) {
449 		rv = errno;
450 		goto out;
451 	}
452 
453 	/*
454 	 * We do this only for non-block device since the
455 	 * (NetBSD) kernel allows block device open only once.
456 	 * We also need to close the device for fairly obvious reasons.
457 	 */
458 	if (!S_ISBLK(sb.st_mode)) {
459 		struct flock flarg;
460 
461 		memset(&flarg, 0, sizeof(flarg));
462 		flarg.l_type = rdonly ? F_RDLCK : F_WRLCK;
463 		flarg.l_whence = SEEK_SET;
464 		flarg.l_start = part->part_devoff;
465 		flarg.l_len = PART2LOCKSIZE(part->part_devsize);
466 		if (fcntl(devfd, F_SETLK, &flarg) == -1) {
467 			pid_t holder;
468 			int sverrno;
469 
470 			sverrno = errno;
471 			if (fcntl(devfd, F_GETLK, &flarg) != 1)
472 				holder = flarg.l_pid;
473 			else
474 				holder = -1;
475 			warnx("ukfs_mount: cannot lock device.  held by pid %d",
476 			    holder);
477 			rv = sverrno;
478 			goto out;
479 		}
480 	} else {
481 		close(devfd);
482 		devfd = -1;
483 	}
484 	*dfdp = devfd;
485 
486  out:
487 	if (rv) {
488 		if (devfd != -1)
489 			close(devfd);
490 	}
491 
492 	return rv;
493 }
494 
/*
 * Argument bundle handed to the mfs_mounter() thread; the first five
 * members mirror the parameters of rump_sys_mount().
 */
struct mountinfo {
	const char *mi_vfsname;
	const char *mi_mountpath;
	int mi_mntflags;
	void *mi_arg;
	size_t mi_alen;
	int *mi_error;		/* not referenced by the code in this file */
};
503 static void *
504 mfs_mounter(void *arg)
505 {
506 	struct mountinfo *mi = arg;
507 	int rv;
508 
509 	rv = rump_sys_mount(mi->mi_vfsname, mi->mi_mountpath, mi->mi_mntflags,
510 	    mi->mi_arg, mi->mi_alen);
511 	if (rv) {
512 		warn("mfs mount failed.  fix me.");
513 		abort(); /* XXX */
514 	}
515 
516 	return NULL;
517 }
518 
519 static struct ukfs *
520 doukfsmount(const char *vfsname, const char *devpath, struct ukfs_part *part,
521 	const char *mountpath, int mntflags, void *arg, size_t alen)
522 {
523 	struct ukfs *fs = NULL;
524 	struct lwp *curlwp;
525 	int rv = 0, devfd = -1;
526 	int mounted = 0;
527 	int regged = 0;
528 
529 	pthread_spin_lock(&part->part_lck);
530 	part->part_refcount++;
531 	pthread_spin_unlock(&part->part_lck);
532 	if (part != ukfs_part_na) {
533 		if ((rv = process_diskdevice(devpath, part,
534 		    mntflags & MNT_RDONLY, &devfd)) != 0)
535 			goto out;
536 	}
537 
538 	fs = malloc(sizeof(struct ukfs));
539 	if (fs == NULL) {
540 		rv = ENOMEM;
541 		goto out;
542 	}
543 	memset(fs, 0, sizeof(struct ukfs));
544 
545 	/* create our mountpoint.  this is never removed. */
546 	if (builddirs(mountpath, 0777, rumpmkdir, NULL) == -1) {
547 		if (errno != EEXIST) {
548 			rv = errno;
549 			goto out;
550 		}
551 	}
552 
553 	if (part != ukfs_part_na) {
554 		/* LINTED */
555 		rv = rump_pub_etfs_register_withsize(devpath, devpath,
556 		    RUMP_ETFS_BLK, part->part_devoff, part->part_devsize);
557 		if (rv) {
558 			goto out;
559 		}
560 		regged = 1;
561 	}
562 
563 	/*
564 	 * MFS is special since mount(2) doesn't return.  Hence, we
565 	 * create a thread here.  Could fix mfs to return, but there's
566 	 * too much history for me to bother.
567 	 */
568 	if (strcmp(vfsname, MOUNT_MFS) == 0) {
569 		pthread_t pt;
570 		struct mountinfo mi;
571 		int i;
572 
573 		mi.mi_vfsname = vfsname;
574 		mi.mi_mountpath = mountpath;
575 		mi.mi_mntflags = mntflags;
576 		mi.mi_arg = arg;
577 		mi.mi_alen = alen;
578 
579 		if (pthread_create(&pt, NULL, mfs_mounter, &mi) == -1) {
580 			rv = errno;
581 			goto out;
582 		}
583 
584 		for (i = 0;i < 100000; i++) {
585 			struct statvfs svfsb;
586 
587 			rv = rump_sys_statvfs1(mountpath, &svfsb, ST_WAIT);
588 			if (rv == -1) {
589 				rv = errno;
590 				goto out;
591 			}
592 
593 			if (strcmp(svfsb.f_mntonname, mountpath) == 0 &&
594 			    strcmp(svfsb.f_fstypename, MOUNT_MFS) == 0) {
595 				break;
596 			}
597 			usleep(1);
598 		}
599 	} else {
600 		rv = rump_sys_mount(vfsname, mountpath, mntflags, arg, alen);
601 		if (rv) {
602 			rv = errno;
603 			goto out;
604 		}
605 	}
606 
607 	mounted = 1;
608 	rv = rump_pub_vfs_getmp(mountpath, &fs->ukfs_mp);
609 	if (rv) {
610 		goto out;
611 	}
612 
613 	if (regged) {
614 		fs->ukfs_devpath = strdup(devpath);
615 	}
616 	fs->ukfs_mountpath = strdup(mountpath);
617 	pthread_spin_init(&fs->ukfs_spin, PTHREAD_PROCESS_SHARED);
618 	fs->ukfs_devfd = devfd;
619 	fs->ukfs_part = part;
620 	assert(rv == 0);
621 
622 	curlwp = rump_pub_lwproc_curlwp();
623 	rump_pub_lwproc_newlwp(0);
624 	fs->ukfs_lwp = rump_pub_lwproc_curlwp();
625 	fs->ukfs_cwd = strdup("/");
626 	rump_pub_lwproc_switch(curlwp);
627 
628  out:
629 	if (rv) {
630 		if (fs) {
631 			free(fs);
632 			fs = NULL;
633 		}
634 		if (mounted)
635 			rump_sys_unmount(mountpath, MNT_FORCE);
636 		if (regged)
637 			rump_pub_etfs_remove(devpath);
638 		if (devfd != -1) {
639 			unlockdev(devfd, part);
640 			close(devfd);
641 		}
642 		ukfs_part_release(part);
643 		errno = rv;
644 	}
645 
646 	return fs;
647 }
648 
649 struct ukfs *
650 ukfs_mount(const char *vfsname, const char *devpath,
651 	const char *mountpath, int mntflags, void *arg, size_t alen)
652 {
653 
654 	return doukfsmount(vfsname, devpath, ukfs_part_na,
655 	    mountpath, mntflags, arg, alen);
656 }
657 
/*
 * Mount a file system residing in the given partition of a device file.
 * Returns a ukfs handle or NULL (see doukfsmount()).
 */
struct ukfs *
ukfs_mount_disk(const char *vfsname, const char *devpath,
	struct ukfs_part *part, const char *mountpath, int mntflags,
	void *arg, size_t alen)
{
	struct ukfs *fs;

	fs = doukfsmount(vfsname, devpath, part,
	    mountpath, mntflags, arg, alen);
	return fs;
}
667 
668 int
669 ukfs_release(struct ukfs *fs, int flags)
670 {
671 	struct lwp *curlwp = rump_pub_lwproc_curlwp();
672 
673 	/* get root lwp */
674 	rump_pub_lwproc_switch(fs->ukfs_lwp);
675 	rump_pub_lwproc_newproc();
676 
677 	if ((flags & UKFS_RELFLAG_NOUNMOUNT) == 0) {
678 		int rv, mntflag, error;
679 
680 		mntflag = 0;
681 		if (flags & UKFS_RELFLAG_FORCE)
682 			mntflag = MNT_FORCE;
683 
684 		rv = rump_sys_unmount(fs->ukfs_mountpath, mntflag);
685 		if (rv == -1) {
686 			error = errno;
687 			rump_pub_lwproc_releaselwp();
688 			if (curlwp)
689 				rump_pub_lwproc_switch(curlwp);
690 			errno = error;
691 			return -1;
692 		}
693 	}
694 
695 	if (fs->ukfs_devpath) {
696 		rump_pub_etfs_remove(fs->ukfs_devpath);
697 		free(fs->ukfs_devpath);
698 	}
699 	free(fs->ukfs_mountpath);
700 	free(fs->ukfs_cwd);
701 
702 	/* release this routine's lwp and ukfs base lwp */
703 	rump_pub_lwproc_releaselwp();
704 	rump_pub_lwproc_switch(fs->ukfs_lwp);
705 	rump_pub_lwproc_releaselwp();
706 
707 	pthread_spin_destroy(&fs->ukfs_spin);
708 	if (fs->ukfs_devfd != -1) {
709 		unlockdev(fs->ukfs_devfd, fs->ukfs_part);
710 		close(fs->ukfs_devfd);
711 	}
712 	ukfs_part_release(fs->ukfs_part);
713 	free(fs);
714 
715 	if (curlwp)
716 		rump_pub_lwproc_switch(curlwp);
717 
718 	return 0;
719 }
720 
721 void
722 ukfs_part_release(struct ukfs_part *part)
723 {
724 	int release;
725 
726 	if (part != ukfs_part_none && part != ukfs_part_na) {
727 		pthread_spin_lock(&part->part_lck);
728 		release = --part->part_refcount == 0;
729 		pthread_spin_unlock(&part->part_lck);
730 		if (release) {
731 			pthread_spin_destroy(&part->part_lck);
732 			free(part);
733 		}
734 	}
735 }
736 
/*
 * Complete body for the simple int-returning wrappers: enter the mount's
 * context with PRECALL(), evaluate `thecall', leave with POSTCALL() and
 * return the call's result.
 *
 * The `ukfs' macro parameter is not used in the expansion; PRECALL()
 * picks up the identifier `ukfs' from the enclosing function directly.
 */
#define STDCALL(ukfs, thecall)						\
	int rv = 0;							\
									\
	PRECALL();							\
	rv = thecall;							\
	POSTCALL();							\
	return rv;
744 
745 int
746 ukfs_opendir(struct ukfs *ukfs, const char *dirname, struct ukfs_dircookie **c)
747 {
748 	struct vnode *vp;
749 	int rv;
750 
751 	PRECALL();
752 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
753 	    NULL, &vp, NULL);
754 	POSTCALL();
755 
756 	if (rv == 0) {
757 		RUMP_VOP_UNLOCK(vp);
758 	} else {
759 		errno = rv;
760 		rv = -1;
761 	}
762 
763 	/*LINTED*/
764 	*c = (struct ukfs_dircookie *)vp;
765 	return rv;
766 }
767 
768 static int
769 getmydents(struct vnode *vp, off_t *off, uint8_t *buf, size_t bufsize)
770 {
771 	struct uio *uio;
772 	size_t resid;
773 	int rv, eofflag;
774 	struct kauth_cred *cred;
775 
776 	uio = rump_pub_uio_setup(buf, bufsize, *off, RUMPUIO_READ);
777 	cred = rump_pub_cred_create(0, 0, 0, NULL);
778 	rv = RUMP_VOP_READDIR(vp, uio, cred, &eofflag, NULL, NULL);
779 	rump_pub_cred_put(cred);
780 	RUMP_VOP_UNLOCK(vp);
781 	*off = rump_pub_uio_getoff(uio);
782 	resid = rump_pub_uio_free(uio);
783 
784 	if (rv) {
785 		errno = rv;
786 		return -1;
787 	}
788 
789 	/* LINTED: not totally correct return type, but follows syscall */
790 	return bufsize - resid;
791 }
792 
793 /*ARGSUSED*/
794 int
795 ukfs_getdents_cookie(struct ukfs *ukfs, struct ukfs_dircookie *c, off_t *off,
796 	uint8_t *buf, size_t bufsize)
797 {
798 	/*LINTED*/
799 	struct vnode *vp = (struct vnode *)c;
800 
801 	RUMP_VOP_LOCK(vp, RUMP_LK_SHARED);
802 	return getmydents(vp, off, buf, bufsize);
803 }
804 
805 int
806 ukfs_getdents(struct ukfs *ukfs, const char *dirname, off_t *off,
807 	uint8_t *buf, size_t bufsize)
808 {
809 	struct vnode *vp;
810 	int rv;
811 
812 	PRECALL();
813 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
814 	    NULL, &vp, NULL);
815 	if (rv) {
816 		POSTCALL();
817 		errno = rv;
818 		return -1;
819 	}
820 
821 	rv = getmydents(vp, off, buf, bufsize);
822 	rump_pub_vp_rele(vp);
823 	POSTCALL();
824 	return rv;
825 }
826 
/* Release the vnode behind a directory cookie.  Always returns 0. */
/*ARGSUSED*/
int
ukfs_closedir(struct ukfs *ukfs, struct ukfs_dircookie *c)
{
	/*LINTED*/
	struct vnode *dirvp = (struct vnode *)c;

	rump_pub_vp_rele(dirvp);
	return 0;
}
836 
/*
 * Open a file relative to the mount.  Returns a rump file descriptor,
 * or -1 when the open fails.
 */
int
ukfs_open(struct ukfs *ukfs, const char *filename, int flags)
{
	int fd;

	PRECALL();
	fd = rump_sys_open(filename, flags, 0);
	POSTCALL();

	/* fd is already -1 on failure */
	return fd;
}
850 
851 ssize_t
852 ukfs_read(struct ukfs *ukfs, const char *filename, off_t off,
853 	uint8_t *buf, size_t bufsize)
854 {
855 	int fd;
856 	ssize_t xfer = -1; /* XXXgcc */
857 
858 	PRECALL();
859 	fd = rump_sys_open(filename, RUMP_O_RDONLY, 0);
860 	if (fd == -1)
861 		goto out;
862 
863 	xfer = rump_sys_pread(fd, buf, bufsize, off);
864 	rump_sys_close(fd);
865 
866  out:
867 	POSTCALL();
868 	if (fd == -1) {
869 		return -1;
870 	}
871 	return xfer;
872 }
873 
/* Positional read on an already open rump descriptor. */
/*ARGSUSED*/
ssize_t
ukfs_read_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen)
{
	ssize_t nread;

	nread = rump_sys_pread(fd, buf, buflen, off);
	return nread;
}
881 
882 ssize_t
883 ukfs_write(struct ukfs *ukfs, const char *filename, off_t off,
884 	uint8_t *buf, size_t bufsize)
885 {
886 	int fd;
887 	ssize_t xfer = -1; /* XXXgcc */
888 
889 	PRECALL();
890 	fd = rump_sys_open(filename, RUMP_O_WRONLY, 0);
891 	if (fd == -1)
892 		goto out;
893 
894 	/* write and commit */
895 	xfer = rump_sys_pwrite(fd, buf, bufsize, off);
896 	if (xfer > 0)
897 		rump_sys_fsync(fd);
898 
899 	rump_sys_close(fd);
900 
901  out:
902 	POSTCALL();
903 	if (fd == -1) {
904 		return -1;
905 	}
906 	return xfer;
907 }
908 
/*
 * Positional write on an already open rump descriptor; when dosync is
 * non-zero and data was written, the file is fsync'd afterwards.
 */
/*ARGSUSED*/
ssize_t
ukfs_write_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen,
	int dosync)
{
	ssize_t nwritten = rump_sys_pwrite(fd, buf, buflen, off);

	if (dosync && nwritten > 0) {
		rump_sys_fsync(fd);
	}
	return nwritten;
}
922 
/*
 * Close a rump descriptor.  The result of the close is discarded and 0
 * is always returned.
 */
/*ARGSUSED*/
int
ukfs_close(struct ukfs *ukfs, int fd)
{
	(void)rump_sys_close(fd);

	return 0;
}
931 
932 int
933 ukfs_create(struct ukfs *ukfs, const char *filename, mode_t mode)
934 {
935 	int fd;
936 
937 	PRECALL();
938 	fd = rump_sys_open(filename, RUMP_O_WRONLY | RUMP_O_CREAT, mode);
939 	if (fd == -1)
940 		return -1;
941 	rump_sys_close(fd);
942 
943 	POSTCALL();
944 	return 0;
945 }
946 
/* mknod(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_mknod(struct ukfs *ukfs, const char *path, mode_t mode, dev_t dev)
{
	int rv;

	PRECALL();
	rv = rump_sys_mknod(path, mode, dev);
	POSTCALL();

	return rv;
}
953 
/* mkfifo(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_mkfifo(struct ukfs *ukfs, const char *path, mode_t mode)
{
	int rv;

	PRECALL();
	rv = rump_sys_mkfifo(path, mode);
	POSTCALL();

	return rv;
}
960 
/* mkdir(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_mkdir(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int rv;

	PRECALL();
	rv = rump_sys_mkdir(filename, mode);
	POSTCALL();

	return rv;
}
967 
/* unlink(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_remove(struct ukfs *ukfs, const char *filename)
{
	int rv;

	PRECALL();
	rv = rump_sys_unlink(filename);
	POSTCALL();

	return rv;
}
974 
/* rmdir(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_rmdir(struct ukfs *ukfs, const char *filename)
{
	int rv;

	PRECALL();
	rv = rump_sys_rmdir(filename);
	POSTCALL();

	return rv;
}
981 
/* link(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_link(struct ukfs *ukfs, const char *filename, const char *f_create)
{
	int rv;

	PRECALL();
	rv = rump_sys_link(filename, f_create);
	POSTCALL();

	return rv;
}
988 
/* symlink(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_symlink(struct ukfs *ukfs, const char *filename, const char *linkname)
{
	int rv;

	PRECALL();
	rv = rump_sys_symlink(filename, linkname);
	POSTCALL();

	return rv;
}
995 
/*
 * readlink(2) relative to the mount; returns the rump syscall's result
 * (the buffer is filled by rump_sys_readlink()).
 */
ssize_t
ukfs_readlink(struct ukfs *ukfs, const char *filename,
	char *linkbuf, size_t buflen)
{
	ssize_t linklen;

	PRECALL();
	linklen = rump_sys_readlink(filename, linkbuf, buflen);
	POSTCALL();

	return linklen;
}
1007 
/* rename(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_rename(struct ukfs *ukfs, const char *from, const char *to)
{
	int rv;

	PRECALL();
	rv = rump_sys_rename(from, to);
	POSTCALL();

	return rv;
}
1014 
1015 int
1016 ukfs_chdir(struct ukfs *ukfs, const char *path)
1017 {
1018 	char *newpath, *oldpath;
1019 	int rv;
1020 
1021 	PRECALL();
1022 	rv = rump_sys_chdir(path);
1023 	if (rv == -1)
1024 		goto out;
1025 
1026 	newpath = malloc(MAXPATHLEN);
1027 	if (rump_sys___getcwd(newpath, MAXPATHLEN) == -1) {
1028 		goto out;
1029 	}
1030 
1031 	pthread_spin_lock(&ukfs->ukfs_spin);
1032 	oldpath = ukfs->ukfs_cwd;
1033 	ukfs->ukfs_cwd = newpath;
1034 	pthread_spin_unlock(&ukfs->ukfs_spin);
1035 	free(oldpath);
1036 
1037  out:
1038 	POSTCALL();
1039 	return rv;
1040 }
1041 
/* stat(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_stat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{

	STDCALL(ukfs, rump_sys_stat(filename, file_stat));
}
1053 
/* lstat(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_lstat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{

	STDCALL(ukfs, rump_sys_lstat(filename, file_stat));
}
1065 
/* chmod(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_chmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int rv;

	PRECALL();
	rv = rump_sys_chmod(filename, mode);
	POSTCALL();

	return rv;
}
1072 
/* lchmod(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_lchmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int rv;

	PRECALL();
	rv = rump_sys_lchmod(filename, mode);
	POSTCALL();

	return rv;
}
1079 
/* chown(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_chown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int rv;

	PRECALL();
	rv = rump_sys_chown(filename, uid, gid);
	POSTCALL();

	return rv;
}
1086 
/* lchown(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_lchown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int rv;

	PRECALL();
	rv = rump_sys_lchown(filename, uid, gid);
	POSTCALL();

	return rv;
}
1093 
1094 int
1095 ukfs_chflags(struct ukfs *ukfs, const char *filename, u_long flags)
1096 {
1097 
1098 	STDCALL(ukfs, rump_sys_chflags(filename, flags));
1099 }
1100 
1101 int
1102 ukfs_lchflags(struct ukfs *ukfs, const char *filename, u_long flags)
1103 {
1104 
1105 	STDCALL(ukfs, rump_sys_lchflags(filename, flags));
1106 }
1107 
/* utimes(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_utimes(struct ukfs *ukfs, const char *filename, const struct timeval *tptr)
{
	int rv;

	PRECALL();
	rv = rump_sys_utimes(filename, tptr);
	POSTCALL();

	return rv;
}
1114 
/* lutimes(2) relative to the mount; returns the rump syscall's result. */
int
ukfs_lutimes(struct ukfs *ukfs, const char *filename,
	      const struct timeval *tptr)
{
	int rv;

	PRECALL();
	rv = rump_sys_lutimes(filename, tptr);
	POSTCALL();

	return rv;
}
1122 
1123 /*
1124  * Dynamic module support
1125  */
1126 
1127 /* load one library */
1128 
/*
 * Load one shared object and register the kernel modules it carries.
 *
 * Returns:
 *    1  the object was loaded and its modules were initialised
 *    0  dlopen() failed because of an undefined symbol; the caller may
 *       retry after loading other objects (see ukfs_modload_dir())
 *   -1  any other failure.  errno is set when the object lacks the
 *       module link set (EINVAL) or module initialisation fails, but
 *       not when dlopen() itself fails.
 *
 * XXX: the dlerror stuff isn't really threadsafe, but then again I
 * can't protect against other threads calling dl*() outside of ukfs,
 * so just live with it being flimsy
 */
int
ukfs_modload(const char *fname)
{
	void *handle;
	const struct modinfo *const *mi_start, *const *mi_end;
	int error;

	handle = dlopen(fname, RTLD_LAZY|RTLD_GLOBAL);
	if (handle == NULL) {
		const char *dlmsg = dlerror();

		/* dlerror() yields NULL if no error is pending */
		if (dlmsg == NULL)
			dlmsg = "unknown error";
		if (strstr(dlmsg, "Undefined symbol"))
			return 0;
		/* warnx() appends the newline itself */
		warnx("dlopen %s failed: %s", fname, dlmsg);
		/* XXXerrno */
		return -1;
	}

	mi_start = dlsym(handle, "__start_link_set_modules");
	mi_end = dlsym(handle, "__stop_link_set_modules");
	if (mi_start && mi_end) {
		error = rump_pub_module_init(mi_start,
		    (size_t)(mi_end-mi_start));
		if (error)
			goto errclose;
		return 1;
	}
	/* not a module object: the link set symbols are missing */
	error = EINVAL;

 errclose:
	dlclose(handle);
	errno = error;
	return -1;
}
1167 
/*
 * List node remembering a module file whose load has to be retried
 * (ukfs_modload() returned 0 for it).
 */
struct loadfail {
	char *pname;			/* strdup'd path of the module file */

	LIST_ENTRY(loadfail) entries;
};

/* Only directory entries named librumpfs_*.so are considered modules. */
#define RUMPFSMOD_PREFIX "librumpfs_"
#define RUMPFSMOD_SUFFIX ".so"
1176 
1177 int
1178 ukfs_modload_dir(const char *dir)
1179 {
1180 	char nbuf[MAXPATHLEN+1], *p;
1181 	struct dirent entry, *result;
1182 	DIR *libdir;
1183 	struct loadfail *lf, *nlf;
1184 	int error, nloaded = 0, redo;
1185 	LIST_HEAD(, loadfail) lfs;
1186 
1187 	libdir = opendir(dir);
1188 	if (libdir == NULL)
1189 		return -1;
1190 
1191 	LIST_INIT(&lfs);
1192 	for (;;) {
1193 		if ((error = readdir_r(libdir, &entry, &result)) != 0)
1194 			break;
1195 		if (!result)
1196 			break;
1197 		if (strncmp(result->d_name, RUMPFSMOD_PREFIX,
1198 		    strlen(RUMPFSMOD_PREFIX)) != 0)
1199 			continue;
1200 		if (((p = strstr(result->d_name, RUMPFSMOD_SUFFIX)) == NULL)
1201 		    || strlen(p) != strlen(RUMPFSMOD_SUFFIX))
1202 			continue;
1203 		strlcpy(nbuf, dir, sizeof(nbuf));
1204 		strlcat(nbuf, "/", sizeof(nbuf));
1205 		strlcat(nbuf, result->d_name, sizeof(nbuf));
1206 		switch (ukfs_modload(nbuf)) {
1207 		case 0:
1208 			lf = malloc(sizeof(*lf));
1209 			if (lf == NULL) {
1210 				error = ENOMEM;
1211 				break;
1212 			}
1213 			lf->pname = strdup(nbuf);
1214 			if (lf->pname == NULL) {
1215 				free(lf);
1216 				error = ENOMEM;
1217 				break;
1218 			}
1219 			LIST_INSERT_HEAD(&lfs, lf, entries);
1220 			break;
1221 		case 1:
1222 			nloaded++;
1223 			break;
1224 		default:
1225 			/* ignore errors */
1226 			break;
1227 		}
1228 	}
1229 	closedir(libdir);
1230 	if (error && nloaded != 0)
1231 		error = 0;
1232 
1233 	/*
1234 	 * El-cheapo dependency calculator.  Just try to load the
1235 	 * modules n times in a loop
1236 	 */
1237 	for (redo = 1; redo;) {
1238 		redo = 0;
1239 		nlf = LIST_FIRST(&lfs);
1240 		while ((lf = nlf) != NULL) {
1241 			nlf = LIST_NEXT(lf, entries);
1242 			if (ukfs_modload(lf->pname) == 1) {
1243 				nloaded++;
1244 				redo = 1;
1245 				LIST_REMOVE(lf, entries);
1246 				free(lf->pname);
1247 				free(lf);
1248 			}
1249 		}
1250 	}
1251 
1252 	while ((lf = LIST_FIRST(&lfs)) != NULL) {
1253 		LIST_REMOVE(lf, entries);
1254 		free(lf->pname);
1255 		free(lf);
1256 	}
1257 
1258 	if (error && nloaded == 0) {
1259 		errno = error;
1260 		return -1;
1261 	}
1262 
1263 	return nloaded;
1264 }
1265 
/*
 * Fetch the value of the "fstypes" node under vfs.generic from the rump
 * kernel's sysctl tree into buf.
 *
 * Returns the length reported by sysctl, or -1 with errno set (ENXIO if
 * no node named "fstypes" exists).
 */
/* XXX: this code uses definitions from NetBSD, needs rumpdefs */
ssize_t
ukfs_vfstypes(char *buf, size_t buflen)
{
	int mib[3] = { CTL_VFS, VFS_GENERIC, CTL_QUERY };
	struct sysctlnode q, ans[128];
	size_t alen = sizeof(ans);
	int i;

	memset(&q, 0, sizeof(q));
	q.sysctl_flags = SYSCTL_VERSION;

	/* list the children of vfs.generic */
	if (rump_sys___sysctl(mib, 3, ans, &alen, &q, sizeof(q)) == -1) {
		return -1;
	}

	i = 0;
	while (i < alen/sizeof(ans[0]) &&
	    strcmp("fstypes", ans[i].sysctl_name) != 0)
		i++;
	if (i == alen/sizeof(ans[0])) {
		errno = ENXIO;
		return -1;
	}

	/* now read the node itself */
	mib[2] = ans[i].sysctl_num;
	if (rump_sys___sysctl(mib, 3, buf, &buflen, NULL, 0) == -1) {
		return -1;
	}

	return buflen;
}
1305 
1306 /*
1307  * Utilities
1308  */
/*
 * Create every component of pathname, front to back, by calling mkdirfn
 * on each successively longer prefix.  The mode passed to mkdirfn is
 * `mode' with the host process umask applied.
 *
 * A component counts as created whenever errno equals EEXIST after the
 * callback.  Returns the (possibly adjusted) result of the last
 * callback invocation; -1 with errno ENOMEM if the path cannot be
 * copied.
 */
static int
builddirs(const char *pathname, mode_t mode,
	int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs)
{
	char *pathcopy, *cursor;
	mode_t mask;
	bool last;
	int rv;

	/*ukfs_umask((mask = ukfs_umask(0)));*/
	/* read the current umask: set it to 0 and put it straight back */
	mask = umask(0);
	umask(mask);

	pathcopy = strdup(pathname);
	if (pathcopy == NULL) {
		errno = ENOMEM;
		return -1;
	}

	for (cursor = pathcopy;; *cursor = '/') {
		/* advance over separators, then over the next component */
		cursor += strspn(cursor, "/");
		cursor += strcspn(cursor, "/");

		/* cut the string after this component */
		last = (*cursor == '\0');
		*cursor = '\0';

		rv = mkdirfn(fs, pathcopy, mode & ~mask);
		if (errno == EEXIST)
			rv = 0;

		if (rv == -1 || last)
			break;
	}

	free(pathcopy);

	return rv;
}
1351 
1352 int
1353 ukfs_util_builddirs(struct ukfs *ukfs, const char *pathname, mode_t mode)
1354 {
1355 
1356 	return builddirs(pathname, mode, ukfs_mkdir, ukfs);
1357 }
1358