xref: /netbsd-src/lib/libukfs/ukfs.c (revision 1423e65b26cfa3a30fb7aaea6d57132588c43746)
1 /*	$NetBSD: ukfs.c,v 1.51 2010/06/24 13:03:05 hannken Exp $	*/
2 
3 /*
4  * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Finnish Cultural Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
/*
 * This library enables access to file systems directly without
 * involving system calls.
 */
35 
36 #ifdef __linux__
37 #define _XOPEN_SOURCE 500
38 #define _BSD_SOURCE
39 #define _FILE_OFFSET_BITS 64
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/queue.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/mount.h>
47 
48 #include <assert.h>
49 #include <dirent.h>
50 #include <dlfcn.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <pthread.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <stdint.h>
60 
61 #include <rump/ukfs.h>
62 
63 #include <rump/rump.h>
64 #include <rump/rump_syscalls.h>
65 #include <rump/rumpuser.h>
66 
67 #include "ukfs_int_disklabel.h"
68 
69 #define UKFS_MODE_DEFAULT 0555
70 
71 struct ukfs {
72 	struct mount *ukfs_mp;
73 	struct vnode *ukfs_rvp;
74 	void *ukfs_specific;
75 
76 	pthread_spinlock_t ukfs_spin;
77 	pid_t ukfs_nextpid;
78 	struct vnode *ukfs_cdir;
79 	int ukfs_devfd;
80 	char *ukfs_devpath;
81 	char *ukfs_mountpath;
82 	struct ukfs_part *ukfs_part;
83 };
84 
85 static int builddirs(const char *, mode_t,
86     int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *);
87 
88 struct mount *
89 ukfs_getmp(struct ukfs *ukfs)
90 {
91 
92 	return ukfs->ukfs_mp;
93 }
94 
95 struct vnode *
96 ukfs_getrvp(struct ukfs *ukfs)
97 {
98 	struct vnode *rvp;
99 
100 	rvp = ukfs->ukfs_rvp;
101 	rump_pub_vp_incref(rvp);
102 
103 	return rvp;
104 }
105 
106 void
107 ukfs_setspecific(struct ukfs *ukfs, void *priv)
108 {
109 
110 	ukfs->ukfs_specific = priv;
111 }
112 
113 void *
114 ukfs_getspecific(struct ukfs *ukfs)
115 {
116 
117 	return ukfs->ukfs_specific;
118 }
119 
/*
 * When pthread linkage is not wanted, stub out the spinlock calls.
 * The stubs must remain valid expressions: pthread_spin_init() is used
 * inside a condition in this file, so it has to yield the "success"
 * value 0 rather than expand to nothing.
 */
#ifdef DONT_WANT_PTHREAD_LINKAGE
#define pthread_spin_lock(a)	((void)0)
#define pthread_spin_unlock(a)	((void)0)
#define pthread_spin_init(a,b)	(0)
#define pthread_spin_destroy(a)	((void)0)
#endif
126 
127 static pid_t
128 nextpid(struct ukfs *ukfs)
129 {
130 	pid_t npid;
131 
132 	pthread_spin_lock(&ukfs->ukfs_spin);
133 	if (ukfs->ukfs_nextpid == 0)
134 		ukfs->ukfs_nextpid++;
135 	npid = ukfs->ukfs_nextpid++;
136 	pthread_spin_unlock(&ukfs->ukfs_spin);
137 
138 	return npid;
139 }
140 
141 static void
142 precall(struct ukfs *ukfs)
143 {
144 	struct vnode *rvp, *cvp;
145 
146 	rump_pub_lwp_alloc_and_switch(nextpid(ukfs), 1);
147 	rvp = ukfs_getrvp(ukfs);
148 	pthread_spin_lock(&ukfs->ukfs_spin);
149 	cvp = ukfs->ukfs_cdir;
150 	pthread_spin_unlock(&ukfs->ukfs_spin);
151 	rump_pub_rcvp_set(rvp, cvp); /* takes refs */
152 	rump_pub_vp_rele(rvp);
153 }
154 
/*
 * Undo precall(): call rump_pub_rcvp_set() with a NULL root and the
 * file system root as current directory, then release the current lwp.
 */
static void
postcall(struct ukfs *ukfs)
{
	struct vnode *rootvp = ukfs_getrvp(ukfs);

	rump_pub_rcvp_set(NULL, rootvp);
	rump_pub_vp_rele(rootvp);

	rump_pub_lwp_release(rump_pub_lwp_curlwp());
}
165 
166 struct ukfs_part {
167 	pthread_spinlock_t part_lck;
168 	int part_refcount;
169 
170 	int part_type;
171 	char part_labelchar;
172 	off_t part_devoff;
173 	off_t part_devsize;
174 };
175 
176 enum ukfs_parttype { UKFS_PART_NONE, UKFS_PART_DISKLABEL, UKFS_PART_OFFSET };
177 
178 static struct ukfs_part ukfs__part_none = {
179 	.part_type = UKFS_PART_NONE,
180 	.part_devoff = 0,
181 	.part_devsize = RUMP_ETFS_SIZE_ENDOFF,
182 };
183 static struct ukfs_part ukfs__part_na;
184 struct ukfs_part *ukfs_part_none = &ukfs__part_none;
185 struct ukfs_part *ukfs_part_na = &ukfs__part_na;
186 
187 #define PART2LOCKSIZE(len) ((len) == RUMP_ETFS_SIZE_ENDOFF ? 0 : (len))
188 
189 int
190 _ukfs_init(int version)
191 {
192 	int rv;
193 
194 	if (version != UKFS_VERSION) {
195 		printf("incompatible ukfs version, %d vs. %d\n",
196 		    version, UKFS_VERSION);
197 		errno = EPROGMISMATCH;
198 		return -1;
199 	}
200 
201 	if ((rv = rump_init()) != 0) {
202 		errno = rv;
203 		return -1;
204 	}
205 
206 	return 0;
207 }
208 
/*
 * mkdir callback for builddirs() that calls rump_sys_mkdir() directly.
 * The ukfs handle argument exists only to match the callback
 * signature and is ignored.
 */
/*ARGSUSED*/
static int
rumpmkdir(struct ukfs *dummy, const char *path, mode_t mode)
{
	int rv = rump_sys_mkdir(path, mode);

	return rv;
}
216 
217 int
218 ukfs_part_probe(char *devpath, struct ukfs_part **partp)
219 {
220 	struct ukfs_part *part;
221 	char *p;
222 	int error = 0;
223 	int devfd = -1;
224 
225 	if ((p = strstr(devpath, UKFS_PARTITION_SCANMAGIC)) != NULL) {
226 		fprintf(stderr, "ukfs: %%PART is deprecated.  use "
227 		    "%%DISKLABEL instead\n");
228 		errno = ENODEV;
229 		return -1;
230 	}
231 
232 	part = malloc(sizeof(*part));
233 	if (part == NULL) {
234 		errno = ENOMEM;
235 		return -1;
236 	}
237 	if (pthread_spin_init(&part->part_lck, PTHREAD_PROCESS_PRIVATE) == -1) {
238 		error = errno;
239 		free(part);
240 		errno = error;
241 		return -1;
242 	}
243 	part->part_type = UKFS_PART_NONE;
244 	part->part_refcount = 1;
245 
246 	/*
247 	 * Check for magic in pathname:
248 	 *   disklabel: /regularpath%DISKLABEL:labelchar%\0
249 	 *     offsets: /regularpath%OFFSET:start,end%\0
250 	 */
251 #define MAGICADJ_DISKLABEL(p, n) (p+sizeof(UKFS_DISKLABEL_SCANMAGIC)-1+n)
252 	if ((p = strstr(devpath, UKFS_DISKLABEL_SCANMAGIC)) != NULL
253 	    && strlen(p) == UKFS_DISKLABEL_MAGICLEN
254 	    && *(MAGICADJ_DISKLABEL(p,1)) == '%') {
255 		if (*(MAGICADJ_DISKLABEL(p,0)) >= 'a' &&
256 		    *(MAGICADJ_DISKLABEL(p,0)) < 'a' + UKFS_MAXPARTITIONS) {
257 			struct ukfs__disklabel dl;
258 			struct ukfs__partition *pp;
259 			char buf[65536];
260 			char labelchar = *(MAGICADJ_DISKLABEL(p,0));
261 			int partition = labelchar - 'a';
262 
263 			*p = '\0';
264 			devfd = open(devpath, O_RDONLY);
265 			if (devfd == -1) {
266 				error = errno;
267 				goto out;
268 			}
269 
270 			/* Locate the disklabel and find the partition. */
271 			if (pread(devfd, buf, sizeof(buf), 0) == -1) {
272 				error = errno;
273 				goto out;
274 			}
275 
276 			if (ukfs__disklabel_scan(&dl, buf, sizeof(buf)) != 0) {
277 				error = ENOENT;
278 				goto out;
279 			}
280 
281 			if (dl.d_npartitions < partition) {
282 				error = ENOENT;
283 				goto out;
284 			}
285 
286 			pp = &dl.d_partitions[partition];
287 			part->part_type = UKFS_PART_DISKLABEL;
288 			part->part_labelchar = labelchar;
289 			part->part_devoff = pp->p_offset << DEV_BSHIFT;
290 			part->part_devsize = pp->p_size << DEV_BSHIFT;
291 		} else {
292 			error = EINVAL;
293 		}
294 #define MAGICADJ_OFFSET(p, n) (p+sizeof(UKFS_OFFSET_SCANMAGIC)-1+n)
295 	} else if (((p = strstr(devpath, UKFS_OFFSET_SCANMAGIC)) != NULL)
296 	    && (strlen(p) >= UKFS_OFFSET_MINLEN)) {
297 		char *comma, *pers, *ep, *nptr;
298 		u_quad_t val;
299 
300 		comma = strchr(p, ',');
301 		if (comma == NULL) {
302 			error = EINVAL;
303 			goto out;
304 		}
305 		pers = strchr(comma, '%');
306 		if (pers == NULL) {
307 			error = EINVAL;
308 			goto out;
309 		}
310 		*comma = '\0';
311 		*pers = '\0';
312 		*p = '\0';
313 
314 		nptr = MAGICADJ_OFFSET(p,0);
315 		/* check if string is negative */
316 		if (*nptr == '-') {
317 			error = ERANGE;
318 			goto out;
319 		}
320 		val = strtouq(nptr, &ep, 10);
321 		if (val == UQUAD_MAX) {
322 			error = ERANGE;
323 			goto out;
324 		}
325 		if (*ep != '\0') {
326 			error = EADDRNOTAVAIL; /* creative ;) */
327 			goto out;
328 		}
329 		part->part_devoff = val;
330 
331 		/* omstart */
332 
333 		nptr = comma+1;
334 		/* check if string is negative */
335 		if (*nptr == '-') {
336 			error = ERANGE;
337 			goto out;
338 		}
339 		val = strtouq(nptr, &ep, 10);
340 		if (val == UQUAD_MAX) {
341 			error = ERANGE;
342 			goto out;
343 		}
344 		if (*ep != '\0') {
345 			error = EADDRNOTAVAIL; /* creative ;) */
346 			goto out;
347 		}
348 		part->part_devsize = val;
349 		part->part_type = UKFS_PART_OFFSET;
350 	} else {
351 		ukfs_part_release(part);
352 		part = ukfs_part_none;
353 	}
354 
355  out:
356 	if (devfd != -1)
357 		close(devfd);
358 	if (error) {
359 		free(part);
360 		errno = error;
361 	} else {
362 		*partp = part;
363 	}
364 
365 	return error ? -1 : 0;
366 }
367 
368 int
369 ukfs_part_tostring(struct ukfs_part *part, char *str, size_t strsize)
370 {
371 	int rv;
372 
373 	*str = '\0';
374 	/* "pseudo" values */
375 	if (part == ukfs_part_na) {
376 		errno = EINVAL;
377 		return -1;
378 	}
379 	if (part == ukfs_part_none)
380 		return 0;
381 
382 	rv = 0;
383 	switch (part->part_type) {
384 	case UKFS_PART_NONE:
385 		break;
386 
387 	case UKFS_PART_DISKLABEL:
388 		snprintf(str, strsize, "%%DISKLABEL:%c%%",part->part_labelchar);
389 		rv = 1;
390 		break;
391 
392 	case UKFS_PART_OFFSET:
393 		snprintf(str, strsize, "[%llu,%llu]",
394 		    (unsigned long long)part->part_devoff,
395 		    (unsigned long long)(part->part_devoff+part->part_devsize));
396 		rv = 1;
397 		break;
398 	}
399 
400 	return rv;
401 }
402 
403 static void
404 unlockdev(int fd, struct ukfs_part *part)
405 {
406 	struct flock flarg;
407 
408 	if (part == ukfs_part_na)
409 		return;
410 
411 	memset(&flarg, 0, sizeof(flarg));
412 	flarg.l_type = F_UNLCK;
413 	flarg.l_whence = SEEK_SET;
414 	flarg.l_start = part->part_devoff;
415 	flarg.l_len = PART2LOCKSIZE(part->part_devsize);
416 	if (fcntl(fd, F_SETLK, &flarg) == -1)
417 		warn("ukfs: cannot unlock device file");
418 }
419 
420 /*
421  * Open the disk file and flock it.  Also, if we are operation on
422  * an embedded partition, find the partition offset and size from
423  * the disklabel.
424  *
425  * We hard-fail only in two cases:
426  *  1) we failed to get the partition info out (don't know what offset
427  *     to mount from)
428  *  2) we failed to flock the source device (i.e. fcntl() fails,
429  *     not e.g. open() before it)
430  *
431  * Otherwise we let the code proceed to mount and let the file system
432  * throw the proper error.  The only questionable bit is that if we
433  * soft-fail before flock and mount does succeed...
434  *
435  * Returns: -1 error (errno reports error code)
436  *           0 success
437  *
438  * dfdp: -1  device is not open
439  *        n  device is open
440  */
441 static int
442 process_diskdevice(const char *devpath, struct ukfs_part *part, int rdonly,
443 	int *dfdp)
444 {
445 	struct stat sb;
446 	int rv = 0, devfd;
447 
448 	/* defaults */
449 	*dfdp = -1;
450 
451 	devfd = open(devpath, rdonly ? O_RDONLY : O_RDWR);
452 	if (devfd == -1) {
453 		rv = errno;
454 		goto out;
455 	}
456 
457 	if (fstat(devfd, &sb) == -1) {
458 		rv = errno;
459 		goto out;
460 	}
461 
462 	/*
463 	 * We do this only for non-block device since the
464 	 * (NetBSD) kernel allows block device open only once.
465 	 * We also need to close the device for fairly obvious reasons.
466 	 */
467 	if (!S_ISBLK(sb.st_mode)) {
468 		struct flock flarg;
469 
470 		memset(&flarg, 0, sizeof(flarg));
471 		flarg.l_type = rdonly ? F_RDLCK : F_WRLCK;
472 		flarg.l_whence = SEEK_SET;
473 		flarg.l_start = part->part_devoff;
474 		flarg.l_len = PART2LOCKSIZE(part->part_devsize);
475 		if (fcntl(devfd, F_SETLK, &flarg) == -1) {
476 			pid_t holder;
477 			int sverrno;
478 
479 			sverrno = errno;
480 			if (fcntl(devfd, F_GETLK, &flarg) != 1)
481 				holder = flarg.l_pid;
482 			else
483 				holder = -1;
484 			warnx("ukfs_mount: cannot lock device.  held by pid %d",
485 			    holder);
486 			rv = sverrno;
487 			goto out;
488 		}
489 	} else {
490 		close(devfd);
491 		devfd = -1;
492 	}
493 	*dfdp = devfd;
494 
495  out:
496 	if (rv) {
497 		if (devfd != -1)
498 			close(devfd);
499 	}
500 
501 	return rv;
502 }
503 
/*
 * Argument bundle handed to the mfs_mounter() thread.  The fields
 * mirror the rump_sys_mount() parameters; mi_error is not read by
 * mfs_mounter().
 */
struct mountinfo {
	const char *mi_vfsname;
	const char *mi_mountpath;
	int mi_mntflags;
	void *mi_arg;
	size_t mi_alen;
	int *mi_error;
};

/*
 * Thread entry point performing the mount described by the struct
 * mountinfo in arg.  A failing mount is fatal: it is reported with
 * warn() and the process aborts.
 */
static void *
mfs_mounter(void *arg)
{
	const struct mountinfo *req = arg;

	if (rump_sys_mount(req->mi_vfsname, req->mi_mountpath,
	    req->mi_mntflags, req->mi_arg, req->mi_alen) != 0) {
		warn("mfs mount failed.  fix me.");
		abort(); /* XXX */
	}

	return NULL;
}
527 
528 static struct ukfs *
529 doukfsmount(const char *vfsname, const char *devpath, struct ukfs_part *part,
530 	const char *mountpath, int mntflags, void *arg, size_t alen)
531 {
532 	struct ukfs *fs = NULL;
533 	int rv = 0, devfd = -1;
534 	int mounted = 0;
535 	int regged = 0;
536 
537 	pthread_spin_lock(&part->part_lck);
538 	part->part_refcount++;
539 	pthread_spin_unlock(&part->part_lck);
540 	if (part != ukfs_part_na) {
541 		if ((rv = process_diskdevice(devpath, part,
542 		    mntflags & MNT_RDONLY, &devfd)) != 0)
543 			goto out;
544 	}
545 
546 	fs = malloc(sizeof(struct ukfs));
547 	if (fs == NULL) {
548 		rv = ENOMEM;
549 		goto out;
550 	}
551 	memset(fs, 0, sizeof(struct ukfs));
552 
553 	/* create our mountpoint.  this is never removed. */
554 	if (builddirs(mountpath, 0777, rumpmkdir, NULL) == -1) {
555 		if (errno != EEXIST) {
556 			rv = errno;
557 			goto out;
558 		}
559 	}
560 
561 	if (part != ukfs_part_na) {
562 		/* LINTED */
563 		rv = rump_pub_etfs_register_withsize(devpath, devpath,
564 		    RUMP_ETFS_BLK, part->part_devoff, part->part_devsize);
565 		if (rv) {
566 			goto out;
567 		}
568 		regged = 1;
569 	}
570 
571 	/*
572 	 * MFS is special since mount(2) doesn't return.  Hence, we
573 	 * create a thread here.  Could fix mfs to return, but there's
574 	 * too much history for me to bother.
575 	 */
576 	if (strcmp(vfsname, MOUNT_MFS) == 0) {
577 		pthread_t pt;
578 		struct mountinfo mi;
579 		int i;
580 
581 		mi.mi_vfsname = vfsname;
582 		mi.mi_mountpath = mountpath;
583 		mi.mi_mntflags = mntflags;
584 		mi.mi_arg = arg;
585 		mi.mi_alen = alen;
586 
587 		if (pthread_create(&pt, NULL, mfs_mounter, &mi) == -1) {
588 			rv = errno;
589 			goto out;
590 		}
591 
592 		for (i = 0;i < 100000; i++) {
593 			struct statvfs svfsb;
594 
595 			rv = rump_sys_statvfs1(mountpath, &svfsb, ST_WAIT);
596 			if (rv == -1) {
597 				rv = errno;
598 				goto out;
599 			}
600 
601 			if (strcmp(svfsb.f_mntonname, mountpath) == 0 &&
602 			    strcmp(svfsb.f_fstypename, MOUNT_MFS) == 0) {
603 				break;
604 			}
605 			usleep(1);
606 		}
607 	} else {
608 		rv = rump_sys_mount(vfsname, mountpath, mntflags, arg, alen);
609 		if (rv) {
610 			rv = errno;
611 			goto out;
612 		}
613 	}
614 
615 	mounted = 1;
616 	rv = rump_pub_vfs_getmp(mountpath, &fs->ukfs_mp);
617 	if (rv) {
618 		goto out;
619 	}
620 	rv = rump_pub_vfs_root(fs->ukfs_mp, &fs->ukfs_rvp, 0);
621 	if (rv) {
622 		goto out;
623 	}
624 
625 	if (regged) {
626 		fs->ukfs_devpath = strdup(devpath);
627 	}
628 	fs->ukfs_mountpath = strdup(mountpath);
629 	fs->ukfs_cdir = ukfs_getrvp(fs);
630 	pthread_spin_init(&fs->ukfs_spin, PTHREAD_PROCESS_SHARED);
631 	fs->ukfs_devfd = devfd;
632 	fs->ukfs_part = part;
633 	assert(rv == 0);
634 
635  out:
636 	if (rv) {
637 		if (fs) {
638 			if (fs->ukfs_rvp)
639 				rump_pub_vp_rele(fs->ukfs_rvp);
640 			free(fs);
641 			fs = NULL;
642 		}
643 		if (mounted)
644 			rump_sys_unmount(mountpath, MNT_FORCE);
645 		if (regged)
646 			rump_pub_etfs_remove(devpath);
647 		if (devfd != -1) {
648 			unlockdev(devfd, part);
649 			close(devfd);
650 		}
651 		ukfs_part_release(part);
652 		errno = rv;
653 	}
654 
655 	return fs;
656 }
657 
658 struct ukfs *
659 ukfs_mount(const char *vfsname, const char *devpath,
660 	const char *mountpath, int mntflags, void *arg, size_t alen)
661 {
662 
663 	return doukfsmount(vfsname, devpath, ukfs_part_na,
664 	    mountpath, mntflags, arg, alen);
665 }
666 
/*
 * Mount a file system from the region of devpath described by part.
 * Returns the doukfsmount() result.
 */
struct ukfs *
ukfs_mount_disk(const char *vfsname, const char *devpath,
	struct ukfs_part *part, const char *mountpath, int mntflags,
	void *arg, size_t alen)
{
	struct ukfs *fs;

	fs = doukfsmount(vfsname, devpath, part,
	    mountpath, mntflags, arg, alen);
	return fs;
}
676 
677 int
678 ukfs_release(struct ukfs *fs, int flags)
679 {
680 
681 	if ((flags & UKFS_RELFLAG_NOUNMOUNT) == 0) {
682 		int rv, mntflag, error;
683 
684 		ukfs_chdir(fs, "/");
685 		mntflag = 0;
686 		if (flags & UKFS_RELFLAG_FORCE)
687 			mntflag = MNT_FORCE;
688 		rump_pub_lwp_alloc_and_switch(nextpid(fs), 1);
689 		rump_pub_vp_rele(fs->ukfs_rvp);
690 		fs->ukfs_rvp = NULL;
691 		rv = rump_sys_unmount(fs->ukfs_mountpath, mntflag);
692 		if (rv == -1) {
693 			error = errno;
694 			rump_pub_vfs_root(fs->ukfs_mp, &fs->ukfs_rvp, 0);
695 			rump_pub_lwp_release(rump_pub_lwp_curlwp());
696 			ukfs_chdir(fs, fs->ukfs_mountpath);
697 			errno = error;
698 			return -1;
699 		}
700 		rump_pub_lwp_release(rump_pub_lwp_curlwp());
701 	}
702 
703 	if (fs->ukfs_devpath) {
704 		rump_pub_etfs_remove(fs->ukfs_devpath);
705 		free(fs->ukfs_devpath);
706 	}
707 	free(fs->ukfs_mountpath);
708 
709 	pthread_spin_destroy(&fs->ukfs_spin);
710 	if (fs->ukfs_devfd != -1) {
711 		unlockdev(fs->ukfs_devfd, fs->ukfs_part);
712 		close(fs->ukfs_devfd);
713 	}
714 	ukfs_part_release(fs->ukfs_part);
715 	free(fs);
716 
717 	return 0;
718 }
719 
720 void
721 ukfs_part_release(struct ukfs_part *part)
722 {
723 	int release;
724 
725 	if (part != ukfs_part_none && part != ukfs_part_na) {
726 		pthread_spin_lock(&part->part_lck);
727 		release = --part->part_refcount == 0;
728 		pthread_spin_unlock(&part->part_lck);
729 		if (release) {
730 			pthread_spin_destroy(&part->part_lck);
731 			free(part);
732 		}
733 	}
734 }
735 
/*
 * Complete body of a simple wrapper function: evaluate `thecall'
 * between precall() and postcall() and return its int result.
 * Declares a local and contains a return statement, so it must be the
 * only statement of the function using it.
 */
#define STDCALL(ukfs, thecall)						\
	int stdcall_rv_;						\
									\
	precall(ukfs);							\
	stdcall_rv_ = (thecall);					\
	postcall(ukfs);							\
	return stdcall_rv_;
743 
744 int
745 ukfs_opendir(struct ukfs *ukfs, const char *dirname, struct ukfs_dircookie **c)
746 {
747 	struct vnode *vp;
748 	int rv;
749 
750 	precall(ukfs);
751 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
752 	    NULL, &vp, NULL);
753 	postcall(ukfs);
754 
755 	if (rv == 0) {
756 		RUMP_VOP_UNLOCK(vp);
757 	} else {
758 		errno = rv;
759 		rv = -1;
760 	}
761 
762 	/*LINTED*/
763 	*c = (struct ukfs_dircookie *)vp;
764 	return rv;
765 }
766 
767 static int
768 getmydents(struct vnode *vp, off_t *off, uint8_t *buf, size_t bufsize)
769 {
770 	struct uio *uio;
771 	size_t resid;
772 	int rv, eofflag;
773 	struct kauth_cred *cred;
774 
775 	uio = rump_pub_uio_setup(buf, bufsize, *off, RUMPUIO_READ);
776 	cred = rump_pub_cred_suserget();
777 	rv = RUMP_VOP_READDIR(vp, uio, cred, &eofflag, NULL, NULL);
778 	rump_pub_cred_put(cred);
779 	RUMP_VOP_UNLOCK(vp);
780 	*off = rump_pub_uio_getoff(uio);
781 	resid = rump_pub_uio_free(uio);
782 
783 	if (rv) {
784 		errno = rv;
785 		return -1;
786 	}
787 
788 	/* LINTED: not totally correct return type, but follows syscall */
789 	return bufsize - resid;
790 }
791 
792 /*ARGSUSED*/
793 int
794 ukfs_getdents_cookie(struct ukfs *ukfs, struct ukfs_dircookie *c, off_t *off,
795 	uint8_t *buf, size_t bufsize)
796 {
797 	/*LINTED*/
798 	struct vnode *vp = (struct vnode *)c;
799 
800 	RUMP_VOP_LOCK(vp, RUMP_LK_SHARED);
801 	return getmydents(vp, off, buf, bufsize);
802 }
803 
804 int
805 ukfs_getdents(struct ukfs *ukfs, const char *dirname, off_t *off,
806 	uint8_t *buf, size_t bufsize)
807 {
808 	struct vnode *vp;
809 	int rv;
810 
811 	precall(ukfs);
812 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
813 	    NULL, &vp, NULL);
814 	postcall(ukfs);
815 	if (rv) {
816 		errno = rv;
817 		return -1;
818 	}
819 
820 	rv = getmydents(vp, off, buf, bufsize);
821 	rump_pub_vp_rele(vp);
822 	return rv;
823 }
824 
/*
 * Dispose of a cookie from ukfs_opendir() by releasing the vnode
 * behind it.  Always returns 0.
 */
/*ARGSUSED*/
int
ukfs_closedir(struct ukfs *ukfs, struct ukfs_dircookie *c)
{
	/*LINTED*/
	struct vnode *dirvp = (struct vnode *)c;

	rump_pub_vp_rele(dirvp);
	return 0;
}
834 
/*
 * Open filename with rump_sys_open() (mode argument 0) in the context
 * of this mount.  Returns the descriptor, or -1 on failure.
 */
int
ukfs_open(struct ukfs *ukfs, const char *filename, int flags)
{
	int fd;

	precall(ukfs);
	fd = rump_sys_open(filename, flags, 0);
	postcall(ukfs);

	/* -1 on failure is passed through unchanged */
	return fd;
}
848 
849 ssize_t
850 ukfs_read(struct ukfs *ukfs, const char *filename, off_t off,
851 	uint8_t *buf, size_t bufsize)
852 {
853 	int fd;
854 	ssize_t xfer = -1; /* XXXgcc */
855 
856 	precall(ukfs);
857 	fd = rump_sys_open(filename, RUMP_O_RDONLY, 0);
858 	if (fd == -1)
859 		goto out;
860 
861 	xfer = rump_sys_pread(fd, buf, bufsize, off);
862 	rump_sys_close(fd);
863 
864  out:
865 	postcall(ukfs);
866 	if (fd == -1) {
867 		return -1;
868 	}
869 	return xfer;
870 }
871 
/*
 * Read up to buflen bytes at offset off from an already open
 * descriptor.  Returns the rump_sys_pread() result; the ukfs handle
 * is not used.
 */
/*ARGSUSED*/
ssize_t
ukfs_read_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen)
{
	ssize_t nread;

	nread = rump_sys_pread(fd, buf, buflen, off);
	return nread;
}
879 
880 ssize_t
881 ukfs_write(struct ukfs *ukfs, const char *filename, off_t off,
882 	uint8_t *buf, size_t bufsize)
883 {
884 	int fd;
885 	ssize_t xfer = -1; /* XXXgcc */
886 
887 	precall(ukfs);
888 	fd = rump_sys_open(filename, RUMP_O_WRONLY, 0);
889 	if (fd == -1)
890 		goto out;
891 
892 	/* write and commit */
893 	xfer = rump_sys_pwrite(fd, buf, bufsize, off);
894 	if (xfer > 0)
895 		rump_sys_fsync(fd);
896 
897 	rump_sys_close(fd);
898 
899  out:
900 	postcall(ukfs);
901 	if (fd == -1) {
902 		return -1;
903 	}
904 	return xfer;
905 }
906 
/*
 * Write buflen bytes at offset off to an already open descriptor.
 * If dosync is non-zero and more than zero bytes were written,
 * rump_sys_fsync() is called afterwards.
 * Returns the rump_sys_pwrite() result; the ukfs handle is not used.
 */
/*ARGSUSED*/
ssize_t
ukfs_write_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen,
	int dosync)
{
	ssize_t nwritten = rump_sys_pwrite(fd, buf, buflen, off);

	if (dosync && nwritten > 0)
		rump_sys_fsync(fd);

	return nwritten;
}
920 
/*
 * Close a descriptor obtained from ukfs_open().  The result of
 * rump_sys_close() is discarded; this function always returns 0.
 */
/*ARGSUSED*/
int
ukfs_close(struct ukfs *ukfs, int fd)
{
	(void)rump_sys_close(fd);

	return 0;
}
929 
930 int
931 ukfs_create(struct ukfs *ukfs, const char *filename, mode_t mode)
932 {
933 	int fd;
934 
935 	precall(ukfs);
936 	fd = rump_sys_open(filename, RUMP_O_WRONLY | RUMP_O_CREAT, mode);
937 	if (fd == -1)
938 		return -1;
939 	rump_sys_close(fd);
940 
941 	postcall(ukfs);
942 	return 0;
943 }
944 
/*
 * rump_sys_mknod() executed between precall() and postcall() for this
 * mount.  Returns the rump_sys_mknod() result.
 */
int
ukfs_mknod(struct ukfs *ukfs, const char *path, mode_t mode, dev_t dev)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_mknod(path, mode, dev);
	postcall(ukfs);

	return rv;
}
951 
/*
 * rump_sys_mkfifo() executed between precall() and postcall() for
 * this mount.  Returns the rump_sys_mkfifo() result.
 */
int
ukfs_mkfifo(struct ukfs *ukfs, const char *path, mode_t mode)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_mkfifo(path, mode);
	postcall(ukfs);

	return rv;
}
958 
/*
 * rump_sys_mkdir() executed between precall() and postcall() for this
 * mount.  Returns the rump_sys_mkdir() result.
 */
int
ukfs_mkdir(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_mkdir(filename, mode);
	postcall(ukfs);

	return rv;
}
965 
/*
 * rump_sys_unlink() executed between precall() and postcall() for
 * this mount.  Returns the rump_sys_unlink() result.
 */
int
ukfs_remove(struct ukfs *ukfs, const char *filename)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_unlink(filename);
	postcall(ukfs);

	return rv;
}
972 
/*
 * rump_sys_rmdir() executed between precall() and postcall() for this
 * mount.  Returns the rump_sys_rmdir() result.
 */
int
ukfs_rmdir(struct ukfs *ukfs, const char *filename)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_rmdir(filename);
	postcall(ukfs);

	return rv;
}
979 
/*
 * rump_sys_link(filename, f_create) executed between precall() and
 * postcall() for this mount.  Returns the rump_sys_link() result.
 */
int
ukfs_link(struct ukfs *ukfs, const char *filename, const char *f_create)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_link(filename, f_create);
	postcall(ukfs);

	return rv;
}
986 
/*
 * rump_sys_symlink(filename, linkname) executed between precall() and
 * postcall() for this mount.  Returns the rump_sys_symlink() result.
 */
int
ukfs_symlink(struct ukfs *ukfs, const char *filename, const char *linkname)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_symlink(filename, linkname);
	postcall(ukfs);

	return rv;
}
993 
/*
 * rump_sys_readlink() executed between precall() and postcall() for
 * this mount.  Returns the rump_sys_readlink() result.
 */
ssize_t
ukfs_readlink(struct ukfs *ukfs, const char *filename,
	char *linkbuf, size_t buflen)
{
	ssize_t linklen;

	precall(ukfs);
	linklen = rump_sys_readlink(filename, linkbuf, buflen);
	postcall(ukfs);

	return linklen;
}
1005 
/*
 * rump_sys_rename() executed between precall() and postcall() for
 * this mount.  Returns the rump_sys_rename() result.
 */
int
ukfs_rename(struct ukfs *ukfs, const char *from, const char *to)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_rename(from, to);
	postcall(ukfs);

	return rv;
}
1012 
1013 int
1014 ukfs_chdir(struct ukfs *ukfs, const char *path)
1015 {
1016 	struct vnode *newvp, *oldvp;
1017 	int rv;
1018 
1019 	precall(ukfs);
1020 	rv = rump_sys_chdir(path);
1021 	if (rv == -1)
1022 		goto out;
1023 
1024 	newvp = rump_pub_cdir_get();
1025 	pthread_spin_lock(&ukfs->ukfs_spin);
1026 	oldvp = ukfs->ukfs_cdir;
1027 	ukfs->ukfs_cdir = newvp;
1028 	pthread_spin_unlock(&ukfs->ukfs_spin);
1029 	if (oldvp)
1030 		rump_pub_vp_rele(oldvp);
1031 
1032  out:
1033 	postcall(ukfs);
1034 	return rv;
1035 }
1036 
/*
 * If we want to use post-time_t file systems on pre-time_t hosts,
 * we must translate the stat structure.  Since we don't currently
 * have a general method for making compat calls in rump, special-case
 * this one.
 *
 * Note that this does not allow making system calls to older rump
 * kernels from newer hosts.
 */
#define VERS_TIMECHANGE 599000700

/*
 * Returns non-zero when the stat compat calls are required: the host
 * is NetBSD built before VERS_TIMECHANGE while the rump kernel
 * reports a version at or after it.  Always 0 on non-NetBSD hosts.
 */
static int
needcompat(void)
{

#ifdef __NetBSD__
	/*LINTED*/
	if (__NetBSD_Version__ >= VERS_TIMECHANGE)
		return 0;

	return rump_pub_getversion() >= VERS_TIMECHANGE;
#else
	return 0;
#endif
}
1060 
/*
 * stat() for a path on this mount.  Uses rump_pub_sys___stat30() when
 * needcompat() says so and rump_sys_stat() otherwise.
 * Returns the result of whichever call was made.
 */
int
ukfs_stat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{
	int rv;

	precall(ukfs);
	rv = needcompat()
	    ? rump_pub_sys___stat30(filename, file_stat)
	    : rump_sys_stat(filename, file_stat);
	postcall(ukfs);

	return rv;
}
1075 
/*
 * lstat() for a path on this mount.  Uses rump_pub_sys___lstat30()
 * when needcompat() says so and rump_sys_lstat() otherwise.
 * Returns the result of whichever call was made.
 */
int
ukfs_lstat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{
	int rv;

	precall(ukfs);
	rv = needcompat()
	    ? rump_pub_sys___lstat30(filename, file_stat)
	    : rump_sys_lstat(filename, file_stat);
	postcall(ukfs);

	return rv;
}
1090 
/*
 * rump_sys_chmod() executed between precall() and postcall() for this
 * mount.  Returns the rump_sys_chmod() result.
 */
int
ukfs_chmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_chmod(filename, mode);
	postcall(ukfs);

	return rv;
}
1097 
/*
 * rump_sys_lchmod() executed between precall() and postcall() for
 * this mount.  Returns the rump_sys_lchmod() result.
 */
int
ukfs_lchmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_lchmod(filename, mode);
	postcall(ukfs);

	return rv;
}
1104 
/*
 * rump_sys_chown() executed between precall() and postcall() for this
 * mount.  Returns the rump_sys_chown() result.
 */
int
ukfs_chown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_chown(filename, uid, gid);
	postcall(ukfs);

	return rv;
}
1111 
/*
 * rump_sys_lchown() executed between precall() and postcall() for
 * this mount.  Returns the rump_sys_lchown() result.
 */
int
ukfs_lchown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_lchown(filename, uid, gid);
	postcall(ukfs);

	return rv;
}
1118 
1119 int
1120 ukfs_chflags(struct ukfs *ukfs, const char *filename, u_long flags)
1121 {
1122 
1123 	STDCALL(ukfs, rump_sys_chflags(filename, flags));
1124 }
1125 
1126 int
1127 ukfs_lchflags(struct ukfs *ukfs, const char *filename, u_long flags)
1128 {
1129 
1130 	STDCALL(ukfs, rump_sys_lchflags(filename, flags));
1131 }
1132 
/*
 * rump_sys_utimes() executed between precall() and postcall() for
 * this mount.  Returns the rump_sys_utimes() result.
 */
int
ukfs_utimes(struct ukfs *ukfs, const char *filename, const struct timeval *tptr)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_utimes(filename, tptr);
	postcall(ukfs);

	return rv;
}
1139 
/*
 * rump_sys_lutimes() executed between precall() and postcall() for
 * this mount.  Returns the rump_sys_lutimes() result.
 */
int
ukfs_lutimes(struct ukfs *ukfs, const char *filename,
	      const struct timeval *tptr)
{
	int rv;

	precall(ukfs);
	rv = rump_sys_lutimes(filename, tptr);
	postcall(ukfs);

	return rv;
}
1147 
1148 /*
1149  * Dynamic module support
1150  */
1151 
1152 /* load one library */
1153 
/*
 * Load one shared object and hand its module link set to
 * rump_pub_module_init().
 *
 * Returns:
 *    1  the object was loaded and its modules initialised
 *    0  dlopen() failed with an "Undefined symbol" message; the
 *       caller may retry once more objects have been loaded
 *   -1  any other failure.  errno is set to the module init error, or
 *       to EINVAL if the link set symbols are missing; it is NOT set
 *       when dlopen() itself failed (XXXerrno).
 *
 * XXX: the dlerror stuff isn't really threadsafe, but then again I
 * can't protect against other threads calling dl*() outside of ukfs,
 * so just live with it being flimsy
 */
int
ukfs_modload(const char *fname)
{
	void *handle;
	const struct modinfo *const *mi_start, *const *mi_end;
	int error;

	handle = dlopen(fname, RTLD_LAZY|RTLD_GLOBAL);
	if (handle == NULL) {
		const char *dlmsg = dlerror();

		/* dlerror() returns NULL if no message is pending */
		if (dlmsg == NULL)
			dlmsg = "unknown error";
		if (strstr(dlmsg, "Undefined symbol"))
			return 0;
		/* warnx() supplies the trailing newline itself */
		warnx("dlopen %s failed: %s", fname, dlmsg);
		/* XXXerrno */
		return -1;
	}

	mi_start = dlsym(handle, "__start_link_set_modules");
	mi_end = dlsym(handle, "__stop_link_set_modules");
	if (mi_start && mi_end) {
		error = rump_pub_module_init(mi_start,
		    (size_t)(mi_end-mi_start));
		if (error)
			goto errclose;
		return 1;
	}
	error = EINVAL;

 errclose:
	dlclose(handle);
	errno = error;
	return -1;
}
1192 
/* A module whose first load attempt returned 0 and is to be retried. */
struct loadfail {
	char *pname;

	LIST_ENTRY(loadfail) entries;
};

#define RUMPFSMOD_PREFIX "librumpfs_"
#define RUMPFSMOD_SUFFIX ".so"

/*
 * Load every "librumpfs_*.so" object found in dir with ukfs_modload().
 *
 * Objects for which ukfs_modload() returns 0 are remembered and
 * retried for as long as a pass over them manages to load at least
 * one more.  Other load failures are ignored.
 *
 * Returns the number of objects loaded.  Returns -1 if the directory
 * cannot be opened, or if scanning it failed (readdir error or out of
 * memory) and nothing at all was loaded; errno is set in both cases.
 */
int
ukfs_modload_dir(const char *dir)
{
	const size_t pfxlen = sizeof(RUMPFSMOD_PREFIX) - 1;
	const size_t sfxlen = sizeof(RUMPFSMOD_SUFFIX) - 1;
	char nbuf[MAXPATHLEN+1];
	struct dirent entry, *result;
	DIR *libdir;
	struct loadfail *lf, *nlf;
	int error, nloaded = 0, redo;
	LIST_HEAD(, loadfail) lfs;

	libdir = opendir(dir);
	if (libdir == NULL)
		return -1;

	LIST_INIT(&lfs);
	for (;;) {
		size_t namelen;
		int n, rv;

		if ((error = readdir_r(libdir, &entry, &result)) != 0)
			break;
		if (!result)
			break;

		if (strncmp(result->d_name, RUMPFSMOD_PREFIX, pfxlen) != 0)
			continue;

		/* the suffix must be the very end of the name */
		namelen = strlen(result->d_name);
		if (namelen < sfxlen || strcmp(result->d_name +
		    namelen - sfxlen, RUMPFSMOD_SUFFIX) != 0)
			continue;

		/* a truncated path would name some other file; skip it */
		n = snprintf(nbuf, sizeof(nbuf), "%s/%s", dir, result->d_name);
		if (n < 0 || (size_t)n >= sizeof(nbuf))
			continue;

		rv = ukfs_modload(nbuf);
		if (rv == 1) {
			nloaded++;
			continue;
		}
		if (rv != 0) {
			/* ignore errors */
			continue;
		}

		/*
		 * Remember for a later retry.  Running out of memory
		 * ends the scan so that the error is not overwritten
		 * by the next readdir_r() result.
		 */
		lf = malloc(sizeof(*lf));
		if (lf == NULL) {
			error = ENOMEM;
			break;
		}
		lf->pname = strdup(nbuf);
		if (lf->pname == NULL) {
			free(lf);
			error = ENOMEM;
			break;
		}
		LIST_INSERT_HEAD(&lfs, lf, entries);
	}
	closedir(libdir);
	if (error && nloaded != 0)
		error = 0;

	/*
	 * El-cheapo dependency calculator.  Just try to load the
	 * modules n times in a loop
	 */
	for (redo = 1; redo;) {
		redo = 0;
		nlf = LIST_FIRST(&lfs);
		while ((lf = nlf) != NULL) {
			nlf = LIST_NEXT(lf, entries);
			if (ukfs_modload(lf->pname) == 1) {
				nloaded++;
				redo = 1;
				LIST_REMOVE(lf, entries);
				free(lf->pname);
				free(lf);
			}
		}
	}

	/* whatever is still listed could not be loaded; forget it */
	while ((lf = LIST_FIRST(&lfs)) != NULL) {
		LIST_REMOVE(lf, entries);
		free(lf->pname);
		free(lf);
	}

	if (error && nloaded == 0) {
		errno = error;
		return -1;
	}

	return nloaded;
}
1290 
1291 /* XXX: this code uses definitions from NetBSD, needs rumpdefs */
1292 ssize_t
1293 ukfs_vfstypes(char *buf, size_t buflen)
1294 {
1295 	int mib[3];
1296 	struct sysctlnode q, ans[128];
1297 	size_t alen;
1298 	int i;
1299 
1300 	mib[0] = CTL_VFS;
1301 	mib[1] = VFS_GENERIC;
1302 	mib[2] = CTL_QUERY;
1303 	alen = sizeof(ans);
1304 
1305 	memset(&q, 0, sizeof(q));
1306 	q.sysctl_flags = SYSCTL_VERSION;
1307 
1308 	if (rump_sys___sysctl(mib, 3, ans, &alen, &q, sizeof(q)) == -1) {
1309 		return -1;
1310 	}
1311 
1312 	for (i = 0; i < alen/sizeof(ans[0]); i++)
1313 		if (strcmp("fstypes", ans[i].sysctl_name) == 0)
1314 			break;
1315 	if (i == alen/sizeof(ans[0])) {
1316 		errno = ENXIO;
1317 		return -1;
1318 	}
1319 
1320 	mib[0] = CTL_VFS;
1321 	mib[1] = VFS_GENERIC;
1322 	mib[2] = ans[i].sysctl_num;
1323 
1324 	if (rump_sys___sysctl(mib, 3, buf, &buflen, NULL, 0) == -1) {
1325 		return -1;
1326 	}
1327 
1328 	return buflen;
1329 }
1330 
1331 /*
1332  * Utilities
1333  */
/*
 * Create pathname and all of its missing parent directories, one
 * component at a time, by calling mkdirfn(fs, prefix, mode & ~umask)
 * for every prefix of the path.  A component that already exists
 * (EEXIST) is not an error.
 *
 * Returns 0 on success.  On failure returns -1 with errno as left by
 * the failing mkdirfn call, or ENOMEM if the path could not be copied.
 */
static int
builddirs(const char *pathname, mode_t mode,
	int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs)
{
	char *path, *sep;
	int rv, sverrno;
	int last;
	mode_t mask;

	/* fetch the process umask: there is no way to read it directly */
	/*ukfs_umask((mask = ukfs_umask(0)));*/
	mask = umask(0);
	umask(mask);

	path = strdup(pathname);
	if (path == NULL) {
		errno = ENOMEM;
		return -1;
	}

	sep = path;
	for (;;) {
		/* find next component */
		sep += strspn(sep, "/");
		sep += strcspn(sep, "/");
		last = (*sep == '\0');
		/* cut the string after this component */
		*sep = '\0';

		rv = mkdirfn(fs, path, mode & ~mask);
		/* errno is meaningful only if the call actually failed */
		if (rv == -1 && errno == EEXIST)
			rv = 0;

		if (rv == -1 || last)
			break;

		*sep = '/';
	}

	/* keep the mkdirfn errno intact across free() */
	sverrno = errno;
	free(path);
	errno = sverrno;

	return rv;
}
1376 
1377 int
1378 ukfs_util_builddirs(struct ukfs *ukfs, const char *pathname, mode_t mode)
1379 {
1380 
1381 	return builddirs(pathname, mode, ukfs_mkdir, ukfs);
1382 }
1383