xref: /netbsd-src/lib/libukfs/ukfs.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /*	$NetBSD: ukfs.c,v 1.58 2015/06/17 00:15:26 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Finnish Cultural Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
 * This library enables access to file systems directly without
 * involving system calls.
34  */
35 
36 #ifdef __linux__
37 #define _XOPEN_SOURCE 500
38 #define _BSD_SOURCE
39 #define _FILE_OFFSET_BITS 64
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/queue.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/mount.h>
47 
48 #include <assert.h>
49 #include <dirent.h>
50 #include <dlfcn.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <pthread.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <stdint.h>
60 
61 #include <rump/ukfs.h>
62 
63 #include <rump/rump.h>
64 #include <rump/rump_syscalls.h>
65 
66 #include "ukfs_int_disklabel.h"
67 
68 #define UKFS_MODE_DEFAULT 0555
69 
/*
 * Per-mount handle.  Created by doukfsmount() on behalf of
 * ukfs_mount()/ukfs_mount_disk() and destroyed by ukfs_release().
 */
struct ukfs {
	pthread_spinlock_t ukfs_spin;	/* held while ukfs_cwd is swapped */

	struct mount *ukfs_mp;		/* filled in by rump_pub_vfs_getmp() */
	struct lwp *ukfs_lwp;		/* base lwp created at mount time */
	void *ukfs_specific;		/* caller-owned private pointer */

	int ukfs_devfd;			/* host fd of the locked device, or -1 */

	char *ukfs_devpath;		/* set only when the device was etfs-registered */
	char *ukfs_mountpath;		/* mountpoint; precall() chroots here */
	char *ukfs_cwd;			/* directory precall() changes into */

	struct ukfs_part *ukfs_part;	/* partition the mount was made with */
};
85 
/*
 * Forward declaration: creates every component of a path through the
 * supplied mkdir callback.  Defined in the utilities section of this file.
 */
static int builddirs(const char *, mode_t,
    int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *);
88 
89 struct mount *
90 ukfs_getmp(struct ukfs *ukfs)
91 {
92 
93 	return ukfs->ukfs_mp;
94 }
95 
96 void
97 ukfs_setspecific(struct ukfs *ukfs, void *priv)
98 {
99 
100 	ukfs->ukfs_specific = priv;
101 }
102 
103 void *
104 ukfs_getspecific(struct ukfs *ukfs)
105 {
106 
107 	return ukfs->ukfs_specific;
108 }
109 
#ifdef DONT_WANT_PTHREAD_LINKAGE
/*
 * Spinlock stubs for builds that must not link against libpthread.
 *
 * The stubs have to remain int-valued expressions: this file tests the
 * result of pthread_spin_init(), and a macro that expands to nothing
 * turns such a test into a syntax error.  Routing them through a
 * function call also avoids "statement with no effect" warnings when
 * the result is discarded.  Arguments are not evaluated.
 */
static inline int
ukfs__spin_nop(void)
{

	return 0;
}
#define pthread_spin_lock(a)		ukfs__spin_nop()
#define pthread_spin_unlock(a)		ukfs__spin_nop()
#define pthread_spin_init(a,b)		ukfs__spin_nop()
#define pthread_spin_destroy(a)		ukfs__spin_nop()
#endif
116 
117 static int
118 precall(struct ukfs *ukfs, struct lwp **curlwp)
119 {
120 
121 	/* save previous.  ensure start from pristine context */
122 	*curlwp = rump_pub_lwproc_curlwp();
123 	if (*curlwp)
124 		rump_pub_lwproc_switch(ukfs->ukfs_lwp);
125 	rump_pub_lwproc_rfork(RUMP_RFCFDG);
126 
127 	if (rump_sys_chroot(ukfs->ukfs_mountpath) == -1)
128 		return errno;
129 	if (rump_sys_chdir(ukfs->ukfs_cwd) == -1)
130 		return errno;
131 
132 	return 0;
133 }
134 
/*
 * Leave the context set up by precall(): release the current lwp and,
 * if the caller had one, switch back to it.
 */
static void
postcall(struct lwp *curlwp)
{

	rump_pub_lwproc_releaselwp();
	if (curlwp == NULL)
		return;
	rump_pub_lwproc_switch(curlwp);
}
143 
/*
 * PRECALL() declares `ukfs_curlwp' in the enclosing scope and runs
 * precall() on the identifier `ukfs', which must be in scope.  If
 * precall() fails, errno is set to its return value and the enclosing
 * function returns -1.
 */
#define PRECALL()							\
struct lwp *ukfs_curlwp;						\
do {									\
	int ukfs_rv;							\
	if ((ukfs_rv = precall(ukfs, &ukfs_curlwp)) != 0) {		\
		errno = ukfs_rv;					\
		return -1;						\
	}								\
} while (/*CONSTCOND*/0)

/*
 * POSTCALL() undoes PRECALL() via postcall().  The expansion already
 * ends in a semicolon.
 */
#define POSTCALL() postcall(ukfs_curlwp);
155 
/*
 * Description of the slice of a device to mount.  Instances created by
 * ukfs_part_probe() are reference counted; see ukfs_part_release().
 */
struct ukfs_part {
	pthread_spinlock_t part_lck;	/* protects part_refcount */
	int part_refcount;		/* freed when this drops to zero */

	int part_type;			/* one of enum ukfs_parttype */
	char part_labelchar;		/* disklabel partition letter */
	off_t part_devoff;		/* byte offset into the device */
	off_t part_devsize;		/* size in bytes */
};
165 
/* How a struct ukfs_part was specified. */
enum ukfs_parttype { UKFS_PART_NONE, UKFS_PART_DISKLABEL, UKFS_PART_OFFSET };

/*
 * Statically allocated pseudo partitions.  They are compared by
 * address and are never freed by ukfs_part_release().
 */
static struct ukfs_part ukfs__part_none = {
	.part_type = UKFS_PART_NONE,
	.part_devoff = 0,
	.part_devsize = RUMP_ETFS_SIZE_ENDOFF,
};
static struct ukfs_part ukfs__part_na;
struct ukfs_part *ukfs_part_none = &ukfs__part_none;
struct ukfs_part *ukfs_part_na = &ukfs__part_na;

/* Lock length for fcntl(): RUMP_ETFS_SIZE_ENDOFF maps to 0. */
#define PART2LOCKSIZE(len) ((len) == RUMP_ETFS_SIZE_ENDOFF ? 0 : (len))
178 
179 int
180 _ukfs_init(int version)
181 {
182 	int rv;
183 
184 	if (version != UKFS_VERSION) {
185 		errno = EPROGMISMATCH;
186 		warn("incompatible ukfs version, %d vs. %d",
187 		    version, UKFS_VERSION);
188 		return -1;
189 	}
190 
191 	if ((rv = rump_init()) != 0) {
192 		errno = rv;
193 		return -1;
194 	}
195 
196 	return 0;
197 }
198 
/*
 * mkdir callback for builddirs() that goes straight to the rump kernel;
 * the ukfs argument is ignored.
 */
/*ARGSUSED*/
static int
rumpmkdir(struct ukfs *dummy, const char *path, mode_t mode)
{
	int rv;

	rv = rump_sys_mkdir(path, mode);
	return rv;
}
206 
207 int
208 ukfs_part_probe(char *devpath, struct ukfs_part **partp)
209 {
210 	struct ukfs_part *part;
211 	char *p;
212 	int error = 0;
213 	int devfd = -1;
214 
215 	if ((p = strstr(devpath, UKFS_PARTITION_SCANMAGIC)) != NULL) {
216 		warnx("ukfs: %%PART is deprecated.  use "
217 		    "%%DISKLABEL instead");
218 		errno = ENODEV;
219 		return -1;
220 	}
221 
222 	part = malloc(sizeof(*part));
223 	if (part == NULL) {
224 		errno = ENOMEM;
225 		return -1;
226 	}
227 	if (pthread_spin_init(&part->part_lck, PTHREAD_PROCESS_PRIVATE) == -1) {
228 		error = errno;
229 		free(part);
230 		errno = error;
231 		return -1;
232 	}
233 	part->part_type = UKFS_PART_NONE;
234 	part->part_refcount = 1;
235 
236 	/*
237 	 * Check for magic in pathname:
238 	 *   disklabel: /regularpath%DISKLABEL:labelchar%\0
239 	 *     offsets: /regularpath%OFFSET:start,end%\0
240 	 */
241 #define MAGICADJ_DISKLABEL(p, n) (p+sizeof(UKFS_DISKLABEL_SCANMAGIC)-1+n)
242 	if ((p = strstr(devpath, UKFS_DISKLABEL_SCANMAGIC)) != NULL
243 	    && strlen(p) == UKFS_DISKLABEL_MAGICLEN
244 	    && *(MAGICADJ_DISKLABEL(p,1)) == '%') {
245 		if (*(MAGICADJ_DISKLABEL(p,0)) >= 'a' &&
246 		    *(MAGICADJ_DISKLABEL(p,0)) < 'a' + UKFS_MAXPARTITIONS) {
247 			struct ukfs__disklabel dl;
248 			struct ukfs__partition *pp;
249 			int imswapped;
250 			char buf[65536];
251 			char labelchar = *(MAGICADJ_DISKLABEL(p,0));
252 			int partition = labelchar - 'a';
253 			uint32_t poffset, psize;
254 
255 			*p = '\0';
256 			devfd = open(devpath, O_RDONLY);
257 			if (devfd == -1) {
258 				error = errno;
259 				goto out;
260 			}
261 
262 			/* Locate the disklabel and find the partition. */
263 			if (pread(devfd, buf, sizeof(buf), 0) == -1) {
264 				error = errno;
265 				goto out;
266 			}
267 
268 			if (ukfs__disklabel_scan(&dl, &imswapped,
269 			    buf, sizeof(buf)) != 0) {
270 				error = ENOENT;
271 				goto out;
272 			}
273 
274 			if (dl.d_npartitions < partition) {
275 				error = ENOENT;
276 				goto out;
277 			}
278 
279 			pp = &dl.d_partitions[partition];
280 			part->part_type = UKFS_PART_DISKLABEL;
281 			part->part_labelchar = labelchar;
282 			if (imswapped) {
283 				poffset = bswap32(pp->p_offset);
284 				psize = bswap32(pp->p_size);
285 			} else {
286 				poffset = pp->p_offset;
287 				psize = pp->p_size;
288 			}
289 			part->part_devoff = poffset << DEV_BSHIFT;
290 			part->part_devsize = psize << DEV_BSHIFT;
291 		} else {
292 			error = EINVAL;
293 		}
294 #define MAGICADJ_OFFSET(p, n) (p+sizeof(UKFS_OFFSET_SCANMAGIC)-1+n)
295 	} else if (((p = strstr(devpath, UKFS_OFFSET_SCANMAGIC)) != NULL)
296 	    && (strlen(p) >= UKFS_OFFSET_MINLEN)) {
297 		char *comma, *pers, *ep, *nptr;
298 		u_quad_t val;
299 
300 		comma = strchr(p, ',');
301 		if (comma == NULL) {
302 			error = EINVAL;
303 			goto out;
304 		}
305 		pers = strchr(comma, '%');
306 		if (pers == NULL) {
307 			error = EINVAL;
308 			goto out;
309 		}
310 		*comma = '\0';
311 		*pers = '\0';
312 		*p = '\0';
313 
314 		nptr = MAGICADJ_OFFSET(p,0);
315 		/* check if string is negative */
316 		if (*nptr == '-') {
317 			error = ERANGE;
318 			goto out;
319 		}
320 		val = strtouq(nptr, &ep, 10);
321 		if (val == UQUAD_MAX) {
322 			error = ERANGE;
323 			goto out;
324 		}
325 		if (*ep != '\0') {
326 			error = EADDRNOTAVAIL; /* creative ;) */
327 			goto out;
328 		}
329 		part->part_devoff = val;
330 
331 		/* omstart */
332 
333 		nptr = comma+1;
334 		/* check if string is negative */
335 		if (*nptr == '-') {
336 			error = ERANGE;
337 			goto out;
338 		}
339 		val = strtouq(nptr, &ep, 10);
340 		if (val == UQUAD_MAX) {
341 			error = ERANGE;
342 			goto out;
343 		}
344 		if (*ep != '\0') {
345 			error = EADDRNOTAVAIL; /* creative ;) */
346 			goto out;
347 		}
348 		part->part_devsize = val;
349 		part->part_type = UKFS_PART_OFFSET;
350 	} else {
351 		ukfs_part_release(part);
352 		part = ukfs_part_none;
353 	}
354 
355  out:
356 	if (devfd != -1)
357 		close(devfd);
358 	if (error) {
359 		free(part);
360 		errno = error;
361 	} else {
362 		*partp = part;
363 	}
364 
365 	return error ? -1 : 0;
366 }
367 
368 int
369 ukfs_part_tostring(struct ukfs_part *part, char *str, size_t strsize)
370 {
371 	int rv;
372 
373 	*str = '\0';
374 	/* "pseudo" values */
375 	if (part == ukfs_part_na) {
376 		errno = EINVAL;
377 		return -1;
378 	}
379 	if (part == ukfs_part_none)
380 		return 0;
381 
382 	rv = 0;
383 	switch (part->part_type) {
384 	case UKFS_PART_NONE:
385 		break;
386 
387 	case UKFS_PART_DISKLABEL:
388 		snprintf(str, strsize, "%%DISKLABEL:%c%%",part->part_labelchar);
389 		rv = 1;
390 		break;
391 
392 	case UKFS_PART_OFFSET:
393 		snprintf(str, strsize, "[%llu,%llu]",
394 		    (unsigned long long)part->part_devoff,
395 		    (unsigned long long)(part->part_devoff+part->part_devsize));
396 		rv = 1;
397 		break;
398 	}
399 
400 	return rv;
401 }
402 
403 static void
404 unlockdev(int fd, struct ukfs_part *part)
405 {
406 	struct flock flarg;
407 
408 	if (part == ukfs_part_na)
409 		return;
410 
411 	memset(&flarg, 0, sizeof(flarg));
412 	flarg.l_type = F_UNLCK;
413 	flarg.l_whence = SEEK_SET;
414 	flarg.l_start = part->part_devoff;
415 	flarg.l_len = PART2LOCKSIZE(part->part_devsize);
416 	if (fcntl(fd, F_SETLK, &flarg) == -1)
417 		warn("ukfs: cannot unlock device file");
418 }
419 
420 /*
421  * Open the disk file and flock it.  Also, if we are operation on
422  * an embedded partition, find the partition offset and size from
423  * the disklabel.
424  *
425  * We hard-fail only in two cases:
426  *  1) we failed to get the partition info out (don't know what offset
427  *     to mount from)
428  *  2) we failed to flock the source device (i.e. fcntl() fails,
429  *     not e.g. open() before it)
430  *
431  * Otherwise we let the code proceed to mount and let the file system
432  * throw the proper error.  The only questionable bit is that if we
433  * soft-fail before flock and mount does succeed...
434  *
435  * Returns: -1 error (errno reports error code)
436  *           0 success
437  *
438  * dfdp: -1  device is not open
439  *        n  device is open
440  */
441 static int
442 process_diskdevice(const char *devpath, struct ukfs_part *part, int rdonly,
443 	int *dfdp)
444 {
445 	struct stat sb;
446 	int rv = 0, devfd;
447 
448 	/* defaults */
449 	*dfdp = -1;
450 
451 	devfd = open(devpath, rdonly ? O_RDONLY : O_RDWR);
452 	if (devfd == -1) {
453 		rv = errno;
454 		goto out;
455 	}
456 
457 	if (fstat(devfd, &sb) == -1) {
458 		rv = errno;
459 		goto out;
460 	}
461 
462 	/*
463 	 * We do this only for non-block device since the
464 	 * (NetBSD) kernel allows block device open only once.
465 	 * We also need to close the device for fairly obvious reasons.
466 	 */
467 	if (!S_ISBLK(sb.st_mode)) {
468 		struct flock flarg;
469 
470 		memset(&flarg, 0, sizeof(flarg));
471 		flarg.l_type = rdonly ? F_RDLCK : F_WRLCK;
472 		flarg.l_whence = SEEK_SET;
473 		flarg.l_start = part->part_devoff;
474 		flarg.l_len = PART2LOCKSIZE(part->part_devsize);
475 		if (fcntl(devfd, F_SETLK, &flarg) == -1) {
476 			pid_t holder;
477 			int sverrno;
478 
479 			sverrno = errno;
480 			if (fcntl(devfd, F_GETLK, &flarg) != 1)
481 				holder = flarg.l_pid;
482 			else
483 				holder = -1;
484 			warnx("ukfs_mount: cannot lock device.  held by pid %d",
485 			    holder);
486 			rv = sverrno;
487 			goto out;
488 		}
489 	} else {
490 		close(devfd);
491 		devfd = -1;
492 	}
493 	*dfdp = devfd;
494 
495  out:
496 	if (rv) {
497 		if (devfd != -1)
498 			close(devfd);
499 	}
500 
501 	return rv;
502 }
503 
/* Arguments handed to the mfs_mounter() thread; mirrors rump_sys_mount(). */
struct mountinfo {
	const char *mi_vfsname;
	const char *mi_mountpath;
	int mi_mntflags;
	void *mi_arg;
	size_t mi_alen;
	int *mi_error;		/* not referenced anywhere in this file */
};
512 static void *
513 mfs_mounter(void *arg)
514 {
515 	struct mountinfo *mi = arg;
516 	int rv;
517 
518 	rv = rump_sys_mount(mi->mi_vfsname, mi->mi_mountpath, mi->mi_mntflags,
519 	    mi->mi_arg, mi->mi_alen);
520 	if (rv) {
521 		warn("mfs mount failed.  fix me.");
522 		abort(); /* XXX */
523 	}
524 
525 	return NULL;
526 }
527 
528 static struct ukfs *
529 doukfsmount(const char *vfsname, const char *devpath, struct ukfs_part *part,
530 	const char *mountpath, int mntflags, void *arg, size_t alen)
531 {
532 	struct ukfs *fs = NULL;
533 	struct lwp *curlwp;
534 	int rv = 0, devfd = -1;
535 	int mounted = 0;
536 	int regged = 0;
537 
538 	pthread_spin_lock(&part->part_lck);
539 	part->part_refcount++;
540 	pthread_spin_unlock(&part->part_lck);
541 	if (part != ukfs_part_na) {
542 		if ((rv = process_diskdevice(devpath, part,
543 		    mntflags & MNT_RDONLY, &devfd)) != 0)
544 			goto out;
545 	}
546 
547 	fs = malloc(sizeof(struct ukfs));
548 	if (fs == NULL) {
549 		rv = ENOMEM;
550 		goto out;
551 	}
552 	memset(fs, 0, sizeof(struct ukfs));
553 
554 	/* create our mountpoint.  this is never removed. */
555 	if (builddirs(mountpath, 0777, rumpmkdir, NULL) == -1) {
556 		if (errno != EEXIST) {
557 			rv = errno;
558 			goto out;
559 		}
560 	}
561 
562 	if (part != ukfs_part_na) {
563 		/* LINTED */
564 		rv = rump_pub_etfs_register_withsize(devpath, devpath,
565 		    RUMP_ETFS_BLK, part->part_devoff, part->part_devsize);
566 		if (rv) {
567 			goto out;
568 		}
569 		regged = 1;
570 	}
571 
572 	/*
573 	 * MFS is special since mount(2) doesn't return.  Hence, we
574 	 * create a thread here.  Could fix mfs to return, but there's
575 	 * too much history for me to bother.
576 	 */
577 	if (strcmp(vfsname, MOUNT_MFS) == 0) {
578 		pthread_t pt;
579 		struct mountinfo mi;
580 		int i;
581 
582 		mi.mi_vfsname = vfsname;
583 		mi.mi_mountpath = mountpath;
584 		mi.mi_mntflags = mntflags;
585 		mi.mi_arg = arg;
586 		mi.mi_alen = alen;
587 
588 		if (pthread_create(&pt, NULL, mfs_mounter, &mi) == -1) {
589 			rv = errno;
590 			goto out;
591 		}
592 
593 		for (i = 0;i < 100000; i++) {
594 			struct statvfs svfsb;
595 
596 			rv = rump_sys_statvfs1(mountpath, &svfsb, ST_WAIT);
597 			if (rv == -1) {
598 				rv = errno;
599 				goto out;
600 			}
601 
602 			if (strcmp(svfsb.f_mntonname, mountpath) == 0 &&
603 			    strcmp(svfsb.f_fstypename, MOUNT_MFS) == 0) {
604 				break;
605 			}
606 			usleep(1);
607 		}
608 	} else {
609 		rv = rump_sys_mount(vfsname, mountpath, mntflags, arg, alen);
610 		if (rv) {
611 			rv = errno;
612 			goto out;
613 		}
614 	}
615 
616 	mounted = 1;
617 	rv = rump_pub_vfs_getmp(mountpath, &fs->ukfs_mp);
618 	if (rv) {
619 		goto out;
620 	}
621 
622 	if (regged) {
623 		fs->ukfs_devpath = strdup(devpath);
624 	}
625 	fs->ukfs_mountpath = strdup(mountpath);
626 	pthread_spin_init(&fs->ukfs_spin, PTHREAD_PROCESS_SHARED);
627 	fs->ukfs_devfd = devfd;
628 	fs->ukfs_part = part;
629 	assert(rv == 0);
630 
631 	curlwp = rump_pub_lwproc_curlwp();
632 	rump_pub_lwproc_newlwp(0);
633 	fs->ukfs_lwp = rump_pub_lwproc_curlwp();
634 	fs->ukfs_cwd = strdup("/");
635 	rump_pub_lwproc_switch(curlwp);
636 
637  out:
638 	if (rv) {
639 		if (fs) {
640 			free(fs);
641 			fs = NULL;
642 		}
643 		if (mounted)
644 			rump_sys_unmount(mountpath, MNT_FORCE);
645 		if (regged)
646 			rump_pub_etfs_remove(devpath);
647 		if (devfd != -1) {
648 			unlockdev(devfd, part);
649 			close(devfd);
650 		}
651 		ukfs_part_release(part);
652 		errno = rv;
653 	}
654 
655 	return fs;
656 }
657 
658 struct ukfs *
659 ukfs_mount(const char *vfsname, const char *devpath,
660 	const char *mountpath, int mntflags, void *arg, size_t alen)
661 {
662 
663 	return doukfsmount(vfsname, devpath, ukfs_part_na,
664 	    mountpath, mntflags, arg, alen);
665 }
666 
/*
 * Mount a file system from the given partition of a disk device; the
 * request is forwarded to doukfsmount() with the caller's part.
 */
struct ukfs *
ukfs_mount_disk(const char *vfsname, const char *devpath,
	struct ukfs_part *part, const char *mountpath, int mntflags,
	void *arg, size_t alen)
{
	struct ukfs *fs;

	fs = doukfsmount(vfsname, devpath, part,
	    mountpath, mntflags, arg, alen);
	return fs;
}
676 
677 int
678 ukfs_release(struct ukfs *fs, int flags)
679 {
680 	struct lwp *curlwp = rump_pub_lwproc_curlwp();
681 
682 	/* get root lwp */
683 	rump_pub_lwproc_switch(fs->ukfs_lwp);
684 	rump_pub_lwproc_rfork(RUMP_RFCFDG);
685 
686 	if ((flags & UKFS_RELFLAG_NOUNMOUNT) == 0) {
687 		int rv, mntflag, error;
688 
689 		mntflag = 0;
690 		if (flags & UKFS_RELFLAG_FORCE)
691 			mntflag = MNT_FORCE;
692 
693 		rv = rump_sys_unmount(fs->ukfs_mountpath, mntflag);
694 		if (rv == -1) {
695 			error = errno;
696 			rump_pub_lwproc_releaselwp();
697 			if (curlwp)
698 				rump_pub_lwproc_switch(curlwp);
699 			errno = error;
700 			return -1;
701 		}
702 	}
703 
704 	if (fs->ukfs_devpath) {
705 		rump_pub_etfs_remove(fs->ukfs_devpath);
706 		free(fs->ukfs_devpath);
707 	}
708 	free(fs->ukfs_mountpath);
709 	free(fs->ukfs_cwd);
710 
711 	/* release this routine's lwp and ukfs base lwp */
712 	rump_pub_lwproc_releaselwp();
713 	rump_pub_lwproc_switch(fs->ukfs_lwp);
714 	rump_pub_lwproc_releaselwp();
715 
716 	pthread_spin_destroy(&fs->ukfs_spin);
717 	if (fs->ukfs_devfd != -1) {
718 		unlockdev(fs->ukfs_devfd, fs->ukfs_part);
719 		close(fs->ukfs_devfd);
720 	}
721 	ukfs_part_release(fs->ukfs_part);
722 	free(fs);
723 
724 	if (curlwp)
725 		rump_pub_lwproc_switch(curlwp);
726 
727 	return 0;
728 }
729 
730 void
731 ukfs_part_release(struct ukfs_part *part)
732 {
733 	int release;
734 
735 	if (part != ukfs_part_none && part != ukfs_part_na) {
736 		pthread_spin_lock(&part->part_lck);
737 		release = --part->part_refcount == 0;
738 		pthread_spin_unlock(&part->part_lck);
739 		if (release) {
740 			pthread_spin_destroy(&part->part_lck);
741 			free(part);
742 		}
743 	}
744 }
745 
/*
 * Complete body of a simple wrapper function returning int: enter the
 * ukfs context, evaluate `thecall', leave the context and return the
 * call's result.  The `ukfs' macro argument does not appear in the
 * expansion text; PRECALL() picks up the identifier `ukfs' from the
 * enclosing function.
 */
#define STDCALL(ukfs, thecall)						\
	int rv = 0;							\
									\
	PRECALL();							\
	rv = thecall;							\
	POSTCALL();							\
	return rv;
753 
754 int
755 ukfs_opendir(struct ukfs *ukfs, const char *dirname, struct ukfs_dircookie **c)
756 {
757 	struct vnode *vp;
758 	int rv;
759 
760 	PRECALL();
761 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
762 	    NULL, &vp, NULL);
763 	POSTCALL();
764 
765 	if (rv == 0) {
766 		RUMP_VOP_UNLOCK(vp);
767 	} else {
768 		errno = rv;
769 		rv = -1;
770 	}
771 
772 	/*LINTED*/
773 	*c = (struct ukfs_dircookie *)vp;
774 	return rv;
775 }
776 
777 static int
778 getmydents(struct vnode *vp, off_t *off, uint8_t *buf, size_t bufsize)
779 {
780 	struct uio *uio;
781 	size_t resid;
782 	int rv, eofflag;
783 	struct kauth_cred *cred;
784 
785 	uio = rump_pub_uio_setup(buf, bufsize, *off, RUMPUIO_READ);
786 	cred = rump_pub_cred_create(0, 0, 0, NULL);
787 	rv = RUMP_VOP_READDIR(vp, uio, cred, &eofflag, NULL, NULL);
788 	rump_pub_cred_put(cred);
789 	RUMP_VOP_UNLOCK(vp);
790 	*off = rump_pub_uio_getoff(uio);
791 	resid = rump_pub_uio_free(uio);
792 
793 	if (rv) {
794 		errno = rv;
795 		return -1;
796 	}
797 
798 	/* LINTED: not totally correct return type, but follows syscall */
799 	return bufsize - resid;
800 }
801 
802 /*ARGSUSED*/
803 int
804 ukfs_getdents_cookie(struct ukfs *ukfs, struct ukfs_dircookie *c, off_t *off,
805 	uint8_t *buf, size_t bufsize)
806 {
807 	/*LINTED*/
808 	struct vnode *vp = (struct vnode *)c;
809 
810 	RUMP_VOP_LOCK(vp, RUMP_LK_SHARED);
811 	return getmydents(vp, off, buf, bufsize);
812 }
813 
814 int
815 ukfs_getdents(struct ukfs *ukfs, const char *dirname, off_t *off,
816 	uint8_t *buf, size_t bufsize)
817 {
818 	struct vnode *vp;
819 	int rv;
820 
821 	PRECALL();
822 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
823 	    NULL, &vp, NULL);
824 	if (rv) {
825 		POSTCALL();
826 		errno = rv;
827 		return -1;
828 	}
829 
830 	rv = getmydents(vp, off, buf, bufsize);
831 	rump_pub_vp_rele(vp);
832 	POSTCALL();
833 	return rv;
834 }
835 
/*
 * Release the vnode reference behind a directory cookie.  Always
 * returns 0.
 */
/*ARGSUSED*/
int
ukfs_closedir(struct ukfs *ukfs, struct ukfs_dircookie *c)
{
	struct vnode *dirvp;

	/*LINTED*/
	dirvp = (struct vnode *)c;
	rump_pub_vp_rele(dirvp);

	return 0;
}
845 
/*
 * Open filename with the given flags (creation mode 0).
 *
 * Returns the descriptor from rump_sys_open(), or -1 on failure.
 */
int
ukfs_open(struct ukfs *ukfs, const char *filename, int flags)
{
	int fd;

	PRECALL();
	fd = rump_sys_open(filename, flags, 0);
	POSTCALL();

	/* fd is already -1 on failure */
	return fd;
}
859 
860 ssize_t
861 ukfs_read(struct ukfs *ukfs, const char *filename, off_t off,
862 	uint8_t *buf, size_t bufsize)
863 {
864 	int fd;
865 	ssize_t xfer = -1; /* XXXgcc */
866 
867 	PRECALL();
868 	fd = rump_sys_open(filename, RUMP_O_RDONLY, 0);
869 	if (fd == -1)
870 		goto out;
871 
872 	xfer = rump_sys_pread(fd, buf, bufsize, off);
873 	rump_sys_close(fd);
874 
875  out:
876 	POSTCALL();
877 	if (fd == -1) {
878 		return -1;
879 	}
880 	return xfer;
881 }
882 
/*
 * Read from an already open descriptor at offset off; a plain
 * pass-through to rump_sys_pread().
 */
/*ARGSUSED*/
ssize_t
ukfs_read_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen)
{
	ssize_t xfer;

	xfer = rump_sys_pread(fd, buf, buflen, off);
	return xfer;
}
890 
891 ssize_t
892 ukfs_write(struct ukfs *ukfs, const char *filename, off_t off,
893 	uint8_t *buf, size_t bufsize)
894 {
895 	int fd;
896 	ssize_t xfer = -1; /* XXXgcc */
897 
898 	PRECALL();
899 	fd = rump_sys_open(filename, RUMP_O_WRONLY, 0);
900 	if (fd == -1)
901 		goto out;
902 
903 	/* write and commit */
904 	xfer = rump_sys_pwrite(fd, buf, bufsize, off);
905 	if (xfer > 0)
906 		rump_sys_fsync(fd);
907 
908 	rump_sys_close(fd);
909 
910  out:
911 	POSTCALL();
912 	if (fd == -1) {
913 		return -1;
914 	}
915 	return xfer;
916 }
917 
/*
 * Write to an already open descriptor at offset off.  If dosync is
 * non-zero and data was written, the descriptor is fsync'ed.
 *
 * Returns the result of rump_sys_pwrite().
 */
/*ARGSUSED*/
ssize_t
ukfs_write_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen,
	int dosync)
{
	ssize_t nwritten = rump_sys_pwrite(fd, buf, buflen, off);

	if (dosync && nwritten > 0)
		rump_sys_fsync(fd);

	return nwritten;
}
931 
/*
 * Close a descriptor obtained from ukfs_open().  The result of the
 * close is discarded; the function always returns 0.
 */
/*ARGSUSED*/
int
ukfs_close(struct ukfs *ukfs, int fd)
{

	(void)rump_sys_close(fd);

	return 0;
}
940 
941 int
942 ukfs_create(struct ukfs *ukfs, const char *filename, mode_t mode)
943 {
944 	int fd;
945 
946 	PRECALL();
947 	fd = rump_sys_open(filename, RUMP_O_WRONLY | RUMP_O_CREAT, mode);
948 	if (fd == -1)
949 		return -1;
950 	rump_sys_close(fd);
951 
952 	POSTCALL();
953 	return 0;
954 }
955 
/* Create a device node; returns the result of rump_sys_mknod(). */
int
ukfs_mknod(struct ukfs *ukfs, const char *path, mode_t mode, dev_t dev)
{
	int rv;

	PRECALL();
	rv = rump_sys_mknod(path, mode, dev);
	POSTCALL();

	return rv;
}
962 
/* Create a fifo; returns the result of rump_sys_mkfifo(). */
int
ukfs_mkfifo(struct ukfs *ukfs, const char *path, mode_t mode)
{
	int rv;

	PRECALL();
	rv = rump_sys_mkfifo(path, mode);
	POSTCALL();

	return rv;
}
969 
/* Create a directory; returns the result of rump_sys_mkdir(). */
int
ukfs_mkdir(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int rv;

	PRECALL();
	rv = rump_sys_mkdir(filename, mode);
	POSTCALL();

	return rv;
}
976 
/* Unlink a file; returns the result of rump_sys_unlink(). */
int
ukfs_remove(struct ukfs *ukfs, const char *filename)
{
	int rv;

	PRECALL();
	rv = rump_sys_unlink(filename);
	POSTCALL();

	return rv;
}
983 
/* Remove a directory; returns the result of rump_sys_rmdir(). */
int
ukfs_rmdir(struct ukfs *ukfs, const char *filename)
{
	int rv;

	PRECALL();
	rv = rump_sys_rmdir(filename);
	POSTCALL();

	return rv;
}
990 
/* Create hard link f_create to filename; returns the result of rump_sys_link(). */
int
ukfs_link(struct ukfs *ukfs, const char *filename, const char *f_create)
{
	int rv;

	PRECALL();
	rv = rump_sys_link(filename, f_create);
	POSTCALL();

	return rv;
}
997 
/* Create a symbolic link; returns the result of rump_sys_symlink(). */
int
ukfs_symlink(struct ukfs *ukfs, const char *filename, const char *linkname)
{
	int rv;

	PRECALL();
	rv = rump_sys_symlink(filename, linkname);
	POSTCALL();

	return rv;
}
1004 
/*
 * Read the target of a symbolic link into linkbuf; returns the result
 * of rump_sys_readlink().
 */
ssize_t
ukfs_readlink(struct ukfs *ukfs, const char *filename,
	char *linkbuf, size_t buflen)
{
	ssize_t linklen;

	PRECALL();
	linklen = rump_sys_readlink(filename, linkbuf, buflen);
	POSTCALL();

	return linklen;
}
1016 
/* Rename from to to; returns the result of rump_sys_rename(). */
int
ukfs_rename(struct ukfs *ukfs, const char *from, const char *to)
{
	int rv;

	PRECALL();
	rv = rump_sys_rename(from, to);
	POSTCALL();

	return rv;
}
1023 
1024 int
1025 ukfs_chdir(struct ukfs *ukfs, const char *path)
1026 {
1027 	char *newpath, *oldpath;
1028 	int rv;
1029 
1030 	PRECALL();
1031 	rv = rump_sys_chdir(path);
1032 	if (rv == -1)
1033 		goto out;
1034 
1035 	newpath = malloc(MAXPATHLEN);
1036 	if (rump_sys___getcwd(newpath, MAXPATHLEN) == -1) {
1037 		goto out;
1038 	}
1039 
1040 	pthread_spin_lock(&ukfs->ukfs_spin);
1041 	oldpath = ukfs->ukfs_cwd;
1042 	ukfs->ukfs_cwd = newpath;
1043 	pthread_spin_unlock(&ukfs->ukfs_spin);
1044 	free(oldpath);
1045 
1046  out:
1047 	POSTCALL();
1048 	return rv;
1049 }
1050 
/* stat() a path inside the file system; returns the result of rump_sys_stat(). */
int
ukfs_stat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{
	int error;

	PRECALL();
	error = rump_sys_stat(filename, file_stat);
	POSTCALL();

	return error;
}
1062 
/* lstat() a path inside the file system; returns the result of rump_sys_lstat(). */
int
ukfs_lstat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{
	int error;

	PRECALL();
	error = rump_sys_lstat(filename, file_stat);
	POSTCALL();

	return error;
}
1074 
/* Change file mode; returns the result of rump_sys_chmod(). */
int
ukfs_chmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int rv;

	PRECALL();
	rv = rump_sys_chmod(filename, mode);
	POSTCALL();

	return rv;
}
1081 
/* Change file mode via rump_sys_lchmod(); returns its result. */
int
ukfs_lchmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{
	int rv;

	PRECALL();
	rv = rump_sys_lchmod(filename, mode);
	POSTCALL();

	return rv;
}
1088 
/* Change file ownership; returns the result of rump_sys_chown(). */
int
ukfs_chown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int rv;

	PRECALL();
	rv = rump_sys_chown(filename, uid, gid);
	POSTCALL();

	return rv;
}
1095 
/* Change file ownership via rump_sys_lchown(); returns its result. */
int
ukfs_lchown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{
	int rv;

	PRECALL();
	rv = rump_sys_lchown(filename, uid, gid);
	POSTCALL();

	return rv;
}
1102 
1103 int
1104 ukfs_chflags(struct ukfs *ukfs, const char *filename, u_long flags)
1105 {
1106 
1107 	STDCALL(ukfs, rump_sys_chflags(filename, flags));
1108 }
1109 
1110 int
1111 ukfs_lchflags(struct ukfs *ukfs, const char *filename, u_long flags)
1112 {
1113 
1114 	STDCALL(ukfs, rump_sys_lchflags(filename, flags));
1115 }
1116 
/* Set file times; returns the result of rump_sys_utimes(). */
int
ukfs_utimes(struct ukfs *ukfs, const char *filename, const struct timeval *tptr)
{
	int rv;

	PRECALL();
	rv = rump_sys_utimes(filename, tptr);
	POSTCALL();

	return rv;
}
1123 
/* Set file times via rump_sys_lutimes(); returns its result. */
int
ukfs_lutimes(struct ukfs *ukfs, const char *filename,
	      const struct timeval *tptr)
{
	int rv;

	PRECALL();
	rv = rump_sys_lutimes(filename, tptr);
	POSTCALL();

	return rv;
}
1131 
1132 /*
1133  * Dynamic module support
1134  */
1135 
1136 /* load one library */
1137 
1138 /*
1139  * XXX: the dlerror stuff isn't really threadsafe, but then again I
1140  * can't protect against other threads calling dl*() outside of ukfs,
1141  * so just live with it being flimsy
1142  */
/*
 * Load one module library and register the modules in its
 * "link_set_modules" section with the rump kernel.
 *
 * Returns  1 if the library was loaded and its modules initialized,
 *          0 if dlopen() failed on an undefined symbol (the caller may
 *            retry once the dependency has been loaded),
 *         -1 on any other failure.  errno is set except when dlopen()
 *            itself failed.
 *
 * The library handle is kept open on success.
 */
int
ukfs_modload(const char *fname)
{
	void *handle;
	const struct modinfo *const *mi_start, *const *mi_end;
	int error;

	handle = dlopen(fname, RTLD_LAZY|RTLD_GLOBAL);
	if (handle == NULL) {
		const char *dlmsg = dlerror();

		/*
		 * dlerror() returns NULL if another thread has already
		 * fetched the message; never pass that to strstr().
		 */
		if (dlmsg == NULL)
			dlmsg = "unknown error";
		if (strstr(dlmsg, "Undefined symbol"))
			return 0;
		warnx("dlopen %s failed: %s", fname, dlmsg);
		/* XXXerrno */
		return -1;
	}

	mi_start = dlsym(handle, "__start_link_set_modules");
	mi_end = dlsym(handle, "__stop_link_set_modules");
	if (mi_start && mi_end) {
		error = rump_pub_module_init(mi_start,
		    (size_t)(mi_end-mi_start));
		if (error)
			goto errclose;
		return 1;
	}
	/* no module link set in this library */
	error = EINVAL;

 errclose:
	dlclose(handle);
	errno = error;
	return -1;
}
1176 
/* A library whose load has to be retried by ukfs_modload_dir(). */
struct loadfail {
	char *pname;			/* malloc'ed path of the library */

	LIST_ENTRY(loadfail) entries;
};

/* Only directory entries named librumpfs_*.so are considered modules. */
#define RUMPFSMOD_PREFIX "librumpfs_"
#define RUMPFSMOD_SUFFIX ".so"
1185 
1186 int
1187 ukfs_modload_dir(const char *dir)
1188 {
1189 	char nbuf[MAXPATHLEN+1], *p;
1190 	struct dirent entry, *result;
1191 	DIR *libdir;
1192 	struct loadfail *lf, *nlf;
1193 	int error, nloaded = 0, redo;
1194 	LIST_HEAD(, loadfail) lfs;
1195 
1196 	libdir = opendir(dir);
1197 	if (libdir == NULL)
1198 		return -1;
1199 
1200 	LIST_INIT(&lfs);
1201 	for (;;) {
1202 		if ((error = readdir_r(libdir, &entry, &result)) != 0)
1203 			break;
1204 		if (!result)
1205 			break;
1206 		if (strncmp(result->d_name, RUMPFSMOD_PREFIX,
1207 		    strlen(RUMPFSMOD_PREFIX)) != 0)
1208 			continue;
1209 		if (((p = strstr(result->d_name, RUMPFSMOD_SUFFIX)) == NULL)
1210 		    || strlen(p) != strlen(RUMPFSMOD_SUFFIX))
1211 			continue;
1212 		strlcpy(nbuf, dir, sizeof(nbuf));
1213 		strlcat(nbuf, "/", sizeof(nbuf));
1214 		strlcat(nbuf, result->d_name, sizeof(nbuf));
1215 		switch (ukfs_modload(nbuf)) {
1216 		case 0:
1217 			lf = malloc(sizeof(*lf));
1218 			if (lf == NULL) {
1219 				error = ENOMEM;
1220 				break;
1221 			}
1222 			lf->pname = strdup(nbuf);
1223 			if (lf->pname == NULL) {
1224 				free(lf);
1225 				error = ENOMEM;
1226 				break;
1227 			}
1228 			LIST_INSERT_HEAD(&lfs, lf, entries);
1229 			break;
1230 		case 1:
1231 			nloaded++;
1232 			break;
1233 		default:
1234 			/* ignore errors */
1235 			break;
1236 		}
1237 	}
1238 	closedir(libdir);
1239 	if (error && nloaded != 0)
1240 		error = 0;
1241 
1242 	/*
1243 	 * El-cheapo dependency calculator.  Just try to load the
1244 	 * modules n times in a loop
1245 	 */
1246 	for (redo = 1; redo;) {
1247 		redo = 0;
1248 		nlf = LIST_FIRST(&lfs);
1249 		while ((lf = nlf) != NULL) {
1250 			nlf = LIST_NEXT(lf, entries);
1251 			if (ukfs_modload(lf->pname) == 1) {
1252 				nloaded++;
1253 				redo = 1;
1254 				LIST_REMOVE(lf, entries);
1255 				free(lf->pname);
1256 				free(lf);
1257 			}
1258 		}
1259 	}
1260 
1261 	while ((lf = LIST_FIRST(&lfs)) != NULL) {
1262 		LIST_REMOVE(lf, entries);
1263 		free(lf->pname);
1264 		free(lf);
1265 	}
1266 
1267 	if (error && nloaded == 0) {
1268 		errno = error;
1269 		return -1;
1270 	}
1271 
1272 	return nloaded;
1273 }
1274 
1275 /* XXX: this code uses definitions from NetBSD, needs rumpdefs */
1276 ssize_t
1277 ukfs_vfstypes(char *buf, size_t buflen)
1278 {
1279 	int mib[3];
1280 	struct sysctlnode q, ans[128];
1281 	size_t alen;
1282 	int i;
1283 
1284 	mib[0] = CTL_VFS;
1285 	mib[1] = VFS_GENERIC;
1286 	mib[2] = CTL_QUERY;
1287 	alen = sizeof(ans);
1288 
1289 	memset(&q, 0, sizeof(q));
1290 	q.sysctl_flags = SYSCTL_VERSION;
1291 
1292 	if (rump_sys___sysctl(mib, 3, ans, &alen, &q, sizeof(q)) == -1) {
1293 		return -1;
1294 	}
1295 
1296 	for (i = 0; i < alen/sizeof(ans[0]); i++)
1297 		if (strcmp("fstypes", ans[i].sysctl_name) == 0)
1298 			break;
1299 	if (i == alen/sizeof(ans[0])) {
1300 		errno = ENXIO;
1301 		return -1;
1302 	}
1303 
1304 	mib[0] = CTL_VFS;
1305 	mib[1] = VFS_GENERIC;
1306 	mib[2] = ans[i].sysctl_num;
1307 
1308 	if (rump_sys___sysctl(mib, 3, buf, &buflen, NULL, 0) == -1) {
1309 		return -1;
1310 	}
1311 
1312 	return buflen;
1313 }
1314 
1315 /*
1316  * Utilities
1317  */
/*
 * Create pathname and all of its leading components by calling
 * mkdirfn(fs, prefix, mode & ~umask) for each successively longer
 * prefix.  A call that leaves errno at EEXIST counts as success.
 *
 * Returns the result of the last mkdirfn call (0 for EEXIST), or -1
 * with errno = ENOMEM if the path could not be duplicated.
 */
static int
builddirs(const char *pathname, mode_t mode,
	int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs)
{
	char *path, *cursor;
	mode_t mask;
	int rv, last;

	/*ukfs_umask((mask = ukfs_umask(0)));*/
	/* read the process umask and put it straight back */
	mask = umask(0);
	umask(mask);

	path = strdup(pathname);
	if (path == NULL) {
		errno = ENOMEM;
		return -1;
	}

	cursor = path;
	do {
		/* advance to the end of the next component */
		cursor += strspn(cursor, "/");
		cursor += strcspn(cursor, "/");

		/* terminate the prefix here; a no-op at end of string */
		last = (*cursor == '\0');
		*cursor = '\0';

		rv = mkdirfn(fs, path, mode & ~mask);
		if (errno == EEXIST)
			rv = 0;

		if (rv == -1 || last)
			break;

		/* restore the separator and continue with the next one */
		*cursor = '/';
	} while (/*CONSTCOND*/1);

	free(path);

	return rv;
}
1360 
1361 int
1362 ukfs_util_builddirs(struct ukfs *ukfs, const char *pathname, mode_t mode)
1363 {
1364 
1365 	return builddirs(pathname, mode, ukfs_mkdir, ukfs);
1366 }
1367