xref: /netbsd-src/lib/librumpuser/rumpuser.c (revision 7788a0781fe6ff2cce37368b4578a7ade0850cb1)
1 /*	$NetBSD: rumpuser.c,v 1.53 2013/05/15 15:57:01 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "rumpuser_port.h"
29 
30 #if !defined(lint)
31 __RCSID("$NetBSD: rumpuser.c,v 1.53 2013/05/15 15:57:01 pooka Exp $");
32 #endif /* !lint */
33 
34 #include <sys/ioctl.h>
35 #include <sys/mman.h>
36 #include <sys/uio.h>
37 #include <sys/stat.h>
38 #include <sys/time.h>
39 
40 #ifdef __NetBSD__
41 #include <sys/disk.h>
42 #include <sys/disklabel.h>
43 #include <sys/dkio.h>
44 #endif
45 
46 #if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
47 #include <sys/sysctl.h>
48 #endif
49 
50 #include <assert.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <netdb.h>
54 #include <signal.h>
55 #include <stdarg.h>
56 #include <stdint.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <time.h>
61 #include <unistd.h>
62 
63 #include <rump/rumpuser.h>
64 
65 #include "rumpuser_int.h"
66 
67 struct rumpuser_hyperup rumpuser__hyp;
68 
69 int
70 rumpuser_init(int version, const struct rumpuser_hyperup *hyp)
71 {
72 
73 	if (version != RUMPUSER_VERSION) {
74 		fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n",
75 		    version, RUMPUSER_VERSION);
76 		return 1;
77 	}
78 
79 #ifdef RUMPUSER_USE_DEVRANDOM
80 	uint32_t rv;
81 	int fd;
82 
83 	if ((fd = open("/dev/urandom", O_RDONLY)) == -1) {
84 		srandom(time(NULL));
85 	} else {
86 		if (read(fd, &rv, sizeof(rv)) != sizeof(rv))
87 			srandom(time(NULL));
88 		else
89 			srandom(rv);
90 		close(fd);
91 	}
92 #endif
93 
94 	rumpuser__thrinit();
95 	rumpuser__hyp = *hyp;
96 
97 	return 0;
98 }
99 
100 int
101 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp)
102 {
103 	struct stat sb;
104 	uint64_t size = 0;
105 	int needsdev = 0, rv = 0, ft = 0;
106 	int fd = -1;
107 
108 	if (stat(path, &sb) == -1) {
109 		rv = errno;
110 		goto out;
111 	}
112 
113 	switch (sb.st_mode & S_IFMT) {
114 	case S_IFDIR:
115 		ft = RUMPUSER_FT_DIR;
116 		break;
117 	case S_IFREG:
118 		ft = RUMPUSER_FT_REG;
119 		break;
120 	case S_IFBLK:
121 		ft = RUMPUSER_FT_BLK;
122 		needsdev = 1;
123 		break;
124 	case S_IFCHR:
125 		ft = RUMPUSER_FT_CHR;
126 		needsdev = 1;
127 		break;
128 	default:
129 		ft = RUMPUSER_FT_OTHER;
130 		break;
131 	}
132 
133 	if (!needsdev) {
134 		size = sb.st_size;
135 	} else if (sizep) {
136 		/*
137 		 * Welcome to the jungle.  Of course querying the kernel
138 		 * for a device partition size is supposed to be far from
139 		 * trivial.  On NetBSD we use ioctl.  On $other platform
140 		 * we have a problem.  We try "the lseek trick" and just
141 		 * fail if that fails.  Platform specific code can later
142 		 * be written here if appropriate.
143 		 *
144 		 * On NetBSD we hope and pray that for block devices nobody
145 		 * else is holding them open, because otherwise the kernel
146 		 * will not permit us to open it.  Thankfully, this is
147 		 * usually called only in bootstrap and then we can
148 		 * forget about it.
149 		 */
150 #ifndef __NetBSD__
151 		off_t off;
152 
153 		fd = open(path, O_RDONLY);
154 		if (fd == -1) {
155 			rv = errno;
156 			goto out;
157 		}
158 
159 		off = lseek(fd, 0, SEEK_END);
160 		if (off != 0) {
161 			size = off;
162 			goto out;
163 		}
164 		fprintf(stderr, "error: device size query not implemented on "
165 		    "this platform\n");
166 		rv = EOPNOTSUPP;
167 		goto out;
168 #else
169 		struct disklabel lab;
170 		struct partition *parta;
171 		struct dkwedge_info dkw;
172 
173 		fd = open(path, O_RDONLY);
174 		if (fd == -1) {
175 			rv = errno;
176 			goto out;
177 		}
178 
179 		if (ioctl(fd, DIOCGDINFO, &lab) == 0) {
180 			parta = &lab.d_partitions[DISKPART(sb.st_rdev)];
181 			size = (uint64_t)lab.d_secsize * parta->p_size;
182 			goto out;
183 		}
184 
185 		if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) {
186 			/*
187 			 * XXX: should use DIOCGDISKINFO to query
188 			 * sector size, but that requires proplib,
189 			 * so just don't bother for now.  it's nice
190 			 * that something as difficult as figuring out
191 			 * a partition's size has been made so easy.
192 			 */
193 			size = dkw.dkw_size << DEV_BSHIFT;
194 			goto out;
195 		}
196 
197 		rv = errno;
198 #endif /* __NetBSD__ */
199 	}
200 
201  out:
202 	if (rv == 0 && sizep)
203 		*sizep = size;
204 	if (rv == 0 && ftp)
205 		*ftp = ft;
206 	if (fd != -1)
207 		close(fd);
208 
209 	ET(rv);
210 }
211 
/*
 * Allocate "howmuch" bytes of host memory with the given alignment and
 * store the result through "memp".
 *
 * An alignment of 0 selects sizeof(void *).  An alignment rejected by
 * posix_memalign() with EINVAL is treated as a programming error: a
 * message is printed and the process aborts.  For any other failure
 * NULL is stored through "memp" and the error number is returned via
 * ET(); on success 0 is returned the same way.
 */
int
rumpuser_malloc(size_t howmuch, int alignment, void **memp)
{
	void *mem = NULL;
	int rv;

	if (alignment == 0)
		alignment = sizeof(void *);

	rv = posix_memalign(&mem, (size_t)alignment, howmuch);
	if (__predict_false(rv != 0)) {
		if (rv == EINVAL) {
			printf("rumpuser_malloc: invalid alignment %d\n",
			    alignment);
			abort();
		}
		/*
		 * Do not rely on what posix_memalign() left in "mem"
		 * after a failure; hand the caller a well-defined NULL.
		 */
		mem = NULL;
	}

	*memp = mem;
	ET(rv);
}
233 
/*
 * Release memory obtained from the host allocator.  The "size"
 * argument is accepted for interface symmetry but is not used here.
 */
/*ARGSUSED1*/
void
rumpuser_free(void *ptr, size_t size)
{

	(void)size;
	free(ptr);
}
241 
242 int
243 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit,
244 	int exec, void **memp)
245 {
246 	void *mem;
247 	int prot, rv;
248 
249 #ifndef MAP_ALIGNED
250 #define MAP_ALIGNED(a) 0
251 	if (alignbit)
252 		fprintf(stderr, "rumpuser_anonmmap: warning, requested "
253 		    "alignment not supported by hypervisor\n");
254 #endif
255 
256 	prot = PROT_READ|PROT_WRITE;
257 	if (exec)
258 		prot |= PROT_EXEC;
259 	mem = mmap(prefaddr, size, prot,
260 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0);
261 	if (mem == MAP_FAILED) {
262 		rv = errno;
263 	} else {
264 		*memp = mem;
265 		rv = 0;
266 	}
267 
268 	ET(rv);
269 }
270 
/*
 * Unmap "len" bytes of host memory starting at "addr".  The result of
 * munmap() is not reported to the caller.
 */
void
rumpuser_unmap(void *addr, size_t len)
{

	(void)munmap(addr, len);
}
277 
278 int
279 rumpuser_open(const char *path, int ruflags, int *fdp)
280 {
281 	int fd, flags, rv;
282 
283 	switch (ruflags & RUMPUSER_OPEN_ACCMODE) {
284 	case RUMPUSER_OPEN_RDONLY:
285 		flags = O_RDONLY;
286 		break;
287 	case RUMPUSER_OPEN_WRONLY:
288 		flags = O_WRONLY;
289 		break;
290 	case RUMPUSER_OPEN_RDWR:
291 		flags = O_RDWR;
292 		break;
293 	default:
294 		rv = EINVAL;
295 		goto out;
296 	}
297 
298 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_;
299 	TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT);
300 	TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL);
301 #undef TESTSET
302 
303 	KLOCK_WRAP(fd = open(path, flags, 0644));
304 	if (fd == -1) {
305 		rv = errno;
306 	} else {
307 		*fdp = fd;
308 		rv = 0;
309 	}
310 
311  out:
312 	ET(rv);
313 }
314 
/*
 * Flush and close the host descriptor "fd".
 *
 * The fsync()/close() pair runs between rumpkern_unsched() and
 * rumpkern_sched().  Their results are not examined: the function
 * always reports 0 through ET().
 */
int
rumpuser_close(int fd)
{
	int nlocks;

	rumpkern_unsched(&nlocks, NULL);
	(void)fsync(fd);
	(void)close(fd);
	rumpkern_sched(nlocks, NULL);

	ET(0);
}
327 
328 /*
329  * Assume "struct rumpuser_iovec" and "struct iovec" are the same.
330  * If you encounter POSIX platforms where they aren't, add some
331  * translation for iovlen > 1.
332  */
333 int
334 rumpuser_iovread(int fd, struct rumpuser_iovec *ruiov, size_t iovlen,
335 	int64_t roff, size_t *retp)
336 {
337 	struct iovec *iov = (struct iovec *)ruiov;
338 	off_t off = (off_t)roff;
339 	ssize_t nn;
340 	int rv;
341 
342 	if (off == RUMPUSER_IOV_NOSEEK) {
343 		KLOCK_WRAP(nn = readv(fd, iov, iovlen));
344 	} else {
345 		int nlocks;
346 
347 		rumpkern_unsched(&nlocks, NULL);
348 		if (lseek(fd, off, SEEK_SET) == off) {
349 			nn = readv(fd, iov, iovlen);
350 		} else {
351 			nn = -1;
352 		}
353 		rumpkern_sched(nlocks, NULL);
354 	}
355 
356 	if (nn == -1) {
357 		rv = errno;
358 	} else {
359 		*retp = (size_t)nn;
360 		rv = 0;
361 	}
362 
363 	ET(rv);
364 }
365 
366 int
367 rumpuser_iovwrite(int fd, const struct rumpuser_iovec *ruiov, size_t iovlen,
368 	int64_t roff, size_t *retp)
369 {
370 	const struct iovec *iov = (const struct iovec *)ruiov;
371 	off_t off = (off_t)roff;
372 	ssize_t nn;
373 	int rv;
374 
375 	if (off == RUMPUSER_IOV_NOSEEK) {
376 		KLOCK_WRAP(nn = writev(fd, iov, iovlen));
377 	} else {
378 		int nlocks;
379 
380 		rumpkern_unsched(&nlocks, NULL);
381 		if (lseek(fd, off, SEEK_SET) == off) {
382 			nn = writev(fd, iov, iovlen);
383 		} else {
384 			nn = -1;
385 		}
386 		rumpkern_sched(nlocks, NULL);
387 	}
388 
389 	if (nn == -1) {
390 		rv = errno;
391 	} else {
392 		*retp = (size_t)nn;
393 		rv = 0;
394 	}
395 
396 	ET(rv);
397 }
398 
399 int
400 rumpuser_syncfd(int fd, int flags, uint64_t start, uint64_t len)
401 {
402 	int rv = 0;
403 
404 	/*
405 	 * For now, assume fd is regular file and does not care
406 	 * about read syncing
407 	 */
408 	if ((flags & RUMPUSER_SYNCFD_BOTH) == 0) {
409 		rv = EINVAL;
410 		goto out;
411 	}
412 	if ((flags & RUMPUSER_SYNCFD_WRITE) == 0) {
413 		rv = 0;
414 		goto out;
415 	}
416 
417 #ifdef __NetBSD__
418 	{
419 	int fsflags = FDATASYNC;
420 
421 	if (fsflags & RUMPUSER_SYNCFD_SYNC)
422 		fsflags |= FDISKSYNC;
423 	if (fsync_range(fd, fsflags, start, len) == -1)
424 		rv = errno;
425 	}
426 #else
427 	/* el-simplo */
428 	if (fsync(fd) == -1)
429 		rv = errno;
430 #endif
431 
432  out:
433 	ET(rv);
434 }
435 
436 int
437 rumpuser_clock_gettime(int enum_rumpclock, int64_t *sec, long *nsec)
438 {
439 	enum rumpclock rclk = enum_rumpclock;
440 	struct timespec ts;
441 	clockid_t clk;
442 	int rv;
443 
444 	switch (rclk) {
445 	case RUMPUSER_CLOCK_RELWALL:
446 		clk = CLOCK_REALTIME;
447 		break;
448 	case RUMPUSER_CLOCK_ABSMONO:
449 #ifdef HAVE_CLOCK_NANOSLEEP
450 		clk = CLOCK_MONOTONIC;
451 #else
452 		clk = CLOCK_REALTIME;
453 #endif
454 		break;
455 	default:
456 		abort();
457 	}
458 
459 	if (clock_gettime(clk, &ts) == -1) {
460 		rv = errno;
461 	} else {
462 		*sec = ts.tv_sec;
463 		*nsec = ts.tv_nsec;
464 		rv = 0;
465 	}
466 
467 	ET(rv);
468 }
469 
470 int
471 rumpuser_clock_sleep(int enum_rumpclock, int64_t sec, long nsec)
472 {
473 	enum rumpclock rclk = enum_rumpclock;
474 	struct timespec rqt, rmt;
475 	int nlocks;
476 	int rv;
477 
478 	rumpkern_unsched(&nlocks, NULL);
479 
480 	/*LINTED*/
481 	rqt.tv_sec = sec;
482 	/*LINTED*/
483 	rqt.tv_nsec = nsec;
484 
485 	switch (rclk) {
486 	case RUMPUSER_CLOCK_RELWALL:
487 		do {
488 			rv = nanosleep(&rqt, &rmt);
489 			rqt = rmt;
490 		} while (rv == -1 && errno == EINTR);
491 		if (rv == -1) {
492 			rv = errno;
493 		}
494 		break;
495 	case RUMPUSER_CLOCK_ABSMONO:
496 		do {
497 #ifdef HAVE_CLOCK_NANOSLEEP
498 			rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
499 			    &rqt, NULL);
500 #else
501 			/* le/la/der/die/das sigh. timevalspec tailspin */
502 			struct timespec ts, tsr;
503 			clock_gettime(CLOCK_REALTIME, &ts);
504 			if (ts.tv_sec == rqt.tv_sec ?
505 			    ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) {
506 				rv = 0;
507 			} else {
508 				tsr.tv_sec = rqt.tv_sec - ts.tv_sec;
509 				tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec;
510 				if (tsr.tv_nsec < 0) {
511 					tsr.tv_sec--;
512 					tsr.tv_nsec += 1000*1000*1000;
513 				}
514 				rv = nanosleep(&tsr, NULL);
515 			}
516 #endif
517 		} while (rv == -1 && errno == EINTR);
518 		if (rv == -1) {
519 			rv = errno;
520 		}
521 		break;
522 	default:
523 		abort();
524 	}
525 
526 	rumpkern_sched(nlocks, NULL);
527 
528 	ET(rv);
529 }
530 
/*
 * Return the number of CPUs on the host, or 1 if that cannot be
 * determined.
 *
 * On NetBSD, FreeBSD and DragonFly the "hw.ncpu" sysctl is used.  On
 * any other host which provides _SC_NPROCESSORS_ONLN the count of
 * online processors reported by sysconf() is used.  A failed query or
 * a non-positive answer falls back to 1 so that callers never see a
 * zero or negative CPU count.
 */
static int
gethostncpu(void)
{
	int ncpu = 1;

#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
	int val;
	size_t sz = sizeof(val);

	if (sysctlbyname("hw.ncpu", &val, &sz, NULL, 0) == 0 && val > 0)
		ncpu = val;
#elif defined(_SC_NPROCESSORS_ONLN)
	/* XXX: online CPUs; this is just a rough estimate ... */
	long val = sysconf(_SC_NPROCESSORS_ONLN);

	/* sysconf() reports failure as -1 */
	if (val > 0)
		ncpu = (int)val;
#endif

	return ncpu;
}
565 
566 int
567 rumpuser_getparam(const char *name, void *buf, size_t blen)
568 {
569 	int rv;
570 
571 	if (strcmp(name, RUMPUSER_PARAM_NCPU) == 0) {
572 		int ncpu;
573 
574 		if (getenv_r("RUMP_NCPU", buf, blen) == -1) {
575 			ncpu = gethostncpu();
576 			snprintf(buf, blen, "%d", ncpu);
577 		}
578 		rv = 0;
579 	} else if (strcmp(name, RUMPUSER_PARAM_HOSTNAME) == 0) {
580 		char tmp[MAXHOSTNAMELEN];
581 
582 		if (gethostname(tmp, sizeof(tmp)) == -1) {
583 			snprintf(buf, blen, "rump-%05d", (int)getpid());
584 		} else {
585 			snprintf(buf, blen, "rump-%05d.%s",
586 			    (int)getpid(), tmp);
587 		}
588 		rv = 0;
589 	} else if (*name == '_') {
590 		rv = EINVAL;
591 	} else {
592 		if (getenv_r(name, buf, blen) == -1)
593 			rv = errno;
594 		else
595 			rv = 0;
596 	}
597 
598 	ET(rv);
599 }
600 
/*
 * Write the single character "c" to the host's standard output.
 */
void
rumpuser_putchar(int c)
{

	(void)putc(c, stdout);
}
607 
608 void
609 rumpuser_exit(int rv)
610 {
611 
612 	if (rv == RUMPUSER_PANIC)
613 		abort();
614 	else
615 		exit(rv);
616 }
617 
/*
 * Set the host errno of the calling thread to "error".
 */
void
rumpuser_seterrno(int error)
{

	errno = error;
}
624 
/*
 * This is meant for safe debugging prints from the kernel:
 * printf-style formatted output sent to the host's stderr.
 */
void
rumpuser_dprintf(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	(void)vfprintf(stderr, format, args);
	va_end(args);
}
637 
/*
 * Deliver signal "sig" on the host.
 *
 * On NetBSD, RUMPUSER_PID_SELF raises the signal in the calling
 * process and any other "pid" is passed to kill().  On other hosts
 * the call is not supported and fails with EOPNOTSUPP.
 *
 * The result is the host error number (0 on success) passed through
 * ET().
 */
int
rumpuser_kill(int64_t pid, int sig)
{
	int rv;

#ifdef __NetBSD__
	int status;

	status = (pid == RUMPUSER_PID_SELF) ?
	    raise(sig) : kill((pid_t)pid, sig);
	rv = (status == -1) ? errno : 0;
#else
	/* XXXfixme: signal numbers may not match on non-NetBSD */
	rv = EOPNOTSUPP;
#endif

	ET(rv);
}
662 
/*
 * Fill "buf" with "buflen" bytes produced by RUMPUSER_RANDOM().
 *
 * The buffer is filled 32 bits at a time, with a shorter final piece
 * when "buflen" is not a multiple of four.  "flags" is not examined.
 * "buflen" is always stored through "retp" and 0 is reported through
 * ET().
 */
int
rumpuser_getrandom(void *buf, size_t buflen, int flags, size_t *retp)
{
	/*
	 * Walk the buffer bytewise: the caller's pointer need not be
	 * aligned for uint32_t access.
	 */
	unsigned char *dst = buf;
	size_t left = buflen;

	while (left > 0) {
		uint32_t rnd = RUMPUSER_RANDOM();
		size_t chunk = left < sizeof(rnd) ? left : sizeof(rnd);

		memcpy(dst, &rnd, chunk);
		dst += chunk;
		left -= chunk;
	}

	*retp = buflen;
	ET(0);
}
682