xref: /netbsd-src/lib/librumpuser/rumpuser.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: rumpuser.c,v 1.59 2014/04/02 13:54:42 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "rumpuser_port.h"
29 
30 #if !defined(lint)
31 __RCSID("$NetBSD: rumpuser.c,v 1.59 2014/04/02 13:54:42 pooka Exp $");
32 #endif /* !lint */
33 
34 #include <sys/ioctl.h>
35 #include <sys/mman.h>
36 #include <sys/uio.h>
37 #include <sys/stat.h>
38 #include <sys/time.h>
39 
40 #ifdef __NetBSD__
41 #include <sys/disk.h>
42 #include <sys/disklabel.h>
43 #include <sys/dkio.h>
44 #endif
45 
46 #if defined(__NetBSD__) || defined(__FreeBSD__) || \
47     defined(__DragonFly__) || defined(__APPLE__)
48 #define	__BSD__
49 #endif
50 
51 #if defined(__BSD__)
52 #include <sys/sysctl.h>
53 #endif
54 
55 #include <assert.h>
56 #include <errno.h>
57 #include <fcntl.h>
58 #include <netdb.h>
59 #include <signal.h>
60 #include <stdarg.h>
61 #include <stdint.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <time.h>
66 #include <unistd.h>
67 
68 #include <rump/rumpuser.h>
69 
70 #include "rumpuser_int.h"
71 
72 struct rumpuser_hyperup rumpuser__hyp;
73 
74 int
75 rumpuser_init(int version, const struct rumpuser_hyperup *hyp)
76 {
77 
78 	if (version != RUMPUSER_VERSION) {
79 		fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n",
80 		    version, RUMPUSER_VERSION);
81 		return 1;
82 	}
83 
84 #ifdef RUMPUSER_USE_DEVRANDOM
85 	uint32_t rv;
86 	int fd;
87 
88 	if ((fd = open("/dev/urandom", O_RDONLY)) == -1) {
89 		srandom(time(NULL));
90 	} else {
91 		if (read(fd, &rv, sizeof(rv)) != sizeof(rv))
92 			srandom(time(NULL));
93 		else
94 			srandom(rv);
95 		close(fd);
96 	}
97 #endif
98 
99 	rumpuser__thrinit();
100 	rumpuser__hyp = *hyp;
101 
102 	return 0;
103 }
104 
105 int
106 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp)
107 {
108 	struct stat sb;
109 	uint64_t size = 0;
110 	int needsdev = 0, rv = 0, ft = 0;
111 	int fd = -1;
112 
113 	if (stat(path, &sb) == -1) {
114 		rv = errno;
115 		goto out;
116 	}
117 
118 	switch (sb.st_mode & S_IFMT) {
119 	case S_IFDIR:
120 		ft = RUMPUSER_FT_DIR;
121 		break;
122 	case S_IFREG:
123 		ft = RUMPUSER_FT_REG;
124 		break;
125 	case S_IFBLK:
126 		ft = RUMPUSER_FT_BLK;
127 		needsdev = 1;
128 		break;
129 	case S_IFCHR:
130 		ft = RUMPUSER_FT_CHR;
131 		needsdev = 1;
132 		break;
133 	default:
134 		ft = RUMPUSER_FT_OTHER;
135 		break;
136 	}
137 
138 	if (!needsdev) {
139 		size = sb.st_size;
140 	} else if (sizep) {
141 		/*
142 		 * Welcome to the jungle.  Of course querying the kernel
143 		 * for a device partition size is supposed to be far from
144 		 * trivial.  On NetBSD we use ioctl.  On $other platform
145 		 * we have a problem.  We try "the lseek trick" and just
146 		 * fail if that fails.  Platform specific code can later
147 		 * be written here if appropriate.
148 		 *
149 		 * On NetBSD we hope and pray that for block devices nobody
150 		 * else is holding them open, because otherwise the kernel
151 		 * will not permit us to open it.  Thankfully, this is
152 		 * usually called only in bootstrap and then we can
153 		 * forget about it.
154 		 */
155 #ifndef __NetBSD__
156 		off_t off;
157 
158 		fd = open(path, O_RDONLY);
159 		if (fd == -1) {
160 			rv = errno;
161 			goto out;
162 		}
163 
164 		off = lseek(fd, 0, SEEK_END);
165 		if (off != 0) {
166 			size = off;
167 			goto out;
168 		}
169 		fprintf(stderr, "error: device size query not implemented on "
170 		    "this platform\n");
171 		rv = EOPNOTSUPP;
172 		goto out;
173 #else
174 		struct disklabel lab;
175 		struct partition *parta;
176 		struct dkwedge_info dkw;
177 
178 		fd = open(path, O_RDONLY);
179 		if (fd == -1) {
180 			rv = errno;
181 			goto out;
182 		}
183 
184 		if (ioctl(fd, DIOCGDINFO, &lab) == 0) {
185 			parta = &lab.d_partitions[DISKPART(sb.st_rdev)];
186 			size = (uint64_t)lab.d_secsize * parta->p_size;
187 			goto out;
188 		}
189 
190 		if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) {
191 			/*
192 			 * XXX: should use DIOCGDISKINFO to query
193 			 * sector size, but that requires proplib,
194 			 * so just don't bother for now.  it's nice
195 			 * that something as difficult as figuring out
196 			 * a partition's size has been made so easy.
197 			 */
198 			size = dkw.dkw_size << DEV_BSHIFT;
199 			goto out;
200 		}
201 
202 		rv = errno;
203 #endif /* __NetBSD__ */
204 	}
205 
206  out:
207 	if (rv == 0 && sizep)
208 		*sizep = size;
209 	if (rv == 0 && ftp)
210 		*ftp = ft;
211 	if (fd != -1)
212 		close(fd);
213 
214 	ET(rv);
215 }
216 
/*
 * Allocate "howmuch" bytes aligned to "alignment" bytes using
 * posix_memalign().  An alignment of 0 selects sizeof(void *).
 *
 * An alignment that posix_memalign() rejects with EINVAL is treated
 * as a caller bug: a message is printed and the process aborts.
 * Any other failure is passed through ET(); *memp is then set to NULL.
 */
int
rumpuser_malloc(size_t howmuch, int alignment, void **memp)
{
	void *mem = NULL;
	int error;

	if (alignment == 0)
		alignment = sizeof(void *);

	error = posix_memalign(&mem, (size_t)alignment, howmuch);
	if (__predict_false(error == EINVAL)) {
		printf("rumpuser_malloc: invalid alignment %d\n",
		    alignment);
		abort();
	}

	*memp = mem;
	ET(error);
}
238 
/*
 * Release memory obtained from rumpuser_malloc().  The size argument
 * is not needed by the host allocator and is ignored.
 */
/*ARGSUSED1*/
void
rumpuser_free(void *ptr, size_t size)
{

	(void)size;
	free(ptr);
}
246 
247 int
248 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit,
249 	int exec, void **memp)
250 {
251 	void *mem;
252 	int prot, rv;
253 
254 #ifndef MAP_ALIGNED
255 #define MAP_ALIGNED(a) 0
256 	if (alignbit)
257 		fprintf(stderr, "rumpuser_anonmmap: warning, requested "
258 		    "alignment not supported by hypervisor\n");
259 #endif
260 
261 	prot = PROT_READ|PROT_WRITE;
262 	if (exec)
263 		prot |= PROT_EXEC;
264 	mem = mmap(prefaddr, size, prot,
265 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0);
266 	if (mem == MAP_FAILED) {
267 		rv = errno;
268 	} else {
269 		*memp = mem;
270 		rv = 0;
271 	}
272 
273 	ET(rv);
274 }
275 
/*
 * Remove the mapping of "len" bytes at "addr".  The result of
 * munmap() is not reported to the caller.
 */
void
rumpuser_unmap(void *addr, size_t len)
{

	(void)munmap(addr, len);
}
282 
283 int
284 rumpuser_open(const char *path, int ruflags, int *fdp)
285 {
286 	int fd, flags, rv;
287 
288 	switch (ruflags & RUMPUSER_OPEN_ACCMODE) {
289 	case RUMPUSER_OPEN_RDONLY:
290 		flags = O_RDONLY;
291 		break;
292 	case RUMPUSER_OPEN_WRONLY:
293 		flags = O_WRONLY;
294 		break;
295 	case RUMPUSER_OPEN_RDWR:
296 		flags = O_RDWR;
297 		break;
298 	default:
299 		rv = EINVAL;
300 		goto out;
301 	}
302 
303 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_;
304 	TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT);
305 	TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL);
306 #undef TESTSET
307 
308 	KLOCK_WRAP(fd = open(path, flags, 0644));
309 	if (fd == -1) {
310 		rv = errno;
311 	} else {
312 		*fdp = fd;
313 		rv = 0;
314 	}
315 
316  out:
317 	ET(rv);
318 }
319 
/*
 * Flush and close the host file descriptor "fd".  The calls are
 * bracketed by rumpkern_unsched()/rumpkern_sched().  The results of
 * fsync() and close() are not examined; ET(0) is always returned.
 */
int
rumpuser_close(int fd)
{
	int held;

	rumpkern_unsched(&held, NULL);
	(void)fsync(fd);
	(void)close(fd);
	rumpkern_sched(held, NULL);

	ET(0);
}
332 
333 /*
334  * Assume "struct rumpuser_iovec" and "struct iovec" are the same.
335  * If you encounter POSIX platforms where they aren't, add some
336  * translation for iovlen > 1.
337  */
338 int
339 rumpuser_iovread(int fd, struct rumpuser_iovec *ruiov, size_t iovlen,
340 	int64_t roff, size_t *retp)
341 {
342 	struct iovec *iov = (struct iovec *)ruiov;
343 	off_t off = (off_t)roff;
344 	ssize_t nn;
345 	int rv;
346 
347 	if (off == RUMPUSER_IOV_NOSEEK) {
348 		KLOCK_WRAP(nn = readv(fd, iov, iovlen));
349 	} else {
350 		int nlocks;
351 
352 		rumpkern_unsched(&nlocks, NULL);
353 		if (lseek(fd, off, SEEK_SET) == off) {
354 			nn = readv(fd, iov, iovlen);
355 		} else {
356 			nn = -1;
357 		}
358 		rumpkern_sched(nlocks, NULL);
359 	}
360 
361 	if (nn == -1) {
362 		rv = errno;
363 	} else {
364 		*retp = (size_t)nn;
365 		rv = 0;
366 	}
367 
368 	ET(rv);
369 }
370 
371 int
372 rumpuser_iovwrite(int fd, const struct rumpuser_iovec *ruiov, size_t iovlen,
373 	int64_t roff, size_t *retp)
374 {
375 	const struct iovec *iov = (const struct iovec *)ruiov;
376 	off_t off = (off_t)roff;
377 	ssize_t nn;
378 	int rv;
379 
380 	if (off == RUMPUSER_IOV_NOSEEK) {
381 		KLOCK_WRAP(nn = writev(fd, iov, iovlen));
382 	} else {
383 		int nlocks;
384 
385 		rumpkern_unsched(&nlocks, NULL);
386 		if (lseek(fd, off, SEEK_SET) == off) {
387 			nn = writev(fd, iov, iovlen);
388 		} else {
389 			nn = -1;
390 		}
391 		rumpkern_sched(nlocks, NULL);
392 	}
393 
394 	if (nn == -1) {
395 		rv = errno;
396 	} else {
397 		*retp = (size_t)nn;
398 		rv = 0;
399 	}
400 
401 	ET(rv);
402 }
403 
404 int
405 rumpuser_syncfd(int fd, int flags, uint64_t start, uint64_t len)
406 {
407 	int rv = 0;
408 
409 	/*
410 	 * For now, assume fd is regular file and does not care
411 	 * about read syncing
412 	 */
413 	if ((flags & RUMPUSER_SYNCFD_BOTH) == 0) {
414 		rv = EINVAL;
415 		goto out;
416 	}
417 	if ((flags & RUMPUSER_SYNCFD_WRITE) == 0) {
418 		rv = 0;
419 		goto out;
420 	}
421 
422 #ifdef __NetBSD__
423 	{
424 	int fsflags = FDATASYNC;
425 
426 	if (fsflags & RUMPUSER_SYNCFD_SYNC)
427 		fsflags |= FDISKSYNC;
428 	if (fsync_range(fd, fsflags, start, len) == -1)
429 		rv = errno;
430 	}
431 #else
432 	/* el-simplo */
433 	if (fsync(fd) == -1)
434 		rv = errno;
435 #endif
436 
437  out:
438 	ET(rv);
439 }
440 
441 int
442 rumpuser_clock_gettime(int enum_rumpclock, int64_t *sec, long *nsec)
443 {
444 	enum rumpclock rclk = enum_rumpclock;
445 	struct timespec ts;
446 	clockid_t clk;
447 	int rv;
448 
449 	switch (rclk) {
450 	case RUMPUSER_CLOCK_RELWALL:
451 		clk = CLOCK_REALTIME;
452 		break;
453 	case RUMPUSER_CLOCK_ABSMONO:
454 #ifdef HAVE_CLOCK_NANOSLEEP
455 		clk = CLOCK_MONOTONIC;
456 #else
457 		clk = CLOCK_REALTIME;
458 #endif
459 		break;
460 	default:
461 		abort();
462 	}
463 
464 	if (clock_gettime(clk, &ts) == -1) {
465 		rv = errno;
466 	} else {
467 		*sec = ts.tv_sec;
468 		*nsec = ts.tv_nsec;
469 		rv = 0;
470 	}
471 
472 	ET(rv);
473 }
474 
475 int
476 rumpuser_clock_sleep(int enum_rumpclock, int64_t sec, long nsec)
477 {
478 	enum rumpclock rclk = enum_rumpclock;
479 	struct timespec rqt, rmt;
480 	int nlocks;
481 	int rv;
482 
483 	rumpkern_unsched(&nlocks, NULL);
484 
485 	/*LINTED*/
486 	rqt.tv_sec = sec;
487 	/*LINTED*/
488 	rqt.tv_nsec = nsec;
489 
490 	switch (rclk) {
491 	case RUMPUSER_CLOCK_RELWALL:
492 		do {
493 			rv = nanosleep(&rqt, &rmt);
494 			rqt = rmt;
495 		} while (rv == -1 && errno == EINTR);
496 		if (rv == -1) {
497 			rv = errno;
498 		}
499 		break;
500 	case RUMPUSER_CLOCK_ABSMONO:
501 		do {
502 #ifdef HAVE_CLOCK_NANOSLEEP
503 			rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
504 			    &rqt, NULL);
505 #else
506 			/* le/la/der/die/das sigh. timevalspec tailspin */
507 			struct timespec ts, tsr;
508 			clock_gettime(CLOCK_REALTIME, &ts);
509 			if (ts.tv_sec == rqt.tv_sec ?
510 			    ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) {
511 				rv = 0;
512 			} else {
513 				tsr.tv_sec = rqt.tv_sec - ts.tv_sec;
514 				tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec;
515 				if (tsr.tv_nsec < 0) {
516 					tsr.tv_sec--;
517 					tsr.tv_nsec += 1000*1000*1000;
518 				}
519 				rv = nanosleep(&tsr, NULL);
520 			}
521 #endif
522 		} while (rv == -1 && errno == EINTR);
523 		if (rv == -1) {
524 			rv = errno;
525 		}
526 		break;
527 	default:
528 		abort();
529 	}
530 
531 	rumpkern_sched(nlocks, NULL);
532 
533 	ET(rv);
534 }
535 
/*
 * Return the number of processors currently online on the host.
 * Falls back to 1 if the host does not offer _SC_NPROCESSORS_ONLN
 * or if the query fails, so the result is always at least 1.
 */
static int
gethostncpu(void)
{
	int ncpu = 1; /* unknown, really */

#ifdef _SC_NPROCESSORS_ONLN
	long n = sysconf(_SC_NPROCESSORS_ONLN);

	/* sysconf() returns -1 on failure; keep the default then */
	if (n >= 1)
		ncpu = (int)n;
#endif

	return ncpu;
}
547 
548 int
549 rumpuser_getparam(const char *name, void *buf, size_t blen)
550 {
551 	int rv;
552 
553 	if (strcmp(name, RUMPUSER_PARAM_NCPU) == 0) {
554 		int ncpu;
555 
556 		if (getenv_r("RUMP_NCPU", buf, blen) == -1) {
557 			sprintf(buf, "2"); /* default */
558 		} else if (strcmp(buf, "host") == 0) {
559 			ncpu = gethostncpu();
560 			snprintf(buf, blen, "%d", ncpu);
561 		}
562 		rv = 0;
563 	} else if (strcmp(name, RUMPUSER_PARAM_HOSTNAME) == 0) {
564 		char tmp[MAXHOSTNAMELEN];
565 
566 		if (gethostname(tmp, sizeof(tmp)) == -1) {
567 			snprintf(buf, blen, "rump-%05d", (int)getpid());
568 		} else {
569 			snprintf(buf, blen, "rump-%05d.%s",
570 			    (int)getpid(), tmp);
571 		}
572 		rv = 0;
573 	} else if (*name == '_') {
574 		rv = EINVAL;
575 	} else {
576 		if (getenv_r(name, buf, blen) == -1)
577 			rv = errno;
578 		else
579 			rv = 0;
580 	}
581 
582 	ET(rv);
583 }
584 
/*
 * Write the character c to the host's standard output.
 */
void
rumpuser_putchar(int c)
{

	(void)putchar(c);
}
591 
592 __dead void
593 rumpuser_exit(int rv)
594 {
595 
596 	if (rv == RUMPUSER_PANIC)
597 		abort();
598 	else
599 		exit(rv);
600 }
601 
/*
 * Set the host's errno to "error".
 */
void
rumpuser_seterrno(int error)
{

	errno = error;
}
608 
/*
 * Debugging print for the kernel: printf-style formatting, with the
 * output sent to the host's stderr.
 */
void
rumpuser_dprintf(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	(void)vfprintf(stderr, format, args);
	va_end(args);
}
621 
/*
 * Deliver a signal to the host process.  rumpsig is translated with
 * rumpuser__sig_rump2host(); if the translation is positive, that
 * signal is raised in the calling process.  The pid argument is not
 * used.  Always returns 0.
 */
int
rumpuser_kill(int64_t pid, int rumpsig)
{
	int hostsig = rumpuser__sig_rump2host(rumpsig);

	(void)pid;
	if (hostsig > 0)
		raise(hostsig);
	return 0;
}
632 
/*
 * Fill buf with buflen bytes taken from RUMPUSER_RANDOM().
 *
 * flags: not used.
 * retp:  receives the number of bytes produced, which is always
 *        buflen.
 *
 * The buffer is walked as bytes, so it does not have to be aligned
 * for uint32_t, and RUMPUSER_RANDOM() is called only as often as
 * needed (not at all for buflen == 0).
 */
int
rumpuser_getrandom(void *buf, size_t buflen, int flags, size_t *retp)
{
	unsigned char *p = buf;
	size_t left = buflen;

	while (left > 0) {
		uint32_t tmp = RUMPUSER_RANDOM();
		/* portable MIN ... */
		size_t chunk = left < sizeof(tmp) ? left : sizeof(tmp);

		memcpy(p, &tmp, chunk);
		p += chunk;
		left -= chunk;
	}

	*retp = buflen;
	ET(0);
}
652