1 /* $NetBSD: rumpuser.c,v 1.59 2014/04/02 13:54:42 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "rumpuser_port.h" 29 30 #if !defined(lint) 31 __RCSID("$NetBSD: rumpuser.c,v 1.59 2014/04/02 13:54:42 pooka Exp $"); 32 #endif /* !lint */ 33 34 #include <sys/ioctl.h> 35 #include <sys/mman.h> 36 #include <sys/uio.h> 37 #include <sys/stat.h> 38 #include <sys/time.h> 39 40 #ifdef __NetBSD__ 41 #include <sys/disk.h> 42 #include <sys/disklabel.h> 43 #include <sys/dkio.h> 44 #endif 45 46 #if defined(__NetBSD__) || defined(__FreeBSD__) || \ 47 defined(__DragonFly__) || defined(__APPLE__) 48 #define __BSD__ 49 #endif 50 51 #if defined(__BSD__) 52 #include <sys/sysctl.h> 53 #endif 54 55 #include <assert.h> 56 #include <errno.h> 57 #include <fcntl.h> 58 #include <netdb.h> 59 #include <signal.h> 60 #include <stdarg.h> 61 #include <stdint.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 #include <time.h> 66 #include <unistd.h> 67 68 #include <rump/rumpuser.h> 69 70 #include "rumpuser_int.h" 71 72 struct rumpuser_hyperup rumpuser__hyp; 73 74 int 75 rumpuser_init(int version, const struct rumpuser_hyperup *hyp) 76 { 77 78 if (version != RUMPUSER_VERSION) { 79 fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n", 80 version, RUMPUSER_VERSION); 81 return 1; 82 } 83 84 #ifdef RUMPUSER_USE_DEVRANDOM 85 uint32_t rv; 86 int fd; 87 88 if ((fd = open("/dev/urandom", O_RDONLY)) == -1) { 89 srandom(time(NULL)); 90 } else { 91 if (read(fd, &rv, sizeof(rv)) != sizeof(rv)) 92 srandom(time(NULL)); 93 else 94 srandom(rv); 95 close(fd); 96 } 97 #endif 98 99 rumpuser__thrinit(); 100 rumpuser__hyp = *hyp; 101 102 return 0; 103 } 104 105 int 106 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp) 107 { 108 struct stat sb; 109 uint64_t size = 0; 110 int needsdev = 0, rv = 0, ft = 0; 111 int fd = -1; 112 113 if (stat(path, &sb) == -1) { 114 rv = errno; 115 goto out; 116 } 117 118 switch (sb.st_mode & S_IFMT) { 119 case S_IFDIR: 120 ft = RUMPUSER_FT_DIR; 121 break; 122 case S_IFREG: 123 ft = RUMPUSER_FT_REG; 124 break; 125 case S_IFBLK: 126 ft = RUMPUSER_FT_BLK; 127 needsdev = 1; 128 break; 129 case S_IFCHR: 130 ft = RUMPUSER_FT_CHR; 131 needsdev = 1; 132 break; 133 default: 134 ft = RUMPUSER_FT_OTHER; 135 break; 136 } 137 138 if (!needsdev) { 139 size = sb.st_size; 140 } else if (sizep) { 141 /* 142 * Welcome to the jungle. Of course querying the kernel 143 * for a device partition size is supposed to be far from 144 * trivial. On NetBSD we use ioctl. On $other platform 145 * we have a problem. We try "the lseek trick" and just 146 * fail if that fails. Platform specific code can later 147 * be written here if appropriate. 148 * 149 * On NetBSD we hope and pray that for block devices nobody 150 * else is holding them open, because otherwise the kernel 151 * will not permit us to open it. Thankfully, this is 152 * usually called only in bootstrap and then we can 153 * forget about it. 154 */ 155 #ifndef __NetBSD__ 156 off_t off; 157 158 fd = open(path, O_RDONLY); 159 if (fd == -1) { 160 rv = errno; 161 goto out; 162 } 163 164 off = lseek(fd, 0, SEEK_END); 165 if (off != 0) { 166 size = off; 167 goto out; 168 } 169 fprintf(stderr, "error: device size query not implemented on " 170 "this platform\n"); 171 rv = EOPNOTSUPP; 172 goto out; 173 #else 174 struct disklabel lab; 175 struct partition *parta; 176 struct dkwedge_info dkw; 177 178 fd = open(path, O_RDONLY); 179 if (fd == -1) { 180 rv = errno; 181 goto out; 182 } 183 184 if (ioctl(fd, DIOCGDINFO, &lab) == 0) { 185 parta = &lab.d_partitions[DISKPART(sb.st_rdev)]; 186 size = (uint64_t)lab.d_secsize * parta->p_size; 187 goto out; 188 } 189 190 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) { 191 /* 192 * XXX: should use DIOCGDISKINFO to query 193 * sector size, but that requires proplib, 194 * so just don't bother for now. it's nice 195 * that something as difficult as figuring out 196 * a partition's size has been made so easy. 197 */ 198 size = dkw.dkw_size << DEV_BSHIFT; 199 goto out; 200 } 201 202 rv = errno; 203 #endif /* __NetBSD__ */ 204 } 205 206 out: 207 if (rv == 0 && sizep) 208 *sizep = size; 209 if (rv == 0 && ftp) 210 *ftp = ft; 211 if (fd != -1) 212 close(fd); 213 214 ET(rv); 215 } 216 217 int 218 rumpuser_malloc(size_t howmuch, int alignment, void **memp) 219 { 220 void *mem = NULL; 221 int rv; 222 223 if (alignment == 0) 224 alignment = sizeof(void *); 225 226 rv = posix_memalign(&mem, (size_t)alignment, howmuch); 227 if (__predict_false(rv != 0)) { 228 if (rv == EINVAL) { 229 printf("rumpuser_malloc: invalid alignment %d\n", 230 alignment); 231 abort(); 232 } 233 } 234 235 *memp = mem; 236 ET(rv); 237 } 238 239 /*ARGSUSED1*/ 240 void 241 rumpuser_free(void *ptr, size_t size) 242 { 243 244 free(ptr); 245 } 246 247 int 248 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit, 249 int exec, void **memp) 250 { 251 void *mem; 252 int prot, rv; 253 254 #ifndef MAP_ALIGNED 255 #define MAP_ALIGNED(a) 0 256 if (alignbit) 257 fprintf(stderr, "rumpuser_anonmmap: warning, requested " 258 "alignment not supported by hypervisor\n"); 259 #endif 260 261 prot = PROT_READ|PROT_WRITE; 262 if (exec) 263 prot |= PROT_EXEC; 264 mem = mmap(prefaddr, size, prot, 265 MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0); 266 if (mem == MAP_FAILED) { 267 rv = errno; 268 } else { 269 *memp = mem; 270 rv = 0; 271 } 272 273 ET(rv); 274 } 275 276 void 277 rumpuser_unmap(void *addr, size_t len) 278 { 279 280 munmap(addr, len); 281 } 282 283 int 284 rumpuser_open(const char *path, int ruflags, int *fdp) 285 { 286 int fd, flags, rv; 287 288 switch (ruflags & RUMPUSER_OPEN_ACCMODE) { 289 case RUMPUSER_OPEN_RDONLY: 290 flags = O_RDONLY; 291 break; 292 case RUMPUSER_OPEN_WRONLY: 293 flags = O_WRONLY; 294 break; 295 case RUMPUSER_OPEN_RDWR: 296 flags = O_RDWR; 297 break; 298 default: 299 rv = EINVAL; 300 goto out; 301 } 302 303 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_; 304 TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT); 305 TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL); 306 #undef TESTSET 307 308 KLOCK_WRAP(fd = open(path, flags, 0644)); 309 if (fd == -1) { 310 rv = errno; 311 } else { 312 *fdp = fd; 313 rv = 0; 314 } 315 316 out: 317 ET(rv); 318 } 319 320 int 321 rumpuser_close(int fd) 322 { 323 int nlocks; 324 325 rumpkern_unsched(&nlocks, NULL); 326 fsync(fd); 327 close(fd); 328 rumpkern_sched(nlocks, NULL); 329 330 ET(0); 331 } 332 333 /* 334 * Assume "struct rumpuser_iovec" and "struct iovec" are the same. 335 * If you encounter POSIX platforms where they aren't, add some 336 * translation for iovlen > 1. 337 */ 338 int 339 rumpuser_iovread(int fd, struct rumpuser_iovec *ruiov, size_t iovlen, 340 int64_t roff, size_t *retp) 341 { 342 struct iovec *iov = (struct iovec *)ruiov; 343 off_t off = (off_t)roff; 344 ssize_t nn; 345 int rv; 346 347 if (off == RUMPUSER_IOV_NOSEEK) { 348 KLOCK_WRAP(nn = readv(fd, iov, iovlen)); 349 } else { 350 int nlocks; 351 352 rumpkern_unsched(&nlocks, NULL); 353 if (lseek(fd, off, SEEK_SET) == off) { 354 nn = readv(fd, iov, iovlen); 355 } else { 356 nn = -1; 357 } 358 rumpkern_sched(nlocks, NULL); 359 } 360 361 if (nn == -1) { 362 rv = errno; 363 } else { 364 *retp = (size_t)nn; 365 rv = 0; 366 } 367 368 ET(rv); 369 } 370 371 int 372 rumpuser_iovwrite(int fd, const struct rumpuser_iovec *ruiov, size_t iovlen, 373 int64_t roff, size_t *retp) 374 { 375 const struct iovec *iov = (const struct iovec *)ruiov; 376 off_t off = (off_t)roff; 377 ssize_t nn; 378 int rv; 379 380 if (off == RUMPUSER_IOV_NOSEEK) { 381 KLOCK_WRAP(nn = writev(fd, iov, iovlen)); 382 } else { 383 int nlocks; 384 385 rumpkern_unsched(&nlocks, NULL); 386 if (lseek(fd, off, SEEK_SET) == off) { 387 nn = writev(fd, iov, iovlen); 388 } else { 389 nn = -1; 390 } 391 rumpkern_sched(nlocks, NULL); 392 } 393 394 if (nn == -1) { 395 rv = errno; 396 } else { 397 *retp = (size_t)nn; 398 rv = 0; 399 } 400 401 ET(rv); 402 } 403 404 int 405 rumpuser_syncfd(int fd, int flags, uint64_t start, uint64_t len) 406 { 407 int rv = 0; 408 409 /* 410 * For now, assume fd is regular file and does not care 411 * about read syncing 412 */ 413 if ((flags & RUMPUSER_SYNCFD_BOTH) == 0) { 414 rv = EINVAL; 415 goto out; 416 } 417 if ((flags & RUMPUSER_SYNCFD_WRITE) == 0) { 418 rv = 0; 419 goto out; 420 } 421 422 #ifdef __NetBSD__ 423 { 424 int fsflags = FDATASYNC; 425 426 if (fsflags & RUMPUSER_SYNCFD_SYNC) 427 fsflags |= FDISKSYNC; 428 if (fsync_range(fd, fsflags, start, len) == -1) 429 rv = errno; 430 } 431 #else 432 /* el-simplo */ 433 if (fsync(fd) == -1) 434 rv = errno; 435 #endif 436 437 out: 438 ET(rv); 439 } 440 441 int 442 rumpuser_clock_gettime(int enum_rumpclock, int64_t *sec, long *nsec) 443 { 444 enum rumpclock rclk = enum_rumpclock; 445 struct timespec ts; 446 clockid_t clk; 447 int rv; 448 449 switch (rclk) { 450 case RUMPUSER_CLOCK_RELWALL: 451 clk = CLOCK_REALTIME; 452 break; 453 case RUMPUSER_CLOCK_ABSMONO: 454 #ifdef HAVE_CLOCK_NANOSLEEP 455 clk = CLOCK_MONOTONIC; 456 #else 457 clk = CLOCK_REALTIME; 458 #endif 459 break; 460 default: 461 abort(); 462 } 463 464 if (clock_gettime(clk, &ts) == -1) { 465 rv = errno; 466 } else { 467 *sec = ts.tv_sec; 468 *nsec = ts.tv_nsec; 469 rv = 0; 470 } 471 472 ET(rv); 473 } 474 475 int 476 rumpuser_clock_sleep(int enum_rumpclock, int64_t sec, long nsec) 477 { 478 enum rumpclock rclk = enum_rumpclock; 479 struct timespec rqt, rmt; 480 int nlocks; 481 int rv; 482 483 rumpkern_unsched(&nlocks, NULL); 484 485 /*LINTED*/ 486 rqt.tv_sec = sec; 487 /*LINTED*/ 488 rqt.tv_nsec = nsec; 489 490 switch (rclk) { 491 case RUMPUSER_CLOCK_RELWALL: 492 do { 493 rv = nanosleep(&rqt, &rmt); 494 rqt = rmt; 495 } while (rv == -1 && errno == EINTR); 496 if (rv == -1) { 497 rv = errno; 498 } 499 break; 500 case RUMPUSER_CLOCK_ABSMONO: 501 do { 502 #ifdef HAVE_CLOCK_NANOSLEEP 503 rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, 504 &rqt, NULL); 505 #else 506 /* le/la/der/die/das sigh. timevalspec tailspin */ 507 struct timespec ts, tsr; 508 clock_gettime(CLOCK_REALTIME, &ts); 509 if (ts.tv_sec == rqt.tv_sec ? 510 ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) { 511 rv = 0; 512 } else { 513 tsr.tv_sec = rqt.tv_sec - ts.tv_sec; 514 tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec; 515 if (tsr.tv_nsec < 0) { 516 tsr.tv_sec--; 517 tsr.tv_nsec += 1000*1000*1000; 518 } 519 rv = nanosleep(&tsr, NULL); 520 } 521 #endif 522 } while (rv == -1 && errno == EINTR); 523 if (rv == -1) { 524 rv = errno; 525 } 526 break; 527 default: 528 abort(); 529 } 530 531 rumpkern_sched(nlocks, NULL); 532 533 ET(rv); 534 } 535 536 static int 537 gethostncpu(void) 538 { 539 int ncpu = 1; /* unknown, really */ 540 541 #ifdef _SC_NPROCESSORS_ONLN 542 ncpu = sysconf(_SC_NPROCESSORS_ONLN); 543 #endif 544 545 return ncpu; 546 } 547 548 int 549 rumpuser_getparam(const char *name, void *buf, size_t blen) 550 { 551 int rv; 552 553 if (strcmp(name, RUMPUSER_PARAM_NCPU) == 0) { 554 int ncpu; 555 556 if (getenv_r("RUMP_NCPU", buf, blen) == -1) { 557 sprintf(buf, "2"); /* default */ 558 } else if (strcmp(buf, "host") == 0) { 559 ncpu = gethostncpu(); 560 snprintf(buf, blen, "%d", ncpu); 561 } 562 rv = 0; 563 } else if (strcmp(name, RUMPUSER_PARAM_HOSTNAME) == 0) { 564 char tmp[MAXHOSTNAMELEN]; 565 566 if (gethostname(tmp, sizeof(tmp)) == -1) { 567 snprintf(buf, blen, "rump-%05d", (int)getpid()); 568 } else { 569 snprintf(buf, blen, "rump-%05d.%s", 570 (int)getpid(), tmp); 571 } 572 rv = 0; 573 } else if (*name == '_') { 574 rv = EINVAL; 575 } else { 576 if (getenv_r(name, buf, blen) == -1) 577 rv = errno; 578 else 579 rv = 0; 580 } 581 582 ET(rv); 583 } 584 585 void 586 rumpuser_putchar(int c) 587 { 588 589 putchar(c); 590 } 591 592 __dead void 593 rumpuser_exit(int rv) 594 { 595 596 if (rv == RUMPUSER_PANIC) 597 abort(); 598 else 599 exit(rv); 600 } 601 602 void 603 rumpuser_seterrno(int error) 604 { 605 606 errno = error; 607 } 608 609 /* 610 * This is meant for safe debugging prints from the kernel. 611 */ 612 void 613 rumpuser_dprintf(const char *format, ...) 614 { 615 va_list ap; 616 617 va_start(ap, format); 618 vfprintf(stderr, format, ap); 619 va_end(ap); 620 } 621 622 int 623 rumpuser_kill(int64_t pid, int rumpsig) 624 { 625 int sig; 626 627 sig = rumpuser__sig_rump2host(rumpsig); 628 if (sig > 0) 629 raise(sig); 630 return 0; 631 } 632 633 int 634 rumpuser_getrandom(void *buf, size_t buflen, int flags, size_t *retp) 635 { 636 size_t origlen = buflen; 637 uint32_t *p = buf; 638 uint32_t tmp; 639 int chunk; 640 641 do { 642 chunk = buflen < 4 ? buflen : 4; /* portable MIN ... */ 643 tmp = RUMPUSER_RANDOM(); 644 memcpy(p, &tmp, chunk); 645 p++; 646 buflen -= chunk; 647 } while (chunk); 648 649 *retp = origlen; 650 ET(0); 651 } 652