1 /* $NetBSD: rumpuser.c,v 1.55 2013/10/27 16:39:46 rmind Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "rumpuser_port.h" 29 30 #if !defined(lint) 31 __RCSID("$NetBSD: rumpuser.c,v 1.55 2013/10/27 16:39:46 rmind Exp $"); 32 #endif /* !lint */ 33 34 #include <sys/ioctl.h> 35 #include <sys/mman.h> 36 #include <sys/uio.h> 37 #include <sys/stat.h> 38 #include <sys/time.h> 39 40 #ifdef __NetBSD__ 41 #include <sys/disk.h> 42 #include <sys/disklabel.h> 43 #include <sys/dkio.h> 44 #endif 45 46 #if defined(__NetBSD__) || defined(__FreeBSD__) || \ 47 defined(__DragonFly__) || defined(__APPLE__) 48 #define __BSD__ 49 #endif 50 51 #if defined(__BSD__) 52 #include <sys/sysctl.h> 53 #endif 54 55 #include <assert.h> 56 #include <errno.h> 57 #include <fcntl.h> 58 #include <netdb.h> 59 #include <signal.h> 60 #include <stdarg.h> 61 #include <stdint.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 #include <time.h> 66 #include <unistd.h> 67 68 #include <rump/rumpuser.h> 69 70 #include "rumpuser_int.h" 71 72 struct rumpuser_hyperup rumpuser__hyp; 73 74 int 75 rumpuser_init(int version, const struct rumpuser_hyperup *hyp) 76 { 77 78 if (version != RUMPUSER_VERSION) { 79 fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n", 80 version, RUMPUSER_VERSION); 81 return 1; 82 } 83 84 #ifdef RUMPUSER_USE_DEVRANDOM 85 uint32_t rv; 86 int fd; 87 88 if ((fd = open("/dev/urandom", O_RDONLY)) == -1) { 89 srandom(time(NULL)); 90 } else { 91 if (read(fd, &rv, sizeof(rv)) != sizeof(rv)) 92 srandom(time(NULL)); 93 else 94 srandom(rv); 95 close(fd); 96 } 97 #endif 98 99 rumpuser__thrinit(); 100 rumpuser__hyp = *hyp; 101 102 return 0; 103 } 104 105 int 106 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp) 107 { 108 struct stat sb; 109 uint64_t size = 0; 110 int needsdev = 0, rv = 0, ft = 0; 111 int fd = -1; 112 113 if (stat(path, &sb) == -1) { 114 rv = errno; 115 goto out; 116 } 117 118 switch (sb.st_mode & S_IFMT) { 119 case S_IFDIR: 120 ft = RUMPUSER_FT_DIR; 121 break; 122 case S_IFREG: 123 ft = RUMPUSER_FT_REG; 124 break; 125 case S_IFBLK: 126 ft = RUMPUSER_FT_BLK; 127 needsdev = 1; 128 break; 129 case S_IFCHR: 130 ft = RUMPUSER_FT_CHR; 131 needsdev = 1; 132 break; 133 default: 134 ft = RUMPUSER_FT_OTHER; 135 break; 136 } 137 138 if (!needsdev) { 139 size = sb.st_size; 140 } else if (sizep) { 141 /* 142 * Welcome to the jungle. Of course querying the kernel 143 * for a device partition size is supposed to be far from 144 * trivial. On NetBSD we use ioctl. On $other platform 145 * we have a problem. We try "the lseek trick" and just 146 * fail if that fails. Platform specific code can later 147 * be written here if appropriate. 148 * 149 * On NetBSD we hope and pray that for block devices nobody 150 * else is holding them open, because otherwise the kernel 151 * will not permit us to open it. Thankfully, this is 152 * usually called only in bootstrap and then we can 153 * forget about it. 154 */ 155 #ifndef __NetBSD__ 156 off_t off; 157 158 fd = open(path, O_RDONLY); 159 if (fd == -1) { 160 rv = errno; 161 goto out; 162 } 163 164 off = lseek(fd, 0, SEEK_END); 165 if (off != 0) { 166 size = off; 167 goto out; 168 } 169 fprintf(stderr, "error: device size query not implemented on " 170 "this platform\n"); 171 rv = EOPNOTSUPP; 172 goto out; 173 #else 174 struct disklabel lab; 175 struct partition *parta; 176 struct dkwedge_info dkw; 177 178 fd = open(path, O_RDONLY); 179 if (fd == -1) { 180 rv = errno; 181 goto out; 182 } 183 184 if (ioctl(fd, DIOCGDINFO, &lab) == 0) { 185 parta = &lab.d_partitions[DISKPART(sb.st_rdev)]; 186 size = (uint64_t)lab.d_secsize * parta->p_size; 187 goto out; 188 } 189 190 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) { 191 /* 192 * XXX: should use DIOCGDISKINFO to query 193 * sector size, but that requires proplib, 194 * so just don't bother for now. it's nice 195 * that something as difficult as figuring out 196 * a partition's size has been made so easy. 197 */ 198 size = dkw.dkw_size << DEV_BSHIFT; 199 goto out; 200 } 201 202 rv = errno; 203 #endif /* __NetBSD__ */ 204 } 205 206 out: 207 if (rv == 0 && sizep) 208 *sizep = size; 209 if (rv == 0 && ftp) 210 *ftp = ft; 211 if (fd != -1) 212 close(fd); 213 214 ET(rv); 215 } 216 217 int 218 rumpuser_malloc(size_t howmuch, int alignment, void **memp) 219 { 220 void *mem; 221 int rv; 222 223 if (alignment == 0) 224 alignment = sizeof(void *); 225 226 rv = posix_memalign(&mem, (size_t)alignment, howmuch); 227 if (__predict_false(rv != 0)) { 228 if (rv == EINVAL) { 229 printf("rumpuser_malloc: invalid alignment %d\n", 230 alignment); 231 abort(); 232 } 233 } 234 235 *memp = mem; 236 ET(rv); 237 } 238 239 /*ARGSUSED1*/ 240 void 241 rumpuser_free(void *ptr, size_t size) 242 { 243 244 free(ptr); 245 } 246 247 int 248 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit, 249 int exec, void **memp) 250 { 251 void *mem; 252 int prot, rv; 253 254 #ifndef MAP_ALIGNED 255 #define MAP_ALIGNED(a) 0 256 if (alignbit) 257 fprintf(stderr, "rumpuser_anonmmap: warning, requested " 258 "alignment not supported by hypervisor\n"); 259 #endif 260 261 prot = PROT_READ|PROT_WRITE; 262 if (exec) 263 prot |= PROT_EXEC; 264 mem = mmap(prefaddr, size, prot, 265 MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0); 266 if (mem == MAP_FAILED) { 267 rv = errno; 268 } else { 269 *memp = mem; 270 rv = 0; 271 } 272 273 ET(rv); 274 } 275 276 void 277 rumpuser_unmap(void *addr, size_t len) 278 { 279 280 munmap(addr, len); 281 } 282 283 int 284 rumpuser_open(const char *path, int ruflags, int *fdp) 285 { 286 int fd, flags, rv; 287 288 switch (ruflags & RUMPUSER_OPEN_ACCMODE) { 289 case RUMPUSER_OPEN_RDONLY: 290 flags = O_RDONLY; 291 break; 292 case RUMPUSER_OPEN_WRONLY: 293 flags = O_WRONLY; 294 break; 295 case RUMPUSER_OPEN_RDWR: 296 flags = O_RDWR; 297 break; 298 default: 299 rv = EINVAL; 300 goto out; 301 } 302 303 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_; 304 TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT); 305 TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL); 306 #undef TESTSET 307 308 KLOCK_WRAP(fd = open(path, flags, 0644)); 309 if (fd == -1) { 310 rv = errno; 311 } else { 312 *fdp = fd; 313 rv = 0; 314 } 315 316 out: 317 ET(rv); 318 } 319 320 int 321 rumpuser_close(int fd) 322 { 323 int nlocks; 324 325 rumpkern_unsched(&nlocks, NULL); 326 fsync(fd); 327 close(fd); 328 rumpkern_sched(nlocks, NULL); 329 330 ET(0); 331 } 332 333 /* 334 * Assume "struct rumpuser_iovec" and "struct iovec" are the same. 335 * If you encounter POSIX platforms where they aren't, add some 336 * translation for iovlen > 1. 337 */ 338 int 339 rumpuser_iovread(int fd, struct rumpuser_iovec *ruiov, size_t iovlen, 340 int64_t roff, size_t *retp) 341 { 342 struct iovec *iov = (struct iovec *)ruiov; 343 off_t off = (off_t)roff; 344 ssize_t nn; 345 int rv; 346 347 if (off == RUMPUSER_IOV_NOSEEK) { 348 KLOCK_WRAP(nn = readv(fd, iov, iovlen)); 349 } else { 350 int nlocks; 351 352 rumpkern_unsched(&nlocks, NULL); 353 if (lseek(fd, off, SEEK_SET) == off) { 354 nn = readv(fd, iov, iovlen); 355 } else { 356 nn = -1; 357 } 358 rumpkern_sched(nlocks, NULL); 359 } 360 361 if (nn == -1) { 362 rv = errno; 363 } else { 364 *retp = (size_t)nn; 365 rv = 0; 366 } 367 368 ET(rv); 369 } 370 371 int 372 rumpuser_iovwrite(int fd, const struct rumpuser_iovec *ruiov, size_t iovlen, 373 int64_t roff, size_t *retp) 374 { 375 const struct iovec *iov = (const struct iovec *)ruiov; 376 off_t off = (off_t)roff; 377 ssize_t nn; 378 int rv; 379 380 if (off == RUMPUSER_IOV_NOSEEK) { 381 KLOCK_WRAP(nn = writev(fd, iov, iovlen)); 382 } else { 383 int nlocks; 384 385 rumpkern_unsched(&nlocks, NULL); 386 if (lseek(fd, off, SEEK_SET) == off) { 387 nn = writev(fd, iov, iovlen); 388 } else { 389 nn = -1; 390 } 391 rumpkern_sched(nlocks, NULL); 392 } 393 394 if (nn == -1) { 395 rv = errno; 396 } else { 397 *retp = (size_t)nn; 398 rv = 0; 399 } 400 401 ET(rv); 402 } 403 404 int 405 rumpuser_syncfd(int fd, int flags, uint64_t start, uint64_t len) 406 { 407 int rv = 0; 408 409 /* 410 * For now, assume fd is regular file and does not care 411 * about read syncing 412 */ 413 if ((flags & RUMPUSER_SYNCFD_BOTH) == 0) { 414 rv = EINVAL; 415 goto out; 416 } 417 if ((flags & RUMPUSER_SYNCFD_WRITE) == 0) { 418 rv = 0; 419 goto out; 420 } 421 422 #ifdef __NetBSD__ 423 { 424 int fsflags = FDATASYNC; 425 426 if (fsflags & RUMPUSER_SYNCFD_SYNC) 427 fsflags |= FDISKSYNC; 428 if (fsync_range(fd, fsflags, start, len) == -1) 429 rv = errno; 430 } 431 #else 432 /* el-simplo */ 433 if (fsync(fd) == -1) 434 rv = errno; 435 #endif 436 437 out: 438 ET(rv); 439 } 440 441 int 442 rumpuser_clock_gettime(int enum_rumpclock, int64_t *sec, long *nsec) 443 { 444 enum rumpclock rclk = enum_rumpclock; 445 struct timespec ts; 446 clockid_t clk; 447 int rv; 448 449 switch (rclk) { 450 case RUMPUSER_CLOCK_RELWALL: 451 clk = CLOCK_REALTIME; 452 break; 453 case RUMPUSER_CLOCK_ABSMONO: 454 #ifdef HAVE_CLOCK_NANOSLEEP 455 clk = CLOCK_MONOTONIC; 456 #else 457 clk = CLOCK_REALTIME; 458 #endif 459 break; 460 default: 461 abort(); 462 } 463 464 if (clock_gettime(clk, &ts) == -1) { 465 rv = errno; 466 } else { 467 *sec = ts.tv_sec; 468 *nsec = ts.tv_nsec; 469 rv = 0; 470 } 471 472 ET(rv); 473 } 474 475 int 476 rumpuser_clock_sleep(int enum_rumpclock, int64_t sec, long nsec) 477 { 478 enum rumpclock rclk = enum_rumpclock; 479 struct timespec rqt, rmt; 480 int nlocks; 481 int rv; 482 483 rumpkern_unsched(&nlocks, NULL); 484 485 /*LINTED*/ 486 rqt.tv_sec = sec; 487 /*LINTED*/ 488 rqt.tv_nsec = nsec; 489 490 switch (rclk) { 491 case RUMPUSER_CLOCK_RELWALL: 492 do { 493 rv = nanosleep(&rqt, &rmt); 494 rqt = rmt; 495 } while (rv == -1 && errno == EINTR); 496 if (rv == -1) { 497 rv = errno; 498 } 499 break; 500 case RUMPUSER_CLOCK_ABSMONO: 501 do { 502 #ifdef HAVE_CLOCK_NANOSLEEP 503 rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, 504 &rqt, NULL); 505 #else 506 /* le/la/der/die/das sigh. timevalspec tailspin */ 507 struct timespec ts, tsr; 508 clock_gettime(CLOCK_REALTIME, &ts); 509 if (ts.tv_sec == rqt.tv_sec ? 510 ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) { 511 rv = 0; 512 } else { 513 tsr.tv_sec = rqt.tv_sec - ts.tv_sec; 514 tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec; 515 if (tsr.tv_nsec < 0) { 516 tsr.tv_sec--; 517 tsr.tv_nsec += 1000*1000*1000; 518 } 519 rv = nanosleep(&tsr, NULL); 520 } 521 #endif 522 } while (rv == -1 && errno == EINTR); 523 if (rv == -1) { 524 rv = errno; 525 } 526 break; 527 default: 528 abort(); 529 } 530 531 rumpkern_sched(nlocks, NULL); 532 533 ET(rv); 534 } 535 536 static int 537 gethostncpu(void) 538 { 539 int ncpu = 1; 540 541 #if defined(__BSD__) 542 size_t sz = sizeof(ncpu); 543 544 sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0); 545 #elif defined(__linux__) || defined(__CYGWIN__) 546 FILE *fp; 547 char *line = NULL; 548 size_t n = 0; 549 550 /* If anyone knows a better way, I'm all ears */ 551 if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) { 552 ncpu = 0; 553 while (getline(&line, &n, fp) != -1) { 554 if (strncmp(line, 555 "processor", sizeof("processor")-1) == 0) 556 ncpu++; 557 } 558 if (ncpu == 0) 559 ncpu = 1; 560 free(line); 561 fclose(fp); 562 } 563 #elif __sun__ 564 /* XXX: this is just a rough estimate ... */ 565 ncpu = sysconf(_SC_NPROCESSORS_ONLN); 566 #endif 567 568 return ncpu; 569 } 570 571 int 572 rumpuser_getparam(const char *name, void *buf, size_t blen) 573 { 574 int rv; 575 576 if (strcmp(name, RUMPUSER_PARAM_NCPU) == 0) { 577 int ncpu; 578 579 if (getenv_r("RUMP_NCPU", buf, blen) == -1) { 580 sprintf(buf, "2"); /* default */ 581 } else if (strcmp(buf, "host") == 0) { 582 ncpu = gethostncpu(); 583 snprintf(buf, blen, "%d", ncpu); 584 } 585 rv = 0; 586 } else if (strcmp(name, RUMPUSER_PARAM_HOSTNAME) == 0) { 587 char tmp[MAXHOSTNAMELEN]; 588 589 if (gethostname(tmp, sizeof(tmp)) == -1) { 590 snprintf(buf, blen, "rump-%05d", (int)getpid()); 591 } else { 592 snprintf(buf, blen, "rump-%05d.%s", 593 (int)getpid(), tmp); 594 } 595 rv = 0; 596 } else if (*name == '_') { 597 rv = EINVAL; 598 } else { 599 if (getenv_r(name, buf, blen) == -1) 600 rv = errno; 601 else 602 rv = 0; 603 } 604 605 ET(rv); 606 } 607 608 void 609 rumpuser_putchar(int c) 610 { 611 612 putchar(c); 613 } 614 615 __dead void 616 rumpuser_exit(int rv) 617 { 618 619 if (rv == RUMPUSER_PANIC) 620 abort(); 621 else 622 exit(rv); 623 } 624 625 void 626 rumpuser_seterrno(int error) 627 { 628 629 errno = error; 630 } 631 632 /* 633 * This is meant for safe debugging prints from the kernel. 634 */ 635 void 636 rumpuser_dprintf(const char *format, ...) 637 { 638 va_list ap; 639 640 va_start(ap, format); 641 vfprintf(stderr, format, ap); 642 va_end(ap); 643 } 644 645 int 646 rumpuser_kill(int64_t pid, int sig) 647 { 648 int rv; 649 650 #ifdef __NetBSD__ 651 int error; 652 653 if (pid == RUMPUSER_PID_SELF) { 654 error = raise(sig); 655 } else { 656 error = kill((pid_t)pid, sig); 657 } 658 if (error == -1) 659 rv = errno; 660 else 661 rv = 0; 662 #else 663 /* XXXfixme: signal numbers may not match on non-NetBSD */ 664 rv = EOPNOTSUPP; 665 #endif 666 667 ET(rv); 668 } 669 670 int 671 rumpuser_getrandom(void *buf, size_t buflen, int flags, size_t *retp) 672 { 673 size_t origlen = buflen; 674 uint32_t *p = buf; 675 uint32_t tmp; 676 int chunk; 677 678 do { 679 chunk = buflen < 4 ? buflen : 4; /* portable MIN ... */ 680 tmp = RUMPUSER_RANDOM(); 681 memcpy(p, &tmp, chunk); 682 p++; 683 buflen -= chunk; 684 } while (chunk); 685 686 *retp = origlen; 687 ET(0); 688 } 689