1 /* $NetBSD: rumpuser.c,v 1.53 2013/05/15 15:57:01 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "rumpuser_port.h" 29 30 #if !defined(lint) 31 __RCSID("$NetBSD: rumpuser.c,v 1.53 2013/05/15 15:57:01 pooka Exp $"); 32 #endif /* !lint */ 33 34 #include <sys/ioctl.h> 35 #include <sys/mman.h> 36 #include <sys/uio.h> 37 #include <sys/stat.h> 38 #include <sys/time.h> 39 40 #ifdef __NetBSD__ 41 #include <sys/disk.h> 42 #include <sys/disklabel.h> 43 #include <sys/dkio.h> 44 #endif 45 46 #if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) 47 #include <sys/sysctl.h> 48 #endif 49 50 #include <assert.h> 51 #include <errno.h> 52 #include <fcntl.h> 53 #include <netdb.h> 54 #include <signal.h> 55 #include <stdarg.h> 56 #include <stdint.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <time.h> 61 #include <unistd.h> 62 63 #include <rump/rumpuser.h> 64 65 #include "rumpuser_int.h" 66 67 struct rumpuser_hyperup rumpuser__hyp; 68 69 int 70 rumpuser_init(int version, const struct rumpuser_hyperup *hyp) 71 { 72 73 if (version != RUMPUSER_VERSION) { 74 fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n", 75 version, RUMPUSER_VERSION); 76 return 1; 77 } 78 79 #ifdef RUMPUSER_USE_DEVRANDOM 80 uint32_t rv; 81 int fd; 82 83 if ((fd = open("/dev/urandom", O_RDONLY)) == -1) { 84 srandom(time(NULL)); 85 } else { 86 if (read(fd, &rv, sizeof(rv)) != sizeof(rv)) 87 srandom(time(NULL)); 88 else 89 srandom(rv); 90 close(fd); 91 } 92 #endif 93 94 rumpuser__thrinit(); 95 rumpuser__hyp = *hyp; 96 97 return 0; 98 } 99 100 int 101 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp) 102 { 103 struct stat sb; 104 uint64_t size = 0; 105 int needsdev = 0, rv = 0, ft = 0; 106 int fd = -1; 107 108 if (stat(path, &sb) == -1) { 109 rv = errno; 110 goto out; 111 } 112 113 switch (sb.st_mode & S_IFMT) { 114 case S_IFDIR: 115 ft = RUMPUSER_FT_DIR; 116 break; 117 case S_IFREG: 118 ft = RUMPUSER_FT_REG; 119 break; 120 case S_IFBLK: 121 ft = RUMPUSER_FT_BLK; 122 needsdev = 1; 123 break; 124 case S_IFCHR: 125 ft = RUMPUSER_FT_CHR; 126 needsdev = 1; 127 break; 128 default: 129 ft = RUMPUSER_FT_OTHER; 130 break; 131 } 132 133 if (!needsdev) { 134 size = sb.st_size; 135 } else if (sizep) { 136 /* 137 * Welcome to the jungle. Of course querying the kernel 138 * for a device partition size is supposed to be far from 139 * trivial. On NetBSD we use ioctl. On $other platform 140 * we have a problem. We try "the lseek trick" and just 141 * fail if that fails. Platform specific code can later 142 * be written here if appropriate. 143 * 144 * On NetBSD we hope and pray that for block devices nobody 145 * else is holding them open, because otherwise the kernel 146 * will not permit us to open it. Thankfully, this is 147 * usually called only in bootstrap and then we can 148 * forget about it. 149 */ 150 #ifndef __NetBSD__ 151 off_t off; 152 153 fd = open(path, O_RDONLY); 154 if (fd == -1) { 155 rv = errno; 156 goto out; 157 } 158 159 off = lseek(fd, 0, SEEK_END); 160 if (off != 0) { 161 size = off; 162 goto out; 163 } 164 fprintf(stderr, "error: device size query not implemented on " 165 "this platform\n"); 166 rv = EOPNOTSUPP; 167 goto out; 168 #else 169 struct disklabel lab; 170 struct partition *parta; 171 struct dkwedge_info dkw; 172 173 fd = open(path, O_RDONLY); 174 if (fd == -1) { 175 rv = errno; 176 goto out; 177 } 178 179 if (ioctl(fd, DIOCGDINFO, &lab) == 0) { 180 parta = &lab.d_partitions[DISKPART(sb.st_rdev)]; 181 size = (uint64_t)lab.d_secsize * parta->p_size; 182 goto out; 183 } 184 185 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) { 186 /* 187 * XXX: should use DIOCGDISKINFO to query 188 * sector size, but that requires proplib, 189 * so just don't bother for now. it's nice 190 * that something as difficult as figuring out 191 * a partition's size has been made so easy. 192 */ 193 size = dkw.dkw_size << DEV_BSHIFT; 194 goto out; 195 } 196 197 rv = errno; 198 #endif /* __NetBSD__ */ 199 } 200 201 out: 202 if (rv == 0 && sizep) 203 *sizep = size; 204 if (rv == 0 && ftp) 205 *ftp = ft; 206 if (fd != -1) 207 close(fd); 208 209 ET(rv); 210 } 211 212 int 213 rumpuser_malloc(size_t howmuch, int alignment, void **memp) 214 { 215 void *mem; 216 int rv; 217 218 if (alignment == 0) 219 alignment = sizeof(void *); 220 221 rv = posix_memalign(&mem, (size_t)alignment, howmuch); 222 if (__predict_false(rv != 0)) { 223 if (rv == EINVAL) { 224 printf("rumpuser_malloc: invalid alignment %d\n", 225 alignment); 226 abort(); 227 } 228 } 229 230 *memp = mem; 231 ET(rv); 232 } 233 234 /*ARGSUSED1*/ 235 void 236 rumpuser_free(void *ptr, size_t size) 237 { 238 239 free(ptr); 240 } 241 242 int 243 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit, 244 int exec, void **memp) 245 { 246 void *mem; 247 int prot, rv; 248 249 #ifndef MAP_ALIGNED 250 #define MAP_ALIGNED(a) 0 251 if (alignbit) 252 fprintf(stderr, "rumpuser_anonmmap: warning, requested " 253 "alignment not supported by hypervisor\n"); 254 #endif 255 256 prot = PROT_READ|PROT_WRITE; 257 if (exec) 258 prot |= PROT_EXEC; 259 mem = mmap(prefaddr, size, prot, 260 MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0); 261 if (mem == MAP_FAILED) { 262 rv = errno; 263 } else { 264 *memp = mem; 265 rv = 0; 266 } 267 268 ET(rv); 269 } 270 271 void 272 rumpuser_unmap(void *addr, size_t len) 273 { 274 275 munmap(addr, len); 276 } 277 278 int 279 rumpuser_open(const char *path, int ruflags, int *fdp) 280 { 281 int fd, flags, rv; 282 283 switch (ruflags & RUMPUSER_OPEN_ACCMODE) { 284 case RUMPUSER_OPEN_RDONLY: 285 flags = O_RDONLY; 286 break; 287 case RUMPUSER_OPEN_WRONLY: 288 flags = O_WRONLY; 289 break; 290 case RUMPUSER_OPEN_RDWR: 291 flags = O_RDWR; 292 break; 293 default: 294 rv = EINVAL; 295 goto out; 296 } 297 298 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_; 299 TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT); 300 TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL); 301 #undef TESTSET 302 303 KLOCK_WRAP(fd = open(path, flags, 0644)); 304 if (fd == -1) { 305 rv = errno; 306 } else { 307 *fdp = fd; 308 rv = 0; 309 } 310 311 out: 312 ET(rv); 313 } 314 315 int 316 rumpuser_close(int fd) 317 { 318 int nlocks; 319 320 rumpkern_unsched(&nlocks, NULL); 321 fsync(fd); 322 close(fd); 323 rumpkern_sched(nlocks, NULL); 324 325 ET(0); 326 } 327 328 /* 329 * Assume "struct rumpuser_iovec" and "struct iovec" are the same. 330 * If you encounter POSIX platforms where they aren't, add some 331 * translation for iovlen > 1. 332 */ 333 int 334 rumpuser_iovread(int fd, struct rumpuser_iovec *ruiov, size_t iovlen, 335 int64_t roff, size_t *retp) 336 { 337 struct iovec *iov = (struct iovec *)ruiov; 338 off_t off = (off_t)roff; 339 ssize_t nn; 340 int rv; 341 342 if (off == RUMPUSER_IOV_NOSEEK) { 343 KLOCK_WRAP(nn = readv(fd, iov, iovlen)); 344 } else { 345 int nlocks; 346 347 rumpkern_unsched(&nlocks, NULL); 348 if (lseek(fd, off, SEEK_SET) == off) { 349 nn = readv(fd, iov, iovlen); 350 } else { 351 nn = -1; 352 } 353 rumpkern_sched(nlocks, NULL); 354 } 355 356 if (nn == -1) { 357 rv = errno; 358 } else { 359 *retp = (size_t)nn; 360 rv = 0; 361 } 362 363 ET(rv); 364 } 365 366 int 367 rumpuser_iovwrite(int fd, const struct rumpuser_iovec *ruiov, size_t iovlen, 368 int64_t roff, size_t *retp) 369 { 370 const struct iovec *iov = (const struct iovec *)ruiov; 371 off_t off = (off_t)roff; 372 ssize_t nn; 373 int rv; 374 375 if (off == RUMPUSER_IOV_NOSEEK) { 376 KLOCK_WRAP(nn = writev(fd, iov, iovlen)); 377 } else { 378 int nlocks; 379 380 rumpkern_unsched(&nlocks, NULL); 381 if (lseek(fd, off, SEEK_SET) == off) { 382 nn = writev(fd, iov, iovlen); 383 } else { 384 nn = -1; 385 } 386 rumpkern_sched(nlocks, NULL); 387 } 388 389 if (nn == -1) { 390 rv = errno; 391 } else { 392 *retp = (size_t)nn; 393 rv = 0; 394 } 395 396 ET(rv); 397 } 398 399 int 400 rumpuser_syncfd(int fd, int flags, uint64_t start, uint64_t len) 401 { 402 int rv = 0; 403 404 /* 405 * For now, assume fd is regular file and does not care 406 * about read syncing 407 */ 408 if ((flags & RUMPUSER_SYNCFD_BOTH) == 0) { 409 rv = EINVAL; 410 goto out; 411 } 412 if ((flags & RUMPUSER_SYNCFD_WRITE) == 0) { 413 rv = 0; 414 goto out; 415 } 416 417 #ifdef __NetBSD__ 418 { 419 int fsflags = FDATASYNC; 420 421 if (fsflags & RUMPUSER_SYNCFD_SYNC) 422 fsflags |= FDISKSYNC; 423 if (fsync_range(fd, fsflags, start, len) == -1) 424 rv = errno; 425 } 426 #else 427 /* el-simplo */ 428 if (fsync(fd) == -1) 429 rv = errno; 430 #endif 431 432 out: 433 ET(rv); 434 } 435 436 int 437 rumpuser_clock_gettime(int enum_rumpclock, int64_t *sec, long *nsec) 438 { 439 enum rumpclock rclk = enum_rumpclock; 440 struct timespec ts; 441 clockid_t clk; 442 int rv; 443 444 switch (rclk) { 445 case RUMPUSER_CLOCK_RELWALL: 446 clk = CLOCK_REALTIME; 447 break; 448 case RUMPUSER_CLOCK_ABSMONO: 449 #ifdef HAVE_CLOCK_NANOSLEEP 450 clk = CLOCK_MONOTONIC; 451 #else 452 clk = CLOCK_REALTIME; 453 #endif 454 break; 455 default: 456 abort(); 457 } 458 459 if (clock_gettime(clk, &ts) == -1) { 460 rv = errno; 461 } else { 462 *sec = ts.tv_sec; 463 *nsec = ts.tv_nsec; 464 rv = 0; 465 } 466 467 ET(rv); 468 } 469 470 int 471 rumpuser_clock_sleep(int enum_rumpclock, int64_t sec, long nsec) 472 { 473 enum rumpclock rclk = enum_rumpclock; 474 struct timespec rqt, rmt; 475 int nlocks; 476 int rv; 477 478 rumpkern_unsched(&nlocks, NULL); 479 480 /*LINTED*/ 481 rqt.tv_sec = sec; 482 /*LINTED*/ 483 rqt.tv_nsec = nsec; 484 485 switch (rclk) { 486 case RUMPUSER_CLOCK_RELWALL: 487 do { 488 rv = nanosleep(&rqt, &rmt); 489 rqt = rmt; 490 } while (rv == -1 && errno == EINTR); 491 if (rv == -1) { 492 rv = errno; 493 } 494 break; 495 case RUMPUSER_CLOCK_ABSMONO: 496 do { 497 #ifdef HAVE_CLOCK_NANOSLEEP 498 rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, 499 &rqt, NULL); 500 #else 501 /* le/la/der/die/das sigh. timevalspec tailspin */ 502 struct timespec ts, tsr; 503 clock_gettime(CLOCK_REALTIME, &ts); 504 if (ts.tv_sec == rqt.tv_sec ? 505 ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) { 506 rv = 0; 507 } else { 508 tsr.tv_sec = rqt.tv_sec - ts.tv_sec; 509 tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec; 510 if (tsr.tv_nsec < 0) { 511 tsr.tv_sec--; 512 tsr.tv_nsec += 1000*1000*1000; 513 } 514 rv = nanosleep(&tsr, NULL); 515 } 516 #endif 517 } while (rv == -1 && errno == EINTR); 518 if (rv == -1) { 519 rv = errno; 520 } 521 break; 522 default: 523 abort(); 524 } 525 526 rumpkern_sched(nlocks, NULL); 527 528 ET(rv); 529 } 530 531 static int 532 gethostncpu(void) 533 { 534 int ncpu = 1; 535 536 #if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) 537 size_t sz = sizeof(ncpu); 538 539 sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0); 540 #elif defined(__linux__) || defined(__CYGWIN__) 541 FILE *fp; 542 char *line = NULL; 543 size_t n = 0; 544 545 /* If anyone knows a better way, I'm all ears */ 546 if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) { 547 ncpu = 0; 548 while (getline(&line, &n, fp) != -1) { 549 if (strncmp(line, 550 "processor", sizeof("processor")-1) == 0) 551 ncpu++; 552 } 553 if (ncpu == 0) 554 ncpu = 1; 555 free(line); 556 fclose(fp); 557 } 558 #elif __sun__ 559 /* XXX: this is just a rough estimate ... */ 560 ncpu = sysconf(_SC_NPROCESSORS_ONLN); 561 #endif 562 563 return ncpu; 564 } 565 566 int 567 rumpuser_getparam(const char *name, void *buf, size_t blen) 568 { 569 int rv; 570 571 if (strcmp(name, RUMPUSER_PARAM_NCPU) == 0) { 572 int ncpu; 573 574 if (getenv_r("RUMP_NCPU", buf, blen) == -1) { 575 ncpu = gethostncpu(); 576 snprintf(buf, blen, "%d", ncpu); 577 } 578 rv = 0; 579 } else if (strcmp(name, RUMPUSER_PARAM_HOSTNAME) == 0) { 580 char tmp[MAXHOSTNAMELEN]; 581 582 if (gethostname(tmp, sizeof(tmp)) == -1) { 583 snprintf(buf, blen, "rump-%05d", (int)getpid()); 584 } else { 585 snprintf(buf, blen, "rump-%05d.%s", 586 (int)getpid(), tmp); 587 } 588 rv = 0; 589 } else if (*name == '_') { 590 rv = EINVAL; 591 } else { 592 if (getenv_r(name, buf, blen) == -1) 593 rv = errno; 594 else 595 rv = 0; 596 } 597 598 ET(rv); 599 } 600 601 void 602 rumpuser_putchar(int c) 603 { 604 605 putchar(c); 606 } 607 608 void 609 rumpuser_exit(int rv) 610 { 611 612 if (rv == RUMPUSER_PANIC) 613 abort(); 614 else 615 exit(rv); 616 } 617 618 void 619 rumpuser_seterrno(int error) 620 { 621 622 errno = error; 623 } 624 625 /* 626 * This is meant for safe debugging prints from the kernel. 627 */ 628 void 629 rumpuser_dprintf(const char *format, ...) 630 { 631 va_list ap; 632 633 va_start(ap, format); 634 vfprintf(stderr, format, ap); 635 va_end(ap); 636 } 637 638 int 639 rumpuser_kill(int64_t pid, int sig) 640 { 641 int rv; 642 643 #ifdef __NetBSD__ 644 int error; 645 646 if (pid == RUMPUSER_PID_SELF) { 647 error = raise(sig); 648 } else { 649 error = kill((pid_t)pid, sig); 650 } 651 if (error == -1) 652 rv = errno; 653 else 654 rv = 0; 655 #else 656 /* XXXfixme: signal numbers may not match on non-NetBSD */ 657 rv = EOPNOTSUPP; 658 #endif 659 660 ET(rv); 661 } 662 663 int 664 rumpuser_getrandom(void *buf, size_t buflen, int flags, size_t *retp) 665 { 666 size_t origlen = buflen; 667 uint32_t *p = buf; 668 uint32_t tmp; 669 int chunk; 670 671 do { 672 chunk = buflen < 4 ? buflen : 4; /* portable MIN ... */ 673 tmp = RUMPUSER_RANDOM(); 674 memcpy(p, &tmp, chunk); 675 p++; 676 buflen -= chunk; 677 } while (chunk); 678 679 *retp = origlen; 680 ET(0); 681 } 682