1 /* $NetBSD: rumpuser.c,v 1.15 2011/02/06 21:05:53 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 #if !defined(lint) 30 __RCSID("$NetBSD: rumpuser.c,v 1.15 2011/02/06 21:05:53 pooka Exp $"); 31 #endif /* !lint */ 32 33 /* thank the maker for this */ 34 #ifdef __linux__ 35 #define _XOPEN_SOURCE 500 36 #define _BSD_SOURCE 37 #define _FILE_OFFSET_BITS 64 38 #include <features.h> 39 #endif 40 41 #include <sys/param.h> 42 #include <sys/event.h> 43 #include <sys/ioctl.h> 44 #include <sys/mman.h> 45 #include <sys/uio.h> 46 47 #ifdef __NetBSD__ 48 #include <sys/disk.h> 49 #include <sys/disklabel.h> 50 #include <sys/dkio.h> 51 #include <sys/sysctl.h> 52 #endif 53 54 #include <assert.h> 55 #include <err.h> 56 #include <errno.h> 57 #include <fcntl.h> 58 #include <poll.h> 59 #include <signal.h> 60 #include <stdarg.h> 61 #include <stdint.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 #include <time.h> 66 #include <unistd.h> 67 68 #include <rump/rumpuser.h> 69 70 #include "rumpuser_int.h" 71 72 int 73 rumpuser_getversion() 74 { 75 76 return RUMPUSER_VERSION; 77 } 78 79 int 80 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp, int *error) 81 { 82 struct stat sb; 83 uint64_t size; 84 int needsdev = 0, rv = 0, ft; 85 int fd = -1; 86 87 if (stat(path, &sb) == -1) { 88 seterror(errno); 89 return -1; 90 } 91 92 switch (sb.st_mode & S_IFMT) { 93 case S_IFDIR: 94 ft = RUMPUSER_FT_DIR; 95 break; 96 case S_IFREG: 97 ft = RUMPUSER_FT_REG; 98 break; 99 case S_IFBLK: 100 ft = RUMPUSER_FT_BLK; 101 needsdev = 1; 102 break; 103 case S_IFCHR: 104 ft = RUMPUSER_FT_CHR; 105 needsdev = 1; 106 break; 107 default: 108 ft = RUMPUSER_FT_OTHER; 109 break; 110 } 111 112 if (!needsdev) { 113 size = sb.st_size; 114 } else if (sizep) { 115 /* 116 * Welcome to the jungle. Of course querying the kernel 117 * for a device partition size is supposed to be far from 118 * trivial. On NetBSD we use ioctl. On $other platform 119 * we have a problem. We try "the lseek trick" and just 120 * fail if that fails. Platform specific code can later 121 * be written here if appropriate. 122 * 123 * On NetBSD we hope and pray that for block devices nobody 124 * else is holding them open, because otherwise the kernel 125 * will not permit us to open it. Thankfully, this is 126 * usually called only in bootstrap and then we can 127 * forget about it. 128 */ 129 #ifndef __NetBSD__ 130 off_t off; 131 132 fd = open(path, O_RDONLY); 133 if (fd == -1) { 134 seterror(errno); 135 rv = -1; 136 goto out; 137 } 138 139 off = lseek(fd, 0, SEEK_END); 140 if (off != 0) { 141 size = off; 142 goto out; 143 } 144 fprintf(stderr, "error: device size query not implemented on " 145 "this platform\n"); 146 seterror(EOPNOTSUPP); 147 rv = -1; 148 goto out; 149 #else 150 struct disklabel lab; 151 struct partition *parta; 152 struct dkwedge_info dkw; 153 154 fd = open(path, O_RDONLY); 155 if (fd == -1) { 156 seterror(errno); 157 rv = -1; 158 goto out; 159 } 160 161 if (ioctl(fd, DIOCGDINFO, &lab) == 0) { 162 parta = &lab.d_partitions[DISKPART(sb.st_rdev)]; 163 size = (uint64_t)lab.d_secsize * parta->p_size; 164 goto out; 165 } 166 167 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) { 168 /* 169 * XXX: should use DIOCGDISKINFO to query 170 * sector size, but that requires proplib, 171 * so just don't bother for now. it's nice 172 * that something as difficult as figuring out 173 * a partition's size has been made so easy. 174 */ 175 size = dkw.dkw_size << DEV_BSHIFT; 176 goto out; 177 } 178 179 seterror(errno); 180 rv = -1; 181 #endif /* __NetBSD__ */ 182 } 183 184 out: 185 if (rv == 0 && sizep) 186 *sizep = size; 187 if (rv == 0 && ftp) 188 *ftp = ft; 189 if (fd != -1) 190 close(fd); 191 192 return rv; 193 } 194 195 int 196 rumpuser_nanosleep(uint64_t *sec, uint64_t *nsec, int *error) 197 { 198 struct timespec rqt, rmt; 199 int rv; 200 201 /*LINTED*/ 202 rqt.tv_sec = *sec; 203 /*LINTED*/ 204 rqt.tv_nsec = *nsec; 205 206 KLOCK_WRAP(rv = nanosleep(&rqt, &rmt)); 207 if (rv == -1) 208 seterror(errno); 209 210 *sec = rmt.tv_sec; 211 *nsec = rmt.tv_nsec; 212 213 return rv; 214 } 215 216 void * 217 rumpuser_malloc(size_t howmuch, int alignment) 218 { 219 void *mem; 220 int rv; 221 222 if (alignment == 0) 223 alignment = sizeof(void *); 224 225 rv = posix_memalign(&mem, (size_t)alignment, howmuch); 226 if (__predict_false(rv != 0)) { 227 if (rv == EINVAL) { 228 printf("rumpuser_malloc: invalid alignment %d\n", 229 alignment); 230 abort(); 231 } 232 mem = NULL; 233 } 234 235 return mem; 236 } 237 238 void * 239 rumpuser_realloc(void *ptr, size_t howmuch) 240 { 241 242 return realloc(ptr, howmuch); 243 } 244 245 void 246 rumpuser_free(void *ptr) 247 { 248 249 free(ptr); 250 } 251 252 void * 253 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit, 254 int exec, int *error) 255 { 256 void *rv; 257 int prot; 258 259 prot = PROT_READ|PROT_WRITE; 260 if (exec) 261 prot |= PROT_EXEC; 262 /* XXX: MAP_ALIGNED() is not portable */ 263 rv = mmap(prefaddr, size, prot, 264 MAP_ANON | MAP_ALIGNED(alignbit), -1, 0); 265 if (rv == MAP_FAILED) { 266 seterror(errno); 267 return NULL; 268 } 269 return rv; 270 } 271 272 void 273 rumpuser_unmap(void *addr, size_t len) 274 { 275 int rv; 276 277 rv = munmap(addr, len); 278 assert(rv == 0); 279 } 280 281 void * 282 rumpuser_filemmap(int fd, off_t offset, size_t len, int flags, int *error) 283 { 284 void *rv; 285 int mmflags, prot; 286 287 if (flags & RUMPUSER_FILEMMAP_TRUNCATE) 288 ftruncate(fd, offset + len); 289 290 mmflags = MAP_FILE; 291 if (flags & RUMPUSER_FILEMMAP_SHARED) 292 mmflags |= MAP_SHARED; 293 else 294 mmflags |= MAP_PRIVATE; 295 296 prot = 0; 297 if (flags & RUMPUSER_FILEMMAP_READ) 298 prot |= PROT_READ; 299 if (flags & RUMPUSER_FILEMMAP_WRITE) 300 prot |= PROT_WRITE; 301 302 rv = mmap(NULL, len, PROT_READ|PROT_WRITE, mmflags, fd, offset); 303 if (rv == MAP_FAILED) { 304 seterror(errno); 305 return NULL; 306 } 307 308 seterror(0); 309 return rv; 310 } 311 312 int 313 rumpuser_memsync(void *addr, size_t len, int *error) 314 { 315 316 DOCALL_KLOCK(int, (msync(addr, len, MS_SYNC))); 317 } 318 319 int 320 rumpuser_open(const char *path, int flags, int *error) 321 { 322 323 DOCALL(int, (open(path, flags, 0644))); 324 } 325 326 int 327 rumpuser_ioctl(int fd, u_long cmd, void *data, int *error) 328 { 329 330 DOCALL_KLOCK(int, (ioctl(fd, cmd, data))); 331 } 332 333 int 334 rumpuser_close(int fd, int *error) 335 { 336 337 DOCALL(int, close(fd)); 338 } 339 340 int 341 rumpuser_fsync(int fd, int *error) 342 { 343 344 DOCALL_KLOCK(int, fsync(fd)); 345 } 346 347 ssize_t 348 rumpuser_read(int fd, void *data, size_t size, int *error) 349 { 350 ssize_t rv; 351 352 KLOCK_WRAP(rv = read(fd, data, size)); 353 if (rv == -1) 354 seterror(errno); 355 356 return rv; 357 } 358 359 ssize_t 360 rumpuser_pread(int fd, void *data, size_t size, off_t offset, int *error) 361 { 362 ssize_t rv; 363 364 KLOCK_WRAP(rv = pread(fd, data, size, offset)); 365 if (rv == -1) 366 seterror(errno); 367 368 return rv; 369 } 370 371 void 372 rumpuser_read_bio(int fd, void *data, size_t size, off_t offset, 373 rump_biodone_fn biodone, void *biodonecookie) 374 { 375 ssize_t rv; 376 int error = 0; 377 378 rv = rumpuser_pread(fd, data, size, offset, &error); 379 /* check against <0 instead of ==-1 to get typing below right */ 380 if (rv < 0) 381 rv = 0; 382 383 /* LINTED: see above */ 384 biodone(biodonecookie, rv, error); 385 } 386 387 ssize_t 388 rumpuser_write(int fd, const void *data, size_t size, int *error) 389 { 390 ssize_t rv; 391 392 KLOCK_WRAP(rv = write(fd, data, size)); 393 if (rv == -1) 394 seterror(errno); 395 396 return rv; 397 } 398 399 ssize_t 400 rumpuser_pwrite(int fd, const void *data, size_t size, off_t offset, int *error) 401 { 402 ssize_t rv; 403 404 KLOCK_WRAP(rv = pwrite(fd, data, size, offset)); 405 if (rv == -1) 406 seterror(errno); 407 408 return rv; 409 } 410 411 void 412 rumpuser_write_bio(int fd, const void *data, size_t size, off_t offset, 413 rump_biodone_fn biodone, void *biodonecookie) 414 { 415 ssize_t rv; 416 int error = 0; 417 418 rv = rumpuser_pwrite(fd, data, size, offset, &error); 419 /* check against <0 instead of ==-1 to get typing below right */ 420 if (rv < 0) 421 rv = 0; 422 423 /* LINTED: see above */ 424 biodone(biodonecookie, rv, error); 425 } 426 427 ssize_t 428 rumpuser_readv(int fd, const struct rumpuser_iovec *riov, int iovcnt, 429 int *error) 430 { 431 struct iovec *iovp; 432 ssize_t rv; 433 int i; 434 435 iovp = malloc(iovcnt * sizeof(struct iovec)); 436 if (iovp == NULL) { 437 seterror(ENOMEM); 438 return -1; 439 } 440 for (i = 0; i < iovcnt; i++) { 441 iovp[i].iov_base = riov[i].iov_base; 442 /*LINTED*/ 443 iovp[i].iov_len = riov[i].iov_len; 444 } 445 446 KLOCK_WRAP(rv = readv(fd, iovp, iovcnt)); 447 if (rv == -1) 448 seterror(errno); 449 free(iovp); 450 451 return rv; 452 } 453 454 ssize_t 455 rumpuser_writev(int fd, const struct rumpuser_iovec *riov, int iovcnt, 456 int *error) 457 { 458 struct iovec *iovp; 459 ssize_t rv; 460 int i; 461 462 iovp = malloc(iovcnt * sizeof(struct iovec)); 463 if (iovp == NULL) { 464 seterror(ENOMEM); 465 return -1; 466 } 467 for (i = 0; i < iovcnt; i++) { 468 iovp[i].iov_base = riov[i].iov_base; 469 /*LINTED*/ 470 iovp[i].iov_len = riov[i].iov_len; 471 } 472 473 KLOCK_WRAP(rv = writev(fd, iovp, iovcnt)); 474 if (rv == -1) 475 seterror(errno); 476 free(iovp); 477 478 return rv; 479 } 480 481 int 482 rumpuser_gettime(uint64_t *sec, uint64_t *nsec, int *error) 483 { 484 struct timeval tv; 485 int rv; 486 487 rv = gettimeofday(&tv, NULL); 488 if (rv == -1) { 489 seterror(errno); 490 return rv; 491 } 492 493 *sec = tv.tv_sec; 494 *nsec = tv.tv_usec * 1000; 495 496 return 0; 497 } 498 499 int 500 rumpuser_getenv(const char *name, char *buf, size_t blen, int *error) 501 { 502 503 DOCALL(int, getenv_r(name, buf, blen)); 504 } 505 506 int 507 rumpuser_gethostname(char *name, size_t namelen, int *error) 508 { 509 char tmp[MAXHOSTNAMELEN]; 510 511 if (gethostname(tmp, sizeof(tmp)) == -1) { 512 snprintf(name, namelen, "rump-%05d.rumpdomain", getpid()); 513 } else { 514 snprintf(name, namelen, "rump-%05d.%s.rumpdomain", 515 getpid(), tmp); 516 } 517 518 *error = 0; 519 return 0; 520 } 521 522 int 523 rumpuser_poll(struct pollfd *fds, int nfds, int timeout, int *error) 524 { 525 526 DOCALL_KLOCK(int, (poll(fds, (nfds_t)nfds, timeout))); 527 } 528 529 int 530 rumpuser_putchar(int c, int *error) 531 { 532 533 DOCALL(int, (putchar(c))); 534 } 535 536 void 537 rumpuser_exit(int rv) 538 { 539 540 if (rv == RUMPUSER_PANIC) 541 abort(); 542 else 543 exit(rv); 544 } 545 546 void 547 rumpuser_seterrno(int error) 548 { 549 550 errno = error; 551 } 552 553 int 554 rumpuser_writewatchfile_setup(int kq, int fd, intptr_t opaque, int *error) 555 { 556 struct kevent kev; 557 558 if (kq == -1) { 559 kq = kqueue(); 560 if (kq == -1) { 561 seterror(errno); 562 return -1; 563 } 564 } 565 566 EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD|EV_ENABLE|EV_CLEAR, 567 NOTE_WRITE, 0, opaque); 568 if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1) { 569 seterror(errno); 570 return -1; 571 } 572 573 return kq; 574 } 575 576 int 577 rumpuser_writewatchfile_wait(int kq, intptr_t *opaque, int *error) 578 { 579 struct kevent kev; 580 int rv; 581 582 again: 583 KLOCK_WRAP(rv = kevent(kq, NULL, 0, &kev, 1, NULL)); 584 if (rv == -1) { 585 if (errno == EINTR) 586 goto again; 587 seterror(errno); 588 return -1; 589 } 590 591 if (opaque) 592 *opaque = kev.udata; 593 return rv; 594 } 595 596 /* 597 * This is meant for safe debugging prints from the kernel. 598 */ 599 int 600 rumpuser_dprintf(const char *format, ...) 601 { 602 va_list ap; 603 int rv; 604 605 va_start(ap, format); 606 rv = vfprintf(stderr, format, ap); 607 va_end(ap); 608 609 return rv; 610 } 611 612 int 613 rumpuser_kill(int64_t pid, int sig, int *error) 614 { 615 616 #ifdef __NetBSD__ 617 if (pid == RUMPUSER_PID_SELF) { 618 DOCALL(int, raise(sig)); 619 } else { 620 DOCALL(int, kill((pid_t)pid, sig)); 621 } 622 #else 623 /* XXXfixme: signal numbers may not match on non-NetBSD */ 624 seterror(EOPNOTSUPP); 625 return -1; 626 #endif 627 } 628 629 int 630 rumpuser_getnhostcpu(void) 631 { 632 int ncpu; 633 size_t sz = sizeof(ncpu); 634 635 #ifdef __NetBSD__ 636 if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) == -1) 637 return 1; 638 return ncpu; 639 #else 640 return 1; 641 #endif 642 } 643