1 /* $NetBSD: rumpuser.c,v 1.9 2010/08/11 10:25:59 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 #if !defined(lint) 30 __RCSID("$NetBSD: rumpuser.c,v 1.9 2010/08/11 10:25:59 pooka Exp $"); 31 #endif /* !lint */ 32 33 /* thank the maker for this */ 34 #ifdef __linux__ 35 #define _XOPEN_SOURCE 500 36 #define _BSD_SOURCE 37 #define _FILE_OFFSET_BITS 64 38 #include <features.h> 39 #endif 40 41 #include <sys/param.h> 42 #include <sys/event.h> 43 #include <sys/ioctl.h> 44 #include <sys/mman.h> 45 #include <sys/uio.h> 46 47 #ifdef __NetBSD__ 48 #include <sys/disklabel.h> 49 #include <sys/sysctl.h> 50 #endif 51 52 #include <assert.h> 53 #include <err.h> 54 #include <errno.h> 55 #include <fcntl.h> 56 #include <poll.h> 57 #include <signal.h> 58 #include <stdarg.h> 59 #include <stdint.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <string.h> 63 #include <time.h> 64 #include <unistd.h> 65 66 #include <rump/rumpuser.h> 67 68 #include "rumpuser_int.h" 69 70 int 71 rumpuser_getversion() 72 { 73 74 return RUMPUSER_VERSION; 75 } 76 77 int 78 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp, int *error) 79 { 80 struct stat sb; 81 uint64_t size; 82 int needsdev = 0, rv = 0, ft; 83 84 if (stat(path, &sb) == -1) { 85 *error = errno; 86 return -1; 87 } 88 89 switch (sb.st_mode & S_IFMT) { 90 case S_IFDIR: 91 ft = RUMPUSER_FT_DIR; 92 break; 93 case S_IFREG: 94 ft = RUMPUSER_FT_REG; 95 break; 96 case S_IFBLK: 97 ft = RUMPUSER_FT_BLK; 98 needsdev = 1; 99 break; 100 case S_IFCHR: 101 ft = RUMPUSER_FT_CHR; 102 needsdev = 1; 103 break; 104 default: 105 ft = RUMPUSER_FT_OTHER; 106 break; 107 } 108 109 if (!needsdev) { 110 size = sb.st_size; 111 } else if (sizep) { 112 /* 113 * Welcome to the jungle. Of course querying the kernel 114 * for a device partition size is supposed to be far from 115 * trivial. On NetBSD we use ioctl. On $other platform 116 * we have a problem. We try "the lseek trick" and just 117 * fail if that fails. Platform specific code can later 118 * be written here if appropriate. 119 * 120 * On NetBSD we hope and pray that for block devices nobody 121 * else is holding them open, because otherwise the kernel 122 * will not permit us to open it. Thankfully, this is 123 * usually called only in bootstrap and then we can 124 * forget about it. 125 */ 126 #ifndef __NetBSD__ 127 off_t off; 128 int fd; 129 130 fd = open(path, O_RDONLY); 131 if (fd == -1) { 132 *error = errno; 133 rv = -1; 134 goto out; 135 } 136 137 off = lseek(fd, 0, SEEK_END); 138 close(fd); 139 if (off != 0) { 140 size = off; 141 goto out; 142 } 143 fprintf(stderr, "error: device size query not implemented on " 144 "this platform\n"); 145 *error = EOPNOTSUPP; 146 rv = -1; 147 goto out; 148 #else 149 struct disklabel lab; 150 struct partition *parta; 151 int fd; 152 153 fd = open(path, O_RDONLY); 154 if (fd == -1) { 155 *error = errno; 156 rv = -1; 157 goto out; 158 } 159 160 if (ioctl(fd, DIOCGDINFO, &lab) == -1) { 161 *error = errno; 162 rv = -1; 163 goto out; 164 } 165 close(fd); 166 167 parta = &lab.d_partitions[DISKPART(sb.st_rdev)]; 168 size = (uint64_t)lab.d_secsize * parta->p_size; 169 #endif /* __NetBSD__ */ 170 } 171 172 out: 173 if (rv == 0 && sizep) 174 *sizep = size; 175 if (rv == 0 && ftp) 176 *ftp = ft; 177 178 return rv; 179 } 180 181 int 182 rumpuser_nanosleep(uint64_t *sec, uint64_t *nsec, int *error) 183 { 184 struct timespec rqt, rmt; 185 int rv; 186 187 /*LINTED*/ 188 rqt.tv_sec = *sec; 189 /*LINTED*/ 190 rqt.tv_nsec = *nsec; 191 192 KLOCK_WRAP(rv = nanosleep(&rqt, &rmt)); 193 if (rv == -1) 194 *error = errno; 195 196 *sec = rmt.tv_sec; 197 *nsec = rmt.tv_nsec; 198 199 return rv; 200 } 201 202 void * 203 rumpuser_malloc(size_t howmuch, int alignment) 204 { 205 void *mem; 206 int rv; 207 208 if (alignment == 0) 209 alignment = sizeof(void *); 210 211 rv = posix_memalign(&mem, alignment, howmuch); 212 if (__predict_false(rv != 0)) { 213 if (rv == EINVAL) { 214 printf("rumpuser_malloc: invalid alignment %d\n", 215 alignment); 216 abort(); 217 } 218 mem = NULL; 219 } 220 221 return mem; 222 } 223 224 void * 225 rumpuser_realloc(void *ptr, size_t howmuch) 226 { 227 228 return realloc(ptr, howmuch); 229 } 230 231 void 232 rumpuser_free(void *ptr) 233 { 234 235 free(ptr); 236 } 237 238 void * 239 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit, 240 int exec, int *error) 241 { 242 void *rv; 243 int prot; 244 245 prot = PROT_READ|PROT_WRITE; 246 if (exec) 247 prot |= PROT_EXEC; 248 /* XXX: MAP_ALIGNED() is not portable */ 249 rv = mmap(prefaddr, size, prot, 250 MAP_ANON | MAP_ALIGNED(alignbit), -1, 0); 251 if (rv == MAP_FAILED) { 252 *error = errno; 253 return NULL; 254 } 255 return rv; 256 } 257 258 void 259 rumpuser_unmap(void *addr, size_t len) 260 { 261 int rv; 262 263 rv = munmap(addr, len); 264 assert(rv == 0); 265 } 266 267 void * 268 rumpuser_filemmap(int fd, off_t offset, size_t len, int flags, int *error) 269 { 270 void *rv; 271 int mmflags, prot; 272 273 if (flags & RUMPUSER_FILEMMAP_TRUNCATE) 274 ftruncate(fd, offset + len); 275 276 mmflags = MAP_FILE; 277 if (flags & RUMPUSER_FILEMMAP_SHARED) 278 mmflags |= MAP_SHARED; 279 else 280 mmflags |= MAP_PRIVATE; 281 282 prot = 0; 283 if (flags & RUMPUSER_FILEMMAP_READ) 284 prot |= PROT_READ; 285 if (flags & RUMPUSER_FILEMMAP_WRITE) 286 prot |= PROT_WRITE; 287 288 rv = mmap(NULL, len, PROT_READ|PROT_WRITE, mmflags, fd, offset); 289 if (rv == MAP_FAILED) { 290 *error = errno; 291 return NULL; 292 } 293 294 *error = 0; 295 return rv; 296 } 297 298 int 299 rumpuser_memsync(void *addr, size_t len, int *error) 300 { 301 302 DOCALL_KLOCK(int, (msync(addr, len, MS_SYNC))); 303 } 304 305 int 306 rumpuser_open(const char *path, int flags, int *error) 307 { 308 309 DOCALL(int, (open(path, flags, 0644))); 310 } 311 312 int 313 rumpuser_ioctl(int fd, u_long cmd, void *data, int *error) 314 { 315 316 DOCALL_KLOCK(int, (ioctl(fd, cmd, data))); 317 } 318 319 int 320 rumpuser_close(int fd, int *error) 321 { 322 323 DOCALL(int, close(fd)); 324 } 325 326 int 327 rumpuser_fsync(int fd, int *error) 328 { 329 330 DOCALL_KLOCK(int, fsync(fd)); 331 } 332 333 ssize_t 334 rumpuser_read(int fd, void *data, size_t size, int *error) 335 { 336 ssize_t rv; 337 338 KLOCK_WRAP(rv = read(fd, data, size)); 339 if (rv == -1) 340 *error = errno; 341 342 return rv; 343 } 344 345 ssize_t 346 rumpuser_pread(int fd, void *data, size_t size, off_t offset, int *error) 347 { 348 ssize_t rv; 349 350 KLOCK_WRAP(rv = pread(fd, data, size, offset)); 351 if (rv == -1) 352 *error = errno; 353 354 return rv; 355 } 356 357 void 358 rumpuser_read_bio(int fd, void *data, size_t size, off_t offset, 359 rump_biodone_fn biodone, void *biodonecookie) 360 { 361 ssize_t rv; 362 int error = 0; 363 364 rv = rumpuser_pread(fd, data, size, offset, &error); 365 /* check against <0 instead of ==-1 to get typing below right */ 366 if (rv < 0) 367 rv = 0; 368 369 /* LINTED: see above */ 370 biodone(biodonecookie, rv, error); 371 } 372 373 ssize_t 374 rumpuser_write(int fd, const void *data, size_t size, int *error) 375 { 376 ssize_t rv; 377 378 KLOCK_WRAP(rv = write(fd, data, size)); 379 if (rv == -1) 380 *error = errno; 381 382 return rv; 383 } 384 385 ssize_t 386 rumpuser_pwrite(int fd, const void *data, size_t size, off_t offset, int *error) 387 { 388 ssize_t rv; 389 390 KLOCK_WRAP(rv = pwrite(fd, data, size, offset)); 391 if (rv == -1) 392 *error = errno; 393 394 return rv; 395 } 396 397 void 398 rumpuser_write_bio(int fd, const void *data, size_t size, off_t offset, 399 rump_biodone_fn biodone, void *biodonecookie) 400 { 401 ssize_t rv; 402 int error = 0; 403 404 rv = rumpuser_pwrite(fd, data, size, offset, &error); 405 /* check against <0 instead of ==-1 to get typing below right */ 406 if (rv < 0) 407 rv = 0; 408 409 /* LINTED: see above */ 410 biodone(biodonecookie, rv, error); 411 } 412 413 ssize_t 414 rumpuser_readv(int fd, const struct rumpuser_iovec *riov, int iovcnt, 415 int *error) 416 { 417 struct iovec *iovp; 418 ssize_t rv; 419 int i; 420 421 iovp = malloc(iovcnt * sizeof(struct iovec)); 422 if (iovp == NULL) { 423 *error = ENOMEM; 424 return -1; 425 } 426 for (i = 0; i < iovcnt; i++) { 427 iovp[i].iov_base = riov[i].iov_base; 428 /*LINTED*/ 429 iovp[i].iov_len = riov[i].iov_len; 430 } 431 432 KLOCK_WRAP(rv = readv(fd, iovp, iovcnt)); 433 if (rv == -1) 434 *error = errno; 435 free(iovp); 436 437 return rv; 438 } 439 440 ssize_t 441 rumpuser_writev(int fd, const struct rumpuser_iovec *riov, int iovcnt, 442 int *error) 443 { 444 struct iovec *iovp; 445 ssize_t rv; 446 int i; 447 448 iovp = malloc(iovcnt * sizeof(struct iovec)); 449 if (iovp == NULL) { 450 *error = ENOMEM; 451 return -1; 452 } 453 for (i = 0; i < iovcnt; i++) { 454 iovp[i].iov_base = riov[i].iov_base; 455 /*LINTED*/ 456 iovp[i].iov_len = riov[i].iov_len; 457 } 458 459 KLOCK_WRAP(rv = writev(fd, iovp, iovcnt)); 460 if (rv == -1) 461 *error = errno; 462 free(iovp); 463 464 return rv; 465 } 466 467 int 468 rumpuser_gettime(uint64_t *sec, uint64_t *nsec, int *error) 469 { 470 struct timeval tv; 471 int rv; 472 473 rv = gettimeofday(&tv, NULL); 474 if (rv == -1) { 475 *error = errno; 476 return rv; 477 } 478 479 *sec = tv.tv_sec; 480 *nsec = tv.tv_usec * 1000; 481 482 return 0; 483 } 484 485 int 486 rumpuser_getenv(const char *name, char *buf, size_t blen, int *error) 487 { 488 489 DOCALL(int, getenv_r(name, buf, blen)); 490 } 491 492 int 493 rumpuser_gethostname(char *name, size_t namelen, int *error) 494 { 495 496 DOCALL(int, (gethostname(name, namelen))); 497 } 498 499 int 500 rumpuser_poll(struct pollfd *fds, int nfds, int timeout, int *error) 501 { 502 503 DOCALL_KLOCK(int, (poll(fds, (nfds_t)nfds, timeout))); 504 } 505 506 int 507 rumpuser_putchar(int c, int *error) 508 { 509 510 DOCALL(int, (putchar(c))); 511 } 512 513 void 514 rumpuser_exit(int rv) 515 { 516 517 if (rv == RUMPUSER_PANIC) 518 abort(); 519 else 520 exit(rv); 521 } 522 523 void 524 rumpuser_seterrno(int error) 525 { 526 527 errno = error; 528 } 529 530 int 531 rumpuser_writewatchfile_setup(int kq, int fd, intptr_t opaque, int *error) 532 { 533 struct kevent kev; 534 535 if (kq == -1) { 536 kq = kqueue(); 537 if (kq == -1) { 538 *error = errno; 539 return -1; 540 } 541 } 542 543 EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD|EV_ENABLE|EV_CLEAR, 544 NOTE_WRITE, 0, opaque); 545 if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1) { 546 *error = errno; 547 return -1; 548 } 549 550 return kq; 551 } 552 553 int 554 rumpuser_writewatchfile_wait(int kq, intptr_t *opaque, int *error) 555 { 556 struct kevent kev; 557 int rv; 558 559 again: 560 KLOCK_WRAP(rv = kevent(kq, NULL, 0, &kev, 1, NULL)); 561 if (rv == -1) { 562 if (errno == EINTR) 563 goto again; 564 *error = errno; 565 return -1; 566 } 567 568 if (opaque) 569 *opaque = kev.udata; 570 return rv; 571 } 572 573 /* 574 * This is meant for safe debugging prints from the kernel. 575 */ 576 int 577 rumpuser_dprintf(const char *format, ...) 578 { 579 va_list ap; 580 int rv; 581 582 va_start(ap, format); 583 rv = vfprintf(stderr, format, ap); 584 va_end(ap); 585 586 return rv; 587 } 588 589 int 590 rumpuser_kill(int64_t pid, int sig, int *error) 591 { 592 593 #ifdef __NetBSD__ 594 if (pid == RUMPUSER_PID_SELF) { 595 DOCALL(int, raise(sig)); 596 } else { 597 DOCALL(int, kill((pid_t)pid, sig)); 598 } 599 #else 600 /* XXXfixme: signal numbers may not match on non-NetBSD */ 601 *error = EOPNOTSUPP; 602 return -1; 603 #endif 604 } 605 606 int 607 rumpuser_getnhostcpu(void) 608 { 609 int ncpu; 610 size_t sz = sizeof(ncpu); 611 612 #ifdef __NetBSD__ 613 if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) == -1) 614 return 1; 615 return ncpu; 616 #else 617 return 1; 618 #endif 619 } 620