1 /* $NetBSD: hijack.c,v 1.90 2011/04/21 13:38:14 joerg Exp $ */ 2 3 /*- 4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* Disable namespace mangling, Fortification is useless here anyway. 
*/ 29 #undef _FORTIFY_SOURCE 30 31 #include <sys/cdefs.h> 32 __RCSID("$NetBSD: hijack.c,v 1.90 2011/04/21 13:38:14 joerg Exp $"); 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/event.h> 37 #include <sys/ioctl.h> 38 #include <sys/mman.h> 39 #include <sys/mount.h> 40 #include <sys/poll.h> 41 #include <sys/socket.h> 42 #include <sys/statvfs.h> 43 44 #include <rump/rumpclient.h> 45 #include <rump/rump_syscalls.h> 46 47 #include <assert.h> 48 #include <dlfcn.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <fcntl.h> 52 #include <poll.h> 53 #include <pthread.h> 54 #include <signal.h> 55 #include <stdarg.h> 56 #include <stdbool.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <time.h> 61 #include <unistd.h> 62 63 #include "hijack.h" 64 65 enum dualcall { 66 DUALCALL_WRITE, DUALCALL_WRITEV, DUALCALL_PWRITE, DUALCALL_PWRITEV, 67 DUALCALL_IOCTL, DUALCALL_FCNTL, 68 DUALCALL_SOCKET, DUALCALL_ACCEPT, DUALCALL_BIND, DUALCALL_CONNECT, 69 DUALCALL_GETPEERNAME, DUALCALL_GETSOCKNAME, DUALCALL_LISTEN, 70 DUALCALL_RECVFROM, DUALCALL_RECVMSG, 71 DUALCALL_SENDTO, DUALCALL_SENDMSG, 72 DUALCALL_GETSOCKOPT, DUALCALL_SETSOCKOPT, 73 DUALCALL_SHUTDOWN, 74 DUALCALL_READ, DUALCALL_READV, DUALCALL_PREAD, DUALCALL_PREADV, 75 DUALCALL_DUP2, 76 DUALCALL_CLOSE, 77 DUALCALL_POLLTS, 78 DUALCALL_KEVENT, 79 DUALCALL_STAT, DUALCALL_LSTAT, DUALCALL_FSTAT, 80 DUALCALL_CHMOD, DUALCALL_LCHMOD, DUALCALL_FCHMOD, 81 DUALCALL_CHOWN, DUALCALL_LCHOWN, DUALCALL_FCHOWN, 82 DUALCALL_OPEN, 83 DUALCALL_STATVFS1, DUALCALL_FSTATVFS1, 84 DUALCALL_CHDIR, DUALCALL_FCHDIR, 85 DUALCALL_LSEEK, 86 DUALCALL_GETDENTS, 87 DUALCALL_UNLINK, DUALCALL_SYMLINK, DUALCALL_READLINK, 88 DUALCALL_RENAME, 89 DUALCALL_MKDIR, DUALCALL_RMDIR, 90 DUALCALL_UTIMES, DUALCALL_LUTIMES, DUALCALL_FUTIMES, 91 DUALCALL_TRUNCATE, DUALCALL_FTRUNCATE, 92 DUALCALL_FSYNC, DUALCALL_FSYNC_RANGE, 93 DUALCALL_MOUNT, DUALCALL_UNMOUNT, 94 DUALCALL___GETCWD, 95 DUALCALL_CHFLAGS, DUALCALL_LCHFLAGS, 
DUALCALL_FCHFLAGS, 96 DUALCALL_ACCESS, 97 DUALCALL_MKNOD, 98 DUALCALL___SYSCTL, 99 DUALCALL_GETVFSSTAT, DUALCALL_NFSSVC, 100 DUALCALL_GETFH, DUALCALL_FHOPEN, DUALCALL_FHSTAT, DUALCALL_FHSTATVFS1, 101 #if __NetBSD_Prereq__(5,99,48) 102 DUALCALL_QUOTACTL, 103 #endif 104 DUALCALL__NUM 105 }; 106 107 #define RSYS_STRING(a) __STRING(a) 108 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a)) 109 110 /* 111 * Would be nice to get this automatically in sync with libc. 112 * Also, this does not work for compat-using binaries! 113 */ 114 #if !__NetBSD_Prereq__(5,99,7) 115 #define REALSELECT select 116 #define REALPOLLTS pollts 117 #define REALKEVENT kevent 118 #define REALSTAT __stat30 119 #define REALLSTAT __lstat30 120 #define REALFSTAT __fstat30 121 #define REALUTIMES utimes 122 #define REALLUTIMES lutimes 123 #define REALFUTIMES futimes 124 #define REALMKNOD mknod 125 #define REALFHSTAT __fhstat40 126 #else 127 #define REALSELECT _sys___select50 128 #define REALPOLLTS _sys___pollts50 129 #define REALKEVENT _sys___kevent50 130 #define REALSTAT __stat50 131 #define REALLSTAT __lstat50 132 #define REALFSTAT __fstat50 133 #define REALUTIMES __utimes50 134 #define REALLUTIMES __lutimes50 135 #define REALFUTIMES __futimes50 136 #define REALMKNOD __mknod50 137 #define REALFHSTAT __fhstat50 138 #endif 139 #define REALREAD _sys_read 140 #define REALPREAD _sys_pread 141 #define REALPWRITE _sys_pwrite 142 #define REALGETDENTS __getdents30 143 #define REALMOUNT __mount50 144 #define REALGETFH __getfh30 145 #define REALFHOPEN __fhopen40 146 #define REALFHSTATVFS1 __fhstatvfs140 147 #define REALQUOTACTL __quotactl50 148 149 int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *); 150 int REALPOLLTS(struct pollfd *, nfds_t, 151 const struct timespec *, const sigset_t *); 152 int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t, 153 const struct timespec *); 154 ssize_t REALREAD(int, void *, size_t); 155 ssize_t REALPREAD(int, void *, size_t, 
off_t); 156 ssize_t REALPWRITE(int, const void *, size_t, off_t); 157 int REALSTAT(const char *, struct stat *); 158 int REALLSTAT(const char *, struct stat *); 159 int REALFSTAT(int, struct stat *); 160 int REALGETDENTS(int, char *, size_t); 161 int REALUTIMES(const char *, const struct timeval [2]); 162 int REALLUTIMES(const char *, const struct timeval [2]); 163 int REALFUTIMES(int, const struct timeval [2]); 164 int REALMOUNT(const char *, const char *, int, void *, size_t); 165 int __getcwd(char *, size_t); 166 int REALMKNOD(const char *, mode_t, dev_t); 167 int REALGETFH(const char *, void *, size_t *); 168 int REALFHOPEN(const void *, size_t, int); 169 int REALFHSTAT(const void *, size_t, struct stat *); 170 int REALFHSTATVFS1(const void *, size_t, struct statvfs *, int); 171 int REALQUOTACTL(const char *, struct plistref *); 172 173 #define S(a) __STRING(a) 174 struct sysnames { 175 enum dualcall scm_callnum; 176 const char *scm_hostname; 177 const char *scm_rumpname; 178 } syscnames[] = { 179 { DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) }, 180 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) }, 181 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) }, 182 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) }, 183 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) }, 184 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) }, 185 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) }, 186 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) }, 187 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) }, 188 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) }, 189 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) }, 190 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) }, 191 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) }, 192 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) }, 193 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) }, 194 { DUALCALL_READV, "readv", RSYS_NAME(READV) }, 195 { DUALCALL_PREAD, S(REALPREAD), RSYS_NAME(PREAD) 
}, 196 { DUALCALL_PREADV, "preadv", RSYS_NAME(PREADV) }, 197 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) }, 198 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) }, 199 { DUALCALL_PWRITE, S(REALPWRITE), RSYS_NAME(PWRITE) }, 200 { DUALCALL_PWRITEV, "pwritev", RSYS_NAME(PWRITEV) }, 201 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) }, 202 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) }, 203 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) }, 204 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) }, 205 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) }, 206 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) }, 207 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) }, 208 { DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) }, 209 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) }, 210 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) }, 211 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) }, 212 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) }, 213 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) }, 214 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) }, 215 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) }, 216 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) }, 217 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) }, 218 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) }, 219 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) }, 220 { DUALCALL_STATVFS1, "statvfs1", RSYS_NAME(STATVFS1) }, 221 { DUALCALL_FSTATVFS1, "fstatvfs1", RSYS_NAME(FSTATVFS1) }, 222 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) }, 223 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) }, 224 { DUALCALL_LSEEK, "lseek", RSYS_NAME(LSEEK) }, 225 { DUALCALL_GETDENTS, "__getdents30", RSYS_NAME(GETDENTS) }, 226 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) }, 227 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) }, 228 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) }, 229 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) }, 230 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) }, 231 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) }, 232 { 
DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) }, 233 { DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) }, 234 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) }, 235 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) }, 236 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) }, 237 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) }, 238 { DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) }, 239 { DUALCALL_CHFLAGS, "chflags", RSYS_NAME(CHFLAGS) }, 240 { DUALCALL_LCHFLAGS, "lchflags", RSYS_NAME(LCHFLAGS) }, 241 { DUALCALL_FCHFLAGS, "fchflags", RSYS_NAME(FCHFLAGS) }, 242 { DUALCALL_ACCESS, "access", RSYS_NAME(ACCESS) }, 243 { DUALCALL_MKNOD, S(REALMKNOD), RSYS_NAME(MKNOD) }, 244 { DUALCALL___SYSCTL, "__sysctl", RSYS_NAME(__SYSCTL) }, 245 { DUALCALL_GETVFSSTAT, "getvfsstat", RSYS_NAME(GETVFSSTAT) }, 246 { DUALCALL_NFSSVC, "nfssvc", RSYS_NAME(NFSSVC) }, 247 { DUALCALL_GETFH, S(REALGETFH), RSYS_NAME(GETFH) }, 248 { DUALCALL_FHOPEN, S(REALFHOPEN),RSYS_NAME(FHOPEN) }, 249 { DUALCALL_FHSTAT, S(REALFHSTAT),RSYS_NAME(FHSTAT) }, 250 { DUALCALL_FHSTATVFS1, S(REALFHSTATVFS1),RSYS_NAME(FHSTATVFS1) }, 251 #if __NetBSD_Prereq__(5,99,48) 252 { DUALCALL_QUOTACTL, S(REALQUOTACTL),RSYS_NAME(QUOTACTL) }, 253 #endif 254 }; 255 #undef S 256 257 struct bothsys { 258 void *bs_host; 259 void *bs_rump; 260 } syscalls[DUALCALL__NUM]; 261 #define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which 262 263 static pid_t (*host_fork)(void); 264 static int (*host_daemon)(int, int); 265 static void * (*host_mmap)(void *, size_t, int, int, int, off_t); 266 267 /* 268 * This tracks if our process is in a subdirectory of /rump. 269 * It's preserved over exec. 
/*
 * This tracks if our process is in a subdirectory of /rump.
 * It's preserved over exec.
 */
static bool pwdinrump;

enum pathtype { PATH_HOST, PATH_RUMP, PATH_RUMPBLANKET };

static bool		fd_isrump(int);
static enum pathtype	path_isrump(const char *);

/* default FD_SETSIZE is 256 ==> default fdoff is 128 */
static int hijack_fdoff = FD_SETSIZE/2;

/*
 * Maintain a mapping table for the usual dup2 suspects.
 * Could use atomic ops to operate on dup2vec, but an application
 * racing there is not well-defined, so don't bother.
 *
 * Each dup2vec[] slot describes one host fd in [0, DUP2HIGH]:
 *   DUP2BIT	the slot is in use (the host fd maps to a rump fd)
 *   DUP2ALIAS	set by setdup2(), cleared by killdup2alias()
 *   DUP2FDMASK	the low 30 bits hold the rump fd number
 *
 * The flag constants are unsigned: (1<<31) on a 32-bit int shifts into
 * the sign bit, which is undefined behavior.
 */
/* note: you cannot change this without editing the env-passing code */
#define DUP2HIGH 2
static uint32_t dup2vec[DUP2HIGH+1];
#define DUP2BIT (1U<<31)
#define DUP2ALIAS (1U<<30)
#define DUP2FDMASK ((1U<<30)-1)

/*
 * Return true if host fd `fd' is within [0, DUP2HIGH] and its slot is
 * marked in use.
 */
static bool
isdup2d(int fd)
{

	if (fd < 0 || fd > DUP2HIGH)
		return false;
	return (dup2vec[fd] & DUP2BIT) != 0;
}

/*
 * Return the rump fd recorded for a dup2'd host fd.
 * The caller must have checked isdup2d(hostfd).
 */
static int
mapdup2(int hostfd)
{

	_DIAGASSERT(isdup2d(hostfd));
	return dup2vec[hostfd] & DUP2FDMASK;
}

/*
 * Reverse lookup: return the lowest host fd whose in-use slot records
 * `rumpfd', or -1 if there is none.
 */
static int
unmapdup2(int rumpfd)
{
	int i;

	for (i = 0; i <= DUP2HIGH; i++) {
		if ((dup2vec[i] & DUP2BIT) == 0)
			continue;
		if ((dup2vec[i] & DUP2FDMASK) == (unsigned)rumpfd)
			return i;
	}
	return -1;
}

/*
 * Record that host fd `hostfd' maps to rump fd `rumpfd'; the slot is
 * marked in use and aliased.  A host fd outside [0, DUP2HIGH] is ignored
 * (and trips _DIAGASSERT); this includes negative values, which would
 * otherwise index in front of dup2vec[].
 */
static void
setdup2(int hostfd, int rumpfd)
{

	if (hostfd < 0 || hostfd > DUP2HIGH) {
		_DIAGASSERT(0);
		return;
	}

	/* mask the fd so that it can never clobber the flag bits */
	dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | ((uint32_t)rumpfd & DUP2FDMASK);
}

/*
 * Forget the mapping of `hostfd'.  Out-of-range values (negative ones
 * included) are ignored.
 */
static void
clrdup2(int hostfd)
{

	if (hostfd < 0 || hostfd > DUP2HIGH) {
		_DIAGASSERT(0);
		return;
	}

	dup2vec[hostfd] = 0;
}

/*
 * If some host fd maps to `rumpfd' and its slot still carries the alias
 * flag, clear the flag and return true.  Otherwise return false.
 */
static bool
killdup2alias(int rumpfd)
{
	int hostfd;

	hostfd = unmapdup2(rumpfd);
	if (hostfd == -1)
		return false;

	if ((dup2vec[hostfd] & DUP2ALIAS) == 0)
		return false;

	dup2vec[hostfd] &= ~DUP2ALIAS;
	return true;
}

//#define DEBUGJACK
#ifdef DEBUGJACK
#define DPRINTF(x) mydprintf x
/*
 * Debug printf to stderr.  Prints nothing when stderr itself is dup2'd
 * to a rump fd.
 */
static void
mydprintf(const char *fmt, ...)
{
	va_list ap;

	if (isdup2d(STDERR_FILENO))
		return;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}

/* Debug helper: name the side an fd belongs to. */
static const char *
whichfd(int fd)
{

	if (fd == -1)
		return "-1";
	else if (fd_isrump(fd))
		return "rump";
	else
		return "host";
}

/* Debug helper: name the side a path belongs to. */
static const char *
whichpath(const char *path)
{

	if (path_isrump(path))
		return "rump";
	else
		return "host";
}

#else
#define DPRINTF(x)
#endif

/*
 * Generate a wrapper for a call whose first decision is made on an fd
 * argument named `fd': rump fds are translated and sent to the rump
 * implementation, everything else goes to the host.
 */
#define FDCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto;						\
									\
	DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd,	whichfd(fd)));	\
	if (fd_isrump(fd)) {						\
		fun = syscalls[rcname].bs_rump;				\
		fd = fd_host2rump(fd);					\
	} else {							\
		fun = syscalls[rcname].bs_host;				\
	}								\
									\
	return fun vars;						\
}

/*
 * Same as FDCALL, but the decision is made on a path argument named
 * `path'.  Only PATH_RUMP paths are rewritten; blanket paths are passed
 * to the rump kernel unchanged.
 */
#define PATHCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto;						\
	enum pathtype pt;						\
									\
	DPRINTF(("%s -> %s (%s)\n", __STRING(name), path,		\
	    whichpath(path)));						\
	if ((pt = path_isrump(path)) != PATH_HOST) {			\
		fun = syscalls[rcname].bs_rump;				\
		if (pt == PATH_RUMP)					\
			path = path_host2rump(path);			\
	} else {							\
		fun = syscalls[rcname].bs_host;				\
	}								\
									\
	return fun vars;						\
}

/*
 * Generate a wrapper that goes to the rump kernel iff `bit' is set in
 * vfsbits.
 */
#define VFSCALL(bit, type, name, rcname, args, proto, vars)		\
type name args								\
{									\
	type (*fun) proto;						\
									\
	DPRINTF(("%s (0x%x, 0x%x)\n", __STRING(name), bit, vfsbits));	\
	if (vfsbits & bit) {						\
		fun = syscalls[rcname].bs_rump;				\
	} else {							\
		fun = syscalls[rcname].bs_host;				\
	}								\
									\
	return fun vars;						\
}

/*
 * These variables are set from the RUMPHIJACK string and control
 * which operations can produce rump kernel file descriptors.
 * This should be easily extendable for future needs.
 */
#define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal"
static bool rumpsockets[PF_MAX];	/* indexed by protocol family */
static const char *rumpprefix;		/* path prefix set by pathparser() */
static size_t rumpprefixlen;		/* strlen(rumpprefix) */

/* Socket family names accepted by the "socket=" specifier. */
static struct {
	int pf;
	const char *name;
} socketmap[] = {
	{ PF_LOCAL, "local" },
	{ PF_INET, "inet" },
#ifdef PF_LINK
	{ PF_LINK, "link" },
#endif
#ifdef PF_OROUTE
	{ PF_OROUTE, "oroute" },
#endif
	{ PF_ROUTE, "route" },
	{ PF_INET6, "inet6" },
#ifdef PF_MPLS
	{ PF_MPLS, "mpls" },
#endif
	{ -1, NULL }
};

/*
 * Parse the value of "socket=": a colon-separated list of family names
 * from socketmap[], each optionally prefixed with "no" to switch the
 * family off.  A leading "all" switches every family on first.
 * `buf' is modified (strtok_r).  An unknown name terminates the process
 * via errx().
 */
static void
sockparser(char *buf)
{
	char *tok, *state;
	bool enable;
	int i;

	/* if "all" is present, it must be specified first */
	if (strncmp(buf, "all", strlen("all")) == 0) {
		for (i = 0; i < (int)__arraycount(rumpsockets); i++) {
			rumpsockets[i] = true;
		}
		buf += strlen("all");
		if (*buf == ':')
			buf++;
	}

	for (tok = strtok_r(buf, ":", &state); tok != NULL;
	    tok = strtok_r(NULL, ":", &state)) {
		enable = true;
		if (strncmp(tok, "no", strlen("no")) == 0) {
			enable = false;
			tok += strlen("no");
		}

		for (i = 0; socketmap[i].name; i++) {
			if (strcmp(tok, socketmap[i].name) == 0) {
				rumpsockets[socketmap[i].pf] = enable;
				break;
			}
		}
		if (socketmap[i].name == NULL) {
			errx(1, "invalid socket specifier %s", tok);
		}
	}
}

/*
 * Parse the value of "path=": the prefix under which paths are sent to
 * the rump kernel.  It must start with '/', be at least two characters
 * long, and may end in '/' only if it consists purely of slashes.
 * A private copy is stored in rumpprefix/rumpprefixlen; invalid input
 * terminates the process via errx().
 */
static void
pathparser(char *buf)
{
	size_t len;

	/* sanity-check */
	if (*buf != '/')
		errx(1, "hijack path specifier must begin with ``/''");
	len = strlen(buf);
	if (len < 2)
		errx(1, "invalid hijack prefix: %s", buf);
	if (buf[len-1] == '/' && strspn(buf, "/") != len)
		errx(1, "hijack prefix may end in slash only if pure "
		    "slash, gave %s", buf);

	if ((rumpprefix = strdup(buf)) == NULL)
		err(1, "strdup");
	rumpprefixlen = len;
}

/* "blanket=" prefixes and their lengths; nblanket entries are valid. */
static struct blanket {
	const char *pfx;
	size_t len;
} *blanket;
static int nblanket;

static void 542 blanketparser(char *buf) 543 { 544 char *p, *l; 545 int i; 546 547 for (nblanket = 0, p = buf; p; p = strchr(p+1, ':'), nblanket++) 548 continue; 549 550 blanket = malloc(nblanket * sizeof(*blanket)); 551 if (blanket == NULL) 552 err(1, "alloc blanket %d", nblanket); 553 554 for (p = strtok_r(buf, ":", &l), i = 0; p; 555 p = strtok_r(NULL, ":", &l), i++) { 556 blanket[i].pfx = strdup(p); 557 if (blanket[i].pfx == NULL) 558 err(1, "strdup blanket"); 559 blanket[i].len = strlen(p); 560 561 if (blanket[i].len == 0 || *blanket[i].pfx != '/') 562 errx(1, "invalid blanket specifier %s", p); 563 if (*(blanket[i].pfx + blanket[i].len-1) == '/') 564 errx(1, "invalid blanket specifier %s", p); 565 } 566 } 567 568 #define VFSBIT_NFSSVC 0x01 569 #define VFSBIT_GETVFSSTAT 0x02 570 #define VFSBIT_FHCALLS 0x04 571 static unsigned vfsbits; 572 573 static struct { 574 int bit; 575 const char *name; 576 } vfscalls[] = { 577 { VFSBIT_NFSSVC, "nfssvc" }, 578 { VFSBIT_GETVFSSTAT, "getvfsstat" }, 579 { VFSBIT_FHCALLS, "fhcalls" }, 580 { -1, NULL } 581 }; 582 583 static void 584 vfsparser(char *buf) 585 { 586 char *p, *l; 587 bool turnon; 588 unsigned int fullmask; 589 int i; 590 591 /* build the full mask and sanity-check while we're at it */ 592 fullmask = 0; 593 for (i = 0; vfscalls[i].name != NULL; i++) { 594 if (fullmask & vfscalls[i].bit) 595 errx(1, "problem exists between vi and chair"); 596 fullmask |= vfscalls[i].bit; 597 } 598 599 600 /* if "all" is present, it must be specified first */ 601 if (strncmp(buf, "all", strlen("all")) == 0) { 602 vfsbits = fullmask; 603 buf += strlen("all"); 604 if (*buf == ':') 605 buf++; 606 } 607 608 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 609 turnon = true; 610 if (strncmp(p, "no", strlen("no")) == 0) { 611 turnon = false; 612 p += strlen("no"); 613 } 614 615 for (i = 0; vfscalls[i].name; i++) { 616 if (strcmp(p, vfscalls[i].name) == 0) { 617 if (turnon) 618 vfsbits |= vfscalls[i].bit; 619 else 620 
vfsbits &= ~vfscalls[i].bit; 621 break; 622 } 623 } 624 if (vfscalls[i].name == NULL) { 625 errx(1, "invalid vfscall specifier %s", p); 626 } 627 } 628 } 629 630 static bool rumpsysctl = false; 631 632 static void 633 sysctlparser(char *buf) 634 { 635 636 if (buf == NULL) { 637 rumpsysctl = true; 638 return; 639 } 640 641 if (strcasecmp(buf, "y") == 0 || strcasecmp(buf, "yes") == 0 || 642 strcasecmp(buf, "yep") == 0 || strcasecmp(buf, "tottakai") == 0) { 643 rumpsysctl = true; 644 return; 645 } 646 if (strcasecmp(buf, "n") == 0 || strcasecmp(buf, "no") == 0) { 647 rumpsysctl = false; 648 return; 649 } 650 651 errx(1, "sysctl value should be y(es)/n(o), gave: %s", buf); 652 } 653 654 static void 655 fdoffparser(char *buf) 656 { 657 unsigned long fdoff; 658 char *ep; 659 660 if (*buf == '-') { 661 errx(1, "fdoff must not be negative"); 662 } 663 fdoff = strtoul(buf, &ep, 10); 664 if (*ep != '\0') 665 errx(1, "invalid fdoff specifier \"%s\"", buf); 666 if (fdoff >= INT_MAX/2 || fdoff < 3) 667 errx(1, "fdoff out of range"); 668 hijack_fdoff = fdoff; 669 } 670 671 static struct { 672 void (*parsefn)(char *); 673 const char *name; 674 bool needvalues; 675 } hijackparse[] = { 676 { sockparser, "socket", true }, 677 { pathparser, "path", true }, 678 { blanketparser, "blanket", true }, 679 { vfsparser, "vfs", true }, 680 { sysctlparser, "sysctl", false }, 681 { fdoffparser, "fdoff", true }, 682 { NULL, NULL, false }, 683 }; 684 685 static void 686 parsehijack(char *hijack) 687 { 688 char *p, *p2, *l; 689 const char *hijackcopy; 690 bool nop2; 691 int i; 692 693 if ((hijackcopy = strdup(hijack)) == NULL) 694 err(1, "strdup"); 695 696 /* disable everything explicitly */ 697 for (i = 0; i < PF_MAX; i++) 698 rumpsockets[i] = false; 699 700 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) { 701 nop2 = false; 702 p2 = strchr(p, '='); 703 if (!p2) { 704 nop2 = true; 705 p2 = p + strlen(p); 706 } 707 708 for (i = 0; hijackparse[i].parsefn; i++) { 709 if 
(strncmp(hijackparse[i].name, p, 710 (size_t)(p2-p)) == 0) { 711 if (nop2 && hijackparse[i].needvalues) 712 errx(1, "invalid hijack specifier: %s", 713 hijackcopy); 714 hijackparse[i].parsefn(nop2 ? NULL : p2+1); 715 break; 716 } 717 } 718 719 if (hijackparse[i].parsefn == NULL) 720 errx(1, "invalid hijack specifier name in %s", p); 721 } 722 723 } 724 725 static void __attribute__((constructor)) 726 rcinit(void) 727 { 728 char buf[1024]; 729 unsigned i, j; 730 731 host_fork = dlsym(RTLD_NEXT, "fork"); 732 host_daemon = dlsym(RTLD_NEXT, "daemon"); 733 host_mmap = dlsym(RTLD_NEXT, "mmap"); 734 735 /* 736 * In theory cannot print anything during lookups because 737 * we might not have the call vector set up. so, the errx() 738 * is a bit of a strech, but it might work. 739 */ 740 741 for (i = 0; i < DUALCALL__NUM; i++) { 742 /* build runtime O(1) access */ 743 for (j = 0; j < __arraycount(syscnames); j++) { 744 if (syscnames[j].scm_callnum == i) 745 break; 746 } 747 748 if (j == __arraycount(syscnames)) 749 errx(1, "rumphijack error: syscall pos %d missing", i); 750 751 syscalls[i].bs_host = dlsym(RTLD_NEXT, 752 syscnames[j].scm_hostname); 753 if (syscalls[i].bs_host == NULL) 754 errx(1, "hostcall %s not found!", 755 syscnames[j].scm_hostname); 756 757 syscalls[i].bs_rump = dlsym(RTLD_NEXT, 758 syscnames[j].scm_rumpname); 759 if (syscalls[i].bs_rump == NULL) 760 errx(1, "rumpcall %s not found!", 761 syscnames[j].scm_rumpname); 762 } 763 764 if (rumpclient_init() == -1) 765 err(1, "rumpclient init"); 766 767 /* check which syscalls we're supposed to hijack */ 768 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) { 769 strcpy(buf, RUMPHIJACK_DEFAULT); 770 } 771 parsehijack(buf); 772 773 /* set client persistence level */ 774 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) { 775 if (strcmp(buf, "die") == 0) 776 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE); 777 else if (strcmp(buf, "inftime") == 0) 778 
rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME); 779 else if (strcmp(buf, "once") == 0) 780 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE); 781 else { 782 time_t timeout; 783 char *ep; 784 785 timeout = (time_t)strtoll(buf, &ep, 10); 786 if (timeout <= 0 || ep != buf + strlen(buf)) 787 errx(1, "RUMPHIJACK_RETRYCONNECT must be " 788 "keyword or integer, got: %s", buf); 789 790 rumpclient_setconnretry(timeout); 791 } 792 } 793 794 if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) { 795 if (sscanf(buf, "%u,%u,%u", 796 &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) { 797 warnx("invalid dup2mask: %s", buf); 798 memset(dup2vec, 0, sizeof(dup2vec)); 799 } 800 unsetenv("RUMPHIJACK__DUP2INFO"); 801 } 802 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) { 803 pwdinrump = true; 804 unsetenv("RUMPHIJACK__PWDINRUMP"); 805 } 806 } 807 808 static int 809 fd_rump2host(int fd) 810 { 811 812 if (fd == -1) 813 return fd; 814 return fd + hijack_fdoff; 815 } 816 817 static int 818 fd_rump2host_withdup(int fd) 819 { 820 int hfd; 821 822 _DIAGASSERT(fd != -1); 823 hfd = unmapdup2(fd); 824 if (hfd != -1) { 825 _DIAGASSERT(hfd <= DUP2HIGH); 826 return hfd; 827 } 828 return fd_rump2host(fd); 829 } 830 831 static int 832 fd_host2rump(int fd) 833 { 834 835 if (!isdup2d(fd)) 836 return fd - hijack_fdoff; 837 else 838 return mapdup2(fd); 839 } 840 841 static bool 842 fd_isrump(int fd) 843 { 844 845 return isdup2d(fd) || fd >= hijack_fdoff; 846 } 847 848 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= hijack_fdoff) 849 850 static enum pathtype 851 path_isrump(const char *path) 852 { 853 size_t plen; 854 int i; 855 856 if (rumpprefix == NULL && nblanket == 0) 857 return PATH_HOST; 858 859 if (*path == '/') { 860 plen = strlen(path); 861 if (rumpprefix && plen >= rumpprefixlen) { 862 if (strncmp(path, rumpprefix, rumpprefixlen) == 0 863 && (plen == rumpprefixlen 864 || *(path + rumpprefixlen) == '/')) { 865 return PATH_RUMP; 866 } 867 } 868 for (i = 0; i < 
nblanket; i++) { 869 if (strncmp(path, blanket[i].pfx, blanket[i].len) == 0) 870 return PATH_RUMPBLANKET; 871 } 872 873 return PATH_HOST; 874 } else { 875 return pwdinrump ? PATH_RUMP : PATH_HOST; 876 } 877 } 878 879 static const char *rootpath = "/"; 880 static const char * 881 path_host2rump(const char *path) 882 { 883 const char *rv; 884 885 if (*path == '/') { 886 rv = path + rumpprefixlen; 887 if (*rv == '\0') 888 rv = rootpath; 889 } else { 890 rv = path; 891 } 892 893 return rv; 894 } 895 896 static int 897 dodup(int oldd, int minfd) 898 { 899 int (*op_fcntl)(int, int, ...); 900 int newd; 901 int isrump; 902 903 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd)); 904 if (fd_isrump(oldd)) { 905 op_fcntl = GETSYSCALL(rump, FCNTL); 906 oldd = fd_host2rump(oldd); 907 if (minfd >= hijack_fdoff) 908 minfd -= hijack_fdoff; 909 isrump = 1; 910 } else { 911 op_fcntl = GETSYSCALL(host, FCNTL); 912 isrump = 0; 913 } 914 915 newd = op_fcntl(oldd, F_DUPFD, minfd); 916 917 if (isrump) 918 newd = fd_rump2host(newd); 919 DPRINTF(("dup <- %d\n", newd)); 920 921 return newd; 922 } 923 924 /* 925 * Check that host fd value does not exceed fdoffset and if necessary 926 * dup the file descriptor so that it doesn't collide with the dup2mask. 927 */ 928 static int 929 fd_host2host(int fd) 930 { 931 int (*op_fcntl)(int, int, ...) = GETSYSCALL(host, FCNTL); 932 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 933 int ofd, i; 934 935 if (fd >= hijack_fdoff) { 936 op_close(fd); 937 errno = ENFILE; 938 return -1; 939 } 940 941 for (i = 1; isdup2d(fd); i++) { 942 ofd = fd; 943 fd = op_fcntl(ofd, F_DUPFD, i); 944 op_close(ofd); 945 } 946 947 return fd; 948 } 949 950 int 951 open(const char *path, int flags, ...) 
952 { 953 int (*op_open)(const char *, int, ...); 954 bool isrump; 955 va_list ap; 956 enum pathtype pt; 957 int fd; 958 959 DPRINTF(("open -> %s (%s)\n", path, whichpath(path))); 960 961 if ((pt = path_isrump(path)) != PATH_HOST) { 962 if (pt == PATH_RUMP) 963 path = path_host2rump(path); 964 op_open = GETSYSCALL(rump, OPEN); 965 isrump = true; 966 } else { 967 op_open = GETSYSCALL(host, OPEN); 968 isrump = false; 969 } 970 971 va_start(ap, flags); 972 fd = op_open(path, flags, va_arg(ap, mode_t)); 973 va_end(ap); 974 975 if (isrump) 976 fd = fd_rump2host(fd); 977 else 978 fd = fd_host2host(fd); 979 980 DPRINTF(("open <- %d (%s)\n", fd, whichfd(fd))); 981 return fd; 982 } 983 984 int 985 chdir(const char *path) 986 { 987 int (*op_chdir)(const char *); 988 enum pathtype pt; 989 int rv; 990 991 if ((pt = path_isrump(path)) != PATH_HOST) { 992 op_chdir = GETSYSCALL(rump, CHDIR); 993 if (pt == PATH_RUMP) 994 path = path_host2rump(path); 995 } else { 996 op_chdir = GETSYSCALL(host, CHDIR); 997 } 998 999 rv = op_chdir(path); 1000 if (rv == 0) 1001 pwdinrump = pt != PATH_HOST; 1002 1003 return rv; 1004 } 1005 1006 int 1007 fchdir(int fd) 1008 { 1009 int (*op_fchdir)(int); 1010 bool isrump; 1011 int rv; 1012 1013 if (fd_isrump(fd)) { 1014 op_fchdir = GETSYSCALL(rump, FCHDIR); 1015 isrump = true; 1016 fd = fd_host2rump(fd); 1017 } else { 1018 op_fchdir = GETSYSCALL(host, FCHDIR); 1019 isrump = false; 1020 } 1021 1022 rv = op_fchdir(fd); 1023 if (rv == 0) { 1024 pwdinrump = isrump; 1025 } 1026 1027 return rv; 1028 } 1029 1030 int 1031 __getcwd(char *bufp, size_t len) 1032 { 1033 int (*op___getcwd)(char *, size_t); 1034 size_t prefixgap; 1035 bool iamslash; 1036 int rv; 1037 1038 if (pwdinrump && rumpprefix) { 1039 if (rumpprefix[rumpprefixlen-1] == '/') 1040 iamslash = true; 1041 else 1042 iamslash = false; 1043 1044 if (iamslash) 1045 prefixgap = rumpprefixlen - 1; /* ``//+path'' */ 1046 else 1047 prefixgap = rumpprefixlen; /* ``/pfx+/path'' */ 1048 if (len <= prefixgap) { 
1049 errno = ERANGE; 1050 return -1; 1051 } 1052 1053 op___getcwd = GETSYSCALL(rump, __GETCWD); 1054 rv = op___getcwd(bufp + prefixgap, len - prefixgap); 1055 if (rv == -1) 1056 return rv; 1057 1058 /* augment the "/" part only for a non-root path */ 1059 memcpy(bufp, rumpprefix, rumpprefixlen); 1060 1061 /* append / only to non-root cwd */ 1062 if (rv != 2) 1063 bufp[prefixgap] = '/'; 1064 1065 /* don't append extra slash in the purely-slash case */ 1066 if (rv == 2 && !iamslash) 1067 bufp[rumpprefixlen] = '\0'; 1068 } else if (pwdinrump) { 1069 /* assume blanket. we can't provide a prefix here */ 1070 op___getcwd = GETSYSCALL(rump, __GETCWD); 1071 rv = op___getcwd(bufp, len); 1072 } else { 1073 op___getcwd = GETSYSCALL(host, __GETCWD); 1074 rv = op___getcwd(bufp, len); 1075 } 1076 1077 return rv; 1078 }

/*
 * rename() cannot cross kernels: either both paths are classified as
 * host paths or neither is; a mix fails with EXDEV.  Paths classified
 * as PATH_RUMP are translated with path_host2rump() before the rump
 * kernel call; other non-host classifications are passed unchanged.
 *
 * Returns the result of the selected kernel's rename, or -1/EXDEV.
 */
int
rename(const char *from, const char *to)
{
	int (*op_rename)(const char *, const char *);
	enum pathtype ptf, ptt;

	if ((ptf = path_isrump(from)) != PATH_HOST) {
		if ((ptt = path_isrump(to)) == PATH_HOST) {
			errno = EXDEV;
			return -1;
		}

		if (ptf == PATH_RUMP)
			from = path_host2rump(from);
		if (ptt == PATH_RUMP)
			to = path_host2rump(to);
		op_rename = GETSYSCALL(rump, RENAME);
	} else {
		if (path_isrump(to) != PATH_HOST) {
			errno = EXDEV;
			return -1;
		}

		op_rename = GETSYSCALL(host, RENAME);
	}

	return op_rename(from, to);
}

/*
 * socket() entry point.  The protocol family decides the kernel:
 * families flagged in rumpsockets[] are created in the rump kernel,
 * everything else on the host.  The resulting descriptor is passed
 * through fd_rump2host() or fd_host2host() before being returned.
 */
int __socket30(int, int, int);
int
__socket30(int domain, int type, int protocol)
{
	int (*op_socket)(int, int, int);
	int fd;
	bool isrump;

	/*
	 * Check both bounds before indexing: a negative domain must not
	 * be used as an index into rumpsockets[].  Out-of-range domains
	 * are handed to the host, which rejects them as it sees fit.
	 */
	isrump = domain >= 0 && domain < PF_MAX && rumpsockets[domain];

	if (isrump)
		op_socket = GETSYSCALL(rump, SOCKET);
	else
		op_socket = GETSYSCALL(host, SOCKET);
	fd = op_socket(domain, type, protocol);

	if (isrump)
		fd = fd_rump2host(fd);
	else
		fd = fd_host2host(fd);
	DPRINTF(("socket <- %d\n", fd));

	return fd;
}

/*
 * accept() on the kernel that owns the listening socket.  A
 * descriptor successfully accepted from the rump kernel is converted
 * with fd_rump2host(); every other result (including a failed rump
 * accept) goes through fd_host2host().
 */
int
accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
	int (*op_accept)(int, struct sockaddr *, socklen_t *);
	int fd;
	bool isrump;

	isrump = fd_isrump(s);

	DPRINTF(("accept -> %d", s));
	if (isrump) {
		op_accept = GETSYSCALL(rump, ACCEPT);
		s = fd_host2rump(s);
	} else {
		op_accept = GETSYSCALL(host, ACCEPT);
	}
	fd = op_accept(s, addr, addrlen);
	if (fd != -1 && isrump)
		fd = fd_rump2host(fd);
	else
		fd = fd_host2host(fd);

	DPRINTF((" <- %d\n", fd));

	return fd;
}

/*
 * ioctl and fcntl are varargs calls and need special treatment
 */

/*
 * ioctl(): route by descriptor.  Exactly one variadic argument is
 * fetched, as a pointer, and forwarded to the selected kernel.
 */
int
ioctl(int fd, unsigned long cmd, ...)
{
	int (*op_ioctl)(int, unsigned long cmd, ...);
	va_list ap;
	int rv;

	DPRINTF(("ioctl -> %d\n", fd));
	if (fd_isrump(fd)) {
		fd = fd_host2rump(fd);
		op_ioctl = GETSYSCALL(rump, IOCTL);
	} else {
		op_ioctl = GETSYSCALL(host, IOCTL);
	}

	va_start(ap, cmd);
	rv = op_ioctl(fd, cmd, va_arg(ap, void *));
	va_end(ap);
	return rv;
}

/*
 * fcntl(): F_DUPFD, F_CLOSEM and F_MAXFD concern the descriptor
 * namespace as a whole and are handled explicitly; every other
 * command is routed to the kernel owning fd with one pointer-sized
 * variadic argument forwarded.
 */
int
fcntl(int fd, int cmd, ...)
{
	int (*op_fcntl)(int, int, ...);
	va_list ap;
	int rv, minfd, i, maxdup2;

	DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd));

	switch (cmd) {
	case F_DUPFD:
		va_start(ap, cmd);
		minfd = va_arg(ap, int);
		va_end(ap);
		return dodup(fd, minfd);

	case F_CLOSEM:
		/*
		 * So, if fd < HIJACKOFF, we want to do a host closem.
		 */

		if (fd < hijack_fdoff) {
			int closemfd = fd;

			if (rumpclient__closenotify(&closemfd,
			    RUMPCLIENT_CLOSE_FCLOSEM) == -1)
				return -1;
			op_fcntl = GETSYSCALL(host, FCNTL);
			rv = op_fcntl(closemfd, cmd);
			if (rv)
				return rv;
		}

		/*
		 * Additionally, we want to do a rump closem, but only
		 * for the file descriptors not dup2'd.
		 */

		for (i = 0, maxdup2 = 0; i <= DUP2HIGH; i++) {
			if (dup2vec[i] & DUP2BIT) {
				int val;

				val = dup2vec[i] & DUP2FDMASK;
				maxdup2 = MAX(val, maxdup2);
			}
		}

		if (fd >= hijack_fdoff)
			fd -= hijack_fdoff;
		else
			fd = 0;
		fd = MAX(maxdup2+1, fd);

		/* hmm, maybe we should close rump fd's not within dup2mask? */
		return rump_sys_fcntl(fd, F_CLOSEM);

	case F_MAXFD:
		/*
		 * For maxfd, if there's a rump kernel fd, return
		 * it hostified.  Otherwise, return host's MAXFD
		 * return value.
		 */
		if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) {
			/*
			 * This might go a little wrong in case
			 * of dup2 to [012], but I'm not sure if
			 * there's a justification for tracking
			 * that info.  Consider e.g.
			 * dup2(rumpfd, 2) followed by rump_sys_open()
			 * returning 1.  We should return 1+HIJACKOFF,
			 * not 2+HIJACKOFF.  However, if [01] is not
			 * open, the correct return value is 2.
			 */
			/* hostify the rump kernel's answer, not our argument */
			return fd_rump2host(rv);
		} else {
			op_fcntl = GETSYSCALL(host, FCNTL);
			return op_fcntl(fd, F_MAXFD);
		}
		/*NOTREACHED*/

	default:
		if (fd_isrump(fd)) {
			fd = fd_host2rump(fd);
			op_fcntl = GETSYSCALL(rump, FCNTL);
		} else {
			op_fcntl = GETSYSCALL(host, FCNTL);
		}

		va_start(ap, cmd);
		rv = op_fcntl(fd, cmd, va_arg(ap, void *));
		va_end(ap);
		return rv;
	}
	/*NOTREACHED*/
}

/*
 * close(): for rump descriptors, take the dup2 alias table into
 * account.  If fd itself is a dup2 alias, the rump descriptor is
 * closed and the alias entry cleared on success.  Otherwise, if
 * killdup2alias() reports true for the rump descriptor, 0 is returned
 * without issuing a close.  Host descriptors are announced to
 * rumpclient before the host close.
 */
int
close(int fd)
{
	int (*op_close)(int);
	int rv;

	DPRINTF(("close -> %d\n", fd));
	if (fd_isrump(fd)) {
		bool undup2 = false;
		int ofd;

		if (isdup2d(ofd = fd)) {
			undup2 = true;
		}

		fd = fd_host2rump(fd);
		if (!undup2 && killdup2alias(fd)) {
			return 0;
		}

		op_close = GETSYSCALL(rump, CLOSE);
		rv = op_close(fd);
		if (rv == 0 && undup2) {
			clrdup2(ofd);
		}
	} else {
		if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1)
			return -1;
		op_close = GETSYSCALL(host, CLOSE);
		rv = op_close(fd);
	}

	return rv;
}

/*
 * write cannot issue a standard debug printf due to recursion
 */
ssize_t
write(int fd, const void *buf, size_t blen)
{
	ssize_t (*op_write)(int, const void *, size_t);

	if (fd_isrump(fd)) {
		fd = fd_host2rump(fd);
		op_write = GETSYSCALL(rump, WRITE);
	} else {
		op_write = GETSYSCALL(host, WRITE);
	}

	return op_write(fd, buf, blen);
}

/*
 * dup2 is special.  we allow dup2 of a rump kernel fd to 0-2 since
 * many programs do that.  dup2 of a rump kernel fd to another value
 * not >= fdoff is an error.
 *
 * Note: cannot rump2host newd, because it is often hardcoded.
1340 */ 1341 int 1342 dup2(int oldd, int newd) 1343 { 1344 int (*host_dup2)(int, int); 1345 int rv; 1346 1347 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd)); 1348 1349 if (fd_isrump(oldd)) { 1350 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 1351 1352 /* only allow fd 0-2 for cross-kernel dup */ 1353 if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) { 1354 errno = EBADF; 1355 return -1; 1356 } 1357 1358 /* regular dup2? */ 1359 if (fd_isrump(newd)) { 1360 newd = fd_host2rump(newd); 1361 rv = rump_sys_dup2(oldd, newd); 1362 return fd_rump2host(rv); 1363 } 1364 1365 /* 1366 * dup2 rump => host? just establish an 1367 * entry in the mapping table. 1368 */ 1369 op_close(newd); 1370 setdup2(newd, fd_host2rump(oldd)); 1371 rv = 0; 1372 } else { 1373 host_dup2 = syscalls[DUALCALL_DUP2].bs_host; 1374 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1) 1375 return -1; 1376 rv = host_dup2(oldd, newd); 1377 } 1378 1379 return rv; 1380 } 1381 1382 int 1383 dup(int oldd) 1384 { 1385 1386 return dodup(oldd, 0); 1387 } 1388 1389 pid_t 1390 fork() 1391 { 1392 pid_t rv; 1393 1394 DPRINTF(("fork\n")); 1395 1396 rv = rumpclient__dofork(host_fork); 1397 1398 DPRINTF(("fork returns %d\n", rv)); 1399 return rv; 1400 } 1401 /* we do not have the luxury of not requiring a stackframe */ 1402 __strong_alias(__vfork14,fork); 1403 1404 int 1405 daemon(int nochdir, int noclose) 1406 { 1407 struct rumpclient_fork *rf; 1408 1409 if ((rf = rumpclient_prefork()) == NULL) 1410 return -1; 1411 1412 if (host_daemon(nochdir, noclose) == -1) 1413 return -1; 1414 1415 if (rumpclient_fork_init(rf) == -1) 1416 return -1; 1417 1418 return 0; 1419 } 1420 1421 int 1422 execve(const char *path, char *const argv[], char *const envp[]) 1423 { 1424 char buf[128]; 1425 char *dup2str; 1426 const char *pwdinrumpstr; 1427 char **newenv; 1428 size_t nelem; 1429 int rv, sverrno; 1430 int bonus = 2, i = 0; 1431 1432 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u", 1433 dup2vec[0], 
dup2vec[1], dup2vec[2]); 1434 dup2str = strdup(buf); 1435 if (dup2str == NULL) { 1436 errno = ENOMEM; 1437 return -1; 1438 } 1439 1440 if (pwdinrump) { 1441 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true"; 1442 bonus++; 1443 } else { 1444 pwdinrumpstr = NULL; 1445 } 1446 1447 for (nelem = 0; envp && envp[nelem]; nelem++) 1448 continue; 1449 newenv = malloc(sizeof(*newenv) * (nelem+bonus)); 1450 if (newenv == NULL) { 1451 free(dup2str); 1452 errno = ENOMEM; 1453 return -1; 1454 } 1455 memcpy(newenv, envp, nelem*sizeof(*newenv)); 1456 newenv[nelem+i] = dup2str; 1457 i++; 1458 1459 if (pwdinrumpstr) { 1460 newenv[nelem+i] = __UNCONST(pwdinrumpstr); 1461 i++; 1462 } 1463 newenv[nelem+i] = NULL; 1464 _DIAGASSERT(i < bonus); 1465 1466 rv = rumpclient_exec(path, argv, newenv); 1467 1468 _DIAGASSERT(rv != 0); 1469 sverrno = errno; 1470 free(newenv); 1471 free(dup2str); 1472 errno = sverrno; 1473 return rv; 1474 } 1475 1476 /* 1477 * select is done by calling poll. 1478 */ 1479 int 1480 REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 1481 struct timeval *timeout) 1482 { 1483 struct pollfd *pfds; 1484 struct timespec ts, *tsp = NULL; 1485 nfds_t realnfds; 1486 int i, j; 1487 int rv, incr; 1488 1489 DPRINTF(("select\n")); 1490 1491 /* 1492 * Well, first we must scan the fds to figure out how many 1493 * fds there really are. This is because up to and including 1494 * nb5 poll() silently refuses nfds > process_maxopen_fds. 1495 * Seems to be fixed in current, thank the maker. 1496 * god damn cluster...bomb. 
1497 */ 1498 1499 for (i = 0, realnfds = 0; i < nfds; i++) { 1500 if (readfds && FD_ISSET(i, readfds)) { 1501 realnfds++; 1502 continue; 1503 } 1504 if (writefds && FD_ISSET(i, writefds)) { 1505 realnfds++; 1506 continue; 1507 } 1508 if (exceptfds && FD_ISSET(i, exceptfds)) { 1509 realnfds++; 1510 continue; 1511 } 1512 } 1513 1514 if (realnfds) { 1515 pfds = calloc(realnfds, sizeof(*pfds)); 1516 if (!pfds) 1517 return -1; 1518 } else { 1519 pfds = NULL; 1520 } 1521 1522 for (i = 0, j = 0; i < nfds; i++) { 1523 incr = 0; 1524 if (readfds && FD_ISSET(i, readfds)) { 1525 pfds[j].fd = i; 1526 pfds[j].events |= POLLIN; 1527 incr=1; 1528 } 1529 if (writefds && FD_ISSET(i, writefds)) { 1530 pfds[j].fd = i; 1531 pfds[j].events |= POLLOUT; 1532 incr=1; 1533 } 1534 if (exceptfds && FD_ISSET(i, exceptfds)) { 1535 pfds[j].fd = i; 1536 pfds[j].events |= POLLHUP|POLLERR; 1537 incr=1; 1538 } 1539 if (incr) 1540 j++; 1541 } 1542 assert(j == (int)realnfds); 1543 1544 if (timeout) { 1545 TIMEVAL_TO_TIMESPEC(timeout, &ts); 1546 tsp = &ts; 1547 } 1548 rv = REALPOLLTS(pfds, realnfds, tsp, NULL); 1549 /* 1550 * "If select() returns with an error the descriptor sets 1551 * will be unmodified" 1552 */ 1553 if (rv < 0) 1554 goto out; 1555 1556 /* 1557 * zero out results (can't use FD_ZERO for the 1558 * obvious select-me-not reason). whee. 1559 * 1560 * We do this here since some software ignores the return 1561 * value of select, and hence if the timeout expires, it may 1562 * assume all input descriptors have activity. 1563 */ 1564 for (i = 0; i < nfds; i++) { 1565 if (readfds) 1566 FD_CLR(i, readfds); 1567 if (writefds) 1568 FD_CLR(i, writefds); 1569 if (exceptfds) 1570 FD_CLR(i, exceptfds); 1571 } 1572 if (rv == 0) 1573 goto out; 1574 1575 /* 1576 * We have >0 fds with activity. Harvest the results. 
1577 */ 1578 for (i = 0; i < (int)realnfds; i++) { 1579 if (readfds) { 1580 if (pfds[i].revents & POLLIN) { 1581 FD_SET(pfds[i].fd, readfds); 1582 } 1583 } 1584 if (writefds) { 1585 if (pfds[i].revents & POLLOUT) { 1586 FD_SET(pfds[i].fd, writefds); 1587 } 1588 } 1589 if (exceptfds) { 1590 if (pfds[i].revents & (POLLHUP|POLLERR)) { 1591 FD_SET(pfds[i].fd, exceptfds); 1592 } 1593 } 1594 } 1595 1596 out: 1597 free(pfds); 1598 return rv; 1599 } 1600 1601 static void 1602 checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall) 1603 { 1604 nfds_t i; 1605 1606 for (i = 0; i < nfds; i++) { 1607 if (fds[i].fd == -1) 1608 continue; 1609 1610 if (fd_isrump(fds[i].fd)) 1611 (*rumpcall)++; 1612 else 1613 (*hostcall)++; 1614 } 1615 } 1616 1617 static void 1618 adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int)) 1619 { 1620 nfds_t i; 1621 1622 for (i = 0; i < nfds; i++) { 1623 fds[i].fd = fdadj(fds[i].fd); 1624 } 1625 } 1626 1627 /* 1628 * poll is easy as long as the call comes in the fds only in one 1629 * kernel. otherwise its quite tricky... 
1630 */ 1631 struct pollarg { 1632 struct pollfd *pfds; 1633 nfds_t nfds; 1634 const struct timespec *ts; 1635 const sigset_t *sigmask; 1636 int pipefd; 1637 int errnum; 1638 }; 1639 1640 static void * 1641 hostpoll(void *arg) 1642 { 1643 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1644 const sigset_t *); 1645 struct pollarg *parg = arg; 1646 intptr_t rv; 1647 1648 op_pollts = GETSYSCALL(host, POLLTS); 1649 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask); 1650 if (rv == -1) 1651 parg->errnum = errno; 1652 rump_sys_write(parg->pipefd, &rv, sizeof(rv)); 1653 1654 return (void *)(intptr_t)rv; 1655 } 1656 1657 int 1658 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts, 1659 const sigset_t *sigmask) 1660 { 1661 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1662 const sigset_t *); 1663 int (*host_close)(int); 1664 int hostcall = 0, rumpcall = 0; 1665 pthread_t pt; 1666 nfds_t i; 1667 int rv; 1668 1669 DPRINTF(("poll\n")); 1670 checkpoll(fds, nfds, &hostcall, &rumpcall); 1671 1672 if (hostcall && rumpcall) { 1673 struct pollfd *pfd_host = NULL, *pfd_rump = NULL; 1674 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1}; 1675 struct pollarg parg; 1676 uintptr_t lrv; 1677 int sverrno = 0, trv; 1678 1679 /* 1680 * ok, this is where it gets tricky. We must support 1681 * this since it's a very common operation in certain 1682 * types of software (telnet, netcat, etc). We allocate 1683 * two vectors and run two poll commands in separate 1684 * threads. Whichever returns first "wins" and the 1685 * other kernel's fds won't show activity. 1686 */ 1687 rv = -1; 1688 1689 /* allocate full vector for O(n) joining after call */ 1690 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1)); 1691 if (!pfd_host) 1692 goto out; 1693 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1)); 1694 if (!pfd_rump) { 1695 goto out; 1696 } 1697 1698 /* 1699 * then, open two pipes, one for notifications 1700 * to each kernel. 
1701 * 1702 * At least the rump pipe should probably be 1703 * cached, along with the helper threads. This 1704 * should give a microbenchmark improvement (haven't 1705 * experienced a macro-level problem yet, though). 1706 */ 1707 if ((rv = rump_sys_pipe(rpipe)) == -1) { 1708 sverrno = errno; 1709 } 1710 if (rv == 0 && (rv = pipe(hpipe)) == -1) { 1711 sverrno = errno; 1712 } 1713 1714 /* split vectors (or signal errors) */ 1715 for (i = 0; i < nfds; i++) { 1716 int fd; 1717 1718 fds[i].revents = 0; 1719 if (fds[i].fd == -1) { 1720 pfd_host[i].fd = -1; 1721 pfd_rump[i].fd = -1; 1722 } else if (fd_isrump(fds[i].fd)) { 1723 pfd_host[i].fd = -1; 1724 fd = fd_host2rump(fds[i].fd); 1725 if (fd == rpipe[0] || fd == rpipe[1]) { 1726 fds[i].revents = POLLNVAL; 1727 if (rv != -1) 1728 rv++; 1729 } 1730 pfd_rump[i].fd = fd; 1731 pfd_rump[i].events = fds[i].events; 1732 } else { 1733 pfd_rump[i].fd = -1; 1734 fd = fds[i].fd; 1735 if (fd == hpipe[0] || fd == hpipe[1]) { 1736 fds[i].revents = POLLNVAL; 1737 if (rv != -1) 1738 rv++; 1739 } 1740 pfd_host[i].fd = fd; 1741 pfd_host[i].events = fds[i].events; 1742 } 1743 pfd_rump[i].revents = pfd_host[i].revents = 0; 1744 } 1745 if (rv) { 1746 goto out; 1747 } 1748 1749 pfd_host[nfds].fd = hpipe[0]; 1750 pfd_host[nfds].events = POLLIN; 1751 pfd_rump[nfds].fd = rpipe[0]; 1752 pfd_rump[nfds].events = POLLIN; 1753 1754 /* 1755 * then, create a thread to do host part and meanwhile 1756 * do rump kernel part right here 1757 */ 1758 1759 parg.pfds = pfd_host; 1760 parg.nfds = nfds+1; 1761 parg.ts = ts; 1762 parg.sigmask = sigmask; 1763 parg.pipefd = rpipe[1]; 1764 pthread_create(&pt, NULL, hostpoll, &parg); 1765 1766 op_pollts = GETSYSCALL(rump, POLLTS); 1767 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL); 1768 sverrno = errno; 1769 write(hpipe[1], &rv, sizeof(rv)); 1770 pthread_join(pt, (void *)&trv); 1771 1772 /* check who "won" and merge results */ 1773 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) { 1774 rv = trv; 1775 1776 for (i = 
0; i < nfds; i++) { 1777 if (pfd_rump[i].fd != -1) 1778 fds[i].revents = pfd_rump[i].revents; 1779 } 1780 sverrno = parg.errnum; 1781 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) { 1782 rv = trv; 1783 1784 for (i = 0; i < nfds; i++) { 1785 if (pfd_host[i].fd != -1) 1786 fds[i].revents = pfd_host[i].revents; 1787 } 1788 } else { 1789 rv = 0; 1790 } 1791 1792 out: 1793 host_close = GETSYSCALL(host, CLOSE); 1794 if (rpipe[0] != -1) 1795 rump_sys_close(rpipe[0]); 1796 if (rpipe[1] != -1) 1797 rump_sys_close(rpipe[1]); 1798 if (hpipe[0] != -1) 1799 host_close(hpipe[0]); 1800 if (hpipe[1] != -1) 1801 host_close(hpipe[1]); 1802 free(pfd_host); 1803 free(pfd_rump); 1804 errno = sverrno; 1805 } else { 1806 if (hostcall) { 1807 op_pollts = GETSYSCALL(host, POLLTS); 1808 } else { 1809 op_pollts = GETSYSCALL(rump, POLLTS); 1810 adjustpoll(fds, nfds, fd_host2rump); 1811 } 1812 1813 rv = op_pollts(fds, nfds, ts, sigmask); 1814 if (rumpcall) 1815 adjustpoll(fds, nfds, fd_rump2host_withdup); 1816 } 1817 1818 return rv; 1819 } 1820 1821 int 1822 poll(struct pollfd *fds, nfds_t nfds, int timeout) 1823 { 1824 struct timespec ts; 1825 struct timespec *tsp = NULL; 1826 1827 if (timeout != INFTIM) { 1828 ts.tv_sec = timeout / 1000; 1829 ts.tv_nsec = (timeout % 1000) * 1000*1000; 1830 1831 tsp = &ts; 1832 } 1833 1834 return REALPOLLTS(fds, nfds, tsp, NULL); 1835 } 1836 1837 int 1838 REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges, 1839 struct kevent *eventlist, size_t nevents, 1840 const struct timespec *timeout) 1841 { 1842 int (*op_kevent)(int, const struct kevent *, size_t, 1843 struct kevent *, size_t, const struct timespec *); 1844 const struct kevent *ev; 1845 size_t i; 1846 1847 /* 1848 * Check that we don't attempt to kevent rump kernel fd's. 1849 * That needs similar treatment to select/poll, but is slightly 1850 * trickier since we need to manage to different kq descriptors. 1851 * (TODO, in case you're wondering). 
1852 */ 1853 for (i = 0; i < nchanges; i++) { 1854 ev = &changelist[i]; 1855 if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE || 1856 ev->filter == EVFILT_VNODE) { 1857 if (fd_isrump((int)ev->ident)) { 1858 errno = ENOTSUP; 1859 return -1; 1860 } 1861 } 1862 } 1863 1864 op_kevent = GETSYSCALL(host, KEVENT); 1865 return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout); 1866 } 1867 1868 /* 1869 * mmapping from a rump kernel is not supported, so disallow it. 1870 */ 1871 void * 1872 mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) 1873 { 1874 1875 if (flags & MAP_FILE && fd_isrump(fd)) { 1876 errno = ENOSYS; 1877 return MAP_FAILED; 1878 } 1879 return host_mmap(addr, len, prot, flags, fd, offset); 1880 } 1881 1882 /* 1883 * these go to one or the other on a per-process configuration 1884 */ 1885 int __sysctl(const int *, unsigned int, void *, size_t *, const void *, size_t); 1886 int 1887 __sysctl(const int *name, unsigned int namelen, void *old, size_t *oldlenp, 1888 const void *new, size_t newlen) 1889 { 1890 int (*op___sysctl)(const int *, unsigned int, void *, size_t *, 1891 const void *, size_t); 1892 1893 if (rumpsysctl) { 1894 op___sysctl = GETSYSCALL(rump, __SYSCTL); 1895 } else { 1896 op___sysctl = GETSYSCALL(host, __SYSCTL); 1897 /* we haven't inited yet */ 1898 if (__predict_false(op___sysctl == NULL)) { 1899 op___sysctl = rumphijack_dlsym(RTLD_NEXT, "__sysctl"); 1900 } 1901 } 1902 1903 return op___sysctl(name, namelen, old, oldlenp, new, newlen); 1904 } 1905 1906 /* 1907 * Rest are std type calls. 
1908 */ 1909 1910 FDCALL(int, bind, DUALCALL_BIND, \ 1911 (int fd, const struct sockaddr *name, socklen_t namelen), \ 1912 (int, const struct sockaddr *, socklen_t), \ 1913 (fd, name, namelen)) 1914 1915 FDCALL(int, connect, DUALCALL_CONNECT, \ 1916 (int fd, const struct sockaddr *name, socklen_t namelen), \ 1917 (int, const struct sockaddr *, socklen_t), \ 1918 (fd, name, namelen)) 1919 1920 FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \ 1921 (int fd, struct sockaddr *name, socklen_t *namelen), \ 1922 (int, struct sockaddr *, socklen_t *), \ 1923 (fd, name, namelen)) 1924 1925 FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \ 1926 (int fd, struct sockaddr *name, socklen_t *namelen), \ 1927 (int, struct sockaddr *, socklen_t *), \ 1928 (fd, name, namelen)) 1929 1930 FDCALL(int, listen, DUALCALL_LISTEN, \ 1931 (int fd, int backlog), \ 1932 (int, int), \ 1933 (fd, backlog)) 1934 1935 FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \ 1936 (int fd, void *buf, size_t len, int flags, \ 1937 struct sockaddr *from, socklen_t *fromlen), \ 1938 (int, void *, size_t, int, struct sockaddr *, socklen_t *), \ 1939 (fd, buf, len, flags, from, fromlen)) 1940 1941 FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \ 1942 (int fd, const void *buf, size_t len, int flags, \ 1943 const struct sockaddr *to, socklen_t tolen), \ 1944 (int, const void *, size_t, int, \ 1945 const struct sockaddr *, socklen_t), \ 1946 (fd, buf, len, flags, to, tolen)) 1947 1948 FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG, \ 1949 (int fd, struct msghdr *msg, int flags), \ 1950 (int, struct msghdr *, int), \ 1951 (fd, msg, flags)) 1952 1953 FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG, \ 1954 (int fd, const struct msghdr *msg, int flags), \ 1955 (int, const struct msghdr *, int), \ 1956 (fd, msg, flags)) 1957 1958 FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \ 1959 (int fd, int level, int optn, void *optval, socklen_t *optlen), \ 1960 (int, int, int, void *, socklen_t *), \ 1961 (fd, level, optn, optval, optlen)) 1962 1963 
FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \ 1964 (int fd, int level, int optn, \ 1965 const void *optval, socklen_t optlen), \ 1966 (int, int, int, const void *, socklen_t), \ 1967 (fd, level, optn, optval, optlen)) 1968 1969 FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \ 1970 (int fd, int how), \ 1971 (int, int), \ 1972 (fd, how)) 1973 1974 FDCALL(ssize_t, REALREAD, DUALCALL_READ, \ 1975 (int fd, void *buf, size_t buflen), \ 1976 (int, void *, size_t), \ 1977 (fd, buf, buflen)) 1978 1979 FDCALL(ssize_t, readv, DUALCALL_READV, \ 1980 (int fd, const struct iovec *iov, int iovcnt), \ 1981 (int, const struct iovec *, int), \ 1982 (fd, iov, iovcnt)) 1983 1984 FDCALL(ssize_t, REALPREAD, DUALCALL_PREAD, \ 1985 (int fd, void *buf, size_t nbytes, off_t offset), \ 1986 (int, void *, size_t, off_t), \ 1987 (fd, buf, nbytes, offset)) 1988 1989 FDCALL(ssize_t, preadv, DUALCALL_PREADV, \ 1990 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \ 1991 (int, const struct iovec *, int, off_t), \ 1992 (fd, iov, iovcnt, offset)) 1993 1994 FDCALL(ssize_t, writev, DUALCALL_WRITEV, \ 1995 (int fd, const struct iovec *iov, int iovcnt), \ 1996 (int, const struct iovec *, int), \ 1997 (fd, iov, iovcnt)) 1998 1999 FDCALL(ssize_t, REALPWRITE, DUALCALL_PWRITE, \ 2000 (int fd, const void *buf, size_t nbytes, off_t offset), \ 2001 (int, const void *, size_t, off_t), \ 2002 (fd, buf, nbytes, offset)) 2003 2004 FDCALL(ssize_t, pwritev, DUALCALL_PWRITEV, \ 2005 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \ 2006 (int, const struct iovec *, int, off_t), \ 2007 (fd, iov, iovcnt, offset)) 2008 2009 FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \ 2010 (int fd, struct stat *sb), \ 2011 (int, struct stat *), \ 2012 (fd, sb)) 2013 2014 FDCALL(int, fstatvfs1, DUALCALL_FSTATVFS1, \ 2015 (int fd, struct statvfs *buf, int flags), \ 2016 (int, struct statvfs *, int), \ 2017 (fd, buf, flags)) 2018 2019 FDCALL(off_t, lseek, DUALCALL_LSEEK, \ 2020 (int fd, off_t offset, int whence), \ 2021 
(int, off_t, int), \ 2022 (fd, offset, whence)) 2023 __strong_alias(_lseek,lseek); 2024 2025 FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \ 2026 (int fd, char *buf, size_t nbytes), \ 2027 (int, char *, size_t), \ 2028 (fd, buf, nbytes)) 2029 2030 FDCALL(int, fchown, DUALCALL_FCHOWN, \ 2031 (int fd, uid_t owner, gid_t group), \ 2032 (int, uid_t, gid_t), \ 2033 (fd, owner, group)) 2034 2035 FDCALL(int, fchmod, DUALCALL_FCHMOD, \ 2036 (int fd, mode_t mode), \ 2037 (int, mode_t), \ 2038 (fd, mode)) 2039 2040 FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \ 2041 (int fd, off_t length), \ 2042 (int, off_t), \ 2043 (fd, length)) 2044 2045 FDCALL(int, fsync, DUALCALL_FSYNC, \ 2046 (int fd), \ 2047 (int), \ 2048 (fd)) 2049 2050 FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \ 2051 (int fd, int how, off_t start, off_t length), \ 2052 (int, int, off_t, off_t), \ 2053 (fd, how, start, length)) 2054 2055 FDCALL(int, futimes, DUALCALL_FUTIMES, \ 2056 (int fd, const struct timeval *tv), \ 2057 (int, const struct timeval *), \ 2058 (fd, tv)) 2059 2060 FDCALL(int, fchflags, DUALCALL_FCHFLAGS, \ 2061 (int fd, u_long flags), \ 2062 (int, u_long), \ 2063 (fd, flags)) 2064 2065 /* 2066 * path-based selectors 2067 */ 2068 2069 PATHCALL(int, REALSTAT, DUALCALL_STAT, \ 2070 (const char *path, struct stat *sb), \ 2071 (const char *, struct stat *), \ 2072 (path, sb)) 2073 2074 PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \ 2075 (const char *path, struct stat *sb), \ 2076 (const char *, struct stat *), \ 2077 (path, sb)) 2078 2079 PATHCALL(int, chown, DUALCALL_CHOWN, \ 2080 (const char *path, uid_t owner, gid_t group), \ 2081 (const char *, uid_t, gid_t), \ 2082 (path, owner, group)) 2083 2084 PATHCALL(int, lchown, DUALCALL_LCHOWN, \ 2085 (const char *path, uid_t owner, gid_t group), \ 2086 (const char *, uid_t, gid_t), \ 2087 (path, owner, group)) 2088 2089 PATHCALL(int, chmod, DUALCALL_CHMOD, \ 2090 (const char *path, mode_t mode), \ 2091 (const char *, mode_t), \ 2092 (path, mode)) 2093 2094 
PATHCALL(int, lchmod, DUALCALL_LCHMOD, \ 2095 (const char *path, mode_t mode), \ 2096 (const char *, mode_t), \ 2097 (path, mode)) 2098 2099 PATHCALL(int, statvfs1, DUALCALL_STATVFS1, \ 2100 (const char *path, struct statvfs *buf, int flags), \ 2101 (const char *, struct statvfs *, int), \ 2102 (path, buf, flags)) 2103 2104 PATHCALL(int, unlink, DUALCALL_UNLINK, \ 2105 (const char *path), \ 2106 (const char *), \ 2107 (path)) 2108 2109 PATHCALL(int, symlink, DUALCALL_SYMLINK, \ 2110 (const char *target, const char *path), \ 2111 (const char *, const char *), \ 2112 (target, path)) 2113 2114 PATHCALL(ssize_t, readlink, DUALCALL_READLINK, \ 2115 (const char *path, char *buf, size_t bufsiz), \ 2116 (const char *, char *, size_t), \ 2117 (path, buf, bufsiz)) 2118 2119 PATHCALL(int, mkdir, DUALCALL_MKDIR, \ 2120 (const char *path, mode_t mode), \ 2121 (const char *, mode_t), \ 2122 (path, mode)) 2123 2124 PATHCALL(int, rmdir, DUALCALL_RMDIR, \ 2125 (const char *path), \ 2126 (const char *), \ 2127 (path)) 2128 2129 PATHCALL(int, utimes, DUALCALL_UTIMES, \ 2130 (const char *path, const struct timeval *tv), \ 2131 (const char *, const struct timeval *), \ 2132 (path, tv)) 2133 2134 PATHCALL(int, lutimes, DUALCALL_LUTIMES, \ 2135 (const char *path, const struct timeval *tv), \ 2136 (const char *, const struct timeval *), \ 2137 (path, tv)) 2138 2139 PATHCALL(int, chflags, DUALCALL_CHFLAGS, \ 2140 (const char *path, u_long flags), \ 2141 (const char *, u_long), \ 2142 (path, flags)) 2143 2144 PATHCALL(int, lchflags, DUALCALL_LCHFLAGS, \ 2145 (const char *path, u_long flags), \ 2146 (const char *, u_long), \ 2147 (path, flags)) 2148 2149 PATHCALL(int, truncate, DUALCALL_TRUNCATE, \ 2150 (const char *path, off_t length), \ 2151 (const char *, off_t), \ 2152 (path, length)) 2153 2154 PATHCALL(int, access, DUALCALL_ACCESS, \ 2155 (const char *path, int mode), \ 2156 (const char *, int), \ 2157 (path, mode)) 2158 2159 PATHCALL(int, REALMKNOD, DUALCALL_MKNOD, \ 2160 (const char 
*path, mode_t mode, dev_t dev), \ 2161 (const char *, mode_t, dev_t), \ 2162 (path, mode, dev)) 2163 2164 /* 2165 * Note: with mount the decisive parameter is the mount 2166 * destination directory. This is because we don't really know 2167 * about the "source" directory in a generic call (and besides, 2168 * it might not even exist, cf. nfs). 2169 */ 2170 PATHCALL(int, REALMOUNT, DUALCALL_MOUNT, \ 2171 (const char *type, const char *path, int flags, \ 2172 void *data, size_t dlen), \ 2173 (const char *, const char *, int, void *, size_t), \ 2174 (type, path, flags, data, dlen)) 2175 2176 PATHCALL(int, unmount, DUALCALL_UNMOUNT, \ 2177 (const char *path, int flags), \ 2178 (const char *, int), \ 2179 (path, flags)) 2180 2181 #if __NetBSD_Prereq__(5,99,48) 2182 PATHCALL(int, REALQUOTACTL, DUALCALL_QUOTACTL, \ 2183 (const char *path, struct plistref *p), \ 2184 (const char *, struct plistref *), \ 2185 (path, p)) 2186 #endif 2187 2188 PATHCALL(int, REALGETFH, DUALCALL_GETFH, \ 2189 (const char *path, void *fhp, size_t *fh_size), \ 2190 (const char *, void *, size_t *), \ 2191 (path, fhp, fh_size)) 2192 2193 /* 2194 * These act different on a per-process vfs configuration 2195 */ 2196 2197 VFSCALL(VFSBIT_GETVFSSTAT, int, getvfsstat, DUALCALL_GETVFSSTAT, \ 2198 (struct statvfs *buf, size_t buflen, int flags), \ 2199 (struct statvfs *, size_t, int), \ 2200 (buf, buflen, flags)) 2201 2202 VFSCALL(VFSBIT_FHCALLS, int, REALFHOPEN, DUALCALL_FHOPEN, \ 2203 (const void *fhp, size_t fh_size, int flags), \ 2204 (const char *, size_t, int), \ 2205 (fhp, fh_size, flags)) 2206 2207 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTAT, DUALCALL_FHSTAT, \ 2208 (const void *fhp, size_t fh_size, struct stat *sb), \ 2209 (const char *, size_t, struct stat *), \ 2210 (fhp, fh_size, sb)) 2211 2212 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTATVFS1, DUALCALL_FHSTATVFS1, \ 2213 (const void *fhp, size_t fh_size, struct statvfs *sb, int flgs),\ 2214 (const char *, size_t, struct statvfs *, int), \ 2215 (fhp, 
fh_size, sb, flgs)) 2216 2217 /* finally, put nfssvc here. "keep the namespace clean" */ 2218 2219 #include <nfs/rpcv2.h> 2220 #include <nfs/nfs.h> 2221 2222 int 2223 nfssvc(int flags, void *argstructp) 2224 { 2225 int (*op_nfssvc)(int, void *); 2226 2227 if (vfsbits & VFSBIT_NFSSVC){ 2228 struct nfsd_args *nfsdargs; 2229 2230 /* massage the socket descriptor if necessary */ 2231 if (flags == NFSSVC_ADDSOCK) { 2232 nfsdargs = argstructp; 2233 nfsdargs->sock = fd_host2rump(nfsdargs->sock); 2234 } 2235 op_nfssvc = GETSYSCALL(rump, NFSSVC); 2236 } else 2237 op_nfssvc = GETSYSCALL(host, NFSSVC); 2238 2239 return op_nfssvc(flags, argstructp); 2240 } 2241