1 /* $NetBSD: hijack.c,v 1.91 2012/02/01 05:34:41 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* Disable namespace mangling, Fortification is useless here anyway. */ 29 #undef _FORTIFY_SOURCE 30 31 #include <sys/cdefs.h> 32 __RCSID("$NetBSD: hijack.c,v 1.91 2012/02/01 05:34:41 dholland Exp $"); 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/event.h> 37 #include <sys/ioctl.h> 38 #include <sys/mman.h> 39 #include <sys/mount.h> 40 #include <sys/poll.h> 41 #include <sys/socket.h> 42 #include <sys/statvfs.h> 43 #include <sys/quotactl.h> 44 45 #include <rump/rumpclient.h> 46 #include <rump/rump_syscalls.h> 47 48 #include <assert.h> 49 #include <dlfcn.h> 50 #include <err.h> 51 #include <errno.h> 52 #include <fcntl.h> 53 #include <poll.h> 54 #include <pthread.h> 55 #include <signal.h> 56 #include <stdarg.h> 57 #include <stdbool.h> 58 #include <stdio.h> 59 #include <stdlib.h> 60 #include <string.h> 61 #include <time.h> 62 #include <unistd.h> 63 64 #include "hijack.h" 65 66 enum dualcall { 67 DUALCALL_WRITE, DUALCALL_WRITEV, DUALCALL_PWRITE, DUALCALL_PWRITEV, 68 DUALCALL_IOCTL, DUALCALL_FCNTL, 69 DUALCALL_SOCKET, DUALCALL_ACCEPT, DUALCALL_BIND, DUALCALL_CONNECT, 70 DUALCALL_GETPEERNAME, DUALCALL_GETSOCKNAME, DUALCALL_LISTEN, 71 DUALCALL_RECVFROM, DUALCALL_RECVMSG, 72 DUALCALL_SENDTO, DUALCALL_SENDMSG, 73 DUALCALL_GETSOCKOPT, DUALCALL_SETSOCKOPT, 74 DUALCALL_SHUTDOWN, 75 DUALCALL_READ, DUALCALL_READV, DUALCALL_PREAD, DUALCALL_PREADV, 76 DUALCALL_DUP2, 77 DUALCALL_CLOSE, 78 DUALCALL_POLLTS, 79 DUALCALL_KEVENT, 80 DUALCALL_STAT, DUALCALL_LSTAT, DUALCALL_FSTAT, 81 DUALCALL_CHMOD, DUALCALL_LCHMOD, DUALCALL_FCHMOD, 82 DUALCALL_CHOWN, DUALCALL_LCHOWN, DUALCALL_FCHOWN, 83 DUALCALL_OPEN, 84 DUALCALL_STATVFS1, DUALCALL_FSTATVFS1, 85 DUALCALL_CHDIR, DUALCALL_FCHDIR, 86 DUALCALL_LSEEK, 87 DUALCALL_GETDENTS, 88 DUALCALL_UNLINK, DUALCALL_SYMLINK, DUALCALL_READLINK, 89 DUALCALL_RENAME, 90 DUALCALL_MKDIR, DUALCALL_RMDIR, 91 DUALCALL_UTIMES, DUALCALL_LUTIMES, DUALCALL_FUTIMES, 92 DUALCALL_TRUNCATE, DUALCALL_FTRUNCATE, 93 DUALCALL_FSYNC, DUALCALL_FSYNC_RANGE, 94 DUALCALL_MOUNT, DUALCALL_UNMOUNT, 95 DUALCALL___GETCWD, 96 DUALCALL_CHFLAGS, DUALCALL_LCHFLAGS, DUALCALL_FCHFLAGS, 97 DUALCALL_ACCESS, 98 DUALCALL_MKNOD, 99 DUALCALL___SYSCTL, 100 DUALCALL_GETVFSSTAT, DUALCALL_NFSSVC, 101 DUALCALL_GETFH, DUALCALL_FHOPEN, DUALCALL_FHSTAT, DUALCALL_FHSTATVFS1, 102 #if __NetBSD_Prereq__(5,99,48) 103 DUALCALL_QUOTACTL, 104 #endif 105 DUALCALL__NUM 106 }; 107 108 #define RSYS_STRING(a) __STRING(a) 109 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a)) 110 111 /* 112 * Would be nice to get this automatically in sync with libc. 113 * Also, this does not work for compat-using binaries! 114 */ 115 #if !__NetBSD_Prereq__(5,99,7) 116 #define REALSELECT select 117 #define REALPOLLTS pollts 118 #define REALKEVENT kevent 119 #define REALSTAT __stat30 120 #define REALLSTAT __lstat30 121 #define REALFSTAT __fstat30 122 #define REALUTIMES utimes 123 #define REALLUTIMES lutimes 124 #define REALFUTIMES futimes 125 #define REALMKNOD mknod 126 #define REALFHSTAT __fhstat40 127 #else 128 #define REALSELECT _sys___select50 129 #define REALPOLLTS _sys___pollts50 130 #define REALKEVENT _sys___kevent50 131 #define REALSTAT __stat50 132 #define REALLSTAT __lstat50 133 #define REALFSTAT __fstat50 134 #define REALUTIMES __utimes50 135 #define REALLUTIMES __lutimes50 136 #define REALFUTIMES __futimes50 137 #define REALMKNOD __mknod50 138 #define REALFHSTAT __fhstat50 139 #endif 140 #define REALREAD _sys_read 141 #define REALPREAD _sys_pread 142 #define REALPWRITE _sys_pwrite 143 #define REALGETDENTS __getdents30 144 #define REALMOUNT __mount50 145 #define REALGETFH __getfh30 146 #define REALFHOPEN __fhopen40 147 #define REALFHSTATVFS1 __fhstatvfs140 148 #define OLDREALQUOTACTL __quotactl50 /* 5.99.48-62 only */ 149 150 int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *); 151 int REALPOLLTS(struct pollfd *, nfds_t, 152 const struct timespec *, const sigset_t *); 153 int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t, 154 const struct timespec *); 155 ssize_t REALREAD(int, void *, size_t); 156 ssize_t REALPREAD(int, void *, size_t, off_t); 157 ssize_t REALPWRITE(int, const void *, size_t, off_t); 158 int REALSTAT(const char *, struct stat *); 159 int REALLSTAT(const char *, struct stat *); 160 int REALFSTAT(int, struct stat *); 161 int REALGETDENTS(int, char *, size_t); 162 int REALUTIMES(const char *, const struct timeval [2]); 163 int REALLUTIMES(const char *, const struct timeval [2]); 164 int REALFUTIMES(int, const struct timeval [2]); 165 int REALMOUNT(const char *, const char *, int, void *, size_t); 166 int __getcwd(char *, size_t); 167 int REALMKNOD(const char *, mode_t, dev_t); 168 int REALGETFH(const char *, void *, size_t *); 169 int REALFHOPEN(const void *, size_t, int); 170 int REALFHSTAT(const void *, size_t, struct stat *); 171 int REALFHSTATVFS1(const void *, size_t, struct statvfs *, int); 172 int OLDREALQUOTACTL(const char *, struct plistref *); 173 174 #define S(a) __STRING(a) 175 struct sysnames { 176 enum dualcall scm_callnum; 177 const char *scm_hostname; 178 const char *scm_rumpname; 179 } syscnames[] = { 180 { DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) }, 181 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) }, 182 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) }, 183 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) }, 184 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) }, 185 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) }, 186 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) }, 187 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) }, 188 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) }, 189 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) }, 190 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) }, 191 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) }, 192 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) }, 193 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) }, 194 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) }, 195 { DUALCALL_READV, "readv", RSYS_NAME(READV) }, 196 { DUALCALL_PREAD, S(REALPREAD), RSYS_NAME(PREAD) }, 197 { DUALCALL_PREADV, "preadv", RSYS_NAME(PREADV) }, 198 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) }, 199 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) }, 200 { DUALCALL_PWRITE, S(REALPWRITE), RSYS_NAME(PWRITE) }, 201 { DUALCALL_PWRITEV, "pwritev", RSYS_NAME(PWRITEV) }, 202 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) }, 203 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) }, 204 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) }, 205 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) }, 206 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) }, 207 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) }, 208 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) }, 209 { DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) }, 210 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) }, 211 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) }, 212 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) }, 213 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) }, 214 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) }, 215 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) }, 216 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) }, 217 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) }, 218 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) }, 219 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) }, 220 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) }, 221 { DUALCALL_STATVFS1, "statvfs1", RSYS_NAME(STATVFS1) }, 222 { DUALCALL_FSTATVFS1, "fstatvfs1", RSYS_NAME(FSTATVFS1) }, 223 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) }, 224 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) }, 225 { DUALCALL_LSEEK, "lseek", RSYS_NAME(LSEEK) }, 226 { DUALCALL_GETDENTS, "__getdents30", RSYS_NAME(GETDENTS) }, 227 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) }, 228 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) }, 229 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) }, 230 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) }, 231 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) }, 232 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) }, 233 { DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) }, 234 { DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) }, 235 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) }, 236 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) }, 237 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) }, 238 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) }, 239 { DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) }, 240 { DUALCALL_CHFLAGS, "chflags", RSYS_NAME(CHFLAGS) }, 241 { DUALCALL_LCHFLAGS, "lchflags", RSYS_NAME(LCHFLAGS) }, 242 { DUALCALL_FCHFLAGS, "fchflags", RSYS_NAME(FCHFLAGS) }, 243 { DUALCALL_ACCESS, "access", RSYS_NAME(ACCESS) }, 244 { DUALCALL_MKNOD, S(REALMKNOD), RSYS_NAME(MKNOD) }, 245 { DUALCALL___SYSCTL, "__sysctl", RSYS_NAME(__SYSCTL) }, 246 { DUALCALL_GETVFSSTAT, "getvfsstat", RSYS_NAME(GETVFSSTAT) }, 247 { DUALCALL_NFSSVC, "nfssvc", RSYS_NAME(NFSSVC) }, 248 { DUALCALL_GETFH, S(REALGETFH), RSYS_NAME(GETFH) }, 249 { DUALCALL_FHOPEN, S(REALFHOPEN),RSYS_NAME(FHOPEN) }, 250 { DUALCALL_FHSTAT, S(REALFHSTAT),RSYS_NAME(FHSTAT) }, 251 { DUALCALL_FHSTATVFS1, S(REALFHSTATVFS1),RSYS_NAME(FHSTATVFS1) }, 252 #if __NetBSD_Prereq__(5,99,63) 253 { DUALCALL_QUOTACTL, "__quotactl", RSYS_NAME(__QUOTACTL) }, 254 #elif __NetBSD_Prereq__(5,99,48) 255 { DUALCALL_QUOTACTL, S(OLDREALQUOTACTL),RSYS_NAME(QUOTACTL) }, 256 #endif 257 }; 258 #undef S 259 260 struct bothsys { 261 void *bs_host; 262 void *bs_rump; 263 } syscalls[DUALCALL__NUM]; 264 #define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which 265 266 static pid_t (*host_fork)(void); 267 static int (*host_daemon)(int, int); 268 static void * (*host_mmap)(void *, size_t, int, int, int, off_t); 269 270 /* 271 * This tracks if our process is in a subdirectory of /rump. 272 * It's preserved over exec. 273 */ 274 static bool pwdinrump; 275 276 enum pathtype { PATH_HOST, PATH_RUMP, PATH_RUMPBLANKET }; 277 278 static bool fd_isrump(int); 279 static enum pathtype path_isrump(const char *); 280 281 /* default FD_SETSIZE is 256 ==> default fdoff is 128 */ 282 static int hijack_fdoff = FD_SETSIZE/2; 283 284 /* 285 * Maintain a mapping table for the usual dup2 suspects. 286 * Could use atomic ops to operate on dup2vec, but an application 287 * racing there is not well-defined, so don't bother. 288 */ 289 /* note: you cannot change this without editing the env-passing code */ 290 #define DUP2HIGH 2 291 static uint32_t dup2vec[DUP2HIGH+1]; 292 #define DUP2BIT (1<<31) 293 #define DUP2ALIAS (1<<30) 294 #define DUP2FDMASK ((1<<30)-1) 295 296 static bool 297 isdup2d(int fd) 298 { 299 300 return fd <= DUP2HIGH && fd >= 0 && dup2vec[fd] & DUP2BIT; 301 } 302 303 static int 304 mapdup2(int hostfd) 305 { 306 307 _DIAGASSERT(isdup2d(hostfd)); 308 return dup2vec[hostfd] & DUP2FDMASK; 309 } 310 311 static int 312 unmapdup2(int rumpfd) 313 { 314 int i; 315 316 for (i = 0; i <= DUP2HIGH; i++) { 317 if (dup2vec[i] & DUP2BIT && 318 (dup2vec[i] & DUP2FDMASK) == (unsigned)rumpfd) 319 return i; 320 } 321 return -1; 322 } 323 324 static void 325 setdup2(int hostfd, int rumpfd) 326 { 327 328 if (hostfd > DUP2HIGH) { 329 _DIAGASSERT(0); 330 return; 331 } 332 333 dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | rumpfd; 334 } 335 336 static void 337 clrdup2(int hostfd) 338 { 339 340 if (hostfd > DUP2HIGH) { 341 _DIAGASSERT(0); 342 return; 343 } 344 345 dup2vec[hostfd] = 0; 346 } 347 348 static bool 349 killdup2alias(int rumpfd) 350 { 351 int hostfd; 352 353 if ((hostfd = unmapdup2(rumpfd)) == -1) 354 return false; 355 356 if (dup2vec[hostfd] & DUP2ALIAS) { 357 dup2vec[hostfd] &= ~DUP2ALIAS; 358 return true; 359 } 360 return false; 361 } 362 363 //#define DEBUGJACK 364 #ifdef DEBUGJACK 365 #define DPRINTF(x) mydprintf x 366 static void 367 mydprintf(const char *fmt, ...) 368 { 369 va_list ap; 370 371 if (isdup2d(STDERR_FILENO)) 372 return; 373 374 va_start(ap, fmt); 375 vfprintf(stderr, fmt, ap); 376 va_end(ap); 377 } 378 379 static const char * 380 whichfd(int fd) 381 { 382 383 if (fd == -1) 384 return "-1"; 385 else if (fd_isrump(fd)) 386 return "rump"; 387 else 388 return "host"; 389 } 390 391 static const char * 392 whichpath(const char *path) 393 { 394 395 if (path_isrump(path)) 396 return "rump"; 397 else 398 return "host"; 399 } 400 401 #else 402 #define DPRINTF(x) 403 #endif 404 405 #define FDCALL(type, name, rcname, args, proto, vars) \ 406 type name args \ 407 { \ 408 type (*fun) proto; \ 409 \ 410 DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd, whichfd(fd))); \ 411 if (fd_isrump(fd)) { \ 412 fun = syscalls[rcname].bs_rump; \ 413 fd = fd_host2rump(fd); \ 414 } else { \ 415 fun = syscalls[rcname].bs_host; \ 416 } \ 417 \ 418 return fun vars; \ 419 } 420 421 #define PATHCALL(type, name, rcname, args, proto, vars) \ 422 type name args \ 423 { \ 424 type (*fun) proto; \ 425 enum pathtype pt; \ 426 \ 427 DPRINTF(("%s -> %s (%s)\n", __STRING(name), path, \ 428 whichpath(path))); \ 429 if ((pt = path_isrump(path)) != PATH_HOST) { \ 430 fun = syscalls[rcname].bs_rump; \ 431 if (pt == PATH_RUMP) \ 432 path = path_host2rump(path); \ 433 } else { \ 434 fun = syscalls[rcname].bs_host; \ 435 } \ 436 \ 437 return fun vars; \ 438 } 439 440 #define VFSCALL(bit, type, name, rcname, args, proto, vars) \ 441 type name args \ 442 { \ 443 type (*fun) proto; \ 444 \ 445 DPRINTF(("%s (0x%x, 0x%x)\n", __STRING(name), bit, vfsbits)); \ 446 if (vfsbits & bit) { \ 447 fun = syscalls[rcname].bs_rump; \ 448 } else { \ 449 fun = syscalls[rcname].bs_host; \ 450 } \ 451 \ 452 return fun vars; \ 453 } 454 455 /* 456 * These variables are set from the RUMPHIJACK string and control 457 * which operations can product rump kernel file descriptors. 458 * This should be easily extendable for future needs. 459 */ 460 #define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal" 461 static bool rumpsockets[PF_MAX]; 462 static const char *rumpprefix; 463 static size_t rumpprefixlen; 464 465 static struct { 466 int pf; 467 const char *name; 468 } socketmap[] = { 469 { PF_LOCAL, "local" }, 470 { PF_INET, "inet" }, 471 { PF_LINK, "link" }, 472 #ifdef PF_OROUTE 473 { PF_OROUTE, "oroute" }, 474 #endif 475 { PF_ROUTE, "route" }, 476 { PF_INET6, "inet6" }, 477 #ifdef PF_MPLS 478 { PF_MPLS, "mpls" }, 479 #endif 480 { -1, NULL } 481 }; 482 483 static void 484 sockparser(char *buf) 485 { 486 char *p, *l; 487 bool value; 488 int i; 489 490 /* if "all" is present, it must be specified first */ 491 if (strncmp(buf, "all", strlen("all")) == 0) { 492 for (i = 0; i < (int)__arraycount(rumpsockets); i++) { 493 rumpsockets[i] = true; 494 } 495 buf += strlen("all"); 496 if (*buf == ':') 497 buf++; 498 } 499 500 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 501 value = true; 502 if (strncmp(p, "no", strlen("no")) == 0) { 503 value = false; 504 p += strlen("no"); 505 } 506 507 for (i = 0; socketmap[i].name; i++) { 508 if (strcmp(p, socketmap[i].name) == 0) { 509 rumpsockets[socketmap[i].pf] = value; 510 break; 511 } 512 } 513 if (socketmap[i].name == NULL) { 514 errx(1, "invalid socket specifier %s", p); 515 } 516 } 517 } 518 519 static void 520 pathparser(char *buf) 521 { 522 523 /* sanity-check */ 524 if (*buf != '/') 525 errx(1, "hijack path specifier must begin with ``/''"); 526 rumpprefixlen = strlen(buf); 527 if (rumpprefixlen < 2) 528 errx(1, "invalid hijack prefix: %s", buf); 529 if (buf[rumpprefixlen-1] == '/' && strspn(buf, "/") != rumpprefixlen) 530 errx(1, "hijack prefix may end in slash only if pure " 531 "slash, gave %s", buf); 532 533 if ((rumpprefix = strdup(buf)) == NULL) 534 err(1, "strdup"); 535 rumpprefixlen = strlen(rumpprefix); 536 } 537 538 static struct blanket { 539 const char *pfx; 540 size_t len; 541 } *blanket; 542 static int nblanket; 543 544 static void 545 blanketparser(char *buf) 546 { 547 char *p, *l; 548 int i; 549 550 for (nblanket = 0, p = buf; p; p = strchr(p+1, ':'), nblanket++) 551 continue; 552 553 blanket = malloc(nblanket * sizeof(*blanket)); 554 if (blanket == NULL) 555 err(1, "alloc blanket %d", nblanket); 556 557 for (p = strtok_r(buf, ":", &l), i = 0; p; 558 p = strtok_r(NULL, ":", &l), i++) { 559 blanket[i].pfx = strdup(p); 560 if (blanket[i].pfx == NULL) 561 err(1, "strdup blanket"); 562 blanket[i].len = strlen(p); 563 564 if (blanket[i].len == 0 || *blanket[i].pfx != '/') 565 errx(1, "invalid blanket specifier %s", p); 566 if (*(blanket[i].pfx + blanket[i].len-1) == '/') 567 errx(1, "invalid blanket specifier %s", p); 568 } 569 } 570 571 #define VFSBIT_NFSSVC 0x01 572 #define VFSBIT_GETVFSSTAT 0x02 573 #define VFSBIT_FHCALLS 0x04 574 static unsigned vfsbits; 575 576 static struct { 577 int bit; 578 const char *name; 579 } vfscalls[] = { 580 { VFSBIT_NFSSVC, "nfssvc" }, 581 { VFSBIT_GETVFSSTAT, "getvfsstat" }, 582 { VFSBIT_FHCALLS, "fhcalls" }, 583 { -1, NULL } 584 }; 585 586 static void 587 vfsparser(char *buf) 588 { 589 char *p, *l; 590 bool turnon; 591 unsigned int fullmask; 592 int i; 593 594 /* build the full mask and sanity-check while we're at it */ 595 fullmask = 0; 596 for (i = 0; vfscalls[i].name != NULL; i++) { 597 if (fullmask & vfscalls[i].bit) 598 errx(1, "problem exists between vi and chair"); 599 fullmask |= vfscalls[i].bit; 600 } 601 602 603 /* if "all" is present, it must be specified first */ 604 if (strncmp(buf, "all", strlen("all")) == 0) { 605 vfsbits = fullmask; 606 buf += strlen("all"); 607 if (*buf == ':') 608 buf++; 609 } 610 611 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 612 turnon = true; 613 if (strncmp(p, "no", strlen("no")) == 0) { 614 turnon = false; 615 p += strlen("no"); 616 } 617 618 for (i = 0; vfscalls[i].name; i++) { 619 if (strcmp(p, vfscalls[i].name) == 0) { 620 if (turnon) 621 vfsbits |= vfscalls[i].bit; 622 else 623 vfsbits &= ~vfscalls[i].bit; 624 break; 625 } 626 } 627 if (vfscalls[i].name == NULL) { 628 errx(1, "invalid vfscall specifier %s", p); 629 } 630 } 631 } 632 633 static bool rumpsysctl = false; 634 635 static void 636 sysctlparser(char *buf) 637 { 638 639 if (buf == NULL) { 640 rumpsysctl = true; 641 return; 642 } 643 644 if (strcasecmp(buf, "y") == 0 || strcasecmp(buf, "yes") == 0 || 645 strcasecmp(buf, "yep") == 0 || strcasecmp(buf, "tottakai") == 0) { 646 rumpsysctl = true; 647 return; 648 } 649 if (strcasecmp(buf, "n") == 0 || strcasecmp(buf, "no") == 0) { 650 rumpsysctl = false; 651 return; 652 } 653 654 errx(1, "sysctl value should be y(es)/n(o), gave: %s", buf); 655 } 656 657 static void 658 fdoffparser(char *buf) 659 { 660 unsigned long fdoff; 661 char *ep; 662 663 if (*buf == '-') { 664 errx(1, "fdoff must not be negative"); 665 } 666 fdoff = strtoul(buf, &ep, 10); 667 if (*ep != '\0') 668 errx(1, "invalid fdoff specifier \"%s\"", buf); 669 if (fdoff >= INT_MAX/2 || fdoff < 3) 670 errx(1, "fdoff out of range"); 671 hijack_fdoff = fdoff; 672 } 673 674 static struct { 675 void (*parsefn)(char *); 676 const char *name; 677 bool needvalues; 678 } hijackparse[] = { 679 { sockparser, "socket", true }, 680 { pathparser, "path", true }, 681 { blanketparser, "blanket", true }, 682 { vfsparser, "vfs", true }, 683 { sysctlparser, "sysctl", false }, 684 { fdoffparser, "fdoff", true }, 685 { NULL, NULL, false }, 686 }; 687 688 static void 689 parsehijack(char *hijack) 690 { 691 char *p, *p2, *l; 692 const char *hijackcopy; 693 bool nop2; 694 int i; 695 696 if ((hijackcopy = strdup(hijack)) == NULL) 697 err(1, "strdup"); 698 699 /* disable everything explicitly */ 700 for (i = 0; i < PF_MAX; i++) 701 rumpsockets[i] = false; 702 703 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) { 704 nop2 = false; 705 p2 = strchr(p, '='); 706 if (!p2) { 707 nop2 = true; 708 p2 = p + strlen(p); 709 } 710 711 for (i = 0; hijackparse[i].parsefn; i++) { 712 if (strncmp(hijackparse[i].name, p, 713 (size_t)(p2-p)) == 0) { 714 if (nop2 && hijackparse[i].needvalues) 715 errx(1, "invalid hijack specifier: %s", 716 hijackcopy); 717 hijackparse[i].parsefn(nop2 ? NULL : p2+1); 718 break; 719 } 720 } 721 722 if (hijackparse[i].parsefn == NULL) 723 errx(1, "invalid hijack specifier name in %s", p); 724 } 725 726 } 727 728 static void __attribute__((constructor)) 729 rcinit(void) 730 { 731 char buf[1024]; 732 unsigned i, j; 733 734 host_fork = dlsym(RTLD_NEXT, "fork"); 735 host_daemon = dlsym(RTLD_NEXT, "daemon"); 736 host_mmap = dlsym(RTLD_NEXT, "mmap"); 737 738 /* 739 * In theory cannot print anything during lookups because 740 * we might not have the call vector set up. so, the errx() 741 * is a bit of a strech, but it might work. 742 */ 743 744 for (i = 0; i < DUALCALL__NUM; i++) { 745 /* build runtime O(1) access */ 746 for (j = 0; j < __arraycount(syscnames); j++) { 747 if (syscnames[j].scm_callnum == i) 748 break; 749 } 750 751 if (j == __arraycount(syscnames)) 752 errx(1, "rumphijack error: syscall pos %d missing", i); 753 754 syscalls[i].bs_host = dlsym(RTLD_NEXT, 755 syscnames[j].scm_hostname); 756 if (syscalls[i].bs_host == NULL) 757 errx(1, "hostcall %s not found!", 758 syscnames[j].scm_hostname); 759 760 syscalls[i].bs_rump = dlsym(RTLD_NEXT, 761 syscnames[j].scm_rumpname); 762 if (syscalls[i].bs_rump == NULL) 763 errx(1, "rumpcall %s not found!", 764 syscnames[j].scm_rumpname); 765 } 766 767 if (rumpclient_init() == -1) 768 err(1, "rumpclient init"); 769 770 /* check which syscalls we're supposed to hijack */ 771 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) { 772 strcpy(buf, RUMPHIJACK_DEFAULT); 773 } 774 parsehijack(buf); 775 776 /* set client persistence level */ 777 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) { 778 if (strcmp(buf, "die") == 0) 779 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE); 780 else if (strcmp(buf, "inftime") == 0) 781 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME); 782 else if (strcmp(buf, "once") == 0) 783 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE); 784 else { 785 time_t timeout; 786 char *ep; 787 788 timeout = (time_t)strtoll(buf, &ep, 10); 789 if (timeout <= 0 || ep != buf + strlen(buf)) 790 errx(1, "RUMPHIJACK_RETRYCONNECT must be " 791 "keyword or integer, got: %s", buf); 792 793 rumpclient_setconnretry(timeout); 794 } 795 } 796 797 if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) { 798 if (sscanf(buf, "%u,%u,%u", 799 &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) { 800 warnx("invalid dup2mask: %s", buf); 801 memset(dup2vec, 0, sizeof(dup2vec)); 802 } 803 unsetenv("RUMPHIJACK__DUP2INFO"); 804 } 805 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) { 806 pwdinrump = true; 807 unsetenv("RUMPHIJACK__PWDINRUMP"); 808 } 809 } 810 811 static int 812 fd_rump2host(int fd) 813 { 814 815 if (fd == -1) 816 return fd; 817 return fd + hijack_fdoff; 818 } 819 820 static int 821 fd_rump2host_withdup(int fd) 822 { 823 int hfd; 824 825 _DIAGASSERT(fd != -1); 826 hfd = unmapdup2(fd); 827 if (hfd != -1) { 828 _DIAGASSERT(hfd <= DUP2HIGH); 829 return hfd; 830 } 831 return fd_rump2host(fd); 832 } 833 834 static int 835 fd_host2rump(int fd) 836 { 837 838 if (!isdup2d(fd)) 839 return fd - hijack_fdoff; 840 else 841 return mapdup2(fd); 842 } 843 844 static bool 845 fd_isrump(int fd) 846 { 847 848 return isdup2d(fd) || fd >= hijack_fdoff; 849 } 850 851 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= hijack_fdoff) 852 853 static enum pathtype 854 path_isrump(const char *path) 855 { 856 size_t plen; 857 int i; 858 859 if (rumpprefix == NULL && nblanket == 0) 860 return PATH_HOST; 861 862 if (*path == '/') { 863 plen = strlen(path); 864 if (rumpprefix && plen >= rumpprefixlen) { 865 if (strncmp(path, rumpprefix, rumpprefixlen) == 0 866 && (plen == rumpprefixlen 867 || *(path + rumpprefixlen) == '/')) { 868 return PATH_RUMP; 869 } 870 } 871 for (i = 0; i < nblanket; i++) { 872 if (strncmp(path, blanket[i].pfx, blanket[i].len) == 0) 873 return PATH_RUMPBLANKET; 874 } 875 876 return PATH_HOST; 877 } else { 878 return pwdinrump ? PATH_RUMP : PATH_HOST; 879 } 880 } 881 882 static const char *rootpath = "/"; 883 static const char * 884 path_host2rump(const char *path) 885 { 886 const char *rv; 887 888 if (*path == '/') { 889 rv = path + rumpprefixlen; 890 if (*rv == '\0') 891 rv = rootpath; 892 } else { 893 rv = path; 894 } 895 896 return rv; 897 } 898 899 static int 900 dodup(int oldd, int minfd) 901 { 902 int (*op_fcntl)(int, int, ...); 903 int newd; 904 int isrump; 905 906 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd)); 907 if (fd_isrump(oldd)) { 908 op_fcntl = GETSYSCALL(rump, FCNTL); 909 oldd = fd_host2rump(oldd); 910 if (minfd >= hijack_fdoff) 911 minfd -= hijack_fdoff; 912 isrump = 1; 913 } else { 914 op_fcntl = GETSYSCALL(host, FCNTL); 915 isrump = 0; 916 } 917 918 newd = op_fcntl(oldd, F_DUPFD, minfd); 919 920 if (isrump) 921 newd = fd_rump2host(newd); 922 DPRINTF(("dup <- %d\n", newd)); 923 924 return newd; 925 } 926 927 /* 928 * Check that host fd value does not exceed fdoffset and if necessary 929 * dup the file descriptor so that it doesn't collide with the dup2mask. 930 */ 931 static int 932 fd_host2host(int fd) 933 { 934 int (*op_fcntl)(int, int, ...) = GETSYSCALL(host, FCNTL); 935 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 936 int ofd, i; 937 938 if (fd >= hijack_fdoff) { 939 op_close(fd); 940 errno = ENFILE; 941 return -1; 942 } 943 944 for (i = 1; isdup2d(fd); i++) { 945 ofd = fd; 946 fd = op_fcntl(ofd, F_DUPFD, i); 947 op_close(ofd); 948 } 949 950 return fd; 951 } 952 953 int 954 open(const char *path, int flags, ...) 955 { 956 int (*op_open)(const char *, int, ...); 957 bool isrump; 958 va_list ap; 959 enum pathtype pt; 960 int fd; 961 962 DPRINTF(("open -> %s (%s)\n", path, whichpath(path))); 963 964 if ((pt = path_isrump(path)) != PATH_HOST) { 965 if (pt == PATH_RUMP) 966 path = path_host2rump(path); 967 op_open = GETSYSCALL(rump, OPEN); 968 isrump = true; 969 } else { 970 op_open = GETSYSCALL(host, OPEN); 971 isrump = false; 972 } 973 974 va_start(ap, flags); 975 fd = op_open(path, flags, va_arg(ap, mode_t)); 976 va_end(ap); 977 978 if (isrump) 979 fd = fd_rump2host(fd); 980 else 981 fd = fd_host2host(fd); 982 983 DPRINTF(("open <- %d (%s)\n", fd, whichfd(fd))); 984 return fd; 985 } 986 987 int 988 chdir(const char *path) 989 { 990 int (*op_chdir)(const char *); 991 enum pathtype pt; 992 int rv; 993 994 if ((pt = path_isrump(path)) != PATH_HOST) { 995 op_chdir = GETSYSCALL(rump, CHDIR); 996 if (pt == PATH_RUMP) 997 path = path_host2rump(path); 998 } else { 999 op_chdir = GETSYSCALL(host, CHDIR); 1000 } 1001 1002 rv = op_chdir(path); 1003 if (rv == 0) 1004 pwdinrump = pt != PATH_HOST; 1005 1006 return rv; 1007 } 1008 1009 int 1010 fchdir(int fd) 1011 { 1012 int (*op_fchdir)(int); 1013 bool isrump; 1014 int rv; 1015 1016 if (fd_isrump(fd)) { 1017 op_fchdir = GETSYSCALL(rump, FCHDIR); 1018 isrump = true; 1019 fd = fd_host2rump(fd); 1020 } else { 1021 op_fchdir = GETSYSCALL(host, FCHDIR); 1022 isrump = false; 1023 } 1024 1025 rv = op_fchdir(fd); 1026 if (rv == 0) { 1027 pwdinrump = isrump; 1028 } 1029 1030 return rv; 1031 } 1032 1033 int 1034 __getcwd(char *bufp, size_t len) 1035 { 1036 int (*op___getcwd)(char *, size_t); 1037 size_t prefixgap; 1038 bool iamslash; 1039 int rv; 1040 1041 if (pwdinrump && rumpprefix) { 1042 if (rumpprefix[rumpprefixlen-1] == '/') 1043 iamslash = true; 1044 else 1045 iamslash = false; 1046 1047 if (iamslash) 1048 prefixgap = rumpprefixlen - 1; /* ``//+path'' */ 1049 else 1050 prefixgap = rumpprefixlen; /* ``/pfx+/path'' */ 1051 if (len <= prefixgap) { 1052 errno = ERANGE; 1053 return -1; 1054 } 1055 1056 op___getcwd = GETSYSCALL(rump, __GETCWD); 1057 rv = op___getcwd(bufp + prefixgap, len - prefixgap); 1058 if (rv == -1) 1059 return rv; 1060 1061 /* augment the "/" part only for a non-root path */ 1062 memcpy(bufp, rumpprefix, rumpprefixlen); 1063 1064 /* append / only to non-root cwd */ 1065 if (rv != 2) 1066 bufp[prefixgap] = '/'; 1067 1068 /* don't append extra slash in the purely-slash case */ 1069 if (rv == 2 && !iamslash) 1070 bufp[rumpprefixlen] = '\0'; 1071 } else if (pwdinrump) { 1072 /* assume blanket. we can't provide a prefix here */ 1073 op___getcwd = GETSYSCALL(rump, __GETCWD); 1074 rv = op___getcwd(bufp, len); 1075 } else { 1076 op___getcwd = GETSYSCALL(host, __GETCWD); 1077 rv = op___getcwd(bufp, len); 1078 } 1079 1080 return rv; 1081 } 1082 1083 int 1084 rename(const char *from, const char *to) 1085 { 1086 int (*op_rename)(const char *, const char *); 1087 enum pathtype ptf, ptt; 1088 1089 if ((ptf = path_isrump(from)) != PATH_HOST) { 1090 if ((ptt = path_isrump(to)) == PATH_HOST) { 1091 errno = EXDEV; 1092 return -1; 1093 } 1094 1095 if (ptf == PATH_RUMP) 1096 from = path_host2rump(from); 1097 if (ptt == PATH_RUMP) 1098 to = path_host2rump(to); 1099 op_rename = GETSYSCALL(rump, RENAME); 1100 } else { 1101 if (path_isrump(to) != PATH_HOST) { 1102 errno = EXDEV; 1103 return -1; 1104 } 1105 1106 op_rename = GETSYSCALL(host, RENAME); 1107 } 1108 1109 return op_rename(from, to); 1110 } 1111 1112 int __socket30(int, int, int); 1113 int 1114 __socket30(int domain, int type, int protocol) 1115 { 1116 int (*op_socket)(int, int, int); 1117 int fd; 1118 bool isrump; 1119 1120 isrump = domain < PF_MAX && rumpsockets[domain]; 1121 1122 if (isrump) 1123 op_socket = GETSYSCALL(rump, SOCKET); 1124 else 1125 op_socket = GETSYSCALL(host, SOCKET); 1126 fd = op_socket(domain, type, protocol); 1127 1128 if (isrump) 1129 fd = fd_rump2host(fd); 1130 else 1131 fd = fd_host2host(fd); 1132 DPRINTF(("socket <- %d\n", fd)); 1133 1134 return fd; 1135 } 1136 1137 int 1138 accept(int s, struct sockaddr *addr, socklen_t *addrlen) 1139 { 1140 int (*op_accept)(int, struct sockaddr *, socklen_t *); 1141 int fd; 1142 bool isrump; 1143 1144 isrump = fd_isrump(s); 1145 1146 DPRINTF(("accept -> %d", s)); 1147 if (isrump) { 1148 op_accept = GETSYSCALL(rump, ACCEPT); 1149 s = fd_host2rump(s); 1150 } else { 1151 op_accept = GETSYSCALL(host, ACCEPT); 1152 } 1153 fd = op_accept(s, addr, addrlen); 1154 if (fd != -1 && isrump) 1155 fd = fd_rump2host(fd); 1156 else 1157 fd = fd_host2host(fd); 1158 1159 DPRINTF((" <- %d\n", fd)); 1160 1161 return fd; 1162 } 1163 1164 /* 1165 * ioctl and fcntl are varargs calls and need special treatment 1166 */ 1167 int 1168 ioctl(int fd, unsigned long cmd, ...) 1169 { 1170 int (*op_ioctl)(int, unsigned long cmd, ...); 1171 va_list ap; 1172 int rv; 1173 1174 DPRINTF(("ioctl -> %d\n", fd)); 1175 if (fd_isrump(fd)) { 1176 fd = fd_host2rump(fd); 1177 op_ioctl = GETSYSCALL(rump, IOCTL); 1178 } else { 1179 op_ioctl = GETSYSCALL(host, IOCTL); 1180 } 1181 1182 va_start(ap, cmd); 1183 rv = op_ioctl(fd, cmd, va_arg(ap, void *)); 1184 va_end(ap); 1185 return rv; 1186 } 1187 1188 int 1189 fcntl(int fd, int cmd, ...) 1190 { 1191 int (*op_fcntl)(int, int, ...); 1192 va_list ap; 1193 int rv, minfd, i, maxdup2; 1194 1195 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd)); 1196 1197 switch (cmd) { 1198 case F_DUPFD: 1199 va_start(ap, cmd); 1200 minfd = va_arg(ap, int); 1201 va_end(ap); 1202 return dodup(fd, minfd); 1203 1204 case F_CLOSEM: 1205 /* 1206 * So, if fd < HIJACKOFF, we want to do a host closem. 1207 */ 1208 1209 if (fd < hijack_fdoff) { 1210 int closemfd = fd; 1211 1212 if (rumpclient__closenotify(&closemfd, 1213 RUMPCLIENT_CLOSE_FCLOSEM) == -1) 1214 return -1; 1215 op_fcntl = GETSYSCALL(host, FCNTL); 1216 rv = op_fcntl(closemfd, cmd); 1217 if (rv) 1218 return rv; 1219 } 1220 1221 /* 1222 * Additionally, we want to do a rump closem, but only 1223 * for the file descriptors not dup2'd. 1224 */ 1225 1226 for (i = 0, maxdup2 = 0; i <= DUP2HIGH; i++) { 1227 if (dup2vec[i] & DUP2BIT) { 1228 int val; 1229 1230 val = dup2vec[i] & DUP2FDMASK; 1231 maxdup2 = MAX(val, maxdup2); 1232 } 1233 } 1234 1235 if (fd >= hijack_fdoff) 1236 fd -= hijack_fdoff; 1237 else 1238 fd = 0; 1239 fd = MAX(maxdup2+1, fd); 1240 1241 /* hmm, maybe we should close rump fd's not within dup2mask? */ 1242 return rump_sys_fcntl(fd, F_CLOSEM); 1243 1244 case F_MAXFD: 1245 /* 1246 * For maxfd, if there's a rump kernel fd, return 1247 * it hostified. Otherwise, return host's MAXFD 1248 * return value. 1249 */ 1250 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) { 1251 /* 1252 * This might go a little wrong in case 1253 * of dup2 to [012], but I'm not sure if 1254 * there's a justification for tracking 1255 * that info. Consider e.g. 1256 * dup2(rumpfd, 2) followed by rump_sys_open() 1257 * returning 1. We should return 1+HIJACKOFF, 1258 * not 2+HIJACKOFF. However, if [01] is not 1259 * open, the correct return value is 2. 1260 */ 1261 return fd_rump2host(fd); 1262 } else { 1263 op_fcntl = GETSYSCALL(host, FCNTL); 1264 return op_fcntl(fd, F_MAXFD); 1265 } 1266 /*NOTREACHED*/ 1267 1268 default: 1269 if (fd_isrump(fd)) { 1270 fd = fd_host2rump(fd); 1271 op_fcntl = GETSYSCALL(rump, FCNTL); 1272 } else { 1273 op_fcntl = GETSYSCALL(host, FCNTL); 1274 } 1275 1276 va_start(ap, cmd); 1277 rv = op_fcntl(fd, cmd, va_arg(ap, void *)); 1278 va_end(ap); 1279 return rv; 1280 } 1281 /*NOTREACHED*/ 1282 } 1283 1284 int 1285 close(int fd) 1286 { 1287 int (*op_close)(int); 1288 int rv; 1289 1290 DPRINTF(("close -> %d\n", fd)); 1291 if (fd_isrump(fd)) { 1292 bool undup2 = false; 1293 int ofd; 1294 1295 if (isdup2d(ofd = fd)) { 1296 undup2 = true; 1297 } 1298 1299 fd = fd_host2rump(fd); 1300 if (!undup2 && killdup2alias(fd)) { 1301 return 0; 1302 } 1303 1304 op_close = GETSYSCALL(rump, CLOSE); 1305 rv = op_close(fd); 1306 if (rv == 0 && undup2) { 1307 clrdup2(ofd); 1308 } 1309 } else { 1310 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1) 1311 return -1; 1312 op_close = GETSYSCALL(host, CLOSE); 1313 rv = op_close(fd); 1314 } 1315 1316 return rv; 1317 } 1318 1319 /* 1320 * write cannot issue a standard debug printf due to recursion 1321 */ 1322 ssize_t 1323 write(int fd, const void *buf, size_t blen) 1324 { 1325 ssize_t (*op_write)(int, const void *, size_t); 1326 1327 if (fd_isrump(fd)) { 1328 fd = fd_host2rump(fd); 1329 op_write = GETSYSCALL(rump, WRITE); 1330 } else { 1331 op_write = GETSYSCALL(host, WRITE); 1332 } 1333 1334 return op_write(fd, buf, blen); 1335 } 1336 1337 /* 1338 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since 1339 * many programs do that. dup2 of a rump kernel fd to another value 1340 * not >= fdoff is an error. 1341 * 1342 * Note: cannot rump2host newd, because it is often hardcoded. 1343 */ 1344 int 1345 dup2(int oldd, int newd) 1346 { 1347 int (*host_dup2)(int, int); 1348 int rv; 1349 1350 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd)); 1351 1352 if (fd_isrump(oldd)) { 1353 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 1354 1355 /* only allow fd 0-2 for cross-kernel dup */ 1356 if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) { 1357 errno = EBADF; 1358 return -1; 1359 } 1360 1361 /* regular dup2? */ 1362 if (fd_isrump(newd)) { 1363 newd = fd_host2rump(newd); 1364 rv = rump_sys_dup2(oldd, newd); 1365 return fd_rump2host(rv); 1366 } 1367 1368 /* 1369 * dup2 rump => host? just establish an 1370 * entry in the mapping table. 1371 */ 1372 op_close(newd); 1373 setdup2(newd, fd_host2rump(oldd)); 1374 rv = 0; 1375 } else { 1376 host_dup2 = syscalls[DUALCALL_DUP2].bs_host; 1377 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1) 1378 return -1; 1379 rv = host_dup2(oldd, newd); 1380 } 1381 1382 return rv; 1383 } 1384 1385 int 1386 dup(int oldd) 1387 { 1388 1389 return dodup(oldd, 0); 1390 } 1391 1392 pid_t 1393 fork() 1394 { 1395 pid_t rv; 1396 1397 DPRINTF(("fork\n")); 1398 1399 rv = rumpclient__dofork(host_fork); 1400 1401 DPRINTF(("fork returns %d\n", rv)); 1402 return rv; 1403 } 1404 /* we do not have the luxury of not requiring a stackframe */ 1405 __strong_alias(__vfork14,fork); 1406 1407 int 1408 daemon(int nochdir, int noclose) 1409 { 1410 struct rumpclient_fork *rf; 1411 1412 if ((rf = rumpclient_prefork()) == NULL) 1413 return -1; 1414 1415 if (host_daemon(nochdir, noclose) == -1) 1416 return -1; 1417 1418 if (rumpclient_fork_init(rf) == -1) 1419 return -1; 1420 1421 return 0; 1422 } 1423 1424 int 1425 execve(const char *path, char *const argv[], char *const envp[]) 1426 { 1427 char buf[128]; 1428 char *dup2str; 1429 const char *pwdinrumpstr; 1430 char **newenv; 1431 size_t nelem; 1432 int rv, sverrno; 1433 int bonus = 2, i = 0; 1434 1435 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u", 1436 dup2vec[0], dup2vec[1], dup2vec[2]); 1437 dup2str = strdup(buf); 1438 if (dup2str == NULL) { 1439 errno = ENOMEM; 1440 return -1; 1441 } 1442 1443 if (pwdinrump) { 1444 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true"; 1445 bonus++; 1446 } else { 1447 pwdinrumpstr = NULL; 1448 } 1449 1450 for (nelem = 0; envp && envp[nelem]; nelem++) 1451 continue; 1452 newenv = malloc(sizeof(*newenv) * (nelem+bonus)); 1453 if (newenv == NULL) { 1454 free(dup2str); 1455 errno = ENOMEM; 1456 return -1; 1457 } 1458 memcpy(newenv, envp, nelem*sizeof(*newenv)); 1459 newenv[nelem+i] = dup2str; 1460 i++; 1461 1462 if (pwdinrumpstr) { 1463 newenv[nelem+i] = __UNCONST(pwdinrumpstr); 1464 i++; 1465 } 1466 newenv[nelem+i] = NULL; 1467 _DIAGASSERT(i < bonus); 1468 1469 rv = rumpclient_exec(path, argv, newenv); 1470 1471 _DIAGASSERT(rv != 0); 1472 sverrno = errno; 1473 free(newenv); 1474 free(dup2str); 1475 errno = sverrno; 1476 return rv; 1477 } 1478 1479 /* 1480 * select is done by calling poll. 1481 */ 1482 int 1483 REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 1484 struct timeval *timeout) 1485 { 1486 struct pollfd *pfds; 1487 struct timespec ts, *tsp = NULL; 1488 nfds_t realnfds; 1489 int i, j; 1490 int rv, incr; 1491 1492 DPRINTF(("select\n")); 1493 1494 /* 1495 * Well, first we must scan the fds to figure out how many 1496 * fds there really are. This is because up to and including 1497 * nb5 poll() silently refuses nfds > process_maxopen_fds. 1498 * Seems to be fixed in current, thank the maker. 1499 * god damn cluster...bomb. 1500 */ 1501 1502 for (i = 0, realnfds = 0; i < nfds; i++) { 1503 if (readfds && FD_ISSET(i, readfds)) { 1504 realnfds++; 1505 continue; 1506 } 1507 if (writefds && FD_ISSET(i, writefds)) { 1508 realnfds++; 1509 continue; 1510 } 1511 if (exceptfds && FD_ISSET(i, exceptfds)) { 1512 realnfds++; 1513 continue; 1514 } 1515 } 1516 1517 if (realnfds) { 1518 pfds = calloc(realnfds, sizeof(*pfds)); 1519 if (!pfds) 1520 return -1; 1521 } else { 1522 pfds = NULL; 1523 } 1524 1525 for (i = 0, j = 0; i < nfds; i++) { 1526 incr = 0; 1527 if (readfds && FD_ISSET(i, readfds)) { 1528 pfds[j].fd = i; 1529 pfds[j].events |= POLLIN; 1530 incr=1; 1531 } 1532 if (writefds && FD_ISSET(i, writefds)) { 1533 pfds[j].fd = i; 1534 pfds[j].events |= POLLOUT; 1535 incr=1; 1536 } 1537 if (exceptfds && FD_ISSET(i, exceptfds)) { 1538 pfds[j].fd = i; 1539 pfds[j].events |= POLLHUP|POLLERR; 1540 incr=1; 1541 } 1542 if (incr) 1543 j++; 1544 } 1545 assert(j == (int)realnfds); 1546 1547 if (timeout) { 1548 TIMEVAL_TO_TIMESPEC(timeout, &ts); 1549 tsp = &ts; 1550 } 1551 rv = REALPOLLTS(pfds, realnfds, tsp, NULL); 1552 /* 1553 * "If select() returns with an error the descriptor sets 1554 * will be unmodified" 1555 */ 1556 if (rv < 0) 1557 goto out; 1558 1559 /* 1560 * zero out results (can't use FD_ZERO for the 1561 * obvious select-me-not reason). whee. 1562 * 1563 * We do this here since some software ignores the return 1564 * value of select, and hence if the timeout expires, it may 1565 * assume all input descriptors have activity. 1566 */ 1567 for (i = 0; i < nfds; i++) { 1568 if (readfds) 1569 FD_CLR(i, readfds); 1570 if (writefds) 1571 FD_CLR(i, writefds); 1572 if (exceptfds) 1573 FD_CLR(i, exceptfds); 1574 } 1575 if (rv == 0) 1576 goto out; 1577 1578 /* 1579 * We have >0 fds with activity. Harvest the results. 1580 */ 1581 for (i = 0; i < (int)realnfds; i++) { 1582 if (readfds) { 1583 if (pfds[i].revents & POLLIN) { 1584 FD_SET(pfds[i].fd, readfds); 1585 } 1586 } 1587 if (writefds) { 1588 if (pfds[i].revents & POLLOUT) { 1589 FD_SET(pfds[i].fd, writefds); 1590 } 1591 } 1592 if (exceptfds) { 1593 if (pfds[i].revents & (POLLHUP|POLLERR)) { 1594 FD_SET(pfds[i].fd, exceptfds); 1595 } 1596 } 1597 } 1598 1599 out: 1600 free(pfds); 1601 return rv; 1602 } 1603 1604 static void 1605 checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall) 1606 { 1607 nfds_t i; 1608 1609 for (i = 0; i < nfds; i++) { 1610 if (fds[i].fd == -1) 1611 continue; 1612 1613 if (fd_isrump(fds[i].fd)) 1614 (*rumpcall)++; 1615 else 1616 (*hostcall)++; 1617 } 1618 } 1619 1620 static void 1621 adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int)) 1622 { 1623 nfds_t i; 1624 1625 for (i = 0; i < nfds; i++) { 1626 fds[i].fd = fdadj(fds[i].fd); 1627 } 1628 } 1629 1630 /* 1631 * poll is easy as long as the call comes in the fds only in one 1632 * kernel. otherwise its quite tricky... 1633 */ 1634 struct pollarg { 1635 struct pollfd *pfds; 1636 nfds_t nfds; 1637 const struct timespec *ts; 1638 const sigset_t *sigmask; 1639 int pipefd; 1640 int errnum; 1641 }; 1642 1643 static void * 1644 hostpoll(void *arg) 1645 { 1646 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1647 const sigset_t *); 1648 struct pollarg *parg = arg; 1649 intptr_t rv; 1650 1651 op_pollts = GETSYSCALL(host, POLLTS); 1652 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask); 1653 if (rv == -1) 1654 parg->errnum = errno; 1655 rump_sys_write(parg->pipefd, &rv, sizeof(rv)); 1656 1657 return (void *)(intptr_t)rv; 1658 } 1659 1660 int 1661 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts, 1662 const sigset_t *sigmask) 1663 { 1664 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1665 const sigset_t *); 1666 int (*host_close)(int); 1667 int hostcall = 0, rumpcall = 0; 1668 pthread_t pt; 1669 nfds_t i; 1670 int rv; 1671 1672 DPRINTF(("poll\n")); 1673 checkpoll(fds, nfds, &hostcall, &rumpcall); 1674 1675 if (hostcall && rumpcall) { 1676 struct pollfd *pfd_host = NULL, *pfd_rump = NULL; 1677 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1}; 1678 struct pollarg parg; 1679 uintptr_t lrv; 1680 int sverrno = 0, trv; 1681 1682 /* 1683 * ok, this is where it gets tricky. We must support 1684 * this since it's a very common operation in certain 1685 * types of software (telnet, netcat, etc). We allocate 1686 * two vectors and run two poll commands in separate 1687 * threads. Whichever returns first "wins" and the 1688 * other kernel's fds won't show activity. 1689 */ 1690 rv = -1; 1691 1692 /* allocate full vector for O(n) joining after call */ 1693 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1)); 1694 if (!pfd_host) 1695 goto out; 1696 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1)); 1697 if (!pfd_rump) { 1698 goto out; 1699 } 1700 1701 /* 1702 * then, open two pipes, one for notifications 1703 * to each kernel. 1704 * 1705 * At least the rump pipe should probably be 1706 * cached, along with the helper threads. This 1707 * should give a microbenchmark improvement (haven't 1708 * experienced a macro-level problem yet, though). 1709 */ 1710 if ((rv = rump_sys_pipe(rpipe)) == -1) { 1711 sverrno = errno; 1712 } 1713 if (rv == 0 && (rv = pipe(hpipe)) == -1) { 1714 sverrno = errno; 1715 } 1716 1717 /* split vectors (or signal errors) */ 1718 for (i = 0; i < nfds; i++) { 1719 int fd; 1720 1721 fds[i].revents = 0; 1722 if (fds[i].fd == -1) { 1723 pfd_host[i].fd = -1; 1724 pfd_rump[i].fd = -1; 1725 } else if (fd_isrump(fds[i].fd)) { 1726 pfd_host[i].fd = -1; 1727 fd = fd_host2rump(fds[i].fd); 1728 if (fd == rpipe[0] || fd == rpipe[1]) { 1729 fds[i].revents = POLLNVAL; 1730 if (rv != -1) 1731 rv++; 1732 } 1733 pfd_rump[i].fd = fd; 1734 pfd_rump[i].events = fds[i].events; 1735 } else { 1736 pfd_rump[i].fd = -1; 1737 fd = fds[i].fd; 1738 if (fd == hpipe[0] || fd == hpipe[1]) { 1739 fds[i].revents = POLLNVAL; 1740 if (rv != -1) 1741 rv++; 1742 } 1743 pfd_host[i].fd = fd; 1744 pfd_host[i].events = fds[i].events; 1745 } 1746 pfd_rump[i].revents = pfd_host[i].revents = 0; 1747 } 1748 if (rv) { 1749 goto out; 1750 } 1751 1752 pfd_host[nfds].fd = hpipe[0]; 1753 pfd_host[nfds].events = POLLIN; 1754 pfd_rump[nfds].fd = rpipe[0]; 1755 pfd_rump[nfds].events = POLLIN; 1756 1757 /* 1758 * then, create a thread to do host part and meanwhile 1759 * do rump kernel part right here 1760 */ 1761 1762 parg.pfds = pfd_host; 1763 parg.nfds = nfds+1; 1764 parg.ts = ts; 1765 parg.sigmask = sigmask; 1766 parg.pipefd = rpipe[1]; 1767 pthread_create(&pt, NULL, hostpoll, &parg); 1768 1769 op_pollts = GETSYSCALL(rump, POLLTS); 1770 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL); 1771 sverrno = errno; 1772 write(hpipe[1], &rv, sizeof(rv)); 1773 pthread_join(pt, (void *)&trv); 1774 1775 /* check who "won" and merge results */ 1776 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) { 1777 rv = trv; 1778 1779 for (i = 0; i < nfds; i++) { 1780 if (pfd_rump[i].fd != -1) 1781 fds[i].revents = pfd_rump[i].revents; 1782 } 1783 sverrno = parg.errnum; 1784 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) { 1785 rv = trv; 1786 1787 for (i = 0; i < nfds; i++) { 1788 if (pfd_host[i].fd != -1) 1789 fds[i].revents = pfd_host[i].revents; 1790 } 1791 } else { 1792 rv = 0; 1793 } 1794 1795 out: 1796 host_close = GETSYSCALL(host, CLOSE); 1797 if (rpipe[0] != -1) 1798 rump_sys_close(rpipe[0]); 1799 if (rpipe[1] != -1) 1800 rump_sys_close(rpipe[1]); 1801 if (hpipe[0] != -1) 1802 host_close(hpipe[0]); 1803 if (hpipe[1] != -1) 1804 host_close(hpipe[1]); 1805 free(pfd_host); 1806 free(pfd_rump); 1807 errno = sverrno; 1808 } else { 1809 if (hostcall) { 1810 op_pollts = GETSYSCALL(host, POLLTS); 1811 } else { 1812 op_pollts = GETSYSCALL(rump, POLLTS); 1813 adjustpoll(fds, nfds, fd_host2rump); 1814 } 1815 1816 rv = op_pollts(fds, nfds, ts, sigmask); 1817 if (rumpcall) 1818 adjustpoll(fds, nfds, fd_rump2host_withdup); 1819 } 1820 1821 return rv; 1822 } 1823 1824 int 1825 poll(struct pollfd *fds, nfds_t nfds, int timeout) 1826 { 1827 struct timespec ts; 1828 struct timespec *tsp = NULL; 1829 1830 if (timeout != INFTIM) { 1831 ts.tv_sec = timeout / 1000; 1832 ts.tv_nsec = (timeout % 1000) * 1000*1000; 1833 1834 tsp = &ts; 1835 } 1836 1837 return REALPOLLTS(fds, nfds, tsp, NULL); 1838 } 1839 1840 int 1841 REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges, 1842 struct kevent *eventlist, size_t nevents, 1843 const struct timespec *timeout) 1844 { 1845 int (*op_kevent)(int, const struct kevent *, size_t, 1846 struct kevent *, size_t, const struct timespec *); 1847 const struct kevent *ev; 1848 size_t i; 1849 1850 /* 1851 * Check that we don't attempt to kevent rump kernel fd's. 1852 * That needs similar treatment to select/poll, but is slightly 1853 * trickier since we need to manage to different kq descriptors. 1854 * (TODO, in case you're wondering). 1855 */ 1856 for (i = 0; i < nchanges; i++) { 1857 ev = &changelist[i]; 1858 if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE || 1859 ev->filter == EVFILT_VNODE) { 1860 if (fd_isrump((int)ev->ident)) { 1861 errno = ENOTSUP; 1862 return -1; 1863 } 1864 } 1865 } 1866 1867 op_kevent = GETSYSCALL(host, KEVENT); 1868 return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout); 1869 } 1870 1871 /* 1872 * mmapping from a rump kernel is not supported, so disallow it. 1873 */ 1874 void * 1875 mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) 1876 { 1877 1878 if (flags & MAP_FILE && fd_isrump(fd)) { 1879 errno = ENOSYS; 1880 return MAP_FAILED; 1881 } 1882 return host_mmap(addr, len, prot, flags, fd, offset); 1883 } 1884 1885 /* 1886 * these go to one or the other on a per-process configuration 1887 */ 1888 int __sysctl(const int *, unsigned int, void *, size_t *, const void *, size_t); 1889 int 1890 __sysctl(const int *name, unsigned int namelen, void *old, size_t *oldlenp, 1891 const void *new, size_t newlen) 1892 { 1893 int (*op___sysctl)(const int *, unsigned int, void *, size_t *, 1894 const void *, size_t); 1895 1896 if (rumpsysctl) { 1897 op___sysctl = GETSYSCALL(rump, __SYSCTL); 1898 } else { 1899 op___sysctl = GETSYSCALL(host, __SYSCTL); 1900 /* we haven't inited yet */ 1901 if (__predict_false(op___sysctl == NULL)) { 1902 op___sysctl = rumphijack_dlsym(RTLD_NEXT, "__sysctl"); 1903 } 1904 } 1905 1906 return op___sysctl(name, namelen, old, oldlenp, new, newlen); 1907 } 1908 1909 /* 1910 * Rest are std type calls. 1911 */ 1912 1913 FDCALL(int, bind, DUALCALL_BIND, \ 1914 (int fd, const struct sockaddr *name, socklen_t namelen), \ 1915 (int, const struct sockaddr *, socklen_t), \ 1916 (fd, name, namelen)) 1917 1918 FDCALL(int, connect, DUALCALL_CONNECT, \ 1919 (int fd, const struct sockaddr *name, socklen_t namelen), \ 1920 (int, const struct sockaddr *, socklen_t), \ 1921 (fd, name, namelen)) 1922 1923 FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \ 1924 (int fd, struct sockaddr *name, socklen_t *namelen), \ 1925 (int, struct sockaddr *, socklen_t *), \ 1926 (fd, name, namelen)) 1927 1928 FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \ 1929 (int fd, struct sockaddr *name, socklen_t *namelen), \ 1930 (int, struct sockaddr *, socklen_t *), \ 1931 (fd, name, namelen)) 1932 1933 FDCALL(int, listen, DUALCALL_LISTEN, \ 1934 (int fd, int backlog), \ 1935 (int, int), \ 1936 (fd, backlog)) 1937 1938 FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \ 1939 (int fd, void *buf, size_t len, int flags, \ 1940 struct sockaddr *from, socklen_t *fromlen), \ 1941 (int, void *, size_t, int, struct sockaddr *, socklen_t *), \ 1942 (fd, buf, len, flags, from, fromlen)) 1943 1944 FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \ 1945 (int fd, const void *buf, size_t len, int flags, \ 1946 const struct sockaddr *to, socklen_t tolen), \ 1947 (int, const void *, size_t, int, \ 1948 const struct sockaddr *, socklen_t), \ 1949 (fd, buf, len, flags, to, tolen)) 1950 1951 FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG, \ 1952 (int fd, struct msghdr *msg, int flags), \ 1953 (int, struct msghdr *, int), \ 1954 (fd, msg, flags)) 1955 1956 FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG, \ 1957 (int fd, const struct msghdr *msg, int flags), \ 1958 (int, const struct msghdr *, int), \ 1959 (fd, msg, flags)) 1960 1961 FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \ 1962 (int fd, int level, int optn, void *optval, socklen_t *optlen), \ 1963 (int, int, int, void *, socklen_t *), \ 1964 (fd, level, optn, optval, optlen)) 1965 1966 FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \ 1967 (int fd, int level, int optn, \ 1968 const void *optval, socklen_t optlen), \ 1969 (int, int, int, const void *, socklen_t), \ 1970 (fd, level, optn, optval, optlen)) 1971 1972 FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \ 1973 (int fd, int how), \ 1974 (int, int), \ 1975 (fd, how)) 1976 1977 FDCALL(ssize_t, REALREAD, DUALCALL_READ, \ 1978 (int fd, void *buf, size_t buflen), \ 1979 (int, void *, size_t), \ 1980 (fd, buf, buflen)) 1981 1982 FDCALL(ssize_t, readv, DUALCALL_READV, \ 1983 (int fd, const struct iovec *iov, int iovcnt), \ 1984 (int, const struct iovec *, int), \ 1985 (fd, iov, iovcnt)) 1986 1987 FDCALL(ssize_t, REALPREAD, DUALCALL_PREAD, \ 1988 (int fd, void *buf, size_t nbytes, off_t offset), \ 1989 (int, void *, size_t, off_t), \ 1990 (fd, buf, nbytes, offset)) 1991 1992 FDCALL(ssize_t, preadv, DUALCALL_PREADV, \ 1993 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \ 1994 (int, const struct iovec *, int, off_t), \ 1995 (fd, iov, iovcnt, offset)) 1996 1997 FDCALL(ssize_t, writev, DUALCALL_WRITEV, \ 1998 (int fd, const struct iovec *iov, int iovcnt), \ 1999 (int, const struct iovec *, int), \ 2000 (fd, iov, iovcnt)) 2001 2002 FDCALL(ssize_t, REALPWRITE, DUALCALL_PWRITE, \ 2003 (int fd, const void *buf, size_t nbytes, off_t offset), \ 2004 (int, const void *, size_t, off_t), \ 2005 (fd, buf, nbytes, offset)) 2006 2007 FDCALL(ssize_t, pwritev, DUALCALL_PWRITEV, \ 2008 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \ 2009 (int, const struct iovec *, int, off_t), \ 2010 (fd, iov, iovcnt, offset)) 2011 2012 FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \ 2013 (int fd, struct stat *sb), \ 2014 (int, struct stat *), \ 2015 (fd, sb)) 2016 2017 FDCALL(int, fstatvfs1, DUALCALL_FSTATVFS1, \ 2018 (int fd, struct statvfs *buf, int flags), \ 2019 (int, struct statvfs *, int), \ 2020 (fd, buf, flags)) 2021 2022 FDCALL(off_t, lseek, DUALCALL_LSEEK, \ 2023 (int fd, off_t offset, int whence), \ 2024 (int, off_t, int), \ 2025 (fd, offset, whence)) 2026 __strong_alias(_lseek,lseek); 2027 2028 FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \ 2029 (int fd, char *buf, size_t nbytes), \ 2030 (int, char *, size_t), \ 2031 (fd, buf, nbytes)) 2032 2033 FDCALL(int, fchown, DUALCALL_FCHOWN, \ 2034 (int fd, uid_t owner, gid_t group), \ 2035 (int, uid_t, gid_t), \ 2036 (fd, owner, group)) 2037 2038 FDCALL(int, fchmod, DUALCALL_FCHMOD, \ 2039 (int fd, mode_t mode), \ 2040 (int, mode_t), \ 2041 (fd, mode)) 2042 2043 FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \ 2044 (int fd, off_t length), \ 2045 (int, off_t), \ 2046 (fd, length)) 2047 2048 FDCALL(int, fsync, DUALCALL_FSYNC, \ 2049 (int fd), \ 2050 (int), \ 2051 (fd)) 2052 2053 FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \ 2054 (int fd, int how, off_t start, off_t length), \ 2055 (int, int, off_t, off_t), \ 2056 (fd, how, start, length)) 2057 2058 FDCALL(int, futimes, DUALCALL_FUTIMES, \ 2059 (int fd, const struct timeval *tv), \ 2060 (int, const struct timeval *), \ 2061 (fd, tv)) 2062 2063 FDCALL(int, fchflags, DUALCALL_FCHFLAGS, \ 2064 (int fd, u_long flags), \ 2065 (int, u_long), \ 2066 (fd, flags)) 2067 2068 /* 2069 * path-based selectors 2070 */ 2071 2072 PATHCALL(int, REALSTAT, DUALCALL_STAT, \ 2073 (const char *path, struct stat *sb), \ 2074 (const char *, struct stat *), \ 2075 (path, sb)) 2076 2077 PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \ 2078 (const char *path, struct stat *sb), \ 2079 (const char *, struct stat *), \ 2080 (path, sb)) 2081 2082 PATHCALL(int, chown, DUALCALL_CHOWN, \ 2083 (const char *path, uid_t owner, gid_t group), \ 2084 (const char *, uid_t, gid_t), \ 2085 (path, owner, group)) 2086 2087 PATHCALL(int, lchown, DUALCALL_LCHOWN, \ 2088 (const char *path, uid_t owner, gid_t group), \ 2089 (const char *, uid_t, gid_t), \ 2090 (path, owner, group)) 2091 2092 PATHCALL(int, chmod, DUALCALL_CHMOD, \ 2093 (const char *path, mode_t mode), \ 2094 (const char *, mode_t), \ 2095 (path, mode)) 2096 2097 PATHCALL(int, lchmod, DUALCALL_LCHMOD, \ 2098 (const char *path, mode_t mode), \ 2099 (const char *, mode_t), \ 2100 (path, mode)) 2101 2102 PATHCALL(int, statvfs1, DUALCALL_STATVFS1, \ 2103 (const char *path, struct statvfs *buf, int flags), \ 2104 (const char *, struct statvfs *, int), \ 2105 (path, buf, flags)) 2106 2107 PATHCALL(int, unlink, DUALCALL_UNLINK, \ 2108 (const char *path), \ 2109 (const char *), \ 2110 (path)) 2111 2112 PATHCALL(int, symlink, DUALCALL_SYMLINK, \ 2113 (const char *target, const char *path), \ 2114 (const char *, const char *), \ 2115 (target, path)) 2116 2117 PATHCALL(ssize_t, readlink, DUALCALL_READLINK, \ 2118 (const char *path, char *buf, size_t bufsiz), \ 2119 (const char *, char *, size_t), \ 2120 (path, buf, bufsiz)) 2121 2122 PATHCALL(int, mkdir, DUALCALL_MKDIR, \ 2123 (const char *path, mode_t mode), \ 2124 (const char *, mode_t), \ 2125 (path, mode)) 2126 2127 PATHCALL(int, rmdir, DUALCALL_RMDIR, \ 2128 (const char *path), \ 2129 (const char *), \ 2130 (path)) 2131 2132 PATHCALL(int, utimes, DUALCALL_UTIMES, \ 2133 (const char *path, const struct timeval *tv), \ 2134 (const char *, const struct timeval *), \ 2135 (path, tv)) 2136 2137 PATHCALL(int, lutimes, DUALCALL_LUTIMES, \ 2138 (const char *path, const struct timeval *tv), \ 2139 (const char *, const struct timeval *), \ 2140 (path, tv)) 2141 2142 PATHCALL(int, chflags, DUALCALL_CHFLAGS, \ 2143 (const char *path, u_long flags), \ 2144 (const char *, u_long), \ 2145 (path, flags)) 2146 2147 PATHCALL(int, lchflags, DUALCALL_LCHFLAGS, \ 2148 (const char *path, u_long flags), \ 2149 (const char *, u_long), \ 2150 (path, flags)) 2151 2152 PATHCALL(int, truncate, DUALCALL_TRUNCATE, \ 2153 (const char *path, off_t length), \ 2154 (const char *, off_t), \ 2155 (path, length)) 2156 2157 PATHCALL(int, access, DUALCALL_ACCESS, \ 2158 (const char *path, int mode), \ 2159 (const char *, int), \ 2160 (path, mode)) 2161 2162 PATHCALL(int, REALMKNOD, DUALCALL_MKNOD, \ 2163 (const char *path, mode_t mode, dev_t dev), \ 2164 (const char *, mode_t, dev_t), \ 2165 (path, mode, dev)) 2166 2167 /* 2168 * Note: with mount the decisive parameter is the mount 2169 * destination directory. This is because we don't really know 2170 * about the "source" directory in a generic call (and besides, 2171 * it might not even exist, cf. nfs). 2172 */ 2173 PATHCALL(int, REALMOUNT, DUALCALL_MOUNT, \ 2174 (const char *type, const char *path, int flags, \ 2175 void *data, size_t dlen), \ 2176 (const char *, const char *, int, void *, size_t), \ 2177 (type, path, flags, data, dlen)) 2178 2179 PATHCALL(int, unmount, DUALCALL_UNMOUNT, \ 2180 (const char *path, int flags), \ 2181 (const char *, int), \ 2182 (path, flags)) 2183 2184 #if __NetBSD_Prereq__(5,99,63) 2185 PATHCALL(int, __quotactl, DUALCALL_QUOTACTL, \ 2186 (const char *path, struct quotactl_args *args), \ 2187 (const char *, struct quotactl_args *), \ 2188 (path, args)) 2189 #elif __NetBSD_Prereq__(5,99,48) 2190 PATHCALL(int, OLDREALQUOTACTL, DUALCALL_QUOTACTL, \ 2191 (const char *path, struct plistref *p), \ 2192 (const char *, struct plistref *), \ 2193 (path, p)) 2194 #endif 2195 2196 PATHCALL(int, REALGETFH, DUALCALL_GETFH, \ 2197 (const char *path, void *fhp, size_t *fh_size), \ 2198 (const char *, void *, size_t *), \ 2199 (path, fhp, fh_size)) 2200 2201 /* 2202 * These act different on a per-process vfs configuration 2203 */ 2204 2205 VFSCALL(VFSBIT_GETVFSSTAT, int, getvfsstat, DUALCALL_GETVFSSTAT, \ 2206 (struct statvfs *buf, size_t buflen, int flags), \ 2207 (struct statvfs *, size_t, int), \ 2208 (buf, buflen, flags)) 2209 2210 VFSCALL(VFSBIT_FHCALLS, int, REALFHOPEN, DUALCALL_FHOPEN, \ 2211 (const void *fhp, size_t fh_size, int flags), \ 2212 (const char *, size_t, int), \ 2213 (fhp, fh_size, flags)) 2214 2215 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTAT, DUALCALL_FHSTAT, \ 2216 (const void *fhp, size_t fh_size, struct stat *sb), \ 2217 (const char *, size_t, struct stat *), \ 2218 (fhp, fh_size, sb)) 2219 2220 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTATVFS1, DUALCALL_FHSTATVFS1, \ 2221 (const void *fhp, size_t fh_size, struct statvfs *sb, int flgs),\ 2222 (const char *, size_t, struct statvfs *, int), \ 2223 (fhp, fh_size, sb, flgs)) 2224 2225 /* finally, put nfssvc here. "keep the namespace clean" */ 2226 2227 #include <nfs/rpcv2.h> 2228 #include <nfs/nfs.h> 2229 2230 int 2231 nfssvc(int flags, void *argstructp) 2232 { 2233 int (*op_nfssvc)(int, void *); 2234 2235 if (vfsbits & VFSBIT_NFSSVC){ 2236 struct nfsd_args *nfsdargs; 2237 2238 /* massage the socket descriptor if necessary */ 2239 if (flags == NFSSVC_ADDSOCK) { 2240 nfsdargs = argstructp; 2241 nfsdargs->sock = fd_host2rump(nfsdargs->sock); 2242 } 2243 op_nfssvc = GETSYSCALL(rump, NFSSVC); 2244 } else 2245 op_nfssvc = GETSYSCALL(host, NFSSVC); 2246 2247 return op_nfssvc(flags, argstructp); 2248 } 2249