1 /* $NetBSD: hijack.c,v 1.16 2011/01/19 11:27:01 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __RCSID("$NetBSD: hijack.c,v 1.16 2011/01/19 11:27:01 pooka Exp $"); 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/event.h> 34 #include <sys/ioctl.h> 35 #include <sys/socket.h> 36 #include <sys/poll.h> 37 38 #include <rump/rumpclient.h> 39 #include <rump/rump_syscalls.h> 40 41 #include <assert.h> 42 #include <dlfcn.h> 43 #include <err.h> 44 #include <errno.h> 45 #include <fcntl.h> 46 #include <poll.h> 47 #include <pthread.h> 48 #include <signal.h> 49 #include <stdarg.h> 50 #include <stdbool.h> 51 #include <stdio.h> 52 #include <stdlib.h> 53 #include <time.h> 54 #include <unistd.h> 55 56 enum { RUMPCALL_SOCKET, RUMPCALL_ACCEPT, RUMPCALL_BIND, RUMPCALL_CONNECT, 57 RUMPCALL_GETPEERNAME, RUMPCALL_GETSOCKNAME, RUMPCALL_LISTEN, 58 RUMPCALL_RECVFROM, RUMPCALL_RECVMSG, 59 RUMPCALL_SENDTO, RUMPCALL_SENDMSG, 60 RUMPCALL_GETSOCKOPT, RUMPCALL_SETSOCKOPT, 61 RUMPCALL_SHUTDOWN, 62 RUMPCALL_READ, RUMPCALL_READV, 63 RUMPCALL_WRITE, RUMPCALL_WRITEV, 64 RUMPCALL_IOCTL, RUMPCALL_FCNTL, 65 RUMPCALL_CLOSE, 66 RUMPCALL_POLLTS, 67 RUMPCALL__NUM 68 }; 69 70 #define RSYS_STRING(a) __STRING(a) 71 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a)) 72 73 const char *sysnames[] = { 74 RSYS_NAME(SOCKET), 75 RSYS_NAME(ACCEPT), 76 RSYS_NAME(BIND), 77 RSYS_NAME(CONNECT), 78 RSYS_NAME(GETPEERNAME), 79 RSYS_NAME(GETSOCKNAME), 80 RSYS_NAME(LISTEN), 81 RSYS_NAME(RECVFROM), 82 RSYS_NAME(RECVMSG), 83 RSYS_NAME(SENDTO), 84 RSYS_NAME(SENDMSG), 85 RSYS_NAME(GETSOCKOPT), 86 RSYS_NAME(SETSOCKOPT), 87 RSYS_NAME(SHUTDOWN), 88 RSYS_NAME(READ), 89 RSYS_NAME(READV), 90 RSYS_NAME(WRITE), 91 RSYS_NAME(WRITEV), 92 RSYS_NAME(IOCTL), 93 RSYS_NAME(FCNTL), 94 RSYS_NAME(CLOSE), 95 RSYS_NAME(POLLTS), 96 }; 97 98 static int (*host_socket)(int, int, int); 99 static int (*host_connect)(int, const struct sockaddr *, socklen_t); 100 static int (*host_bind)(int, const struct sockaddr *, socklen_t); 101 static int (*host_listen)(int, int); 102 static int (*host_accept)(int, struct sockaddr *, socklen_t *); 103 static int (*host_getpeername)(int, struct sockaddr *, socklen_t *); 104 static int (*host_getsockname)(int, struct sockaddr *, socklen_t *); 105 static int (*host_setsockopt)(int, int, int, const void *, socklen_t); 106 107 static ssize_t (*host_read)(int, void *, size_t); 108 static ssize_t (*host_readv)(int, const struct iovec *, int); 109 static ssize_t (*host_write)(int, const void *, size_t); 110 static ssize_t (*host_writev)(int, const struct iovec *, int); 111 static int (*host_ioctl)(int, unsigned long, ...); 112 static int (*host_fcntl)(int, int, ...); 113 static int (*host_close)(int); 114 static int (*host_pollts)(struct pollfd *, nfds_t, 115 const struct timespec *, const sigset_t *); 116 static pid_t (*host_fork)(void); 117 static int (*host_dup2)(int, int); 118 static int (*host_shutdown)(int, int); 119 /* XXX */ 120 static void *host_sendto; 121 static void *host_recvfrom; 122 123 static void *rumpcalls[RUMPCALL__NUM]; 124 125 /* 126 * Would be nice to get this automatically in sync with libc. 127 * Also, this does not work for compat-using binaries! 128 */ 129 130 #if !__NetBSD_Prereq__(5,99,7) 131 #define SELECT select 132 #define POLLTS pollts 133 #define POLL poll 134 #else 135 #define SELECT __select50 136 #define POLLTS __pollts50 137 #define POLL __poll50 138 139 int SELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *); 140 int POLLTS(struct pollfd *, nfds_t, const struct timespec *, const sigset_t *); 141 int POLL(struct pollfd *, nfds_t, int); 142 #endif 143 144 /* 145 * This is called from librumpclient in case of LD_PRELOAD. 146 * It ensures correct RTLD_NEXT. 147 */ 148 static void * 149 hijackdlsym(void *handle, const char *symbol) 150 { 151 152 return dlsym(handle, symbol); 153 } 154 155 /* low calorie sockets? */ 156 static bool hostlocalsockets = true; 157 158 static void __attribute__((constructor)) 159 rcinit(void) 160 { 161 int (*rumpcinit)(void); 162 void **rumpcdlsym; 163 void *hand; 164 int i; 165 166 hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL); 167 if (!hand) 168 err(1, "cannot open librumpclient.so"); 169 rumpcinit = dlsym(hand, "rumpclient_init"); 170 _DIAGASSERT(rumpcinit); 171 172 rumpcdlsym = dlsym(hand, "rumpclient_dlsym"); 173 *rumpcdlsym = hijackdlsym; 174 175 host_socket = dlsym(RTLD_NEXT, "__socket30"); 176 host_listen = dlsym(RTLD_NEXT, "listen"); 177 host_connect = dlsym(RTLD_NEXT, "connect"); 178 host_bind = dlsym(RTLD_NEXT, "bind"); 179 host_accept = dlsym(RTLD_NEXT, "accept"); 180 host_getpeername = dlsym(RTLD_NEXT, "getpeername"); 181 host_getsockname = dlsym(RTLD_NEXT, "getsockname"); 182 host_setsockopt = dlsym(RTLD_NEXT, "setsockopt"); 183 184 host_read = dlsym(RTLD_NEXT, "read"); 185 host_readv = dlsym(RTLD_NEXT, "readv"); 186 host_write = dlsym(RTLD_NEXT, "write"); 187 host_writev = dlsym(RTLD_NEXT, "writev"); 188 host_ioctl = dlsym(RTLD_NEXT, "ioctl"); 189 host_fcntl = dlsym(RTLD_NEXT, "fcntl"); 190 host_close = dlsym(RTLD_NEXT, "close"); 191 host_pollts = dlsym(RTLD_NEXT, "pollts"); 192 host_fork = dlsym(RTLD_NEXT, "fork"); 193 host_dup2 = dlsym(RTLD_NEXT, "dup2"); 194 host_shutdown = dlsym(RTLD_NEXT, "shutdown"); 195 host_sendto = dlsym(RTLD_NEXT, "sendto"); 196 host_recvfrom = dlsym(RTLD_NEXT, "recvfrom"); 197 198 for (i = 0; i < RUMPCALL__NUM; i++) { 199 rumpcalls[i] = dlsym(hand, sysnames[i]); 200 if (!rumpcalls[i]) { 201 fprintf(stderr, "rumphijack: cannot find symbol: %s\n", 202 sysnames[i]); 203 exit(1); 204 } 205 } 206 207 if (rumpcinit() == -1) 208 err(1, "rumpclient init"); 209 } 210 211 static unsigned dup2mask; 212 #define ISDUP2D(fd) (1<<(fd) & dup2mask) 213 214 //#define DEBUGJACK 215 #ifdef DEBUGJACK 216 #define DPRINTF(x) mydprintf x 217 static void 218 mydprintf(const char *fmt, ...) 219 { 220 va_list ap; 221 222 if (ISDUP2D(STDERR_FILENO)) 223 return; 224 225 va_start(ap, fmt); 226 vfprintf(stderr, fmt, ap); 227 va_end(ap); 228 } 229 230 #else 231 #define DPRINTF(x) 232 #endif 233 234 /* XXX: need runtime selection. low for now due to FD_SETSIZE */ 235 #define HIJACK_FDOFF 128 236 #define HIJACK_SELECT 128 /* XXX */ 237 #define HIJACK_ASSERT 128 /* XXX */ 238 static int 239 fd_rump2host(int fd) 240 { 241 242 if (fd == -1) 243 return fd; 244 245 if (!ISDUP2D(fd)) 246 fd += HIJACK_FDOFF; 247 248 return fd; 249 } 250 251 static int 252 fd_host2rump(int fd) 253 { 254 255 if (!ISDUP2D(fd)) 256 fd -= HIJACK_FDOFF; 257 return fd; 258 } 259 260 static bool 261 fd_isrump(int fd) 262 { 263 264 return ISDUP2D(fd) || fd >= HIJACK_FDOFF; 265 } 266 267 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT) 268 #undef HIJACK_FDOFF 269 270 int __socket30(int, int, int); 271 int 272 __socket30(int domain, int type, int protocol) 273 { 274 int (*rc_socket)(int, int, int); 275 int fd; 276 bool dohost; 277 278 dohost = hostlocalsockets && (domain == AF_LOCAL); 279 280 if (dohost) 281 rc_socket = host_socket; 282 else 283 rc_socket = rumpcalls[RUMPCALL_SOCKET]; 284 fd = rc_socket(domain, type, protocol); 285 286 if (!dohost) 287 fd = fd_rump2host(fd); 288 DPRINTF(("socket <- %d\n", fd)); 289 290 return fd; 291 } 292 293 int 294 accept(int s, struct sockaddr *addr, socklen_t *addrlen) 295 { 296 int (*rc_accept)(int, struct sockaddr *, socklen_t *); 297 int fd; 298 bool isrump; 299 300 isrump = fd_isrump(s); 301 302 DPRINTF(("accept -> %d", s)); 303 if (isrump) { 304 rc_accept = rumpcalls[RUMPCALL_ACCEPT]; 305 s = fd_host2rump(s); 306 } else { 307 rc_accept = host_accept; 308 } 309 fd = rc_accept(s, addr, addrlen); 310 if (fd != -1 && isrump) 311 fd = fd_rump2host(fd); 312 313 DPRINTF((" <- %d\n", fd)); 314 315 return fd; 316 } 317 318 int 319 bind(int s, const struct sockaddr *name, socklen_t namelen) 320 { 321 int (*rc_bind)(int, const struct sockaddr *, socklen_t); 322 323 DPRINTF(("bind -> %d\n", s)); 324 if (fd_isrump(s)) { 325 rc_bind = rumpcalls[RUMPCALL_BIND]; 326 s = fd_host2rump(s); 327 } else { 328 rc_bind = host_bind; 329 } 330 return rc_bind(s, name, namelen); 331 } 332 333 int 334 connect(int s, const struct sockaddr *name, socklen_t namelen) 335 { 336 int (*rc_connect)(int, const struct sockaddr *, socklen_t); 337 338 DPRINTF(("connect -> %d\n", s)); 339 if (fd_isrump(s)) { 340 rc_connect = rumpcalls[RUMPCALL_CONNECT]; 341 s = fd_host2rump(s); 342 } else { 343 rc_connect = host_connect; 344 } 345 346 return rc_connect(s, name, namelen); 347 } 348 349 int 350 getpeername(int s, struct sockaddr *name, socklen_t *namelen) 351 { 352 int (*rc_getpeername)(int, struct sockaddr *, socklen_t *); 353 354 DPRINTF(("getpeername -> %d\n", s)); 355 if (fd_isrump(s)) { 356 rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME]; 357 s = fd_host2rump(s); 358 } else { 359 rc_getpeername = host_getpeername; 360 } 361 return rc_getpeername(s, name, namelen); 362 } 363 364 int 365 getsockname(int s, struct sockaddr *name, socklen_t *namelen) 366 { 367 int (*rc_getsockname)(int, struct sockaddr *, socklen_t *); 368 369 DPRINTF(("getsockname -> %d\n", s)); 370 if (fd_isrump(s)) { 371 rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME]; 372 s = fd_host2rump(s); 373 } else { 374 rc_getsockname = host_getsockname; 375 } 376 return rc_getsockname(s, name, namelen); 377 } 378 379 int 380 listen(int s, int backlog) 381 { 382 int (*rc_listen)(int, int); 383 384 DPRINTF(("listen -> %d\n", s)); 385 if (fd_isrump(s)) { 386 rc_listen = rumpcalls[RUMPCALL_LISTEN]; 387 s = fd_host2rump(s); 388 } else { 389 rc_listen = host_listen; 390 } 391 return rc_listen(s, backlog); 392 } 393 394 ssize_t 395 recv(int s, void *buf, size_t len, int flags) 396 { 397 398 return recvfrom(s, buf, len, flags, NULL, NULL); 399 } 400 401 ssize_t 402 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, 403 socklen_t *fromlen) 404 { 405 int (*rc_recvfrom)(int, void *, size_t, int, 406 struct sockaddr *, socklen_t *); 407 408 DPRINTF(("recvfrom\n")); 409 if (fd_isrump(s)) { 410 rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM]; 411 s = fd_host2rump(s); 412 } else { 413 rc_recvfrom = host_recvfrom; 414 } 415 416 return rc_recvfrom(s, buf, len, flags, from, fromlen); 417 } 418 419 ssize_t 420 recvmsg(int s, struct msghdr *msg, int flags) 421 { 422 int (*rc_recvmsg)(int, struct msghdr *, int); 423 424 DPRINTF(("recvmsg\n")); 425 assertfd(s); 426 rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG]; 427 return rc_recvmsg(fd_host2rump(s), msg, flags); 428 } 429 430 ssize_t 431 send(int s, const void *buf, size_t len, int flags) 432 { 433 434 return sendto(s, buf, len, flags, NULL, 0); 435 } 436 437 ssize_t 438 sendto(int s, const void *buf, size_t len, int flags, 439 const struct sockaddr *to, socklen_t tolen) 440 { 441 int (*rc_sendto)(int, const void *, size_t, int, 442 const struct sockaddr *, socklen_t); 443 444 if (s == -1) 445 return len; 446 DPRINTF(("sendto\n")); 447 448 if (fd_isrump(s)) { 449 rc_sendto = rumpcalls[RUMPCALL_SENDTO]; 450 s = fd_host2rump(s); 451 } else { 452 rc_sendto = host_sendto; 453 } 454 return rc_sendto(s, buf, len, flags, to, tolen); 455 } 456 457 ssize_t 458 sendmsg(int s, const struct msghdr *msg, int flags) 459 { 460 int (*rc_sendmsg)(int, const struct msghdr *, int); 461 462 DPRINTF(("sendmsg\n")); 463 assertfd(s); 464 rc_sendmsg = rumpcalls[RUMPCALL_SENDTO]; 465 return rc_sendmsg(fd_host2rump(s), msg, flags); 466 } 467 468 int 469 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen) 470 { 471 int (*rc_getsockopt)(int, int, int, void *, socklen_t *); 472 473 DPRINTF(("getsockopt -> %d\n", s)); 474 assertfd(s); 475 rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT]; 476 return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen); 477 } 478 479 int 480 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen) 481 { 482 int (*rc_setsockopt)(int, int, int, const void *, socklen_t); 483 484 DPRINTF(("setsockopt -> %d\n", s)); 485 if (fd_isrump(s)) { 486 rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT]; 487 s = fd_host2rump(s); 488 } else { 489 rc_setsockopt = host_setsockopt; 490 } 491 return rc_setsockopt(s, level, optname, optval, optlen); 492 } 493 494 int 495 shutdown(int s, int how) 496 { 497 int (*rc_shutdown)(int, int); 498 499 DPRINTF(("shutdown -> %d\n", s)); 500 if (fd_isrump(s)) { 501 rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN]; 502 s = fd_host2rump(s); 503 } else { 504 rc_shutdown = host_shutdown; 505 } 506 return rc_shutdown(s, how); 507 } 508 509 /* 510 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since 511 * many programs do that. dup2 of a rump kernel fd to another value 512 * not >= fdoff is an error. 513 * 514 * Note: cannot rump2host newd, because it is often hardcoded. 515 * 516 * XXX: should disable debug prints after stdout/stderr are dup2'd 517 */ 518 int 519 dup2(int oldd, int newd) 520 { 521 int rv; 522 523 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd)); 524 525 if (fd_isrump(oldd)) { 526 if (!(newd >= 0 && newd <= 2)) 527 return EBADF; 528 oldd = fd_host2rump(oldd); 529 rv = rump_sys_dup2(oldd, newd); 530 if (rv != -1) 531 dup2mask |= 1<<newd; 532 } else { 533 rv = host_dup2(oldd, newd); 534 } 535 536 return rv; 537 } 538 539 /* 540 * We just wrap fork the appropriate rump client calls to preserve 541 * the file descriptors of the forked parent in the child, but 542 * prevent double use of connection fd. 543 */ 544 545 pid_t 546 fork() 547 { 548 struct rumpclient_fork *rf; 549 pid_t rv; 550 551 DPRINTF(("fork\n")); 552 553 if ((rf = rumpclient_prefork()) == NULL) 554 return -1; 555 556 switch ((rv = host_fork())) { 557 case -1: 558 /* XXX: cancel rf */ 559 break; 560 case 0: 561 if (rumpclient_fork_init(rf) == -1) 562 rv = -1; 563 break; 564 default: 565 break; 566 } 567 568 DPRINTF(("fork returns %d\n", rv)); 569 return rv; 570 } 571 572 /* 573 * Hybrids 574 */ 575 576 ssize_t 577 read(int fd, void *buf, size_t len) 578 { 579 ssize_t (*op_read)(int, void *, size_t); 580 ssize_t n; 581 582 DPRINTF(("read %d\n", fd)); 583 if (fd_isrump(fd)) { 584 fd = fd_host2rump(fd); 585 op_read = rumpcalls[RUMPCALL_READ]; 586 } else { 587 op_read = host_read; 588 } 589 590 n = op_read(fd, buf, len); 591 return n; 592 } 593 594 ssize_t 595 readv(int fd, const struct iovec *iov, int iovcnt) 596 { 597 ssize_t (*op_readv)(int, const struct iovec *, int); 598 599 DPRINTF(("readv %d\n", fd)); 600 if (fd_isrump(fd)) { 601 fd = fd_host2rump(fd); 602 op_readv = rumpcalls[RUMPCALL_READV]; 603 } else { 604 op_readv = host_readv; 605 } 606 607 return op_readv(fd, iov, iovcnt); 608 } 609 610 ssize_t 611 write(int fd, const void *buf, size_t len) 612 { 613 ssize_t (*op_write)(int, const void *, size_t); 614 615 if (fd_isrump(fd)) { 616 fd = fd_host2rump(fd); 617 op_write = rumpcalls[RUMPCALL_WRITE]; 618 } else { 619 op_write = host_write; 620 } 621 622 return op_write(fd, buf, len); 623 } 624 625 ssize_t 626 writev(int fd, const struct iovec *iov, int iovcnt) 627 { 628 ssize_t (*op_writev)(int, const struct iovec *, int); 629 630 DPRINTF(("writev %d\n", fd)); 631 if (fd_isrump(fd)) { 632 fd = fd_host2rump(fd); 633 op_writev = rumpcalls[RUMPCALL_WRITEV]; 634 } else { 635 op_writev = host_writev; 636 } 637 638 return op_writev(fd, iov, iovcnt); 639 } 640 641 int 642 ioctl(int fd, unsigned long cmd, ...) 643 { 644 int (*op_ioctl)(int, unsigned long cmd, ...); 645 va_list ap; 646 int rv; 647 648 DPRINTF(("ioctl\n")); 649 if (fd_isrump(fd)) { 650 fd = fd_host2rump(fd); 651 op_ioctl = rumpcalls[RUMPCALL_IOCTL]; 652 } else { 653 op_ioctl = host_ioctl; 654 } 655 656 va_start(ap, cmd); 657 rv = op_ioctl(fd, cmd, va_arg(ap, void *)); 658 va_end(ap); 659 return rv; 660 } 661 662 int 663 fcntl(int fd, int cmd, ...) 664 { 665 int (*op_fcntl)(int, int, ...); 666 va_list ap; 667 int rv; 668 669 DPRINTF(("fcntl\n")); 670 if (fd_isrump(fd)) { 671 fd = fd_host2rump(fd); 672 op_fcntl = rumpcalls[RUMPCALL_FCNTL]; 673 } else { 674 op_fcntl = host_fcntl; 675 } 676 677 va_start(ap, cmd); 678 rv = op_fcntl(fd, cmd, va_arg(ap, void *)); 679 va_end(ap); 680 return rv; 681 } 682 683 int 684 close(int fd) 685 { 686 int (*op_close)(int); 687 688 DPRINTF(("close %d\n", fd)); 689 if (fd_isrump(fd)) { 690 fd = fd_host2rump(fd); 691 op_close = rumpcalls[RUMPCALL_CLOSE]; 692 } else { 693 op_close = host_close; 694 } 695 696 return op_close(fd); 697 } 698 699 int 700 SELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 701 struct timeval *timeout) 702 { 703 struct pollfd *pfds; 704 struct timespec ts, *tsp = NULL; 705 nfds_t i, j, realnfds; 706 int rv, incr; 707 708 DPRINTF(("select\n")); 709 710 /* 711 * Well, first we must scan the fds to figure out how many 712 * fds there really are. This is because up to and including 713 * nb5 poll() silently refuses nfds > process_open_fds. 714 * Seems to be fixed in current, thank the maker. 715 * god damn cluster...bomb. 716 */ 717 718 for (i = 0, realnfds = 0; i < nfds; i++) { 719 if (readfds && FD_ISSET(i, readfds)) { 720 realnfds++; 721 continue; 722 } 723 if (writefds && FD_ISSET(i, writefds)) { 724 realnfds++; 725 continue; 726 } 727 if (exceptfds && FD_ISSET(i, exceptfds)) { 728 realnfds++; 729 continue; 730 } 731 } 732 733 if (realnfds) { 734 pfds = malloc(sizeof(*pfds) * realnfds); 735 if (!pfds) 736 return -1; 737 } else { 738 pfds = NULL; 739 } 740 741 for (i = 0, j = 0; i < nfds; i++) { 742 incr = 0; 743 pfds[j].events = pfds[j].revents = 0; 744 if (readfds && FD_ISSET(i, readfds)) { 745 pfds[j].fd = i; 746 pfds[j].events |= POLLIN; 747 incr=1; 748 } 749 if (writefds && FD_ISSET(i, writefds)) { 750 pfds[j].fd = i; 751 pfds[j].events |= POLLOUT; 752 incr=1; 753 } 754 if (exceptfds && FD_ISSET(i, exceptfds)) { 755 pfds[j].fd = i; 756 pfds[j].events |= POLLHUP|POLLERR; 757 incr=1; 758 } 759 if (incr) 760 j++; 761 } 762 763 if (timeout) { 764 TIMEVAL_TO_TIMESPEC(timeout, &ts); 765 tsp = &ts; 766 } 767 rv = pollts(pfds, realnfds, tsp, NULL); 768 if (rv <= 0) 769 goto out; 770 771 /* 772 * ok, harvest results. first zero out entries (can't use 773 * FD_ZERO for the obvious select-me-not reason). whee. 774 */ 775 for (i = 0; i < nfds; i++) { 776 if (readfds) 777 FD_CLR(i, readfds); 778 if (writefds) 779 FD_CLR(i, writefds); 780 if (exceptfds) 781 FD_CLR(i, exceptfds); 782 } 783 784 /* and then plug in the results */ 785 for (i = 0; i < realnfds; i++) { 786 if (readfds) { 787 if (pfds[i].revents & POLLIN) { 788 FD_SET(pfds[i].fd, readfds); 789 } 790 } 791 if (writefds) { 792 if (pfds[i].revents & POLLOUT) { 793 FD_SET(pfds[i].fd, writefds); 794 } 795 } 796 if (exceptfds) { 797 if (pfds[i].revents & (POLLHUP|POLLERR)) { 798 FD_SET(pfds[i].fd, exceptfds); 799 } 800 } 801 } 802 803 out: 804 free(pfds); 805 return rv; 806 } 807 808 static void 809 checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall) 810 { 811 nfds_t i; 812 813 for (i = 0; i < nfds; i++) { 814 if (fds[i].fd == -1) 815 continue; 816 817 if (fd_isrump(fds[i].fd)) 818 (*rumpcall)++; 819 else 820 (*hostcall)++; 821 } 822 } 823 824 static void 825 adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int)) 826 { 827 nfds_t i; 828 829 for (i = 0; i < nfds; i++) { 830 fds[i].fd = fdadj(fds[i].fd); 831 } 832 } 833 834 /* 835 * poll is easy as long as the call comes in the fds only in one 836 * kernel. otherwise its quite tricky... 837 */ 838 struct pollarg { 839 struct pollfd *pfds; 840 nfds_t nfds; 841 const struct timespec *ts; 842 const sigset_t *sigmask; 843 int pipefd; 844 int errnum; 845 }; 846 847 static void * 848 hostpoll(void *arg) 849 { 850 struct pollarg *parg = arg; 851 intptr_t rv; 852 853 rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask); 854 if (rv == -1) 855 parg->errnum = errno; 856 rump_sys_write(parg->pipefd, &rv, sizeof(rv)); 857 858 return (void *)(intptr_t)rv; 859 } 860 861 int 862 POLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts, 863 const sigset_t *sigmask) 864 { 865 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 866 const sigset_t *); 867 int hostcall = 0, rumpcall = 0; 868 pthread_t pt; 869 nfds_t i; 870 int rv; 871 872 DPRINTF(("poll\n")); 873 checkpoll(fds, nfds, &hostcall, &rumpcall); 874 875 if (hostcall && rumpcall) { 876 struct pollfd *pfd_host = NULL, *pfd_rump = NULL; 877 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1}; 878 struct pollarg parg; 879 uintptr_t lrv; 880 int sverrno = 0, trv; 881 882 /* 883 * ok, this is where it gets tricky. We must support 884 * this since it's a very common operation in certain 885 * types of software (telnet, netcat, etc). We allocate 886 * two vectors and run two poll commands in separate 887 * threads. Whichever returns first "wins" and the 888 * other kernel's fds won't show activity. 889 */ 890 rv = -1; 891 892 /* allocate full vector for O(n) joining after call */ 893 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1)); 894 if (!pfd_host) 895 goto out; 896 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1)); 897 if (!pfd_rump) { 898 goto out; 899 } 900 901 /* split vectors */ 902 for (i = 0; i < nfds; i++) { 903 if (fds[i].fd == -1) { 904 pfd_host[i].fd = -1; 905 pfd_rump[i].fd = -1; 906 } else if (fd_isrump(fds[i].fd)) { 907 pfd_host[i].fd = -1; 908 pfd_rump[i].fd = fd_host2rump(fds[i].fd); 909 pfd_rump[i].events = fds[i].events; 910 } else { 911 pfd_rump[i].fd = -1; 912 pfd_host[i].fd = fds[i].fd; 913 pfd_host[i].events = fds[i].events; 914 } 915 fds[i].revents = 0; 916 } 917 918 /* 919 * then, open two pipes, one for notifications 920 * to each kernel. 921 */ 922 if (rump_sys_pipe(rpipe) == -1) 923 goto out; 924 if (pipe(hpipe) == -1) 925 goto out; 926 927 pfd_host[nfds].fd = hpipe[0]; 928 pfd_host[nfds].events = POLLIN; 929 pfd_rump[nfds].fd = rpipe[0]; 930 pfd_rump[nfds].events = POLLIN; 931 932 /* 933 * then, create a thread to do host part and meanwhile 934 * do rump kernel part right here 935 */ 936 937 parg.pfds = pfd_host; 938 parg.nfds = nfds+1; 939 parg.ts = ts; 940 parg.sigmask = sigmask; 941 parg.pipefd = rpipe[1]; 942 pthread_create(&pt, NULL, hostpoll, &parg); 943 944 op_pollts = rumpcalls[RUMPCALL_POLLTS]; 945 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL); 946 sverrno = errno; 947 write(hpipe[1], &rv, sizeof(rv)); 948 pthread_join(pt, (void *)&trv); 949 950 /* check who "won" and merge results */ 951 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) { 952 rv = trv; 953 954 for (i = 0; i < nfds; i++) { 955 if (pfd_rump[i].fd != -1) 956 fds[i].revents = pfd_rump[i].revents; 957 } 958 sverrno = parg.errnum; 959 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) { 960 rv = trv; 961 962 for (i = 0; i < nfds; i++) { 963 if (pfd_host[i].fd != -1) 964 fds[i].revents = pfd_host[i].revents; 965 } 966 } else { 967 rv = 0; 968 } 969 970 out: 971 if (rpipe[0] != -1) 972 rump_sys_close(rpipe[0]); 973 if (rpipe[1] != -1) 974 rump_sys_close(rpipe[1]); 975 if (hpipe[0] != -1) 976 host_close(hpipe[0]); 977 if (hpipe[1] != -1) 978 host_close(hpipe[1]); 979 free(pfd_host); 980 free(pfd_rump); 981 errno = sverrno; 982 } else { 983 if (hostcall) { 984 op_pollts = host_pollts; 985 } else { 986 op_pollts = rumpcalls[RUMPCALL_POLLTS]; 987 adjustpoll(fds, nfds, fd_host2rump); 988 } 989 990 rv = op_pollts(fds, nfds, ts, sigmask); 991 if (rumpcall) 992 adjustpoll(fds, nfds, fd_rump2host); 993 } 994 995 return rv; 996 } 997 998 int 999 POLL(struct pollfd *fds, nfds_t nfds, int timeout) 1000 { 1001 struct timespec ts; 1002 struct timespec *tsp = NULL; 1003 1004 if (timeout != INFTIM) { 1005 ts.tv_sec = timeout / 1000; 1006 ts.tv_nsec = (timeout % 1000) * 1000*1000; 1007 1008 tsp = &ts; 1009 } 1010 1011 return pollts(fds, nfds, tsp, NULL); 1012 } 1013 1014 int 1015 kqueue(void) 1016 { 1017 1018 abort(); 1019 } 1020 1021 int 1022 kevent(int kq, const struct kevent *changelist, size_t nchanges, 1023 struct kevent *eventlist, size_t nevents, 1024 const struct timespec *timeout) 1025 { 1026 1027 abort(); 1028 } 1029