/*	$NetBSD: rumpclient.c,v 1.51 2012/08/29 10:38:53 msaitoh Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Client side routines for rump syscall proxy.
 */

#include "rumpuser_port.h"

/*
 * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
 * use kqueue on other BSDs too, but I haven't tested those.  We
 * want to use kqueue because it will give us the ability to get signal
 * notifications but defer their handling to a stage where we do not
 * hold the communication lock.  Taking a signal while holding on to
 * that lock may cause a deadlock.  Therefore, block signals throughout
 * the RPC when using poll.  This unfortunately means that the normal
 * SIGINT way of stopping a process while it is undergoing rump kernel
 * RPC will not work.  If anyone knows which Linux system call handles
 * the above scenario correctly, I'm all ears.
 */

#ifdef __NetBSD__
#define USE_KQUEUE
#endif

#include <sys/cdefs.h>
__RCSID("$NetBSD: rumpclient.c,v 1.51 2012/08/29 10:38:53 msaitoh Exp $");

#include <sys/param.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/time.h>

#ifdef USE_KQUEUE
#include <sys/event.h>
#endif

#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <assert.h>
#include <dlfcn.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <link.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <rump/rumpclient.h>

#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t	(*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

#ifdef USE_KQUEUE
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
	    struct kevent *, size_t, const struct timespec *);
#endif

int	(*host_execve)(const char *, char *const[], char *const[]);

#include "sp_common.c"

static struct spclient clispc = {
	.spc_fd = -1,
};

static int kq = -1;
static sigset_t fullset;

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)

static int
send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
{
	struct timeval starttime, curtime;
	time_t prevreconmsg;
	unsigned reconretries;
	int rv;

	for (prevreconmsg = 0, reconretries = 0;;) {
		rv = dosend(spc, iov, iovlen);
		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
			/* no persistent connections */
			if (retrytimo == 0) {
				rv = ENOTCONN;
				break;
			}
			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
				_exit(1);

			if (!prevreconmsg) {
				prevreconmsg = time(NULL);
				gettimeofday(&starttime, NULL);
			}
			if (reconretries == 1) {
				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
					rv = ENOTCONN;
					break;
				}
				fprintf(stderr, "rump_sp: connection to "
				    "kernel lost, trying to reconnect ...\n");
			} else if (time(NULL) - prevreconmsg > 120) {
				fprintf(stderr, "rump_sp: still trying to "
				    "reconnect ...\n");
				prevreconmsg = time(NULL);
			}

			/* check that we aren't over the limit */
			if (retrytimo > 0) {
				struct timeval tmp;

				gettimeofday(&curtime, NULL);
				timersub(&curtime, &starttime, &tmp);
				if (tmp.tv_sec >= retrytimo) {
					fprintf(stderr, "rump_sp: reconnect "
					    "failed, %lld second timeout\n",
					    (long long)retrytimo);
					return ENOTCONN;
				}
			}

			/* ad hoc backoff timer */
			if (reconretries < 10) {
				usleep(100000 * reconretries);
			} else {
				sleep(MIN(10, reconretries-9));
			}
			reconretries++;

			if ((rv = doconnect()) != 0)
				continue;
			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
			    NULL, 0, true)) != 0)
				continue;

			/*
			 * ok, reconnect successful.  we need to return to
			 * the upper layer to get the entire PDU resent.
			 */
			if (reconretries != 1)
				fprintf(stderr, "rump_sp: reconnected!\n");
			rv = EAGAIN;
			break;
		} else {
			_DIAGASSERT(errno != EAGAIN);
			break;
		}
	}

	return rv;
}

static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(kq, NULL, 0,
				    kev, __arraycount(kev), NULL);

				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* USE_KQUEUE */
				struct pollfd pfd;

				pfd.fd = clispc.spc_fd;
				pfd.events = POLLIN;

				rv = host_poll(&pfd, 1, -1);
#endif /* !USE_KQUEUE */

				switch (readframe(spc)) {
				case 0:
					continue;
				case -1:
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}

static int
syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
	const void *data, size_t dlen, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	struct iovec iov[2];
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + dlen;
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_SYSCALL;
	rhdr.rsp_sysnum = sysnum;

	IOVPUT(iov[0], rhdr);
	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);

	do {
		putwait(spc, &rw, &rhdr);
		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
			unputwait(spc, &rw);
			continue;
		}

		rv = cliwaitresp(spc, &rw, omask, false);
		if (rv == ENOTCONN)
			rv = EAGAIN;
	} while (rv == EAGAIN);

	*resp = rw.rw_data;
	return rv;
}

static int
handshake_req(struct spclient *spc, int type, void *data,
	int cancel, bool haslock)
{
	struct handshake_fork rf;
	const char *myprogname = NULL; /* XXXgcc */
	struct rsp_hdr rhdr;
	struct respwait rw;
	sigset_t omask;
	size_t bonus;
	struct iovec iov[2];
	int rv;

	if (type == HANDSHAKE_FORK) {
		bonus = sizeof(rf);
	} else {
#ifdef __NetBSD__
		/* would procfs work on NetBSD too? */
		myprogname = getprogname();
#else
		int fd = open("/proc/self/comm", O_RDONLY);
		if (fd == -1) {
			myprogname = "???";
		} else {
			static char commname[128];

			memset(commname, 0, sizeof(commname));
			if (read(fd, commname, sizeof(commname)) > 0) {
				char *n;

				n = strrchr(commname, '\n');
				if (n)
					*n = '\0';
				myprogname = commname;
			} else {
				myprogname = "???";
			}
			close(fd);
		}
#endif
		bonus = strlen(myprogname)+1;
	}

	/* performs server handshake */
	rhdr.rsp_len = sizeof(rhdr) + bonus;
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_HANDSHAKE;
	rhdr.rsp_handshake = type;

	IOVPUT(iov[0], rhdr);

	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
	if (haslock)
		putwait_locked(spc, &rw, &rhdr);
	else
		putwait(spc, &rw, &rhdr);
	if (type == HANDSHAKE_FORK) {
		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
		rf.rf_cancel = cancel;
		IOVPUT(iov[1], rf);
	} else {
		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
	}
	rv = send_with_recon(spc, iov, __arraycount(iov));
	if (rv || cancel) {
		if (haslock)
			unputwait_locked(spc, &rw);
		else
			unputwait(spc, &rw);
		if (cancel) {
			goto out;
		}
	} else {
		rv = cliwaitresp(spc, &rw, &omask, haslock);
	}
	if (rv)
		goto out;

	rv = *(int *)rw.rw_data;
	free(rw.rw_data);

 out:
	pthread_sigmask(SIG_SETMASK, &omask, NULL);
	return rv;
}

static int
prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	struct iovec iov[1];
	int rv;

	rhdr.rsp_len = sizeof(rhdr);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_PREFORK;
	rhdr.rsp_error = 0;

	IOVPUT(iov[0], rhdr);

	do {
		putwait(spc, &rw, &rhdr);
		rv = send_with_recon(spc, iov, __arraycount(iov));
		if (rv != 0) {
			unputwait(spc, &rw);
			continue;
		}

		rv = cliwaitresp(spc, &rw, omask, false);
		if (rv == ENOTCONN)
			rv = EAGAIN;
	} while (rv == EAGAIN);

	*resp = rw.rw_data;
	return rv;
}

/*
 * prevent response code from deadlocking with reconnect code
 */
static int
resp_sendlock(struct spclient *spc)
{
	int rv = 0;

	pthread_mutex_lock(&spc->spc_mtx);
	while (spc->spc_ostatus != SPCSTATUS_FREE) {
		if (__predict_false(spc->spc_reconnecting)) {
			rv = EBUSY;
			goto out;
		}
		spc->spc_ostatus = SPCSTATUS_WANTED;
		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
	}
	spc->spc_ostatus = SPCSTATUS_BUSY;

 out:
	pthread_mutex_unlock(&spc->spc_mtx);
	return rv;
}

static void
send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
	int wantstr)
{
	struct rsp_hdr rhdr;
	struct iovec iov[2];

	if (wantstr)
		dlen = MIN(dlen, strlen(data)+1);

	rhdr.rsp_len = sizeof(rhdr) + dlen;
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_COPYIN;
	rhdr.rsp_sysnum = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT_WITHSIZE(iov[1], data, dlen);

	if (resp_sendlock(spc) != 0)
		return;
	(void)SENDIOV(spc, iov);
	sendunlock(spc);
}

static void
send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
{
	struct rsp_hdr rhdr;
	struct iovec iov[2];

	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_ANONMMAP;
	rhdr.rsp_sysnum = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], addr);

	if (resp_sendlock(spc) != 0)
		return;
	(void)SENDIOV(spc, iov);
	sendunlock(spc);
}

int
rumpclient_syscall(int sysnum, const void *data, size_t dlen,
	register_t *retval)
{
	struct rsp_sysresp *resp;
	sigset_t omask;
	void *rdata;
	int rv;

	pthread_sigmask(SIG_SETMASK, &fullset, &omask);

	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
	    sysnum, data, dlen));

	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
	if (rv)
		goto out;

	resp = rdata;
	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));

	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
	rv = resp->rsys_error;
	free(rdata);

 out:
	pthread_sigmask(SIG_SETMASK, &omask, NULL);
	return rv;
}

static void
handlereq(struct spclient *spc)
{
	struct rsp_copydata *copydata;
	struct rsp_hdr *rhdr = &spc->spc_hdr;
	void *mapaddr;
	size_t maplen;
	int reqtype = spc->spc_hdr.rsp_type;

	switch (reqtype) {
	case RUMPSP_COPYIN:
	case RUMPSP_COPYINSTR:
		/*LINTED*/
		copydata = (struct rsp_copydata *)spc->spc_buf;
		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
		    copydata->rcp_addr, copydata->rcp_len));
		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
		    copydata->rcp_addr, copydata->rcp_len,
		    reqtype == RUMPSP_COPYINSTR);
		break;
	case RUMPSP_COPYOUT:
	case RUMPSP_COPYOUTSTR:
		/*LINTED*/
		copydata = (struct rsp_copydata *)spc->spc_buf;
		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
		    copydata->rcp_addr, copydata->rcp_len));
		/*LINTED*/
		memcpy(copydata->rcp_addr, copydata->rcp_data,
		    copydata->rcp_len);
		break;
	case RUMPSP_ANONMMAP:
		/*LINTED*/
		maplen = *(size_t *)spc->spc_buf;
		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
		    MAP_ANON, -1, 0);
		if (mapaddr == MAP_FAILED)
			mapaddr = NULL;
		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
		break;
	case RUMPSP_RAISE:
		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
		raise((int)rhdr->rsp_signo);
		/*
		 * We most likely have signals blocked, but the signal
		 * will be handled soon enough when we return.
		 */
		break;
	default:
		printf("PANIC: INVALID TYPE %d\n", reqtype);
		abort();
		break;
	}

	spcfreebuf(spc);
}

static unsigned ptab_idx;
static struct sockaddr *serv_sa;

/* dup until we get a "good" fd which does not collide with stdio */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno;
	unsigned int i;

	for (i = 0; (myfd <= 2 || mustchange) && myfd != -1; i++) {
		assert(i < __arraycount(ofds));
		ofds[i] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			i--; /* prevent closing old fd */
			mustchange = 0;
		}
	}

	sverrno = 0;
	if (myfd == -1 && i > 0)
		sverrno = errno;

	while (i-- > 0) {
		host_close(ofds[i]);
	}

	if (sverrno)
		errno = sverrno;

	return myfd;
}

static int
doconnect(void)
{
	struct respwait rw;
	struct rsp_hdr rhdr;
	char banner[MAXBANNER];
	int s, error, flags;
	ssize_t n;

	if (kq != -1)
		host_close(kq);
	kq = -1;
	s = -1;

	if (clispc.spc_fd != -1)
		host_close(clispc.spc_fd);
	clispc.spc_fd = -1;

	/*
	 * for reconnect, gate everyone out of the receiver code
	 */
	putwait_locked(&clispc, &rw, &rhdr);

	pthread_mutex_lock(&clispc.spc_mtx);
	clispc.spc_reconnecting = 1;
	pthread_cond_broadcast(&clispc.spc_cv);
	clispc.spc_generation++;
	while (clispc.spc_istatus != SPCSTATUS_FREE) {
		clispc.spc_istatus = SPCSTATUS_WANTED;
		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
	}
	kickall(&clispc);

	/*
	 * we can release it already since we hold the
	 * send lock during reconnect
	 * XXX: assert it
	 */
	clispc.spc_istatus = SPCSTATUS_FREE;
	pthread_mutex_unlock(&clispc.spc_mtx);
	unputwait_locked(&clispc, &rw);

	free(clispc.spc_buf);
	clispc.spc_off = 0;

	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
	if (s == -1)
		return -1;

	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
		if (errno == EINTR)
			continue;
		ERRLOG(("rump_sp: client connect failed: %s\n",
		    strerror(errno)));
		return -1;
	}

	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
		ERRLOG(("rump_sp: connect hook failed\n"));
		return -1;
	}

	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
		ERRLOG(("rump_sp: failed to read banner\n"));
		return -1;
	}

	if (banner[n-1] != '\n') {
		ERRLOG(("rump_sp: invalid banner\n"));
		return -1;
	}
	banner[n] = '\0';
	/* XXX parse the banner some day */

	flags = host_fcntl(s, F_GETFL, 0);
	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
		return -1;
	}
	clispc.spc_fd = s;
	clispc.spc_state = SPCSTATE_RUNNING;
	clispc.spc_reconnecting = 0;

#ifdef USE_KQUEUE
	{
		struct kevent kev[NSIG+1];
		int i;

		/* setup kqueue, we want all signals and the fd */
		if ((kq = dupgood(host_kqueue(), 0)) == -1) {
			ERRLOG(("rump_sp: cannot setup kqueue"));
			return -1;
		}

		for (i = 0; i < NSIG; i++) {
			EV_SET(&kev[i], i+1,
			    EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
		}
		EV_SET(&kev[NSIG], clispc.spc_fd,
		    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
		if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
			ERRLOG(("rump_sp: kevent() failed"));
			return -1;
		}
	}
#endif /* USE_KQUEUE */

	return 0;
}

static int
doinit(void)
{

	TAILQ_INIT(&clispc.spc_respwait);
	pthread_mutex_init(&clispc.spc_mtx, NULL);
	pthread_cond_init(&clispc.spc_cv, NULL);

	return 0;
}

void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));

static pid_t init_done = 0;

int
rumpclient_init(void)
{
	char *p;
	int error;
	int rv = -1;
	int hstype;
	pid_t mypid;

	/*
	 * Make sure we're not riding the context of a previous
	 * host fork.  Note: it's *possible* that after n>1 forks
	 * we have the same pid as one of our exited parents, but
	 * I'm pretty sure there are 0 practical implications, since
	 * it means generations would have to skip rumpclient init.
	 */
	if (init_done == (mypid = getpid()))
		return 0;

	/* kq does not traverse fork() */
	if (init_done != 0)
		kq = -1;
	init_done = mypid;

	sigfillset(&fullset);

	/*
	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
	 * wann wird man je verstehen?  wann wird man je verstehen?
	 */
#define FINDSYM2(_name_,_syscall_)					\
	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
	    #_syscall_)) == NULL) {					\
		if (rumphijack_dlsym == rumpclient__dlsym)		\
			host_##_name_ = _name_; /* static fallback */	\
		if (host_##_name_ == NULL)				\
			errx(1, "cannot find %s: %s", #_syscall_,	\
			    dlerror());					\
	}
#define FINDSYM(_name_) FINDSYM2(_name_,_name_)
#ifdef __NetBSD__
	FINDSYM2(socket,__socket30)
#else
	FINDSYM(socket)
#endif

	FINDSYM(close)
	FINDSYM(connect)
	FINDSYM(fcntl)
	FINDSYM(poll)
	FINDSYM(read)
	FINDSYM(sendmsg)
	FINDSYM(setsockopt)
	FINDSYM(dup)
	FINDSYM(execve)

#ifdef USE_KQUEUE
	FINDSYM(kqueue)
#if !__NetBSD_Prereq__(5,99,7)
	FINDSYM(kevent)
#else
	FINDSYM2(kevent,_sys___kevent50)
#endif
#endif /* USE_KQUEUE */

#undef FINDSYM
#undef FINDSYM2

	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
		if ((p = getenv("RUMP_SERVER")) == NULL) {
			errno = ENOENT;
			goto out;
		}
	}

	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
		errno = error;
		goto out;
	}

	if (doinit() == -1)
		goto out;

	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
		unsetenv("RUMPCLIENT__EXECFD");
		hstype = HANDSHAKE_EXEC;
	} else {
		if (doconnect() == -1)
			goto out;
		hstype = HANDSHAKE_GUEST;
	}

	error = handshake_req(&clispc, hstype, NULL, 0, false);
	if (error) {
		pthread_mutex_destroy(&clispc.spc_mtx);
		pthread_cond_destroy(&clispc.spc_cv);
		if (clispc.spc_fd != -1)
			host_close(clispc.spc_fd);
		errno = error;
		goto out;
	}
	rv = 0;

 out:
	if (rv == -1)
		init_done = 0;
	return rv;
}

struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];
	struct spclient fork_spc;
	int fork_kq;
};

struct rumpclient_fork *
rumpclient_prefork(void)
{
	struct rumpclient_fork *rpf;
	sigset_t omask;
	void *resp;
	int rv;

	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
	rpf = malloc(sizeof(*rpf));
	if (rpf == NULL)
		goto out;

	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
		free(rpf);
		errno = rv;
		rpf = NULL;
		goto out;
	}

	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
	free(resp);

	rpf->fork_spc = clispc;
	rpf->fork_kq = kq;

 out:
	pthread_sigmask(SIG_SETMASK, &omask, NULL);
	return rpf;
}

int
rumpclient_fork_init(struct rumpclient_fork *rpf)
{
	int error;
	int osock;

	osock = clispc.spc_fd;
	memset(&clispc, 0, sizeof(clispc));
	clispc.spc_fd = osock;

	kq = -1; /* kqueue descriptor is not copied over fork() */

	if (doinit() == -1)
		return -1;
	if (doconnect() == -1)
		return -1;

	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
	    0, false);
	if (error) {
		pthread_mutex_destroy(&clispc.spc_mtx);
		pthread_cond_destroy(&clispc.spc_cv);
		errno = error;
		return -1;
	}

	return 0;
}

/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}

void
rumpclient_fork_vparent(struct rumpclient_fork *rpf)
{

	clispc = rpf->fork_spc;
	kq = rpf->fork_kq;
}

void
rumpclient_setconnretry(time_t timeout)
{

	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
		return; /* gigo */

	retrytimo = timeout;
}

int
rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
{
	int fd = *fdp;
	int untilfd, rv;
	int newfd;

	switch (variant) {
	case RUMPCLIENT_CLOSE_FCLOSEM:
		untilfd = MAX(clispc.spc_fd, kq);
		for (; fd <= untilfd; fd++) {
			if (fd == clispc.spc_fd || fd == kq)
				continue;
			rv = host_close(fd);
			if (rv == -1)
				return -1;
		}
		*fdp = fd;
		break;

	case RUMPCLIENT_CLOSE_CLOSE:
	case RUMPCLIENT_CLOSE_DUP2:
		if (fd == clispc.spc_fd) {
			newfd = dupgood(clispc.spc_fd, 1);
			if (newfd == -1)
				return -1;

#ifdef USE_KQUEUE
			{
				struct kevent kev[2];

				/*
				 * now, we have a new socket number, so change
				 * the file descriptor that kqueue is
				 * monitoring.  remove old and add new.
				 */
				EV_SET(&kev[0], clispc.spc_fd,
				    EVFILT_READ, EV_DELETE, 0, 0, 0);
				EV_SET(&kev[1], newfd,
				    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
				if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
					int sverrno = errno;
					host_close(newfd);
					errno = sverrno;
					return -1;
				}
				clispc.spc_fd = newfd;
			}
		}
		if (fd == kq) {
			newfd = dupgood(kq, 1);
			if (newfd == -1)
				return -1;
			kq = newfd;
#else /* USE_KQUEUE */
			clispc.spc_fd = newfd;
#endif /* !USE_KQUEUE */
		}
		break;
	}

	return 0;
}

pid_t
rumpclient_fork(void)
{

	return rumpclient__dofork(fork);
}

/*
 * Process is about to exec.  Save info about our existing connection
 * in the env.  rumpclient will check for this info in init().
 * This is mostly for the benefit of rumphijack, but regular applications
 * may use it as well.
 */
int
rumpclient_exec(const char *path, char *const argv[], char *const envp[])
{
	char buf[4096];
	char **newenv;
	char *envstr, *envstr2;
	size_t nelem;
	int rv, sverrno;

	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
	    clispc.spc_fd, kq);
	envstr = malloc(strlen(buf)+1);
	if (envstr == NULL) {
		return ENOMEM;
	}
	strcpy(envstr, buf);

	/* do we have a fully parsed url we want to forward in the env? */
	if (*parsedurl != '\0') {
		snprintf(buf, sizeof(buf),
		    "RUMP__PARSEDSERVER=%s", parsedurl);
		envstr2 = malloc(strlen(buf)+1);
		if (envstr2 == NULL) {
			free(envstr);
			return ENOMEM;
		}
		strcpy(envstr2, buf);
	} else {
		envstr2 = NULL;
	}

	for (nelem = 0; envp && envp[nelem]; nelem++)
		continue;

	newenv = malloc(sizeof(*newenv) * (nelem+3));
	if (newenv == NULL) {
		free(envstr2);
		free(envstr);
		return ENOMEM;
	}
	memcpy(&newenv[0], envp, nelem*sizeof(*envp));

	newenv[nelem] = envstr;
	newenv[nelem+1] = envstr2;
	newenv[nelem+2] = NULL;

	rv = host_execve(path, argv, newenv);

	_DIAGASSERT(rv != 0);
	sverrno = errno;
	free(envstr2);
	free(envstr);
	free(newenv);
	errno = sverrno;
	return rv;
}

int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	if (rumpclient_fork_init(rf) == -1)
		return -1;

	return 0;
}
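
/*
 * Illustrative usage sketch (editor's addition, not part of the library
 * itself): a minimal client program built on the public entry points
 * defined above.  It assumes a rump kernel server is already running and
 * that RUMP_SERVER points at it; the unix:// URL in the comment below is
 * only an example value.  Guarded by #if 0 so it does not affect the
 * compilation of this file.
 */
#if 0
#include <rump/rumpclient.h>

#include <err.h>
#include <stdlib.h>

int
main(void)
{

	/* e.g. export RUMP_SERVER=unix:///tmp/rumpserver (example path) */
	if (getenv("RUMP_SERVER") == NULL)
		errx(1, "RUMP_SERVER not set");

	/* connect to the server and perform the guest handshake */
	if (rumpclient_init() == -1)
		err(1, "rumpclient_init");

	/* if the connection drops, keep retrying for up to 10 seconds */
	rumpclient_setconnretry(10);

	/* rumpclient_syscall() / rump syscall wrappers would go here */
	return 0;
}
#endif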