1 /* $NetBSD: rumpuser_sp.c,v 1.66 2014/06/14 11:52:42 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Sysproxy routines. This provides system RPC support over host sockets. 30 * The most notable limitation is that the client and server must share 31 * the same ABI. This does not mean that they have to be the same 32 * machine or that they need to run the same version of the host OS, 33 * just that they must agree on the data structures. This even *might* 34 * work correctly from one hardware architecture to another. 35 */ 36 37 #include "rumpuser_port.h" 38 39 #if !defined(lint) 40 __RCSID("$NetBSD: rumpuser_sp.c,v 1.66 2014/06/14 11:52:42 pooka Exp $"); 41 #endif /* !lint */ 42 43 #include <sys/types.h> 44 #include <sys/mman.h> 45 #include <sys/socket.h> 46 47 #include <arpa/inet.h> 48 #include <netinet/in.h> 49 #include <netinet/tcp.h> 50 51 #include <assert.h> 52 #include <errno.h> 53 #include <fcntl.h> 54 #include <poll.h> 55 #include <pthread.h> 56 #include <stdarg.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <unistd.h> 61 62 #include <rump/rump.h> /* XXX: for rfork flags */ 63 #include <rump/rumpuser.h> 64 65 #include "rumpuser_int.h" 66 67 #include "sp_common.c" 68 69 #ifndef MAXCLI 70 #define MAXCLI 256 71 #endif 72 #ifndef MAXWORKER 73 #define MAXWORKER 128 74 #endif 75 #ifndef IDLEWORKER 76 #define IDLEWORKER 16 77 #endif 78 int rumpsp_maxworker = MAXWORKER; 79 int rumpsp_idleworker = IDLEWORKER; 80 81 static struct pollfd pfdlist[MAXCLI]; 82 static struct spclient spclist[MAXCLI]; 83 static unsigned int disco; 84 static volatile int spfini; 85 86 static char banner[MAXBANNER]; 87 88 #define PROTOMAJOR 0 89 #define PROTOMINOR 4 90 91 92 /* how to use atomic ops on Linux? */ 93 #if defined(__linux__) || defined(__APPLE__) || defined(__CYGWIN__) || defined(__OpenBSD__) 94 static pthread_mutex_t discomtx = PTHREAD_MUTEX_INITIALIZER; 95 96 static void 97 signaldisco(void) 98 { 99 100 pthread_mutex_lock(&discomtx); 101 disco++; 102 pthread_mutex_unlock(&discomtx); 103 } 104 105 static unsigned int 106 getdisco(void) 107 { 108 unsigned int discocnt; 109 110 pthread_mutex_lock(&discomtx); 111 discocnt = disco; 112 disco = 0; 113 pthread_mutex_unlock(&discomtx); 114 115 return discocnt; 116 } 117 118 #elif defined(__FreeBSD__) || defined(__DragonFly__) 119 120 #include <machine/atomic.h> 121 #define signaldisco() atomic_add_int(&disco, 1) 122 #define getdisco() atomic_readandclear_int(&disco) 123 124 #else /* NetBSD */ 125 126 #include <sys/atomic.h> 127 #define signaldisco() atomic_inc_uint(&disco) 128 #define getdisco() atomic_swap_uint(&disco, 0) 129 130 #endif 131 132 133 struct prefork { 134 uint32_t pf_auth[AUTHLEN]; 135 struct lwp *pf_lwp; 136 137 LIST_ENTRY(prefork) pf_entries; /* global list */ 138 LIST_ENTRY(prefork) pf_spcentries; /* linked from forking spc */ 139 }; 140 static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks); 141 static pthread_mutex_t pfmtx; 142 143 /* 144 * This version is for the server. It's optimized for multiple threads 145 * and is *NOT* reentrant wrt to signals. 146 */ 147 static int 148 waitresp(struct spclient *spc, struct respwait *rw) 149 { 150 int spcstate; 151 int rv = 0; 152 153 pthread_mutex_lock(&spc->spc_mtx); 154 sendunlockl(spc); 155 while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) { 156 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx); 157 } 158 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries); 159 spcstate = spc->spc_state; 160 pthread_mutex_unlock(&spc->spc_mtx); 161 162 pthread_cond_destroy(&rw->rw_cv); 163 164 if (rv) 165 return rv; 166 if (spcstate == SPCSTATE_DYING) 167 return ENOTCONN; 168 return rw->rw_error; 169 } 170 171 /* 172 * Manual wrappers, since librump does not have access to the 173 * user namespace wrapped interfaces. 174 */ 175 176 static void 177 lwproc_switch(struct lwp *l) 178 { 179 180 rumpuser__hyp.hyp_schedule(); 181 rumpuser__hyp.hyp_lwproc_switch(l); 182 rumpuser__hyp.hyp_unschedule(); 183 } 184 185 static void 186 lwproc_release(void) 187 { 188 189 rumpuser__hyp.hyp_schedule(); 190 rumpuser__hyp.hyp_lwproc_release(); 191 rumpuser__hyp.hyp_unschedule(); 192 } 193 194 static int 195 lwproc_rfork(struct spclient *spc, int flags, const char *comm) 196 { 197 int rv; 198 199 rumpuser__hyp.hyp_schedule(); 200 rv = rumpuser__hyp.hyp_lwproc_rfork(spc, flags, comm); 201 rumpuser__hyp.hyp_unschedule(); 202 203 return rv; 204 } 205 206 static int 207 lwproc_newlwp(pid_t pid) 208 { 209 int rv; 210 211 rumpuser__hyp.hyp_schedule(); 212 rv = rumpuser__hyp.hyp_lwproc_newlwp(pid); 213 rumpuser__hyp.hyp_unschedule(); 214 215 return rv; 216 } 217 218 static struct lwp * 219 lwproc_curlwp(void) 220 { 221 struct lwp *l; 222 223 rumpuser__hyp.hyp_schedule(); 224 l = rumpuser__hyp.hyp_lwproc_curlwp(); 225 rumpuser__hyp.hyp_unschedule(); 226 227 return l; 228 } 229 230 static pid_t 231 lwproc_getpid(void) 232 { 233 pid_t p; 234 235 rumpuser__hyp.hyp_schedule(); 236 p = rumpuser__hyp.hyp_getpid(); 237 rumpuser__hyp.hyp_unschedule(); 238 239 return p; 240 } 241 242 static void 243 lwproc_execnotify(const char *comm) 244 { 245 246 rumpuser__hyp.hyp_schedule(); 247 rumpuser__hyp.hyp_execnotify(comm); 248 rumpuser__hyp.hyp_unschedule(); 249 } 250 251 static void 252 lwproc_lwpexit(void) 253 { 254 255 rumpuser__hyp.hyp_schedule(); 256 rumpuser__hyp.hyp_lwpexit(); 257 rumpuser__hyp.hyp_unschedule(); 258 } 259 260 static int 261 rumpsyscall(int sysnum, void *data, register_t *regrv) 262 { 263 long retval[2] = {0, 0}; 264 int rv; 265 266 rumpuser__hyp.hyp_schedule(); 267 rv = rumpuser__hyp.hyp_syscall(sysnum, data, retval); 268 rumpuser__hyp.hyp_unschedule(); 269 270 regrv[0] = retval[0]; 271 regrv[1] = retval[1]; 272 return rv; 273 } 274 275 static uint64_t 276 nextreq(struct spclient *spc) 277 { 278 uint64_t nw; 279 280 pthread_mutex_lock(&spc->spc_mtx); 281 nw = spc->spc_nextreq++; 282 pthread_mutex_unlock(&spc->spc_mtx); 283 284 return nw; 285 } 286 287 /* 288 * XXX: we send responses with "blocking" I/O. This is not 289 * ok for the main thread. XXXFIXME 290 */ 291 292 static void 293 send_error_resp(struct spclient *spc, uint64_t reqno, enum rumpsp_err error) 294 { 295 struct rsp_hdr rhdr; 296 struct iovec iov[1]; 297 298 rhdr.rsp_len = sizeof(rhdr); 299 rhdr.rsp_reqno = reqno; 300 rhdr.rsp_class = RUMPSP_ERROR; 301 rhdr.rsp_type = 0; 302 rhdr.rsp_error = error; 303 304 IOVPUT(iov[0], rhdr); 305 306 sendlock(spc); 307 (void)SENDIOV(spc, iov); 308 sendunlock(spc); 309 } 310 311 static int 312 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error) 313 { 314 struct rsp_hdr rhdr; 315 struct iovec iov[2]; 316 int rv; 317 318 rhdr.rsp_len = sizeof(rhdr) + sizeof(error); 319 rhdr.rsp_reqno = reqno; 320 rhdr.rsp_class = RUMPSP_RESP; 321 rhdr.rsp_type = RUMPSP_HANDSHAKE; 322 rhdr.rsp_error = 0; 323 324 IOVPUT(iov[0], rhdr); 325 IOVPUT(iov[1], error); 326 327 sendlock(spc); 328 rv = SENDIOV(spc, iov); 329 sendunlock(spc); 330 331 return rv; 332 } 333 334 static int 335 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error, 336 register_t *retval) 337 { 338 struct rsp_hdr rhdr; 339 struct rsp_sysresp sysresp; 340 struct iovec iov[2]; 341 int rv; 342 343 rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp); 344 rhdr.rsp_reqno = reqno; 345 rhdr.rsp_class = RUMPSP_RESP; 346 rhdr.rsp_type = RUMPSP_SYSCALL; 347 rhdr.rsp_sysnum = 0; 348 349 sysresp.rsys_error = error; 350 memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval)); 351 352 IOVPUT(iov[0], rhdr); 353 IOVPUT(iov[1], sysresp); 354 355 sendlock(spc); 356 rv = SENDIOV(spc, iov); 357 sendunlock(spc); 358 359 return rv; 360 } 361 362 static int 363 send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth) 364 { 365 struct rsp_hdr rhdr; 366 struct iovec iov[2]; 367 int rv; 368 369 rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth); 370 rhdr.rsp_reqno = reqno; 371 rhdr.rsp_class = RUMPSP_RESP; 372 rhdr.rsp_type = RUMPSP_PREFORK; 373 rhdr.rsp_sysnum = 0; 374 375 IOVPUT(iov[0], rhdr); 376 IOVPUT_WITHSIZE(iov[1], auth, AUTHLEN*sizeof(*auth)); 377 378 sendlock(spc); 379 rv = SENDIOV(spc, iov); 380 sendunlock(spc); 381 382 return rv; 383 } 384 385 static int 386 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen, 387 int wantstr, void **resp) 388 { 389 struct rsp_hdr rhdr; 390 struct rsp_copydata copydata; 391 struct respwait rw; 392 struct iovec iov[2]; 393 int rv; 394 395 DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr)); 396 397 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata); 398 rhdr.rsp_class = RUMPSP_REQ; 399 if (wantstr) 400 rhdr.rsp_type = RUMPSP_COPYINSTR; 401 else 402 rhdr.rsp_type = RUMPSP_COPYIN; 403 rhdr.rsp_sysnum = 0; 404 405 copydata.rcp_addr = __UNCONST(remaddr); 406 copydata.rcp_len = *dlen; 407 408 IOVPUT(iov[0], rhdr); 409 IOVPUT(iov[1], copydata); 410 411 putwait(spc, &rw, &rhdr); 412 rv = SENDIOV(spc, iov); 413 if (rv) { 414 unputwait(spc, &rw); 415 return rv; 416 } 417 418 rv = waitresp(spc, &rw); 419 420 DPRINTF(("copyin: response %d\n", rv)); 421 422 *resp = rw.rw_data; 423 if (wantstr) 424 *dlen = rw.rw_dlen; 425 426 return rv; 427 428 } 429 430 static int 431 send_copyout_req(struct spclient *spc, const void *remaddr, 432 const void *data, size_t dlen) 433 { 434 struct rsp_hdr rhdr; 435 struct rsp_copydata copydata; 436 struct iovec iov[3]; 437 int rv; 438 439 DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr)); 440 441 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen; 442 rhdr.rsp_reqno = nextreq(spc); 443 rhdr.rsp_class = RUMPSP_REQ; 444 rhdr.rsp_type = RUMPSP_COPYOUT; 445 rhdr.rsp_sysnum = 0; 446 447 copydata.rcp_addr = __UNCONST(remaddr); 448 copydata.rcp_len = dlen; 449 450 IOVPUT(iov[0], rhdr); 451 IOVPUT(iov[1], copydata); 452 IOVPUT_WITHSIZE(iov[2], __UNCONST(data), dlen); 453 454 sendlock(spc); 455 rv = SENDIOV(spc, iov); 456 sendunlock(spc); 457 458 return rv; 459 } 460 461 static int 462 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp) 463 { 464 struct rsp_hdr rhdr; 465 struct respwait rw; 466 struct iovec iov[2]; 467 int rv; 468 469 DPRINTF(("anonmmap_req: %zu bytes\n", howmuch)); 470 471 rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch); 472 rhdr.rsp_class = RUMPSP_REQ; 473 rhdr.rsp_type = RUMPSP_ANONMMAP; 474 rhdr.rsp_sysnum = 0; 475 476 IOVPUT(iov[0], rhdr); 477 IOVPUT(iov[1], howmuch); 478 479 putwait(spc, &rw, &rhdr); 480 rv = SENDIOV(spc, iov); 481 if (rv) { 482 unputwait(spc, &rw); 483 return rv; 484 } 485 486 rv = waitresp(spc, &rw); 487 488 *resp = rw.rw_data; 489 490 DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp)); 491 492 return rv; 493 } 494 495 static int 496 send_raise_req(struct spclient *spc, int signo) 497 { 498 struct rsp_hdr rhdr; 499 struct iovec iov[1]; 500 int rv; 501 502 rhdr.rsp_len = sizeof(rhdr); 503 rhdr.rsp_class = RUMPSP_REQ; 504 rhdr.rsp_type = RUMPSP_RAISE; 505 rhdr.rsp_signo = signo; 506 507 IOVPUT(iov[0], rhdr); 508 509 sendlock(spc); 510 rv = SENDIOV(spc, iov); 511 sendunlock(spc); 512 513 return rv; 514 } 515 516 static void 517 spcref(struct spclient *spc) 518 { 519 520 pthread_mutex_lock(&spc->spc_mtx); 521 spc->spc_refcnt++; 522 pthread_mutex_unlock(&spc->spc_mtx); 523 } 524 525 static void 526 spcrelease(struct spclient *spc) 527 { 528 int ref; 529 530 pthread_mutex_lock(&spc->spc_mtx); 531 ref = --spc->spc_refcnt; 532 if (__predict_false(spc->spc_inexec && ref <= 2)) 533 pthread_cond_broadcast(&spc->spc_cv); 534 pthread_mutex_unlock(&spc->spc_mtx); 535 536 if (ref > 0) 537 return; 538 539 DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd)); 540 541 _DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait)); 542 _DIAGASSERT(spc->spc_buf == NULL); 543 544 if (spc->spc_mainlwp) { 545 lwproc_switch(spc->spc_mainlwp); 546 lwproc_release(); 547 } 548 spc->spc_mainlwp = NULL; 549 550 close(spc->spc_fd); 551 spc->spc_fd = -1; 552 spc->spc_state = SPCSTATE_NEW; 553 554 signaldisco(); 555 } 556 557 static void 558 serv_handledisco(unsigned int idx) 559 { 560 struct spclient *spc = &spclist[idx]; 561 int dolwpexit; 562 563 DPRINTF(("rump_sp: disconnecting [%u]\n", idx)); 564 565 pfdlist[idx].fd = -1; 566 pfdlist[idx].revents = 0; 567 pthread_mutex_lock(&spc->spc_mtx); 568 spc->spc_state = SPCSTATE_DYING; 569 kickall(spc); 570 sendunlockl(spc); 571 /* exec uses mainlwp in another thread, but also nuked all lwps */ 572 dolwpexit = !spc->spc_inexec; 573 pthread_mutex_unlock(&spc->spc_mtx); 574 575 if (dolwpexit && spc->spc_mainlwp) { 576 lwproc_switch(spc->spc_mainlwp); 577 lwproc_lwpexit(); 578 lwproc_switch(NULL); 579 } 580 581 /* 582 * Nobody's going to attempt to send/receive anymore, 583 * so reinit info relevant to that. 584 */ 585 /*LINTED:pointer casts may be ok*/ 586 memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF); 587 588 spcrelease(spc); 589 } 590 591 static void 592 serv_shutdown(void) 593 { 594 struct spclient *spc; 595 unsigned int i; 596 597 for (i = 1; i < MAXCLI; i++) { 598 spc = &spclist[i]; 599 if (spc->spc_fd == -1) 600 continue; 601 602 shutdown(spc->spc_fd, SHUT_RDWR); 603 serv_handledisco(i); 604 605 spcrelease(spc); 606 } 607 } 608 609 static unsigned 610 serv_handleconn(int fd, connecthook_fn connhook, int busy) 611 { 612 struct sockaddr_storage ss; 613 socklen_t sl = sizeof(ss); 614 int newfd, flags; 615 unsigned i; 616 617 /*LINTED: cast ok */ 618 newfd = accept(fd, (struct sockaddr *)&ss, &sl); 619 if (newfd == -1) 620 return 0; 621 622 if (busy) { 623 close(newfd); /* EBUSY */ 624 return 0; 625 } 626 627 flags = fcntl(newfd, F_GETFL, 0); 628 if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) { 629 close(newfd); 630 return 0; 631 } 632 633 if (connhook(newfd) != 0) { 634 close(newfd); 635 return 0; 636 } 637 638 /* write out a banner for the client */ 639 if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL) 640 != (ssize_t)strlen(banner)) { 641 close(newfd); 642 return 0; 643 } 644 645 /* find empty slot the simple way */ 646 for (i = 0; i < MAXCLI; i++) { 647 if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW) 648 break; 649 } 650 651 /* 652 * Although not finding a slot is impossible (cf. how this routine 653 * is called), the compiler can still think that i == MAXCLI 654 * if this code is either compiled with NDEBUG or the platform 655 * does not use __dead for assert(). Therefore, add an explicit 656 * check to avoid an array-bounds error. 657 */ 658 /* assert(i < MAXCLI); */ 659 if (i == MAXCLI) 660 abort(); 661 662 pfdlist[i].fd = newfd; 663 spclist[i].spc_fd = newfd; 664 spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */ 665 spclist[i].spc_refcnt = 1; 666 667 TAILQ_INIT(&spclist[i].spc_respwait); 668 669 DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i)); 670 671 return i; 672 } 673 674 static void 675 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data) 676 { 677 register_t retval[2] = {0, 0}; 678 int rv, sysnum; 679 680 sysnum = (int)rhdr->rsp_sysnum; 681 DPRINTF(("rump_sp: handling syscall %d from client %d\n", 682 sysnum, spc->spc_pid)); 683 684 if (__predict_false((rv = lwproc_newlwp(spc->spc_pid)) != 0)) { 685 retval[0] = -1; 686 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval); 687 return; 688 } 689 spc->spc_syscallreq = rhdr->rsp_reqno; 690 rv = rumpsyscall(sysnum, data, retval); 691 spc->spc_syscallreq = 0; 692 lwproc_release(); 693 694 DPRINTF(("rump_sp: got return value %d & %d/%d\n", 695 rv, retval[0], retval[1])); 696 697 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval); 698 } 699 700 static void 701 serv_handleexec(struct spclient *spc, struct rsp_hdr *rhdr, char *comm) 702 { 703 size_t commlen = rhdr->rsp_len - HDRSZ; 704 705 pthread_mutex_lock(&spc->spc_mtx); 706 /* one for the connection and one for us */ 707 while (spc->spc_refcnt > 2) 708 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx); 709 pthread_mutex_unlock(&spc->spc_mtx); 710 711 /* 712 * ok, all the threads are dead (or one is still alive and 713 * the connection is dead, in which case this doesn't matter 714 * very much). proceed with exec. 715 */ 716 717 /* ensure comm is 0-terminated */ 718 /* TODO: make sure it contains sensible chars? */ 719 comm[commlen] = '\0'; 720 721 lwproc_switch(spc->spc_mainlwp); 722 lwproc_execnotify(comm); 723 lwproc_switch(NULL); 724 725 pthread_mutex_lock(&spc->spc_mtx); 726 spc->spc_inexec = 0; 727 pthread_mutex_unlock(&spc->spc_mtx); 728 send_handshake_resp(spc, rhdr->rsp_reqno, 0); 729 } 730 731 enum sbatype { SBA_SYSCALL, SBA_EXEC }; 732 733 struct servbouncearg { 734 struct spclient *sba_spc; 735 struct rsp_hdr sba_hdr; 736 enum sbatype sba_type; 737 uint8_t *sba_data; 738 739 TAILQ_ENTRY(servbouncearg) sba_entries; 740 }; 741 static pthread_mutex_t sbamtx; 742 static pthread_cond_t sbacv; 743 static int nworker, idleworker, nwork; 744 static TAILQ_HEAD(, servbouncearg) wrklist = TAILQ_HEAD_INITIALIZER(wrklist); 745 746 /*ARGSUSED*/ 747 static void * 748 serv_workbouncer(void *arg) 749 { 750 struct servbouncearg *sba; 751 752 for (;;) { 753 pthread_mutex_lock(&sbamtx); 754 if (__predict_false(idleworker - nwork >= rumpsp_idleworker)) { 755 nworker--; 756 pthread_mutex_unlock(&sbamtx); 757 break; 758 } 759 idleworker++; 760 while (TAILQ_EMPTY(&wrklist)) { 761 _DIAGASSERT(nwork == 0); 762 pthread_cond_wait(&sbacv, &sbamtx); 763 } 764 idleworker--; 765 766 sba = TAILQ_FIRST(&wrklist); 767 TAILQ_REMOVE(&wrklist, sba, sba_entries); 768 nwork--; 769 pthread_mutex_unlock(&sbamtx); 770 771 if (__predict_true(sba->sba_type == SBA_SYSCALL)) { 772 serv_handlesyscall(sba->sba_spc, 773 &sba->sba_hdr, sba->sba_data); 774 } else { 775 _DIAGASSERT(sba->sba_type == SBA_EXEC); 776 serv_handleexec(sba->sba_spc, &sba->sba_hdr, 777 (char *)sba->sba_data); 778 } 779 spcrelease(sba->sba_spc); 780 free(sba->sba_data); 781 free(sba); 782 } 783 784 return NULL; 785 } 786 787 static int 788 sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr) 789 { 790 struct spclient *spc = arg; 791 void *rdata = NULL; /* XXXuninit */ 792 int rv, nlocks; 793 794 rumpkern_unsched(&nlocks, NULL); 795 796 rv = copyin_req(spc, raddr, len, wantstr, &rdata); 797 if (rv) 798 goto out; 799 800 memcpy(laddr, rdata, *len); 801 free(rdata); 802 803 out: 804 rumpkern_sched(nlocks, NULL); 805 if (rv) 806 rv = EFAULT; 807 ET(rv); 808 } 809 810 int 811 rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len) 812 { 813 int rv; 814 815 rv = sp_copyin(arg, raddr, laddr, &len, 0); 816 ET(rv); 817 } 818 819 int 820 rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len) 821 { 822 int rv; 823 824 rv = sp_copyin(arg, raddr, laddr, len, 1); 825 ET(rv); 826 } 827 828 static int 829 sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen) 830 { 831 struct spclient *spc = arg; 832 int nlocks, rv; 833 834 rumpkern_unsched(&nlocks, NULL); 835 rv = send_copyout_req(spc, raddr, laddr, dlen); 836 rumpkern_sched(nlocks, NULL); 837 838 if (rv) 839 rv = EFAULT; 840 ET(rv); 841 } 842 843 int 844 rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen) 845 { 846 int rv; 847 848 rv = sp_copyout(arg, laddr, raddr, dlen); 849 ET(rv); 850 } 851 852 int 853 rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen) 854 { 855 int rv; 856 857 rv = sp_copyout(arg, laddr, raddr, *dlen); 858 ET(rv); 859 } 860 861 int 862 rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr) 863 { 864 struct spclient *spc = arg; 865 void *resp, *rdata; 866 int nlocks, rv; 867 868 rumpkern_unsched(&nlocks, NULL); 869 870 rv = anonmmap_req(spc, howmuch, &rdata); 871 if (rv) { 872 rv = EFAULT; 873 goto out; 874 } 875 876 resp = *(void **)rdata; 877 free(rdata); 878 879 if (resp == NULL) { 880 rv = ENOMEM; 881 } 882 883 *addr = resp; 884 885 out: 886 rumpkern_sched(nlocks, NULL); 887 ET(rv); 888 } 889 890 int 891 rumpuser_sp_raise(void *arg, int signo) 892 { 893 struct spclient *spc = arg; 894 int rv, nlocks; 895 896 rumpkern_unsched(&nlocks, NULL); 897 rv = send_raise_req(spc, signo); 898 rumpkern_sched(nlocks, NULL); 899 900 return rv; 901 } 902 903 static pthread_attr_t pattr_detached; 904 static void 905 schedulework(struct spclient *spc, enum sbatype sba_type) 906 { 907 struct servbouncearg *sba; 908 pthread_t pt; 909 uint64_t reqno; 910 int retries = 0; 911 912 reqno = spc->spc_hdr.rsp_reqno; 913 while ((sba = malloc(sizeof(*sba))) == NULL) { 914 if (nworker == 0 || retries > 10) { 915 send_error_resp(spc, reqno, RUMPSP_ERR_TRYAGAIN); 916 spcfreebuf(spc); 917 return; 918 } 919 /* slim chance of more memory? */ 920 usleep(10000); 921 } 922 923 sba->sba_spc = spc; 924 sba->sba_type = sba_type; 925 sba->sba_hdr = spc->spc_hdr; 926 sba->sba_data = spc->spc_buf; 927 spcresetbuf(spc); 928 929 spcref(spc); 930 931 pthread_mutex_lock(&sbamtx); 932 TAILQ_INSERT_TAIL(&wrklist, sba, sba_entries); 933 nwork++; 934 if (nwork <= idleworker) { 935 /* do we have a daemon's tool (i.e. idle threads)? */ 936 pthread_cond_signal(&sbacv); 937 } else if (nworker < rumpsp_maxworker) { 938 /* 939 * Else, need to create one 940 * (if we can, otherwise just expect another 941 * worker to pick up the syscall) 942 */ 943 if (pthread_create(&pt, &pattr_detached, 944 serv_workbouncer, NULL) == 0) { 945 nworker++; 946 } 947 } 948 pthread_mutex_unlock(&sbamtx); 949 } 950 951 /* 952 * 953 * Startup routines and mainloop for server. 954 * 955 */ 956 957 struct spservarg { 958 int sps_sock; 959 connecthook_fn sps_connhook; 960 struct lwp *sps_l; 961 }; 962 963 static void 964 handlereq(struct spclient *spc) 965 { 966 uint64_t reqno; 967 int error; 968 969 reqno = spc->spc_hdr.rsp_reqno; 970 if (__predict_false(spc->spc_state == SPCSTATE_NEW)) { 971 if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) { 972 send_error_resp(spc, reqno, RUMPSP_ERR_AUTH); 973 shutdown(spc->spc_fd, SHUT_RDWR); 974 spcfreebuf(spc); 975 return; 976 } 977 978 if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) { 979 char *comm = (char *)spc->spc_buf; 980 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ; 981 982 /* ensure it's 0-terminated */ 983 /* XXX make sure it contains sensible chars? */ 984 comm[commlen] = '\0'; 985 986 if ((error = lwproc_rfork(spc, 987 RUMP_RFFDG, comm)) != 0) { 988 shutdown(spc->spc_fd, SHUT_RDWR); 989 } 990 991 spcfreebuf(spc); 992 if (error) 993 return; 994 995 spc->spc_mainlwp = lwproc_curlwp(); 996 997 send_handshake_resp(spc, reqno, 0); 998 } else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) { 999 struct lwp *tmpmain; 1000 struct prefork *pf; 1001 struct handshake_fork *rfp; 1002 int cancel; 1003 1004 if (spc->spc_off-HDRSZ != sizeof(*rfp)) { 1005 send_error_resp(spc, reqno, 1006 RUMPSP_ERR_MALFORMED_REQUEST); 1007 shutdown(spc->spc_fd, SHUT_RDWR); 1008 spcfreebuf(spc); 1009 return; 1010 } 1011 1012 /*LINTED*/ 1013 rfp = (void *)spc->spc_buf; 1014 cancel = rfp->rf_cancel; 1015 1016 pthread_mutex_lock(&pfmtx); 1017 LIST_FOREACH(pf, &preforks, pf_entries) { 1018 if (memcmp(rfp->rf_auth, pf->pf_auth, 1019 sizeof(rfp->rf_auth)) == 0) { 1020 LIST_REMOVE(pf, pf_entries); 1021 LIST_REMOVE(pf, pf_spcentries); 1022 break; 1023 } 1024 } 1025 pthread_mutex_unlock(&pfmtx); 1026 spcfreebuf(spc); 1027 1028 if (!pf) { 1029 send_error_resp(spc, reqno, 1030 RUMPSP_ERR_INVALID_PREFORK); 1031 shutdown(spc->spc_fd, SHUT_RDWR); 1032 return; 1033 } 1034 1035 tmpmain = pf->pf_lwp; 1036 free(pf); 1037 lwproc_switch(tmpmain); 1038 if (cancel) { 1039 lwproc_release(); 1040 shutdown(spc->spc_fd, SHUT_RDWR); 1041 return; 1042 } 1043 1044 /* 1045 * So, we forked already during "prefork" to save 1046 * the file descriptors from a parent exit 1047 * race condition. But now we need to fork 1048 * a second time since the initial fork has 1049 * the wrong spc pointer. (yea, optimize 1050 * interfaces some day if anyone cares) 1051 */ 1052 if ((error = lwproc_rfork(spc, 0, NULL)) != 0) { 1053 send_error_resp(spc, reqno, 1054 RUMPSP_ERR_RFORK_FAILED); 1055 shutdown(spc->spc_fd, SHUT_RDWR); 1056 lwproc_release(); 1057 return; 1058 } 1059 spc->spc_mainlwp = lwproc_curlwp(); 1060 lwproc_switch(tmpmain); 1061 lwproc_release(); 1062 lwproc_switch(spc->spc_mainlwp); 1063 1064 send_handshake_resp(spc, reqno, 0); 1065 } else { 1066 send_error_resp(spc, reqno, RUMPSP_ERR_AUTH); 1067 shutdown(spc->spc_fd, SHUT_RDWR); 1068 spcfreebuf(spc); 1069 return; 1070 } 1071 1072 spc->spc_pid = lwproc_getpid(); 1073 1074 DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n", 1075 spc, spc->spc_pid)); 1076 1077 lwproc_switch(NULL); 1078 spc->spc_state = SPCSTATE_RUNNING; 1079 return; 1080 } 1081 1082 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) { 1083 struct prefork *pf; 1084 uint32_t auth[AUTHLEN]; 1085 size_t randlen; 1086 int inexec; 1087 1088 DPRINTF(("rump_sp: prefork handler executing for %p\n", spc)); 1089 spcfreebuf(spc); 1090 1091 pthread_mutex_lock(&spc->spc_mtx); 1092 inexec = spc->spc_inexec; 1093 pthread_mutex_unlock(&spc->spc_mtx); 1094 if (inexec) { 1095 send_error_resp(spc, reqno, RUMPSP_ERR_INEXEC); 1096 shutdown(spc->spc_fd, SHUT_RDWR); 1097 return; 1098 } 1099 1100 pf = malloc(sizeof(*pf)); 1101 if (pf == NULL) { 1102 send_error_resp(spc, reqno, RUMPSP_ERR_NOMEM); 1103 return; 1104 } 1105 1106 /* 1107 * Use client main lwp to fork. this is never used by 1108 * worker threads (except in exec, but we checked for that 1109 * above) so we can safely use it here. 1110 */ 1111 lwproc_switch(spc->spc_mainlwp); 1112 if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) { 1113 DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc)); 1114 send_error_resp(spc, reqno, RUMPSP_ERR_RFORK_FAILED); 1115 lwproc_switch(NULL); 1116 free(pf); 1117 return; 1118 } 1119 1120 /* Ok, we have a new process context and a new curlwp */ 1121 rumpuser_getrandom(auth, sizeof(auth), 0, &randlen); 1122 memcpy(pf->pf_auth, auth, sizeof(pf->pf_auth)); 1123 pf->pf_lwp = lwproc_curlwp(); 1124 lwproc_switch(NULL); 1125 1126 pthread_mutex_lock(&pfmtx); 1127 LIST_INSERT_HEAD(&preforks, pf, pf_entries); 1128 LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries); 1129 pthread_mutex_unlock(&pfmtx); 1130 1131 DPRINTF(("rump_sp: prefork handler success %p\n", spc)); 1132 1133 send_prefork_resp(spc, reqno, auth); 1134 return; 1135 } 1136 1137 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_HANDSHAKE)) { 1138 int inexec; 1139 1140 if (spc->spc_hdr.rsp_handshake != HANDSHAKE_EXEC) { 1141 send_error_resp(spc, reqno, 1142 RUMPSP_ERR_MALFORMED_REQUEST); 1143 shutdown(spc->spc_fd, SHUT_RDWR); 1144 spcfreebuf(spc); 1145 return; 1146 } 1147 1148 pthread_mutex_lock(&spc->spc_mtx); 1149 inexec = spc->spc_inexec; 1150 pthread_mutex_unlock(&spc->spc_mtx); 1151 if (inexec) { 1152 send_error_resp(spc, reqno, RUMPSP_ERR_INEXEC); 1153 shutdown(spc->spc_fd, SHUT_RDWR); 1154 spcfreebuf(spc); 1155 return; 1156 } 1157 1158 pthread_mutex_lock(&spc->spc_mtx); 1159 spc->spc_inexec = 1; 1160 pthread_mutex_unlock(&spc->spc_mtx); 1161 1162 /* 1163 * start to drain lwps. we will wait for it to finish 1164 * in another thread 1165 */ 1166 lwproc_switch(spc->spc_mainlwp); 1167 lwproc_lwpexit(); 1168 lwproc_switch(NULL); 1169 1170 /* 1171 * exec has to wait for lwps to drain, so finish it off 1172 * in another thread 1173 */ 1174 schedulework(spc, SBA_EXEC); 1175 return; 1176 } 1177 1178 if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) { 1179 send_error_resp(spc, reqno, RUMPSP_ERR_MALFORMED_REQUEST); 1180 spcfreebuf(spc); 1181 return; 1182 } 1183 1184 schedulework(spc, SBA_SYSCALL); 1185 } 1186 1187 static void * 1188 spserver(void *arg) 1189 { 1190 struct spservarg *sarg = arg; 1191 struct spclient *spc; 1192 unsigned idx; 1193 int seen; 1194 int rv; 1195 unsigned int nfds, maxidx; 1196 1197 lwproc_switch(sarg->sps_l); 1198 1199 for (idx = 0; idx < MAXCLI; idx++) { 1200 pfdlist[idx].fd = -1; 1201 pfdlist[idx].events = POLLIN; 1202 1203 spc = &spclist[idx]; 1204 pthread_mutex_init(&spc->spc_mtx, NULL); 1205 pthread_cond_init(&spc->spc_cv, NULL); 1206 spc->spc_fd = -1; 1207 } 1208 pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock; 1209 pfdlist[0].events = POLLIN; 1210 nfds = 1; 1211 maxidx = 0; 1212 1213 pthread_attr_init(&pattr_detached); 1214 pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED); 1215 #if NOTYET 1216 pthread_attr_setstacksize(&pattr_detached, 32*1024); 1217 #endif 1218 1219 pthread_mutex_init(&sbamtx, NULL); 1220 pthread_cond_init(&sbacv, NULL); 1221 1222 DPRINTF(("rump_sp: server mainloop\n")); 1223 1224 for (;;) { 1225 int discoed; 1226 1227 /* g/c hangarounds (eventually) */ 1228 discoed = getdisco(); 1229 while (discoed--) { 1230 nfds--; 1231 idx = maxidx; 1232 while (idx) { 1233 if (pfdlist[idx].fd != -1) { 1234 maxidx = idx; 1235 break; 1236 } 1237 idx--; 1238 } 1239 DPRINTF(("rump_sp: set maxidx to [%u]\n", 1240 maxidx)); 1241 } 1242 1243 DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1)); 1244 seen = 0; 1245 rv = poll(pfdlist, maxidx+1, INFTIM); 1246 assert(maxidx+1 <= MAXCLI); 1247 assert(rv != 0); 1248 if (rv == -1) { 1249 if (errno == EINTR) 1250 continue; 1251 fprintf(stderr, "rump_spserver: poll returned %d\n", 1252 errno); 1253 break; 1254 } 1255 1256 for (idx = 0; seen < rv && idx < MAXCLI; idx++) { 1257 if ((pfdlist[idx].revents & POLLIN) == 0) 1258 continue; 1259 1260 seen++; 1261 DPRINTF(("rump_sp: activity at [%u] %d/%d\n", 1262 idx, seen, rv)); 1263 if (idx > 0) { 1264 spc = &spclist[idx]; 1265 DPRINTF(("rump_sp: mainloop read [%u]\n", idx)); 1266 switch (readframe(spc)) { 1267 case 0: 1268 break; 1269 case -1: 1270 serv_handledisco(idx); 1271 break; 1272 default: 1273 switch (spc->spc_hdr.rsp_class) { 1274 case RUMPSP_RESP: 1275 kickwaiter(spc); 1276 break; 1277 case RUMPSP_REQ: 1278 handlereq(spc); 1279 break; 1280 default: 1281 send_error_resp(spc, 1282 spc->spc_hdr.rsp_reqno, 1283 RUMPSP_ERR_MALFORMED_REQUEST); 1284 spcfreebuf(spc); 1285 break; 1286 } 1287 break; 1288 } 1289 1290 } else { 1291 DPRINTF(("rump_sp: mainloop new connection\n")); 1292 1293 if (__predict_false(spfini)) { 1294 close(spclist[0].spc_fd); 1295 serv_shutdown(); 1296 goto out; 1297 } 1298 1299 idx = serv_handleconn(pfdlist[0].fd, 1300 sarg->sps_connhook, nfds == MAXCLI); 1301 if (idx) 1302 nfds++; 1303 if (idx > maxidx) 1304 maxidx = idx; 1305 DPRINTF(("rump_sp: maxid now %d\n", maxidx)); 1306 } 1307 } 1308 } 1309 1310 out: 1311 return NULL; 1312 } 1313 1314 static unsigned cleanupidx; 1315 static struct sockaddr *cleanupsa; 1316 int 1317 rumpuser_sp_init(const char *url, 1318 const char *ostype, const char *osrelease, const char *machine) 1319 { 1320 pthread_t pt; 1321 struct spservarg *sarg; 1322 struct sockaddr *sap; 1323 struct lwp *calllwp; 1324 char *p; 1325 unsigned idx = 0; /* XXXgcc */ 1326 int error, s; 1327 1328 p = strdup(url); 1329 if (p == NULL) { 1330 error = ENOMEM; 1331 goto out; 1332 } 1333 error = parseurl(p, &sap, &idx, 1); 1334 free(p); 1335 if (error) 1336 goto out; 1337 1338 snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n", 1339 PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine); 1340 1341 s = socket(parsetab[idx].domain, SOCK_STREAM, 0); 1342 if (s == -1) { 1343 error = errno; 1344 goto out; 1345 } 1346 1347 sarg = malloc(sizeof(*sarg)); 1348 if (sarg == NULL) { 1349 close(s); 1350 error = ENOMEM; 1351 goto out; 1352 } 1353 1354 sarg->sps_sock = s; 1355 sarg->sps_connhook = parsetab[idx].connhook; 1356 1357 cleanupidx = idx; 1358 cleanupsa = sap; 1359 1360 /* sloppy error recovery */ 1361 1362 /*LINTED*/ 1363 if (bind(s, sap, parsetab[idx].slen) == -1) { 1364 error = errno; 1365 fprintf(stderr, "rump_sp: server bind failed\n"); 1366 goto out; 1367 } 1368 if (listen(s, MAXCLI) == -1) { 1369 error = errno; 1370 fprintf(stderr, "rump_sp: server listen failed\n"); 1371 goto out; 1372 } 1373 1374 /* 1375 * Create a context that the client threads run off of. 1376 * We fork a dedicated context so as to ensure that all 1377 * client threads get the same set of fd's. We fork off 1378 * of whatever context the caller is running in (most likely 1379 * an implicit thread, i.e. proc 1) and do not 1380 * close fd's. The assumption is that people who 1381 * write servers (i.e. "kernels") know what they're doing. 1382 */ 1383 calllwp = lwproc_curlwp(); 1384 if ((error = lwproc_rfork(NULL, RUMP_RFFDG, "spserver")) != 0) { 1385 fprintf(stderr, "rump_sp: rfork failed"); 1386 goto out; 1387 } 1388 sarg->sps_l = lwproc_curlwp(); 1389 lwproc_switch(calllwp); 1390 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) { 1391 fprintf(stderr, "rump_sp: cannot create wrkr thread\n"); 1392 goto out; 1393 } 1394 pthread_detach(pt); 1395 1396 out: 1397 ET(error); 1398 } 1399 1400 void 1401 rumpuser_sp_fini(void *arg) 1402 { 1403 struct spclient *spc = arg; 1404 register_t retval[2] = {0, 0}; 1405 1406 if (spclist[0].spc_fd) { 1407 parsetab[cleanupidx].cleanup(cleanupsa); 1408 } 1409 1410 /* 1411 * stuff response into the socket, since the rump kernel container 1412 * is just about to exit 1413 */ 1414 if (spc && spc->spc_syscallreq) 1415 send_syscall_resp(spc, spc->spc_syscallreq, 0, retval); 1416 1417 if (spclist[0].spc_fd) { 1418 shutdown(spclist[0].spc_fd, SHUT_RDWR); 1419 spfini = 1; 1420 } 1421 1422 /* 1423 * could release thread, but don't bother, since the container 1424 * will be stone dead in a moment. 1425 */ 1426 } 1427