1 /* $NetBSD: rumpuser_sp.c,v 1.51 2013/01/14 21:00:16 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Sysproxy routines. This provides system RPC support over host sockets. 30 * The most notable limitation is that the client and server must share 31 * the same ABI. This does not mean that they have to be the same 32 * machine or that they need to run the same version of the host OS, 33 * just that they must agree on the data structures. This even *might* 34 * work correctly from one hardware architecture to another. 
 */

#include "rumpuser_port.h"

#if !defined(lint)
__RCSID("$NetBSD: rumpuser_sp.c,v 1.51 2013/01/14 21:00:16 pooka Exp $");
#endif /* !lint */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/socket.h>

#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <rump/rump.h> /* XXX: for rfork flags */
#include <rump/rumpuser.h>

#include "rumpuser_int.h"

#include "sp_common.c"

/* Tunables: max simultaneous clients, worker pool limits. */
#ifndef MAXCLI
#define MAXCLI 256
#endif
#ifndef MAXWORKER
#define MAXWORKER 128
#endif
#ifndef IDLEWORKER
#define IDLEWORKER 16
#endif
int rumpsp_maxworker = MAXWORKER;
int rumpsp_idleworker = IDLEWORKER;

/* One slot per connection; slot 0 is the listening socket. */
static struct pollfd pfdlist[MAXCLI];
static struct spclient spclist[MAXCLI];
static unsigned int disco;	/* count of disconnects pending g/c */
static volatile int spfini;	/* set by rumpuser_sp_fini() to stop server */

static struct rumpuser_sp_ops spops;

static char banner[MAXBANNER];

#define PROTOMAJOR 0
#define PROTOMINOR 4


/* how to use atomic ops on Linux? */
#if defined(__linux__) || defined(__CYGWIN__)
static pthread_mutex_t discomtx = PTHREAD_MUTEX_INITIALIZER;

/* Record one disconnect (mutex-protected fallback for missing atomics). */
static void
signaldisco(void)
{

	pthread_mutex_lock(&discomtx);
	disco++;
	pthread_mutex_unlock(&discomtx);
}

/* Fetch and reset the pending disconnect count. */
static unsigned int
getdisco(void)
{
	unsigned int discocnt;

	pthread_mutex_lock(&discomtx);
	discocnt = disco;
	disco = 0;
	pthread_mutex_unlock(&discomtx);

	return discocnt;
}

#elif defined(__FreeBSD__) || defined(__DragonFly__)

#include <machine/atomic.h>
#define signaldisco() atomic_add_int(&disco, 1)
#define getdisco() atomic_readandclear_int(&disco)

#else /* NetBSD */

#include <sys/atomic.h>
#define signaldisco() atomic_inc_uint(&disco)
#define getdisco() atomic_swap_uint(&disco, 0)

#endif


/* A pre-forked process waiting to be claimed by an authenticated client. */
struct prefork {
	uint32_t pf_auth[AUTHLEN];	/* random auth cookie */
	struct lwp *pf_lwp;		/* lwp of the forked process */

	LIST_ENTRY(prefork) pf_entries;		/* global list */
	LIST_ENTRY(prefork) pf_spcentries;	/* linked from forking spc */
};
static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks);
static pthread_mutex_t pfmtx;

/*
 * This version is for the server.  It's optimized for multiple threads
 * and is *NOT* reentrant wrt to signals.
148 */ 149 static int 150 waitresp(struct spclient *spc, struct respwait *rw) 151 { 152 int spcstate; 153 int rv = 0; 154 155 pthread_mutex_lock(&spc->spc_mtx); 156 sendunlockl(spc); 157 while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) { 158 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx); 159 } 160 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries); 161 spcstate = spc->spc_state; 162 pthread_mutex_unlock(&spc->spc_mtx); 163 164 pthread_cond_destroy(&rw->rw_cv); 165 166 if (rv) 167 return rv; 168 if (spcstate == SPCSTATE_DYING) 169 return ENOTCONN; 170 return rw->rw_error; 171 } 172 173 /* 174 * Manual wrappers, since librump does not have access to the 175 * user namespace wrapped interfaces. 176 */ 177 178 static void 179 lwproc_switch(struct lwp *l) 180 { 181 182 spops.spop_schedule(); 183 spops.spop_lwproc_switch(l); 184 spops.spop_unschedule(); 185 } 186 187 static void 188 lwproc_release(void) 189 { 190 191 spops.spop_schedule(); 192 spops.spop_lwproc_release(); 193 spops.spop_unschedule(); 194 } 195 196 static int 197 lwproc_rfork(struct spclient *spc, int flags, const char *comm) 198 { 199 int rv; 200 201 spops.spop_schedule(); 202 rv = spops.spop_lwproc_rfork(spc, flags, comm); 203 spops.spop_unschedule(); 204 205 return rv; 206 } 207 208 static int 209 lwproc_newlwp(pid_t pid) 210 { 211 int rv; 212 213 spops.spop_schedule(); 214 rv = spops.spop_lwproc_newlwp(pid); 215 spops.spop_unschedule(); 216 217 return rv; 218 } 219 220 static struct lwp * 221 lwproc_curlwp(void) 222 { 223 struct lwp *l; 224 225 spops.spop_schedule(); 226 l = spops.spop_lwproc_curlwp(); 227 spops.spop_unschedule(); 228 229 return l; 230 } 231 232 static pid_t 233 lwproc_getpid(void) 234 { 235 pid_t p; 236 237 spops.spop_schedule(); 238 p = spops.spop_getpid(); 239 spops.spop_unschedule(); 240 241 return p; 242 } 243 244 static void 245 lwproc_execnotify(const char *comm) 246 { 247 248 spops.spop_schedule(); 249 spops.spop_execnotify(comm); 250 spops.spop_unschedule(); 251 } 252 
/* Tear down all lwps of the current process. */
static void
lwproc_lwpexit(void)
{

	spops.spop_schedule();
	spops.spop_lwpexit();
	spops.spop_unschedule();
}

/* Execute syscall sysnum on behalf of the client; retval gets results. */
static int
rumpsyscall(int sysnum, void *data, register_t *retval)
{
	int rv;

	spops.spop_schedule();
	rv = spops.spop_syscall(sysnum, data, retval);
	spops.spop_unschedule();

	return rv;
}

/* Allocate the next request number for spc (per-connection counter). */
static uint64_t
nextreq(struct spclient *spc)
{
	uint64_t nw;

	pthread_mutex_lock(&spc->spc_mtx);
	nw = spc->spc_nextreq++;
	pthread_mutex_unlock(&spc->spc_mtx);

	return nw;
}

/*
 * XXX: we send responses with "blocking" I/O.  This is not
 * ok for the main thread.  XXXFIXME
 */

/* Send an error-class response for request reqno.  Send errors ignored. */
static void
send_error_resp(struct spclient *spc, uint64_t reqno, enum rumpsp_err error)
{
	struct rsp_hdr rhdr;
	struct iovec iov[1];

	rhdr.rsp_len = sizeof(rhdr);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_ERROR;
	rhdr.rsp_type = 0;
	rhdr.rsp_error = error;

	IOVPUT(iov[0], rhdr);

	sendlock(spc);
	(void)SENDIOV(spc, iov);
	sendunlock(spc);
}

/* Send the handshake response carrying an int error code. */
static int
send_handshake_resp(struct spclient *spc, uint64_t reqno, int error)
{
	struct rsp_hdr rhdr;
	struct iovec iov[2];
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + sizeof(error);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_HANDSHAKE;
	rhdr.rsp_error = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], error);

	sendlock(spc);
	rv = SENDIOV(spc, iov);
	sendunlock(spc);

	return rv;
}

/* Send a syscall response: error code plus the two return registers. */
static int
send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
	register_t *retval)
{
	struct rsp_hdr rhdr;
	struct rsp_sysresp sysresp;
	struct iovec iov[2];
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_SYSCALL;
	rhdr.rsp_sysnum = 0;

	sysresp.rsys_error = error;
	memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], sysresp);

	sendlock(spc);
	rv = SENDIOV(spc, iov);
	sendunlock(spc);

	return rv;
}

/* Send the prefork response carrying the AUTHLEN-word auth cookie. */
static int
send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth)
{
	struct rsp_hdr rhdr;
	struct iovec iov[2];
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_PREFORK;
	rhdr.rsp_sysnum = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT_WITHSIZE(iov[1], auth, AUTHLEN*sizeof(*auth));

	sendlock(spc);
	rv = SENDIOV(spc, iov);
	sendunlock(spc);

	return rv;
}

/*
 * Request a copyin (or copyinstr if wantstr) of *dlen bytes from the
 * client address remaddr.  Blocks until the response arrives; *resp
 * gets the received data (caller frees), and for strings *dlen is
 * updated to the actual length.
 */
static int
copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
	int wantstr, void **resp)
{
	struct rsp_hdr rhdr;
	struct rsp_copydata copydata;
	struct respwait rw;
	struct iovec iov[2];
	int rv;

	DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
	rhdr.rsp_class = RUMPSP_REQ;
	if (wantstr)
		rhdr.rsp_type = RUMPSP_COPYINSTR;
	else
		rhdr.rsp_type = RUMPSP_COPYIN;
	rhdr.rsp_sysnum = 0;

	copydata.rcp_addr = __UNCONST(remaddr);
	copydata.rcp_len = *dlen;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], copydata);

	/* register waiter before sending so the response cannot race us */
	putwait(spc, &rw, &rhdr);
	rv = SENDIOV(spc, iov);
	if (rv) {
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);

	DPRINTF(("copyin: response %d\n", rv));

	*resp = rw.rw_data;
	if (wantstr)
		*dlen = rw.rw_dlen;

	return rv;

}

/* Ship dlen bytes of data to client address remaddr (no reply awaited). */
static int
send_copyout_req(struct spclient *spc, const void *remaddr,
	const void *data, size_t dlen)
{
	struct rsp_hdr rhdr;
	struct rsp_copydata copydata;
	struct iovec iov[3];
	int
	    rv;

	DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
	rhdr.rsp_reqno = nextreq(spc);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_COPYOUT;
	rhdr.rsp_sysnum = 0;

	copydata.rcp_addr = __UNCONST(remaddr);
	copydata.rcp_len = dlen;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], copydata);
	IOVPUT_WITHSIZE(iov[2], __UNCONST(data), dlen);

	sendlock(spc);
	rv = SENDIOV(spc, iov);
	sendunlock(spc);

	return rv;
}

/*
 * Ask the client to anonymously mmap howmuch bytes; blocks for the
 * response.  *resp gets the response payload containing the mapped
 * address (caller frees).
 */
static int
anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	struct iovec iov[2];
	int rv;

	DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_ANONMMAP;
	rhdr.rsp_sysnum = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], howmuch);

	putwait(spc, &rw, &rhdr);
	rv = SENDIOV(spc, iov);
	if (rv) {
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);

	*resp = rw.rw_data;

	DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));

	return rv;
}

/* Deliver signal signo to the client (fire and forget). */
static int
send_raise_req(struct spclient *spc, int signo)
{
	struct rsp_hdr rhdr;
	struct iovec iov[1];
	int rv;

	rhdr.rsp_len = sizeof(rhdr);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_RAISE;
	rhdr.rsp_signo = signo;

	IOVPUT(iov[0], rhdr);

	sendlock(spc);
	rv = SENDIOV(spc, iov);
	sendunlock(spc);

	return rv;
}

/* Take a reference to a client connection. */
static void
spcref(struct spclient *spc)
{

	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_refcnt++;
	pthread_mutex_unlock(&spc->spc_mtx);
}

/* Drop a reference; the last one tears the connection down. */
static void
spcrelease(struct spclient *spc)
{
	int ref;

	pthread_mutex_lock(&spc->spc_mtx);
	ref = --spc->spc_refcnt;
	/* exec waits for the refcount to drain; wake it up */
	if (__predict_false(spc->spc_inexec && ref <= 2))
		pthread_cond_broadcast(&spc->spc_cv);
	pthread_mutex_unlock(&spc->spc_mtx);

	if (ref > 0)
		return;

	/* last reference: release the process, close the fd, recycle slot */
	DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd));

	_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
	_DIAGASSERT(spc->spc_buf == NULL);

	if (spc->spc_mainlwp) {
		lwproc_switch(spc->spc_mainlwp);
		lwproc_release();
	}
	spc->spc_mainlwp = NULL;

	close(spc->spc_fd);
	spc->spc_fd = -1;
	spc->spc_state = SPCSTATE_NEW;

	/* tell the mainloop to garbage-collect this slot */
	signaldisco();
}

/* Handle a disconnected client at slot idx: kill its lwps, reset state. */
static void
serv_handledisco(unsigned int idx)
{
	struct spclient *spc = &spclist[idx];
	int dolwpexit;

	DPRINTF(("rump_sp: disconnecting [%u]\n", idx));

	pfdlist[idx].fd = -1;
	pfdlist[idx].revents = 0;
	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_state = SPCSTATE_DYING;
	kickall(spc);
	sendunlockl(spc);
	/* exec uses mainlwp in another thread, but also nuked all lwps */
	dolwpexit = !spc->spc_inexec;
	pthread_mutex_unlock(&spc->spc_mtx);

	if (dolwpexit && spc->spc_mainlwp) {
		lwproc_switch(spc->spc_mainlwp);
		lwproc_lwpexit();
		lwproc_switch(NULL);
	}

	/*
	 * Nobody's going to attempt to send/receive anymore,
	 * so reinit info relevant to that.
	 */
	/*LINTED:pointer casts may be ok*/
	memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);

	spcrelease(spc);
}

/* Disconnect all active clients (slot 0 is the listener, skip it). */
static void
serv_shutdown(void)
{
	struct spclient *spc;
	unsigned int i;

	for (i = 1; i < MAXCLI; i++) {
		spc = &spclist[i];
		if (spc->spc_fd == -1)
			continue;

		shutdown(spc->spc_fd, SHUT_RDWR);
		serv_handledisco(i);

		/*
		 * NOTE(review): serv_handledisco() already drops the
		 * connection's reference via spcrelease(); confirm this
		 * extra release is balanced by another reference.
		 */
		spcrelease(spc);
	}
}

/*
 * Accept a new connection on the listening fd, set it non-blocking,
 * run the connect hook, send the banner and install it in a free slot.
 * Returns the slot index, or 0 on failure/busy (slot 0 is the listener,
 * so 0 is never a valid client index).
 */
static unsigned
serv_handleconn(int fd, connecthook_fn connhook, int busy)
{
	struct sockaddr_storage ss;
	socklen_t sl = sizeof(ss);
	int newfd, flags;
	unsigned i;

	/*LINTED: cast ok */
	newfd = accept(fd, (struct sockaddr *)&ss, &sl);
	if (newfd == -1)
		return 0;

	if (busy) {
		close(newfd); /* EBUSY */
		return 0;
	}

	flags = fcntl(newfd, F_GETFL, 0);
	if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
		close(newfd);
		return 0;
	}

	if (connhook(newfd) != 0) {
		close(newfd);
		return 0;
	}

	/* write out a banner for the client */
	if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL)
	    != (ssize_t)strlen(banner)) {
		close(newfd);
		return 0;
	}

	/* find empty slot the simple way */
	for (i = 0; i < MAXCLI; i++) {
		if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW)
			break;
	}

	assert(i < MAXCLI);

	pfdlist[i].fd = newfd;
	spclist[i].spc_fd = newfd;
	spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
	spclist[i].spc_refcnt = 1;

	TAILQ_INIT(&spclist[i].spc_respwait);

	DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i));

	return i;
}

/* Worker-side handler: run one syscall request and send the response. */
static void
serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
{
	register_t retval[2] = {0, 0};
	int rv, sysnum;

	sysnum = (int)rhdr->rsp_sysnum;
	DPRINTF(("rump_sp: handling syscall %d from client %d\n",
	    sysnum, spc->spc_pid));

	/* need an lwp in the client's process to run the syscall in */
	if (__predict_false((rv = lwproc_newlwp(spc->spc_pid)) != 0)) {
		retval[0] = -1;
		send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
		return;
	}
	spc->spc_syscallreq = rhdr->rsp_reqno;
	rv = rumpsyscall(sysnum, data, retval);
	spc->spc_syscallreq = 0;
	lwproc_release();

	DPRINTF(("rump_sp: got return value %d & %d/%d\n",
	    rv, retval[0], retval[1]));

	send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
}

/*
 * Worker-side handler for exec: wait for all other threads of the
 * client to drain, then notify the rump kernel of the new command name.
 */
static void
serv_handleexec(struct spclient *spc, struct rsp_hdr *rhdr, char *comm)
{
	size_t commlen = rhdr->rsp_len - HDRSZ;

	pthread_mutex_lock(&spc->spc_mtx);
	/* one for the connection and one for us */
	while (spc->spc_refcnt > 2)
		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
	pthread_mutex_unlock(&spc->spc_mtx);

	/*
	 * ok, all the threads are dead (or one is still alive and
	 * the connection is dead, in which case this doesn't matter
	 * very much).  proceed with exec.
	 */

	/* ensure comm is 0-terminated */
	/* TODO: make sure it contains sensible chars?
	 */
	comm[commlen] = '\0';

	lwproc_switch(spc->spc_mainlwp);
	lwproc_execnotify(comm);
	lwproc_switch(NULL);

	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_inexec = 0;
	pthread_mutex_unlock(&spc->spc_mtx);
	send_handshake_resp(spc, rhdr->rsp_reqno, 0);
}

/* Work items the mainloop bounces to worker threads. */
enum sbatype { SBA_SYSCALL, SBA_EXEC };

struct servbouncearg {
	struct spclient *sba_spc;	/* referenced client connection */
	struct rsp_hdr sba_hdr;		/* copy of the request header */
	enum sbatype sba_type;		/* syscall or exec */
	uint8_t *sba_data;		/* request payload, worker frees */

	TAILQ_ENTRY(servbouncearg) sba_entries;
};
static pthread_mutex_t sbamtx;
static pthread_cond_t sbacv;
static int nworker, idleworker, nwork;
static TAILQ_HEAD(, servbouncearg) wrklist = TAILQ_HEAD_INITIALIZER(wrklist);

/*
 * Worker thread: pull work items off wrklist and service them.
 * Exits when there are enough idle workers already.
 */
/*ARGSUSED*/
static void *
serv_workbouncer(void *arg)
{
	struct servbouncearg *sba;

	for (;;) {
		pthread_mutex_lock(&sbamtx);
		/* too many idle threads?  retire this one */
		if (__predict_false(idleworker - nwork >= rumpsp_idleworker)) {
			nworker--;
			pthread_mutex_unlock(&sbamtx);
			break;
		}
		idleworker++;
		while (TAILQ_EMPTY(&wrklist)) {
			_DIAGASSERT(nwork == 0);
			pthread_cond_wait(&sbacv, &sbamtx);
		}
		idleworker--;

		sba = TAILQ_FIRST(&wrklist);
		TAILQ_REMOVE(&wrklist, sba, sba_entries);
		nwork--;
		pthread_mutex_unlock(&sbamtx);

		if (__predict_true(sba->sba_type == SBA_SYSCALL)) {
			serv_handlesyscall(sba->sba_spc,
			    &sba->sba_hdr, sba->sba_data);
		} else {
			_DIAGASSERT(sba->sba_type == SBA_EXEC);
			serv_handleexec(sba->sba_spc, &sba->sba_hdr,
			    (char *)sba->sba_data);
		}
		spcrelease(sba->sba_spc);
		free(sba->sba_data);
		free(sba);
	}

	return NULL;
}

/*
 * Common guts of copyin/copyinstr: fetch *len bytes from client address
 * raddr into laddr.  Drops the rump kernel lock around the RPC.
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto
		    out;

	memcpy(laddr, rdata, *len);
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}

/* Public copyin: fixed length. */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{

	return sp_copyin(arg, raddr, laddr, &len, 0);
}

/* Public copyinstr: *len is in/out (actual string length on return). */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{

	return sp_copyin(arg, raddr, laddr, len, 1);
}

/*
 * Common guts of copyout: push dlen bytes from laddr to client address
 * raddr.  Drops the rump kernel lock around the RPC.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, rv;

	rumpuser__kunlock(0, &nlocks, NULL);
	rv = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	if (rv)
		return EFAULT;
	return 0;
}

/* Public copyout: fixed length. */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{

	return sp_copyout(arg, laddr, raddr, dlen);
}

/* Public copyoutstr: length passed by pointer, used as-is. */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{

	return sp_copyout(arg, laddr, raddr, *dlen);
}

/* Ask the client to mmap anonymous memory; *addr gets the address. */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *resp, *rdata;
	int nlocks, rv;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = anonmmap_req(spc, howmuch, &rdata);
	if (rv) {
		rv = EFAULT;
		goto out;
	}

	resp = *(void **)rdata;
	free(rdata);

	if (resp == NULL) {
		rv = ENOMEM;
	}

	*addr = resp;

 out:
	rumpuser__klock(nlocks, NULL);

	if (rv)
		return rv;
	return 0;
}

/* Deliver a signal to the client process. */
int
rumpuser_sp_raise(void *arg, int signo)
{
	struct spclient *spc = arg;
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);
	rv = send_raise_req(spc, signo);
	rumpuser__klock(nlocks, NULL);

	return rv;
}

static pthread_attr_t pattr_detached;
/* Queue a syscall/exec work item for the worker pool. */
static void
schedulework(struct spclient
*spc, enum sbatype sba_type) 891 { 892 struct servbouncearg *sba; 893 pthread_t pt; 894 uint64_t reqno; 895 int retries = 0; 896 897 reqno = spc->spc_hdr.rsp_reqno; 898 while ((sba = malloc(sizeof(*sba))) == NULL) { 899 if (nworker == 0 || retries > 10) { 900 send_error_resp(spc, reqno, RUMPSP_ERR_TRYAGAIN); 901 spcfreebuf(spc); 902 return; 903 } 904 /* slim chance of more memory? */ 905 usleep(10000); 906 } 907 908 sba->sba_spc = spc; 909 sba->sba_type = sba_type; 910 sba->sba_hdr = spc->spc_hdr; 911 sba->sba_data = spc->spc_buf; 912 spcresetbuf(spc); 913 914 spcref(spc); 915 916 pthread_mutex_lock(&sbamtx); 917 TAILQ_INSERT_TAIL(&wrklist, sba, sba_entries); 918 nwork++; 919 if (nwork <= idleworker) { 920 /* do we have a daemon's tool (i.e. idle threads)? */ 921 pthread_cond_signal(&sbacv); 922 } else if (nworker < rumpsp_maxworker) { 923 /* 924 * Else, need to create one 925 * (if we can, otherwise just expect another 926 * worker to pick up the syscall) 927 */ 928 if (pthread_create(&pt, &pattr_detached, 929 serv_workbouncer, NULL) == 0) { 930 nworker++; 931 } 932 } 933 pthread_mutex_unlock(&sbamtx); 934 } 935 936 /* 937 * 938 * Startup routines and mainloop for server. 939 * 940 */ 941 942 struct spservarg { 943 int sps_sock; 944 connecthook_fn sps_connhook; 945 }; 946 947 static void 948 handlereq(struct spclient *spc) 949 { 950 uint64_t reqno; 951 int error, i; 952 953 reqno = spc->spc_hdr.rsp_reqno; 954 if (__predict_false(spc->spc_state == SPCSTATE_NEW)) { 955 if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) { 956 send_error_resp(spc, reqno, RUMPSP_ERR_AUTH); 957 shutdown(spc->spc_fd, SHUT_RDWR); 958 spcfreebuf(spc); 959 return; 960 } 961 962 if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) { 963 char *comm = (char *)spc->spc_buf; 964 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ; 965 966 /* ensure it's 0-terminated */ 967 /* XXX make sure it contains sensible chars? 
*/ 968 comm[commlen] = '\0'; 969 970 if ((error = lwproc_rfork(spc, 971 RUMP_RFCFDG, comm)) != 0) { 972 shutdown(spc->spc_fd, SHUT_RDWR); 973 } 974 975 spcfreebuf(spc); 976 if (error) 977 return; 978 979 spc->spc_mainlwp = lwproc_curlwp(); 980 981 send_handshake_resp(spc, reqno, 0); 982 } else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) { 983 struct lwp *tmpmain; 984 struct prefork *pf; 985 struct handshake_fork *rfp; 986 int cancel; 987 988 if (spc->spc_off-HDRSZ != sizeof(*rfp)) { 989 send_error_resp(spc, reqno, 990 RUMPSP_ERR_MALFORMED_REQUEST); 991 shutdown(spc->spc_fd, SHUT_RDWR); 992 spcfreebuf(spc); 993 return; 994 } 995 996 /*LINTED*/ 997 rfp = (void *)spc->spc_buf; 998 cancel = rfp->rf_cancel; 999 1000 pthread_mutex_lock(&pfmtx); 1001 LIST_FOREACH(pf, &preforks, pf_entries) { 1002 if (memcmp(rfp->rf_auth, pf->pf_auth, 1003 sizeof(rfp->rf_auth)) == 0) { 1004 LIST_REMOVE(pf, pf_entries); 1005 LIST_REMOVE(pf, pf_spcentries); 1006 break; 1007 } 1008 } 1009 pthread_mutex_lock(&pfmtx); 1010 spcfreebuf(spc); 1011 1012 if (!pf) { 1013 send_error_resp(spc, reqno, 1014 RUMPSP_ERR_INVALID_PREFORK); 1015 shutdown(spc->spc_fd, SHUT_RDWR); 1016 return; 1017 } 1018 1019 tmpmain = pf->pf_lwp; 1020 free(pf); 1021 lwproc_switch(tmpmain); 1022 if (cancel) { 1023 lwproc_release(); 1024 shutdown(spc->spc_fd, SHUT_RDWR); 1025 return; 1026 } 1027 1028 /* 1029 * So, we forked already during "prefork" to save 1030 * the file descriptors from a parent exit 1031 * race condition. But now we need to fork 1032 * a second time since the initial fork has 1033 * the wrong spc pointer. 
			 * (yea, optimize
			 * interfaces some day if anyone cares)
			 */
			if ((error = lwproc_rfork(spc, 0, NULL)) != 0) {
				send_error_resp(spc, reqno,
				    RUMPSP_ERR_RFORK_FAILED);
				shutdown(spc->spc_fd, SHUT_RDWR);
				lwproc_release();
				return;
			}
			spc->spc_mainlwp = lwproc_curlwp();
			/* dispose of the temporary prefork process */
			lwproc_switch(tmpmain);
			lwproc_release();
			lwproc_switch(spc->spc_mainlwp);

			send_handshake_resp(spc, reqno, 0);
		} else {
			send_error_resp(spc, reqno, RUMPSP_ERR_AUTH);
			shutdown(spc->spc_fd, SHUT_RDWR);
			spcfreebuf(spc);
			return;
		}

		spc->spc_pid = lwproc_getpid();

		DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n",
		    spc, spc->spc_pid));

		lwproc_switch(NULL);
		spc->spc_state = SPCSTATE_RUNNING;
		return;
	}

	if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) {
		struct prefork *pf;
		uint32_t auth[AUTHLEN];
		int inexec;

		DPRINTF(("rump_sp: prefork handler executing for %p\n", spc));
		spcfreebuf(spc);

		pthread_mutex_lock(&spc->spc_mtx);
		inexec = spc->spc_inexec;
		pthread_mutex_unlock(&spc->spc_mtx);
		if (inexec) {
			send_error_resp(spc, reqno, RUMPSP_ERR_INEXEC);
			shutdown(spc->spc_fd, SHUT_RDWR);
			return;
		}

		pf = malloc(sizeof(*pf));
		if (pf == NULL) {
			send_error_resp(spc, reqno, RUMPSP_ERR_NOMEM);
			return;
		}

		/*
		 * Use client main lwp to fork.  this is never used by
		 * worker threads (except in exec, but we checked for that
		 * above) so we can safely use it here.
		 */
		lwproc_switch(spc->spc_mainlwp);
		if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) {
			DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc));
			send_error_resp(spc, reqno, RUMPSP_ERR_RFORK_FAILED);
			lwproc_switch(NULL);
			free(pf);
			return;
		}

		/* Ok, we have a new process context and a new curlwp */
		for (i = 0; i < AUTHLEN; i++) {
			pf->pf_auth[i] = auth[i] = arc4random();
		}
		pf->pf_lwp = lwproc_curlwp();
		lwproc_switch(NULL);

		pthread_mutex_lock(&pfmtx);
		LIST_INSERT_HEAD(&preforks, pf, pf_entries);
		LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries);
		pthread_mutex_unlock(&pfmtx);

		DPRINTF(("rump_sp: prefork handler success %p\n", spc));

		send_prefork_resp(spc, reqno, auth);
		return;
	}

	if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_HANDSHAKE)) {
		int inexec;

		/* the only mid-session handshake is an exec notification */
		if (spc->spc_hdr.rsp_handshake != HANDSHAKE_EXEC) {
			send_error_resp(spc, reqno,
			    RUMPSP_ERR_MALFORMED_REQUEST);
			shutdown(spc->spc_fd, SHUT_RDWR);
			spcfreebuf(spc);
			return;
		}

		pthread_mutex_lock(&spc->spc_mtx);
		inexec = spc->spc_inexec;
		pthread_mutex_unlock(&spc->spc_mtx);
		if (inexec) {
			send_error_resp(spc, reqno, RUMPSP_ERR_INEXEC);
			shutdown(spc->spc_fd, SHUT_RDWR);
			spcfreebuf(spc);
			return;
		}

		pthread_mutex_lock(&spc->spc_mtx);
		spc->spc_inexec = 1;
		pthread_mutex_unlock(&spc->spc_mtx);

		/*
		 * start to drain lwps.
		 * we will wait for it to finish
		 * in another thread
		 */
		lwproc_switch(spc->spc_mainlwp);
		lwproc_lwpexit();
		lwproc_switch(NULL);

		/*
		 * exec has to wait for lwps to drain, so finish it off
		 * in another thread
		 */
		schedulework(spc, SBA_EXEC);
		return;
	}

	if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) {
		send_error_resp(spc, reqno, RUMPSP_ERR_MALFORMED_REQUEST);
		spcfreebuf(spc);
		return;
	}

	schedulework(spc, SBA_SYSCALL);
}

/*
 * Server mainloop: poll the listening socket and all client fds,
 * accept connections, read frames and dispatch them.
 */
static void *
spserver(void *arg)
{
	struct spservarg *sarg = arg;
	struct spclient *spc;
	unsigned idx;
	int seen;
	int rv;
	unsigned int nfds, maxidx;

	for (idx = 0; idx < MAXCLI; idx++) {
		pfdlist[idx].fd = -1;
		pfdlist[idx].events = POLLIN;

		spc = &spclist[idx];
		pthread_mutex_init(&spc->spc_mtx, NULL);
		pthread_cond_init(&spc->spc_cv, NULL);
		spc->spc_fd = -1;
	}
	pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock;
	pfdlist[0].events = POLLIN;
	nfds = 1;
	maxidx = 0;

	pthread_attr_init(&pattr_detached);
	pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
#if NOTYET
	pthread_attr_setstacksize(&pattr_detached, 32*1024);
#endif

	pthread_mutex_init(&sbamtx, NULL);
	pthread_cond_init(&sbacv, NULL);

	DPRINTF(("rump_sp: server mainloop\n"));

	for (;;) {
		int discoed;

		/* g/c hangarounds (eventually) */
		discoed = getdisco();
		while (discoed--) {
			nfds--;
			idx = maxidx;
			while (idx) {
				if (pfdlist[idx].fd != -1) {
					maxidx = idx;
					break;
				}
				idx--;
			}
			DPRINTF(("rump_sp: set maxidx to [%u]\n",
			    maxidx));
		}

		DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
		seen = 0;
		rv = poll(pfdlist, maxidx+1, INFTIM);
		assert(maxidx+1 <= MAXCLI);
		assert(rv != 0);
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			fprintf(stderr, "rump_spserver: poll returned %d\n",
			    errno);
			break;
		}

		for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
			if ((pfdlist[idx].revents & POLLIN) == 0)
				continue;

			seen++;
			DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
			    idx, seen, rv));
			if (idx > 0) {
				/* client activity: read and dispatch frame */
				spc = &spclist[idx];
				DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
				switch (readframe(spc)) {
				case 0:
					/* partial frame, wait for more */
					break;
				case -1:
					serv_handledisco(idx);
					break;
				default:
					switch (spc->spc_hdr.rsp_class) {
					case RUMPSP_RESP:
						kickwaiter(spc);
						break;
					case RUMPSP_REQ:
						handlereq(spc);
						break;
					default:
						send_error_resp(spc,
						    spc->spc_hdr.rsp_reqno,
						    RUMPSP_ERR_MALFORMED_REQUEST);
						spcfreebuf(spc);
						break;
					}
					break;
				}

			} else {
				DPRINTF(("rump_sp: mainloop new connection\n"));

				if (__predict_false(spfini)) {
					close(spclist[0].spc_fd);
					serv_shutdown();
					goto out;
				}

				idx = serv_handleconn(pfdlist[0].fd,
				    sarg->sps_connhook, nfds == MAXCLI);
				if (idx)
					nfds++;
				if (idx > maxidx)
					maxidx = idx;
				DPRINTF(("rump_sp: maxid now %d\n", maxidx));
			}
		}
	}

 out:
	return NULL;
}

static unsigned cleanupidx;		/* parsetab index for cleanup */
static struct sockaddr *cleanupsa;	/* bound address for cleanup */
/*
 * Initialize the sysproxy server: parse the url, create and bind the
 * listening socket and start the mainloop thread.  Returns errno-style
 * error, 0 on success.
 */
int
rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp,
	const char *ostype, const char *osrelease, const char *machine)
{
	pthread_t pt;
	struct spservarg *sarg;
	struct sockaddr *sap;
	char *p;
	unsigned idx;
	int error, s;

	p = strdup(url);
	if (p == NULL)
		return ENOMEM;
	error = parseurl(p, &sap, &idx, 1);
	free(p);
	if (error)
		return error;

	snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n",
	    PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine);

1320 s = socket(parsetab[idx].domain, SOCK_STREAM, 0); 1321 if (s == -1) 1322 return errno; 1323 1324 spops = *spopsp; 1325 sarg = malloc(sizeof(*sarg)); 1326 if (sarg == NULL) { 1327 close(s); 1328 return ENOMEM; 1329 } 1330 1331 sarg->sps_sock = s; 1332 sarg->sps_connhook = parsetab[idx].connhook; 1333 1334 cleanupidx = idx; 1335 cleanupsa = sap; 1336 1337 /* sloppy error recovery */ 1338 1339 /*LINTED*/ 1340 if (bind(s, sap, parsetab[idx].slen) == -1) { 1341 fprintf(stderr, "rump_sp: server bind failed\n"); 1342 return errno; 1343 } 1344 1345 if (listen(s, MAXCLI) == -1) { 1346 fprintf(stderr, "rump_sp: server listen failed\n"); 1347 return errno; 1348 } 1349 1350 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) { 1351 fprintf(stderr, "rump_sp: cannot create wrkr thread\n"); 1352 return errno; 1353 } 1354 pthread_detach(pt); 1355 1356 return 0; 1357 } 1358 1359 void 1360 rumpuser_sp_fini(void *arg) 1361 { 1362 struct spclient *spc = arg; 1363 register_t retval[2] = {0, 0}; 1364 1365 if (spclist[0].spc_fd) { 1366 parsetab[cleanupidx].cleanup(cleanupsa); 1367 } 1368 1369 /* 1370 * stuff response into the socket, since this process is just 1371 * about to exit 1372 */ 1373 if (spc && spc->spc_syscallreq) 1374 send_syscall_resp(spc, spc->spc_syscallreq, 0, retval); 1375 1376 if (spclist[0].spc_fd) { 1377 shutdown(spclist[0].spc_fd, SHUT_RDWR); 1378 spfini = 1; 1379 } 1380 } 1381