1 /* $NetBSD: rumpuser_sp.c,v 1.42 2011/02/15 16:10:41 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Sysproxy routines. This provides system RPC support over host sockets. 30 * The most notable limitation is that the client and server must share 31 * the same ABI. This does not mean that they have to be the same 32 * machine or that they need to run the same version of the host OS, 33 * just that they must agree on the data structures. This even *might* 34 * work correctly from one hardware architecture to another. 35 */ 36 37 #include <sys/cdefs.h> 38 __RCSID("$NetBSD: rumpuser_sp.c,v 1.42 2011/02/15 16:10:41 pooka Exp $"); 39 40 #include <sys/types.h> 41 #include <sys/atomic.h> 42 #include <sys/mman.h> 43 #include <sys/socket.h> 44 45 #include <arpa/inet.h> 46 #include <netinet/in.h> 47 #include <netinet/tcp.h> 48 49 #include <assert.h> 50 #include <errno.h> 51 #include <fcntl.h> 52 #include <poll.h> 53 #include <pthread.h> 54 #include <stdarg.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <unistd.h> 59 60 #include <rump/rump.h> /* XXX: for rfork flags */ 61 #include <rump/rumpuser.h> 62 #include "rumpuser_int.h" 63 64 #include "sp_common.c" 65 66 #ifndef MAXCLI 67 #define MAXCLI 256 68 #endif 69 #ifndef MAXWORKER 70 #define MAXWORKER 128 71 #endif 72 #ifndef IDLEWORKER 73 #define IDLEWORKER 16 74 #endif 75 int rumpsp_maxworker = MAXWORKER; 76 int rumpsp_idleworker = IDLEWORKER; 77 78 static struct pollfd pfdlist[MAXCLI]; 79 static struct spclient spclist[MAXCLI]; 80 static unsigned int disco; 81 static volatile int spfini; 82 83 static struct rumpuser_sp_ops spops; 84 85 static char banner[MAXBANNER]; 86 87 #define PROTOMAJOR 0 88 #define PROTOMINOR 3 89 90 struct prefork { 91 uint32_t pf_auth[AUTHLEN]; 92 struct lwp *pf_lwp; 93 94 LIST_ENTRY(prefork) pf_entries; /* global list */ 95 LIST_ENTRY(prefork) pf_spcentries; /* linked from forking spc */ 96 }; 97 static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks); 98 static pthread_mutex_t pfmtx; 99 100 /* 101 * This version is for the server. It's optimized for multiple threads 102 * and is *NOT* reentrant wrt to signals. 103 */ 104 static int 105 waitresp(struct spclient *spc, struct respwait *rw) 106 { 107 int spcstate; 108 int rv = 0; 109 110 pthread_mutex_lock(&spc->spc_mtx); 111 sendunlockl(spc); 112 while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) { 113 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx); 114 } 115 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries); 116 spcstate = spc->spc_state; 117 pthread_mutex_unlock(&spc->spc_mtx); 118 119 pthread_cond_destroy(&rw->rw_cv); 120 121 if (rv) 122 return rv; 123 if (spcstate == SPCSTATE_DYING) 124 return ENOTCONN; 125 return rw->rw_error; 126 } 127 128 /* 129 * Manual wrappers, since librump does not have access to the 130 * user namespace wrapped interfaces. 131 */ 132 133 static void 134 lwproc_switch(struct lwp *l) 135 { 136 137 spops.spop_schedule(); 138 spops.spop_lwproc_switch(l); 139 spops.spop_unschedule(); 140 } 141 142 static void 143 lwproc_release(void) 144 { 145 146 spops.spop_schedule(); 147 spops.spop_lwproc_release(); 148 spops.spop_unschedule(); 149 } 150 151 static int 152 lwproc_rfork(struct spclient *spc, int flags, const char *comm) 153 { 154 int rv; 155 156 spops.spop_schedule(); 157 rv = spops.spop_lwproc_rfork(spc, flags, comm); 158 spops.spop_unschedule(); 159 160 return rv; 161 } 162 163 static int 164 lwproc_newlwp(pid_t pid) 165 { 166 int rv; 167 168 spops.spop_schedule(); 169 rv = spops.spop_lwproc_newlwp(pid); 170 spops.spop_unschedule(); 171 172 return rv; 173 } 174 175 static struct lwp * 176 lwproc_curlwp(void) 177 { 178 struct lwp *l; 179 180 spops.spop_schedule(); 181 l = spops.spop_lwproc_curlwp(); 182 spops.spop_unschedule(); 183 184 return l; 185 } 186 187 static pid_t 188 lwproc_getpid(void) 189 { 190 pid_t p; 191 192 spops.spop_schedule(); 193 p = spops.spop_getpid(); 194 spops.spop_unschedule(); 195 196 return p; 197 } 198 static void 199 lwproc_execnotify(const char *comm) 200 { 201 202 spops.spop_schedule(); 203 spops.spop_execnotify(comm); 204 spops.spop_unschedule(); 205 } 206 207 static void 208 lwproc_procexit(void) 209 { 210 211 spops.spop_schedule(); 212 spops.spop_procexit(); 213 spops.spop_unschedule(); 214 } 215 216 static int 217 rumpsyscall(int sysnum, void *data, register_t *retval) 218 { 219 int rv; 220 221 spops.spop_schedule(); 222 rv = spops.spop_syscall(sysnum, data, retval); 223 spops.spop_unschedule(); 224 225 return rv; 226 } 227 228 static uint64_t 229 nextreq(struct spclient *spc) 230 { 231 uint64_t nw; 232 233 pthread_mutex_lock(&spc->spc_mtx); 234 nw = spc->spc_nextreq++; 235 pthread_mutex_unlock(&spc->spc_mtx); 236 237 return nw; 238 } 239 240 static void 241 send_error_resp(struct spclient *spc, uint64_t reqno, int error) 242 { 243 struct rsp_hdr rhdr; 244 245 rhdr.rsp_len = sizeof(rhdr); 246 rhdr.rsp_reqno = reqno; 247 rhdr.rsp_class = RUMPSP_ERROR; 248 rhdr.rsp_type = 0; 249 rhdr.rsp_error = error; 250 251 sendlock(spc); 252 (void)dosend(spc, &rhdr, sizeof(rhdr)); 253 sendunlock(spc); 254 } 255 256 static int 257 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error) 258 { 259 struct rsp_hdr rhdr; 260 int rv; 261 262 rhdr.rsp_len = sizeof(rhdr) + sizeof(error); 263 rhdr.rsp_reqno = reqno; 264 rhdr.rsp_class = RUMPSP_RESP; 265 rhdr.rsp_type = RUMPSP_HANDSHAKE; 266 rhdr.rsp_error = 0; 267 268 sendlock(spc); 269 rv = dosend(spc, &rhdr, sizeof(rhdr)); 270 rv = dosend(spc, &error, sizeof(error)); 271 sendunlock(spc); 272 273 return rv; 274 } 275 276 static int 277 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error, 278 register_t *retval) 279 { 280 struct rsp_hdr rhdr; 281 struct rsp_sysresp sysresp; 282 int rv; 283 284 rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp); 285 rhdr.rsp_reqno = reqno; 286 rhdr.rsp_class = RUMPSP_RESP; 287 rhdr.rsp_type = RUMPSP_SYSCALL; 288 rhdr.rsp_sysnum = 0; 289 290 sysresp.rsys_error = error; 291 memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval)); 292 293 sendlock(spc); 294 rv = dosend(spc, &rhdr, sizeof(rhdr)); 295 rv = dosend(spc, &sysresp, sizeof(sysresp)); 296 sendunlock(spc); 297 298 return rv; 299 } 300 301 static int 302 send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth) 303 { 304 struct rsp_hdr rhdr; 305 int rv; 306 307 rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth); 308 rhdr.rsp_reqno = reqno; 309 rhdr.rsp_class = RUMPSP_RESP; 310 rhdr.rsp_type = RUMPSP_PREFORK; 311 rhdr.rsp_sysnum = 0; 312 313 sendlock(spc); 314 rv = dosend(spc, &rhdr, sizeof(rhdr)); 315 rv = dosend(spc, auth, AUTHLEN*sizeof(*auth)); 316 sendunlock(spc); 317 318 return rv; 319 } 320 321 static int 322 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen, 323 int wantstr, void **resp) 324 { 325 struct rsp_hdr rhdr; 326 struct rsp_copydata copydata; 327 struct respwait rw; 328 int rv; 329 330 DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr)); 331 332 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata); 333 rhdr.rsp_class = RUMPSP_REQ; 334 if (wantstr) 335 rhdr.rsp_type = RUMPSP_COPYINSTR; 336 else 337 rhdr.rsp_type = RUMPSP_COPYIN; 338 rhdr.rsp_sysnum = 0; 339 340 copydata.rcp_addr = __UNCONST(remaddr); 341 copydata.rcp_len = *dlen; 342 343 putwait(spc, &rw, &rhdr); 344 rv = dosend(spc, &rhdr, sizeof(rhdr)); 345 rv = dosend(spc, ©data, sizeof(copydata)); 346 if (rv) { 347 unputwait(spc, &rw); 348 return rv; 349 } 350 351 rv = waitresp(spc, &rw); 352 353 DPRINTF(("copyin: response %d\n", rv)); 354 355 *resp = rw.rw_data; 356 if (wantstr) 357 *dlen = rw.rw_dlen; 358 359 return rv; 360 361 } 362 363 static int 364 send_copyout_req(struct spclient *spc, const void *remaddr, 365 const void *data, size_t dlen) 366 { 367 struct rsp_hdr rhdr; 368 struct rsp_copydata copydata; 369 int rv; 370 371 DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr)); 372 373 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen; 374 rhdr.rsp_reqno = nextreq(spc); 375 rhdr.rsp_class = RUMPSP_REQ; 376 rhdr.rsp_type = RUMPSP_COPYOUT; 377 rhdr.rsp_sysnum = 0; 378 379 copydata.rcp_addr = __UNCONST(remaddr); 380 copydata.rcp_len = dlen; 381 382 sendlock(spc); 383 rv = dosend(spc, &rhdr, sizeof(rhdr)); 384 rv = dosend(spc, ©data, sizeof(copydata)); 385 rv = dosend(spc, data, dlen); 386 sendunlock(spc); 387 388 return rv; 389 } 390 391 static int 392 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp) 393 { 394 struct rsp_hdr rhdr; 395 struct respwait rw; 396 int rv; 397 398 DPRINTF(("anonmmap_req: %zu bytes\n", howmuch)); 399 400 rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch); 401 rhdr.rsp_class = RUMPSP_REQ; 402 rhdr.rsp_type = RUMPSP_ANONMMAP; 403 rhdr.rsp_sysnum = 0; 404 405 putwait(spc, &rw, &rhdr); 406 rv = dosend(spc, &rhdr, sizeof(rhdr)); 407 rv = dosend(spc, &howmuch, sizeof(howmuch)); 408 if (rv) { 409 unputwait(spc, &rw); 410 return rv; 411 } 412 413 rv = waitresp(spc, &rw); 414 415 *resp = rw.rw_data; 416 417 DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp)); 418 419 return rv; 420 } 421 422 static int 423 send_raise_req(struct spclient *spc, int signo) 424 { 425 struct rsp_hdr rhdr; 426 int rv; 427 428 rhdr.rsp_len = sizeof(rhdr); 429 rhdr.rsp_class = RUMPSP_REQ; 430 rhdr.rsp_type = RUMPSP_RAISE; 431 rhdr.rsp_signo = signo; 432 433 sendlock(spc); 434 rv = dosend(spc, &rhdr, sizeof(rhdr)); 435 sendunlock(spc); 436 437 return rv; 438 } 439 440 static void 441 spcref(struct spclient *spc) 442 { 443 444 pthread_mutex_lock(&spc->spc_mtx); 445 spc->spc_refcnt++; 446 pthread_mutex_unlock(&spc->spc_mtx); 447 } 448 449 static void 450 spcrelease(struct spclient *spc) 451 { 452 int ref; 453 454 pthread_mutex_lock(&spc->spc_mtx); 455 ref = --spc->spc_refcnt; 456 pthread_mutex_unlock(&spc->spc_mtx); 457 458 if (ref > 0) 459 return; 460 461 DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd)); 462 463 _DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait)); 464 _DIAGASSERT(spc->spc_buf == NULL); 465 466 if (spc->spc_mainlwp) { 467 lwproc_switch(spc->spc_mainlwp); 468 lwproc_release(); 469 } 470 spc->spc_mainlwp = NULL; 471 472 close(spc->spc_fd); 473 spc->spc_fd = -1; 474 spc->spc_state = SPCSTATE_NEW; 475 476 atomic_inc_uint(&disco); 477 } 478 479 static void 480 serv_handledisco(unsigned int idx) 481 { 482 struct spclient *spc = &spclist[idx]; 483 484 DPRINTF(("rump_sp: disconnecting [%u]\n", idx)); 485 486 pfdlist[idx].fd = -1; 487 pfdlist[idx].revents = 0; 488 pthread_mutex_lock(&spc->spc_mtx); 489 spc->spc_state = SPCSTATE_DYING; 490 kickall(spc); 491 sendunlockl(spc); 492 pthread_mutex_unlock(&spc->spc_mtx); 493 494 if (spc->spc_mainlwp) { 495 lwproc_switch(spc->spc_mainlwp); 496 lwproc_procexit(); 497 lwproc_switch(NULL); 498 } 499 500 /* 501 * Nobody's going to attempt to send/receive anymore, 502 * so reinit info relevant to that. 503 */ 504 /*LINTED:pointer casts may be ok*/ 505 memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF); 506 507 spcrelease(spc); 508 } 509 510 static void 511 serv_shutdown(void) 512 { 513 struct spclient *spc; 514 unsigned int i; 515 516 for (i = 1; i < MAXCLI; i++) { 517 spc = &spclist[i]; 518 if (spc->spc_fd == -1) 519 continue; 520 521 shutdown(spc->spc_fd, SHUT_RDWR); 522 serv_handledisco(i); 523 524 spcrelease(spc); 525 } 526 } 527 528 static unsigned 529 serv_handleconn(int fd, connecthook_fn connhook, int busy) 530 { 531 struct sockaddr_storage ss; 532 socklen_t sl = sizeof(ss); 533 int newfd, flags; 534 unsigned i; 535 536 /*LINTED: cast ok */ 537 newfd = accept(fd, (struct sockaddr *)&ss, &sl); 538 if (newfd == -1) 539 return 0; 540 541 if (busy) { 542 close(newfd); /* EBUSY */ 543 return 0; 544 } 545 546 flags = fcntl(newfd, F_GETFL, 0); 547 if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) { 548 close(newfd); 549 return 0; 550 } 551 552 if (connhook(newfd) != 0) { 553 close(newfd); 554 return 0; 555 } 556 557 /* write out a banner for the client */ 558 if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL) 559 != (ssize_t)strlen(banner)) { 560 close(newfd); 561 return 0; 562 } 563 564 /* find empty slot the simple way */ 565 for (i = 0; i < MAXCLI; i++) { 566 if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW) 567 break; 568 } 569 570 assert(i < MAXCLI); 571 572 pfdlist[i].fd = newfd; 573 spclist[i].spc_fd = newfd; 574 spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */ 575 spclist[i].spc_refcnt = 1; 576 577 TAILQ_INIT(&spclist[i].spc_respwait); 578 579 DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i)); 580 581 return i; 582 } 583 584 static void 585 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data) 586 { 587 register_t retval[2] = {0, 0}; 588 int rv, sysnum; 589 590 sysnum = (int)rhdr->rsp_sysnum; 591 DPRINTF(("rump_sp: handling syscall %d from client %d\n", 592 sysnum, spc->spc_pid)); 593 594 lwproc_newlwp(spc->spc_pid); 595 spc->spc_syscallreq = rhdr->rsp_reqno; 596 rv = rumpsyscall(sysnum, data, retval); 597 spc->spc_syscallreq = 0; 598 lwproc_release(); 599 600 DPRINTF(("rump_sp: got return value %d & %d/%d\n", 601 rv, retval[0], retval[1])); 602 603 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval); 604 } 605 606 struct sysbouncearg { 607 struct spclient *sba_spc; 608 struct rsp_hdr sba_hdr; 609 uint8_t *sba_data; 610 611 TAILQ_ENTRY(sysbouncearg) sba_entries; 612 }; 613 static pthread_mutex_t sbamtx; 614 static pthread_cond_t sbacv; 615 static int nworker, idleworker, nwork; 616 static TAILQ_HEAD(, sysbouncearg) syslist = TAILQ_HEAD_INITIALIZER(syslist); 617 618 /*ARGSUSED*/ 619 static void * 620 serv_syscallbouncer(void *arg) 621 { 622 struct sysbouncearg *sba; 623 624 for (;;) { 625 pthread_mutex_lock(&sbamtx); 626 if (__predict_false(idleworker >= rumpsp_idleworker)) { 627 nworker--; 628 pthread_mutex_unlock(&sbamtx); 629 break; 630 } 631 idleworker++; 632 while (TAILQ_EMPTY(&syslist)) { 633 _DIAGASSERT(nwork == 0); 634 pthread_cond_wait(&sbacv, &sbamtx); 635 } 636 idleworker--; 637 638 sba = TAILQ_FIRST(&syslist); 639 TAILQ_REMOVE(&syslist, sba, sba_entries); 640 nwork--; 641 pthread_mutex_unlock(&sbamtx); 642 643 serv_handlesyscall(sba->sba_spc, 644 &sba->sba_hdr, sba->sba_data); 645 spcrelease(sba->sba_spc); 646 free(sba->sba_data); 647 free(sba); 648 } 649 650 return NULL; 651 } 652 653 static int 654 sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr) 655 { 656 struct spclient *spc = arg; 657 void *rdata = NULL; /* XXXuninit */ 658 int rv, nlocks; 659 660 rumpuser__kunlock(0, &nlocks, NULL); 661 662 rv = copyin_req(spc, raddr, len, wantstr, &rdata); 663 if (rv) 664 goto out; 665 666 memcpy(laddr, rdata, *len); 667 free(rdata); 668 669 out: 670 rumpuser__klock(nlocks, NULL); 671 if (rv) 672 return EFAULT; 673 return 0; 674 } 675 676 int 677 rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len) 678 { 679 680 return sp_copyin(arg, raddr, laddr, &len, 0); 681 } 682 683 int 684 rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len) 685 { 686 687 return sp_copyin(arg, raddr, laddr, len, 1); 688 } 689 690 static int 691 sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen) 692 { 693 struct spclient *spc = arg; 694 int nlocks, rv; 695 696 rumpuser__kunlock(0, &nlocks, NULL); 697 rv = send_copyout_req(spc, raddr, laddr, dlen); 698 rumpuser__klock(nlocks, NULL); 699 700 if (rv) 701 return EFAULT; 702 return 0; 703 } 704 705 int 706 rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen) 707 { 708 709 return sp_copyout(arg, laddr, raddr, dlen); 710 } 711 712 int 713 rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen) 714 { 715 716 return sp_copyout(arg, laddr, raddr, *dlen); 717 } 718 719 int 720 rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr) 721 { 722 struct spclient *spc = arg; 723 void *resp, *rdata; 724 int nlocks, rv; 725 726 rumpuser__kunlock(0, &nlocks, NULL); 727 728 rv = anonmmap_req(spc, howmuch, &rdata); 729 if (rv) { 730 rv = EFAULT; 731 goto out; 732 } 733 734 resp = *(void **)rdata; 735 free(rdata); 736 737 if (resp == NULL) { 738 rv = ENOMEM; 739 } 740 741 *addr = resp; 742 743 out: 744 rumpuser__klock(nlocks, NULL); 745 746 if (rv) 747 return rv; 748 return 0; 749 } 750 751 int 752 rumpuser_sp_raise(void *arg, int signo) 753 { 754 struct spclient *spc = arg; 755 int rv, nlocks; 756 757 rumpuser__kunlock(0, &nlocks, NULL); 758 rv = send_raise_req(spc, signo); 759 rumpuser__klock(nlocks, NULL); 760 761 return rv; 762 } 763 764 /* 765 * 766 * Startup routines and mainloop for server. 767 * 768 */ 769 770 struct spservarg { 771 int sps_sock; 772 connecthook_fn sps_connhook; 773 }; 774 775 static pthread_attr_t pattr_detached; 776 static void 777 handlereq(struct spclient *spc) 778 { 779 struct sysbouncearg *sba; 780 pthread_t pt; 781 uint64_t reqno; 782 int retries, error, i; 783 784 reqno = spc->spc_hdr.rsp_reqno; 785 if (__predict_false(spc->spc_state == SPCSTATE_NEW)) { 786 if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) { 787 send_error_resp(spc, reqno, EAUTH); 788 shutdown(spc->spc_fd, SHUT_RDWR); 789 spcfreebuf(spc); 790 return; 791 } 792 793 if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) { 794 char *comm = (char *)spc->spc_buf; 795 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ; 796 797 /* ensure it's 0-terminated */ 798 /* XXX make sure it contains sensible chars? */ 799 comm[commlen] = '\0'; 800 801 if ((error = lwproc_rfork(spc, 802 RUMP_RFCFDG, comm)) != 0) { 803 shutdown(spc->spc_fd, SHUT_RDWR); 804 } 805 806 spcfreebuf(spc); 807 if (error) 808 return; 809 810 spc->spc_mainlwp = lwproc_curlwp(); 811 812 send_handshake_resp(spc, reqno, 0); 813 } else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) { 814 struct lwp *tmpmain; 815 struct prefork *pf; 816 struct handshake_fork *rfp; 817 int cancel; 818 819 if (spc->spc_off-HDRSZ != sizeof(*rfp)) { 820 send_error_resp(spc, reqno, EINVAL); 821 shutdown(spc->spc_fd, SHUT_RDWR); 822 spcfreebuf(spc); 823 return; 824 } 825 826 /*LINTED*/ 827 rfp = (void *)spc->spc_buf; 828 cancel = rfp->rf_cancel; 829 830 pthread_mutex_lock(&pfmtx); 831 LIST_FOREACH(pf, &preforks, pf_entries) { 832 if (memcmp(rfp->rf_auth, pf->pf_auth, 833 sizeof(rfp->rf_auth)) == 0) { 834 LIST_REMOVE(pf, pf_entries); 835 LIST_REMOVE(pf, pf_spcentries); 836 break; 837 } 838 } 839 pthread_mutex_lock(&pfmtx); 840 spcfreebuf(spc); 841 842 if (!pf) { 843 send_error_resp(spc, reqno, ESRCH); 844 shutdown(spc->spc_fd, SHUT_RDWR); 845 return; 846 } 847 848 tmpmain = pf->pf_lwp; 849 free(pf); 850 lwproc_switch(tmpmain); 851 if (cancel) { 852 lwproc_release(); 853 shutdown(spc->spc_fd, SHUT_RDWR); 854 return; 855 } 856 857 /* 858 * So, we forked already during "prefork" to save 859 * the file descriptors from a parent exit 860 * race condition. But now we need to fork 861 * a second time since the initial fork has 862 * the wrong spc pointer. (yea, optimize 863 * interfaces some day if anyone cares) 864 */ 865 if ((error = lwproc_rfork(spc, 0, NULL)) != 0) { 866 send_error_resp(spc, reqno, error); 867 shutdown(spc->spc_fd, SHUT_RDWR); 868 lwproc_release(); 869 return; 870 } 871 spc->spc_mainlwp = lwproc_curlwp(); 872 lwproc_switch(tmpmain); 873 lwproc_release(); 874 lwproc_switch(spc->spc_mainlwp); 875 876 send_handshake_resp(spc, reqno, 0); 877 } 878 879 spc->spc_pid = lwproc_getpid(); 880 881 DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n", 882 spc, spc->spc_pid)); 883 884 lwproc_switch(NULL); 885 spc->spc_state = SPCSTATE_RUNNING; 886 return; 887 } 888 889 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) { 890 struct prefork *pf; 891 uint32_t auth[AUTHLEN]; 892 893 DPRINTF(("rump_sp: prefork handler executing for %p\n", spc)); 894 spcfreebuf(spc); 895 896 pf = malloc(sizeof(*pf)); 897 if (pf == NULL) { 898 send_error_resp(spc, reqno, ENOMEM); 899 return; 900 } 901 902 /* 903 * Use client main lwp to fork. this is never used by 904 * worker threads (except if spc refcount goes to 0), 905 * so we can safely use it here. 906 */ 907 lwproc_switch(spc->spc_mainlwp); 908 if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) { 909 DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc)); 910 send_error_resp(spc, reqno, error); 911 lwproc_switch(NULL); 912 free(pf); 913 return; 914 } 915 916 /* Ok, we have a new process context and a new curlwp */ 917 for (i = 0; i < AUTHLEN; i++) { 918 pf->pf_auth[i] = auth[i] = arc4random(); 919 } 920 pf->pf_lwp = lwproc_curlwp(); 921 lwproc_switch(NULL); 922 923 pthread_mutex_lock(&pfmtx); 924 LIST_INSERT_HEAD(&preforks, pf, pf_entries); 925 LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries); 926 pthread_mutex_unlock(&pfmtx); 927 928 DPRINTF(("rump_sp: prefork handler success %p\n", spc)); 929 930 send_prefork_resp(spc, reqno, auth); 931 return; 932 } 933 934 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_HANDSHAKE)) { 935 char *comm = (char *)spc->spc_buf; 936 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ; 937 938 if (spc->spc_hdr.rsp_handshake != HANDSHAKE_EXEC) { 939 send_error_resp(spc, reqno, EINVAL); 940 spcfreebuf(spc); 941 return; 942 } 943 944 /* ensure it's 0-terminated */ 945 /* XXX make sure it contains sensible chars? */ 946 comm[commlen] = '\0'; 947 948 lwproc_switch(spc->spc_mainlwp); 949 lwproc_execnotify(comm); 950 lwproc_switch(NULL); 951 952 send_handshake_resp(spc, reqno, 0); 953 spcfreebuf(spc); 954 return; 955 } 956 957 if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) { 958 send_error_resp(spc, reqno, EINVAL); 959 spcfreebuf(spc); 960 return; 961 } 962 963 retries = 0; 964 while ((sba = malloc(sizeof(*sba))) == NULL) { 965 if (nworker == 0 || retries > 10) { 966 send_error_resp(spc, reqno, EAGAIN); 967 spcfreebuf(spc); 968 return; 969 } 970 /* slim chance of more memory? */ 971 usleep(10000); 972 } 973 974 sba->sba_spc = spc; 975 sba->sba_hdr = spc->spc_hdr; 976 sba->sba_data = spc->spc_buf; 977 spcresetbuf(spc); 978 979 spcref(spc); 980 981 pthread_mutex_lock(&sbamtx); 982 TAILQ_INSERT_TAIL(&syslist, sba, sba_entries); 983 nwork++; 984 if (nwork <= idleworker) { 985 /* do we have a daemon's tool (i.e. idle threads)? */ 986 pthread_cond_signal(&sbacv); 987 } else if (nworker < rumpsp_maxworker) { 988 /* 989 * Else, need to create one 990 * (if we can, otherwise just expect another 991 * worker to pick up the syscall) 992 */ 993 if (pthread_create(&pt, &pattr_detached, 994 serv_syscallbouncer, NULL) == 0) { 995 nworker++; 996 } 997 } 998 pthread_mutex_unlock(&sbamtx); 999 } 1000 1001 static void * 1002 spserver(void *arg) 1003 { 1004 struct spservarg *sarg = arg; 1005 struct spclient *spc; 1006 unsigned idx; 1007 int seen; 1008 int rv; 1009 unsigned int nfds, maxidx; 1010 1011 for (idx = 0; idx < MAXCLI; idx++) { 1012 pfdlist[idx].fd = -1; 1013 pfdlist[idx].events = POLLIN; 1014 1015 spc = &spclist[idx]; 1016 pthread_mutex_init(&spc->spc_mtx, NULL); 1017 pthread_cond_init(&spc->spc_cv, NULL); 1018 spc->spc_fd = -1; 1019 } 1020 pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock; 1021 pfdlist[0].events = POLLIN; 1022 nfds = 1; 1023 maxidx = 0; 1024 1025 pthread_attr_init(&pattr_detached); 1026 pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED); 1027 /* XXX: doesn't stacksize currently work on NetBSD */ 1028 pthread_attr_setstacksize(&pattr_detached, 32*1024); 1029 1030 pthread_mutex_init(&sbamtx, NULL); 1031 pthread_cond_init(&sbacv, NULL); 1032 1033 DPRINTF(("rump_sp: server mainloop\n")); 1034 1035 for (;;) { 1036 int discoed; 1037 1038 /* g/c hangarounds (eventually) */ 1039 discoed = atomic_swap_uint(&disco, 0); 1040 while (discoed--) { 1041 nfds--; 1042 idx = maxidx; 1043 while (idx) { 1044 if (pfdlist[idx].fd != -1) { 1045 maxidx = idx; 1046 break; 1047 } 1048 idx--; 1049 } 1050 DPRINTF(("rump_sp: set maxidx to [%u]\n", 1051 maxidx)); 1052 } 1053 1054 DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1)); 1055 seen = 0; 1056 rv = poll(pfdlist, maxidx+1, INFTIM); 1057 assert(maxidx+1 <= MAXCLI); 1058 assert(rv != 0); 1059 if (rv == -1) { 1060 if (errno == EINTR) 1061 continue; 1062 fprintf(stderr, "rump_spserver: poll returned %d\n", 1063 errno); 1064 break; 1065 } 1066 1067 for (idx = 0; seen < rv && idx < MAXCLI; idx++) { 1068 if ((pfdlist[idx].revents & POLLIN) == 0) 1069 continue; 1070 1071 seen++; 1072 DPRINTF(("rump_sp: activity at [%u] %d/%d\n", 1073 idx, seen, rv)); 1074 if (idx > 0) { 1075 spc = &spclist[idx]; 1076 DPRINTF(("rump_sp: mainloop read [%u]\n", idx)); 1077 switch (readframe(spc)) { 1078 case 0: 1079 break; 1080 case -1: 1081 serv_handledisco(idx); 1082 break; 1083 default: 1084 switch (spc->spc_hdr.rsp_class) { 1085 case RUMPSP_RESP: 1086 kickwaiter(spc); 1087 break; 1088 case RUMPSP_REQ: 1089 handlereq(spc); 1090 break; 1091 default: 1092 send_error_resp(spc, 1093 spc->spc_hdr.rsp_reqno, 1094 ENOENT); 1095 spcfreebuf(spc); 1096 break; 1097 } 1098 break; 1099 } 1100 1101 } else { 1102 DPRINTF(("rump_sp: mainloop new connection\n")); 1103 1104 if (__predict_false(spfini)) { 1105 close(spclist[0].spc_fd); 1106 serv_shutdown(); 1107 goto out; 1108 } 1109 1110 idx = serv_handleconn(pfdlist[0].fd, 1111 sarg->sps_connhook, nfds == MAXCLI); 1112 if (idx) 1113 nfds++; 1114 if (idx > maxidx) 1115 maxidx = idx; 1116 DPRINTF(("rump_sp: maxid now %d\n", maxidx)); 1117 } 1118 } 1119 } 1120 1121 out: 1122 return NULL; 1123 } 1124 1125 static unsigned cleanupidx; 1126 static struct sockaddr *cleanupsa; 1127 int 1128 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp, 1129 const char *ostype, const char *osrelease, const char *machine) 1130 { 1131 pthread_t pt; 1132 struct spservarg *sarg; 1133 struct sockaddr *sap; 1134 char *p; 1135 unsigned idx; 1136 int error, s; 1137 1138 p = strdup(url); 1139 if (p == NULL) 1140 return ENOMEM; 1141 error = parseurl(p, &sap, &idx, 1); 1142 free(p); 1143 if (error) 1144 return error; 1145 1146 snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n", 1147 PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine); 1148 1149 s = socket(parsetab[idx].domain, SOCK_STREAM, 0); 1150 if (s == -1) 1151 return errno; 1152 1153 spops = *spopsp; 1154 sarg = malloc(sizeof(*sarg)); 1155 if (sarg == NULL) { 1156 close(s); 1157 return ENOMEM; 1158 } 1159 1160 sarg->sps_sock = s; 1161 sarg->sps_connhook = parsetab[idx].connhook; 1162 1163 cleanupidx = idx; 1164 cleanupsa = sap; 1165 1166 /* sloppy error recovery */ 1167 1168 /*LINTED*/ 1169 if (bind(s, sap, sap->sa_len) == -1) { 1170 fprintf(stderr, "rump_sp: server bind failed\n"); 1171 return errno; 1172 } 1173 1174 if (listen(s, MAXCLI) == -1) { 1175 fprintf(stderr, "rump_sp: server listen failed\n"); 1176 return errno; 1177 } 1178 1179 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) { 1180 fprintf(stderr, "rump_sp: cannot create wrkr thread\n"); 1181 return errno; 1182 } 1183 pthread_detach(pt); 1184 1185 return 0; 1186 } 1187 1188 void 1189 rumpuser_sp_fini(void *arg) 1190 { 1191 struct spclient *spc = arg; 1192 register_t retval[2] = {0, 0}; 1193 1194 if (spclist[0].spc_fd) { 1195 parsetab[cleanupidx].cleanup(cleanupsa); 1196 } 1197 1198 /* 1199 * stuff response into the socket, since this process is just 1200 * about to exit 1201 */ 1202 if (spc && spc->spc_syscallreq) 1203 send_syscall_resp(spc, spc->spc_syscallreq, 0, retval); 1204 1205 if (spclist[0].spc_fd) { 1206 shutdown(spclist[0].spc_fd, SHUT_RDWR); 1207 spfini = 1; 1208 } 1209 } 1210