1 /* $NetBSD: rumpuser_sp.c,v 1.45 2011/03/08 15:34:37 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Sysproxy routines. This provides system RPC support over host sockets. 30 * The most notable limitation is that the client and server must share 31 * the same ABI. This does not mean that they have to be the same 32 * machine or that they need to run the same version of the host OS, 33 * just that they must agree on the data structures. This even *might* 34 * work correctly from one hardware architecture to another. 
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: rumpuser_sp.c,v 1.45 2011/03/08 15:34:37 pooka Exp $");

#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/mman.h>
#include <sys/socket.h>

#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <rump/rump.h> /* XXX: for rfork flags */
#include <rump/rumpuser.h>
#include "rumpuser_int.h"

#include "sp_common.c"

/* Compile-time tunables, overridable from the build. */
#ifndef MAXCLI
#define MAXCLI 256		/* maximum simultaneous client connections */
#endif
#ifndef MAXWORKER
#define MAXWORKER 128		/* maximum request worker threads */
#endif
#ifndef IDLEWORKER
#define IDLEWORKER 16		/* workers kept alive while idle */
#endif
int rumpsp_maxworker = MAXWORKER;
int rumpsp_idleworker = IDLEWORKER;

/* Slot 0 holds the listen socket; slots 1.. are client connections. */
static struct pollfd pfdlist[MAXCLI];
static struct spclient spclist[MAXCLI];
static unsigned int disco;	/* disconnects pending g/c by the mainloop */
static volatile int spfini;	/* nonzero requests server shutdown */

/* Hypercall ops into the rump kernel, set at init. */
static struct rumpuser_sp_ops spops;

static char banner[MAXBANNER];	/* protocol banner written to new clients */

#define PROTOMAJOR 0
#define PROTOMINOR 3

/*
 * Prefork context: the authentication cookie handed to the client
 * plus the lwp of the process forked on its behalf.
 */
struct prefork {
	uint32_t pf_auth[AUTHLEN];	/* cookie a forked child presents */
	struct lwp *pf_lwp;		/* preforked process's lwp */

	LIST_ENTRY(prefork) pf_entries;		/* global list */
	LIST_ENTRY(prefork) pf_spcentries;	/* linked from forking spc */
};
static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks);
static pthread_mutex_t pfmtx;	/* protects preforks + per-spc pflists */

/*
 * This version is for the server.  It's optimized for multiple threads
 * and is *NOT* reentrant wrt to signals.
 *
 * Block until the receiver thread completes the registered wait, or
 * until the connection dies.  Returns 0 / errno.
 */
static int
waitresp(struct spclient *spc, struct respwait *rw)
{
	int spcstate;
	int rv = 0;

	pthread_mutex_lock(&spc->spc_mtx);
	sendunlockl(spc);
	/* kickwaiter() sets rw_done; a dying spc wakes us via kickall() */
	while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) {
		pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	spcstate = spc->spc_state;
	pthread_mutex_unlock(&spc->spc_mtx);

	pthread_cond_destroy(&rw->rw_cv);

	if (rv)
		return rv;
	if (spcstate == SPCSTATE_DYING)
		return ENOTCONN;
	return rw->rw_error;
}

/*
 * Manual wrappers, since librump does not have access to the
 * user namespace wrapped interfaces.  Each wrapper brackets the
 * hypercall op with a rump kernel schedule/unschedule pair.
 */

/* Switch execution to the given lwp context (NULL to detach). */
static void
lwproc_switch(struct lwp *l)
{

	spops.spop_schedule();
	spops.spop_lwproc_switch(l);
	spops.spop_unschedule();
}

/* Release (free) the current lwproc. */
static void
lwproc_release(void)
{

	spops.spop_schedule();
	spops.spop_lwproc_release();
	spops.spop_unschedule();
}

/* rfork a new in-kernel process for spc.  Returns 0 or errno. */
static int
lwproc_rfork(struct spclient *spc, int flags, const char *comm)
{
	int rv;

	spops.spop_schedule();
	rv = spops.spop_lwproc_rfork(spc, flags, comm);
	spops.spop_unschedule();

	return rv;
}

/* Create a new lwp in the process identified by pid.  Returns 0 or errno. */
static int
lwproc_newlwp(pid_t pid)
{
	int rv;

	spops.spop_schedule();
	rv = spops.spop_lwproc_newlwp(pid);
	spops.spop_unschedule();

	return rv;
}

/* Return the currently scheduled lwp. */
static struct lwp *
lwproc_curlwp(void)
{
	struct lwp *l;

	spops.spop_schedule();
	l = spops.spop_lwproc_curlwp();
	spops.spop_unschedule();

	return l;
}

/* Return the pid of the current rump kernel process. */
static pid_t
lwproc_getpid(void)
{
	pid_t p;

	spops.spop_schedule();
	p = spops.spop_getpid();
	spops.spop_unschedule();

	return p;
}

/* Notify the rump kernel that the client exec'd; comm is the new command. */
static void
lwproc_execnotify(const char *comm)
{

	spops.spop_schedule();
	spops.spop_execnotify(comm);
	spops.spop_unschedule();
}

208 static void 209 lwproc_lwpexit(void) 210 { 211 212 spops.spop_schedule(); 213 spops.spop_lwpexit(); 214 spops.spop_unschedule(); 215 } 216 217 static int 218 rumpsyscall(int sysnum, void *data, register_t *retval) 219 { 220 int rv; 221 222 spops.spop_schedule(); 223 rv = spops.spop_syscall(sysnum, data, retval); 224 spops.spop_unschedule(); 225 226 return rv; 227 } 228 229 static uint64_t 230 nextreq(struct spclient *spc) 231 { 232 uint64_t nw; 233 234 pthread_mutex_lock(&spc->spc_mtx); 235 nw = spc->spc_nextreq++; 236 pthread_mutex_unlock(&spc->spc_mtx); 237 238 return nw; 239 } 240 241 /* 242 * XXX: we send responses with "blocking" I/O. This is not 243 * ok for the main thread. XXXFIXME 244 */ 245 246 static void 247 send_error_resp(struct spclient *spc, uint64_t reqno, int error) 248 { 249 struct rsp_hdr rhdr; 250 struct iovec iov[1]; 251 252 rhdr.rsp_len = sizeof(rhdr); 253 rhdr.rsp_reqno = reqno; 254 rhdr.rsp_class = RUMPSP_ERROR; 255 rhdr.rsp_type = 0; 256 rhdr.rsp_error = error; 257 258 IOVPUT(iov[0], rhdr); 259 260 sendlock(spc); 261 (void)SENDIOV(spc, iov); 262 sendunlock(spc); 263 } 264 265 static int 266 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error) 267 { 268 struct rsp_hdr rhdr; 269 struct iovec iov[2]; 270 int rv; 271 272 rhdr.rsp_len = sizeof(rhdr) + sizeof(error); 273 rhdr.rsp_reqno = reqno; 274 rhdr.rsp_class = RUMPSP_RESP; 275 rhdr.rsp_type = RUMPSP_HANDSHAKE; 276 rhdr.rsp_error = 0; 277 278 IOVPUT(iov[0], rhdr); 279 IOVPUT(iov[1], error); 280 281 sendlock(spc); 282 rv = SENDIOV(spc, iov); 283 sendunlock(spc); 284 285 return rv; 286 } 287 288 static int 289 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error, 290 register_t *retval) 291 { 292 struct rsp_hdr rhdr; 293 struct rsp_sysresp sysresp; 294 struct iovec iov[2]; 295 int rv; 296 297 rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp); 298 rhdr.rsp_reqno = reqno; 299 rhdr.rsp_class = RUMPSP_RESP; 300 rhdr.rsp_type = RUMPSP_SYSCALL; 301 rhdr.rsp_sysnum = 0; 
302 303 sysresp.rsys_error = error; 304 memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval)); 305 306 IOVPUT(iov[0], rhdr); 307 IOVPUT(iov[1], sysresp); 308 309 sendlock(spc); 310 rv = SENDIOV(spc, iov); 311 sendunlock(spc); 312 313 return rv; 314 } 315 316 static int 317 send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth) 318 { 319 struct rsp_hdr rhdr; 320 struct iovec iov[2]; 321 int rv; 322 323 rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth); 324 rhdr.rsp_reqno = reqno; 325 rhdr.rsp_class = RUMPSP_RESP; 326 rhdr.rsp_type = RUMPSP_PREFORK; 327 rhdr.rsp_sysnum = 0; 328 329 IOVPUT(iov[0], rhdr); 330 IOVPUT_WITHSIZE(iov[1], auth, AUTHLEN*sizeof(*auth)); 331 332 sendlock(spc); 333 rv = SENDIOV(spc, iov); 334 sendunlock(spc); 335 336 return rv; 337 } 338 339 static int 340 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen, 341 int wantstr, void **resp) 342 { 343 struct rsp_hdr rhdr; 344 struct rsp_copydata copydata; 345 struct respwait rw; 346 struct iovec iov[2]; 347 int rv; 348 349 DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr)); 350 351 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata); 352 rhdr.rsp_class = RUMPSP_REQ; 353 if (wantstr) 354 rhdr.rsp_type = RUMPSP_COPYINSTR; 355 else 356 rhdr.rsp_type = RUMPSP_COPYIN; 357 rhdr.rsp_sysnum = 0; 358 359 copydata.rcp_addr = __UNCONST(remaddr); 360 copydata.rcp_len = *dlen; 361 362 IOVPUT(iov[0], rhdr); 363 IOVPUT(iov[1], copydata); 364 365 putwait(spc, &rw, &rhdr); 366 rv = SENDIOV(spc, iov); 367 if (rv) { 368 unputwait(spc, &rw); 369 return rv; 370 } 371 372 rv = waitresp(spc, &rw); 373 374 DPRINTF(("copyin: response %d\n", rv)); 375 376 *resp = rw.rw_data; 377 if (wantstr) 378 *dlen = rw.rw_dlen; 379 380 return rv; 381 382 } 383 384 static int 385 send_copyout_req(struct spclient *spc, const void *remaddr, 386 const void *data, size_t dlen) 387 { 388 struct rsp_hdr rhdr; 389 struct rsp_copydata copydata; 390 struct iovec iov[3]; 391 int rv; 392 393 
DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr)); 394 395 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen; 396 rhdr.rsp_reqno = nextreq(spc); 397 rhdr.rsp_class = RUMPSP_REQ; 398 rhdr.rsp_type = RUMPSP_COPYOUT; 399 rhdr.rsp_sysnum = 0; 400 401 copydata.rcp_addr = __UNCONST(remaddr); 402 copydata.rcp_len = dlen; 403 404 IOVPUT(iov[0], rhdr); 405 IOVPUT(iov[1], copydata); 406 IOVPUT_WITHSIZE(iov[2], __UNCONST(data), dlen); 407 408 sendlock(spc); 409 rv = SENDIOV(spc, iov); 410 sendunlock(spc); 411 412 return rv; 413 } 414 415 static int 416 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp) 417 { 418 struct rsp_hdr rhdr; 419 struct respwait rw; 420 struct iovec iov[2]; 421 int rv; 422 423 DPRINTF(("anonmmap_req: %zu bytes\n", howmuch)); 424 425 rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch); 426 rhdr.rsp_class = RUMPSP_REQ; 427 rhdr.rsp_type = RUMPSP_ANONMMAP; 428 rhdr.rsp_sysnum = 0; 429 430 IOVPUT(iov[0], rhdr); 431 IOVPUT(iov[1], howmuch); 432 433 putwait(spc, &rw, &rhdr); 434 rv = SENDIOV(spc, iov); 435 if (rv) { 436 unputwait(spc, &rw); 437 return rv; 438 } 439 440 rv = waitresp(spc, &rw); 441 442 *resp = rw.rw_data; 443 444 DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp)); 445 446 return rv; 447 } 448 449 static int 450 send_raise_req(struct spclient *spc, int signo) 451 { 452 struct rsp_hdr rhdr; 453 struct iovec iov[1]; 454 int rv; 455 456 rhdr.rsp_len = sizeof(rhdr); 457 rhdr.rsp_class = RUMPSP_REQ; 458 rhdr.rsp_type = RUMPSP_RAISE; 459 rhdr.rsp_signo = signo; 460 461 IOVPUT(iov[0], rhdr); 462 463 sendlock(spc); 464 rv = SENDIOV(spc, iov); 465 sendunlock(spc); 466 467 return rv; 468 } 469 470 static void 471 spcref(struct spclient *spc) 472 { 473 474 pthread_mutex_lock(&spc->spc_mtx); 475 spc->spc_refcnt++; 476 pthread_mutex_unlock(&spc->spc_mtx); 477 } 478 479 static void 480 spcrelease(struct spclient *spc) 481 { 482 int ref; 483 484 pthread_mutex_lock(&spc->spc_mtx); 485 ref = --spc->spc_refcnt; 486 if 
(__predict_false(spc->spc_inexec && ref <= 2)) 487 pthread_cond_broadcast(&spc->spc_cv); 488 pthread_mutex_unlock(&spc->spc_mtx); 489 490 if (ref > 0) 491 return; 492 493 DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd)); 494 495 _DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait)); 496 _DIAGASSERT(spc->spc_buf == NULL); 497 498 if (spc->spc_mainlwp) { 499 lwproc_switch(spc->spc_mainlwp); 500 lwproc_release(); 501 } 502 spc->spc_mainlwp = NULL; 503 504 close(spc->spc_fd); 505 spc->spc_fd = -1; 506 spc->spc_state = SPCSTATE_NEW; 507 508 atomic_inc_uint(&disco); 509 } 510 511 static void 512 serv_handledisco(unsigned int idx) 513 { 514 struct spclient *spc = &spclist[idx]; 515 int dolwpexit; 516 517 DPRINTF(("rump_sp: disconnecting [%u]\n", idx)); 518 519 pfdlist[idx].fd = -1; 520 pfdlist[idx].revents = 0; 521 pthread_mutex_lock(&spc->spc_mtx); 522 spc->spc_state = SPCSTATE_DYING; 523 kickall(spc); 524 sendunlockl(spc); 525 /* exec uses mainlwp in another thread, but also nuked all lwps */ 526 dolwpexit = !spc->spc_inexec; 527 pthread_mutex_unlock(&spc->spc_mtx); 528 529 if (dolwpexit && spc->spc_mainlwp) { 530 lwproc_switch(spc->spc_mainlwp); 531 lwproc_lwpexit(); 532 lwproc_switch(NULL); 533 } 534 535 /* 536 * Nobody's going to attempt to send/receive anymore, 537 * so reinit info relevant to that. 
538 */ 539 /*LINTED:pointer casts may be ok*/ 540 memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF); 541 542 spcrelease(spc); 543 } 544 545 static void 546 serv_shutdown(void) 547 { 548 struct spclient *spc; 549 unsigned int i; 550 551 for (i = 1; i < MAXCLI; i++) { 552 spc = &spclist[i]; 553 if (spc->spc_fd == -1) 554 continue; 555 556 shutdown(spc->spc_fd, SHUT_RDWR); 557 serv_handledisco(i); 558 559 spcrelease(spc); 560 } 561 } 562 563 static unsigned 564 serv_handleconn(int fd, connecthook_fn connhook, int busy) 565 { 566 struct sockaddr_storage ss; 567 socklen_t sl = sizeof(ss); 568 int newfd, flags; 569 unsigned i; 570 571 /*LINTED: cast ok */ 572 newfd = accept(fd, (struct sockaddr *)&ss, &sl); 573 if (newfd == -1) 574 return 0; 575 576 if (busy) { 577 close(newfd); /* EBUSY */ 578 return 0; 579 } 580 581 flags = fcntl(newfd, F_GETFL, 0); 582 if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) { 583 close(newfd); 584 return 0; 585 } 586 587 if (connhook(newfd) != 0) { 588 close(newfd); 589 return 0; 590 } 591 592 /* write out a banner for the client */ 593 if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL) 594 != (ssize_t)strlen(banner)) { 595 close(newfd); 596 return 0; 597 } 598 599 /* find empty slot the simple way */ 600 for (i = 0; i < MAXCLI; i++) { 601 if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW) 602 break; 603 } 604 605 assert(i < MAXCLI); 606 607 pfdlist[i].fd = newfd; 608 spclist[i].spc_fd = newfd; 609 spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */ 610 spclist[i].spc_refcnt = 1; 611 612 TAILQ_INIT(&spclist[i].spc_respwait); 613 614 DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i)); 615 616 return i; 617 } 618 619 static void 620 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data) 621 { 622 register_t retval[2] = {0, 0}; 623 int rv, sysnum; 624 625 sysnum = (int)rhdr->rsp_sysnum; 626 DPRINTF(("rump_sp: handling syscall %d from client %d\n", 627 
sysnum, spc->spc_pid)); 628 629 if (__predict_false((rv = lwproc_newlwp(spc->spc_pid)) != 0)) { 630 retval[0] = -1; 631 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval); 632 return; 633 } 634 spc->spc_syscallreq = rhdr->rsp_reqno; 635 rv = rumpsyscall(sysnum, data, retval); 636 spc->spc_syscallreq = 0; 637 lwproc_release(); 638 639 DPRINTF(("rump_sp: got return value %d & %d/%d\n", 640 rv, retval[0], retval[1])); 641 642 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval); 643 } 644 645 static void 646 serv_handleexec(struct spclient *spc, struct rsp_hdr *rhdr, char *comm) 647 { 648 size_t commlen = rhdr->rsp_len - HDRSZ; 649 650 pthread_mutex_lock(&spc->spc_mtx); 651 /* one for the connection and one for us */ 652 while (spc->spc_refcnt > 2) 653 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx); 654 pthread_mutex_unlock(&spc->spc_mtx); 655 656 /* 657 * ok, all the threads are dead (or one is still alive and 658 * the connection is dead, in which case this doesn't matter 659 * very much). proceed with exec. 660 */ 661 662 /* ensure comm is 0-terminated */ 663 /* TODO: make sure it contains sensible chars? 
*/ 664 comm[commlen] = '\0'; 665 666 lwproc_switch(spc->spc_mainlwp); 667 lwproc_execnotify(comm); 668 lwproc_switch(NULL); 669 670 pthread_mutex_lock(&spc->spc_mtx); 671 spc->spc_inexec = 0; 672 pthread_mutex_unlock(&spc->spc_mtx); 673 send_handshake_resp(spc, rhdr->rsp_reqno, 0); 674 } 675 676 enum sbatype { SBA_SYSCALL, SBA_EXEC }; 677 678 struct servbouncearg { 679 struct spclient *sba_spc; 680 struct rsp_hdr sba_hdr; 681 enum sbatype sba_type; 682 uint8_t *sba_data; 683 684 TAILQ_ENTRY(servbouncearg) sba_entries; 685 }; 686 static pthread_mutex_t sbamtx; 687 static pthread_cond_t sbacv; 688 static int nworker, idleworker, nwork; 689 static TAILQ_HEAD(, servbouncearg) wrklist = TAILQ_HEAD_INITIALIZER(wrklist); 690 691 /*ARGSUSED*/ 692 static void * 693 serv_workbouncer(void *arg) 694 { 695 struct servbouncearg *sba; 696 697 for (;;) { 698 pthread_mutex_lock(&sbamtx); 699 if (__predict_false(idleworker - nwork >= rumpsp_idleworker)) { 700 nworker--; 701 pthread_mutex_unlock(&sbamtx); 702 break; 703 } 704 idleworker++; 705 while (TAILQ_EMPTY(&wrklist)) { 706 _DIAGASSERT(nwork == 0); 707 pthread_cond_wait(&sbacv, &sbamtx); 708 } 709 idleworker--; 710 711 sba = TAILQ_FIRST(&wrklist); 712 TAILQ_REMOVE(&wrklist, sba, sba_entries); 713 nwork--; 714 pthread_mutex_unlock(&sbamtx); 715 716 if (__predict_true(sba->sba_type == SBA_SYSCALL)) { 717 serv_handlesyscall(sba->sba_spc, 718 &sba->sba_hdr, sba->sba_data); 719 } else { 720 _DIAGASSERT(sba->sba_type == SBA_EXEC); 721 serv_handleexec(sba->sba_spc, &sba->sba_hdr, 722 (char *)sba->sba_data); 723 } 724 spcrelease(sba->sba_spc); 725 free(sba->sba_data); 726 free(sba); 727 } 728 729 return NULL; 730 } 731 732 static int 733 sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr) 734 { 735 struct spclient *spc = arg; 736 void *rdata = NULL; /* XXXuninit */ 737 int rv, nlocks; 738 739 rumpuser__kunlock(0, &nlocks, NULL); 740 741 rv = copyin_req(spc, raddr, len, wantstr, &rdata); 742 if (rv) 743 goto 
out; 744 745 memcpy(laddr, rdata, *len); 746 free(rdata); 747 748 out: 749 rumpuser__klock(nlocks, NULL); 750 if (rv) 751 return EFAULT; 752 return 0; 753 } 754 755 int 756 rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len) 757 { 758 759 return sp_copyin(arg, raddr, laddr, &len, 0); 760 } 761 762 int 763 rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len) 764 { 765 766 return sp_copyin(arg, raddr, laddr, len, 1); 767 } 768 769 static int 770 sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen) 771 { 772 struct spclient *spc = arg; 773 int nlocks, rv; 774 775 rumpuser__kunlock(0, &nlocks, NULL); 776 rv = send_copyout_req(spc, raddr, laddr, dlen); 777 rumpuser__klock(nlocks, NULL); 778 779 if (rv) 780 return EFAULT; 781 return 0; 782 } 783 784 int 785 rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen) 786 { 787 788 return sp_copyout(arg, laddr, raddr, dlen); 789 } 790 791 int 792 rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen) 793 { 794 795 return sp_copyout(arg, laddr, raddr, *dlen); 796 } 797 798 int 799 rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr) 800 { 801 struct spclient *spc = arg; 802 void *resp, *rdata; 803 int nlocks, rv; 804 805 rumpuser__kunlock(0, &nlocks, NULL); 806 807 rv = anonmmap_req(spc, howmuch, &rdata); 808 if (rv) { 809 rv = EFAULT; 810 goto out; 811 } 812 813 resp = *(void **)rdata; 814 free(rdata); 815 816 if (resp == NULL) { 817 rv = ENOMEM; 818 } 819 820 *addr = resp; 821 822 out: 823 rumpuser__klock(nlocks, NULL); 824 825 if (rv) 826 return rv; 827 return 0; 828 } 829 830 int 831 rumpuser_sp_raise(void *arg, int signo) 832 { 833 struct spclient *spc = arg; 834 int rv, nlocks; 835 836 rumpuser__kunlock(0, &nlocks, NULL); 837 rv = send_raise_req(spc, signo); 838 rumpuser__klock(nlocks, NULL); 839 840 return rv; 841 } 842 843 static pthread_attr_t pattr_detached; 844 static void 845 schedulework(struct spclient 
*spc, enum sbatype sba_type) 846 { 847 struct servbouncearg *sba; 848 pthread_t pt; 849 uint64_t reqno; 850 int retries = 0; 851 852 reqno = spc->spc_hdr.rsp_reqno; 853 while ((sba = malloc(sizeof(*sba))) == NULL) { 854 if (nworker == 0 || retries > 10) { 855 send_error_resp(spc, reqno, EAGAIN); 856 spcfreebuf(spc); 857 return; 858 } 859 /* slim chance of more memory? */ 860 usleep(10000); 861 } 862 863 sba->sba_spc = spc; 864 sba->sba_type = sba_type; 865 sba->sba_hdr = spc->spc_hdr; 866 sba->sba_data = spc->spc_buf; 867 spcresetbuf(spc); 868 869 spcref(spc); 870 871 pthread_mutex_lock(&sbamtx); 872 TAILQ_INSERT_TAIL(&wrklist, sba, sba_entries); 873 nwork++; 874 if (nwork <= idleworker) { 875 /* do we have a daemon's tool (i.e. idle threads)? */ 876 pthread_cond_signal(&sbacv); 877 } else if (nworker < rumpsp_maxworker) { 878 /* 879 * Else, need to create one 880 * (if we can, otherwise just expect another 881 * worker to pick up the syscall) 882 */ 883 if (pthread_create(&pt, &pattr_detached, 884 serv_workbouncer, NULL) == 0) { 885 nworker++; 886 } 887 } 888 pthread_mutex_unlock(&sbamtx); 889 } 890 891 /* 892 * 893 * Startup routines and mainloop for server. 894 * 895 */ 896 897 struct spservarg { 898 int sps_sock; 899 connecthook_fn sps_connhook; 900 }; 901 902 static void 903 handlereq(struct spclient *spc) 904 { 905 uint64_t reqno; 906 int error, i; 907 908 reqno = spc->spc_hdr.rsp_reqno; 909 if (__predict_false(spc->spc_state == SPCSTATE_NEW)) { 910 if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) { 911 send_error_resp(spc, reqno, EAUTH); 912 shutdown(spc->spc_fd, SHUT_RDWR); 913 spcfreebuf(spc); 914 return; 915 } 916 917 if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) { 918 char *comm = (char *)spc->spc_buf; 919 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ; 920 921 /* ensure it's 0-terminated */ 922 /* XXX make sure it contains sensible chars? 
*/ 923 comm[commlen] = '\0'; 924 925 if ((error = lwproc_rfork(spc, 926 RUMP_RFCFDG, comm)) != 0) { 927 shutdown(spc->spc_fd, SHUT_RDWR); 928 } 929 930 spcfreebuf(spc); 931 if (error) 932 return; 933 934 spc->spc_mainlwp = lwproc_curlwp(); 935 936 send_handshake_resp(spc, reqno, 0); 937 } else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) { 938 struct lwp *tmpmain; 939 struct prefork *pf; 940 struct handshake_fork *rfp; 941 int cancel; 942 943 if (spc->spc_off-HDRSZ != sizeof(*rfp)) { 944 send_error_resp(spc, reqno, EINVAL); 945 shutdown(spc->spc_fd, SHUT_RDWR); 946 spcfreebuf(spc); 947 return; 948 } 949 950 /*LINTED*/ 951 rfp = (void *)spc->spc_buf; 952 cancel = rfp->rf_cancel; 953 954 pthread_mutex_lock(&pfmtx); 955 LIST_FOREACH(pf, &preforks, pf_entries) { 956 if (memcmp(rfp->rf_auth, pf->pf_auth, 957 sizeof(rfp->rf_auth)) == 0) { 958 LIST_REMOVE(pf, pf_entries); 959 LIST_REMOVE(pf, pf_spcentries); 960 break; 961 } 962 } 963 pthread_mutex_lock(&pfmtx); 964 spcfreebuf(spc); 965 966 if (!pf) { 967 send_error_resp(spc, reqno, ESRCH); 968 shutdown(spc->spc_fd, SHUT_RDWR); 969 return; 970 } 971 972 tmpmain = pf->pf_lwp; 973 free(pf); 974 lwproc_switch(tmpmain); 975 if (cancel) { 976 lwproc_release(); 977 shutdown(spc->spc_fd, SHUT_RDWR); 978 return; 979 } 980 981 /* 982 * So, we forked already during "prefork" to save 983 * the file descriptors from a parent exit 984 * race condition. But now we need to fork 985 * a second time since the initial fork has 986 * the wrong spc pointer. 
(yea, optimize 987 * interfaces some day if anyone cares) 988 */ 989 if ((error = lwproc_rfork(spc, 0, NULL)) != 0) { 990 send_error_resp(spc, reqno, error); 991 shutdown(spc->spc_fd, SHUT_RDWR); 992 lwproc_release(); 993 return; 994 } 995 spc->spc_mainlwp = lwproc_curlwp(); 996 lwproc_switch(tmpmain); 997 lwproc_release(); 998 lwproc_switch(spc->spc_mainlwp); 999 1000 send_handshake_resp(spc, reqno, 0); 1001 } else { 1002 send_error_resp(spc, reqno, EAUTH); 1003 shutdown(spc->spc_fd, SHUT_RDWR); 1004 spcfreebuf(spc); 1005 return; 1006 } 1007 1008 spc->spc_pid = lwproc_getpid(); 1009 1010 DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n", 1011 spc, spc->spc_pid)); 1012 1013 lwproc_switch(NULL); 1014 spc->spc_state = SPCSTATE_RUNNING; 1015 return; 1016 } 1017 1018 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) { 1019 struct prefork *pf; 1020 uint32_t auth[AUTHLEN]; 1021 int inexec; 1022 1023 DPRINTF(("rump_sp: prefork handler executing for %p\n", spc)); 1024 spcfreebuf(spc); 1025 1026 pthread_mutex_lock(&spc->spc_mtx); 1027 inexec = spc->spc_inexec; 1028 pthread_mutex_unlock(&spc->spc_mtx); 1029 if (inexec) { 1030 send_error_resp(spc, reqno, EBUSY); 1031 shutdown(spc->spc_fd, SHUT_RDWR); 1032 return; 1033 } 1034 1035 pf = malloc(sizeof(*pf)); 1036 if (pf == NULL) { 1037 send_error_resp(spc, reqno, ENOMEM); 1038 return; 1039 } 1040 1041 /* 1042 * Use client main lwp to fork. this is never used by 1043 * worker threads (except in exec, but we checked for that 1044 * above) so we can safely use it here. 
1045 */ 1046 lwproc_switch(spc->spc_mainlwp); 1047 if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) { 1048 DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc)); 1049 send_error_resp(spc, reqno, error); 1050 lwproc_switch(NULL); 1051 free(pf); 1052 return; 1053 } 1054 1055 /* Ok, we have a new process context and a new curlwp */ 1056 for (i = 0; i < AUTHLEN; i++) { 1057 pf->pf_auth[i] = auth[i] = arc4random(); 1058 } 1059 pf->pf_lwp = lwproc_curlwp(); 1060 lwproc_switch(NULL); 1061 1062 pthread_mutex_lock(&pfmtx); 1063 LIST_INSERT_HEAD(&preforks, pf, pf_entries); 1064 LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries); 1065 pthread_mutex_unlock(&pfmtx); 1066 1067 DPRINTF(("rump_sp: prefork handler success %p\n", spc)); 1068 1069 send_prefork_resp(spc, reqno, auth); 1070 return; 1071 } 1072 1073 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_HANDSHAKE)) { 1074 int inexec; 1075 1076 if (spc->spc_hdr.rsp_handshake != HANDSHAKE_EXEC) { 1077 send_error_resp(spc, reqno, EINVAL); 1078 shutdown(spc->spc_fd, SHUT_RDWR); 1079 spcfreebuf(spc); 1080 return; 1081 } 1082 1083 pthread_mutex_lock(&spc->spc_mtx); 1084 inexec = spc->spc_inexec; 1085 pthread_mutex_unlock(&spc->spc_mtx); 1086 if (inexec) { 1087 send_error_resp(spc, reqno, EBUSY); 1088 shutdown(spc->spc_fd, SHUT_RDWR); 1089 spcfreebuf(spc); 1090 return; 1091 } 1092 1093 pthread_mutex_lock(&spc->spc_mtx); 1094 spc->spc_inexec = 1; 1095 pthread_mutex_unlock(&spc->spc_mtx); 1096 1097 /* 1098 * start to drain lwps. 
we will wait for it to finish 1099 * in another thread 1100 */ 1101 lwproc_switch(spc->spc_mainlwp); 1102 lwproc_lwpexit(); 1103 lwproc_switch(NULL); 1104 1105 /* 1106 * exec has to wait for lwps to drain, so finish it off 1107 * in another thread 1108 */ 1109 schedulework(spc, SBA_EXEC); 1110 return; 1111 } 1112 1113 if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) { 1114 send_error_resp(spc, reqno, EINVAL); 1115 spcfreebuf(spc); 1116 return; 1117 } 1118 1119 schedulework(spc, SBA_SYSCALL); 1120 } 1121 1122 static void * 1123 spserver(void *arg) 1124 { 1125 struct spservarg *sarg = arg; 1126 struct spclient *spc; 1127 unsigned idx; 1128 int seen; 1129 int rv; 1130 unsigned int nfds, maxidx; 1131 1132 for (idx = 0; idx < MAXCLI; idx++) { 1133 pfdlist[idx].fd = -1; 1134 pfdlist[idx].events = POLLIN; 1135 1136 spc = &spclist[idx]; 1137 pthread_mutex_init(&spc->spc_mtx, NULL); 1138 pthread_cond_init(&spc->spc_cv, NULL); 1139 spc->spc_fd = -1; 1140 } 1141 pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock; 1142 pfdlist[0].events = POLLIN; 1143 nfds = 1; 1144 maxidx = 0; 1145 1146 pthread_attr_init(&pattr_detached); 1147 pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED); 1148 /* XXX: doesn't stacksize currently work on NetBSD */ 1149 pthread_attr_setstacksize(&pattr_detached, 32*1024); 1150 1151 pthread_mutex_init(&sbamtx, NULL); 1152 pthread_cond_init(&sbacv, NULL); 1153 1154 DPRINTF(("rump_sp: server mainloop\n")); 1155 1156 for (;;) { 1157 int discoed; 1158 1159 /* g/c hangarounds (eventually) */ 1160 discoed = atomic_swap_uint(&disco, 0); 1161 while (discoed--) { 1162 nfds--; 1163 idx = maxidx; 1164 while (idx) { 1165 if (pfdlist[idx].fd != -1) { 1166 maxidx = idx; 1167 break; 1168 } 1169 idx--; 1170 } 1171 DPRINTF(("rump_sp: set maxidx to [%u]\n", 1172 maxidx)); 1173 } 1174 1175 DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1)); 1176 seen = 0; 1177 rv = poll(pfdlist, maxidx+1, INFTIM); 1178 assert(maxidx+1 <= MAXCLI); 1179 assert(rv != 
0); 1180 if (rv == -1) { 1181 if (errno == EINTR) 1182 continue; 1183 fprintf(stderr, "rump_spserver: poll returned %d\n", 1184 errno); 1185 break; 1186 } 1187 1188 for (idx = 0; seen < rv && idx < MAXCLI; idx++) { 1189 if ((pfdlist[idx].revents & POLLIN) == 0) 1190 continue; 1191 1192 seen++; 1193 DPRINTF(("rump_sp: activity at [%u] %d/%d\n", 1194 idx, seen, rv)); 1195 if (idx > 0) { 1196 spc = &spclist[idx]; 1197 DPRINTF(("rump_sp: mainloop read [%u]\n", idx)); 1198 switch (readframe(spc)) { 1199 case 0: 1200 break; 1201 case -1: 1202 serv_handledisco(idx); 1203 break; 1204 default: 1205 switch (spc->spc_hdr.rsp_class) { 1206 case RUMPSP_RESP: 1207 kickwaiter(spc); 1208 break; 1209 case RUMPSP_REQ: 1210 handlereq(spc); 1211 break; 1212 default: 1213 send_error_resp(spc, 1214 spc->spc_hdr.rsp_reqno, 1215 ENOENT); 1216 spcfreebuf(spc); 1217 break; 1218 } 1219 break; 1220 } 1221 1222 } else { 1223 DPRINTF(("rump_sp: mainloop new connection\n")); 1224 1225 if (__predict_false(spfini)) { 1226 close(spclist[0].spc_fd); 1227 serv_shutdown(); 1228 goto out; 1229 } 1230 1231 idx = serv_handleconn(pfdlist[0].fd, 1232 sarg->sps_connhook, nfds == MAXCLI); 1233 if (idx) 1234 nfds++; 1235 if (idx > maxidx) 1236 maxidx = idx; 1237 DPRINTF(("rump_sp: maxid now %d\n", maxidx)); 1238 } 1239 } 1240 } 1241 1242 out: 1243 return NULL; 1244 } 1245 1246 static unsigned cleanupidx; 1247 static struct sockaddr *cleanupsa; 1248 int 1249 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp, 1250 const char *ostype, const char *osrelease, const char *machine) 1251 { 1252 pthread_t pt; 1253 struct spservarg *sarg; 1254 struct sockaddr *sap; 1255 char *p; 1256 unsigned idx; 1257 int error, s; 1258 1259 p = strdup(url); 1260 if (p == NULL) 1261 return ENOMEM; 1262 error = parseurl(p, &sap, &idx, 1); 1263 free(p); 1264 if (error) 1265 return error; 1266 1267 snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n", 1268 PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine); 
1269 1270 s = socket(parsetab[idx].domain, SOCK_STREAM, 0); 1271 if (s == -1) 1272 return errno; 1273 1274 spops = *spopsp; 1275 sarg = malloc(sizeof(*sarg)); 1276 if (sarg == NULL) { 1277 close(s); 1278 return ENOMEM; 1279 } 1280 1281 sarg->sps_sock = s; 1282 sarg->sps_connhook = parsetab[idx].connhook; 1283 1284 cleanupidx = idx; 1285 cleanupsa = sap; 1286 1287 /* sloppy error recovery */ 1288 1289 /*LINTED*/ 1290 if (bind(s, sap, sap->sa_len) == -1) { 1291 fprintf(stderr, "rump_sp: server bind failed\n"); 1292 return errno; 1293 } 1294 1295 if (listen(s, MAXCLI) == -1) { 1296 fprintf(stderr, "rump_sp: server listen failed\n"); 1297 return errno; 1298 } 1299 1300 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) { 1301 fprintf(stderr, "rump_sp: cannot create wrkr thread\n"); 1302 return errno; 1303 } 1304 pthread_detach(pt); 1305 1306 return 0; 1307 } 1308 1309 void 1310 rumpuser_sp_fini(void *arg) 1311 { 1312 struct spclient *spc = arg; 1313 register_t retval[2] = {0, 0}; 1314 1315 if (spclist[0].spc_fd) { 1316 parsetab[cleanupidx].cleanup(cleanupsa); 1317 } 1318 1319 /* 1320 * stuff response into the socket, since this process is just 1321 * about to exit 1322 */ 1323 if (spc && spc->spc_syscallreq) 1324 send_syscall_resp(spc, spc->spc_syscallreq, 0, retval); 1325 1326 if (spclist[0].spc_fd) { 1327 shutdown(spclist[0].spc_fd, SHUT_RDWR); 1328 spfini = 1; 1329 } 1330 } 1331