1 /* $NetBSD: sp_common.c,v 1.31 2011/03/08 15:34:37 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Common client/server sysproxy routines. #included. 30 */ 31 32 #include <sys/cdefs.h> 33 34 #include <sys/types.h> 35 #include <sys/mman.h> 36 #include <sys/queue.h> 37 #include <sys/socket.h> 38 #include <sys/un.h> 39 #include <sys/syslimits.h> 40 41 #include <arpa/inet.h> 42 #include <netinet/in.h> 43 #include <netinet/tcp.h> 44 45 #include <assert.h> 46 #include <errno.h> 47 #include <fcntl.h> 48 #include <inttypes.h> 49 #include <poll.h> 50 #include <pthread.h> 51 #include <stdarg.h> 52 #include <stddef.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 58 //#define DEBUG 59 #ifdef DEBUG 60 #define DPRINTF(x) mydprintf x 61 static void 62 mydprintf(const char *fmt, ...) 63 { 64 va_list ap; 65 66 va_start(ap, fmt); 67 vfprintf(stderr, fmt, ap); 68 va_end(ap); 69 } 70 #else 71 #define DPRINTF(x) 72 #endif 73 74 #ifndef HOSTOPS 75 #define host_poll poll 76 #define host_read read 77 #define host_sendmsg sendmsg 78 #define host_setsockopt setsockopt 79 #endif 80 81 #define IOVPUT(_io_, _b_) _io_.iov_base = &_b_; _io_.iov_len = sizeof(_b_); 82 #define IOVPUT_WITHSIZE(_io_, _b_, _l_) _io_.iov_base = _b_; _io_.iov_len = _l_; 83 #define SENDIOV(_spc_, _iov_) dosend(_spc_, _iov_, __arraycount(_iov_)) 84 85 /* 86 * Bah, I hate writing on-off-wire conversions in C 87 */ 88 89 enum { RUMPSP_REQ, RUMPSP_RESP, RUMPSP_ERROR }; 90 enum { RUMPSP_HANDSHAKE, 91 RUMPSP_SYSCALL, 92 RUMPSP_COPYIN, RUMPSP_COPYINSTR, 93 RUMPSP_COPYOUT, RUMPSP_COPYOUTSTR, 94 RUMPSP_ANONMMAP, 95 RUMPSP_PREFORK, 96 RUMPSP_RAISE }; 97 98 enum { HANDSHAKE_GUEST, HANDSHAKE_AUTH, HANDSHAKE_FORK, HANDSHAKE_EXEC }; 99 100 #define AUTHLEN 4 /* 128bit fork auth */ 101 102 struct rsp_hdr { 103 uint64_t rsp_len; 104 uint64_t rsp_reqno; 105 uint16_t rsp_class; 106 uint16_t rsp_type; 107 /* 108 * We want this structure 64bit-aligned for typecast fun, 109 * so might as well use the following for something. 110 */ 111 union { 112 uint32_t sysnum; 113 uint32_t error; 114 uint32_t handshake; 115 uint32_t signo; 116 } u; 117 }; 118 #define HDRSZ sizeof(struct rsp_hdr) 119 #define rsp_sysnum u.sysnum 120 #define rsp_error u.error 121 #define rsp_handshake u.handshake 122 #define rsp_signo u.signo 123 124 #define MAXBANNER 96 125 126 /* 127 * Data follows the header. We have two types of structured data. 128 */ 129 130 /* copyin/copyout */ 131 struct rsp_copydata { 132 size_t rcp_len; 133 void *rcp_addr; 134 uint8_t rcp_data[0]; 135 }; 136 137 /* syscall response */ 138 struct rsp_sysresp { 139 int rsys_error; 140 register_t rsys_retval[2]; 141 }; 142 143 struct handshake_fork { 144 uint32_t rf_auth[4]; 145 int rf_cancel; 146 }; 147 148 struct respwait { 149 uint64_t rw_reqno; 150 void *rw_data; 151 size_t rw_dlen; 152 int rw_done; 153 int rw_error; 154 155 pthread_cond_t rw_cv; 156 157 TAILQ_ENTRY(respwait) rw_entries; 158 }; 159 160 struct prefork; 161 struct spclient { 162 int spc_fd; 163 int spc_refcnt; 164 int spc_state; 165 166 pthread_mutex_t spc_mtx; 167 pthread_cond_t spc_cv; 168 169 struct lwp *spc_mainlwp; 170 pid_t spc_pid; 171 172 TAILQ_HEAD(, respwait) spc_respwait; 173 174 /* rest of the fields are zeroed upon disconnect */ 175 #define SPC_ZEROFF offsetof(struct spclient, spc_pfd) 176 struct pollfd *spc_pfd; 177 178 struct rsp_hdr spc_hdr; 179 uint8_t *spc_buf; 180 size_t spc_off; 181 182 uint64_t spc_nextreq; 183 uint64_t spc_syscallreq; 184 uint64_t spc_generation; 185 int spc_ostatus, spc_istatus; 186 int spc_reconnecting; 187 int spc_inexec; 188 189 LIST_HEAD(, prefork) spc_pflist; 190 }; 191 #define SPCSTATUS_FREE 0 192 #define SPCSTATUS_BUSY 1 193 #define SPCSTATUS_WANTED 2 194 195 #define SPCSTATE_NEW 0 196 #define SPCSTATE_RUNNING 1 197 #define SPCSTATE_DYING 2 198 199 typedef int (*addrparse_fn)(const char *, struct sockaddr **, int); 200 typedef int (*connecthook_fn)(int); 201 typedef void (*cleanup_fn)(struct sockaddr *); 202 203 static int readframe(struct spclient *); 204 static void handlereq(struct spclient *); 205 206 static __inline void 207 spcresetbuf(struct spclient *spc) 208 { 209 210 spc->spc_buf = NULL; 211 spc->spc_off = 0; 212 } 213 214 static __inline void 215 spcfreebuf(struct spclient *spc) 216 { 217 218 free(spc->spc_buf); 219 spcresetbuf(spc); 220 } 221 222 static void 223 sendlockl(struct spclient *spc) 224 { 225 226 while (spc->spc_ostatus != SPCSTATUS_FREE) { 227 spc->spc_ostatus = SPCSTATUS_WANTED; 228 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx); 229 } 230 spc->spc_ostatus = SPCSTATUS_BUSY; 231 } 232 233 static void __unused 234 sendlock(struct spclient *spc) 235 { 236 237 pthread_mutex_lock(&spc->spc_mtx); 238 sendlockl(spc); 239 pthread_mutex_unlock(&spc->spc_mtx); 240 } 241 242 static void 243 sendunlockl(struct spclient *spc) 244 { 245 246 if (spc->spc_ostatus == SPCSTATUS_WANTED) 247 pthread_cond_broadcast(&spc->spc_cv); 248 spc->spc_ostatus = SPCSTATUS_FREE; 249 } 250 251 static void 252 sendunlock(struct spclient *spc) 253 { 254 255 pthread_mutex_lock(&spc->spc_mtx); 256 sendunlockl(spc); 257 pthread_mutex_unlock(&spc->spc_mtx); 258 } 259 260 static int 261 dosend(struct spclient *spc, struct iovec *iov, size_t iovlen) 262 { 263 struct msghdr msg; 264 struct pollfd pfd; 265 ssize_t n = 0; 266 int fd = spc->spc_fd; 267 268 pfd.fd = fd; 269 pfd.events = POLLOUT; 270 271 memset(&msg, 0, sizeof(msg)); 272 273 for (;;) { 274 /* not first round? poll */ 275 if (n) { 276 if (host_poll(&pfd, 1, INFTIM) == -1) { 277 if (errno == EINTR) 278 continue; 279 return errno; 280 } 281 } 282 283 msg.msg_iov = iov; 284 msg.msg_iovlen = iovlen; 285 n = host_sendmsg(fd, &msg, MSG_NOSIGNAL); 286 if (n == -1) { 287 if (errno == EPIPE) 288 return ENOTCONN; 289 if (errno != EAGAIN) 290 return errno; 291 continue; 292 } 293 if (n == 0) { 294 return ENOTCONN; 295 } 296 297 /* ok, need to adjust iovec for potential next round */ 298 while (n >= (ssize_t)iov[0].iov_len && iovlen) { 299 n -= iov[0].iov_len; 300 iov++; 301 iovlen--; 302 } 303 304 if (iovlen == 0) { 305 _DIAGASSERT(n == 0); 306 break; 307 } else { 308 iov[0].iov_base = (uint8_t *)iov[0].iov_base + n; 309 iov[0].iov_len -= n; 310 } 311 } 312 313 return 0; 314 } 315 316 static void 317 doputwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr) 318 { 319 320 rw->rw_data = NULL; 321 rw->rw_dlen = rw->rw_done = rw->rw_error = 0; 322 pthread_cond_init(&rw->rw_cv, NULL); 323 324 pthread_mutex_lock(&spc->spc_mtx); 325 rw->rw_reqno = rhdr->rsp_reqno = spc->spc_nextreq++; 326 TAILQ_INSERT_TAIL(&spc->spc_respwait, rw, rw_entries); 327 } 328 329 static void __unused 330 putwait_locked(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr) 331 { 332 333 doputwait(spc, rw, rhdr); 334 pthread_mutex_unlock(&spc->spc_mtx); 335 } 336 337 static void 338 putwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr) 339 { 340 341 doputwait(spc, rw, rhdr); 342 sendlockl(spc); 343 pthread_mutex_unlock(&spc->spc_mtx); 344 } 345 346 static void 347 dounputwait(struct spclient *spc, struct respwait *rw) 348 { 349 350 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries); 351 pthread_mutex_unlock(&spc->spc_mtx); 352 pthread_cond_destroy(&rw->rw_cv); 353 354 } 355 356 static void __unused 357 unputwait_locked(struct spclient *spc, struct respwait *rw) 358 { 359 360 pthread_mutex_lock(&spc->spc_mtx); 361 dounputwait(spc, rw); 362 } 363 364 static void 365 unputwait(struct spclient *spc, struct respwait *rw) 366 { 367 368 pthread_mutex_lock(&spc->spc_mtx); 369 sendunlockl(spc); 370 371 dounputwait(spc, rw); 372 } 373 374 static void 375 kickwaiter(struct spclient *spc) 376 { 377 struct respwait *rw; 378 int error = 0; 379 380 pthread_mutex_lock(&spc->spc_mtx); 381 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) { 382 if (rw->rw_reqno == spc->spc_hdr.rsp_reqno) 383 break; 384 } 385 if (rw == NULL) { 386 DPRINTF(("no waiter found, invalid reqno %" PRIu64 "?\n", 387 spc->spc_hdr.rsp_reqno)); 388 pthread_mutex_unlock(&spc->spc_mtx); 389 spcfreebuf(spc); 390 return; 391 } 392 DPRINTF(("rump_sp: client %p woke up waiter at %p\n", spc, rw)); 393 rw->rw_data = spc->spc_buf; 394 rw->rw_done = 1; 395 rw->rw_dlen = (size_t)(spc->spc_off - HDRSZ); 396 if (spc->spc_hdr.rsp_class == RUMPSP_ERROR) { 397 error = rw->rw_error = spc->spc_hdr.rsp_error; 398 } 399 pthread_cond_signal(&rw->rw_cv); 400 pthread_mutex_unlock(&spc->spc_mtx); 401 402 if (error) 403 spcfreebuf(spc); 404 else 405 spcresetbuf(spc); 406 } 407 408 static void 409 kickall(struct spclient *spc) 410 { 411 struct respwait *rw; 412 413 /* DIAGASSERT(mutex_owned(spc_lock)) */ 414 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) 415 pthread_cond_broadcast(&rw->rw_cv); 416 } 417 418 static int 419 readframe(struct spclient *spc) 420 { 421 int fd = spc->spc_fd; 422 size_t left; 423 size_t framelen; 424 ssize_t n; 425 426 /* still reading header? */ 427 if (spc->spc_off < HDRSZ) { 428 DPRINTF(("rump_sp: readframe getting header at offset %zu\n", 429 spc->spc_off)); 430 431 left = HDRSZ - spc->spc_off; 432 /*LINTED: cast ok */ 433 n = host_read(fd, (uint8_t*)&spc->spc_hdr + spc->spc_off, left); 434 if (n == 0) { 435 return -1; 436 } 437 if (n == -1) { 438 if (errno == EAGAIN) 439 return 0; 440 return -1; 441 } 442 443 spc->spc_off += n; 444 if (spc->spc_off < HDRSZ) { 445 return 0; 446 } 447 448 /*LINTED*/ 449 framelen = spc->spc_hdr.rsp_len; 450 451 if (framelen < HDRSZ) { 452 return -1; 453 } else if (framelen == HDRSZ) { 454 return 1; 455 } 456 457 spc->spc_buf = malloc(framelen - HDRSZ); 458 if (spc->spc_buf == NULL) { 459 return -1; 460 } 461 memset(spc->spc_buf, 0, framelen - HDRSZ); 462 463 /* "fallthrough" */ 464 } else { 465 /*LINTED*/ 466 framelen = spc->spc_hdr.rsp_len; 467 } 468 469 left = framelen - spc->spc_off; 470 471 DPRINTF(("rump_sp: readframe getting body at offset %zu, left %zu\n", 472 spc->spc_off, left)); 473 474 if (left == 0) 475 return 1; 476 n = host_read(fd, spc->spc_buf + (spc->spc_off - HDRSZ), left); 477 if (n == 0) { 478 return -1; 479 } 480 if (n == -1) { 481 if (errno == EAGAIN) 482 return 0; 483 return -1; 484 } 485 spc->spc_off += n; 486 left -= n; 487 488 /* got everything? */ 489 if (left == 0) 490 return 1; 491 else 492 return 0; 493 } 494 495 static int 496 tcp_parse(const char *addr, struct sockaddr **sa, int allow_wildcard) 497 { 498 struct sockaddr_in sin; 499 char buf[64]; 500 const char *p; 501 size_t l; 502 int port; 503 504 memset(&sin, 0, sizeof(sin)); 505 sin.sin_len = sizeof(sin); 506 sin.sin_family = AF_INET; 507 508 p = strchr(addr, ':'); 509 if (!p) { 510 fprintf(stderr, "rump_sp_tcp: missing port specifier\n"); 511 return EINVAL; 512 } 513 514 l = p - addr; 515 if (l > sizeof(buf)-1) { 516 fprintf(stderr, "rump_sp_tcp: address too long\n"); 517 return EINVAL; 518 } 519 strncpy(buf, addr, l); 520 buf[l] = '\0'; 521 522 /* special INADDR_ANY treatment */ 523 if (strcmp(buf, "*") == 0 || strcmp(buf, "0") == 0) { 524 sin.sin_addr.s_addr = INADDR_ANY; 525 } else { 526 switch (inet_pton(AF_INET, buf, &sin.sin_addr)) { 527 case 1: 528 break; 529 case 0: 530 fprintf(stderr, "rump_sp_tcp: cannot parse %s\n", buf); 531 return EINVAL; 532 case -1: 533 fprintf(stderr, "rump_sp_tcp: inet_pton failed\n"); 534 return errno; 535 default: 536 assert(/*CONSTCOND*/0); 537 return EINVAL; 538 } 539 } 540 541 if (!allow_wildcard && sin.sin_addr.s_addr == INADDR_ANY) { 542 fprintf(stderr, "rump_sp_tcp: client needs !INADDR_ANY\n"); 543 return EINVAL; 544 } 545 546 /* advance to port number & parse */ 547 p++; 548 l = strspn(p, "0123456789"); 549 if (l == 0) { 550 fprintf(stderr, "rump_sp_tcp: port now found: %s\n", p); 551 return EINVAL; 552 } 553 strncpy(buf, p, l); 554 buf[l] = '\0'; 555 556 if (*(p+l) != '/' && *(p+l) != '\0') { 557 fprintf(stderr, "rump_sp_tcp: junk at end of port: %s\n", addr); 558 return EINVAL; 559 } 560 561 port = atoi(buf); 562 if (port < 0 || port >= (1<<(8*sizeof(in_port_t)))) { 563 fprintf(stderr, "rump_sp_tcp: port %d out of range\n", port); 564 return ERANGE; 565 } 566 sin.sin_port = htons(port); 567 568 *sa = malloc(sizeof(sin)); 569 if (*sa == NULL) 570 return errno; 571 memcpy(*sa, &sin, sizeof(sin)); 572 return 0; 573 } 574 575 static int 576 tcp_connecthook(int s) 577 { 578 int x; 579 580 x = 1; 581 host_setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &x, sizeof(x)); 582 583 return 0; 584 } 585 586 static char parsedurl[256]; 587 588 /*ARGSUSED*/ 589 static int 590 unix_parse(const char *addr, struct sockaddr **sa, int allow_wildcard) 591 { 592 struct sockaddr_un sun; 593 size_t slen; 594 int savepath = 0; 595 596 if (strlen(addr) > sizeof(sun.sun_path)) 597 return ENAMETOOLONG; 598 599 /* 600 * The pathname can be all kinds of spaghetti elementals, 601 * so meek and obidient we accept everything. However, use 602 * full path for easy cleanup in case someone gives a relative 603 * one and the server does a chdir() between now than the 604 * cleanup. 605 */ 606 memset(&sun, 0, sizeof(sun)); 607 sun.sun_family = AF_LOCAL; 608 if (*addr != '/') { 609 char mywd[PATH_MAX]; 610 611 if (getcwd(mywd, sizeof(mywd)) == NULL) { 612 fprintf(stderr, "warning: cannot determine cwd, " 613 "omitting socket cleanup\n"); 614 } else { 615 if (strlen(addr) + strlen(mywd) > sizeof(sun.sun_path)) 616 return ENAMETOOLONG; 617 strlcpy(sun.sun_path, mywd, sizeof(sun.sun_path)); 618 strlcat(sun.sun_path, "/", sizeof(sun.sun_path)); 619 savepath = 1; 620 } 621 } 622 strlcat(sun.sun_path, addr, sizeof(sun.sun_path)); 623 sun.sun_len = SUN_LEN(&sun); 624 slen = sun.sun_len+1; /* get the 0 too */ 625 626 if (savepath && *parsedurl == '\0') { 627 snprintf(parsedurl, sizeof(parsedurl), 628 "unix://%s", sun.sun_path); 629 } 630 631 *sa = malloc(slen); 632 if (*sa == NULL) 633 return errno; 634 memcpy(*sa, &sun, slen); 635 636 return 0; 637 } 638 639 static void 640 unix_cleanup(struct sockaddr *sa) 641 { 642 struct sockaddr_un *sun = (void *)sa; 643 644 /* 645 * cleanup only absolute paths. see unix_parse() above 646 */ 647 if (*sun->sun_path == '/') { 648 unlink(sun->sun_path); 649 } 650 } 651 652 /*ARGSUSED*/ 653 static int 654 notsupp(void) 655 { 656 657 fprintf(stderr, "rump_sp: support not yet implemented\n"); 658 return EOPNOTSUPP; 659 } 660 661 static int 662 success(void) 663 { 664 665 return 0; 666 } 667 668 struct { 669 const char *id; 670 int domain; 671 addrparse_fn ap; 672 connecthook_fn connhook; 673 cleanup_fn cleanup; 674 } parsetab[] = { 675 { "tcp", PF_INET, tcp_parse, tcp_connecthook, (cleanup_fn)success }, 676 { "unix", PF_LOCAL, unix_parse, (connecthook_fn)success, unix_cleanup }, 677 { "tcp6", PF_INET6, (addrparse_fn)notsupp, (connecthook_fn)success, 678 (cleanup_fn)success }, 679 }; 680 #define NPARSE (sizeof(parsetab)/sizeof(parsetab[0])) 681 682 static int 683 parseurl(const char *url, struct sockaddr **sap, unsigned *idxp, 684 int allow_wildcard) 685 { 686 char id[16]; 687 const char *p, *p2; 688 size_t l; 689 unsigned i; 690 int error; 691 692 /* 693 * Parse the url 694 */ 695 696 p = url; 697 p2 = strstr(p, "://"); 698 if (!p2) { 699 fprintf(stderr, "rump_sp: invalid locator ``%s''\n", p); 700 return EINVAL; 701 } 702 l = p2-p; 703 if (l > sizeof(id)-1) { 704 fprintf(stderr, "rump_sp: identifier too long in ``%s''\n", p); 705 return EINVAL; 706 } 707 708 strncpy(id, p, l); 709 id[l] = '\0'; 710 p2 += 3; /* beginning of address */ 711 712 for (i = 0; i < NPARSE; i++) { 713 if (strcmp(id, parsetab[i].id) == 0) { 714 error = parsetab[i].ap(p2, sap, allow_wildcard); 715 if (error) 716 return error; 717 break; 718 } 719 } 720 if (i == NPARSE) { 721 fprintf(stderr, "rump_sp: invalid identifier ``%s''\n", p); 722 return EINVAL; 723 } 724 725 *idxp = i; 726 return 0; 727 } 728