1 /* $NetBSD: sp_common.c,v 1.28 2011/02/15 10:37:07 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Common client/server sysproxy routines. #included. 30 */ 31 32 #include <sys/cdefs.h> 33 34 #include <sys/types.h> 35 #include <sys/mman.h> 36 #include <sys/queue.h> 37 #include <sys/socket.h> 38 #include <sys/un.h> 39 #include <sys/syslimits.h> 40 41 #include <arpa/inet.h> 42 #include <netinet/in.h> 43 #include <netinet/tcp.h> 44 45 #include <assert.h> 46 #include <errno.h> 47 #include <fcntl.h> 48 #include <inttypes.h> 49 #include <poll.h> 50 #include <pthread.h> 51 #include <stdarg.h> 52 #include <stddef.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 58 //#define DEBUG 59 #ifdef DEBUG 60 #define DPRINTF(x) mydprintf x 61 static void 62 mydprintf(const char *fmt, ...) 63 { 64 va_list ap; 65 66 va_start(ap, fmt); 67 vfprintf(stderr, fmt, ap); 68 va_end(ap); 69 } 70 #else 71 #define DPRINTF(x) 72 #endif 73 74 #ifndef HOSTOPS 75 #define host_poll poll 76 #define host_read read 77 #define host_sendto sendto 78 #define host_setsockopt setsockopt 79 #endif 80 81 /* 82 * Bah, I hate writing on-off-wire conversions in C 83 */ 84 85 enum { RUMPSP_REQ, RUMPSP_RESP, RUMPSP_ERROR }; 86 enum { RUMPSP_HANDSHAKE, 87 RUMPSP_SYSCALL, 88 RUMPSP_COPYIN, RUMPSP_COPYINSTR, 89 RUMPSP_COPYOUT, RUMPSP_COPYOUTSTR, 90 RUMPSP_ANONMMAP, 91 RUMPSP_PREFORK, 92 RUMPSP_RAISE }; 93 94 enum { HANDSHAKE_GUEST, HANDSHAKE_AUTH, HANDSHAKE_FORK, HANDSHAKE_EXEC }; 95 96 #define AUTHLEN 4 /* 128bit fork auth */ 97 98 struct rsp_hdr { 99 uint64_t rsp_len; 100 uint64_t rsp_reqno; 101 uint16_t rsp_class; 102 uint16_t rsp_type; 103 /* 104 * We want this structure 64bit-aligned for typecast fun, 105 * so might as well use the following for something. 106 */ 107 union { 108 uint32_t sysnum; 109 uint32_t error; 110 uint32_t handshake; 111 uint32_t signo; 112 } u; 113 }; 114 #define HDRSZ sizeof(struct rsp_hdr) 115 #define rsp_sysnum u.sysnum 116 #define rsp_error u.error 117 #define rsp_handshake u.handshake 118 #define rsp_signo u.signo 119 120 #define MAXBANNER 96 121 122 /* 123 * Data follows the header. We have two types of structured data. 124 */ 125 126 /* copyin/copyout */ 127 struct rsp_copydata { 128 size_t rcp_len; 129 void *rcp_addr; 130 uint8_t rcp_data[0]; 131 }; 132 133 /* syscall response */ 134 struct rsp_sysresp { 135 int rsys_error; 136 register_t rsys_retval[2]; 137 }; 138 139 struct handshake_fork { 140 uint32_t rf_auth[4]; 141 int rf_cancel; 142 }; 143 144 struct respwait { 145 uint64_t rw_reqno; 146 void *rw_data; 147 size_t rw_dlen; 148 int rw_done; 149 int rw_error; 150 151 pthread_cond_t rw_cv; 152 153 TAILQ_ENTRY(respwait) rw_entries; 154 }; 155 156 struct prefork; 157 struct spclient { 158 int spc_fd; 159 int spc_refcnt; 160 int spc_state; 161 162 pthread_mutex_t spc_mtx; 163 pthread_cond_t spc_cv; 164 165 struct lwp *spc_mainlwp; 166 pid_t spc_pid; 167 168 TAILQ_HEAD(, respwait) spc_respwait; 169 170 /* rest of the fields are zeroed upon disconnect */ 171 #define SPC_ZEROFF offsetof(struct spclient, spc_pfd) 172 struct pollfd *spc_pfd; 173 174 struct rsp_hdr spc_hdr; 175 uint8_t *spc_buf; 176 size_t spc_off; 177 178 uint64_t spc_nextreq; 179 uint64_t spc_syscallreq; 180 uint64_t spc_generation; 181 int spc_ostatus, spc_istatus; 182 int spc_reconnecting; 183 184 LIST_HEAD(, prefork) spc_pflist; 185 }; 186 #define SPCSTATUS_FREE 0 187 #define SPCSTATUS_BUSY 1 188 #define SPCSTATUS_WANTED 2 189 190 #define SPCSTATE_NEW 0 191 #define SPCSTATE_RUNNING 1 192 #define SPCSTATE_DYING 2 193 194 typedef int (*addrparse_fn)(const char *, struct sockaddr **, int); 195 typedef int (*connecthook_fn)(int); 196 typedef void (*cleanup_fn)(struct sockaddr *); 197 198 static int readframe(struct spclient *); 199 static void handlereq(struct spclient *); 200 201 static __inline void 202 spcresetbuf(struct spclient *spc) 203 { 204 205 spc->spc_buf = NULL; 206 spc->spc_off = 0; 207 } 208 209 static __inline void 210 spcfreebuf(struct spclient *spc) 211 { 212 213 free(spc->spc_buf); 214 spcresetbuf(spc); 215 } 216 217 static void 218 sendlockl(struct spclient *spc) 219 { 220 221 while (spc->spc_ostatus != SPCSTATUS_FREE) { 222 spc->spc_ostatus = SPCSTATUS_WANTED; 223 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx); 224 } 225 spc->spc_ostatus = SPCSTATUS_BUSY; 226 } 227 228 static void __unused 229 sendlock(struct spclient *spc) 230 { 231 232 pthread_mutex_lock(&spc->spc_mtx); 233 sendlockl(spc); 234 pthread_mutex_unlock(&spc->spc_mtx); 235 } 236 237 static void 238 sendunlockl(struct spclient *spc) 239 { 240 241 if (spc->spc_ostatus == SPCSTATUS_WANTED) 242 pthread_cond_broadcast(&spc->spc_cv); 243 spc->spc_ostatus = SPCSTATUS_FREE; 244 } 245 246 static void 247 sendunlock(struct spclient *spc) 248 { 249 250 pthread_mutex_lock(&spc->spc_mtx); 251 sendunlockl(spc); 252 pthread_mutex_unlock(&spc->spc_mtx); 253 } 254 255 static int 256 dosend(struct spclient *spc, const void *data, size_t dlen) 257 { 258 struct pollfd pfd; 259 const uint8_t *sdata = data; 260 ssize_t n; 261 size_t sent; 262 int fd = spc->spc_fd; 263 264 pfd.fd = fd; 265 pfd.events = POLLOUT; 266 267 for (sent = 0, n = 0; sent < dlen; ) { 268 if (n) { 269 if (host_poll(&pfd, 1, INFTIM) == -1) { 270 if (errno == EINTR) 271 continue; 272 return errno; 273 } 274 } 275 276 n = host_sendto(fd, sdata + sent, dlen - sent, 277 MSG_NOSIGNAL, NULL, 0); 278 if (n == -1) { 279 if (errno == EPIPE) 280 return ENOTCONN; 281 if (errno != EAGAIN) 282 return errno; 283 continue; 284 } 285 if (n == 0) { 286 return ENOTCONN; 287 } 288 sent += n; 289 } 290 291 return 0; 292 } 293 294 static void 295 doputwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr) 296 { 297 298 rw->rw_data = NULL; 299 rw->rw_dlen = rw->rw_done = rw->rw_error = 0; 300 pthread_cond_init(&rw->rw_cv, NULL); 301 302 pthread_mutex_lock(&spc->spc_mtx); 303 rw->rw_reqno = rhdr->rsp_reqno = spc->spc_nextreq++; 304 TAILQ_INSERT_TAIL(&spc->spc_respwait, rw, rw_entries); 305 } 306 307 static void __unused 308 putwait_locked(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr) 309 { 310 311 doputwait(spc, rw, rhdr); 312 pthread_mutex_unlock(&spc->spc_mtx); 313 } 314 315 static void 316 putwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr) 317 { 318 319 doputwait(spc, rw, rhdr); 320 sendlockl(spc); 321 pthread_mutex_unlock(&spc->spc_mtx); 322 } 323 324 static void 325 dounputwait(struct spclient *spc, struct respwait *rw) 326 { 327 328 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries); 329 pthread_mutex_unlock(&spc->spc_mtx); 330 pthread_cond_destroy(&rw->rw_cv); 331 332 } 333 334 static void __unused 335 unputwait_locked(struct spclient *spc, struct respwait *rw) 336 { 337 338 pthread_mutex_lock(&spc->spc_mtx); 339 dounputwait(spc, rw); 340 } 341 342 static void 343 unputwait(struct spclient *spc, struct respwait *rw) 344 { 345 346 pthread_mutex_lock(&spc->spc_mtx); 347 sendunlockl(spc); 348 349 dounputwait(spc, rw); 350 } 351 352 static void 353 kickwaiter(struct spclient *spc) 354 { 355 struct respwait *rw; 356 int error = 0; 357 358 pthread_mutex_lock(&spc->spc_mtx); 359 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) { 360 if (rw->rw_reqno == spc->spc_hdr.rsp_reqno) 361 break; 362 } 363 if (rw == NULL) { 364 DPRINTF(("no waiter found, invalid reqno %" PRIu64 "?\n", 365 spc->spc_hdr.rsp_reqno)); 366 pthread_mutex_unlock(&spc->spc_mtx); 367 spcfreebuf(spc); 368 return; 369 } 370 DPRINTF(("rump_sp: client %p woke up waiter at %p\n", spc, rw)); 371 rw->rw_data = spc->spc_buf; 372 rw->rw_done = 1; 373 rw->rw_dlen = (size_t)(spc->spc_off - HDRSZ); 374 if (spc->spc_hdr.rsp_class == RUMPSP_ERROR) { 375 error = rw->rw_error = spc->spc_hdr.rsp_error; 376 } 377 pthread_cond_signal(&rw->rw_cv); 378 pthread_mutex_unlock(&spc->spc_mtx); 379 380 if (error) 381 spcfreebuf(spc); 382 else 383 spcresetbuf(spc); 384 } 385 386 static void 387 kickall(struct spclient *spc) 388 { 389 struct respwait *rw; 390 391 /* DIAGASSERT(mutex_owned(spc_lock)) */ 392 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) 393 pthread_cond_broadcast(&rw->rw_cv); 394 } 395 396 static int 397 readframe(struct spclient *spc) 398 { 399 int fd = spc->spc_fd; 400 size_t left; 401 size_t framelen; 402 ssize_t n; 403 404 /* still reading header? */ 405 if (spc->spc_off < HDRSZ) { 406 DPRINTF(("rump_sp: readframe getting header at offset %zu\n", 407 spc->spc_off)); 408 409 left = HDRSZ - spc->spc_off; 410 /*LINTED: cast ok */ 411 n = host_read(fd, (uint8_t*)&spc->spc_hdr + spc->spc_off, left); 412 if (n == 0) { 413 return -1; 414 } 415 if (n == -1) { 416 if (errno == EAGAIN) 417 return 0; 418 return -1; 419 } 420 421 spc->spc_off += n; 422 if (spc->spc_off < HDRSZ) 423 return -1; 424 425 /*LINTED*/ 426 framelen = spc->spc_hdr.rsp_len; 427 428 if (framelen < HDRSZ) { 429 return -1; 430 } else if (framelen == HDRSZ) { 431 return 1; 432 } 433 434 spc->spc_buf = malloc(framelen - HDRSZ); 435 if (spc->spc_buf == NULL) { 436 return -1; 437 } 438 memset(spc->spc_buf, 0, framelen - HDRSZ); 439 440 /* "fallthrough" */ 441 } else { 442 /*LINTED*/ 443 framelen = spc->spc_hdr.rsp_len; 444 } 445 446 left = framelen - spc->spc_off; 447 448 DPRINTF(("rump_sp: readframe getting body at offset %zu, left %zu\n", 449 spc->spc_off, left)); 450 451 if (left == 0) 452 return 1; 453 n = host_read(fd, spc->spc_buf + (spc->spc_off - HDRSZ), left); 454 if (n == 0) { 455 return -1; 456 } 457 if (n == -1) { 458 if (errno == EAGAIN) 459 return 0; 460 return -1; 461 } 462 spc->spc_off += n; 463 left -= n; 464 465 /* got everything? */ 466 if (left == 0) 467 return 1; 468 else 469 return 0; 470 } 471 472 static int 473 tcp_parse(const char *addr, struct sockaddr **sa, int allow_wildcard) 474 { 475 struct sockaddr_in sin; 476 char buf[64]; 477 const char *p; 478 size_t l; 479 int port; 480 481 memset(&sin, 0, sizeof(sin)); 482 sin.sin_len = sizeof(sin); 483 sin.sin_family = AF_INET; 484 485 p = strchr(addr, ':'); 486 if (!p) { 487 fprintf(stderr, "rump_sp_tcp: missing port specifier\n"); 488 return EINVAL; 489 } 490 491 l = p - addr; 492 if (l > sizeof(buf)-1) { 493 fprintf(stderr, "rump_sp_tcp: address too long\n"); 494 return EINVAL; 495 } 496 strncpy(buf, addr, l); 497 buf[l] = '\0'; 498 499 /* special INADDR_ANY treatment */ 500 if (strcmp(buf, "*") == 0 || strcmp(buf, "0") == 0) { 501 sin.sin_addr.s_addr = INADDR_ANY; 502 } else { 503 switch (inet_pton(AF_INET, buf, &sin.sin_addr)) { 504 case 1: 505 break; 506 case 0: 507 fprintf(stderr, "rump_sp_tcp: cannot parse %s\n", buf); 508 return EINVAL; 509 case -1: 510 fprintf(stderr, "rump_sp_tcp: inet_pton failed\n"); 511 return errno; 512 default: 513 assert(/*CONSTCOND*/0); 514 return EINVAL; 515 } 516 } 517 518 if (!allow_wildcard && sin.sin_addr.s_addr == INADDR_ANY) { 519 fprintf(stderr, "rump_sp_tcp: client needs !INADDR_ANY\n"); 520 return EINVAL; 521 } 522 523 /* advance to port number & parse */ 524 p++; 525 l = strspn(p, "0123456789"); 526 if (l == 0) { 527 fprintf(stderr, "rump_sp_tcp: port now found: %s\n", p); 528 return EINVAL; 529 } 530 strncpy(buf, p, l); 531 buf[l] = '\0'; 532 533 if (*(p+l) != '/' && *(p+l) != '\0') { 534 fprintf(stderr, "rump_sp_tcp: junk at end of port: %s\n", addr); 535 return EINVAL; 536 } 537 538 port = atoi(buf); 539 if (port < 0 || port >= (1<<(8*sizeof(in_port_t)))) { 540 fprintf(stderr, "rump_sp_tcp: port %d out of range\n", port); 541 return ERANGE; 542 } 543 sin.sin_port = htons(port); 544 545 *sa = malloc(sizeof(sin)); 546 if (*sa == NULL) 547 return errno; 548 memcpy(*sa, &sin, sizeof(sin)); 549 return 0; 550 } 551 552 static int 553 tcp_connecthook(int s) 554 { 555 int x; 556 557 x = 1; 558 host_setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &x, sizeof(x)); 559 560 return 0; 561 } 562 563 static char parsedurl[256]; 564 565 /*ARGSUSED*/ 566 static int 567 unix_parse(const char *addr, struct sockaddr **sa, int allow_wildcard) 568 { 569 struct sockaddr_un sun; 570 size_t slen; 571 int savepath = 0; 572 573 if (strlen(addr) > sizeof(sun.sun_path)) 574 return ENAMETOOLONG; 575 576 /* 577 * The pathname can be all kinds of spaghetti elementals, 578 * so meek and obidient we accept everything. However, use 579 * full path for easy cleanup in case someone gives a relative 580 * one and the server does a chdir() between now than the 581 * cleanup. 582 */ 583 memset(&sun, 0, sizeof(sun)); 584 sun.sun_family = AF_LOCAL; 585 if (*addr != '/') { 586 char mywd[PATH_MAX]; 587 588 if (getcwd(mywd, sizeof(mywd)) == NULL) { 589 fprintf(stderr, "warning: cannot determine cwd, " 590 "omitting socket cleanup\n"); 591 } else { 592 if (strlen(addr) + strlen(mywd) > sizeof(sun.sun_path)) 593 return ENAMETOOLONG; 594 strlcpy(sun.sun_path, mywd, sizeof(sun.sun_path)); 595 strlcat(sun.sun_path, "/", sizeof(sun.sun_path)); 596 savepath = 1; 597 } 598 } 599 strlcat(sun.sun_path, addr, sizeof(sun.sun_path)); 600 sun.sun_len = SUN_LEN(&sun); 601 slen = sun.sun_len+1; /* get the 0 too */ 602 603 if (savepath && *parsedurl == '\0') { 604 snprintf(parsedurl, sizeof(parsedurl), 605 "unix://%s", sun.sun_path); 606 } 607 608 *sa = malloc(slen); 609 if (*sa == NULL) 610 return errno; 611 memcpy(*sa, &sun, slen); 612 613 return 0; 614 } 615 616 static void 617 unix_cleanup(struct sockaddr *sa) 618 { 619 struct sockaddr_un *sun = (void *)sa; 620 621 /* 622 * cleanup only absolute paths. see unix_parse() above 623 */ 624 if (*sun->sun_path == '/') { 625 unlink(sun->sun_path); 626 } 627 } 628 629 /*ARGSUSED*/ 630 static int 631 notsupp(void) 632 { 633 634 fprintf(stderr, "rump_sp: support not yet implemented\n"); 635 return EOPNOTSUPP; 636 } 637 638 static int 639 success(void) 640 { 641 642 return 0; 643 } 644 645 struct { 646 const char *id; 647 int domain; 648 addrparse_fn ap; 649 connecthook_fn connhook; 650 cleanup_fn cleanup; 651 } parsetab[] = { 652 { "tcp", PF_INET, tcp_parse, tcp_connecthook, (cleanup_fn)success }, 653 { "unix", PF_LOCAL, unix_parse, (connecthook_fn)success, unix_cleanup }, 654 { "tcp6", PF_INET6, (addrparse_fn)notsupp, (connecthook_fn)success, 655 (cleanup_fn)success }, 656 }; 657 #define NPARSE (sizeof(parsetab)/sizeof(parsetab[0])) 658 659 static int 660 parseurl(const char *url, struct sockaddr **sap, unsigned *idxp, 661 int allow_wildcard) 662 { 663 char id[16]; 664 const char *p, *p2; 665 size_t l; 666 unsigned i; 667 int error; 668 669 /* 670 * Parse the url 671 */ 672 673 p = url; 674 p2 = strstr(p, "://"); 675 if (!p2) { 676 fprintf(stderr, "rump_sp: invalid locator ``%s''\n", p); 677 return EINVAL; 678 } 679 l = p2-p; 680 if (l > sizeof(id)-1) { 681 fprintf(stderr, "rump_sp: identifier too long in ``%s''\n", p); 682 return EINVAL; 683 } 684 685 strncpy(id, p, l); 686 id[l] = '\0'; 687 p2 += 3; /* beginning of address */ 688 689 for (i = 0; i < NPARSE; i++) { 690 if (strcmp(id, parsetab[i].id) == 0) { 691 error = parsetab[i].ap(p2, sap, allow_wildcard); 692 if (error) 693 return error; 694 break; 695 } 696 } 697 if (i == NPARSE) { 698 fprintf(stderr, "rump_sp: invalid identifier ``%s''\n", p); 699 return EINVAL; 700 } 701 702 *idxp = i; 703 return 0; 704 } 705