1 /* $NetBSD: sp_common.c,v 1.17 2010/12/16 17:05:44 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2010 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Common client/server sysproxy routines. #included. 30 */ 31 32 #include <sys/cdefs.h> 33 34 #include <sys/types.h> 35 #include <sys/mman.h> 36 #include <sys/queue.h> 37 #include <sys/socket.h> 38 #include <sys/un.h> 39 #include <sys/syslimits.h> 40 41 #include <arpa/inet.h> 42 #include <netinet/in.h> 43 #include <netinet/tcp.h> 44 45 #include <assert.h> 46 #include <errno.h> 47 #include <fcntl.h> 48 #include <inttypes.h> 49 #include <poll.h> 50 #include <pthread.h> 51 #include <stdarg.h> 52 #include <stddef.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 58 //#define DEBUG 59 #ifdef DEBUG 60 #define DPRINTF(x) mydprintf x 61 static void 62 mydprintf(const char *fmt, ...) 63 { 64 va_list ap; 65 66 va_start(ap, fmt); 67 vfprintf(stderr, fmt, ap); 68 va_end(ap); 69 } 70 #else 71 #define DPRINTF(x) 72 #endif 73 74 /* 75 * Bah, I hate writing on-off-wire conversions in C 76 */ 77 78 enum { RUMPSP_REQ, RUMPSP_RESP, RUMPSP_ERROR }; 79 enum { RUMPSP_HANDSHAKE, 80 RUMPSP_SYSCALL, 81 RUMPSP_COPYIN, RUMPSP_COPYINSTR, 82 RUMPSP_COPYOUT, RUMPSP_COPYOUTSTR, 83 RUMPSP_ANONMMAP }; 84 85 enum { HANDSHAKE_GUEST, HANDSHAKE_AUTH }; /* more to come */ 86 87 struct rsp_hdr { 88 uint64_t rsp_len; 89 uint64_t rsp_reqno; 90 uint16_t rsp_class; 91 uint16_t rsp_type; 92 /* 93 * We want this structure 64bit-aligned for typecast fun, 94 * so might as well use the following for something. 95 */ 96 union { 97 uint32_t sysnum; 98 uint32_t error; 99 uint32_t handshake; 100 } u; 101 }; 102 #define HDRSZ sizeof(struct rsp_hdr) 103 #define rsp_sysnum u.sysnum 104 #define rsp_error u.error 105 #define rsp_handshake u.handshake 106 107 #define MAXBANNER 96 108 109 /* 110 * Data follows the header. We have two types of structured data. 111 */ 112 113 /* copyin/copyout */ 114 struct rsp_copydata { 115 size_t rcp_len; 116 void *rcp_addr; 117 uint8_t rcp_data[0]; 118 }; 119 120 /* syscall response */ 121 struct rsp_sysresp { 122 int rsys_error; 123 register_t rsys_retval[2]; 124 }; 125 126 struct respwait { 127 uint64_t rw_reqno; 128 void *rw_data; 129 size_t rw_dlen; 130 int rw_error; 131 132 pthread_cond_t rw_cv; 133 134 TAILQ_ENTRY(respwait) rw_entries; 135 }; 136 137 struct spclient { 138 int spc_fd; 139 int spc_refcnt; 140 int spc_state; 141 142 pthread_mutex_t spc_mtx; 143 pthread_cond_t spc_cv; 144 145 struct lwp *spc_mainlwp; 146 pid_t spc_pid; 147 148 TAILQ_HEAD(, respwait) spc_respwait; 149 150 /* rest of the fields are zeroed upon disconnect */ 151 #define SPC_ZEROFF offsetof(struct spclient, spc_pfd) 152 struct pollfd *spc_pfd; 153 154 struct rsp_hdr spc_hdr; 155 uint8_t *spc_buf; 156 size_t spc_off; 157 158 uint64_t spc_nextreq; 159 int spc_ostatus, spc_istatus; 160 }; 161 #define SPCSTATUS_FREE 0 162 #define SPCSTATUS_BUSY 1 163 #define SPCSTATUS_WANTED 2 164 165 #define SPCSTATE_NEW 0 166 #define SPCSTATE_RUNNING 1 167 #define SPCSTATE_DYING 2 168 169 typedef int (*addrparse_fn)(const char *, struct sockaddr **, int); 170 typedef int (*connecthook_fn)(int); 171 typedef void (*cleanup_fn)(struct sockaddr *); 172 173 static int readframe(struct spclient *); 174 static void handlereq(struct spclient *); 175 176 static __inline void 177 spcresetbuf(struct spclient *spc) 178 { 179 180 spc->spc_buf = NULL; 181 spc->spc_off = 0; 182 } 183 184 static __inline void 185 spcfreebuf(struct spclient *spc) 186 { 187 188 free(spc->spc_buf); 189 spcresetbuf(spc); 190 } 191 192 static void 193 sendlockl(struct spclient *spc) 194 { 195 196 /* assert(pthread_mutex_owned) */ 197 while (spc->spc_ostatus != SPCSTATUS_FREE) { 198 spc->spc_ostatus = SPCSTATUS_WANTED; 199 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx); 200 } 201 spc->spc_ostatus = SPCSTATUS_BUSY; 202 } 203 204 static void 205 sendlock(struct spclient *spc) 206 { 207 208 pthread_mutex_lock(&spc->spc_mtx); 209 sendlockl(spc); 210 pthread_mutex_unlock(&spc->spc_mtx); 211 } 212 213 static void 214 sendunlockl(struct spclient *spc) 215 { 216 217 /* assert(pthread_mutex_owned) */ 218 if (spc->spc_ostatus == SPCSTATUS_WANTED) 219 pthread_cond_broadcast(&spc->spc_cv); 220 spc->spc_ostatus = SPCSTATUS_FREE; 221 } 222 223 static void 224 sendunlock(struct spclient *spc) 225 { 226 227 pthread_mutex_lock(&spc->spc_mtx); 228 sendunlockl(spc); 229 pthread_mutex_unlock(&spc->spc_mtx); 230 } 231 232 static int 233 dosend(struct spclient *spc, const void *data, size_t dlen) 234 { 235 struct pollfd pfd; 236 const uint8_t *sdata = data; 237 ssize_t n; 238 size_t sent; 239 int fd = spc->spc_fd; 240 241 pfd.fd = fd; 242 pfd.events = POLLOUT; 243 244 for (sent = 0, n = 0; sent < dlen; ) { 245 if (n) { 246 if (poll(&pfd, 1, INFTIM) == -1) { 247 if (errno == EINTR) 248 continue; 249 return errno; 250 } 251 } 252 253 n = send(fd, sdata + sent, dlen - sent, MSG_NOSIGNAL); 254 if (n == 0) { 255 return EFAULT; 256 } 257 if (n == -1) { 258 if (errno != EAGAIN) 259 return EFAULT; 260 continue; 261 } 262 sent += n; 263 } 264 265 return 0; 266 } 267 268 static void 269 putwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr) 270 { 271 272 rw->rw_data = NULL; 273 rw->rw_dlen = 0; 274 pthread_cond_init(&rw->rw_cv, NULL); 275 276 pthread_mutex_lock(&spc->spc_mtx); 277 rw->rw_reqno = rhdr->rsp_reqno = spc->spc_nextreq++; 278 TAILQ_INSERT_TAIL(&spc->spc_respwait, rw, rw_entries); 279 280 sendlockl(spc); 281 } 282 283 static void 284 unputwait(struct spclient *spc, struct respwait *rw) 285 { 286 287 sendunlockl(spc); 288 289 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries); 290 pthread_mutex_unlock(&spc->spc_mtx); 291 pthread_cond_destroy(&rw->rw_cv); 292 } 293 294 static void 295 kickwaiter(struct spclient *spc) 296 { 297 struct respwait *rw; 298 int error; 299 300 pthread_mutex_lock(&spc->spc_mtx); 301 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) { 302 if (rw->rw_reqno == spc->spc_hdr.rsp_reqno) 303 break; 304 } 305 if (rw == NULL) { 306 DPRINTF(("no waiter found, invalid reqno %" PRIu64 "?\n", 307 spc->spc_hdr.rsp_reqno)); 308 return; 309 } 310 DPRINTF(("rump_sp: client %p woke up waiter at %p\n", spc, rw)); 311 rw->rw_data = spc->spc_buf; 312 rw->rw_dlen = (size_t)(spc->spc_off - HDRSZ); 313 if (spc->spc_hdr.rsp_class == RUMPSP_ERROR) { 314 error = rw->rw_error = spc->spc_hdr.rsp_error; 315 } else { 316 error = rw->rw_error = 0; 317 } 318 pthread_cond_signal(&rw->rw_cv); 319 pthread_mutex_unlock(&spc->spc_mtx); 320 321 if (error) 322 spcfreebuf(spc); 323 else 324 spcresetbuf(spc); 325 } 326 327 static void 328 kickall(struct spclient *spc) 329 { 330 struct respwait *rw; 331 332 /* DIAGASSERT(mutex_owned(spc_lock)) */ 333 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) 334 pthread_cond_broadcast(&rw->rw_cv); 335 } 336 337 static int 338 waitresp(struct spclient *spc, struct respwait *rw) 339 { 340 struct pollfd pfd; 341 int rv = 0; 342 343 sendunlockl(spc); 344 345 rw->rw_error = 0; 346 while (rw->rw_data == NULL && rw->rw_error == 0 347 && spc->spc_state != SPCSTATE_DYING){ 348 /* are we free to receive? */ 349 if (spc->spc_istatus == SPCSTATUS_FREE) { 350 int gotresp; 351 352 spc->spc_istatus = SPCSTATUS_BUSY; 353 pthread_mutex_unlock(&spc->spc_mtx); 354 355 pfd.fd = spc->spc_fd; 356 pfd.events = POLLIN; 357 358 for (gotresp = 0; !gotresp; ) { 359 switch (readframe(spc)) { 360 case 0: 361 poll(&pfd, 1, INFTIM); 362 continue; 363 case -1: 364 rv = errno; 365 spc->spc_state = SPCSTATE_DYING; 366 goto cleanup; 367 default: 368 break; 369 } 370 371 switch (spc->spc_hdr.rsp_class) { 372 case RUMPSP_RESP: 373 case RUMPSP_ERROR: 374 kickwaiter(spc); 375 gotresp = spc->spc_hdr.rsp_reqno == 376 rw->rw_reqno; 377 break; 378 case RUMPSP_REQ: 379 handlereq(spc); 380 break; 381 default: 382 /* panic */ 383 break; 384 } 385 } 386 cleanup: 387 pthread_mutex_lock(&spc->spc_mtx); 388 if (spc->spc_istatus == SPCSTATUS_WANTED) 389 kickall(spc); 390 spc->spc_istatus = SPCSTATUS_FREE; 391 } else { 392 spc->spc_istatus = SPCSTATUS_WANTED; 393 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx); 394 } 395 } 396 397 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries); 398 pthread_mutex_unlock(&spc->spc_mtx); 399 400 pthread_cond_destroy(&rw->rw_cv); 401 402 if (rv) 403 return rv; 404 if (spc->spc_state == SPCSTATE_DYING) 405 return ENOTCONN; 406 return rw->rw_error; 407 } 408 409 static int 410 readframe(struct spclient *spc) 411 { 412 int fd = spc->spc_fd; 413 size_t left; 414 size_t framelen; 415 ssize_t n; 416 417 /* still reading header? */ 418 if (spc->spc_off < HDRSZ) { 419 DPRINTF(("rump_sp: readframe getting header at offset %zu\n", 420 spc->spc_off)); 421 422 left = HDRSZ - spc->spc_off; 423 /*LINTED: cast ok */ 424 n = read(fd, (uint8_t *)&spc->spc_hdr + spc->spc_off, left); 425 if (n == 0) { 426 return -1; 427 } 428 if (n == -1) { 429 if (errno == EAGAIN) 430 return 0; 431 return -1; 432 } 433 434 spc->spc_off += n; 435 if (spc->spc_off < HDRSZ) 436 return -1; 437 438 /*LINTED*/ 439 framelen = spc->spc_hdr.rsp_len; 440 441 if (framelen < HDRSZ) { 442 return -1; 443 } else if (framelen == HDRSZ) { 444 return 1; 445 } 446 447 spc->spc_buf = malloc(framelen - HDRSZ); 448 if (spc->spc_buf == NULL) { 449 return -1; 450 } 451 memset(spc->spc_buf, 0, framelen - HDRSZ); 452 453 /* "fallthrough" */ 454 } else { 455 /*LINTED*/ 456 framelen = spc->spc_hdr.rsp_len; 457 } 458 459 left = framelen - spc->spc_off; 460 461 DPRINTF(("rump_sp: readframe getting body at offset %zu, left %zu\n", 462 spc->spc_off, left)); 463 464 if (left == 0) 465 return 1; 466 n = read(fd, spc->spc_buf + (spc->spc_off - HDRSZ), left); 467 if (n == 0) { 468 return -1; 469 } 470 if (n == -1) { 471 if (errno == EAGAIN) 472 return 0; 473 return -1; 474 } 475 spc->spc_off += n; 476 left -= n; 477 478 /* got everything? */ 479 if (left == 0) 480 return 1; 481 else 482 return 0; 483 } 484 485 static int 486 tcp_parse(const char *addr, struct sockaddr **sa, int allow_wildcard) 487 { 488 struct sockaddr_in sin; 489 char buf[64]; 490 const char *p; 491 size_t l; 492 int port; 493 494 memset(&sin, 0, sizeof(sin)); 495 sin.sin_len = sizeof(sin); 496 sin.sin_family = AF_INET; 497 498 p = strchr(addr, ':'); 499 if (!p) { 500 fprintf(stderr, "rump_sp_tcp: missing port specifier\n"); 501 return EINVAL; 502 } 503 504 l = p - addr; 505 if (l > sizeof(buf)-1) { 506 fprintf(stderr, "rump_sp_tcp: address too long\n"); 507 return EINVAL; 508 } 509 strncpy(buf, addr, l); 510 buf[l] = '\0'; 511 512 /* special INADDR_ANY treatment */ 513 if (strcmp(buf, "*") == 0 || strcmp(buf, "0") == 0) { 514 sin.sin_addr.s_addr = INADDR_ANY; 515 } else { 516 switch (inet_pton(AF_INET, buf, &sin.sin_addr)) { 517 case 1: 518 break; 519 case 0: 520 fprintf(stderr, "rump_sp_tcp: cannot parse %s\n", buf); 521 return EINVAL; 522 case -1: 523 fprintf(stderr, "rump_sp_tcp: inet_pton failed\n"); 524 return errno; 525 default: 526 assert(/*CONSTCOND*/0); 527 return EINVAL; 528 } 529 } 530 531 if (!allow_wildcard && sin.sin_addr.s_addr == INADDR_ANY) { 532 fprintf(stderr, "rump_sp_tcp: client needs !INADDR_ANY\n"); 533 return EINVAL; 534 } 535 536 /* advance to port number & parse */ 537 p++; 538 l = strspn(p, "0123456789"); 539 if (l == 0) { 540 fprintf(stderr, "rump_sp_tcp: port now found: %s\n", p); 541 return EINVAL; 542 } 543 strncpy(buf, p, l); 544 buf[l] = '\0'; 545 546 if (*(p+l) != '/' && *(p+l) != '\0') { 547 fprintf(stderr, "rump_sp_tcp: junk at end of port: %s\n", addr); 548 return EINVAL; 549 } 550 551 port = atoi(buf); 552 if (port < 0 || port >= (1<<(8*sizeof(in_port_t)))) { 553 fprintf(stderr, "rump_sp_tcp: port %d out of range\n", port); 554 return ERANGE; 555 } 556 sin.sin_port = htons(port); 557 558 *sa = malloc(sizeof(sin)); 559 if (*sa == NULL) 560 return errno; 561 memcpy(*sa, &sin, sizeof(sin)); 562 return 0; 563 } 564 565 static int 566 tcp_connecthook(int s) 567 { 568 int x; 569 570 x = 1; 571 setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &x, sizeof(x)); 572 573 return 0; 574 } 575 576 /*ARGSUSED*/ 577 static int 578 unix_parse(const char *addr, struct sockaddr **sa, int allow_wildcard) 579 { 580 struct sockaddr_un sun; 581 size_t slen; 582 583 if (strlen(addr) > sizeof(sun.sun_path)) 584 return ENAMETOOLONG; 585 586 /* 587 * The pathname can be all kinds of spaghetti elementals, 588 * so meek and obidient we accept everything. However, use 589 * full path for easy cleanup in case someone gives a relative 590 * one and the server does a chdir() between now than the 591 * cleanup. 592 */ 593 memset(&sun, 0, sizeof(sun)); 594 sun.sun_family = AF_LOCAL; 595 if (*addr != '/') { 596 char mywd[PATH_MAX]; 597 598 if (getcwd(mywd, sizeof(mywd)) == NULL) { 599 fprintf(stderr, "warning: cannot determine cwd, " 600 "omitting socket cleanup\n"); 601 } else { 602 if (strlen(addr) + strlen(mywd) > sizeof(sun.sun_path)) 603 return ENAMETOOLONG; 604 strlcpy(sun.sun_path, mywd, sizeof(sun.sun_path)); 605 strlcat(sun.sun_path, "/", sizeof(sun.sun_path)); 606 } 607 } 608 strlcat(sun.sun_path, addr, sizeof(sun.sun_path)); 609 sun.sun_len = SUN_LEN(&sun); 610 slen = sun.sun_len+1; /* get the 0 too */ 611 612 *sa = malloc(slen); 613 if (*sa == NULL) 614 return errno; 615 memcpy(*sa, &sun, slen); 616 617 return 0; 618 } 619 620 static void 621 unix_cleanup(struct sockaddr *sa) 622 { 623 struct sockaddr_un *sun = (void *)sa; 624 625 /* 626 * cleanup only absolute paths. see unix_parse() above 627 */ 628 if (*sun->sun_path == '/') { 629 unlink(sun->sun_path); 630 } 631 } 632 633 /*ARGSUSED*/ 634 static int 635 notsupp(void) 636 { 637 638 fprintf(stderr, "rump_sp: support not yet implemented\n"); 639 return EOPNOTSUPP; 640 } 641 642 static int 643 success(void) 644 { 645 646 return 0; 647 } 648 649 struct { 650 const char *id; 651 int domain; 652 addrparse_fn ap; 653 connecthook_fn connhook; 654 cleanup_fn cleanup; 655 } parsetab[] = { 656 { "tcp", PF_INET, tcp_parse, tcp_connecthook, (cleanup_fn)success }, 657 { "unix", PF_LOCAL, unix_parse, (connecthook_fn)success, unix_cleanup }, 658 { "tcp6", PF_INET6, (addrparse_fn)notsupp, (connecthook_fn)success, 659 (cleanup_fn)success }, 660 }; 661 #define NPARSE (sizeof(parsetab)/sizeof(parsetab[0])) 662 663 static int 664 parseurl(const char *url, struct sockaddr **sap, unsigned *idxp, 665 int allow_wildcard) 666 { 667 char id[16]; 668 const char *p, *p2; 669 size_t l; 670 unsigned i; 671 int error; 672 673 /* 674 * Parse the url 675 */ 676 677 p = url; 678 p2 = strstr(p, "://"); 679 if (!p2) { 680 fprintf(stderr, "rump_sp: invalid locator ``%s''\n", p); 681 return EINVAL; 682 } 683 l = p2-p; 684 if (l > sizeof(id)-1) { 685 fprintf(stderr, "rump_sp: identifier too long in ``%s''\n", p); 686 return EINVAL; 687 } 688 689 strncpy(id, p, l); 690 id[l] = '\0'; 691 p2 += 3; /* beginning of address */ 692 693 for (i = 0; i < NPARSE; i++) { 694 if (strcmp(id, parsetab[i].id) == 0) { 695 error = parsetab[i].ap(p2, sap, allow_wildcard); 696 if (error) 697 return error; 698 break; 699 } 700 } 701 if (i == NPARSE) { 702 fprintf(stderr, "rump_sp: invalid identifier ``%s''\n", p); 703 return EINVAL; 704 } 705 706 *idxp = i; 707 return 0; 708 } 709