1 /* $OpenBSD: http.c,v 1.92 2024/11/21 13:32:27 claudio Exp $ */ 2 /* 3 * Copyright (c) 2020 Nils Fisher <nils_fisher@hotmail.com> 4 * Copyright (c) 2020 Claudio Jeker <claudio@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 /*- 20 * Copyright (c) 1997 The NetBSD Foundation, Inc. 21 * All rights reserved. 22 * 23 * This code is derived from software contributed to The NetBSD Foundation 24 * by Jason Thorpe and Luke Mewburn. 25 * 26 * Redistribution and use in source and binary forms, with or without 27 * modification, are permitted provided that the following conditions 28 * are met: 29 * 1. Redistributions of source code must retain the above copyright 30 * notice, this list of conditions and the following disclaimer. 31 * 2. Redistributions in binary form must reproduce the above copyright 32 * notice, this list of conditions and the following disclaimer in the 33 * documentation and/or other materials provided with the distribution. 34 * 35 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 36 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 37 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 38 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/socket.h>

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <imsg.h>
#include <limits.h>
#include <netdb.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <vis.h>
#include <zlib.h>

#include <tls.h>

#include "extern.h"

/* Value of the User-Agent header in the CONNECT and GET requests below. */
#define HTTP_USER_AGENT	"OpenBSD rpki-client"
/* Size of the inflate output buffer allocated by http_inflate_new(). */
#define HTTP_BUF_SIZE		(32 * 1024)
/* Added to getmonotime() in http_done() to compute conn->idle_time. */
#define HTTP_IDLE_TIMEOUT	10
/* Upper bound handed to strtonum() when parsing Content-Length. */
#define MAX_CONTENTLEN		(2 * 1024 * 1024 * 1024UL)
/* NOTE(review): presumably one pollfd per request plus one extra; confirm. */
#define NPFDS			(MAX_HTTP_REQUESTS + 1)

/*
 * Return value of the connection state functions. http_do() translates
 * it into the poll events stored in conn->events: DONE clears them,
 * WANT_POLLIN selects POLLIN and WANT_POLLOUT selects POLLOUT.
 */
enum res {
	DONE,
	WANT_POLLIN,
	WANT_POLLOUT,
};

/*
 * States of a connection. STATE_FREE is the first enumerator and so has
 * the value 0, which is what a freshly calloc()ed connection holds;
 * http_free() asserts that a connection is in STATE_FREE.
 */
enum http_state {
	STATE_FREE,
	STATE_CONNECT,
	STATE_TLSCONNECT,
	STATE_PROXY_REQUEST,
	STATE_PROXY_STATUS,
	STATE_PROXY_RESPONSE,
	STATE_REQUEST,
	STATE_RESPONSE_STATUS,
	STATE_RESPONSE_HEADER,
	STATE_RESPONSE_DATA,
	STATE_RESPONSE_CHUNKED_HEADER,
	STATE_RESPONSE_CHUNKED_CRLF,
	STATE_RESPONSE_CHUNKED_TRAILER,
	STATE_WRITE_DATA,
	STATE_IDLE,
	STATE_CLOSE,
};

/*
 * Proxy settings, filled in by proxy_parse_uri(). proxyhost stays NULL
 * when no proxy is configured, which is what the rest of the file tests.
 */
struct http_proxy {
	char	*proxyhost;
	char	*proxyport;
	char	*proxyauth;	/* "" or a full Proxy-Authorization line */
} proxy;

/* Decompression context attached to a connection by http_inflate_new(). */
struct http_zlib {
	z_stream	 zs;
	char		*zbuf;		/* inflate output buffer */
	size_t		 zbufsz;	/* allocated size of zbuf */
	size_t		 zbufpos;	/* bytes of output available in zbuf */
	size_t		 zinsz;		/* input bytes offered to inflate() */
	int		 zdone;		/* set once inflate() hit Z_STREAM_END */
};

/* State of one HTTP connection, linked on either the active or idle list. */
struct http_connection {
	LIST_ENTRY(http_connection)	entry;
	char			*host;	/* copy of the request host */
	char			*port;	/* copy of the request port */
	char			*last_modified;
	char			*redir_uri;
	struct http_request	*req;	/* current request, NULL if none */
	struct pollfd		*pfd;
	struct addrinfo		*res0;	/* head of the getaddrinfo() list */
	struct addrinfo		*res;	/* address currently being tried */
	struct tls		*tls;
	char			*buf;
	struct http_zlib	*zlibctx;
	size_t			bufsz;
	size_t			bufpos;
	size_t			iosz;
	size_t			totalsz;
	time_t			idle_time;
	time_t			io_time;
	int			status;	/* HTTP status of the response */
	int			fd;	/* socket, -1 when not open */
	int			chunked;
	int			gzipped;
	int			keep_alive;
	short			events;	/* poll events set by http_do() */
	enum http_state		state;
};

LIST_HEAD(http_conn_list, http_connection);

/* A single fetch request, queued until a connection can serve it. */
struct http_request {
	TAILQ_ENTRY(http_request)	entry;
	char			*uri;
	char			*modified_since;
	char			*host;
	char			*port;
	const char		*path;	/* points into uri */
	unsigned int		 id;
	int			 outfd;	/* closed by http_req_free() unless -1 */
	int			 redirect_loop;	/* redirects followed so far */
};

TAILQ_HEAD(http_req_queue, http_request);

static struct http_conn_list	active = LIST_HEAD_INITIALIZER(active);
static struct http_conn_list	idle = LIST_HEAD_INITIALIZER(idle);
static struct http_req_queue	queue = TAILQ_HEAD_INITIALIZER(queue);
/* number of allocated connections, compared against MAX_HTTP_REQUESTS */
static unsigned int		http_conn_count;

static struct msgbuf *msgq;
static struct sockaddr_storage http_bindaddr;
static struct tls_config *tls_config;
static uint8_t *tls_ca_mem;
static size_t tls_ca_size;

/* HTTP request API */
static void	http_req_new(unsigned int, char *, char *, int, int);
static void	http_req_free(struct http_request *);
static void	http_req_done(unsigned int, enum http_result, const char *);
static void	http_req_fail(unsigned int);
static int	http_req_schedule(struct http_request *);

/* HTTP decompression helper */
static int	http_inflate_new(struct http_connection *);
static void	http_inflate_free(struct http_connection *);
static void	http_inflate_done(struct http_connection *);
static int	http_inflate_data(struct http_connection *);
static enum res	http_inflate_advance(struct http_connection *);

/* HTTP connection API */
static void	http_new(struct http_request *);
static void	http_free(struct http_connection *);

static enum res	http_done(struct http_connection *, enum http_result);
static enum res	http_failed(struct http_connection *);

/* HTTP connection FSM functions */
static void	http_do(struct http_connection *,
		    enum res (*)(struct http_connection *));

/* These functions can be used with http_do() */
static enum res	http_connect(struct http_connection *);
static enum res	http_request(struct http_connection *);
static enum res	http_close(struct http_connection *);
static enum res	http_handle(struct http_connection *);

/* Internal state functions used by the above functions */
static enum res	http_finish_connect(struct http_connection *);
static enum res	proxy_connect(struct http_connection *);
static enum res	http_tls_connect(struct http_connection *);
static enum res	http_tls_handshake(struct http_connection *);
static enum res	http_read(struct http_connection *);
static enum res	http_write(struct http_connection *);
static enum res	proxy_read(struct http_connection *);
static enum res	proxy_write(struct http_connection *);
static enum res	data_write(struct http_connection *);
static enum res	data_inflate_write(struct http_connection *);

/*
 * Return a string that can be used in error message to identify the
 * connection.
 *
 * The uri is passed through strnvis() with VIS_SAFE into a static
 * 80 byte buffer, so the returned pointer is only valid until the next
 * call. If the encoded form does not fit, the tail of the buffer is
 * overwritten with "..." to mark the truncation.
 */
static const char *
http_info(const char *uri)
{
	static char buf[80];

	if (strnvis(buf, uri, sizeof buf, VIS_SAFE) >= (int)sizeof buf) {
		/* overflow, add indicator */
		memcpy(buf + sizeof buf - 4, "...", 4);
	}

	return buf;
}

/*
 * Return IP address in presentation format.
234 */ 235 static const char * 236 ip_info(const struct http_connection *conn) 237 { 238 static char ipbuf[NI_MAXHOST]; 239 240 if (conn->res == NULL) 241 return "unknown"; 242 243 if (getnameinfo(conn->res->ai_addr, conn->res->ai_addrlen, ipbuf, 244 sizeof(ipbuf), NULL, 0, NI_NUMERICHOST) != 0) 245 return "unknown"; 246 247 return ipbuf; 248 } 249 250 static const char * 251 conn_info(const struct http_connection *conn) 252 { 253 static char buf[100 + NI_MAXHOST]; 254 const char *uri; 255 256 if (conn->req == NULL) 257 uri = conn->host; 258 else 259 uri = conn->req->uri; 260 261 snprintf(buf, sizeof(buf), "%s (%s)", http_info(uri), ip_info(conn)); 262 return buf; 263 } 264 265 /* 266 * Determine whether the character needs encoding, per RFC2396. 267 */ 268 static int 269 to_encode(const char *c0) 270 { 271 /* 2.4.3. Excluded US-ASCII Characters */ 272 const char *excluded_chars = 273 " " /* space */ 274 "<>#\"" /* delims (modulo "%", see below) */ 275 "{}|\\^[]`" /* unwise */ 276 ; 277 const unsigned char *c = (const unsigned char *)c0; 278 279 /* 280 * No corresponding graphic US-ASCII. 281 * Control characters and octets not used in US-ASCII. 282 */ 283 return (iscntrl(*c) || !isascii(*c) || 284 285 /* 286 * '%' is also reserved, if is not followed by two 287 * hexadecimal digits. 288 */ 289 strchr(excluded_chars, *c) != NULL || 290 (*c == '%' && (!isxdigit(c[1]) || !isxdigit(c[2])))); 291 } 292 293 /* 294 * Encode given URL, per RFC2396. 295 * Allocate and return string to the caller. 296 */ 297 static char * 298 url_encode(const char *path) 299 { 300 size_t i, length, new_length; 301 char *epath, *epathp; 302 303 length = new_length = strlen(path); 304 305 /* 306 * First pass: 307 * Count unsafe characters, and determine length of the 308 * final URL. 309 */ 310 for (i = 0; i < length; i++) 311 if (to_encode(path + i)) 312 new_length += 2; 313 314 epath = epathp = malloc(new_length + 1); /* One more for '\0'. 
*/ 315 if (epath == NULL) 316 err(1, NULL); 317 318 /* 319 * Second pass: 320 * Encode, and copy final URL. 321 */ 322 for (i = 0; i < length; i++) 323 if (to_encode(path + i)) { 324 snprintf(epathp, 4, "%%" "%02x", 325 (unsigned char)path[i]); 326 epathp += 3; 327 } else 328 *(epathp++) = path[i]; 329 330 *epathp = '\0'; 331 return (epath); 332 } 333 334 static char 335 hextochar(const char *str) 336 { 337 unsigned char c, ret; 338 339 c = str[0]; 340 ret = c; 341 if (isalpha(c)) 342 ret -= isupper(c) ? 'A' - 10 : 'a' - 10; 343 else 344 ret -= '0'; 345 ret *= 16; 346 347 c = str[1]; 348 ret += c; 349 if (isalpha(c)) 350 ret -= isupper(c) ? 'A' - 10 : 'a' - 10; 351 else 352 ret -= '0'; 353 return ret; 354 } 355 356 static char * 357 url_decode(const char *str) 358 { 359 char *ret, c; 360 int i, reallen; 361 362 if (str == NULL) 363 return NULL; 364 if ((ret = malloc(strlen(str) + 1)) == NULL) 365 err(1, "Can't allocate memory for URL decoding"); 366 for (i = 0, reallen = 0; str[i] != '\0'; i++, reallen++, ret++) { 367 c = str[i]; 368 if (c == '+') { 369 *ret = ' '; 370 continue; 371 } 372 /* 373 * Cannot use strtol here because next char 374 * after %xx may be a digit. 375 */ 376 if (c == '%' && isxdigit((unsigned char)str[i + 1]) && 377 isxdigit((unsigned char)str[i + 2])) { 378 *ret = hextochar(&str[i + 1]); 379 i += 2; 380 continue; 381 } 382 *ret = c; 383 } 384 *ret = '\0'; 385 return ret - reallen; 386 } 387 388 static char * 389 recode_credentials(const char *userinfo) 390 { 391 char *ui, *creds; 392 size_t ulen; 393 394 /* url-decode the user and pass */ 395 ui = url_decode(userinfo); 396 397 ulen = strlen(ui); 398 if (base64_encode(ui, ulen, &creds) == -1) 399 errx(1, "error in base64 encoding"); 400 free(ui); 401 return (creds); 402 } 403 404 /* 405 * Parse a proxy URI and split it up into host, port and userinfo. 
 */
/*
 * A NULL uri leaves the proxy settings untouched. Any malformed URI is
 * fatal (errx). On success proxy.proxyhost, proxy.proxyport and
 * proxy.proxyauth hold freshly allocated strings.
 */
static void
proxy_parse_uri(char *uri)
{
	char *fullhost, *host, *port = NULL, *cred, *cookie = NULL;

	if (uri == NULL)
		return;

	if (strncasecmp(uri, HTTP_PROTO, HTTP_PROTO_LEN) != 0)
		errx(1, "%s: http_proxy not using http schema", http_info(uri));

	/* work on a private copy of everything up to the first '/' */
	host = uri + HTTP_PROTO_LEN;
	if ((fullhost = strndup(host, strcspn(host, "/"))) == NULL)
		err(1, NULL);

	/* split off optional "user:pass@" in front of the host */
	cred = fullhost;
	host = strchr(cred, '@');
	if (host != NULL)
		*host++ = '\0';
	else {
		host = cred;
		cred = NULL;
	}

	if (*host == '[') {
		char *hosttail;

		/* bracketed IPv6 literal, optionally followed by ":port" */
		if ((hosttail = strrchr(host, ']')) == NULL)
			errx(1, "%s: unmatched opening bracket",
			    http_info(uri));
		/* only strip the '[' if nothing unexpected follows the ']' */
		if (hosttail[1] == '\0' || hosttail[1] == ':')
			host++;
		if (hosttail[1] == ':')
			port = hosttail + 2;
		*hosttail = '\0';
	} else {
		/* the last ':' separates host and port */
		if ((port = strrchr(host, ':')) != NULL)
			*port++ = '\0';
	}

	/* no port given, fall back to 443 */
	if (port == NULL)
		port = "443";

	if (cred != NULL) {
		/* userinfo has to be of the form user:pass */
		if (strchr(cred, ':') == NULL)
			errx(1, "%s: malformed proxy url", http_info(uri));
		/* cred now points to an allocated base64 string */
		cred = recode_credentials(cred);
		if (asprintf(&cookie, "Proxy-Authorization: Basic %s\r\n",
		    cred) == -1)
			err(1, NULL);
		free(cred);
	} else
		/* empty string so the request format needs no special case */
		if ((cookie = strdup("")) == NULL)
			err(1, NULL);

	/* host and port point into fullhost (or a literal), so copy them */
	if ((proxy.proxyhost = strdup(host)) == NULL)
		err(1, NULL);
	if ((proxy.proxyport = strdup(port)) == NULL)
		err(1, NULL);
	proxy.proxyauth = cookie;

	free(fullhost);
}

/*
 * Parse a URI and split it up into host, port and path.
 * Does some basic URI validation. Both host and port need to be freed
 * by the caller whereas path points into the uri.
475 */ 476 static int 477 http_parse_uri(char *uri, char **ohost, char **oport, char **opath) 478 { 479 char *host, *port = NULL, *path; 480 char *hosttail; 481 482 if (strncasecmp(uri, HTTPS_PROTO, HTTPS_PROTO_LEN) != 0) { 483 warnx("%s: not using https schema", http_info(uri)); 484 return -1; 485 } 486 host = uri + HTTPS_PROTO_LEN; 487 if ((path = strchr(host, '/')) == NULL) { 488 warnx("%s: missing https path", http_info(uri)); 489 return -1; 490 } 491 if (path - uri > INT_MAX - 1) { 492 warnx("%s: preposterous host length", http_info(uri)); 493 return -1; 494 } 495 496 if (memchr(host, '@', path - host) != NULL) { 497 warnx("%s: URI with userinfo not supported", http_info(uri)); 498 return -1; 499 } 500 501 if (*host == '[') { 502 if ((hosttail = memrchr(host, ']', path - host)) == NULL) { 503 warnx("%s: unmatched opening bracket", http_info(uri)); 504 return -1; 505 } 506 if (hosttail[1] == '/' || hosttail[1] == ':') 507 host++; 508 if (hosttail[1] == ':') 509 port = hosttail + 2; 510 } else { 511 if ((hosttail = memrchr(host, ':', path - host)) != NULL) 512 port = hosttail + 1; 513 else 514 hosttail = path; 515 } 516 517 if ((host = strndup(host, hosttail - host)) == NULL) 518 err(1, NULL); 519 if (port != NULL) { 520 if ((port = strndup(port, path - port)) == NULL) 521 err(1, NULL); 522 } else { 523 if ((port = strdup("443")) == NULL) 524 err(1, NULL); 525 } 526 /* do not include the initial / in path */ 527 path++; 528 529 *ohost = host; 530 *oport = port; 531 *opath = path; 532 533 return 0; 534 } 535 536 /* 537 * Lookup the IP addresses for host:port. 538 * Returns 0 on success and -1 on failure. 
539 */ 540 static int 541 http_resolv(struct addrinfo **res, const char *host, const char *port) 542 { 543 struct addrinfo hints; 544 int error; 545 546 memset(&hints, 0, sizeof(hints)); 547 hints.ai_family = PF_UNSPEC; 548 hints.ai_socktype = SOCK_STREAM; 549 error = getaddrinfo(host, port, &hints, res); 550 /* 551 * If the services file is corrupt/missing, fall back 552 * on our hard-coded defines. 553 */ 554 if (error == EAI_SERVICE) 555 error = getaddrinfo(host, "443", &hints, res); 556 if (error != 0) { 557 warnx("%s: %s", host, gai_strerror(error)); 558 return -1; 559 } 560 561 return 0; 562 } 563 564 /* 565 * Create and queue a new request. 566 */ 567 static void 568 http_req_new(unsigned int id, char *uri, char *modified_since, int count, 569 int outfd) 570 { 571 struct http_request *req; 572 char *host, *port, *path; 573 574 if (http_parse_uri(uri, &host, &port, &path) == -1) { 575 free(uri); 576 free(modified_since); 577 close(outfd); 578 http_req_fail(id); 579 return; 580 } 581 582 if ((req = calloc(1, sizeof(*req))) == NULL) 583 err(1, NULL); 584 585 req->id = id; 586 req->outfd = outfd; 587 req->host = host; 588 req->port = port; 589 req->path = path; 590 req->uri = uri; 591 req->modified_since = modified_since; 592 req->redirect_loop = count; 593 594 TAILQ_INSERT_TAIL(&queue, req, entry); 595 } 596 597 /* 598 * Free a request, request is not allowed to be on the req queue. 
599 */ 600 static void 601 http_req_free(struct http_request *req) 602 { 603 if (req == NULL) 604 return; 605 606 free(req->host); 607 free(req->port); 608 /* no need to free req->path it points into req->uri */ 609 free(req->uri); 610 free(req->modified_since); 611 612 if (req->outfd != -1) 613 close(req->outfd); 614 } 615 616 /* 617 * Enqueue request response 618 */ 619 static void 620 http_req_done(unsigned int id, enum http_result res, const char *last_modified) 621 { 622 struct ibuf *b; 623 624 b = io_new_buffer(); 625 io_simple_buffer(b, &id, sizeof(id)); 626 io_simple_buffer(b, &res, sizeof(res)); 627 io_str_buffer(b, last_modified); 628 io_close_buffer(msgq, b); 629 } 630 631 /* 632 * Enqueue request failure response 633 */ 634 static void 635 http_req_fail(unsigned int id) 636 { 637 struct ibuf *b; 638 enum http_result res = HTTP_FAILED; 639 640 b = io_new_buffer(); 641 io_simple_buffer(b, &id, sizeof(id)); 642 io_simple_buffer(b, &res, sizeof(res)); 643 io_str_buffer(b, NULL); 644 io_close_buffer(msgq, b); 645 } 646 647 /* 648 * Schedule new requests until maximum number of connections is reached. 649 * Try to reuse an idle connection if one exists that matches host and port. 
650 */ 651 static int 652 http_req_schedule(struct http_request *req) 653 { 654 struct http_connection *conn; 655 656 TAILQ_REMOVE(&queue, req, entry); 657 658 /* check list of idle connections first */ 659 LIST_FOREACH(conn, &idle, entry) { 660 if (strcmp(conn->host, req->host) != 0) 661 continue; 662 if (strcmp(conn->port, req->port) != 0) 663 continue; 664 665 LIST_REMOVE(conn, entry); 666 LIST_INSERT_HEAD(&active, conn, entry); 667 668 /* use established connection */ 669 conn->req = req; 670 conn->idle_time = 0; 671 672 /* start request */ 673 http_do(conn, http_request); 674 if (conn->state == STATE_FREE) 675 http_free(conn); 676 return 1; 677 } 678 679 if (http_conn_count < MAX_HTTP_REQUESTS) { 680 http_new(req); 681 return 1; 682 } 683 684 /* no more slots free, requeue */ 685 TAILQ_INSERT_HEAD(&queue, req, entry); 686 return 0; 687 } 688 689 /* 690 * Allocate everything to allow inline decompression during write out. 691 * Returns 0 on success, -1 on failure. 692 */ 693 static int 694 http_inflate_new(struct http_connection *conn) 695 { 696 struct http_zlib *zctx; 697 698 if (conn->zlibctx != NULL) 699 return 0; 700 701 if ((zctx = calloc(1, sizeof(*zctx))) == NULL) 702 goto fail; 703 zctx->zbufsz = HTTP_BUF_SIZE; 704 if ((zctx->zbuf = malloc(zctx->zbufsz)) == NULL) 705 goto fail; 706 if (inflateInit2(&zctx->zs, MAX_WBITS + 32) != Z_OK) 707 goto fail; 708 conn->zlibctx = zctx; 709 return 0; 710 711 fail: 712 warnx("%s: decompression initialisation failed", conn_info(conn)); 713 if (zctx != NULL) 714 free(zctx->zbuf); 715 free(zctx); 716 return -1; 717 } 718 719 /* Free all memory used by the decompression API */ 720 static void 721 http_inflate_free(struct http_connection *conn) 722 { 723 if (conn->zlibctx == NULL) 724 return; 725 inflateEnd(&conn->zlibctx->zs); 726 free(conn->zlibctx->zbuf); 727 free(conn->zlibctx); 728 conn->zlibctx = NULL; 729 } 730 731 /* Reset the decompression state to allow a new request to use it */ 732 static void 733 
http_inflate_done(struct http_connection *conn) 734 { 735 if (inflateReset(&conn->zlibctx->zs) != Z_OK) 736 http_inflate_free(conn); 737 } 738 739 /* 740 * Inflate the data from conn->buf into zctx->zbuf. The number of bytes 741 * available in zctx->zbuf is stored in zctx->zbufpos. 742 * Returns -1 on failure. 743 */ 744 static int 745 http_inflate_data(struct http_connection *conn) 746 { 747 struct http_zlib *zctx = conn->zlibctx; 748 size_t bsz = conn->bufpos; 749 int rv; 750 751 if (conn->iosz < bsz) 752 bsz = conn->iosz; 753 754 zctx->zdone = 0; 755 zctx->zbufpos = 0; 756 zctx->zinsz = bsz; 757 zctx->zs.next_in = conn->buf; 758 zctx->zs.avail_in = bsz; 759 zctx->zs.next_out = zctx->zbuf; 760 zctx->zs.avail_out = zctx->zbufsz; 761 762 switch ((rv = inflate(&zctx->zs, Z_NO_FLUSH))) { 763 case Z_OK: 764 break; 765 case Z_STREAM_END: 766 zctx->zdone = 1; 767 break; 768 default: 769 if (zctx->zs.msg != NULL) 770 warnx("%s: inflate failed: %s", conn_info(conn), 771 zctx->zs.msg); 772 else 773 warnx("%s: inflate failed error %d", conn_info(conn), 774 rv); 775 return -1; 776 } 777 778 /* calculate how much can be written out */ 779 zctx->zbufpos = zctx->zbufsz - zctx->zs.avail_out; 780 return 0; 781 } 782 783 /* 784 * Advance the input buffer after the output buffer has been fully written. 785 * If compression is done finish the transaction else read more data. 
786 */ 787 static enum res 788 http_inflate_advance(struct http_connection *conn) 789 { 790 struct http_zlib *zctx = conn->zlibctx; 791 size_t bsz = zctx->zinsz - zctx->zs.avail_in; 792 793 /* adjust compressed input buffer */ 794 conn->bufpos -= bsz; 795 conn->iosz -= bsz; 796 memmove(conn->buf, conn->buf + bsz, conn->bufpos); 797 798 if (zctx->zdone) { 799 /* all compressed data processed */ 800 conn->gzipped = 0; 801 http_inflate_done(conn); 802 803 if (conn->iosz == 0) { 804 if (!conn->chunked) { 805 return http_done(conn, HTTP_OK); 806 } else { 807 conn->state = STATE_RESPONSE_CHUNKED_CRLF; 808 return http_read(conn); 809 } 810 } else { 811 warnx("%s: inflate extra data after end", 812 conn_info(conn)); 813 return http_failed(conn); 814 } 815 } 816 817 if (conn->chunked && conn->iosz == 0) 818 conn->state = STATE_RESPONSE_CHUNKED_CRLF; 819 else 820 conn->state = STATE_RESPONSE_DATA; 821 return http_read(conn); 822 } 823 824 /* 825 * Create a new HTTP connection which will be used for the HTTP request req. 826 * On errors a req failure is issued and both connection and request are freed. 
827 */ 828 static void 829 http_new(struct http_request *req) 830 { 831 struct http_connection *conn; 832 833 if ((conn = calloc(1, sizeof(*conn))) == NULL) 834 err(1, NULL); 835 836 conn->fd = -1; 837 conn->req = req; 838 if ((conn->host = strdup(req->host)) == NULL) 839 err(1, NULL); 840 if ((conn->port = strdup(req->port)) == NULL) 841 err(1, NULL); 842 843 LIST_INSERT_HEAD(&active, conn, entry); 844 http_conn_count++; 845 846 if (proxy.proxyhost != NULL) { 847 if (http_resolv(&conn->res0, proxy.proxyhost, 848 proxy.proxyport) == -1) { 849 http_req_fail(req->id); 850 http_free(conn); 851 return; 852 } 853 } else { 854 if (http_resolv(&conn->res0, conn->host, conn->port) == -1) { 855 http_req_fail(req->id); 856 http_free(conn); 857 return; 858 } 859 } 860 861 /* connect and start request */ 862 http_do(conn, http_connect); 863 if (conn->state == STATE_FREE) 864 http_free(conn); 865 } 866 867 /* 868 * Free a no longer active connection, releasing all memory and closing 869 * any open file descriptor. 870 */ 871 static void 872 http_free(struct http_connection *conn) 873 { 874 assert(conn->state == STATE_FREE); 875 876 LIST_REMOVE(conn, entry); 877 http_conn_count--; 878 879 http_req_free(conn->req); 880 http_inflate_free(conn); 881 free(conn->host); 882 free(conn->port); 883 free(conn->last_modified); 884 free(conn->redir_uri); 885 free(conn->buf); 886 887 if (conn->res0 != NULL) 888 freeaddrinfo(conn->res0); 889 890 tls_free(conn->tls); 891 892 if (conn->fd != -1) 893 close(conn->fd); 894 free(conn); 895 } 896 897 /* 898 * Called when a request on this connection is finished. 899 * Move connection into idle state and onto idle queue. 900 * If there is a request connected to it send back a response 901 * with http_result res, else ignore the res. 
 */
/*
 * The caller has to have consumed all buffered data and cleared the
 * chunked flag and redirect URI beforehand; this is asserted. Returns
 * WANT_POLLIN for a connection kept on the idle list, else the result
 * of http_close().
 */
static enum res
http_done(struct http_connection *conn, enum http_result res)
{
	assert(conn->bufpos == 0);
	assert(conn->iosz == 0);
	assert(conn->chunked == 0);
	assert(conn->redir_uri == NULL);

	/* make the decompression context reusable for the next request */
	if (conn->gzipped) {
		conn->gzipped = 0;
		http_inflate_done(conn);
	}

	conn->state = STATE_IDLE;
	conn->idle_time = getmonotime() + HTTP_IDLE_TIMEOUT;

	/* report the result and detach the request from the connection */
	if (conn->req) {
		http_req_done(conn->req->id, res, conn->last_modified);
		http_req_free(conn->req);
		conn->req = NULL;
	}

	/* without keep-alive the connection is closed instead of parked */
	if (!conn->keep_alive)
		return http_close(conn);

	/* move from the active to the idle list */
	LIST_REMOVE(conn, entry);
	LIST_INSERT_HEAD(&idle, conn, entry);

	/* reset status and keep-alive for good measures */
	conn->status = 0;
	conn->keep_alive = 0;

	return WANT_POLLIN;
}

/*
 * Called in case of error, moves connection into free state.
 * This will skip proper shutdown of the TLS session.
 * If a request is pending fail and free the request.
 *
 * Always returns DONE. The connection itself is not freed here; the
 * callers visible in this file call http_free() once they see
 * STATE_FREE.
 */
static enum res
http_failed(struct http_connection *conn)
{
	conn->state = STATE_FREE;

	if (conn->req) {
		http_req_fail(conn->req->id);
		http_req_free(conn->req);
		conn->req = NULL;
	}

	return DONE;
}

/*
 * Called in case of connect timeout, try an alternate connection.
 *
 * Closes the current socket and re-enters http_connect(), which
 * continues with the address following conn->res.
 */
static enum res
http_connect_failed(struct http_connection *conn)
{
	assert(conn->state == STATE_CONNECT);
	close(conn->fd);
	/* http_connect() asserts that no socket is open */
	conn->fd = -1;

	return http_connect(conn);
}

/*
 * Call the function f and update the connection events based
 * on the return value.
 */
/*
 * DONE clears conn->events, WANT_POLLIN and WANT_POLLOUT select the
 * matching poll event. Any other return value is a fatal error.
 */
static void
http_do(struct http_connection *conn, enum res (*f)(struct http_connection *))
{
	switch (f(conn)) {
	case DONE:
		conn->events = 0;
		break;
	case WANT_POLLIN:
		conn->events = POLLIN;
		break;
	case WANT_POLLOUT:
		conn->events = POLLOUT;
		break;
	default:
		errx(1, "%s: unexpected function return", conn_info(conn));
	}
}

/*
 * Connection successfully establish, initiate TLS handshake or proxy request.
 */
static enum res
http_connect_done(struct http_connection *conn)
{
	/* with a proxy the CONNECT request comes before the TLS handshake */
	if (proxy.proxyhost != NULL)
		return proxy_connect(conn);
	return http_tls_connect(conn);
}

/*
 * Start an asynchronous connect.
 *
 * Walks the address list in conn->res0, resuming after conn->res if a
 * previous attempt was made. Returns WANT_POLLOUT while a non-blocking
 * connect is in progress, fails the connection if no address could be
 * used and otherwise continues with http_connect_done().
 */
static enum res
http_connect(struct http_connection *conn)
{
	const char *cause = NULL;
	struct addrinfo *res;

	assert(conn->fd == -1);
	conn->state = STATE_CONNECT;

	/* start the loop below with first or next address */
	if (conn->res == NULL)
		conn->res = conn->res0;
	else
		conn->res = conn->res->ai_next;
	for (; conn->res != NULL; conn->res = conn->res->ai_next) {
		int fd, save_errno;

		res = conn->res;
		fd = socket(res->ai_family,
		    res->ai_socktype | SOCK_NONBLOCK, res->ai_protocol);
		if (fd == -1) {
			cause = "socket";
			continue;
		}
		conn->fd = fd;

		/* only bind if the bind address is of the same family */
		if (http_bindaddr.ss_family == res->ai_family) {
			if (bind(conn->fd, (struct sockaddr *)&http_bindaddr,
			    res->ai_addrlen) == -1) {
				/* keep errno of bind() for the warn() below */
				save_errno = errno;
				close(conn->fd);
				conn->fd = -1;
				errno = save_errno;
				cause = "bind";
				continue;
			}
		}

		if (connect(conn->fd, res->ai_addr, res->ai_addrlen) == -1) {
			if (errno == EINPROGRESS) {
				/* wait for async connect to finish. */
				return WANT_POLLOUT;
			} else {
				/* keep errno of connect() for the warn() */
				save_errno = errno;
				close(conn->fd);
				conn->fd = -1;
				errno = save_errno;
				cause = "connect";
				continue;
			}
		}

		break;	/* okay we got one */
	}

	if (conn->fd == -1) {
		if (cause != NULL) {
			/*
			 * conn->res is NULL after the loop, point it back
			 * at the last address tried so that conn_info()
			 * can print it.
			 */
			conn->res = res;
			warn("%s: %s", conn_info(conn), cause);
		}
		return http_failed(conn);
	}

	return http_connect_done(conn);
}

/*
 * Called once an asynchronous connect request finished.
 *
 * Fetches the result of the connect via SO_ERROR. On error the next
 * address is tried through http_connect_failed().
 */
static enum res
http_finish_connect(struct http_connection *conn)
{
	int error = 0;
	socklen_t len;

	len = sizeof(error);
	if (getsockopt(conn->fd, SOL_SOCKET, SO_ERROR, &error, &len) == -1) {
		warn("%s: getsockopt SO_ERROR", conn_info(conn));
		return http_connect_failed(conn);
	}
	if (error != 0) {
		/* warn() reports errno, so load the socket error into it */
		errno = error;
		warn("%s: connect", conn_info(conn));
		return http_connect_failed(conn);
	}

	return http_connect_done(conn);
}

/*
 * Initiate TLS session on a new connection.
 *
 * Creates and configures the TLS client context, attaches it to the
 * socket using conn->host as server name and starts the handshake.
 * Any failure fails the connection.
 */
static enum res
http_tls_connect(struct http_connection *conn)
{
	assert(conn->state == STATE_CONNECT);
	conn->state = STATE_TLSCONNECT;

	if ((conn->tls = tls_client()) == NULL) {
		warn("tls_client");
		return http_failed(conn);
	}
	if (tls_configure(conn->tls, tls_config) == -1) {
		warnx("%s: TLS configuration: %s", conn_info(conn),
		    tls_error(conn->tls));
		return http_failed(conn);
	}
	if (tls_connect_socket(conn->tls, conn->fd, conn->host) == -1) {
		warnx("%s: TLS connect: %s", conn_info(conn),
		    tls_error(conn->tls));
		return http_failed(conn);
	}

	return http_tls_handshake(conn);
}

/*
 * Do the tls_handshake and then send out the HTTP request.
1124 */ 1125 static enum res 1126 http_tls_handshake(struct http_connection *conn) 1127 { 1128 switch (tls_handshake(conn->tls)) { 1129 case -1: 1130 warnx("%s: TLS handshake: %s", conn_info(conn), 1131 tls_error(conn->tls)); 1132 return http_failed(conn); 1133 case TLS_WANT_POLLIN: 1134 return WANT_POLLIN; 1135 case TLS_WANT_POLLOUT: 1136 return WANT_POLLOUT; 1137 } 1138 1139 return http_request(conn); 1140 } 1141 1142 static enum res 1143 proxy_connect(struct http_connection *conn) 1144 { 1145 char *host; 1146 int r; 1147 1148 assert(conn->state == STATE_CONNECT); 1149 conn->state = STATE_PROXY_REQUEST; 1150 1151 /* Construct the Host header from host and port info */ 1152 if (strchr(conn->host, ':')) { 1153 if (asprintf(&host, "[%s]:%s", conn->host, conn->port) == -1) 1154 err(1, NULL); 1155 1156 } else { 1157 if (asprintf(&host, "%s:%s", conn->host, conn->port) == -1) 1158 err(1, NULL); 1159 } 1160 1161 free(conn->buf); 1162 conn->bufpos = 0; 1163 /* XXX handle auth */ 1164 if ((r = asprintf(&conn->buf, "CONNECT %s HTTP/1.1\r\n" 1165 "Host: %s\r\n" 1166 "User-Agent: " HTTP_USER_AGENT "\r\n%s\r\n", host, host, 1167 proxy.proxyauth)) == -1) 1168 err(1, NULL); 1169 conn->bufsz = r; 1170 1171 free(host); 1172 1173 return proxy_write(conn); 1174 } 1175 1176 /* 1177 * Build the HTTP request and send it out. 1178 */ 1179 static enum res 1180 http_request(struct http_connection *conn) 1181 { 1182 char *host, *epath, *modified_since; 1183 int r, with_port = 0; 1184 1185 assert(conn->state == STATE_IDLE || conn->state == STATE_TLSCONNECT); 1186 conn->state = STATE_REQUEST; 1187 1188 /* 1189 * Send port number only if it's specified and does not equal 1190 * the default. Some broken HTTP servers get confused if you explicitly 1191 * send them the port number. 
1192 */ 1193 if (strcmp(conn->port, "443") != 0) 1194 with_port = 1; 1195 1196 /* Construct the Host header from host and port info */ 1197 if (strchr(conn->host, ':')) { 1198 if (asprintf(&host, "[%s]%s%s", conn->host, 1199 with_port ? ":" : "", with_port ? conn->port : "") == -1) 1200 err(1, NULL); 1201 1202 } else { 1203 if (asprintf(&host, "%s%s%s", conn->host, 1204 with_port ? ":" : "", with_port ? conn->port : "") == -1) 1205 err(1, NULL); 1206 } 1207 1208 /* 1209 * Construct and send the request. Proxy requests don't want leading /. 1210 */ 1211 epath = url_encode(conn->req->path); 1212 1213 modified_since = NULL; 1214 if (conn->req->modified_since != NULL) { 1215 if (asprintf(&modified_since, "If-Modified-Since: %s\r\n", 1216 conn->req->modified_since) == -1) 1217 err(1, NULL); 1218 } 1219 1220 free(conn->buf); 1221 conn->bufpos = 0; 1222 if ((r = asprintf(&conn->buf, 1223 "GET /%s HTTP/1.1\r\n" 1224 "Host: %s\r\n" 1225 "Accept: */*\r\n" 1226 "Accept-Encoding: gzip, deflate\r\n" 1227 "User-Agent: " HTTP_USER_AGENT "\r\n" 1228 "%s\r\n", 1229 epath, host, 1230 modified_since ? modified_since : "")) == -1) 1231 err(1, NULL); 1232 conn->bufsz = r; 1233 1234 free(epath); 1235 free(host); 1236 free(modified_since); 1237 1238 return http_write(conn); 1239 } 1240 1241 /* 1242 * Parse the HTTP status line. 1243 * Return 0 for status codes 100, 103, 200, 203, 301-304, 307-308. 1244 * The other 1xx and 2xx status codes are explicitly not handled and are 1245 * considered an error. 1246 * Failure codes and other errors return -1. 1247 * The redirect loop limit is enforced here. 
1248 */ 1249 static int 1250 http_parse_status(struct http_connection *conn, char *buf) 1251 { 1252 #define HTTP_11 "HTTP/1.1 " 1253 const char *errstr; 1254 char *cp, ststr[4]; 1255 char gerror[200]; 1256 int status; 1257 1258 /* Check if the protocol is 1.1 and enable keep-alive in that case */ 1259 if (strncmp(buf, HTTP_11, strlen(HTTP_11)) == 0) 1260 conn->keep_alive = 1; 1261 1262 cp = strchr(buf, ' '); 1263 if (cp == NULL) { 1264 warnx("Improper response from %s", conn_info(conn)); 1265 return -1; 1266 } else 1267 cp++; 1268 1269 strlcpy(ststr, cp, sizeof(ststr)); 1270 status = strtonum(ststr, 100, 599, &errstr); 1271 if (errstr != NULL) { 1272 strnvis(gerror, cp, sizeof gerror, VIS_SAFE); 1273 warnx("Error retrieving %s: %s", conn_info(conn), 1274 gerror); 1275 return -1; 1276 } 1277 1278 switch (status) { 1279 case 301: /* Redirect: moved permanently */ 1280 case 302: /* Redirect: found / moved temporarily */ 1281 case 303: /* Redirect: see other */ 1282 case 307: /* Redirect: temporary redirect */ 1283 case 308: /* Redirect: permanent redirect */ 1284 if (conn->req->redirect_loop++ > 10) { 1285 warnx("%s: Too many redirections requested", 1286 conn_info(conn)); 1287 return -1; 1288 } 1289 /* FALLTHROUGH */ 1290 case 100: /* Informational: continue (ignored) */ 1291 case 103: /* Informational: early hints (ignored) */ 1292 /* FALLTHROUGH */ 1293 case 200: /* Success: OK */ 1294 case 203: /* Success: non-authoritative information (proxy) */ 1295 case 304: /* Redirect: not modified */ 1296 conn->status = status; 1297 break; 1298 default: 1299 strnvis(gerror, cp, sizeof gerror, VIS_SAFE); 1300 warnx("Error retrieving %s: %s", conn_info(conn), 1301 gerror); 1302 return -1; 1303 } 1304 1305 return 0; 1306 } 1307 1308 /* 1309 * Returns true if the connection status is any of the redirect codes. 
1310 */ 1311 static inline int 1312 http_isredirect(struct http_connection *conn) 1313 { 1314 if ((conn->status >= 301 && conn->status <= 303) || 1315 conn->status == 307 || conn->status == 308) 1316 return 1; 1317 return 0; 1318 } 1319 1320 static inline int 1321 http_isok(struct http_connection *conn) 1322 { 1323 if (conn->status >= 200 && conn->status < 300) 1324 return 1; 1325 return 0; 1326 } 1327 1328 static void 1329 http_redirect(struct http_connection *conn) 1330 { 1331 char *uri, *mod_since = NULL; 1332 int outfd; 1333 1334 /* move uri and fd out for new request */ 1335 outfd = conn->req->outfd; 1336 conn->req->outfd = -1; 1337 1338 uri = conn->redir_uri; 1339 conn->redir_uri = NULL; 1340 1341 if (conn->req->modified_since) 1342 if ((mod_since = strdup(conn->req->modified_since)) == NULL) 1343 err(1, NULL); 1344 1345 logx("redirect to %s", http_info(uri)); 1346 http_req_new(conn->req->id, uri, mod_since, conn->req->redirect_loop, 1347 outfd); 1348 1349 /* clear request before moving connection to idle */ 1350 http_req_free(conn->req); 1351 conn->req = NULL; 1352 } 1353 1354 static int 1355 http_parse_header(struct http_connection *conn, char *buf) 1356 { 1357 #define CONTENTLEN "Content-Length:" 1358 #define LOCATION "Location:" 1359 #define CONNECTION "Connection:" 1360 #define TRANSFER_ENCODING "Transfer-Encoding:" 1361 #define CONTENT_ENCODING "Content-Encoding:" 1362 #define LAST_MODIFIED "Last-Modified:" 1363 const char *errstr; 1364 char *cp, *redirurl; 1365 char *locbase, *loctail; 1366 1367 cp = buf; 1368 /* empty line, end of header */ 1369 if (*cp == '\0') 1370 return 0; 1371 else if (strncasecmp(cp, CONTENTLEN, sizeof(CONTENTLEN) - 1) == 0) { 1372 cp += sizeof(CONTENTLEN) - 1; 1373 cp += strspn(cp, " \t"); 1374 conn->iosz = strtonum(cp, 0, MAX_CONTENTLEN, &errstr); 1375 if (errstr != NULL) { 1376 warnx("Content-Length of %s is %s", 1377 conn_info(conn), errstr); 1378 return -1; 1379 } 1380 } else if (http_isredirect(conn) && 1381 
strncasecmp(cp, LOCATION, sizeof(LOCATION) - 1) == 0) { 1382 cp += sizeof(LOCATION) - 1; 1383 cp += strspn(cp, " \t"); 1384 /* 1385 * If there is a colon before the first slash, this URI 1386 * is not relative. RFC 3986 4.2 1387 */ 1388 if (cp[strcspn(cp, ":/")] != ':') { 1389 /* XXX doesn't handle protocol-relative URIs */ 1390 if (*cp == '/') { 1391 locbase = NULL; 1392 cp++; 1393 } else { 1394 locbase = strdup(conn->req->path); 1395 if (locbase == NULL) 1396 err(1, NULL); 1397 loctail = strchr(locbase, '#'); 1398 if (loctail != NULL) 1399 *loctail = '\0'; 1400 loctail = strchr(locbase, '?'); 1401 if (loctail != NULL) 1402 *loctail = '\0'; 1403 loctail = strrchr(locbase, '/'); 1404 if (loctail == NULL) { 1405 free(locbase); 1406 locbase = NULL; 1407 } else 1408 loctail[1] = '\0'; 1409 } 1410 /* Construct URL from relative redirect */ 1411 if (asprintf(&redirurl, "%.*s/%s%s", 1412 (int)(conn->req->path - conn->req->uri), 1413 conn->req->uri, locbase ? locbase : "", cp) == -1) 1414 err(1, "Cannot build redirect URL"); 1415 free(locbase); 1416 } else if ((redirurl = strdup(cp)) == NULL) 1417 err(1, "Cannot build redirect URL"); 1418 loctail = strchr(redirurl, '#'); 1419 if (loctail != NULL) 1420 *loctail = '\0'; 1421 conn->redir_uri = redirurl; 1422 if (!valid_origin(redirurl, conn->req->uri)) { 1423 warnx("%s: cross origin redirect to %s", conn->req->uri, 1424 http_info(redirurl)); 1425 return -1; 1426 } 1427 } else if (strncasecmp(cp, TRANSFER_ENCODING, 1428 sizeof(TRANSFER_ENCODING) - 1) == 0) { 1429 cp += sizeof(TRANSFER_ENCODING) - 1; 1430 cp += strspn(cp, " \t"); 1431 if (strcasecmp(cp, "chunked") == 0) 1432 conn->chunked = 1; 1433 } else if (strncasecmp(cp, CONTENT_ENCODING, 1434 sizeof(CONTENT_ENCODING) - 1) == 0) { 1435 cp += sizeof(CONTENT_ENCODING) - 1; 1436 cp += strspn(cp, " \t"); 1437 if (strcasecmp(cp, "gzip") == 0 || 1438 strcasecmp(cp, "deflate") == 0) { 1439 if (http_inflate_new(conn) == -1) 1440 return -1; 1441 conn->gzipped = 1; 1442 } 1443 } 
else if (strncasecmp(cp, CONNECTION, sizeof(CONNECTION) - 1) == 0) { 1444 cp += sizeof(CONNECTION) - 1; 1445 cp += strspn(cp, " \t"); 1446 if (strcasecmp(cp, "close") == 0) 1447 conn->keep_alive = 0; 1448 else if (strcasecmp(cp, "keep-alive") == 0) 1449 conn->keep_alive = 1; 1450 } else if (strncasecmp(cp, LAST_MODIFIED, 1451 sizeof(LAST_MODIFIED) - 1) == 0) { 1452 cp += sizeof(LAST_MODIFIED) - 1; 1453 cp += strspn(cp, " \t"); 1454 free(conn->last_modified); 1455 if ((conn->last_modified = strdup(cp)) == NULL) 1456 err(1, NULL); 1457 } 1458 1459 return 1; 1460 } 1461 1462 /* 1463 * Return one line from the HTTP response. 1464 * The line returned has any possible '\r' and '\n' at the end stripped. 1465 * The buffer is advanced to the start of the next line. 1466 * If there is currently no full line in the buffer NULL is returned. 1467 */ 1468 static char * 1469 http_get_line(struct http_connection *conn) 1470 { 1471 char *end, *line; 1472 size_t len; 1473 1474 end = memchr(conn->buf, '\n', conn->bufpos); 1475 if (end == NULL) 1476 return NULL; 1477 1478 len = end - conn->buf; 1479 while (len > 0 && (conn->buf[len - 1] == '\r' || 1480 conn->buf[len - 1] == ' ' || conn->buf[len - 1] == '\t')) 1481 --len; 1482 1483 if ((line = strndup(conn->buf, len)) == NULL) 1484 err(1, NULL); 1485 1486 /* consume line including \n */ 1487 end++; 1488 conn->bufpos -= end - conn->buf; 1489 memmove(conn->buf, end, conn->bufpos); 1490 1491 return line; 1492 } 1493 1494 /* 1495 * Parse the header between data chunks during chunked transfers. 1496 * Returns 0 if a new chunk size could be correctly read. 1497 * If the chuck size could not be converted properly -1 is returned. 
1498 */ 1499 static int 1500 http_parse_chunked(struct http_connection *conn, char *buf) 1501 { 1502 char *header = buf; 1503 char *end; 1504 unsigned long chunksize; 1505 1506 /* strip any optional chunk extension */ 1507 header[strcspn(header, "; \t")] = '\0'; 1508 errno = 0; 1509 chunksize = strtoul(header, &end, 16); 1510 if (header[0] == '\0' || *end != '\0' || (errno == ERANGE && 1511 chunksize == ULONG_MAX) || chunksize > MAX_CONTENTLEN) 1512 return -1; 1513 1514 conn->iosz = chunksize; 1515 return 0; 1516 } 1517 1518 static enum res 1519 http_read(struct http_connection *conn) 1520 { 1521 ssize_t s; 1522 char *buf; 1523 int done; 1524 1525 if (conn->bufpos > 0) 1526 goto again; 1527 1528 read_more: 1529 s = tls_read(conn->tls, conn->buf + conn->bufpos, 1530 conn->bufsz - conn->bufpos); 1531 if (s == -1) { 1532 warnx("%s: TLS read: %s", conn_info(conn), 1533 tls_error(conn->tls)); 1534 return http_failed(conn); 1535 } else if (s == TLS_WANT_POLLIN) { 1536 return WANT_POLLIN; 1537 } else if (s == TLS_WANT_POLLOUT) { 1538 return WANT_POLLOUT; 1539 } 1540 1541 if (s == 0) { 1542 if (conn->req) 1543 warnx("%s: short read, connection closed", 1544 conn_info(conn)); 1545 return http_failed(conn); 1546 } 1547 1548 conn->bufpos += s; 1549 1550 again: 1551 switch (conn->state) { 1552 case STATE_PROXY_STATUS: 1553 buf = http_get_line(conn); 1554 if (buf == NULL) 1555 goto read_more; 1556 if (http_parse_status(conn, buf) == -1) { 1557 free(buf); 1558 return http_failed(conn); 1559 } 1560 free(buf); 1561 conn->state = STATE_PROXY_RESPONSE; 1562 goto again; 1563 case STATE_PROXY_RESPONSE: 1564 while (1) { 1565 buf = http_get_line(conn); 1566 if (buf == NULL) 1567 goto read_more; 1568 /* empty line, end of header */ 1569 if (*buf == '\0') { 1570 free(buf); 1571 break; 1572 } 1573 free(buf); 1574 } 1575 /* proxy is ready to take connection */ 1576 if (conn->status == 200) { 1577 conn->state = STATE_CONNECT; 1578 return http_tls_connect(conn); 1579 } 1580 return 
http_failed(conn); 1581 case STATE_RESPONSE_STATUS: 1582 buf = http_get_line(conn); 1583 if (buf == NULL) 1584 goto read_more; 1585 1586 if (http_parse_status(conn, buf) == -1) { 1587 free(buf); 1588 return http_failed(conn); 1589 } 1590 free(buf); 1591 conn->state = STATE_RESPONSE_HEADER; 1592 goto again; 1593 case STATE_RESPONSE_HEADER: 1594 done = 0; 1595 while (!done) { 1596 int rv; 1597 1598 buf = http_get_line(conn); 1599 if (buf == NULL) 1600 goto read_more; 1601 1602 rv = http_parse_header(conn, buf); 1603 free(buf); 1604 1605 if (rv == -1) 1606 return http_failed(conn); 1607 if (rv == 0) 1608 done = 1; 1609 } 1610 1611 /* Check status header and decide what to do next */ 1612 if (http_isok(conn) || http_isredirect(conn)) { 1613 if (http_isredirect(conn)) 1614 http_redirect(conn); 1615 1616 conn->totalsz = 0; 1617 if (conn->chunked) 1618 conn->state = STATE_RESPONSE_CHUNKED_HEADER; 1619 else 1620 conn->state = STATE_RESPONSE_DATA; 1621 goto again; 1622 } else if (conn->status == 100 || conn->status == 103) { 1623 conn->state = STATE_RESPONSE_STATUS; 1624 } else if (conn->status == 304) { 1625 return http_done(conn, HTTP_NOT_MOD); 1626 } 1627 1628 return http_failed(conn); 1629 case STATE_RESPONSE_DATA: 1630 if (conn->bufpos != conn->bufsz && 1631 conn->iosz > conn->bufpos) 1632 goto read_more; 1633 1634 /* got a buffer full of data */ 1635 if (conn->req == NULL) { 1636 /* 1637 * After redirects all data needs to be discarded. 
1638 */ 1639 if (conn->iosz < conn->bufpos) { 1640 conn->bufpos -= conn->iosz; 1641 conn->iosz = 0; 1642 } else { 1643 conn->iosz -= conn->bufpos; 1644 conn->bufpos = 0; 1645 } 1646 if (conn->chunked) 1647 conn->state = STATE_RESPONSE_CHUNKED_CRLF; 1648 else 1649 conn->state = STATE_RESPONSE_DATA; 1650 goto read_more; 1651 } 1652 1653 conn->state = STATE_WRITE_DATA; 1654 return WANT_POLLOUT; 1655 case STATE_RESPONSE_CHUNKED_HEADER: 1656 assert(conn->iosz == 0); 1657 1658 buf = http_get_line(conn); 1659 if (buf == NULL) 1660 goto read_more; 1661 if (http_parse_chunked(conn, buf) != 0) { 1662 warnx("%s: bad chunk encoding", conn_info(conn)); 1663 free(buf); 1664 return http_failed(conn); 1665 } 1666 free(buf); 1667 1668 /* 1669 * check if transfer is done, in which case the last trailer 1670 * still needs to be processed. 1671 */ 1672 if (conn->iosz == 0) 1673 conn->state = STATE_RESPONSE_CHUNKED_TRAILER; 1674 else 1675 conn->state = STATE_RESPONSE_DATA; 1676 goto again; 1677 case STATE_RESPONSE_CHUNKED_CRLF: 1678 buf = http_get_line(conn); 1679 if (buf == NULL) 1680 goto read_more; 1681 /* expect empty line to finish a chunk of data */ 1682 if (*buf != '\0') { 1683 warnx("%s: bad chunk encoding", conn_info(conn)); 1684 free(buf); 1685 return http_failed(conn); 1686 } 1687 free(buf); 1688 conn->state = STATE_RESPONSE_CHUNKED_HEADER; 1689 goto again; 1690 case STATE_RESPONSE_CHUNKED_TRAILER: 1691 buf = http_get_line(conn); 1692 if (buf == NULL) 1693 goto read_more; 1694 /* the trailer may include extra headers, just ignore them */ 1695 if (*buf != '\0') { 1696 free(buf); 1697 goto again; 1698 } 1699 free(buf); 1700 conn->chunked = 0; 1701 return http_done(conn, HTTP_OK); 1702 default: 1703 errx(1, "unexpected http state"); 1704 } 1705 } 1706 1707 /* 1708 * Send out the HTTP request. When done, replace buffer with the read buffer. 
1709 */ 1710 static enum res 1711 http_write(struct http_connection *conn) 1712 { 1713 ssize_t s; 1714 1715 assert(conn->state == STATE_REQUEST); 1716 1717 while (conn->bufpos < conn->bufsz) { 1718 s = tls_write(conn->tls, conn->buf + conn->bufpos, 1719 conn->bufsz - conn->bufpos); 1720 if (s == -1) { 1721 warnx("%s: TLS write: %s", conn_info(conn), 1722 tls_error(conn->tls)); 1723 return http_failed(conn); 1724 } else if (s == TLS_WANT_POLLIN) { 1725 return WANT_POLLIN; 1726 } else if (s == TLS_WANT_POLLOUT) { 1727 return WANT_POLLOUT; 1728 } 1729 1730 conn->bufpos += s; 1731 } 1732 1733 /* done writing, first thing we need the status */ 1734 conn->state = STATE_RESPONSE_STATUS; 1735 1736 /* free write buffer and allocate the read buffer */ 1737 free(conn->buf); 1738 conn->bufpos = 0; 1739 conn->bufsz = HTTP_BUF_SIZE; 1740 if ((conn->buf = malloc(conn->bufsz)) == NULL) 1741 err(1, NULL); 1742 1743 return http_read(conn); 1744 } 1745 1746 static enum res 1747 proxy_read(struct http_connection *conn) 1748 { 1749 ssize_t s; 1750 char *buf; 1751 int done; 1752 1753 s = read(conn->fd, conn->buf + conn->bufpos, 1754 conn->bufsz - conn->bufpos); 1755 if (s == -1) { 1756 warn("%s: read", conn_info(conn)); 1757 return http_failed(conn); 1758 } 1759 1760 if (s == 0) { 1761 if (conn->req) 1762 warnx("%s: short read, connection closed", 1763 conn_info(conn)); 1764 return http_failed(conn); 1765 } 1766 1767 conn->bufpos += s; 1768 1769 again: 1770 switch (conn->state) { 1771 case STATE_PROXY_STATUS: 1772 buf = http_get_line(conn); 1773 if (buf == NULL) 1774 return WANT_POLLIN; 1775 if (http_parse_status(conn, buf) == -1) { 1776 free(buf); 1777 return http_failed(conn); 1778 } 1779 free(buf); 1780 conn->state = STATE_PROXY_RESPONSE; 1781 goto again; 1782 case STATE_PROXY_RESPONSE: 1783 done = 0; 1784 while (!done) { 1785 buf = http_get_line(conn); 1786 if (buf == NULL) 1787 return WANT_POLLIN; 1788 /* empty line, end of header */ 1789 if (*buf == '\0') 1790 done = 1; 1791 
free(buf); 1792 } 1793 /* proxy is ready, connect to remote */ 1794 if (conn->status == 200) { 1795 conn->state = STATE_CONNECT; 1796 return http_tls_connect(conn); 1797 } 1798 return http_failed(conn); 1799 default: 1800 errx(1, "unexpected http state"); 1801 } 1802 } 1803 1804 /* 1805 * Send out the proxy request. When done, replace buffer with the read buffer. 1806 */ 1807 static enum res 1808 proxy_write(struct http_connection *conn) 1809 { 1810 ssize_t s; 1811 1812 assert(conn->state == STATE_PROXY_REQUEST); 1813 1814 s = write(conn->fd, conn->buf + conn->bufpos, 1815 conn->bufsz - conn->bufpos); 1816 if (s == -1) { 1817 warn("%s: write", conn_info(conn)); 1818 return http_failed(conn); 1819 } 1820 conn->bufpos += s; 1821 if (conn->bufpos < conn->bufsz) 1822 return WANT_POLLOUT; 1823 1824 /* done writing, first thing we need the status */ 1825 conn->state = STATE_PROXY_STATUS; 1826 1827 /* free write buffer and allocate the read buffer */ 1828 free(conn->buf); 1829 conn->bufpos = 0; 1830 conn->bufsz = HTTP_BUF_SIZE; 1831 if ((conn->buf = malloc(conn->bufsz)) == NULL) 1832 err(1, NULL); 1833 1834 return WANT_POLLIN; 1835 } 1836 1837 /* 1838 * Properly shutdown the TLS session else move connection into free state. 1839 */ 1840 static enum res 1841 http_close(struct http_connection *conn) 1842 { 1843 assert(conn->state == STATE_IDLE || conn->state == STATE_CLOSE); 1844 1845 conn->state = STATE_CLOSE; 1846 LIST_REMOVE(conn, entry); 1847 LIST_INSERT_HEAD(&active, conn, entry); 1848 1849 if (conn->tls != NULL) { 1850 switch (tls_close(conn->tls)) { 1851 case TLS_WANT_POLLIN: 1852 return WANT_POLLIN; 1853 case TLS_WANT_POLLOUT: 1854 return WANT_POLLOUT; 1855 case 0: 1856 case -1: 1857 break; 1858 } 1859 } 1860 1861 conn->state = STATE_FREE; 1862 return DONE; 1863 } 1864 1865 /* 1866 * Write data into provided file descriptor. If all data got written 1867 * the connection may change into idle state. 
1868 */ 1869 static enum res 1870 data_write(struct http_connection *conn) 1871 { 1872 ssize_t s; 1873 size_t bsz = conn->bufpos; 1874 1875 assert(conn->state == STATE_WRITE_DATA); 1876 1877 if (conn->iosz < bsz) 1878 bsz = conn->iosz; 1879 1880 s = write(conn->req->outfd, conn->buf, bsz); 1881 if (s == -1) { 1882 warn("%s: data write", conn_info(conn)); 1883 return http_failed(conn); 1884 } 1885 1886 conn->totalsz += s; 1887 if (conn->totalsz > MAX_CONTENTLEN) { 1888 warn("%s: too much data offered", conn_info(conn)); 1889 return http_failed(conn); 1890 } 1891 1892 conn->bufpos -= s; 1893 conn->iosz -= s; 1894 memmove(conn->buf, conn->buf + s, conn->bufpos); 1895 1896 /* check if regular file transfer is finished */ 1897 if (!conn->chunked && conn->iosz == 0) 1898 return http_done(conn, HTTP_OK); 1899 1900 /* all data written, switch back to read */ 1901 if (conn->bufpos == 0 || conn->iosz == 0) { 1902 if (conn->chunked && conn->iosz == 0) 1903 conn->state = STATE_RESPONSE_CHUNKED_CRLF; 1904 else 1905 conn->state = STATE_RESPONSE_DATA; 1906 return http_read(conn); 1907 } 1908 1909 /* still more data to write in buffer */ 1910 return WANT_POLLOUT; 1911 } 1912 1913 /* 1914 * Inflate and write data into provided file descriptor. 1915 * This is a simplified version of data_write() that just writes out the 1916 * decompressed file stream. All the buffer handling is done by 1917 * http_inflate_data() and http_inflate_advance(). 
1918 */ 1919 static enum res 1920 data_inflate_write(struct http_connection *conn) 1921 { 1922 struct http_zlib *zctx = conn->zlibctx; 1923 ssize_t s; 1924 1925 assert(conn->state == STATE_WRITE_DATA); 1926 1927 /* no decompressed data, get more */ 1928 if (zctx->zbufpos == 0) 1929 if (http_inflate_data(conn) == -1) 1930 return http_failed(conn); 1931 1932 s = write(conn->req->outfd, zctx->zbuf, zctx->zbufpos); 1933 if (s == -1) { 1934 warn("%s: data write", conn_info(conn)); 1935 return http_failed(conn); 1936 } 1937 1938 conn->totalsz += s; 1939 if (conn->totalsz > MAX_CONTENTLEN) { 1940 warn("%s: too much decompressed data offered", conn_info(conn)); 1941 return http_failed(conn); 1942 } 1943 1944 /* adjust output buffer */ 1945 zctx->zbufpos -= s; 1946 memmove(zctx->zbuf, zctx->zbuf + s, zctx->zbufpos); 1947 1948 /* all decompressed data written, progress input */ 1949 if (zctx->zbufpos == 0) 1950 return http_inflate_advance(conn); 1951 1952 /* still more data to write in buffer */ 1953 return WANT_POLLOUT; 1954 } 1955 1956 /* 1957 * Do one IO call depending on the connection state. 1958 * Return WANT_POLLIN or WANT_POLLOUT to poll for more data. 1959 * If 0 is returned this stage is finished and the protocol should move 1960 * to the next stage by calling http_nextstep(). On error return -1. 
*/
static enum res
http_handle(struct http_connection *conn)
{
	assert(conn->pfd != NULL && conn->pfd->revents != 0);

	/* there was activity, the IO timeout is re-armed by the main loop */
	conn->io_time = 0;

	switch (conn->state) {
	case STATE_CONNECT:
		return http_finish_connect(conn);
	case STATE_TLSCONNECT:
		return http_tls_handshake(conn);
	case STATE_REQUEST:
		return http_write(conn);
	case STATE_PROXY_REQUEST:
		return proxy_write(conn);
	case STATE_PROXY_STATUS:
	case STATE_PROXY_RESPONSE:
		return proxy_read(conn);
	case STATE_RESPONSE_STATUS:
	case STATE_RESPONSE_HEADER:
	case STATE_RESPONSE_DATA:
	case STATE_RESPONSE_CHUNKED_HEADER:
	case STATE_RESPONSE_CHUNKED_CRLF:
	case STATE_RESPONSE_CHUNKED_TRAILER:
		return http_read(conn);
	case STATE_WRITE_DATA:
		if (conn->gzipped)
			return data_inflate_write(conn);
		else
			return data_write(conn);
	case STATE_CLOSE:
		return http_close(conn);
	case STATE_IDLE:
		/* event on an idle connection: make it active and read */
		conn->state = STATE_RESPONSE_HEADER;
		LIST_REMOVE(conn, entry);
		LIST_INSERT_HEAD(&active, conn, entry);
		return http_read(conn);
	case STATE_FREE:
		errx(1, "bad http state");
	}
	errx(1, "unknown http state");
}

/*
 * Initialisation done before pledge() call to load certificates.
 * Creates the TLS config, loads the default CA file into memory and
 * parses the http_proxy environment variable (an empty value is treated
 * like an unset one).
 */
static void
http_setup(void)
{
	char *httpproxy;

	tls_config = tls_config_new();
	if (tls_config == NULL)
		errx(1, "tls config failed");

#if 0
	/* TODO Should we allow extra protos and ciphers? */
	if (tls_config_set_protocols(tls_config, TLS_PROTOCOLS_ALL) == -1)
		errx(1, "tls set protocols failed: %s",
		    tls_config_error(tls_config));
	if (tls_config_set_ciphers(tls_config, "legacy") == -1)
		errx(1, "tls set ciphers failed: %s",
		    tls_config_error(tls_config));
#endif

	/* load cert file from disk now */
	tls_ca_mem = tls_load_file(tls_default_ca_cert_file(),
	    &tls_ca_size, NULL);
	if (tls_ca_mem == NULL)
		err(1, "tls_load_file: %s", tls_default_ca_cert_file());
	/* fail early like the other setup errors instead of ignoring it */
	if (tls_config_set_ca_mem(tls_config, tls_ca_mem, tls_ca_size) == -1)
		errx(1, "tls set ca mem failed: %s",
		    tls_config_error(tls_config));

	if ((httpproxy = getenv("http_proxy")) != NULL && *httpproxy == '\0')
		httpproxy = NULL;

	proxy_parse_uri(httpproxy);
}

/*
 * Main loop of the http process.
 * bind_addr is an optional numeric address that is stored in
 * http_bindaddr, fd is the descriptor used to exchange messages with
 * the requesting process. Each iteration polls fd and all connections,
 * queues newly received requests, runs ready or timed out connections
 * and finally tries to schedule queued requests.
 * The function never returns, it exits once fd reports POLLHUP.
 */
void
proc_http(char *bind_addr, int fd)
{
	struct pollfd pfds[NPFDS];
	struct http_connection *conn, *nc;
	struct http_request *req, *nr;
	struct ibuf *b;

	if (pledge("stdio rpath inet dns recvfd", NULL) == -1)
		err(1, "pledge");

	if (bind_addr != NULL) {
		struct addrinfo hints, *res;

		bzero(&hints, sizeof(hints));
		hints.ai_family = AF_UNSPEC;
		hints.ai_socktype = SOCK_DGRAM; /*dummy*/
		hints.ai_flags = AI_NUMERICHOST;
		/* an address that does not parse is silently not used */
		if (getaddrinfo(bind_addr, NULL, &hints, &res) == 0) {
			memcpy(&http_bindaddr, res->ai_addr, res->ai_addrlen);
			freeaddrinfo(res);
		}
	}
	http_setup();

	/* the CA file is loaded, rpath is no longer needed */
	if (pledge("stdio inet dns recvfd", NULL) == -1)
		err(1, "pledge");

	if ((msgq = msgbuf_new_reader(sizeof(size_t), io_parse_hdr, NULL)) ==
	    NULL)
		err(1, NULL);

	for (;;) {
		time_t now;
		int timeout;
		size_t i;

		/* slot 0 is always the message channel */
		memset(&pfds, 0, sizeof(pfds));
		pfds[0].fd = fd;
		pfds[0].events = POLLIN;
		if (msgbuf_queuelen(msgq) > 0)
			pfds[0].events |= POLLOUT;

		i = 1;
		timeout = INFTIM;
		now = getmonotime();
		LIST_FOREACH(conn, &active, entry) {
			if (i >= NPFDS)
				errx(1, "too many connections");

			/* arm the timeout if it is not running */
			if (conn->io_time == 0) {
				if (conn->state == STATE_CONNECT)
					conn->io_time = now + MAX_CONN_TIMEOUT;
				else
					conn->io_time = now + MAX_IO_TIMEOUT;
			}

			/* poll timeout is the nearest deadline in ms */
			if (conn->io_time <= now)
				timeout = 0;
			else {
				int diff = conn->io_time - now;
				diff *= 1000;
				if (timeout == INFTIM || diff < timeout)
					timeout = diff;
			}
			/* while writing data the output file is polled */
			if (conn->state == STATE_WRITE_DATA)
				pfds[i].fd = conn->req->outfd;
			else
				pfds[i].fd = conn->fd;

			pfds[i].events = conn->events;
			conn->pfd = &pfds[i];
			i++;
		}
		LIST_FOREACH(conn, &idle, entry) {
			if (i >= NPFDS)
				errx(1, "too many connections");

			if (conn->idle_time <= now)
				timeout = 0;
			else {
				int diff = conn->idle_time - now;
				diff *= 1000;
				if (timeout == INFTIM || diff < timeout)
					timeout = diff;
			}
			pfds[i].fd = conn->fd;
			pfds[i].events = POLLIN;
			conn->pfd = &pfds[i];
			i++;
		}

		if (poll(pfds, i, timeout) == -1) {
			if (errno == EINTR)
				continue;
			err(1, "poll");
		}

		/* the other end of the message channel is gone, shut down */
		if (pfds[0].revents & POLLHUP)
			break;
		if (pfds[0].revents & POLLOUT) {
			if (msgbuf_write(fd, msgq) == -1) {
				if (errno == EPIPE)
					errx(1, "write: connection closed");
				else
					err(1, "write");
			}
		}
		if (pfds[0].revents & POLLIN) {
			switch (msgbuf_read(fd, msgq)) {
			case -1:
				err(1, "msgbuf_read");
			case 0:
				errx(1, "msgbuf_read: connection closed");
			}
			while ((b = io_buf_get(msgq)) != NULL) {
				unsigned int id;
				char *uri;
				char *mod;

				io_read_buf(b, &id, sizeof(id));
				io_read_str(b, &uri);
				io_read_str(b, &mod);

				/* queue up new requests */
				http_req_new(id, uri, mod, 0, ibuf_fd_get(b));
				ibuf_free(b);
			}
		}

		now = getmonotime();
		/* process idle connections */
		LIST_FOREACH_SAFE(conn, &idle, entry, nc) {
			if (conn->pfd != NULL && conn->pfd->revents != 0)
				http_do(conn, http_handle);
			else if (conn->idle_time <= now) {
				conn->io_time = 0;
				http_do(conn, http_close);
			}

			if (conn->state == STATE_FREE)
				http_free(conn);
		}

		/* then active http requests */
		LIST_FOREACH_SAFE(conn, &active, entry, nc) {
			/* check if event is ready */
			if (conn->pfd != NULL && conn->pfd->revents != 0)
				http_do(conn, http_handle);
			else if (conn->io_time != 0 && conn->io_time <= now) {
				conn->io_time = 0;
				if (conn->state == STATE_CONNECT) {
					warnx("%s: connect timeout",
					    conn_info(conn));
					http_do(conn, http_connect_failed);
				} else {
					warnx("%s: timeout, connection closed",
					    conn_info(conn));
					http_do(conn, http_failed);
				}
			}

			if (conn->state == STATE_FREE)
				http_free(conn);
		}

		/* start as many queued requests as possible */
		TAILQ_FOREACH_SAFE(req, &queue, entry, nr)
			if (!http_req_schedule(req))
				break;
	}

	exit(0);
}