1 /* $Id: http.c,v 1.8 2016/09/13 17:13:37 deraadt Exp $ */ 2 /* 3 * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/socket.h> 19 #include <sys/param.h> 20 #include <arpa/inet.h> 21 22 #include <ctype.h> 23 #include <err.h> 24 #include <limits.h> 25 #include <netdb.h> 26 #include <stdio.h> 27 #include <stdint.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <tls.h> 31 #include <unistd.h> 32 33 #include "http.h" 34 #include "extern.h" 35 36 /* 37 * A buffer for transferring HTTP/S data. 38 */ 39 struct httpxfer { 40 char *hbuf; /* header transfer buffer */ 41 size_t hbufsz; /* header buffer size */ 42 int headok; /* header has been parsed */ 43 char *bbuf; /* body transfer buffer */ 44 size_t bbufsz; /* body buffer size */ 45 int bodyok; /* body has been parsed */ 46 char *headbuf; /* lookaside buffer for headers */ 47 struct httphead *head; /* parsed headers */ 48 size_t headsz; /* number of headers */ 49 }; 50 51 /* 52 * An HTTP/S connection object. 53 */ 54 struct http { 55 int fd; /* connected socket */ 56 short port; /* port number */ 57 struct source src; /* endpoint (raw) host */ 58 char *path; /* path to request */ 59 char *host; /* name of endpoint host */ 60 struct tls_config *cfg; /* if TLS */ 61 struct tls *ctx; /* if TLS */ 62 writefp writer; /* write function */ 63 readfp reader; /* read function */ 64 }; 65 66 static ssize_t 67 dosysread(char *buf, size_t sz, const struct http *http) 68 { 69 ssize_t rc; 70 71 rc = read(http->fd, buf, sz); 72 if (rc < 0) 73 warn("%s: read", http->src.ip); 74 return (rc); 75 } 76 77 static ssize_t 78 dosyswrite(const void *buf, size_t sz, const struct http *http) 79 { 80 ssize_t rc; 81 82 rc = write(http->fd, buf, sz); 83 if (rc < 0) 84 warn("%s: write", http->src.ip); 85 return (rc); 86 } 87 88 static ssize_t 89 dotlsread(char *buf, size_t sz, const struct http *http) 90 { 91 ssize_t rc; 92 93 do { 94 rc = tls_read(http->ctx, buf, sz); 95 } while (TLS_WANT_POLLIN == rc || TLS_WANT_POLLOUT == rc); 96 97 if (rc < 0) 98 warnx("%s: tls_read: %s", http->src.ip, 99 tls_error(http->ctx)); 100 return (rc); 101 } 102 103 static ssize_t 104 dotlswrite(const void *buf, size_t sz, const struct http *http) 105 { 106 ssize_t rc; 107 108 do { 109 rc = tls_write(http->ctx, buf, sz); 110 } while (TLS_WANT_POLLIN == rc || TLS_WANT_POLLOUT == rc); 111 112 if (rc < 0) 113 warnx("%s: tls_write: %s", http->src.ip, 114 tls_error(http->ctx)); 115 return (rc); 116 } 117 118 static ssize_t 119 http_read(char *buf, size_t sz, const struct http *http) 120 { 121 ssize_t ssz, xfer; 122 123 xfer = 0; 124 do { 125 if ((ssz = http->reader(buf, sz, http)) < 0) 126 return (-1); 127 if (0 == ssz) 128 break; 129 xfer += ssz; 130 sz -= ssz; 131 buf += ssz; 132 } while (ssz > 0 && sz > 0); 133 134 return (xfer); 135 } 136 137 static int 138 http_write(const char *buf, size_t sz, const struct http *http) 139 { 140 ssize_t ssz, xfer; 141 142 xfer = sz; 143 while (sz > 0) { 144 if ((ssz = http->writer(buf, sz, http)) < 0) 145 return (-1); 146 sz -= ssz; 147 buf += (size_t)ssz; 148 } 149 return (xfer); 150 } 151 152 /* 153 * Between 5.8 and 5.9, libtls changed its semantics. 154 * In the old way, tls_close() will close the underlying file 155 * descriptors. 156 * In the new way, it won't. 157 */ 158 void 159 http_disconnect(struct http *http) 160 { 161 162 if (NULL != http->ctx) { 163 /* TLS connection. */ 164 if (-1 == tls_close(http->ctx)) 165 warnx("%s: tls_close: %s", http->src.ip, 166 tls_error(http->ctx)); 167 if (NULL != http->ctx) 168 tls_free(http->ctx); 169 #if ! defined(TLS_READ_AGAIN) && ! defined(TLS_WRITE_AGAIN) 170 if (-1 == close(http->fd)) 171 warn("%s: close", http->src.ip); 172 #endif 173 } else if (-1 != http->fd) { 174 /* Non-TLS connection. */ 175 if (-1 == close(http->fd)) 176 warn("%s: close", http->src.ip); 177 } 178 179 http->fd = -1; 180 http->ctx = NULL; 181 } 182 183 void 184 http_free(struct http *http) 185 { 186 187 if (NULL == http) 188 return; 189 http_disconnect(http); 190 if (NULL != http->cfg) 191 tls_config_free(http->cfg); 192 free(http->host); 193 free(http->path); 194 free(http->src.ip); 195 free(http); 196 } 197 198 struct http * 199 http_alloc(const struct source *addrs, size_t addrsz, 200 const char *host, short port, const char *path) 201 { 202 struct sockaddr_storage ss; 203 int family, fd, c; 204 socklen_t len; 205 size_t cur, i = 0; 206 struct http *http; 207 208 /* Do this while we still have addresses to connect. */ 209 again: 210 if (i == addrsz) 211 return (NULL); 212 cur = i++; 213 214 /* Convert to PF_INET or PF_INET6 address from string. */ 215 216 memset(&ss, 0, sizeof(struct sockaddr_storage)); 217 218 if (4 == addrs[cur].family) { 219 family = PF_INET; 220 ((struct sockaddr_in *)&ss)->sin_family = AF_INET; 221 ((struct sockaddr_in *)&ss)->sin_port = htons(port); 222 c = inet_pton(AF_INET, addrs[cur].ip, 223 &((struct sockaddr_in *)&ss)->sin_addr); 224 len = sizeof(struct sockaddr_in); 225 } else if (6 == addrs[cur].family) { 226 family = PF_INET6; 227 ((struct sockaddr_in6 *)&ss)->sin6_family = AF_INET6; 228 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(port); 229 c = inet_pton(AF_INET6, addrs[cur].ip, 230 &((struct sockaddr_in6 *)&ss)->sin6_addr); 231 len = sizeof(struct sockaddr_in6); 232 } else { 233 warnx("%s: unknown family", addrs[cur].ip); 234 goto again; 235 } 236 237 if (c < 0) { 238 warn("%s: inet_ntop", addrs[cur].ip); 239 goto again; 240 } else if (0 == c) { 241 warnx("%s: inet_ntop", addrs[cur].ip); 242 goto again; 243 } 244 245 /* Create socket and connect. */ 246 247 fd = socket(family, SOCK_STREAM, 0); 248 if (-1 == fd) { 249 warn("%s: socket", addrs[cur].ip); 250 goto again; 251 } else if (-1 == connect(fd, (struct sockaddr *)&ss, len)) { 252 warn("%s: connect", addrs[cur].ip); 253 close(fd); 254 goto again; 255 } 256 257 /* Allocate the communicator. */ 258 259 http = calloc(1, sizeof(struct http)); 260 if (NULL == http) { 261 warn("calloc"); 262 close(fd); 263 return (NULL); 264 } 265 http->fd = fd; 266 http->port = port; 267 http->src.family = addrs[cur].family; 268 http->src.ip = strdup(addrs[cur].ip); 269 http->host = strdup(host); 270 http->path = strdup(path); 271 if (NULL == http->src.ip || NULL == http->host || NULL == http->path) { 272 warn("strdup"); 273 goto err; 274 } 275 276 /* If necessary, do our TLS setup. */ 277 278 if (443 != port) { 279 http->writer = dosyswrite; 280 http->reader = dosysread; 281 return (http); 282 } 283 284 http->writer = dotlswrite; 285 http->reader = dotlsread; 286 287 if (-1 == tls_init()) { 288 warn("tls_init"); 289 goto err; 290 } 291 292 http->cfg = tls_config_new(); 293 if (NULL == http->cfg) { 294 warn("tls_config_new"); 295 goto err; 296 } 297 298 tls_config_set_protocols(http->cfg, TLS_PROTOCOLS_ALL); 299 300 /* FIXME: is this necessary? */ 301 tls_config_insecure_noverifycert(http->cfg); 302 303 if (-1 == tls_config_set_ciphers(http->cfg, "compat")) { 304 warn("tls_config_set_ciphers"); 305 goto err; 306 } else if (NULL == (http->ctx = tls_client())) { 307 warn("tls_client"); 308 goto err; 309 } else if (-1 == tls_configure(http->ctx, http->cfg)) { 310 warnx("%s: tls_configure: %s", 311 http->src.ip, tls_error(http->ctx)); 312 goto err; 313 } 314 315 if (0 != tls_connect_socket(http->ctx, http->fd, http->host)) { 316 warnx("%s: tls_connect_socket: %s, %s", http->src.ip, 317 http->host, tls_error(http->ctx)); 318 goto err; 319 } 320 321 return (http); 322 err: 323 http_free(http); 324 return (NULL); 325 } 326 327 struct httpxfer * 328 http_open(const struct http *http, const void *p, size_t psz) 329 { 330 char *req; 331 int c; 332 struct httpxfer *trans; 333 334 if (NULL == p) { 335 c = asprintf(&req, 336 "GET %s HTTP/1.0\r\n" 337 "Host: %s\r\n" 338 "\r\n", 339 http->path, http->host); 340 } else { 341 c = asprintf(&req, 342 "POST %s HTTP/1.0\r\n" 343 "Host: %s\r\n" 344 "Content-Length: %zu\r\n" 345 "\r\n", 346 http->path, http->host, psz); 347 } 348 if (-1 == c) { 349 warn("asprintf"); 350 return (NULL); 351 } else if (!http_write(req, c, http)) { 352 free(req); 353 return (NULL); 354 } else if (NULL != p && ! http_write(p, psz, http)) { 355 free(req); 356 return (NULL); 357 } 358 359 free(req); 360 361 trans = calloc(1, sizeof(struct httpxfer)); 362 if (NULL == trans) 363 warn("calloc"); 364 return (trans); 365 } 366 367 void 368 http_close(struct httpxfer *x) 369 { 370 371 if (NULL == x) 372 return; 373 free(x->hbuf); 374 free(x->bbuf); 375 free(x->headbuf); 376 free(x->head); 377 free(x); 378 } 379 380 /* 381 * Read the HTTP body from the wire. 382 * If invoked multiple times, this will return the same pointer with the 383 * same data (or NULL, if the original invocation returned NULL). 384 * Returns NULL if read or allocation errors occur. 385 * You must not free the returned pointer. 386 */ 387 char * 388 http_body_read(const struct http *http, struct httpxfer *trans, size_t *sz) 389 { 390 char buf[BUFSIZ]; 391 ssize_t ssz; 392 void *pp; 393 size_t szp; 394 395 if (NULL == sz) 396 sz = &szp; 397 398 /* Have we already parsed this? */ 399 400 if (trans->bodyok > 0) { 401 *sz = trans->bbufsz; 402 return (trans->bbuf); 403 } else if (trans->bodyok < 0) 404 return (NULL); 405 406 *sz = 0; 407 trans->bodyok = -1; 408 409 do { 410 /* If less than sizeof(buf), at EOF. */ 411 if ((ssz = http_read(buf, sizeof(buf), http)) < 0) 412 return (NULL); 413 else if (0 == ssz) 414 break; 415 pp = realloc(trans->bbuf, trans->bbufsz + ssz); 416 if (NULL == pp) { 417 warn("realloc"); 418 return (NULL); 419 } 420 trans->bbuf = pp; 421 memcpy(trans->bbuf + trans->bbufsz, buf, ssz); 422 trans->bbufsz += ssz; 423 } while (sizeof(buf) == ssz); 424 425 trans->bodyok = 1; 426 *sz = trans->bbufsz; 427 return (trans->bbuf); 428 } 429 430 struct httphead * 431 http_head_get(const char *v, struct httphead *h, size_t hsz) 432 { 433 size_t i; 434 435 for (i = 0; i < hsz; i++) { 436 if (strcmp(h[i].key, v)) 437 continue; 438 return (&h[i]); 439 } 440 return (NULL); 441 } 442 443 /* 444 * Look through the headers and determine our HTTP code. 445 * This will return -1 on failure, otherwise the code. 446 */ 447 int 448 http_head_status(const struct http *http, struct httphead *h, size_t sz) 449 { 450 int rc; 451 unsigned int code; 452 struct httphead *st; 453 454 if (NULL == (st = http_head_get("Status", h, sz))) { 455 warnx("%s: no status header", http->src.ip); 456 return (-1); 457 } 458 459 rc = sscanf(st->val, "%*s %u %*s", &code); 460 if (rc < 0) { 461 warn("sscanf"); 462 return (-1); 463 } else if (1 != rc) { 464 warnx("%s: cannot convert status header", http->src.ip); 465 return (-1); 466 } 467 return (code); 468 } 469 470 /* 471 * Parse headers from the transfer. 472 * Malformed headers are skipped. 473 * A special "Status" header is added for the HTTP status line. 474 * This can only happen once http_head_read has been called with 475 * success. 476 * This can be invoked multiple times: it will only parse the headers 477 * once and after that it will just return the cache. 478 * You must not free the returned pointer. 479 * If the original header parse failed, or if memory allocation fails 480 * internally, this returns NULL. 481 */ 482 struct httphead * 483 http_head_parse(const struct http *http, struct httpxfer *trans, size_t *sz) 484 { 485 size_t hsz, szp; 486 struct httphead *h; 487 char *cp, *ep, *ccp, *buf; 488 489 if (NULL == sz) 490 sz = &szp; 491 492 /* 493 * If we've already parsed the headers, return the 494 * previously-parsed buffer now. 495 * If we have errors on the stream, return NULL now. 496 */ 497 498 if (NULL != trans->head) { 499 *sz = trans->headsz; 500 return (trans->head); 501 } else if (trans->headok <= 0) 502 return (NULL); 503 504 if (NULL == (buf = strdup(trans->hbuf))) { 505 warn("strdup"); 506 return (NULL); 507 } 508 hsz = 0; 509 cp = buf; 510 511 do { 512 if (NULL != (cp = strstr(cp, "\r\n"))) 513 cp += 2; 514 hsz++; 515 } while (NULL != cp); 516 517 /* 518 * Allocate headers, then step through the data buffer, parsing 519 * out headers as we have them. 520 * We know at this point that the buffer is nil-terminated in 521 * the usual way. 522 */ 523 524 h = calloc(hsz, sizeof(struct httphead)); 525 if (NULL == h) { 526 warn("calloc"); 527 free(buf); 528 return (NULL); 529 } 530 531 *sz = hsz; 532 hsz = 0; 533 cp = buf; 534 535 do { 536 if (NULL != (ep = strstr(cp, "\r\n"))) { 537 *ep = '\0'; 538 ep += 2; 539 } 540 if (0 == hsz) { 541 h[hsz].key = "Status"; 542 h[hsz++].val = cp; 543 continue; 544 } 545 546 /* Skip bad headers. */ 547 if (NULL == (ccp = strchr(cp, ':'))) { 548 warnx("%s: header without separator", http->src.ip); 549 continue; 550 } 551 552 *ccp++ = '\0'; 553 while (isspace((int)*ccp)) 554 ccp++; 555 h[hsz].key = cp; 556 h[hsz++].val = ccp; 557 } while (NULL != (cp = ep)); 558 559 trans->headbuf = buf; 560 trans->head = h; 561 trans->headsz = hsz; 562 return (h); 563 } 564 565 /* 566 * Read the HTTP headers from the wire. 567 * If invoked multiple times, this will return the same pointer with the 568 * same data (or NULL, if the original invocation returned NULL). 569 * Returns NULL if read or allocation errors occur. 570 * You must not free the returned pointer. 571 */ 572 char * 573 http_head_read(const struct http *http, struct httpxfer *trans, size_t *sz) 574 { 575 char buf[BUFSIZ]; 576 ssize_t ssz; 577 char *ep; 578 void *pp; 579 size_t szp; 580 581 if (NULL == sz) 582 sz = &szp; 583 584 /* Have we already parsed this? */ 585 586 if (trans->headok > 0) { 587 *sz = trans->hbufsz; 588 return (trans->hbuf); 589 } else if (trans->headok < 0) 590 return (NULL); 591 592 *sz = 0; 593 ep = NULL; 594 trans->headok = -1; 595 596 /* 597 * Begin by reading by BUFSIZ blocks until we reach the header 598 * termination marker (two CRLFs). 599 * We might read into our body, but that's ok: we'll copy out 600 * the body parts into our body buffer afterward. 601 */ 602 603 do { 604 /* If less than sizeof(buf), at EOF. */ 605 if ((ssz = http_read(buf, sizeof(buf), http)) < 0) 606 return (NULL); 607 else if (0 == ssz) 608 break; 609 pp = realloc(trans->hbuf, trans->hbufsz + ssz); 610 if (NULL == pp) { 611 warn("realloc"); 612 return (NULL); 613 } 614 trans->hbuf = pp; 615 memcpy(trans->hbuf + trans->hbufsz, buf, ssz); 616 trans->hbufsz += ssz; 617 /* Search for end of headers marker. */ 618 ep = memmem(trans->hbuf, trans->hbufsz, "\r\n\r\n", 4); 619 } while (NULL == ep && sizeof(buf) == ssz); 620 621 if (NULL == ep) { 622 warnx("%s: partial transfer", http->src.ip); 623 return (NULL); 624 } 625 *ep = '\0'; 626 627 /* 628 * The header data is invalid if it has any binary characters in 629 * it: check that now. 630 * This is important because we want to guarantee that all 631 * header keys and pairs are properly nil-terminated. 632 */ 633 634 if (strlen(trans->hbuf) != (uintptr_t)(ep - trans->hbuf)) { 635 warnx("%s: binary data in header", http->src.ip); 636 return (NULL); 637 } 638 639 /* 640 * Copy remaining buffer into body buffer. 641 */ 642 643 ep += 4; 644 trans->bbufsz = (trans->hbuf + trans->hbufsz) - ep; 645 trans->bbuf = malloc(trans->bbufsz); 646 if (NULL == trans->bbuf) { 647 warn("malloc"); 648 return (NULL); 649 } 650 memcpy(trans->bbuf, ep, trans->bbufsz); 651 652 trans->headok = 1; 653 *sz = trans->hbufsz; 654 return (trans->hbuf); 655 } 656 657 void 658 http_get_free(struct httpget *g) 659 { 660 661 if (NULL == g) 662 return; 663 http_close(g->xfer); 664 http_free(g->http); 665 free(g); 666 } 667 668 struct httpget * 669 http_get(const struct source *addrs, size_t addrsz, const char *domain, 670 short port, const char *path, const void *post, size_t postsz) 671 { 672 struct http *h; 673 struct httpxfer *x; 674 struct httpget *g; 675 struct httphead *head; 676 size_t headsz, bodsz, headrsz; 677 int code; 678 char *bod, *headr; 679 680 h = http_alloc(addrs, addrsz, domain, port, path); 681 if (NULL == h) 682 return (NULL); 683 684 if (NULL == (x = http_open(h, post, postsz))) { 685 http_free(h); 686 return (NULL); 687 } else if (NULL == (headr = http_head_read(h, x, &headrsz))) { 688 http_close(x); 689 http_free(h); 690 return (NULL); 691 } else if (NULL == (bod = http_body_read(h, x, &bodsz))) { 692 http_close(x); 693 http_free(h); 694 return (NULL); 695 } 696 697 http_disconnect(h); 698 699 if (NULL == (head = http_head_parse(h, x, &headsz))) { 700 http_close(x); 701 http_free(h); 702 return (NULL); 703 } else if ((code = http_head_status(h, head, headsz)) < 0) { 704 http_close(x); 705 http_free(h); 706 return (NULL); 707 } 708 709 if (NULL == (g = calloc(1, sizeof(struct httpget)))) { 710 warn("calloc"); 711 http_close(x); 712 http_free(h); 713 return (NULL); 714 } 715 716 g->headpart = headr; 717 g->headpartsz = headrsz; 718 g->bodypart = bod; 719 g->bodypartsz = bodsz; 720 g->head = head; 721 g->headsz = headsz; 722 g->code = code; 723 g->xfer = x; 724 g->http = h; 725 return (g); 726 } 727 728 #if 0 729 int 730 main(void) 731 { 732 struct httpget *g; 733 struct httphead *httph; 734 size_t i, httphsz; 735 struct source addrs[2]; 736 size_t addrsz; 737 738 #if 0 739 addrs[0].ip = "127.0.0.1"; 740 addrs[0].family = 4; 741 addrsz = 1; 742 #else 743 addrs[0].ip = "2a00:1450:400a:806::2004"; 744 addrs[0].family = 6; 745 addrs[1].ip = "193.135.3.123"; 746 addrs[1].family = 4; 747 addrsz = 2; 748 #endif 749 750 #if 0 751 g = http_get(addrs, addrsz, "localhost", 80, "/index.html"); 752 #else 753 g = http_get(addrs, addrsz, "www.google.ch", 80, "/index.html", 754 NULL, 0); 755 #endif 756 757 if (NULL == g) 758 errx(EXIT_FAILURE, "http_get"); 759 760 httph = http_head_parse(g->http, g->xfer, &httphsz); 761 warnx("code: %d", g->code); 762 763 for (i = 0; i < httphsz; i++) 764 warnx("head: [%s]=[%s]", httph[i].key, httph[i].val); 765 766 http_get_free(g); 767 return (EXIT_SUCCESS); 768 } 769 #endif 770