1 /* $NetBSD: http.c,v 1.3 2014/01/07 02:13:00 joerg Exp $ */ 2 /*- 3 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav 4 * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org> 5 * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 * 31 * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $ 32 */ 33 34 /* 35 * The following copyright applies to the base64 code: 36 * 37 *- 38 * Copyright 1997 Massachusetts Institute of Technology 39 * 40 * Permission to use, copy, modify, and distribute this software and 41 * its documentation for any purpose and without fee is hereby 42 * granted, provided that both the above copyright notice and this 43 * permission notice appear in all copies, that both the above 44 * copyright notice and this permission notice appear in all 45 * supporting documentation, and that the name of M.I.T. not be used 46 * in advertising or publicity pertaining to distribution of the 47 * software without specific, written prior permission. M.I.T. makes 48 * no representations about the suitability of this software for any 49 * purpose. It is provided "as is" without express or implied 50 * warranty. 51 * 52 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 53 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 54 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 56 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 57 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 58 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 59 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 60 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 61 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 62 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 */ 65 66 #if defined(__linux__) || defined(__MINT__) 67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. 
*/ 68 #define _GNU_SOURCE 69 #endif 70 71 /* Needed for gmtime_r on Interix */ 72 #define _REENTRANT 73 74 #if HAVE_CONFIG_H 75 #include "config.h" 76 #endif 77 #ifndef NETBSD 78 #include <nbcompat.h> 79 #endif 80 81 #include <sys/types.h> 82 #include <sys/socket.h> 83 84 #include <ctype.h> 85 #include <errno.h> 86 #include <locale.h> 87 #include <stdarg.h> 88 #ifndef NETBSD 89 #include <nbcompat/stdio.h> 90 #else 91 #include <stdio.h> 92 #endif 93 #include <stdlib.h> 94 #include <string.h> 95 #include <time.h> 96 #include <unistd.h> 97 98 #include <netinet/in.h> 99 #include <netinet/tcp.h> 100 101 #ifndef NETBSD 102 #include <nbcompat/netdb.h> 103 #else 104 #include <netdb.h> 105 #endif 106 107 #include <arpa/inet.h> 108 109 #include "fetch.h" 110 #include "common.h" 111 #include "httperr.h" 112 113 /* Maximum number of redirects to follow */ 114 #define MAX_REDIRECT 5 115 116 /* Symbolic names for reply codes we care about */ 117 #define HTTP_OK 200 118 #define HTTP_PARTIAL 206 119 #define HTTP_MOVED_PERM 301 120 #define HTTP_MOVED_TEMP 302 121 #define HTTP_SEE_OTHER 303 122 #define HTTP_NOT_MODIFIED 304 123 #define HTTP_TEMP_REDIRECT 307 124 #define HTTP_NEED_AUTH 401 125 #define HTTP_NEED_PROXY_AUTH 407 126 #define HTTP_BAD_RANGE 416 127 #define HTTP_PROTOCOL_ERROR 999 128 129 #define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ 130 || (xyz) == HTTP_MOVED_TEMP \ 131 || (xyz) == HTTP_TEMP_REDIRECT \ 132 || (xyz) == HTTP_SEE_OTHER) 133 134 #define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599) 135 136 137 /***************************************************************************** 138 * I/O functions for decoding chunked streams 139 */ 140 141 struct httpio 142 { 143 conn_t *conn; /* connection */ 144 int chunked; /* chunked mode */ 145 int keep_alive; /* keep-alive mode */ 146 char *buf; /* chunk buffer */ 147 size_t bufsize; /* size of chunk buffer */ 148 ssize_t buflen; /* amount of data currently in buffer */ 149 size_t bufpos; /* current read offset in 
buffer */ 150 int eof; /* end-of-file flag */ 151 int error; /* error flag */ 152 size_t chunksize; /* remaining size of current chunk */ 153 off_t contentlength; /* remaining size of the content */ 154 }; 155 156 /* 157 * Get next chunk header 158 */ 159 static ssize_t 160 http_new_chunk(struct httpio *io) 161 { 162 char *p; 163 164 if (fetch_getln(io->conn) == -1) 165 return (-1); 166 167 if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf)) 168 return (-1); 169 170 for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) { 171 if (*p == ';') 172 break; 173 if (!isxdigit((unsigned char)*p)) 174 return (-1); 175 if (isdigit((unsigned char)*p)) { 176 io->chunksize = io->chunksize * 16 + 177 *p - '0'; 178 } else { 179 io->chunksize = io->chunksize * 16 + 180 10 + tolower((unsigned char)*p) - 'a'; 181 } 182 } 183 184 return (io->chunksize); 185 } 186 187 /* 188 * Grow the input buffer to at least len bytes 189 */ 190 static int 191 http_growbuf(struct httpio *io, size_t len) 192 { 193 char *tmp; 194 195 if (io->bufsize >= len) 196 return (0); 197 198 if ((tmp = realloc(io->buf, len)) == NULL) 199 return (-1); 200 io->buf = tmp; 201 io->bufsize = len; 202 return (0); 203 } 204 205 /* 206 * Fill the input buffer, do chunk decoding on the fly 207 */ 208 static ssize_t 209 http_fillbuf(struct httpio *io, size_t len) 210 { 211 if (io->error) 212 return (-1); 213 if (io->eof) 214 return (0); 215 216 if (io->contentlength >= 0 && (off_t)len > io->contentlength) 217 len = io->contentlength; 218 219 if (io->chunked == 0) { 220 if (http_growbuf(io, len) == -1) 221 return (-1); 222 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) { 223 io->error = 1; 224 return (-1); 225 } 226 if (io->contentlength) 227 io->contentlength -= io->buflen; 228 io->bufpos = 0; 229 return (io->buflen); 230 } 231 232 if (io->chunksize == 0) { 233 switch (http_new_chunk(io)) { 234 case -1: 235 io->error = 1; 236 return (-1); 237 case 0: 238 io->eof = 1; 239 if 
(fetch_getln(io->conn) == -1) 240 return (-1); 241 return (0); 242 } 243 } 244 245 if (len > io->chunksize) 246 len = io->chunksize; 247 if (http_growbuf(io, len) == -1) 248 return (-1); 249 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) { 250 io->error = 1; 251 return (-1); 252 } 253 io->chunksize -= io->buflen; 254 if (io->contentlength >= 0) 255 io->contentlength -= io->buflen; 256 257 if (io->chunksize == 0) { 258 char endl[2]; 259 ssize_t len2; 260 261 len2 = fetch_read(io->conn, endl, 2); 262 if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1) 263 return (-1); 264 if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n') 265 return (-1); 266 } 267 268 io->bufpos = 0; 269 270 return (io->buflen); 271 } 272 273 /* 274 * Read function 275 */ 276 static ssize_t 277 http_readfn(void *v, void *buf, size_t len) 278 { 279 struct httpio *io = (struct httpio *)v; 280 size_t l, pos; 281 282 if (io->error) 283 return (-1); 284 if (io->eof) 285 return (0); 286 287 for (pos = 0; len > 0; pos += l, len -= l) { 288 /* empty buffer */ 289 if (!io->buf || (ssize_t)io->bufpos == io->buflen) 290 if (http_fillbuf(io, len) < 1) 291 break; 292 l = io->buflen - io->bufpos; 293 if (len < l) 294 l = len; 295 memcpy((char *)buf + pos, io->buf + io->bufpos, l); 296 io->bufpos += l; 297 } 298 299 if (!pos && io->error) 300 return (-1); 301 return (pos); 302 } 303 304 /* 305 * Write function 306 */ 307 static ssize_t 308 http_writefn(void *v, const void *buf, size_t len) 309 { 310 struct httpio *io = (struct httpio *)v; 311 312 return (fetch_write(io->conn, buf, len)); 313 } 314 315 /* 316 * Close function 317 */ 318 static void 319 http_closefn(void *v) 320 { 321 struct httpio *io = (struct httpio *)v; 322 323 if (io->keep_alive) { 324 int val; 325 326 val = 0; 327 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val, 328 (socklen_t)sizeof(val)); 329 fetch_cache_put(io->conn, fetch_close); 330 #ifdef TCP_NOPUSH 331 val = 1; 332 setsockopt(io->conn->sd, IPPROTO_TCP, 
TCP_NOPUSH, &val, 333 sizeof(val)); 334 #endif 335 } else { 336 fetch_close(io->conn); 337 } 338 339 free(io->buf); 340 free(io); 341 } 342 343 /* 344 * Wrap a file descriptor up 345 */ 346 static fetchIO * 347 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength) 348 { 349 struct httpio *io; 350 fetchIO *f; 351 352 if ((io = calloc(1, sizeof(*io))) == NULL) { 353 fetch_syserr(); 354 return (NULL); 355 } 356 io->conn = conn; 357 io->chunked = chunked; 358 io->contentlength = clength; 359 io->keep_alive = keep_alive; 360 f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn); 361 if (f == NULL) { 362 fetch_syserr(); 363 free(io); 364 return (NULL); 365 } 366 return (f); 367 } 368 369 370 /***************************************************************************** 371 * Helper functions for talking to the server and parsing its replies 372 */ 373 374 /* Header types */ 375 typedef enum { 376 hdr_syserror = -2, 377 hdr_error = -1, 378 hdr_end = 0, 379 hdr_unknown = 1, 380 hdr_connection, 381 hdr_content_length, 382 hdr_content_range, 383 hdr_last_modified, 384 hdr_location, 385 hdr_transfer_encoding, 386 hdr_www_authenticate 387 } hdr_t; 388 389 /* Names of interesting headers */ 390 static struct { 391 hdr_t num; 392 const char *name; 393 } hdr_names[] = { 394 { hdr_connection, "Connection" }, 395 { hdr_content_length, "Content-Length" }, 396 { hdr_content_range, "Content-Range" }, 397 { hdr_last_modified, "Last-Modified" }, 398 { hdr_location, "Location" }, 399 { hdr_transfer_encoding, "Transfer-Encoding" }, 400 { hdr_www_authenticate, "WWW-Authenticate" }, 401 { hdr_unknown, NULL }, 402 }; 403 404 /* 405 * Send a formatted line; optionally echo to terminal 406 */ 407 __printflike(2, 3) 408 static int 409 http_cmd(conn_t *conn, const char *fmt, ...) 
410 { 411 va_list ap; 412 size_t len; 413 char *msg; 414 ssize_t r; 415 416 va_start(ap, fmt); 417 len = vasprintf(&msg, fmt, ap); 418 va_end(ap); 419 420 if (msg == NULL) { 421 errno = ENOMEM; 422 fetch_syserr(); 423 return (-1); 424 } 425 426 r = fetch_write(conn, msg, len); 427 free(msg); 428 429 if (r == -1) { 430 fetch_syserr(); 431 return (-1); 432 } 433 434 return (0); 435 } 436 437 /* 438 * Get and parse status line 439 */ 440 static int 441 http_get_reply(conn_t *conn) 442 { 443 char *p; 444 445 if (fetch_getln(conn) == -1) 446 return (-1); 447 /* 448 * A valid status line looks like "HTTP/m.n xyz reason" where m 449 * and n are the major and minor protocol version numbers and xyz 450 * is the reply code. 451 * Unfortunately, there are servers out there (NCSA 1.5.1, to name 452 * just one) that do not send a version number, so we can't rely 453 * on finding one, but if we do, insist on it being 1.0 or 1.1. 454 * We don't care about the reason phrase. 455 */ 456 if (strncmp(conn->buf, "HTTP", 4) != 0) 457 return (HTTP_PROTOCOL_ERROR); 458 p = conn->buf + 4; 459 if (*p == '/') { 460 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1')) 461 return (HTTP_PROTOCOL_ERROR); 462 p += 4; 463 } 464 if (*p != ' ' || 465 !isdigit((unsigned char)p[1]) || 466 !isdigit((unsigned char)p[2]) || 467 !isdigit((unsigned char)p[3])) 468 return (HTTP_PROTOCOL_ERROR); 469 470 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'); 471 return (conn->err); 472 } 473 474 /* 475 * Check a header; if the type matches the given string, return a pointer 476 * to the beginning of the value. 
477 */ 478 static const char * 479 http_match(const char *str, const char *hdr) 480 { 481 while (*str && *hdr && 482 tolower((unsigned char)*str++) == tolower((unsigned char)*hdr++)) 483 /* nothing */; 484 if (*str || *hdr != ':') 485 return (NULL); 486 while (*hdr && isspace((unsigned char)*++hdr)) 487 /* nothing */; 488 return (hdr); 489 } 490 491 /* 492 * Get the next header and return the appropriate symbolic code. 493 */ 494 static hdr_t 495 http_next_header(conn_t *conn, const char **p) 496 { 497 int i; 498 499 if (fetch_getln(conn) == -1) 500 return (hdr_syserror); 501 while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1])) 502 conn->buflen--; 503 conn->buf[conn->buflen] = '\0'; 504 if (conn->buflen == 0) 505 return (hdr_end); 506 /* 507 * We could check for malformed headers but we don't really care. 508 * A valid header starts with a token immediately followed by a 509 * colon; a token is any sequence of non-control, non-whitespace 510 * characters except "()<>@,;:\\\"{}". 
511 */ 512 for (i = 0; hdr_names[i].num != hdr_unknown; i++) 513 if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL) 514 return (hdr_names[i].num); 515 return (hdr_unknown); 516 } 517 518 /* 519 * Parse a last-modified header 520 */ 521 static int 522 http_parse_mtime(const char *p, time_t *mtime) 523 { 524 char locale[64], *r; 525 struct tm tm; 526 527 strncpy(locale, setlocale(LC_TIME, NULL), sizeof(locale)); 528 setlocale(LC_TIME, "C"); 529 r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 530 /* XXX should add support for date-2 and date-3 */ 531 setlocale(LC_TIME, locale); 532 if (r == NULL) 533 return (-1); 534 *mtime = timegm(&tm); 535 return (0); 536 } 537 538 /* 539 * Parse a content-length header 540 */ 541 static int 542 http_parse_length(const char *p, off_t *length) 543 { 544 off_t len; 545 546 for (len = 0; *p && isdigit((unsigned char)*p); ++p) 547 len = len * 10 + (*p - '0'); 548 if (*p) 549 return (-1); 550 *length = len; 551 return (0); 552 } 553 554 /* 555 * Parse a content-range header 556 */ 557 static int 558 http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size) 559 { 560 off_t first, last, len; 561 562 if (strncasecmp(p, "bytes ", 6) != 0) 563 return (-1); 564 p += 6; 565 if (*p == '*') { 566 first = last = -1; 567 ++p; 568 } else { 569 for (first = 0; *p && isdigit((unsigned char)*p); ++p) 570 first = first * 10 + *p - '0'; 571 if (*p != '-') 572 return (-1); 573 for (last = 0, ++p; *p && isdigit((unsigned char)*p); ++p) 574 last = last * 10 + *p - '0'; 575 } 576 if (first > last || *p != '/') 577 return (-1); 578 for (len = 0, ++p; *p && isdigit((unsigned char)*p); ++p) 579 len = len * 10 + *p - '0'; 580 if (*p || len < last - first + 1) 581 return (-1); 582 if (first == -1) 583 *length = 0; 584 else 585 *length = last - first + 1; 586 *offset = first; 587 *size = len; 588 return (0); 589 } 590 591 592 /***************************************************************************** 593 * Helper functions for 
authorization 594 */ 595 596 /* 597 * Base64 encoding 598 */ 599 static char * 600 http_base64(const char *src) 601 { 602 static const char base64[] = 603 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 604 "abcdefghijklmnopqrstuvwxyz" 605 "0123456789+/"; 606 char *str, *dst; 607 size_t l; 608 unsigned int t, r; 609 610 l = strlen(src); 611 if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL) 612 return (NULL); 613 dst = str; 614 r = 0; 615 616 while (l >= 3) { 617 t = (src[0] << 16) | (src[1] << 8) | src[2]; 618 dst[0] = base64[(t >> 18) & 0x3f]; 619 dst[1] = base64[(t >> 12) & 0x3f]; 620 dst[2] = base64[(t >> 6) & 0x3f]; 621 dst[3] = base64[(t >> 0) & 0x3f]; 622 src += 3; l -= 3; 623 dst += 4; r += 4; 624 } 625 626 switch (l) { 627 case 2: 628 t = (src[0] << 16) | (src[1] << 8); 629 dst[0] = base64[(t >> 18) & 0x3f]; 630 dst[1] = base64[(t >> 12) & 0x3f]; 631 dst[2] = base64[(t >> 6) & 0x3f]; 632 dst[3] = '='; 633 dst += 4; 634 r += 4; 635 break; 636 case 1: 637 t = src[0] << 16; 638 dst[0] = base64[(t >> 18) & 0x3f]; 639 dst[1] = base64[(t >> 12) & 0x3f]; 640 dst[2] = dst[3] = '='; 641 dst += 4; 642 r += 4; 643 break; 644 case 0: 645 break; 646 } 647 648 *dst = 0; 649 return (str); 650 } 651 652 /* 653 * Encode username and password 654 */ 655 static int 656 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd) 657 { 658 char *upw, *auth; 659 int r; 660 661 if (asprintf(&upw, "%s:%s", usr, pwd) == -1) 662 return (-1); 663 auth = http_base64(upw); 664 free(upw); 665 if (auth == NULL) 666 return (-1); 667 r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth); 668 free(auth); 669 return (r); 670 } 671 672 /* 673 * Send an authorization header 674 */ 675 static int 676 http_authorize(conn_t *conn, const char *hdr, const char *p) 677 { 678 /* basic authorization */ 679 if (strncasecmp(p, "basic:", 6) == 0) { 680 char *user, *pwd, *str; 681 int r; 682 683 /* skip realm */ 684 for (p += 6; *p && *p != ':'; ++p) 685 /* nothing */ ; 686 if (!*p || strchr(++p, ':') == 
NULL) 687 return (-1); 688 if ((str = strdup(p)) == NULL) 689 return (-1); /* XXX */ 690 user = str; 691 pwd = strchr(str, ':'); 692 *pwd++ = '\0'; 693 r = http_basic_auth(conn, hdr, user, pwd); 694 free(str); 695 return (r); 696 } 697 return (-1); 698 } 699 700 701 /***************************************************************************** 702 * Helper functions for connecting to a server or proxy 703 */ 704 705 /* 706 * Connect to the correct HTTP server or proxy. 707 */ 708 static conn_t * 709 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached) 710 { 711 conn_t *conn; 712 int af, verbose; 713 #ifdef TCP_NOPUSH 714 int val; 715 #endif 716 717 *cached = 1; 718 719 #ifdef INET6 720 af = AF_UNSPEC; 721 #else 722 af = AF_INET; 723 #endif 724 725 verbose = CHECK_FLAG('v'); 726 if (CHECK_FLAG('4')) 727 af = AF_INET; 728 #ifdef INET6 729 else if (CHECK_FLAG('6')) 730 af = AF_INET6; 731 #endif 732 733 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) { 734 URL = purl; 735 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 736 /* can't talk http to an ftp server */ 737 /* XXX should set an error code */ 738 return (NULL); 739 } 740 741 if ((conn = fetch_cache_get(URL, af)) != NULL) { 742 *cached = 1; 743 return (conn); 744 } 745 746 if ((conn = fetch_connect(URL, af, verbose)) == NULL) 747 /* fetch_connect() has already set an error code */ 748 return (NULL); 749 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && 750 fetch_ssl(conn, verbose) == -1) { 751 fetch_close(conn); 752 /* grrr */ 753 #ifdef EAUTH 754 errno = EAUTH; 755 #else 756 errno = EPERM; 757 #endif 758 fetch_syserr(); 759 return (NULL); 760 } 761 762 #ifdef TCP_NOPUSH 763 val = 1; 764 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val)); 765 #endif 766 767 return (conn); 768 } 769 770 static struct url * 771 http_get_proxy(struct url * url, const char *flags) 772 { 773 struct url *purl; 774 char *p; 775 776 if (flags != NULL && strchr(flags, 'd') != NULL) 
777 return (NULL); 778 if (fetch_no_proxy_match(url->host)) 779 return (NULL); 780 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) && 781 *p && (purl = fetchParseURL(p))) { 782 if (!*purl->scheme) 783 strcpy(purl->scheme, SCHEME_HTTP); 784 if (!purl->port) 785 purl->port = fetch_default_proxy_port(purl->scheme); 786 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0) 787 return (purl); 788 fetchFreeURL(purl); 789 } 790 return (NULL); 791 } 792 793 static void 794 set_if_modified_since(conn_t *conn, time_t last_modified) 795 { 796 static const char weekdays[] = "SunMonTueWedThuFriSat"; 797 static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec"; 798 struct tm tm; 799 char buf[80]; 800 gmtime_r(&last_modified, &tm); 801 snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4d %02d:%02d:%02d GMT", 802 weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3, 803 tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec); 804 http_cmd(conn, "If-Modified-Since: %s\r\n", buf); 805 } 806 807 808 /***************************************************************************** 809 * Core 810 */ 811 812 /* 813 * Send a request and process the reply 814 * 815 * XXX This function is way too long, the do..while loop should be split 816 * XXX off into a separate function. 
817 */ 818 fetchIO * 819 http_request(struct url *URL, const char *op, struct url_stat *us, 820 struct url *purl, const char *flags) 821 { 822 conn_t *conn; 823 struct url *url, *new; 824 int chunked, direct, if_modified_since, need_auth, noredirect; 825 int keep_alive, verbose, cached; 826 int e, i, n, val; 827 off_t offset, clength, length, size; 828 time_t mtime; 829 const char *p; 830 fetchIO *f; 831 hdr_t h; 832 char hbuf[URL_HOSTLEN + 7], *host; 833 834 direct = CHECK_FLAG('d'); 835 noredirect = CHECK_FLAG('A'); 836 verbose = CHECK_FLAG('v'); 837 if_modified_since = CHECK_FLAG('i'); 838 keep_alive = 0; 839 840 if (direct && purl) { 841 fetchFreeURL(purl); 842 purl = NULL; 843 } 844 845 /* try the provided URL first */ 846 url = URL; 847 848 /* if the A flag is set, we only get one try */ 849 n = noredirect ? 1 : MAX_REDIRECT; 850 i = 0; 851 852 e = HTTP_PROTOCOL_ERROR; 853 need_auth = 0; 854 do { 855 new = NULL; 856 chunked = 0; 857 offset = 0; 858 clength = -1; 859 length = -1; 860 size = -1; 861 mtime = 0; 862 863 /* check port */ 864 if (!url->port) 865 url->port = fetch_default_port(url->scheme); 866 867 /* were we redirected to an FTP URL? 
*/ 868 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) { 869 if (strcmp(op, "GET") == 0) 870 return (ftp_request(url, "RETR", NULL, us, purl, flags)); 871 else if (strcmp(op, "HEAD") == 0) 872 return (ftp_request(url, "STAT", NULL, us, purl, flags)); 873 } 874 875 /* connect to server or proxy */ 876 if ((conn = http_connect(url, purl, flags, &cached)) == NULL) 877 goto ouch; 878 879 host = url->host; 880 #ifdef INET6 881 if (strchr(url->host, ':')) { 882 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); 883 host = hbuf; 884 } 885 #endif 886 if (url->port != fetch_default_port(url->scheme)) { 887 if (host != hbuf) { 888 strcpy(hbuf, host); 889 host = hbuf; 890 } 891 snprintf(hbuf + strlen(hbuf), 892 sizeof(hbuf) - strlen(hbuf), ":%d", url->port); 893 } 894 895 /* send request */ 896 if (verbose) 897 fetch_info("requesting %s://%s%s", 898 url->scheme, host, url->doc); 899 if (purl) { 900 http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n", 901 op, url->scheme, host, url->doc); 902 } else { 903 http_cmd(conn, "%s %s HTTP/1.1\r\n", 904 op, url->doc); 905 } 906 907 if (if_modified_since && url->last_modified > 0) 908 set_if_modified_since(conn, url->last_modified); 909 910 /* virtual host */ 911 http_cmd(conn, "Host: %s\r\n", host); 912 913 /* proxy authorization */ 914 if (purl) { 915 if (*purl->user || *purl->pwd) 916 http_basic_auth(conn, "Proxy-Authorization", 917 purl->user, purl->pwd); 918 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0') 919 http_authorize(conn, "Proxy-Authorization", p); 920 } 921 922 /* server authorization */ 923 if (need_auth || *url->user || *url->pwd) { 924 if (*url->user || *url->pwd) 925 http_basic_auth(conn, "Authorization", url->user, url->pwd); 926 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0') 927 http_authorize(conn, "Authorization", p); 928 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) { 929 http_basic_auth(conn, "Authorization", url->user, url->pwd); 930 } else { 931 http_seterr(HTTP_NEED_AUTH); 
932 goto ouch; 933 } 934 } 935 936 /* other headers */ 937 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') { 938 if (strcasecmp(p, "auto") == 0) 939 http_cmd(conn, "Referer: %s://%s%s\r\n", 940 url->scheme, host, url->doc); 941 else 942 http_cmd(conn, "Referer: %s\r\n", p); 943 } 944 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0') 945 http_cmd(conn, "User-Agent: %s\r\n", p); 946 else 947 http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER); 948 if (url->offset > 0) 949 http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset); 950 http_cmd(conn, "\r\n"); 951 952 /* 953 * Force the queued request to be dispatched. Normally, one 954 * would do this with shutdown(2) but squid proxies can be 955 * configured to disallow such half-closed connections. To 956 * be compatible with such configurations, fiddle with socket 957 * options to force the pending data to be written. 958 */ 959 #ifdef TCP_NOPUSH 960 val = 0; 961 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, 962 sizeof(val)); 963 #endif 964 val = 1; 965 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val, 966 (socklen_t)sizeof(val)); 967 968 /* get reply */ 969 switch (http_get_reply(conn)) { 970 case HTTP_OK: 971 case HTTP_PARTIAL: 972 case HTTP_NOT_MODIFIED: 973 /* fine */ 974 break; 975 case HTTP_MOVED_PERM: 976 case HTTP_MOVED_TEMP: 977 case HTTP_SEE_OTHER: 978 /* 979 * Not so fine, but we still have to read the 980 * headers to get the new location. 981 */ 982 break; 983 case HTTP_NEED_AUTH: 984 if (need_auth) { 985 /* 986 * We already sent out authorization code, 987 * so there's nothing more we can do. 988 */ 989 http_seterr(conn->err); 990 goto ouch; 991 } 992 /* try again, but send the password this time */ 993 if (verbose) 994 fetch_info("server requires authorization"); 995 break; 996 case HTTP_NEED_PROXY_AUTH: 997 /* 998 * If we're talking to a proxy, we already sent 999 * our proxy authorization code, so there's 1000 * nothing more we can do. 
1001 */ 1002 http_seterr(conn->err); 1003 goto ouch; 1004 case HTTP_BAD_RANGE: 1005 /* 1006 * This can happen if we ask for 0 bytes because 1007 * we already have the whole file. Consider this 1008 * a success for now, and check sizes later. 1009 */ 1010 break; 1011 case HTTP_PROTOCOL_ERROR: 1012 /* fall through */ 1013 case -1: 1014 --i; 1015 if (cached) 1016 continue; 1017 fetch_syserr(); 1018 goto ouch; 1019 default: 1020 http_seterr(conn->err); 1021 if (!verbose) 1022 goto ouch; 1023 /* fall through so we can get the full error message */ 1024 } 1025 1026 /* get headers */ 1027 do { 1028 switch ((h = http_next_header(conn, &p))) { 1029 case hdr_syserror: 1030 fetch_syserr(); 1031 goto ouch; 1032 case hdr_error: 1033 http_seterr(HTTP_PROTOCOL_ERROR); 1034 goto ouch; 1035 case hdr_connection: 1036 /* XXX too weak? */ 1037 keep_alive = (strcasecmp(p, "keep-alive") == 0); 1038 break; 1039 case hdr_content_length: 1040 http_parse_length(p, &clength); 1041 break; 1042 case hdr_content_range: 1043 http_parse_range(p, &offset, &length, &size); 1044 break; 1045 case hdr_last_modified: 1046 http_parse_mtime(p, &mtime); 1047 break; 1048 case hdr_location: 1049 if (!HTTP_REDIRECT(conn->err)) 1050 break; 1051 if (new) 1052 free(new); 1053 if (verbose) 1054 fetch_info("%d redirect to %s", conn->err, p); 1055 if (*p == '/') 1056 /* absolute path */ 1057 new = fetchMakeURL(url->scheme, url->host, url->port, p, 1058 url->user, url->pwd); 1059 else 1060 new = fetchParseURL(p); 1061 if (new == NULL) { 1062 /* XXX should set an error code */ 1063 goto ouch; 1064 } 1065 if (!*new->user && !*new->pwd) { 1066 strcpy(new->user, url->user); 1067 strcpy(new->pwd, url->pwd); 1068 } 1069 new->offset = url->offset; 1070 new->length = url->length; 1071 break; 1072 case hdr_transfer_encoding: 1073 /* XXX weak test*/ 1074 chunked = (strcasecmp(p, "chunked") == 0); 1075 break; 1076 case hdr_www_authenticate: 1077 if (conn->err != HTTP_NEED_AUTH) 1078 break; 1079 /* if we were smarter, we'd 
check the method and realm */ 1080 break; 1081 case hdr_end: 1082 /* fall through */ 1083 case hdr_unknown: 1084 /* ignore */ 1085 break; 1086 } 1087 } while (h > hdr_end); 1088 1089 /* we need to provide authentication */ 1090 if (conn->err == HTTP_NEED_AUTH) { 1091 e = conn->err; 1092 need_auth = 1; 1093 fetch_close(conn); 1094 conn = NULL; 1095 continue; 1096 } 1097 1098 /* requested range not satisfiable */ 1099 if (conn->err == HTTP_BAD_RANGE) { 1100 if (url->offset == size && url->length == 0) { 1101 /* asked for 0 bytes; fake it */ 1102 offset = url->offset; 1103 conn->err = HTTP_OK; 1104 break; 1105 } else { 1106 http_seterr(conn->err); 1107 goto ouch; 1108 } 1109 } 1110 1111 /* we have a hit or an error */ 1112 if (conn->err == HTTP_OK || 1113 conn->err == HTTP_PARTIAL || 1114 conn->err == HTTP_NOT_MODIFIED || 1115 HTTP_ERROR(conn->err)) 1116 break; 1117 1118 /* all other cases: we got a redirect */ 1119 e = conn->err; 1120 need_auth = 0; 1121 fetch_close(conn); 1122 conn = NULL; 1123 if (!new) 1124 break; 1125 if (url != URL) 1126 fetchFreeURL(url); 1127 url = new; 1128 } while (++i < n); 1129 1130 /* we failed, or ran out of retries */ 1131 if (conn == NULL) { 1132 http_seterr(e); 1133 goto ouch; 1134 } 1135 1136 /* check for inconsistencies */ 1137 if (clength != -1 && length != -1 && clength != length) { 1138 http_seterr(HTTP_PROTOCOL_ERROR); 1139 goto ouch; 1140 } 1141 if (clength == -1) 1142 clength = length; 1143 if (clength != -1) 1144 length = offset + clength; 1145 if (length != -1 && size != -1 && length != size) { 1146 http_seterr(HTTP_PROTOCOL_ERROR); 1147 goto ouch; 1148 } 1149 if (size == -1) 1150 size = length; 1151 1152 /* fill in stats */ 1153 if (us) { 1154 us->size = size; 1155 us->atime = us->mtime = mtime; 1156 } 1157 1158 /* too far? 
*/ 1159 if (URL->offset > 0 && offset > URL->offset) { 1160 http_seterr(HTTP_PROTOCOL_ERROR); 1161 goto ouch; 1162 } 1163 1164 /* report back real offset and size */ 1165 URL->offset = offset; 1166 URL->length = clength; 1167 1168 if (clength == -1 && !chunked) 1169 keep_alive = 0; 1170 1171 if (conn->err == HTTP_NOT_MODIFIED) { 1172 http_seterr(HTTP_NOT_MODIFIED); 1173 if (keep_alive) { 1174 fetch_cache_put(conn, fetch_close); 1175 conn = NULL; 1176 } 1177 goto ouch; 1178 } 1179 1180 /* wrap it up in a fetchIO */ 1181 if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) { 1182 fetch_syserr(); 1183 goto ouch; 1184 } 1185 1186 if (url != URL) 1187 fetchFreeURL(url); 1188 if (purl) 1189 fetchFreeURL(purl); 1190 1191 if (HTTP_ERROR(conn->err)) { 1192 1193 if (keep_alive) { 1194 char buf[512]; 1195 do { 1196 } while (fetchIO_read(f, buf, sizeof(buf)) > 0); 1197 } 1198 1199 fetchIO_close(f); 1200 f = NULL; 1201 } 1202 1203 return (f); 1204 1205 ouch: 1206 if (url != URL) 1207 fetchFreeURL(url); 1208 if (purl) 1209 fetchFreeURL(purl); 1210 if (conn != NULL) 1211 fetch_close(conn); 1212 return (NULL); 1213 } 1214 1215 1216 /***************************************************************************** 1217 * Entry points 1218 */ 1219 1220 /* 1221 * Retrieve and stat a file by HTTP 1222 */ 1223 fetchIO * 1224 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags) 1225 { 1226 return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags)); 1227 } 1228 1229 /* 1230 * Retrieve a file by HTTP 1231 */ 1232 fetchIO * 1233 fetchGetHTTP(struct url *URL, const char *flags) 1234 { 1235 return (fetchXGetHTTP(URL, NULL, flags)); 1236 } 1237 1238 /* 1239 * Store a file by HTTP 1240 */ 1241 fetchIO * 1242 /*ARGSUSED*/ 1243 fetchPutHTTP(struct url *URL __unused, const char *flags __unused) 1244 { 1245 fprintf(stderr, "fetchPutHTTP(): not implemented\n"); 1246 return (NULL); 1247 } 1248 1249 /* 1250 * Get an HTTP document's metadata 1251 */ 1252 int 
1253 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) 1254 { 1255 fetchIO *f; 1256 1257 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags); 1258 if (f == NULL) 1259 return (-1); 1260 fetchIO_close(f); 1261 return (0); 1262 } 1263 1264 enum http_states { 1265 ST_NONE, 1266 ST_LT, 1267 ST_LTA, 1268 ST_TAGA, 1269 ST_H, 1270 ST_R, 1271 ST_E, 1272 ST_F, 1273 ST_HREF, 1274 ST_HREFQ, 1275 ST_TAG, 1276 ST_TAGAX, 1277 ST_TAGAQ 1278 }; 1279 1280 struct index_parser { 1281 struct url_list *ue; 1282 struct url *url; 1283 enum http_states state; 1284 }; 1285 1286 static ssize_t 1287 parse_index(struct index_parser *parser, const char *buf, size_t len) 1288 { 1289 char *end_attr, p = *buf; 1290 1291 switch (parser->state) { 1292 case ST_NONE: 1293 /* Plain text, not in markup */ 1294 if (p == '<') 1295 parser->state = ST_LT; 1296 return 1; 1297 case ST_LT: 1298 /* In tag -- "<" already found */ 1299 if (p == '>') 1300 parser->state = ST_NONE; 1301 else if (p == 'a' || p == 'A') 1302 parser->state = ST_LTA; 1303 else if (!isspace((unsigned char)p)) 1304 parser->state = ST_TAG; 1305 return 1; 1306 case ST_LTA: 1307 /* In tag -- "<a" already found */ 1308 if (p == '>') 1309 parser->state = ST_NONE; 1310 else if (p == '"') 1311 parser->state = ST_TAGAQ; 1312 else if (isspace((unsigned char)p)) 1313 parser->state = ST_TAGA; 1314 else 1315 parser->state = ST_TAG; 1316 return 1; 1317 case ST_TAG: 1318 /* In tag, but not "<a" -- disregard */ 1319 if (p == '>') 1320 parser->state = ST_NONE; 1321 return 1; 1322 case ST_TAGA: 1323 /* In a-tag -- "<a " already found */ 1324 if (p == '>') 1325 parser->state = ST_NONE; 1326 else if (p == '"') 1327 parser->state = ST_TAGAQ; 1328 else if (p == 'h' || p == 'H') 1329 parser->state = ST_H; 1330 else if (!isspace((unsigned char)p)) 1331 parser->state = ST_TAGAX; 1332 return 1; 1333 case ST_TAGAX: 1334 /* In unknown keyword in a-tag */ 1335 if (p == '>') 1336 parser->state = ST_NONE; 1337 else if (p == '"') 
1338 parser->state = ST_TAGAQ; 1339 else if (isspace((unsigned char)p)) 1340 parser->state = ST_TAGA; 1341 return 1; 1342 case ST_TAGAQ: 1343 /* In a-tag, unknown argument for keys. */ 1344 if (p == '>') 1345 parser->state = ST_NONE; 1346 else if (p == '"') 1347 parser->state = ST_TAGA; 1348 return 1; 1349 case ST_H: 1350 /* In a-tag -- "<a h" already found */ 1351 if (p == '>') 1352 parser->state = ST_NONE; 1353 else if (p == '"') 1354 parser->state = ST_TAGAQ; 1355 else if (p == 'r' || p == 'R') 1356 parser->state = ST_R; 1357 else if (isspace((unsigned char)p)) 1358 parser->state = ST_TAGA; 1359 else 1360 parser->state = ST_TAGAX; 1361 return 1; 1362 case ST_R: 1363 /* In a-tag -- "<a hr" already found */ 1364 if (p == '>') 1365 parser->state = ST_NONE; 1366 else if (p == '"') 1367 parser->state = ST_TAGAQ; 1368 else if (p == 'e' || p == 'E') 1369 parser->state = ST_E; 1370 else if (isspace((unsigned char)p)) 1371 parser->state = ST_TAGA; 1372 else 1373 parser->state = ST_TAGAX; 1374 return 1; 1375 case ST_E: 1376 /* In a-tag -- "<a hre" already found */ 1377 if (p == '>') 1378 parser->state = ST_NONE; 1379 else if (p == '"') 1380 parser->state = ST_TAGAQ; 1381 else if (p == 'f' || p == 'F') 1382 parser->state = ST_F; 1383 else if (isspace((unsigned char)p)) 1384 parser->state = ST_TAGA; 1385 else 1386 parser->state = ST_TAGAX; 1387 return 1; 1388 case ST_F: 1389 /* In a-tag -- "<a href" already found */ 1390 if (p == '>') 1391 parser->state = ST_NONE; 1392 else if (p == '"') 1393 parser->state = ST_TAGAQ; 1394 else if (p == '=') 1395 parser->state = ST_HREF; 1396 else if (!isspace((unsigned char)p)) 1397 parser->state = ST_TAGAX; 1398 return 1; 1399 case ST_HREF: 1400 /* In a-tag -- "<a href=" already found */ 1401 if (p == '>') 1402 parser->state = ST_NONE; 1403 else if (p == '"') 1404 parser->state = ST_HREFQ; 1405 else if (!isspace((unsigned char)p)) 1406 parser->state = ST_TAGA; 1407 return 1; 1408 case ST_HREFQ: 1409 /* In href of the a-tag */ 1410 
end_attr = memchr(buf, '"', len); 1411 if (end_attr == NULL) 1412 return 0; 1413 *end_attr = '\0'; 1414 parser->state = ST_TAGA; 1415 if (fetch_add_entry(parser->ue, parser->url, buf, 1)) 1416 return -1; 1417 return end_attr + 1 - buf; 1418 } 1419 /* NOTREACHED */ 1420 abort(); 1421 } 1422 1423 struct http_index_cache { 1424 struct http_index_cache *next; 1425 struct url *location; 1426 struct url_list ue; 1427 }; 1428 1429 static struct http_index_cache *index_cache; 1430 1431 /* 1432 * List a directory 1433 */ 1434 int 1435 /*ARGSUSED*/ 1436 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern __unused, const char *flags) 1437 { 1438 fetchIO *f; 1439 char buf[2 * PATH_MAX]; 1440 size_t buf_len, sum_processed; 1441 ssize_t read_len, processed; 1442 struct index_parser state; 1443 struct http_index_cache *cache = NULL; 1444 int do_cache, ret; 1445 1446 do_cache = CHECK_FLAG('c'); 1447 1448 if (do_cache) { 1449 for (cache = index_cache; cache != NULL; cache = cache->next) { 1450 if (strcmp(cache->location->scheme, url->scheme)) 1451 continue; 1452 if (strcmp(cache->location->user, url->user)) 1453 continue; 1454 if (strcmp(cache->location->pwd, url->pwd)) 1455 continue; 1456 if (strcmp(cache->location->host, url->host)) 1457 continue; 1458 if (cache->location->port != url->port) 1459 continue; 1460 if (strcmp(cache->location->doc, url->doc)) 1461 continue; 1462 return fetchAppendURLList(ue, &cache->ue); 1463 } 1464 1465 cache = malloc(sizeof(*cache)); 1466 fetchInitURLList(&cache->ue); 1467 cache->location = fetchCopyURL(url); 1468 } 1469 1470 f = fetchGetHTTP(url, flags); 1471 if (f == NULL) { 1472 if (do_cache) { 1473 fetchFreeURLList(&cache->ue); 1474 fetchFreeURL(cache->location); 1475 free(cache); 1476 } 1477 return -1; 1478 } 1479 1480 state.url = url; 1481 state.state = ST_NONE; 1482 if (do_cache) { 1483 state.ue = &cache->ue; 1484 } else { 1485 state.ue = ue; 1486 } 1487 1488 buf_len = 0; 1489 1490 while ((read_len = fetchIO_read(f, buf + 
buf_len, sizeof(buf) - buf_len)) > 0) { 1491 buf_len += read_len; 1492 sum_processed = 0; 1493 do { 1494 processed = parse_index(&state, buf + sum_processed, buf_len); 1495 if (processed == -1) 1496 break; 1497 buf_len -= processed; 1498 sum_processed += processed; 1499 } while (processed != 0 && buf_len > 0); 1500 if (processed == -1) { 1501 read_len = -1; 1502 break; 1503 } 1504 memmove(buf, buf + sum_processed, buf_len); 1505 } 1506 1507 fetchIO_close(f); 1508 1509 ret = read_len < 0 ? -1 : 0; 1510 1511 if (do_cache) { 1512 if (ret == 0) { 1513 cache->next = index_cache; 1514 index_cache = cache; 1515 } 1516 1517 if (fetchAppendURLList(ue, &cache->ue)) 1518 ret = -1; 1519 } 1520 1521 return ret; 1522 } 1523