1 /* $NetBSD: http.c,v 1.1.1.4 2009/03/10 00:44:23 joerg Exp $ */ 2 /*- 3 * Copyright (c) 2000-2004 Dag-Erling Co�dan Sm�rgrav 4 * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org> 5 * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $ 32 */ 33 34 /* 35 * The following copyright applies to the base64 code: 36 * 37 *- 38 * Copyright 1997 Massachusetts Institute of Technology 39 * 40 * Permission to use, copy, modify, and distribute this software and 41 * its documentation for any purpose and without fee is hereby 42 * granted, provided that both the above copyright notice and this 43 * permission notice appear in all copies, that both the above 44 * copyright notice and this permission notice appear in all 45 * supporting documentation, and that the name of M.I.T. not be used 46 * in advertising or publicity pertaining to distribution of the 47 * software without specific, written prior permission. M.I.T. makes 48 * no representations about the suitability of this software for any 49 * purpose. It is provided "as is" without express or implied 50 * warranty. 51 * 52 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 53 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 54 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 56 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 57 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 58 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 59 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 60 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 61 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 62 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 */ 65 66 #ifdef __linux__ 67 /* Keep this down to Linux, it can create surprises else where. */ 68 #define _GNU_SOURCE 69 #endif 70 71 #if HAVE_CONFIG_H 72 #include "config.h" 73 #endif 74 #ifndef NETBSD 75 #include <nbcompat.h> 76 #endif 77 78 #include <sys/types.h> 79 #include <sys/socket.h> 80 81 #include <ctype.h> 82 #include <errno.h> 83 #include <locale.h> 84 #include <stdarg.h> 85 #ifndef NETBSD 86 #include <nbcompat/netdb.h> 87 #include <nbcompat/stdio.h> 88 #else 89 #include <netdb.h> 90 #include <stdio.h> 91 #endif 92 #include <stdlib.h> 93 #include <string.h> 94 #include <time.h> 95 #include <unistd.h> 96 97 #include <arpa/inet.h> 98 99 #include <netinet/in.h> 100 #include <netinet/tcp.h> 101 102 #include "fetch.h" 103 #include "common.h" 104 #include "httperr.h" 105 106 /* Maximum number of redirects to follow */ 107 #define MAX_REDIRECT 5 108 109 /* Symbolic names for reply codes we care about */ 110 #define HTTP_OK 200 111 #define HTTP_PARTIAL 206 112 #define HTTP_MOVED_PERM 301 113 #define HTTP_MOVED_TEMP 302 114 #define HTTP_SEE_OTHER 303 115 #define HTTP_NOT_MODIFIED 304 116 #define HTTP_TEMP_REDIRECT 307 117 #define HTTP_NEED_AUTH 401 118 #define HTTP_NEED_PROXY_AUTH 407 119 #define HTTP_BAD_RANGE 416 120 #define HTTP_PROTOCOL_ERROR 999 121 122 #define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ 123 || (xyz) == HTTP_MOVED_TEMP \ 124 || (xyz) == HTTP_TEMP_REDIRECT \ 125 || (xyz) == HTTP_SEE_OTHER) 126 127 #define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599) 128 129 130 /***************************************************************************** 131 * I/O functions for decoding chunked streams 132 */ 133 134 struct httpio 135 { 136 conn_t *conn; /* connection */ 137 int chunked; /* chunked mode */ 138 char *buf; /* chunk buffer */ 139 size_t bufsize; /* size of chunk buffer */ 140 ssize_t buflen; /* amount of data currently in buffer */ 141 int bufpos; /* current read offset in buffer */ 142 int eof; /* end-of-file flag */ 143 int error; /* error flag */ 144 size_t chunksize; /* remaining size of current chunk */ 145 }; 146 147 /* 148 * Get next chunk header 149 */ 150 static int 151 http_new_chunk(struct httpio *io) 152 { 153 char *p; 154 155 if (fetch_getln(io->conn) == -1) 156 return (-1); 157 158 if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf)) 159 return (-1); 160 161 for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) { 162 if (*p == ';') 163 break; 164 if (!isxdigit((unsigned char)*p)) 165 return (-1); 166 if (isdigit((unsigned char)*p)) { 167 io->chunksize = io->chunksize * 16 + 168 *p - '0'; 169 } else { 170 io->chunksize = io->chunksize * 16 + 171 10 + tolower((unsigned char)*p) - 'a'; 172 } 173 } 174 175 return (io->chunksize); 176 } 177 178 /* 179 * Grow the input buffer to at least len bytes 180 */ 181 static int 182 http_growbuf(struct httpio *io, size_t len) 183 { 184 char *tmp; 185 186 if (io->bufsize >= len) 187 return (0); 188 189 if ((tmp = realloc(io->buf, len)) == NULL) 190 return (-1); 191 io->buf = tmp; 192 io->bufsize = len; 193 return (0); 194 } 195 196 /* 197 * Fill the input buffer, do chunk decoding on the fly 198 */ 199 static int 200 http_fillbuf(struct httpio *io, size_t len) 201 { 202 if (io->error) 203 return (-1); 204 if (io->eof) 205 return (0); 206 207 if (io->chunked == 0) { 208 if (http_growbuf(io, len) == -1) 209 return (-1); 210 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) { 211 io->error = 1; 212 return (-1); 213 } 214 io->bufpos = 0; 215 return (io->buflen); 216 } 217 218 if (io->chunksize == 0) { 219 switch (http_new_chunk(io)) { 220 case -1: 221 io->error = 1; 222 return (-1); 223 case 0: 224 io->eof = 1; 225 return (0); 226 } 227 } 228 229 if (len > io->chunksize) 230 len = io->chunksize; 231 if (http_growbuf(io, len) == -1) 232 return (-1); 233 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) { 234 io->error = 1; 235 return (-1); 236 } 237 io->chunksize -= io->buflen; 238 239 if (io->chunksize == 0) { 240 char endl[2]; 241 ssize_t len2; 242 243 len2 = fetch_read(io->conn, endl, 2); 244 if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1) 245 return (-1); 246 if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n') 247 return (-1); 248 } 249 250 io->bufpos = 0; 251 252 return (io->buflen); 253 } 254 255 /* 256 * Read function 257 */ 258 static ssize_t 259 http_readfn(void *v, void *buf, size_t len) 260 { 261 struct httpio *io = (struct httpio *)v; 262 size_t l, pos; 263 264 if (io->error) 265 return (-1); 266 if (io->eof) 267 return (0); 268 269 for (pos = 0; len > 0; pos += l, len -= l) { 270 /* empty buffer */ 271 if (!io->buf || io->bufpos == io->buflen) 272 if (http_fillbuf(io, len) < 1) 273 break; 274 l = io->buflen - io->bufpos; 275 if (len < l) 276 l = len; 277 memcpy((char *)buf + pos, io->buf + io->bufpos, l); 278 io->bufpos += l; 279 } 280 281 if (!pos && io->error) 282 return (-1); 283 return (pos); 284 } 285 286 /* 287 * Write function 288 */ 289 static ssize_t 290 http_writefn(void *v, const void *buf, size_t len) 291 { 292 struct httpio *io = (struct httpio *)v; 293 294 return (fetch_write(io->conn, buf, len)); 295 } 296 297 /* 298 * Close function 299 */ 300 static void 301 http_closefn(void *v) 302 { 303 struct httpio *io = (struct httpio *)v; 304 305 fetch_close(io->conn); 306 if (io->buf) 307 free(io->buf); 308 free(io); 309 } 310 311 /* 312 * Wrap a file descriptor up 313 */ 314 static fetchIO * 315 http_funopen(conn_t *conn, int chunked) 316 { 317 struct httpio *io; 318 fetchIO *f; 319 320 if ((io = calloc(1, sizeof(*io))) == NULL) { 321 fetch_syserr(); 322 return (NULL); 323 } 324 io->conn = conn; 325 io->chunked = chunked; 326 f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn); 327 if (f == NULL) { 328 fetch_syserr(); 329 free(io); 330 return (NULL); 331 } 332 return (f); 333 } 334 335 336 /***************************************************************************** 337 * Helper functions for talking to the server and parsing its replies 338 */ 339 340 /* Header types */ 341 typedef enum { 342 hdr_syserror = -2, 343 hdr_error = -1, 344 hdr_end = 0, 345 hdr_unknown = 1, 346 hdr_content_length, 347 hdr_content_range, 348 hdr_last_modified, 349 hdr_location, 350 hdr_transfer_encoding, 351 hdr_www_authenticate 352 } hdr_t; 353 354 /* Names of interesting headers */ 355 static struct { 356 hdr_t num; 357 const char *name; 358 } hdr_names[] = { 359 { hdr_content_length, "Content-Length" }, 360 { hdr_content_range, "Content-Range" }, 361 { hdr_last_modified, "Last-Modified" }, 362 { hdr_location, "Location" }, 363 { hdr_transfer_encoding, "Transfer-Encoding" }, 364 { hdr_www_authenticate, "WWW-Authenticate" }, 365 { hdr_unknown, NULL }, 366 }; 367 368 /* 369 * Send a formatted line; optionally echo to terminal 370 */ 371 static int 372 http_cmd(conn_t *conn, const char *fmt, ...) 373 { 374 va_list ap; 375 size_t len; 376 char *msg; 377 int r; 378 379 va_start(ap, fmt); 380 len = vasprintf(&msg, fmt, ap); 381 va_end(ap); 382 383 if (msg == NULL) { 384 errno = ENOMEM; 385 fetch_syserr(); 386 return (-1); 387 } 388 389 r = fetch_putln(conn, msg, len); 390 free(msg); 391 392 if (r == -1) { 393 fetch_syserr(); 394 return (-1); 395 } 396 397 return (0); 398 } 399 400 /* 401 * Get and parse status line 402 */ 403 static int 404 http_get_reply(conn_t *conn) 405 { 406 char *p; 407 408 if (fetch_getln(conn) == -1) 409 return (-1); 410 /* 411 * A valid status line looks like "HTTP/m.n xyz reason" where m 412 * and n are the major and minor protocol version numbers and xyz 413 * is the reply code. 414 * Unfortunately, there are servers out there (NCSA 1.5.1, to name 415 * just one) that do not send a version number, so we can't rely 416 * on finding one, but if we do, insist on it being 1.0 or 1.1. 417 * We don't care about the reason phrase. 418 */ 419 if (strncmp(conn->buf, "HTTP", 4) != 0) 420 return (HTTP_PROTOCOL_ERROR); 421 p = conn->buf + 4; 422 if (*p == '/') { 423 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1')) 424 return (HTTP_PROTOCOL_ERROR); 425 p += 4; 426 } 427 if (*p != ' ' || 428 !isdigit((unsigned char)p[1]) || 429 !isdigit((unsigned char)p[2]) || 430 !isdigit((unsigned char)p[3])) 431 return (HTTP_PROTOCOL_ERROR); 432 433 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'); 434 return (conn->err); 435 } 436 437 /* 438 * Check a header; if the type matches the given string, return a pointer 439 * to the beginning of the value. 440 */ 441 static const char * 442 http_match(const char *str, const char *hdr) 443 { 444 while (*str && *hdr && 445 tolower((unsigned char)*str++) == tolower((unsigned char)*hdr++)) 446 /* nothing */; 447 if (*str || *hdr != ':') 448 return (NULL); 449 while (*hdr && isspace((unsigned char)*++hdr)) 450 /* nothing */; 451 return (hdr); 452 } 453 454 /* 455 * Get the next header and return the appropriate symbolic code. 456 */ 457 static hdr_t 458 http_next_header(conn_t *conn, const char **p) 459 { 460 int i; 461 462 if (fetch_getln(conn) == -1) 463 return (hdr_syserror); 464 while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1])) 465 conn->buflen--; 466 conn->buf[conn->buflen] = '\0'; 467 if (conn->buflen == 0) 468 return (hdr_end); 469 /* 470 * We could check for malformed headers but we don't really care. 471 * A valid header starts with a token immediately followed by a 472 * colon; a token is any sequence of non-control, non-whitespace 473 * characters except "()<>@,;:\\\"{}". 474 */ 475 for (i = 0; hdr_names[i].num != hdr_unknown; i++) 476 if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL) 477 return (hdr_names[i].num); 478 return (hdr_unknown); 479 } 480 481 /* 482 * Parse a last-modified header 483 */ 484 static int 485 http_parse_mtime(const char *p, time_t *mtime) 486 { 487 char locale[64], *r; 488 struct tm tm; 489 490 strncpy(locale, setlocale(LC_TIME, NULL), sizeof(locale)); 491 setlocale(LC_TIME, "C"); 492 r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 493 /* XXX should add support for date-2 and date-3 */ 494 setlocale(LC_TIME, locale); 495 if (r == NULL) 496 return (-1); 497 *mtime = timegm(&tm); 498 return (0); 499 } 500 501 /* 502 * Parse a content-length header 503 */ 504 static int 505 http_parse_length(const char *p, off_t *length) 506 { 507 off_t len; 508 509 for (len = 0; *p && isdigit((unsigned char)*p); ++p) 510 len = len * 10 + (*p - '0'); 511 if (*p) 512 return (-1); 513 *length = len; 514 return (0); 515 } 516 517 /* 518 * Parse a content-range header 519 */ 520 static int 521 http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size) 522 { 523 off_t first, last, len; 524 525 if (strncasecmp(p, "bytes ", 6) != 0) 526 return (-1); 527 p += 6; 528 if (*p == '*') { 529 first = last = -1; 530 ++p; 531 } else { 532 for (first = 0; *p && isdigit((unsigned char)*p); ++p) 533 first = first * 10 + *p - '0'; 534 if (*p != '-') 535 return (-1); 536 for (last = 0, ++p; *p && isdigit((unsigned char)*p); ++p) 537 last = last * 10 + *p - '0'; 538 } 539 if (first > last || *p != '/') 540 return (-1); 541 for (len = 0, ++p; *p && isdigit((unsigned char)*p); ++p) 542 len = len * 10 + *p - '0'; 543 if (*p || len < last - first + 1) 544 return (-1); 545 if (first == -1) 546 *length = 0; 547 else 548 *length = last - first + 1; 549 *offset = first; 550 *size = len; 551 return (0); 552 } 553 554 555 /***************************************************************************** 556 * Helper functions for authorization 557 */ 558 559 /* 560 * Base64 encoding 561 */ 562 static char * 563 http_base64(const char *src) 564 { 565 static const char base64[] = 566 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 567 "abcdefghijklmnopqrstuvwxyz" 568 "0123456789+/"; 569 char *str, *dst; 570 size_t l; 571 int t, r; 572 573 l = strlen(src); 574 if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL) 575 return (NULL); 576 dst = str; 577 r = 0; 578 579 while (l >= 3) { 580 t = (src[0] << 16) | (src[1] << 8) | src[2]; 581 dst[0] = base64[(t >> 18) & 0x3f]; 582 dst[1] = base64[(t >> 12) & 0x3f]; 583 dst[2] = base64[(t >> 6) & 0x3f]; 584 dst[3] = base64[(t >> 0) & 0x3f]; 585 src += 3; l -= 3; 586 dst += 4; r += 4; 587 } 588 589 switch (l) { 590 case 2: 591 t = (src[0] << 16) | (src[1] << 8); 592 dst[0] = base64[(t >> 18) & 0x3f]; 593 dst[1] = base64[(t >> 12) & 0x3f]; 594 dst[2] = base64[(t >> 6) & 0x3f]; 595 dst[3] = '='; 596 dst += 4; 597 r += 4; 598 break; 599 case 1: 600 t = src[0] << 16; 601 dst[0] = base64[(t >> 18) & 0x3f]; 602 dst[1] = base64[(t >> 12) & 0x3f]; 603 dst[2] = dst[3] = '='; 604 dst += 4; 605 r += 4; 606 break; 607 case 0: 608 break; 609 } 610 611 *dst = 0; 612 return (str); 613 } 614 615 /* 616 * Encode username and password 617 */ 618 static int 619 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd) 620 { 621 char *upw, *auth; 622 int r; 623 624 if (asprintf(&upw, "%s:%s", usr, pwd) == -1) 625 return (-1); 626 auth = http_base64(upw); 627 free(upw); 628 if (auth == NULL) 629 return (-1); 630 r = http_cmd(conn, "%s: Basic %s", hdr, auth); 631 free(auth); 632 return (r); 633 } 634 635 /* 636 * Send an authorization header 637 */ 638 static int 639 http_authorize(conn_t *conn, const char *hdr, const char *p) 640 { 641 /* basic authorization */ 642 if (strncasecmp(p, "basic:", 6) == 0) { 643 char *user, *pwd, *str; 644 int r; 645 646 /* skip realm */ 647 for (p += 6; *p && *p != ':'; ++p) 648 /* nothing */ ; 649 if (!*p || strchr(++p, ':') == NULL) 650 return (-1); 651 if ((str = strdup(p)) == NULL) 652 return (-1); /* XXX */ 653 user = str; 654 pwd = strchr(str, ':'); 655 *pwd++ = '\0'; 656 r = http_basic_auth(conn, hdr, user, pwd); 657 free(str); 658 return (r); 659 } 660 return (-1); 661 } 662 663 664 /***************************************************************************** 665 * Helper functions for connecting to a server or proxy 666 */ 667 668 /* 669 * Connect to the correct HTTP server or proxy. 670 */ 671 static conn_t * 672 http_connect(struct url *URL, struct url *purl, const char *flags) 673 { 674 conn_t *conn; 675 int af, verbose; 676 #ifdef TCP_NOPUSH 677 int val; 678 #endif 679 680 #ifdef INET6 681 af = AF_UNSPEC; 682 #else 683 af = AF_INET; 684 #endif 685 686 verbose = CHECK_FLAG('v'); 687 if (CHECK_FLAG('4')) 688 af = AF_INET; 689 #ifdef INET6 690 else if (CHECK_FLAG('6')) 691 af = AF_INET6; 692 #endif 693 694 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) { 695 URL = purl; 696 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 697 /* can't talk http to an ftp server */ 698 /* XXX should set an error code */ 699 return (NULL); 700 } 701 702 if ((conn = fetch_connect(URL->host, URL->port, af, verbose)) == NULL) 703 /* fetch_connect() has already set an error code */ 704 return (NULL); 705 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && 706 fetch_ssl(conn, verbose) == -1) { 707 fetch_close(conn); 708 /* grrr */ 709 #ifdef EAUTH 710 errno = EAUTH; 711 #else 712 errno = EPERM; 713 #endif 714 fetch_syserr(); 715 return (NULL); 716 } 717 718 #ifdef TCP_NOPUSH 719 val = 1; 720 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val)); 721 #endif 722 723 return (conn); 724 } 725 726 static struct url * 727 http_get_proxy(struct url * url, const char *flags) 728 { 729 struct url *purl; 730 char *p; 731 732 if (flags != NULL && strchr(flags, 'd') != NULL) 733 return (NULL); 734 if (fetch_no_proxy_match(url->host)) 735 return (NULL); 736 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) && 737 *p && (purl = fetchParseURL(p))) { 738 if (!*purl->scheme) 739 strcpy(purl->scheme, SCHEME_HTTP); 740 if (!purl->port) 741 purl->port = fetch_default_proxy_port(purl->scheme); 742 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0) 743 return (purl); 744 fetchFreeURL(purl); 745 } 746 return (NULL); 747 } 748 749 static void 750 set_if_modified_since(conn_t *conn, time_t last_modified) 751 { 752 static const char weekdays[] = "SunMonTueWedThuFriSat"; 753 static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec"; 754 struct tm tm; 755 char buf[80]; 756 gmtime_r(&last_modified, &tm); 757 snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4d %02d:%02d:%02d GMT", 758 weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3, 759 tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec); 760 http_cmd(conn, "If-Modified-Since: %s", buf); 761 } 762 763 764 /***************************************************************************** 765 * Core 766 */ 767 768 /* 769 * Send a request and process the reply 770 * 771 * XXX This function is way too long, the do..while loop should be split 772 * XXX off into a separate function. 773 */ 774 fetchIO * 775 http_request(struct url *URL, const char *op, struct url_stat *us, 776 struct url *purl, const char *flags) 777 { 778 conn_t *conn; 779 struct url *url, *new; 780 int chunked, direct, if_modified_since, need_auth, noredirect, verbose; 781 int e, i, n, val; 782 off_t offset, clength, length, size; 783 time_t mtime; 784 const char *p; 785 fetchIO *f; 786 hdr_t h; 787 char hbuf[URL_HOSTLEN + 7], *host; 788 789 direct = CHECK_FLAG('d'); 790 noredirect = CHECK_FLAG('A'); 791 verbose = CHECK_FLAG('v'); 792 if_modified_since = CHECK_FLAG('i'); 793 794 if (direct && purl) { 795 fetchFreeURL(purl); 796 purl = NULL; 797 } 798 799 /* try the provided URL first */ 800 url = URL; 801 802 /* if the A flag is set, we only get one try */ 803 n = noredirect ? 1 : MAX_REDIRECT; 804 i = 0; 805 806 e = HTTP_PROTOCOL_ERROR; 807 need_auth = 0; 808 do { 809 new = NULL; 810 chunked = 0; 811 offset = 0; 812 clength = -1; 813 length = -1; 814 size = -1; 815 mtime = 0; 816 817 /* check port */ 818 if (!url->port) 819 url->port = fetch_default_port(url->scheme); 820 821 /* were we redirected to an FTP URL? */ 822 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) { 823 if (strcmp(op, "GET") == 0) 824 return (ftp_request(url, "RETR", NULL, us, purl, flags)); 825 else if (strcmp(op, "HEAD") == 0) 826 return (ftp_request(url, "STAT", NULL, us, purl, flags)); 827 } 828 829 /* connect to server or proxy */ 830 if ((conn = http_connect(url, purl, flags)) == NULL) 831 goto ouch; 832 833 host = url->host; 834 #ifdef INET6 835 if (strchr(url->host, ':')) { 836 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); 837 host = hbuf; 838 } 839 #endif 840 if (url->port != fetch_default_port(url->scheme)) { 841 if (host != hbuf) { 842 strcpy(hbuf, host); 843 host = hbuf; 844 } 845 snprintf(hbuf + strlen(hbuf), 846 sizeof(hbuf) - strlen(hbuf), ":%d", url->port); 847 } 848 849 /* send request */ 850 if (verbose) 851 fetch_info("requesting %s://%s%s", 852 url->scheme, host, url->doc); 853 if (purl) { 854 http_cmd(conn, "%s %s://%s%s HTTP/1.1", 855 op, url->scheme, host, url->doc); 856 } else { 857 http_cmd(conn, "%s %s HTTP/1.1", 858 op, url->doc); 859 } 860 861 if (if_modified_since && url->last_modified > 0) 862 set_if_modified_since(conn, url->last_modified); 863 864 /* virtual host */ 865 http_cmd(conn, "Host: %s", host); 866 867 /* proxy authorization */ 868 if (purl) { 869 if (*purl->user || *purl->pwd) 870 http_basic_auth(conn, "Proxy-Authorization", 871 purl->user, purl->pwd); 872 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0') 873 http_authorize(conn, "Proxy-Authorization", p); 874 } 875 876 /* server authorization */ 877 if (need_auth || *url->user || *url->pwd) { 878 if (*url->user || *url->pwd) 879 http_basic_auth(conn, "Authorization", url->user, url->pwd); 880 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0') 881 http_authorize(conn, "Authorization", p); 882 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) { 883 http_basic_auth(conn, "Authorization", url->user, url->pwd); 884 } else { 885 http_seterr(HTTP_NEED_AUTH); 886 goto ouch; 887 } 888 } 889 890 /* other headers */ 891 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') { 892 if (strcasecmp(p, "auto") == 0) 893 http_cmd(conn, "Referer: %s://%s%s", 894 url->scheme, host, url->doc); 895 else 896 http_cmd(conn, "Referer: %s", p); 897 } 898 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0') 899 http_cmd(conn, "User-Agent: %s", p); 900 else 901 http_cmd(conn, "User-Agent: %s ", _LIBFETCH_VER); 902 if (url->offset > 0) 903 http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset); 904 http_cmd(conn, "Connection: close"); 905 http_cmd(conn, ""); 906 907 /* 908 * Force the queued request to be dispatched. Normally, one 909 * would do this with shutdown(2) but squid proxies can be 910 * configured to disallow such half-closed connections. To 911 * be compatible with such configurations, fiddle with socket 912 * options to force the pending data to be written. 913 */ 914 #ifdef TCP_NOPUSH 915 val = 0; 916 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, 917 sizeof(val)); 918 #endif 919 val = 1; 920 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val, 921 sizeof(val)); 922 923 /* get reply */ 924 switch (http_get_reply(conn)) { 925 case HTTP_OK: 926 case HTTP_PARTIAL: 927 case HTTP_NOT_MODIFIED: 928 /* fine */ 929 break; 930 case HTTP_MOVED_PERM: 931 case HTTP_MOVED_TEMP: 932 case HTTP_SEE_OTHER: 933 /* 934 * Not so fine, but we still have to read the 935 * headers to get the new location. 936 */ 937 break; 938 case HTTP_NEED_AUTH: 939 if (need_auth) { 940 /* 941 * We already sent out authorization code, 942 * so there's nothing more we can do. 943 */ 944 http_seterr(conn->err); 945 goto ouch; 946 } 947 /* try again, but send the password this time */ 948 if (verbose) 949 fetch_info("server requires authorization"); 950 break; 951 case HTTP_NEED_PROXY_AUTH: 952 /* 953 * If we're talking to a proxy, we already sent 954 * our proxy authorization code, so there's 955 * nothing more we can do. 956 */ 957 http_seterr(conn->err); 958 goto ouch; 959 case HTTP_BAD_RANGE: 960 /* 961 * This can happen if we ask for 0 bytes because 962 * we already have the whole file. Consider this 963 * a success for now, and check sizes later. 964 */ 965 break; 966 case HTTP_PROTOCOL_ERROR: 967 /* fall through */ 968 case -1: 969 fetch_syserr(); 970 goto ouch; 971 default: 972 http_seterr(conn->err); 973 if (!verbose) 974 goto ouch; 975 /* fall through so we can get the full error message */ 976 } 977 978 /* get headers */ 979 do { 980 switch ((h = http_next_header(conn, &p))) { 981 case hdr_syserror: 982 fetch_syserr(); 983 goto ouch; 984 case hdr_error: 985 http_seterr(HTTP_PROTOCOL_ERROR); 986 goto ouch; 987 case hdr_content_length: 988 http_parse_length(p, &clength); 989 break; 990 case hdr_content_range: 991 http_parse_range(p, &offset, &length, &size); 992 break; 993 case hdr_last_modified: 994 http_parse_mtime(p, &mtime); 995 break; 996 case hdr_location: 997 if (!HTTP_REDIRECT(conn->err)) 998 break; 999 if (new) 1000 free(new); 1001 if (verbose) 1002 fetch_info("%d redirect to %s", conn->err, p); 1003 if (*p == '/') 1004 /* absolute path */ 1005 new = fetchMakeURL(url->scheme, url->host, url->port, p, 1006 url->user, url->pwd); 1007 else 1008 new = fetchParseURL(p); 1009 if (new == NULL) { 1010 /* XXX should set an error code */ 1011 goto ouch; 1012 } 1013 if (!*new->user && !*new->pwd) { 1014 strcpy(new->user, url->user); 1015 strcpy(new->pwd, url->pwd); 1016 } 1017 new->offset = url->offset; 1018 new->length = url->length; 1019 break; 1020 case hdr_transfer_encoding: 1021 /* XXX weak test*/ 1022 chunked = (strcasecmp(p, "chunked") == 0); 1023 break; 1024 case hdr_www_authenticate: 1025 if (conn->err != HTTP_NEED_AUTH) 1026 break; 1027 /* if we were smarter, we'd check the method and realm */ 1028 break; 1029 case hdr_end: 1030 /* fall through */ 1031 case hdr_unknown: 1032 /* ignore */ 1033 break; 1034 } 1035 } while (h > hdr_end); 1036 1037 /* we need to provide authentication */ 1038 if (conn->err == HTTP_NEED_AUTH) { 1039 e = conn->err; 1040 need_auth = 1; 1041 fetch_close(conn); 1042 conn = NULL; 1043 continue; 1044 } 1045 1046 /* requested range not satisfiable */ 1047 if (conn->err == HTTP_BAD_RANGE) { 1048 if (url->offset == size && url->length == 0) { 1049 /* asked for 0 bytes; fake it */ 1050 offset = url->offset; 1051 conn->err = HTTP_OK; 1052 break; 1053 } else { 1054 http_seterr(conn->err); 1055 goto ouch; 1056 } 1057 } 1058 1059 /* we have a hit or an error */ 1060 if (conn->err == HTTP_OK || 1061 conn->err == HTTP_PARTIAL || 1062 conn->err == HTTP_NOT_MODIFIED || 1063 HTTP_ERROR(conn->err)) 1064 break; 1065 1066 /* all other cases: we got a redirect */ 1067 e = conn->err; 1068 need_auth = 0; 1069 fetch_close(conn); 1070 conn = NULL; 1071 if (!new) 1072 break; 1073 if (url != URL) 1074 fetchFreeURL(url); 1075 url = new; 1076 } while (++i < n); 1077 1078 /* we failed, or ran out of retries */ 1079 if (conn == NULL) { 1080 http_seterr(e); 1081 goto ouch; 1082 } 1083 1084 /* check for inconsistencies */ 1085 if (clength != -1 && length != -1 && clength != length) { 1086 http_seterr(HTTP_PROTOCOL_ERROR); 1087 goto ouch; 1088 } 1089 if (clength == -1) 1090 clength = length; 1091 if (clength != -1) 1092 length = offset + clength; 1093 if (length != -1 && size != -1 && length != size) { 1094 http_seterr(HTTP_PROTOCOL_ERROR); 1095 goto ouch; 1096 } 1097 if (size == -1) 1098 size = length; 1099 1100 /* fill in stats */ 1101 if (us) { 1102 us->size = size; 1103 us->atime = us->mtime = mtime; 1104 } 1105 1106 /* too far? */ 1107 if (URL->offset > 0 && offset > URL->offset) { 1108 http_seterr(HTTP_PROTOCOL_ERROR); 1109 goto ouch; 1110 } 1111 1112 /* report back real offset and size */ 1113 URL->offset = offset; 1114 URL->length = clength; 1115 1116 if (conn->err == HTTP_NOT_MODIFIED) { 1117 http_seterr(HTTP_NOT_MODIFIED); 1118 return (NULL); 1119 } 1120 1121 /* wrap it up in a fetchIO */ 1122 if ((f = http_funopen(conn, chunked)) == NULL) { 1123 fetch_syserr(); 1124 goto ouch; 1125 } 1126 1127 if (url != URL) 1128 fetchFreeURL(url); 1129 if (purl) 1130 fetchFreeURL(purl); 1131 1132 if (HTTP_ERROR(conn->err)) { 1133 fetchIO_close(f); 1134 f = NULL; 1135 } 1136 1137 return (f); 1138 1139 ouch: 1140 if (url != URL) 1141 fetchFreeURL(url); 1142 if (purl) 1143 fetchFreeURL(purl); 1144 if (conn != NULL) 1145 fetch_close(conn); 1146 return (NULL); 1147 } 1148 1149 1150 /***************************************************************************** 1151 * Entry points 1152 */ 1153 1154 /* 1155 * Retrieve and stat a file by HTTP 1156 */ 1157 fetchIO * 1158 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags) 1159 { 1160 return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags)); 1161 } 1162 1163 /* 1164 * Retrieve a file by HTTP 1165 */ 1166 fetchIO * 1167 fetchGetHTTP(struct url *URL, const char *flags) 1168 { 1169 return (fetchXGetHTTP(URL, NULL, flags)); 1170 } 1171 1172 /* 1173 * Store a file by HTTP 1174 */ 1175 fetchIO * 1176 fetchPutHTTP(struct url *URL, const char *flags) 1177 { 1178 fprintf(stderr, "fetchPutHTTP(): not implemented\n"); 1179 return (NULL); 1180 } 1181 1182 /* 1183 * Get an HTTP document's metadata 1184 */ 1185 int 1186 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) 1187 { 1188 fetchIO *f; 1189 1190 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags); 1191 if (f == NULL) 1192 return (-1); 1193 fetchIO_close(f); 1194 return (0); 1195 } 1196 1197 enum http_states { 1198 ST_NONE, 1199 ST_LT, 1200 ST_LTA, 1201 ST_TAGA, 1202 ST_H, 1203 ST_R, 1204 ST_E, 1205 ST_F, 1206 ST_HREF, 1207 ST_HREFQ, 1208 ST_TAG, 1209 ST_TAGAX, 1210 ST_TAGAQ 1211 }; 1212 1213 struct index_parser { 1214 struct url_list *ue; 1215 struct url *url; 1216 enum http_states state; 1217 }; 1218 1219 static size_t 1220 parse_index(struct index_parser *parser, const char *buf, size_t len) 1221 { 1222 char *end_attr, p = *buf; 1223 1224 switch (parser->state) { 1225 case ST_NONE: 1226 /* Plain text, not in markup */ 1227 if (p == '<') 1228 parser->state = ST_LT; 1229 return 1; 1230 case ST_LT: 1231 /* In tag -- "<" already found */ 1232 if (p == '>') 1233 parser->state = ST_NONE; 1234 else if (p == 'a' || p == 'A') 1235 parser->state = ST_LTA; 1236 else if (!isspace((unsigned char)p)) 1237 parser->state = ST_TAG; 1238 return 1; 1239 case ST_LTA: 1240 /* In tag -- "<a" already found */ 1241 if (p == '>') 1242 parser->state = ST_NONE; 1243 else if (p == '"') 1244 parser->state = ST_TAGAQ; 1245 else if (isspace((unsigned char)p)) 1246 parser->state = ST_TAGA; 1247 else 1248 parser->state = ST_TAG; 1249 return 1; 1250 case ST_TAG: 1251 /* In tag, but not "<a" -- disregard */ 1252 if (p == '>') 1253 parser->state = ST_NONE; 1254 return 1; 1255 case ST_TAGA: 1256 /* In a-tag -- "<a " already found */ 1257 if (p == '>') 1258 parser->state = ST_NONE; 1259 else if (p == '"') 1260 parser->state = ST_TAGAQ; 1261 else if (p == 'h' || p == 'H') 1262 parser->state = ST_H; 1263 else if (!isspace((unsigned char)p)) 1264 parser->state = ST_TAGAX; 1265 return 1; 1266 case ST_TAGAX: 1267 /* In unknown keyword in a-tag */ 1268 if (p == '>') 1269 parser->state = ST_NONE; 1270 else if (p == '"') 1271 parser->state = ST_TAGAQ; 1272 else if (isspace((unsigned char)p)) 1273 parser->state = ST_TAGA; 1274 return 1; 1275 case ST_TAGAQ: 1276 /* In a-tag, unknown argument for keys. */ 1277 if (p == '>') 1278 parser->state = ST_NONE; 1279 else if (p == '"') 1280 parser->state = ST_TAGA; 1281 return 1; 1282 case ST_H: 1283 /* In a-tag -- "<a h" already found */ 1284 if (p == '>') 1285 parser->state = ST_NONE; 1286 else if (p == '"') 1287 parser->state = ST_TAGAQ; 1288 else if (p == 'r' || p == 'R') 1289 parser->state = ST_R; 1290 else if (isspace((unsigned char)p)) 1291 parser->state = ST_TAGA; 1292 else 1293 parser->state = ST_TAGAX; 1294 return 1; 1295 case ST_R: 1296 /* In a-tag -- "<a hr" already found */ 1297 if (p == '>') 1298 parser->state = ST_NONE; 1299 else if (p == '"') 1300 parser->state = ST_TAGAQ; 1301 else if (p == 'e' || p == 'E') 1302 parser->state = ST_E; 1303 else if (isspace((unsigned char)p)) 1304 parser->state = ST_TAGA; 1305 else 1306 parser->state = ST_TAGAX; 1307 return 1; 1308 case ST_E: 1309 /* In a-tag -- "<a hre" already found */ 1310 if (p == '>') 1311 parser->state = ST_NONE; 1312 else if (p == '"') 1313 parser->state = ST_TAGAQ; 1314 else if (p == 'f' || p == 'F') 1315 parser->state = ST_F; 1316 else if (isspace((unsigned char)p)) 1317 parser->state = ST_TAGA; 1318 else 1319 parser->state = ST_TAGAX; 1320 return 1; 1321 case ST_F: 1322 /* In a-tag -- "<a href" already found */ 1323 if (p == '>') 1324 parser->state = ST_NONE; 1325 else if (p == '"') 1326 parser->state = ST_TAGAQ; 1327 else if (p == '=') 1328 parser->state = ST_HREF; 1329 else if (!isspace((unsigned char)p)) 1330 parser->state = ST_TAGAX; 1331 return 1; 1332 case ST_HREF: 1333 /* In a-tag -- "<a href=" already found */ 1334 if (p == '>') 1335 parser->state = ST_NONE; 1336 else if (p == '"') 1337 parser->state = ST_HREFQ; 1338 else if (!isspace((unsigned char)p)) 1339 parser->state = ST_TAGA; 1340 return 1; 1341 case ST_HREFQ: 1342 /* In href of the a-tag */ 1343 end_attr = memchr(buf, '"', len); 1344 if (end_attr == NULL) 1345 return 0; 1346 *end_attr = '\0'; 1347 parser->state = ST_TAGA; 1348 fetch_add_entry(parser->ue, parser->url, buf, 1); 1349 return end_attr + 1 - buf; 1350 } 1351 abort(); 1352 } 1353 1354 /* 1355 * List a directory 1356 */ 1357 int 1358 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern, const char *flags) 1359 { 1360 fetchIO *f; 1361 char buf[2 * PATH_MAX]; 1362 size_t buf_len, processed, sum_processed; 1363 ssize_t read_len; 1364 struct index_parser state; 1365 1366 state.url = url; 1367 state.state = ST_NONE; 1368 state.ue = ue; 1369 1370 f = fetchGetHTTP(url, flags); 1371 if (f == NULL) 1372 return -1; 1373 1374 buf_len = 0; 1375 1376 while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) { 1377 buf_len += read_len; 1378 sum_processed = 0; 1379 do { 1380 processed = parse_index(&state, buf + sum_processed, buf_len); 1381 buf_len -= processed; 1382 sum_processed += processed; 1383 } while (processed != 0 && buf_len > 0); 1384 memmove(buf, buf + sum_processed, buf_len); 1385 } 1386 1387 fetchIO_close(f); 1388 return read_len < 0 ? -1 : 0; 1389 } 1390