1 /*- 2 * Copyright (c) 2000-2004 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD: src/lib/libfetch/http.c,v 1.86 2008/12/15 08:27:44 murray Exp $ 29 * $DragonFly: src/lib/libfetch/http.c,v 1.4 2007/08/05 21:48:12 swildner Exp $ 30 */ 31 32 /* 33 * The following copyright applies to the base64 code: 34 * 35 *- 36 * Copyright 1997 Massachusetts Institute of Technology 37 * 38 * Permission to use, copy, modify, and distribute this software and 39 * its documentation for any purpose and without fee is hereby 40 * granted, provided that both the above copyright notice and this 41 * permission notice appear in all copies, that both the above 42 * copyright notice and this permission notice appear in all 43 * supporting documentation, and that the name of M.I.T. not be used 44 * in advertising or publicity pertaining to distribution of the 45 * software without specific, written prior permission. M.I.T. makes 46 * no representations about the suitability of this software for any 47 * purpose. It is provided "as is" without express or implied 48 * warranty. 49 * 50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include <sys/param.h> 65 #include <sys/socket.h> 66 #include <sys/time.h> 67 68 #include <ctype.h> 69 #include <err.h> 70 #include <errno.h> 71 #include <locale.h> 72 #include <netdb.h> 73 #include <stdarg.h> 74 #include <stdio.h> 75 #include <stdlib.h> 76 #include <string.h> 77 #include <time.h> 78 #include <unistd.h> 79 80 #include <netinet/in.h> 81 #include <netinet/tcp.h> 82 83 #include "fetch.h" 84 #include "common.h" 85 #include "httperr.h" 86 87 /* Maximum number of redirects to follow */ 88 #define MAX_REDIRECT 20 89 90 /* Symbolic names for reply codes we care about */ 91 #define HTTP_OK 200 92 #define HTTP_PARTIAL 206 93 #define HTTP_MOVED_PERM 301 94 #define HTTP_MOVED_TEMP 302 95 #define HTTP_SEE_OTHER 303 96 #define HTTP_NOT_MODIFIED 304 97 #define HTTP_TEMP_REDIRECT 307 98 #define HTTP_NEED_AUTH 401 99 #define HTTP_NEED_PROXY_AUTH 407 100 #define HTTP_BAD_RANGE 416 101 #define HTTP_PROTOCOL_ERROR 999 102 103 #define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ 104 || (xyz) == HTTP_MOVED_TEMP \ 105 || (xyz) == HTTP_TEMP_REDIRECT \ 106 || (xyz) == HTTP_SEE_OTHER) 107 108 #define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599) 109 110 111 /***************************************************************************** 112 * I/O functions for decoding chunked streams 113 */ 114 115 struct httpio 116 { 117 conn_t *conn; /* connection */ 118 int chunked; /* chunked mode */ 119 char *buf; /* chunk buffer */ 120 size_t bufsize; /* size of chunk buffer */ 121 ssize_t buflen; /* amount of data currently in buffer */ 122 int bufpos; /* current read offset in buffer */ 123 int eof; /* end-of-file flag */ 124 int error; /* error flag */ 125 size_t chunksize; /* remaining size of current chunk */ 126 #ifndef NDEBUG 127 size_t total; 128 #endif 129 }; 130 131 static int http_cmd(conn_t *, const char *, ...) __printflike(2, 3); 132 133 /* 134 * Get next chunk header 135 */ 136 static int 137 http_new_chunk(struct httpio *io) 138 { 139 char *p; 140 141 if (fetch_getln(io->conn) == -1) 142 return (-1); 143 144 if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf)) 145 return (-1); 146 147 for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) { 148 if (*p == ';') 149 break; 150 if (!isxdigit((unsigned char)*p)) 151 return (-1); 152 if (isdigit((unsigned char)*p)) { 153 io->chunksize = io->chunksize * 16 + 154 *p - '0'; 155 } else { 156 io->chunksize = io->chunksize * 16 + 157 10 + tolower((unsigned char)*p) - 'a'; 158 } 159 } 160 161 #ifndef NDEBUG 162 if (fetchDebug) { 163 io->total += io->chunksize; 164 if (io->chunksize == 0) 165 fprintf(stderr, "%s(): end of last chunk\n", __func__); 166 else 167 fprintf(stderr, "%s(): new chunk: %lu (%lu)\n", 168 __func__, (unsigned long)io->chunksize, 169 (unsigned long)io->total); 170 } 171 #endif 172 173 return (io->chunksize); 174 } 175 176 /* 177 * Grow the input buffer to at least len bytes 178 */ 179 static inline int 180 http_growbuf(struct httpio *io, size_t len) 181 { 182 char *tmp; 183 184 if (io->bufsize >= len) 185 return (0); 186 187 if ((tmp = realloc(io->buf, len)) == NULL) 188 return (-1); 189 io->buf = tmp; 190 io->bufsize = len; 191 return (0); 192 } 193 194 /* 195 * Fill the input buffer, do chunk decoding on the fly 196 */ 197 static int 198 http_fillbuf(struct httpio *io, size_t len) 199 { 200 ssize_t nbytes; 201 202 if (io->error) 203 return (-1); 204 if (io->eof) 205 return (0); 206 207 if (io->chunked == 0) { 208 if (http_growbuf(io, len) == -1) 209 return (-1); 210 if ((nbytes = fetch_read(io->conn, io->buf, len)) == -1) { 211 io->error = errno; 212 return (-1); 213 } 214 io->buflen = nbytes; 215 io->bufpos = 0; 216 return (io->buflen); 217 } 218 219 if (io->chunksize == 0) { 220 switch (http_new_chunk(io)) { 221 case -1: 222 io->error = 1; 223 return (-1); 224 case 0: 225 io->eof = 1; 226 return (0); 227 } 228 } 229 230 if (len > io->chunksize) 231 len = io->chunksize; 232 if (http_growbuf(io, len) == -1) 233 return (-1); 234 if ((nbytes = fetch_read(io->conn, io->buf, len)) == -1) { 235 io->error = errno; 236 return (-1); 237 } 238 io->buflen = nbytes; 239 io->chunksize -= io->buflen; 240 241 if (io->chunksize == 0) { 242 char endl[2]; 243 244 if (fetch_read(io->conn, endl, 2) != 2 || 245 endl[0] != '\r' || endl[1] != '\n') 246 return (-1); 247 } 248 249 io->bufpos = 0; 250 251 return (io->buflen); 252 } 253 254 /* 255 * Read function 256 */ 257 static int 258 http_readfn(void *v, char *buf, int len) 259 { 260 struct httpio *io = (struct httpio *)v; 261 int l, pos; 262 263 if (io->error) 264 return (-1); 265 if (io->eof) 266 return (0); 267 268 for (pos = 0; len > 0; pos += l, len -= l) { 269 /* empty buffer */ 270 if (!io->buf || io->bufpos == io->buflen) 271 if (http_fillbuf(io, len) < 1) 272 break; 273 l = io->buflen - io->bufpos; 274 if (len < l) 275 l = len; 276 memcpy(buf + pos, io->buf + io->bufpos, l); 277 io->bufpos += l; 278 } 279 280 if (!pos && io->error) { 281 if (io->error == EINTR) 282 io->error = 0; 283 return (-1); 284 } 285 return (pos); 286 } 287 288 /* 289 * Write function 290 */ 291 static int 292 http_writefn(void *v, const char *buf, int len) 293 { 294 struct httpio *io = (struct httpio *)v; 295 296 return (fetch_write(io->conn, buf, len)); 297 } 298 299 /* 300 * Close function 301 */ 302 static int 303 http_closefn(void *v) 304 { 305 struct httpio *io = (struct httpio *)v; 306 int r; 307 308 r = fetch_close(io->conn); 309 if (io->buf) 310 free(io->buf); 311 free(io); 312 return (r); 313 } 314 315 /* 316 * Wrap a file descriptor up 317 */ 318 static FILE * 319 http_funopen(conn_t *conn, int chunked) 320 { 321 struct httpio *io; 322 FILE *f; 323 324 if ((io = calloc(1, sizeof(*io))) == NULL) { 325 fetch_syserr(); 326 return (NULL); 327 } 328 io->conn = conn; 329 io->chunked = chunked; 330 f = funopen(io, http_readfn, http_writefn, NULL, http_closefn); 331 if (f == NULL) { 332 fetch_syserr(); 333 free(io); 334 return (NULL); 335 } 336 return (f); 337 } 338 339 340 /***************************************************************************** 341 * Helper functions for talking to the server and parsing its replies 342 */ 343 344 /* Header types */ 345 typedef enum { 346 hdr_syserror = -2, 347 hdr_error = -1, 348 hdr_end = 0, 349 hdr_unknown = 1, 350 hdr_content_length, 351 hdr_content_range, 352 hdr_last_modified, 353 hdr_location, 354 hdr_transfer_encoding, 355 hdr_www_authenticate 356 } hdr_t; 357 358 /* Names of interesting headers */ 359 static struct { 360 hdr_t num; 361 const char *name; 362 } hdr_names[] = { 363 { hdr_content_length, "Content-Length" }, 364 { hdr_content_range, "Content-Range" }, 365 { hdr_last_modified, "Last-Modified" }, 366 { hdr_location, "Location" }, 367 { hdr_transfer_encoding, "Transfer-Encoding" }, 368 { hdr_www_authenticate, "WWW-Authenticate" }, 369 { hdr_unknown, NULL }, 370 }; 371 372 /* 373 * Send a formatted line; optionally echo to terminal 374 */ 375 static int 376 http_cmd(conn_t *conn, const char *fmt, ...) 377 { 378 va_list ap; 379 size_t len; 380 char *msg; 381 int r; 382 383 va_start(ap, fmt); 384 len = vasprintf(&msg, fmt, ap); 385 va_end(ap); 386 387 if (msg == NULL) { 388 errno = ENOMEM; 389 fetch_syserr(); 390 return (-1); 391 } 392 393 r = fetch_putln(conn, msg, len); 394 free(msg); 395 396 if (r == -1) { 397 fetch_syserr(); 398 return (-1); 399 } 400 401 return (0); 402 } 403 404 /* 405 * Get and parse status line 406 */ 407 static int 408 http_get_reply(conn_t *conn) 409 { 410 char *p; 411 412 if (fetch_getln(conn) == -1) 413 return (-1); 414 /* 415 * A valid status line looks like "HTTP/m.n xyz reason" where m 416 * and n are the major and minor protocol version numbers and xyz 417 * is the reply code. 418 * Unfortunately, there are servers out there (NCSA 1.5.1, to name 419 * just one) that do not send a version number, so we can't rely 420 * on finding one, but if we do, insist on it being 1.0 or 1.1. 421 * We don't care about the reason phrase. 422 */ 423 if (strncmp(conn->buf, "HTTP", 4) != 0) 424 return (HTTP_PROTOCOL_ERROR); 425 p = conn->buf + 4; 426 if (*p == '/') { 427 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1')) 428 return (HTTP_PROTOCOL_ERROR); 429 p += 4; 430 } 431 if (*p != ' ' || 432 !isdigit((unsigned char)p[1]) || 433 !isdigit((unsigned char)p[2]) || 434 !isdigit((unsigned char)p[3])) 435 return (HTTP_PROTOCOL_ERROR); 436 437 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'); 438 return (conn->err); 439 } 440 441 /* 442 * Check a header; if the type matches the given string, return a pointer 443 * to the beginning of the value. 444 */ 445 static const char * 446 http_match(const char *str, const char *hdr) 447 { 448 while (*str && *hdr && 449 tolower((unsigned char)*str++) == tolower((unsigned char)*hdr++)) 450 /* nothing */; 451 if (*str || *hdr != ':') 452 return (NULL); 453 while (*hdr && isspace((unsigned char)*++hdr)) 454 /* nothing */; 455 return (hdr); 456 } 457 458 /* 459 * Get the next header and return the appropriate symbolic code. 460 */ 461 static hdr_t 462 http_next_header(conn_t *conn, const char **p) 463 { 464 int i; 465 466 if (fetch_getln(conn) == -1) 467 return (hdr_syserror); 468 while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1])) 469 conn->buflen--; 470 conn->buf[conn->buflen] = '\0'; 471 if (conn->buflen == 0) 472 return (hdr_end); 473 /* 474 * We could check for malformed headers but we don't really care. 475 * A valid header starts with a token immediately followed by a 476 * colon; a token is any sequence of non-control, non-whitespace 477 * characters except "()<>@,;:\\\"{}". 478 */ 479 for (i = 0; hdr_names[i].num != hdr_unknown; i++) 480 if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL) 481 return (hdr_names[i].num); 482 return (hdr_unknown); 483 } 484 485 /* 486 * Parse a last-modified header 487 */ 488 static int 489 http_parse_mtime(const char *p, time_t *mtime) 490 { 491 char locale[64], *r; 492 struct tm tm; 493 494 strncpy(locale, setlocale(LC_TIME, NULL), sizeof(locale)); 495 setlocale(LC_TIME, "C"); 496 r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 497 /* XXX should add support for date-2 and date-3 */ 498 setlocale(LC_TIME, locale); 499 if (r == NULL) 500 return (-1); 501 DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d " 502 "%02d:%02d:%02d]\n", 503 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 504 tm.tm_hour, tm.tm_min, tm.tm_sec)); 505 *mtime = timegm(&tm); 506 return (0); 507 } 508 509 /* 510 * Parse a content-length header 511 */ 512 static int 513 http_parse_length(const char *p, off_t *length) 514 { 515 off_t len; 516 517 for (len = 0; *p && isdigit((unsigned char)*p); ++p) 518 len = len * 10 + (*p - '0'); 519 if (*p) 520 return (-1); 521 DEBUG(fprintf(stderr, "content length: [%lld]\n", 522 (long long)len)); 523 *length = len; 524 return (0); 525 } 526 527 /* 528 * Parse a content-range header 529 */ 530 static int 531 http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size) 532 { 533 off_t first, last, len; 534 535 if (strncasecmp(p, "bytes ", 6) != 0) 536 return (-1); 537 p += 6; 538 if (*p == '*') { 539 first = last = -1; 540 ++p; 541 } else { 542 for (first = 0; *p && isdigit((unsigned char)*p); ++p) 543 first = first * 10 + *p - '0'; 544 if (*p != '-') 545 return (-1); 546 for (last = 0, ++p; *p && isdigit((unsigned char)*p); ++p) 547 last = last * 10 + *p - '0'; 548 } 549 if (first > last || *p != '/') 550 return (-1); 551 for (len = 0, ++p; *p && isdigit((unsigned char)*p); ++p) 552 len = len * 10 + *p - '0'; 553 if (*p || len < last - first + 1) 554 return (-1); 555 if (first == -1) { 556 DEBUG(fprintf(stderr, "content range: [*/%lld]\n", 557 (long long)len)); 558 *length = 0; 559 } else { 560 DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n", 561 (long long)first, (long long)last, (long long)len)); 562 *length = last - first + 1; 563 } 564 *offset = first; 565 *size = len; 566 return (0); 567 } 568 569 570 /***************************************************************************** 571 * Helper functions for authorization 572 */ 573 574 /* 575 * Base64 encoding 576 */ 577 static char * 578 http_base64(const char *src) 579 { 580 static const char base64[] = 581 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 582 "abcdefghijklmnopqrstuvwxyz" 583 "0123456789+/"; 584 char *str, *dst; 585 size_t l; 586 int t, r; 587 588 l = strlen(src); 589 if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL) 590 return (NULL); 591 dst = str; 592 r = 0; 593 594 while (l >= 3) { 595 t = (src[0] << 16) | (src[1] << 8) | src[2]; 596 dst[0] = base64[(t >> 18) & 0x3f]; 597 dst[1] = base64[(t >> 12) & 0x3f]; 598 dst[2] = base64[(t >> 6) & 0x3f]; 599 dst[3] = base64[(t >> 0) & 0x3f]; 600 src += 3; l -= 3; 601 dst += 4; r += 4; 602 } 603 604 switch (l) { 605 case 2: 606 t = (src[0] << 16) | (src[1] << 8); 607 dst[0] = base64[(t >> 18) & 0x3f]; 608 dst[1] = base64[(t >> 12) & 0x3f]; 609 dst[2] = base64[(t >> 6) & 0x3f]; 610 dst[3] = '='; 611 dst += 4; 612 r += 4; 613 break; 614 case 1: 615 t = src[0] << 16; 616 dst[0] = base64[(t >> 18) & 0x3f]; 617 dst[1] = base64[(t >> 12) & 0x3f]; 618 dst[2] = dst[3] = '='; 619 dst += 4; 620 r += 4; 621 break; 622 case 0: 623 break; 624 } 625 626 *dst = 0; 627 return (str); 628 } 629 630 /* 631 * Encode username and password 632 */ 633 static int 634 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd) 635 { 636 char *upw, *auth; 637 int r; 638 639 DEBUG(fprintf(stderr, "usr: [%s]\n", usr)); 640 DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd)); 641 if (asprintf(&upw, "%s:%s", usr, pwd) == -1) 642 return (-1); 643 auth = http_base64(upw); 644 free(upw); 645 if (auth == NULL) 646 return (-1); 647 r = http_cmd(conn, "%s: Basic %s", hdr, auth); 648 free(auth); 649 return (r); 650 } 651 652 /* 653 * Send an authorization header 654 */ 655 static int 656 http_authorize(conn_t *conn, const char *hdr, const char *p) 657 { 658 /* basic authorization */ 659 if (strncasecmp(p, "basic:", 6) == 0) { 660 char *user, *pwd, *str; 661 int r; 662 663 /* skip realm */ 664 for (p += 6; *p && *p != ':'; ++p) 665 /* nothing */ ; 666 if (!*p || strchr(++p, ':') == NULL) 667 return (-1); 668 if ((str = strdup(p)) == NULL) 669 return (-1); /* XXX */ 670 user = str; 671 pwd = strchr(str, ':'); 672 *pwd++ = '\0'; 673 r = http_basic_auth(conn, hdr, user, pwd); 674 free(str); 675 return (r); 676 } 677 return (-1); 678 } 679 680 681 /***************************************************************************** 682 * Helper functions for connecting to a server or proxy 683 */ 684 685 /* 686 * Connect to the correct HTTP server or proxy. 687 */ 688 static conn_t * 689 http_connect(struct url *URL, struct url *purl, const char *flags) 690 { 691 conn_t *conn; 692 int verbose; 693 int af, val; 694 695 #ifdef INET6 696 af = AF_UNSPEC; 697 #else 698 af = AF_INET; 699 #endif 700 701 verbose = CHECK_FLAG('v'); 702 if (CHECK_FLAG('4')) 703 af = AF_INET; 704 #ifdef INET6 705 else if (CHECK_FLAG('6')) 706 af = AF_INET6; 707 #endif 708 709 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) { 710 URL = purl; 711 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 712 /* can't talk http to an ftp server */ 713 /* XXX should set an error code */ 714 return (NULL); 715 } 716 717 if ((conn = fetch_connect(URL->host, URL->port, af, verbose)) == NULL) 718 /* fetch_connect() has already set an error code */ 719 return (NULL); 720 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && 721 fetch_ssl(conn, verbose) == -1) { 722 fetch_close(conn); 723 /* grrr */ 724 errno = EAUTH; 725 fetch_syserr(); 726 return (NULL); 727 } 728 729 val = 1; 730 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val)); 731 732 return (conn); 733 } 734 735 static struct url * 736 http_get_proxy(struct url * url, const char *flags) 737 { 738 struct url *purl; 739 char *p; 740 741 if (flags != NULL && strchr(flags, 'd') != NULL) 742 return (NULL); 743 if (fetch_no_proxy_match(url->host)) 744 return (NULL); 745 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) && 746 *p && (purl = fetchParseURL(p))) { 747 if (!*purl->scheme) 748 strcpy(purl->scheme, SCHEME_HTTP); 749 if (!purl->port) 750 purl->port = fetch_default_proxy_port(purl->scheme); 751 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0) 752 return (purl); 753 fetchFreeURL(purl); 754 } 755 return (NULL); 756 } 757 758 static void 759 http_print_html(FILE *out, FILE *in) 760 { 761 size_t len; 762 char *line, *p, *q; 763 int comment, tag; 764 765 comment = tag = 0; 766 while ((line = fgetln(in, &len)) != NULL) { 767 while (len && isspace((unsigned char)line[len - 1])) 768 --len; 769 for (p = q = line; q < line + len; ++q) { 770 if (comment && *q == '-') { 771 if (q + 2 < line + len && 772 strcmp(q, "-->") == 0) { 773 tag = comment = 0; 774 q += 2; 775 } 776 } else if (tag && !comment && *q == '>') { 777 p = q + 1; 778 tag = 0; 779 } else if (!tag && *q == '<') { 780 if (q > p) 781 fwrite(p, q - p, 1, out); 782 tag = 1; 783 if (q + 3 < line + len && 784 strcmp(q, "<!--") == 0) { 785 comment = 1; 786 q += 3; 787 } 788 } 789 } 790 if (!tag && q > p) 791 fwrite(p, q - p, 1, out); 792 fputc('\n', out); 793 } 794 } 795 796 797 /***************************************************************************** 798 * Core 799 */ 800 801 /* 802 * Send a request and process the reply 803 * 804 * XXX This function is way too long, the do..while loop should be split 805 * XXX off into a separate function. 806 */ 807 FILE * 808 http_request(struct url *URL, const char *op, struct url_stat *us, 809 struct url *purl, const char *flags) 810 { 811 char timebuf[80]; 812 char hbuf[MAXHOSTNAMELEN + 7], *host; 813 conn_t *conn; 814 struct url *url, *new; 815 int chunked, direct, ims, need_auth, noredirect, verbose; 816 int e, i, n, val; 817 off_t offset, clength, length, size; 818 time_t mtime; 819 const char *p; 820 FILE *f; 821 hdr_t h; 822 struct tm *timestruct; 823 824 direct = CHECK_FLAG('d'); 825 noredirect = CHECK_FLAG('A'); 826 verbose = CHECK_FLAG('v'); 827 ims = CHECK_FLAG('i'); 828 829 if (direct && purl) { 830 fetchFreeURL(purl); 831 purl = NULL; 832 } 833 834 /* try the provided URL first */ 835 url = URL; 836 837 /* if the A flag is set, we only get one try */ 838 n = noredirect ? 1 : MAX_REDIRECT; 839 i = 0; 840 841 e = HTTP_PROTOCOL_ERROR; 842 need_auth = 0; 843 do { 844 new = NULL; 845 chunked = 0; 846 offset = 0; 847 clength = -1; 848 length = -1; 849 size = -1; 850 mtime = 0; 851 852 /* check port */ 853 if (!url->port) 854 url->port = fetch_default_port(url->scheme); 855 856 /* were we redirected to an FTP URL? */ 857 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) { 858 if (strcmp(op, "GET") == 0) 859 return (ftp_request(url, "RETR", us, purl, flags)); 860 else if (strcmp(op, "HEAD") == 0) 861 return (ftp_request(url, "STAT", us, purl, flags)); 862 } 863 864 /* connect to server or proxy */ 865 if ((conn = http_connect(url, purl, flags)) == NULL) 866 goto ouch; 867 868 host = url->host; 869 #ifdef INET6 870 if (strchr(url->host, ':')) { 871 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); 872 host = hbuf; 873 } 874 #endif 875 if (url->port != fetch_default_port(url->scheme)) { 876 if (host != hbuf) { 877 strcpy(hbuf, host); 878 host = hbuf; 879 } 880 snprintf(hbuf + strlen(hbuf), 881 sizeof(hbuf) - strlen(hbuf), ":%d", url->port); 882 } 883 884 /* send request */ 885 if (verbose) 886 fetch_info("requesting %s://%s%s", 887 url->scheme, host, url->doc); 888 if (purl) { 889 http_cmd(conn, "%s %s://%s%s HTTP/1.1", 890 op, url->scheme, host, url->doc); 891 } else { 892 http_cmd(conn, "%s %s HTTP/1.1", 893 op, url->doc); 894 } 895 896 if (ims && url->ims_time) { 897 timestruct = gmtime((time_t *)&url->ims_time); 898 (void)strftime(timebuf, 80, "%a, %d %b %Y %T GMT", 899 timestruct); 900 if (verbose) 901 fetch_info("If-Modified-Since: %s", timebuf); 902 http_cmd(conn, "If-Modified-Since: %s", timebuf); 903 } 904 /* virtual host */ 905 http_cmd(conn, "Host: %s", host); 906 907 /* proxy authorization */ 908 if (purl) { 909 if (*purl->user || *purl->pwd) 910 http_basic_auth(conn, "Proxy-Authorization", 911 purl->user, purl->pwd); 912 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0') 913 http_authorize(conn, "Proxy-Authorization", p); 914 } 915 916 /* server authorization */ 917 if (need_auth || *url->user || *url->pwd) { 918 if (*url->user || *url->pwd) 919 http_basic_auth(conn, "Authorization", url->user, url->pwd); 920 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0') 921 http_authorize(conn, "Authorization", p); 922 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) { 923 http_basic_auth(conn, "Authorization", url->user, url->pwd); 924 } else { 925 http_seterr(HTTP_NEED_AUTH); 926 goto ouch; 927 } 928 } 929 930 /* other headers */ 931 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') { 932 if (strcasecmp(p, "auto") == 0) 933 http_cmd(conn, "Referer: %s://%s%s", 934 url->scheme, host, url->doc); 935 else 936 http_cmd(conn, "Referer: %s", p); 937 } 938 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0') 939 http_cmd(conn, "User-Agent: %s", p); 940 else 941 http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname()); 942 if (url->offset > 0) 943 http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset); 944 http_cmd(conn, "Connection: close"); 945 http_cmd(conn, "%s", ""); 946 947 /* 948 * Force the queued request to be dispatched. Normally, one 949 * would do this with shutdown(2) but squid proxies can be 950 * configured to disallow such half-closed connections. To 951 * be compatible with such configurations, fiddle with socket 952 * options to force the pending data to be written. 953 */ 954 val = 0; 955 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, 956 sizeof(val)); 957 val = 1; 958 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val, 959 sizeof(val)); 960 961 /* get reply */ 962 switch (http_get_reply(conn)) { 963 case HTTP_OK: 964 case HTTP_PARTIAL: 965 case HTTP_NOT_MODIFIED: 966 /* fine */ 967 break; 968 case HTTP_MOVED_PERM: 969 case HTTP_MOVED_TEMP: 970 case HTTP_SEE_OTHER: 971 /* 972 * Not so fine, but we still have to read the 973 * headers to get the new location. 974 */ 975 break; 976 case HTTP_NEED_AUTH: 977 if (need_auth) { 978 /* 979 * We already sent out authorization code, 980 * so there's nothing more we can do. 981 */ 982 http_seterr(conn->err); 983 goto ouch; 984 } 985 /* try again, but send the password this time */ 986 if (verbose) 987 fetch_info("server requires authorization"); 988 break; 989 case HTTP_NEED_PROXY_AUTH: 990 /* 991 * If we're talking to a proxy, we already sent 992 * our proxy authorization code, so there's 993 * nothing more we can do. 994 */ 995 http_seterr(conn->err); 996 goto ouch; 997 case HTTP_BAD_RANGE: 998 /* 999 * This can happen if we ask for 0 bytes because 1000 * we already have the whole file. Consider this 1001 * a success for now, and check sizes later. 1002 */ 1003 break; 1004 case HTTP_PROTOCOL_ERROR: 1005 /* fall through */ 1006 case -1: 1007 fetch_syserr(); 1008 goto ouch; 1009 default: 1010 http_seterr(conn->err); 1011 if (!verbose) 1012 goto ouch; 1013 /* fall through so we can get the full error message */ 1014 } 1015 1016 /* get headers */ 1017 do { 1018 switch ((h = http_next_header(conn, &p))) { 1019 case hdr_syserror: 1020 fetch_syserr(); 1021 goto ouch; 1022 case hdr_error: 1023 http_seterr(HTTP_PROTOCOL_ERROR); 1024 goto ouch; 1025 case hdr_content_length: 1026 http_parse_length(p, &clength); 1027 break; 1028 case hdr_content_range: 1029 http_parse_range(p, &offset, &length, &size); 1030 break; 1031 case hdr_last_modified: 1032 http_parse_mtime(p, &mtime); 1033 break; 1034 case hdr_location: 1035 if (!HTTP_REDIRECT(conn->err)) 1036 break; 1037 if (new) 1038 free(new); 1039 if (verbose) 1040 fetch_info("%d redirect to %s", conn->err, p); 1041 if (*p == '/') 1042 /* absolute path */ 1043 new = fetchMakeURL(url->scheme, url->host, url->port, p, 1044 url->user, url->pwd); 1045 else 1046 new = fetchParseURL(p); 1047 if (new == NULL) { 1048 /* XXX should set an error code */ 1049 DEBUG(fprintf(stderr, "failed to parse new URL\n")); 1050 goto ouch; 1051 } 1052 if (!*new->user && !*new->pwd) { 1053 strcpy(new->user, url->user); 1054 strcpy(new->pwd, url->pwd); 1055 } 1056 new->offset = url->offset; 1057 new->length = url->length; 1058 break; 1059 case hdr_transfer_encoding: 1060 /* XXX weak test*/ 1061 chunked = (strcasecmp(p, "chunked") == 0); 1062 break; 1063 case hdr_www_authenticate: 1064 if (conn->err != HTTP_NEED_AUTH) 1065 break; 1066 /* if we were smarter, we'd check the method and realm */ 1067 break; 1068 case hdr_end: 1069 /* fall through */ 1070 case hdr_unknown: 1071 /* ignore */ 1072 break; 1073 } 1074 } while (h > hdr_end); 1075 1076 /* we need to provide authentication */ 1077 if (conn->err == HTTP_NEED_AUTH) { 1078 e = conn->err; 1079 need_auth = 1; 1080 fetch_close(conn); 1081 conn = NULL; 1082 continue; 1083 } 1084 1085 /* requested range not satisfiable */ 1086 if (conn->err == HTTP_BAD_RANGE) { 1087 if (url->offset == size && url->length == 0) { 1088 /* asked for 0 bytes; fake it */ 1089 offset = url->offset; 1090 clength = -1; 1091 conn->err = HTTP_OK; 1092 break; 1093 } else { 1094 http_seterr(conn->err); 1095 goto ouch; 1096 } 1097 } 1098 1099 /* we have a hit or an error */ 1100 if (conn->err == HTTP_OK 1101 || conn->err == HTTP_NOT_MODIFIED 1102 || conn->err == HTTP_PARTIAL 1103 || HTTP_ERROR(conn->err)) 1104 break; 1105 1106 /* all other cases: we got a redirect */ 1107 e = conn->err; 1108 need_auth = 0; 1109 fetch_close(conn); 1110 conn = NULL; 1111 if (!new) { 1112 DEBUG(fprintf(stderr, "redirect with no new location\n")); 1113 break; 1114 } 1115 if (url != URL) 1116 fetchFreeURL(url); 1117 url = new; 1118 } while (++i < n); 1119 1120 /* we failed, or ran out of retries */ 1121 if (conn == NULL) { 1122 http_seterr(e); 1123 goto ouch; 1124 } 1125 1126 DEBUG(fprintf(stderr, "offset %lld, length %lld," 1127 " size %lld, clength %lld\n", 1128 (long long)offset, (long long)length, 1129 (long long)size, (long long)clength)); 1130 1131 if (conn->err == HTTP_NOT_MODIFIED) { 1132 http_seterr(HTTP_NOT_MODIFIED); 1133 return (NULL); 1134 } 1135 1136 /* check for inconsistencies */ 1137 if (clength != -1 && length != -1 && clength != length) { 1138 http_seterr(HTTP_PROTOCOL_ERROR); 1139 goto ouch; 1140 } 1141 if (clength == -1) 1142 clength = length; 1143 if (clength != -1) 1144 length = offset + clength; 1145 if (length != -1 && size != -1 && length != size) { 1146 http_seterr(HTTP_PROTOCOL_ERROR); 1147 goto ouch; 1148 } 1149 if (size == -1) 1150 size = length; 1151 1152 /* fill in stats */ 1153 if (us) { 1154 us->size = size; 1155 us->atime = us->mtime = mtime; 1156 } 1157 1158 /* too far? */ 1159 if (URL->offset > 0 && offset > URL->offset) { 1160 http_seterr(HTTP_PROTOCOL_ERROR); 1161 goto ouch; 1162 } 1163 1164 /* report back real offset and size */ 1165 URL->offset = offset; 1166 URL->length = clength; 1167 1168 /* wrap it up in a FILE */ 1169 if ((f = http_funopen(conn, chunked)) == NULL) { 1170 fetch_syserr(); 1171 goto ouch; 1172 } 1173 1174 if (url != URL) 1175 fetchFreeURL(url); 1176 if (purl) 1177 fetchFreeURL(purl); 1178 1179 if (HTTP_ERROR(conn->err)) { 1180 http_print_html(stderr, f); 1181 fclose(f); 1182 f = NULL; 1183 } 1184 1185 return (f); 1186 1187 ouch: 1188 if (url != URL) 1189 fetchFreeURL(url); 1190 if (purl) 1191 fetchFreeURL(purl); 1192 if (conn != NULL) 1193 fetch_close(conn); 1194 return (NULL); 1195 } 1196 1197 1198 /***************************************************************************** 1199 * Entry points 1200 */ 1201 1202 /* 1203 * Retrieve and stat a file by HTTP 1204 */ 1205 FILE * 1206 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags) 1207 { 1208 return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags)); 1209 } 1210 1211 /* 1212 * Retrieve a file by HTTP 1213 */ 1214 FILE * 1215 fetchGetHTTP(struct url *URL, const char *flags) 1216 { 1217 return (fetchXGetHTTP(URL, NULL, flags)); 1218 } 1219 1220 /* 1221 * Store a file by HTTP 1222 */ 1223 FILE * 1224 fetchPutHTTP(struct url *URL __unused, const char *flags __unused) 1225 { 1226 warnx("fetchPutHTTP(): not implemented"); 1227 return (NULL); 1228 } 1229 1230 /* 1231 * Get an HTTP document's metadata 1232 */ 1233 int 1234 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) 1235 { 1236 FILE *f; 1237 1238 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags); 1239 if (f == NULL) 1240 return (-1); 1241 fclose(f); 1242 return (0); 1243 } 1244 1245 /* 1246 * List a directory 1247 */ 1248 struct url_ent * 1249 fetchListHTTP(struct url *url __unused, const char *flags __unused) 1250 { 1251 warnx("fetchListHTTP(): not implemented"); 1252 return (NULL); 1253 } 1254