/*-
 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29 #include "free2net.h" 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD: src/lib/libfetch/http.c,v 1.76.2.2 2007/05/29 12:35:26 des Exp $"); 33 34 /* 35 * The following copyright applies to the base64 code: 36 * 37 *- 38 * Copyright 1997 Massachusetts Institute of Technology 39 * 40 * Permission to use, copy, modify, and distribute this software and 41 * its documentation for any purpose and without fee is hereby 42 * granted, provided that both the above copyright notice and this 43 * permission notice appear in all copies, that both the above 44 * copyright notice and this permission notice appear in all 45 * supporting documentation, and that the name of M.I.T. not be used 46 * in advertising or publicity pertaining to distribution of the 47 * software without specific, written prior permission. M.I.T. makes 48 * no representations about the suitability of this software for any 49 * purpose. It is provided "as is" without express or implied 50 * warranty. 51 * 52 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 53 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 54 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 56 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 57 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 58 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 59 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 60 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 61 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 62 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 */ 65 66 #include <sys/param.h> 67 #include <sys/socket.h> 68 69 #include <ctype.h> 70 #include <err.h> 71 #include <errno.h> 72 #include <locale.h> 73 #include <netdb.h> 74 #include <stdarg.h> 75 #include <stdio.h> 76 #include <stdlib.h> 77 #include <string.h> 78 #include <time.h> 79 #include <unistd.h> 80 81 #include <netinet/in.h> 82 #include <netinet/tcp.h> 83 84 #include "fetch.h" 85 #include "common.h" 86 #include "httperr.h" 87 88 #include "free2net.h" 89 90 /* Maximum number of redirects to follow */ 91 #define MAX_REDIRECT 5 92 93 /* Symbolic names for reply codes we care about */ 94 #define HTTP_OK 200 95 #define HTTP_PARTIAL 206 96 #define HTTP_MOVED_PERM 301 97 #define HTTP_MOVED_TEMP 302 98 #define HTTP_SEE_OTHER 303 99 #define HTTP_TEMP_REDIRECT 307 100 #define HTTP_NEED_AUTH 401 101 #define HTTP_NEED_PROXY_AUTH 407 102 #define HTTP_BAD_RANGE 416 103 #define HTTP_PROTOCOL_ERROR 999 104 105 #define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ 106 || (xyz) == HTTP_MOVED_TEMP \ 107 || (xyz) == HTTP_TEMP_REDIRECT \ 108 || (xyz) == HTTP_SEE_OTHER) 109 110 #define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599) 111 112 113 /***************************************************************************** 114 * I/O functions for decoding chunked streams 115 */ 116 117 struct httpio 118 { 119 conn_t *conn; /* connection */ 120 int chunked; /* chunked mode */ 121 char *buf; /* chunk buffer */ 122 size_t bufsize; /* size of chunk buffer */ 123 ssize_t buflen; /* amount of data currently in buffer */ 124 int bufpos; /* current read offset in buffer */ 125 int eof; /* end-of-file flag */ 126 int error; /* error flag */ 127 size_t chunksize; /* remaining size of current chunk */ 128 #ifndef NDEBUG 129 size_t total; 130 #endif 131 }; 132 133 134 /* 135 * Get next chunk header 136 */ 137 static int 138 _http_new_chunk(struct httpio *io) 139 { 140 char *p; 141 142 if (_fetch_getln(io->conn) == -1) 143 return (-1); 144 145 if (io->conn->buflen < 2 || 
!ishexnumber((unsigned)*io->conn->buf)) 146 return (-1); 147 148 for (p = io->conn->buf; *p && !isspace((unsigned)*p); ++p) { 149 if (*p == ';') 150 break; 151 if (!ishexnumber((unsigned)*p)) 152 return (-1); 153 if (isdigit((unsigned)*p)) { 154 io->chunksize = io->chunksize * 16 + 155 *p - '0'; 156 } else { 157 io->chunksize = io->chunksize * 16 + 158 10 + tolower((unsigned)*p) - 'a'; 159 } 160 } 161 162 #ifndef NDEBUG 163 if (fetchDebug) { 164 io->total += io->chunksize; 165 if (io->chunksize == 0) 166 fprintf(stderr, "%s(): end of last chunk\n", __func__); 167 else 168 fprintf(stderr, "%s(): new chunk: %lu (%lu)\n", 169 __func__, (unsigned long)io->chunksize, 170 (unsigned long)io->total); 171 } 172 #endif 173 174 return (io->chunksize); 175 } 176 177 /* 178 * Grow the input buffer to at least len bytes 179 */ 180 static inline int 181 _http_growbuf(struct httpio *io, size_t len) 182 { 183 char *tmp; 184 185 if (io->bufsize >= len) 186 return (0); 187 188 if ((tmp = realloc(io->buf, len)) == NULL) 189 return (-1); 190 io->buf = tmp; 191 io->bufsize = len; 192 return (0); 193 } 194 195 /* 196 * Fill the input buffer, do chunk decoding on the fly 197 */ 198 static int 199 _http_fillbuf(struct httpio *io, size_t len) 200 { 201 if (io->error) 202 return (-1); 203 if (io->eof) 204 return (0); 205 206 if (io->chunked == 0) { 207 if (_http_growbuf(io, len) == -1) 208 return (-1); 209 if ((io->buflen = _fetch_read(io->conn, io->buf, len)) == -1) { 210 io->error = 1; 211 return (-1); 212 } 213 io->bufpos = 0; 214 return (io->buflen); 215 } 216 217 if (io->chunksize == 0) { 218 switch (_http_new_chunk(io)) { 219 case -1: 220 io->error = 1; 221 return (-1); 222 case 0: 223 io->eof = 1; 224 return (0); 225 } 226 } 227 228 if (len > io->chunksize) 229 len = io->chunksize; 230 if (_http_growbuf(io, len) == -1) 231 return (-1); 232 if ((io->buflen = _fetch_read(io->conn, io->buf, len)) == -1) { 233 io->error = 1; 234 return (-1); 235 } 236 io->chunksize -= io->buflen; 237 238 
if (io->chunksize == 0) { 239 char endl[2]; 240 241 if (_fetch_read(io->conn, endl, 2) != 2 || 242 endl[0] != '\r' || endl[1] != '\n') 243 return (-1); 244 } 245 246 io->bufpos = 0; 247 248 return (io->buflen); 249 } 250 251 /* 252 * Read function 253 */ 254 static int 255 _http_readfn(void *v, char *buf, int len) 256 { 257 struct httpio *io = (struct httpio *)v; 258 int l, pos; 259 260 if (io->error) 261 return (-1); 262 if (io->eof) 263 return (0); 264 265 for (pos = 0; len > 0; pos += l, len -= l) { 266 /* empty buffer */ 267 if (!io->buf || io->bufpos == io->buflen) 268 if (_http_fillbuf(io, (unsigned) len) < 1) 269 break; 270 l = io->buflen - io->bufpos; 271 if (len < l) 272 l = len; 273 bcopy(io->buf + io->bufpos, buf + pos, (unsigned) l); 274 io->bufpos += l; 275 } 276 277 if (!pos && io->error) 278 return (-1); 279 return (pos); 280 } 281 282 /* 283 * Write function 284 */ 285 static int 286 _http_writefn(void *v, const char *buf, int len) 287 { 288 struct httpio *io = (struct httpio *)v; 289 290 return (_fetch_write(io->conn, buf, (unsigned) len)); 291 } 292 293 /* 294 * Close function 295 */ 296 static int 297 _http_closefn(void *v) 298 { 299 struct httpio *io = (struct httpio *)v; 300 int r; 301 302 r = _fetch_close(io->conn); 303 if (io->buf) 304 free(io->buf); 305 free(io); 306 return (r); 307 } 308 309 /* 310 * Wrap a file descriptor up 311 */ 312 static FILE * 313 _http_funopen(conn_t *conn, int chunked) 314 { 315 struct httpio *io; 316 FILE *f; 317 318 if ((io = calloc(1, sizeof(*io))) == NULL) { 319 _fetch_syserr(); 320 return (NULL); 321 } 322 io->conn = conn; 323 io->chunked = chunked; 324 f = funopen(io, _http_readfn, _http_writefn, NULL, _http_closefn); 325 if (f == NULL) { 326 _fetch_syserr(); 327 free(io); 328 return (NULL); 329 } 330 return (f); 331 } 332 333 334 /***************************************************************************** 335 * Helper functions for talking to the server and parsing its replies 336 */ 337 338 /* Header 
types */ 339 typedef enum { 340 hdr_syserror = -2, 341 hdr_error = -1, 342 hdr_end = 0, 343 hdr_unknown = 1, 344 hdr_content_length, 345 hdr_content_range, 346 hdr_last_modified, 347 hdr_location, 348 hdr_transfer_encoding, 349 hdr_www_authenticate 350 } hdr_t; 351 352 /* Names of interesting headers */ 353 static struct { 354 hdr_t num; 355 const char *name; 356 } hdr_names[] = { 357 { hdr_content_length, "Content-Length" }, 358 { hdr_content_range, "Content-Range" }, 359 { hdr_last_modified, "Last-Modified" }, 360 { hdr_location, "Location" }, 361 { hdr_transfer_encoding, "Transfer-Encoding" }, 362 { hdr_www_authenticate, "WWW-Authenticate" }, 363 { hdr_unknown, NULL }, 364 }; 365 366 /* 367 * Send a formatted line; optionally echo to terminal 368 */ 369 static int 370 _http_cmd(conn_t *conn, const char *fmt, ...) 371 { 372 va_list ap; 373 size_t len; 374 char *msg; 375 int r; 376 377 va_start(ap, fmt); 378 len = vasprintf(&msg, fmt, ap); 379 va_end(ap); 380 381 if (msg == NULL) { 382 errno = ENOMEM; 383 _fetch_syserr(); 384 return (-1); 385 } 386 387 r = _fetch_putln(conn, msg, len); 388 free(msg); 389 390 if (r == -1) { 391 _fetch_syserr(); 392 return (-1); 393 } 394 395 return (0); 396 } 397 398 /* 399 * Get and parse status line 400 */ 401 static int 402 _http_get_reply(conn_t *conn) 403 { 404 char *p; 405 406 if (_fetch_getln(conn) == -1) 407 return (-1); 408 /* 409 * A valid status line looks like "HTTP/m.n xyz reason" where m 410 * and n are the major and minor protocol version numbers and xyz 411 * is the reply code. 412 * Unfortunately, there are servers out there (NCSA 1.5.1, to name 413 * just one) that do not send a version number, so we can't rely 414 * on finding one, but if we do, insist on it being 1.0 or 1.1. 415 * We don't care about the reason phrase. 416 */ 417 if (strncmp(conn->buf, "HTTP", 4) != 0) 418 return (HTTP_PROTOCOL_ERROR); 419 p = conn->buf + 4; 420 if (*p == '/') { 421 if (p[1] != '1' || p[2] != '.' 
|| (p[3] != '0' && p[3] != '1')) 422 return (HTTP_PROTOCOL_ERROR); 423 p += 4; 424 } 425 if (*p != ' ' || !isdigit((unsigned)p[1]) || !isdigit((unsigned)p[2]) || !isdigit((unsigned)p[3])) 426 return (HTTP_PROTOCOL_ERROR); 427 428 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'); 429 return (conn->err); 430 } 431 432 /* 433 * Check a header; if the type matches the given string, return a pointer 434 * to the beginning of the value. 435 */ 436 static const char * 437 _http_match(const char *str, const char *hdr) 438 { 439 while (*str && *hdr && tolower((unsigned)*str++) == tolower((unsigned)*hdr++)) 440 /* nothing */; 441 if (*str || *hdr != ':') 442 return (NULL); 443 while (*hdr && isspace((unsigned)*++hdr)) 444 /* nothing */; 445 return (hdr); 446 } 447 448 /* 449 * Get the next header and return the appropriate symbolic code. 450 */ 451 static hdr_t 452 _http_next_header(conn_t *conn, const char **p) 453 { 454 int i; 455 456 if (_fetch_getln(conn) == -1) 457 return (hdr_syserror); 458 while (conn->buflen && isspace((unsigned)conn->buf[conn->buflen - 1])) 459 conn->buflen--; 460 conn->buf[conn->buflen] = '\0'; 461 if (conn->buflen == 0) 462 return (hdr_end); 463 /* 464 * We could check for malformed headers but we don't really care. 465 * A valid header starts with a token immediately followed by a 466 * colon; a token is any sequence of non-control, non-whitespace 467 * characters except "()<>@,;:\\\"{}". 
468 */ 469 for (i = 0; hdr_names[i].num != hdr_unknown; i++) 470 if ((*p = _http_match(hdr_names[i].name, conn->buf)) != NULL) 471 return (hdr_names[i].num); 472 return (hdr_unknown); 473 } 474 475 /* 476 * Parse a last-modified header 477 */ 478 static int 479 _http_parse_mtime(const char *p, time_t *mtime) 480 { 481 char locale[64], *r; 482 struct tm tm; 483 484 strncpy(locale, setlocale(LC_TIME, NULL), sizeof(locale)); 485 setlocale(LC_TIME, "C"); 486 r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 487 /* XXX should add support for date-2 and date-3 */ 488 setlocale(LC_TIME, locale); 489 if (r == NULL) 490 return (-1); 491 DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d " 492 "%02d:%02d:%02d]\n", 493 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 494 tm.tm_hour, tm.tm_min, tm.tm_sec)); 495 *mtime = timegm(&tm); 496 return (0); 497 } 498 499 /* 500 * Parse a content-length header 501 */ 502 static int 503 _http_parse_length(const char *p, off_t *length) 504 { 505 off_t len; 506 507 for (len = 0; *p && isdigit((unsigned)*p); ++p) 508 len = len * 10 + (*p - '0'); 509 if (*p) 510 return (-1); 511 DEBUG(fprintf(stderr, "content length: [%lld]\n", 512 (long long)len)); 513 *length = len; 514 return (0); 515 } 516 517 /* 518 * Parse a content-range header 519 */ 520 static int 521 _http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size) 522 { 523 off_t first, last, len; 524 525 if (strncasecmp(p, "bytes ", 6) != 0) 526 return (-1); 527 p += 6; 528 if (*p == '*') { 529 first = last = -1; 530 ++p; 531 } else { 532 for (first = 0; *p && isdigit((unsigned)*p); ++p) 533 first = first * 10 + *p - '0'; 534 if (*p != '-') 535 return (-1); 536 for (last = 0, ++p; *p && isdigit((unsigned)*p); ++p) 537 last = last * 10 + *p - '0'; 538 } 539 if (first > last || *p != '/') 540 return (-1); 541 for (len = 0, ++p; *p && isdigit((unsigned)*p); ++p) 542 len = len * 10 + *p - '0'; 543 if (*p || len < last - first + 1) 544 return (-1); 545 if (first == -1) { 
546 DEBUG(fprintf(stderr, "content range: [*/%lld]\n", 547 (long long)len)); 548 *length = 0; 549 } else { 550 DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n", 551 (long long)first, (long long)last, (long long)len)); 552 *length = last - first + 1; 553 } 554 *offset = first; 555 *size = len; 556 return (0); 557 } 558 559 560 /***************************************************************************** 561 * Helper functions for authorization 562 */ 563 564 /* 565 * Base64 encoding 566 */ 567 static char * 568 _http_base64(const char *src) 569 { 570 static const char base64[] = 571 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 572 "abcdefghijklmnopqrstuvwxyz" 573 "0123456789+/"; 574 char *str, *dst; 575 size_t l; 576 int r; 577 unsigned t; 578 579 l = strlen(src); 580 if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL) 581 return (NULL); 582 dst = str; 583 r = 0; 584 585 while (l >= 3) { 586 t = (src[0] << 16) | (src[1] << 8) | src[2]; 587 dst[0] = base64[(t >> 18) & 0x3f]; 588 dst[1] = base64[(t >> 12) & 0x3f]; 589 dst[2] = base64[(t >> 6) & 0x3f]; 590 dst[3] = base64[(t >> 0) & 0x3f]; 591 src += 3; l -= 3; 592 dst += 4; r += 4; 593 } 594 595 switch (l) { 596 case 2: 597 t = (src[0] << 16) | (src[1] << 8); 598 dst[0] = base64[(t >> 18) & 0x3f]; 599 dst[1] = base64[(t >> 12) & 0x3f]; 600 dst[2] = base64[(t >> 6) & 0x3f]; 601 dst[3] = '='; 602 dst += 4; 603 r += 4; 604 break; 605 case 1: 606 t = src[0] << 16; 607 dst[0] = base64[(t >> 18) & 0x3f]; 608 dst[1] = base64[(t >> 12) & 0x3f]; 609 dst[2] = dst[3] = '='; 610 dst += 4; 611 r += 4; 612 break; 613 case 0: 614 break; 615 } 616 617 *dst = 0; 618 return (str); 619 } 620 621 /* 622 * Encode username and password 623 */ 624 static int 625 _http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd) 626 { 627 char *upw, *auth; 628 int r; 629 630 DEBUG(fprintf(stderr, "usr: [%s]\n", usr)); 631 DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd)); 632 if (asprintf(&upw, "%s:%s", usr, pwd) == -1) 633 return (-1); 634 
auth = _http_base64(upw); 635 free(upw); 636 if (auth == NULL) 637 return (-1); 638 r = _http_cmd(conn, "%s: Basic %s", hdr, auth); 639 free(auth); 640 return (r); 641 } 642 643 /* 644 * Send an authorization header 645 */ 646 static int 647 _http_authorize(conn_t *conn, const char *hdr, const char *p) 648 { 649 /* basic authorization */ 650 if (strncasecmp(p, "basic:", 6) == 0) { 651 char *user, *pwd, *str; 652 int r; 653 654 /* skip realm */ 655 for (p += 6; *p && *p != ':'; ++p) 656 /* nothing */ ; 657 if (!*p || strchr(++p, ':') == NULL) 658 return (-1); 659 if ((str = strdup(p)) == NULL) 660 return (-1); /* XXX */ 661 user = str; 662 pwd = strchr(str, ':'); 663 *pwd++ = '\0'; 664 r = _http_basic_auth(conn, hdr, user, pwd); 665 free(str); 666 return (r); 667 } 668 return (-1); 669 } 670 671 672 /***************************************************************************** 673 * Helper functions for connecting to a server or proxy 674 */ 675 676 /* 677 * Connect to the correct HTTP server or proxy. 
678 */ 679 static conn_t * 680 _http_connect(struct url *URL, struct url *purl, const char *flags) 681 { 682 conn_t *conn; 683 int verbose; 684 int af; 685 686 #ifdef INET6 687 af = AF_UNSPEC; 688 #else 689 af = AF_INET; 690 #endif 691 692 verbose = CHECK_FLAG('v'); 693 if (CHECK_FLAG('4')) 694 af = AF_INET; 695 #ifdef INET6 696 else if (CHECK_FLAG('6')) 697 af = AF_INET6; 698 #endif 699 700 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) { 701 URL = purl; 702 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 703 /* can't talk http to an ftp server */ 704 /* XXX should set an error code */ 705 return (NULL); 706 } 707 708 if ((conn = _fetch_connect(URL->host, URL->port, af, verbose)) == NULL) 709 /* _fetch_connect() has already set an error code */ 710 return (NULL); 711 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && 712 _fetch_ssl(conn, verbose) == -1) { 713 _fetch_close(conn); 714 /* grrr */ 715 errno = EAUTH; 716 _fetch_syserr(); 717 return (NULL); 718 } 719 720 #ifdef TCP_NOPUSH 721 { 722 int val; 723 724 val = 1; 725 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val)); 726 } 727 #endif 728 729 return (conn); 730 } 731 732 static struct url * 733 _http_get_proxy(const char *flags) 734 { 735 struct url *purl; 736 char *p; 737 738 if (flags != NULL && strchr(flags, 'd') != NULL) 739 return (NULL); 740 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) && 741 *p && (purl = fetchParseURL(p))) { 742 if (!*purl->scheme) 743 strcpy(purl->scheme, SCHEME_HTTP); 744 if (!purl->port) 745 purl->port = _fetch_default_proxy_port(purl->scheme); 746 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0) 747 return (purl); 748 fetchFreeURL(purl); 749 } 750 return (NULL); 751 } 752 753 static void 754 _http_print_html(FILE *out, FILE *in) 755 { 756 size_t len; 757 char *line, *p, *q; 758 int comment, tag; 759 760 comment = tag = 0; 761 while ((line = fgetln(in, &len)) != NULL) { 762 while (len && isspace((unsigned)line[len - 1])) 763 --len; 764 
for (p = q = line; q < line + len; ++q) { 765 if (comment && *q == '-') { 766 if (q + 2 < line + len && 767 strcmp(q, "-->") == 0) { 768 tag = comment = 0; 769 q += 2; 770 } 771 } else if (tag && !comment && *q == '>') { 772 p = q + 1; 773 tag = 0; 774 } else if (!tag && *q == '<') { 775 if (q > p) 776 fwrite(p, (unsigned)(q - p), 1, out); 777 tag = 1; 778 if (q + 3 < line + len && 779 strcmp(q, "<!--") == 0) { 780 comment = 1; 781 q += 3; 782 } 783 } 784 } 785 if (!tag && q > p) 786 fwrite(p, (unsigned)(q - p), 1, out); 787 fputc('\n', out); 788 } 789 } 790 791 792 /***************************************************************************** 793 * Core 794 */ 795 796 /* 797 * Send a request and process the reply 798 * 799 * XXX This function is way too long, the do..while loop should be split 800 * XXX off into a separate function. 801 */ 802 FILE * 803 _http_request(struct url *URL, const char *op, struct url_stat *us, 804 struct url *purl, const char *flags) 805 { 806 conn_t *conn; 807 struct url *url, *new; 808 int chunked, direct, need_auth, noredirect, verbose; 809 int e, i, n, val; 810 off_t offset, clength, length, size; 811 time_t mtime; 812 const char *p; 813 FILE *f; 814 hdr_t h; 815 char hbuf[MAXHOSTNAMELEN + 7], *host; 816 817 direct = CHECK_FLAG('d'); 818 noredirect = CHECK_FLAG('A'); 819 verbose = CHECK_FLAG('v'); 820 821 if (direct && purl) { 822 fetchFreeURL(purl); 823 purl = NULL; 824 } 825 826 /* try the provided URL first */ 827 url = URL; 828 829 /* if the A flag is set, we only get one try */ 830 n = noredirect ? 1 : MAX_REDIRECT; 831 i = 0; 832 833 e = HTTP_PROTOCOL_ERROR; 834 need_auth = 0; 835 do { 836 new = NULL; 837 chunked = 0; 838 offset = 0; 839 clength = -1; 840 length = -1; 841 size = -1; 842 mtime = 0; 843 844 /* check port */ 845 if (!url->port) 846 url->port = _fetch_default_port(url->scheme); 847 848 /* were we redirected to an FTP URL? 
*/ 849 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) { 850 if (strcmp(op, "GET") == 0) 851 return (_ftp_request(url, "RETR", us, purl, flags)); 852 else if (strcmp(op, "HEAD") == 0) 853 return (_ftp_request(url, "STAT", us, purl, flags)); 854 } 855 856 /* connect to server or proxy */ 857 if ((conn = _http_connect(url, purl, flags)) == NULL) 858 goto ouch; 859 860 host = url->host; 861 #ifdef INET6 862 if (strchr(url->host, ':')) { 863 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); 864 host = hbuf; 865 } 866 #endif 867 if (url->port != _fetch_default_port(url->scheme)) { 868 if (host != hbuf) { 869 strcpy(hbuf, host); 870 host = hbuf; 871 } 872 snprintf(hbuf + strlen(hbuf), 873 sizeof(hbuf) - strlen(hbuf), ":%d", url->port); 874 } 875 876 /* send request */ 877 if (verbose) 878 _fetch_info("requesting %s://%s%s", 879 url->scheme, host, url->doc); 880 if (purl) { 881 _http_cmd(conn, "%s %s://%s%s HTTP/1.1", 882 op, url->scheme, host, url->doc); 883 } else { 884 _http_cmd(conn, "%s %s HTTP/1.1", 885 op, url->doc); 886 } 887 888 /* virtual host */ 889 _http_cmd(conn, "Host: %s", host); 890 891 /* proxy authorization */ 892 if (purl) { 893 if (*purl->user || *purl->pwd) 894 _http_basic_auth(conn, "Proxy-Authorization", 895 purl->user, purl->pwd); 896 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0') 897 _http_authorize(conn, "Proxy-Authorization", p); 898 } 899 900 /* server authorization */ 901 if (need_auth || *url->user || *url->pwd) { 902 if (*url->user || *url->pwd) 903 _http_basic_auth(conn, "Authorization", url->user, url->pwd); 904 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0') 905 _http_authorize(conn, "Authorization", p); 906 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) { 907 _http_basic_auth(conn, "Authorization", url->user, url->pwd); 908 } else { 909 _http_seterr(HTTP_NEED_AUTH); 910 goto ouch; 911 } 912 } 913 914 /* other headers */ 915 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') { 916 if 
(strcasecmp(p, "auto") == 0) 917 _http_cmd(conn, "Referer: %s://%s%s", 918 url->scheme, host, url->doc); 919 else 920 _http_cmd(conn, "Referer: %s", p); 921 } 922 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0') 923 _http_cmd(conn, "User-Agent: %s", p); 924 else 925 _http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname()); 926 if (url->offset > 0) 927 _http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset); 928 _http_cmd(conn, "Connection: close"); 929 _http_cmd(conn, ""); 930 931 /* 932 * Force the queued request to be dispatched. Normally, one 933 * would do this with shutdown(2) but squid proxies can be 934 * configured to disallow such half-closed connections. To 935 * be compatible with such configurations, fiddle with socket 936 * options to force the pending data to be written. 937 */ 938 val = 0; 939 #ifdef TCP_NOPUSH 940 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, 941 sizeof(val)); 942 #endif 943 val = 1; 944 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val, 945 sizeof(val)); 946 947 /* get reply */ 948 switch (_http_get_reply(conn)) { 949 case HTTP_OK: 950 case HTTP_PARTIAL: 951 /* fine */ 952 break; 953 case HTTP_MOVED_PERM: 954 case HTTP_MOVED_TEMP: 955 case HTTP_SEE_OTHER: 956 /* 957 * Not so fine, but we still have to read the 958 * headers to get the new location. 959 */ 960 break; 961 case HTTP_NEED_AUTH: 962 if (need_auth) { 963 /* 964 * We already sent out authorization code, 965 * so there's nothing more we can do. 966 */ 967 _http_seterr(conn->err); 968 goto ouch; 969 } 970 /* try again, but send the password this time */ 971 if (verbose) 972 _fetch_info("server requires authorization"); 973 break; 974 case HTTP_NEED_PROXY_AUTH: 975 /* 976 * If we're talking to a proxy, we already sent 977 * our proxy authorization code, so there's 978 * nothing more we can do. 
979 */ 980 _http_seterr(conn->err); 981 goto ouch; 982 case HTTP_BAD_RANGE: 983 /* 984 * This can happen if we ask for 0 bytes because 985 * we already have the whole file. Consider this 986 * a success for now, and check sizes later. 987 */ 988 break; 989 case HTTP_PROTOCOL_ERROR: 990 /* fall through */ 991 case -1: 992 _fetch_syserr(); 993 goto ouch; 994 default: 995 _http_seterr(conn->err); 996 if (!verbose) 997 goto ouch; 998 /* fall through so we can get the full error message */ 999 } 1000 1001 /* get headers */ 1002 do { 1003 switch ((h = _http_next_header(conn, &p))) { 1004 case hdr_syserror: 1005 _fetch_syserr(); 1006 goto ouch; 1007 case hdr_error: 1008 _http_seterr(HTTP_PROTOCOL_ERROR); 1009 goto ouch; 1010 case hdr_content_length: 1011 _http_parse_length(p, &clength); 1012 break; 1013 case hdr_content_range: 1014 _http_parse_range(p, &offset, &length, &size); 1015 break; 1016 case hdr_last_modified: 1017 _http_parse_mtime(p, &mtime); 1018 break; 1019 case hdr_location: 1020 if (!HTTP_REDIRECT(conn->err)) 1021 break; 1022 if (new) 1023 free(new); 1024 if (verbose) 1025 _fetch_info("%d redirect to %s", conn->err, p); 1026 if (*p == '/') 1027 /* absolute path */ 1028 new = fetchMakeURL(url->scheme, url->host, url->port, p, 1029 url->user, url->pwd); 1030 else 1031 new = fetchParseURL(p); 1032 if (new == NULL) { 1033 /* XXX should set an error code */ 1034 DEBUG(fprintf(stderr, "failed to parse new URL\n")); 1035 goto ouch; 1036 } 1037 if (!*new->user && !*new->pwd) { 1038 strcpy(new->user, url->user); 1039 strcpy(new->pwd, url->pwd); 1040 } 1041 new->offset = url->offset; 1042 new->length = url->length; 1043 break; 1044 case hdr_transfer_encoding: 1045 /* XXX weak test*/ 1046 chunked = (strcasecmp(p, "chunked") == 0); 1047 break; 1048 case hdr_www_authenticate: 1049 if (conn->err != HTTP_NEED_AUTH) 1050 break; 1051 /* if we were smarter, we'd check the method and realm */ 1052 break; 1053 case hdr_end: 1054 /* fall through */ 1055 case hdr_unknown: 1056 /* 
ignore */ 1057 break; 1058 } 1059 } while (h > hdr_end); 1060 1061 /* we need to provide authentication */ 1062 if (conn->err == HTTP_NEED_AUTH) { 1063 e = conn->err; 1064 need_auth = 1; 1065 _fetch_close(conn); 1066 conn = NULL; 1067 continue; 1068 } 1069 1070 /* requested range not satisfiable */ 1071 if (conn->err == HTTP_BAD_RANGE) { 1072 if (url->offset == size && url->length == 0) { 1073 /* asked for 0 bytes; fake it */ 1074 offset = url->offset; 1075 conn->err = HTTP_OK; 1076 break; 1077 } else { 1078 _http_seterr(conn->err); 1079 goto ouch; 1080 } 1081 } 1082 1083 /* we have a hit or an error */ 1084 if (conn->err == HTTP_OK || conn->err == HTTP_PARTIAL || HTTP_ERROR(conn->err)) 1085 break; 1086 1087 /* all other cases: we got a redirect */ 1088 e = conn->err; 1089 need_auth = 0; 1090 _fetch_close(conn); 1091 conn = NULL; 1092 if (!new) { 1093 DEBUG(fprintf(stderr, "redirect with no new location\n")); 1094 break; 1095 } 1096 if (url != URL) 1097 fetchFreeURL(url); 1098 url = new; 1099 } while (++i < n); 1100 1101 /* we failed, or ran out of retries */ 1102 if (conn == NULL) { 1103 _http_seterr(e); 1104 goto ouch; 1105 } 1106 1107 DEBUG(fprintf(stderr, "offset %lld, length %lld," 1108 " size %lld, clength %lld\n", 1109 (long long)offset, (long long)length, 1110 (long long)size, (long long)clength)); 1111 1112 /* check for inconsistencies */ 1113 if (clength != -1 && length != -1 && clength != length) { 1114 _http_seterr(HTTP_PROTOCOL_ERROR); 1115 goto ouch; 1116 } 1117 if (clength == -1) 1118 clength = length; 1119 if (clength != -1) 1120 length = offset + clength; 1121 if (length != -1 && size != -1 && length != size) { 1122 _http_seterr(HTTP_PROTOCOL_ERROR); 1123 goto ouch; 1124 } 1125 if (size == -1) 1126 size = length; 1127 1128 /* fill in stats */ 1129 if (us) { 1130 us->size = size; 1131 us->atime = us->mtime = mtime; 1132 } 1133 1134 /* too far? 
*/ 1135 if (URL->offset > 0 && offset > URL->offset) { 1136 _http_seterr(HTTP_PROTOCOL_ERROR); 1137 goto ouch; 1138 } 1139 1140 /* report back real offset and size */ 1141 URL->offset = offset; 1142 URL->length = (unsigned) clength; 1143 1144 /* wrap it up in a FILE */ 1145 if ((f = _http_funopen(conn, chunked)) == NULL) { 1146 _fetch_syserr(); 1147 goto ouch; 1148 } 1149 1150 if (url != URL) 1151 fetchFreeURL(url); 1152 if (purl) 1153 fetchFreeURL(purl); 1154 1155 if (HTTP_ERROR(conn->err)) { 1156 _http_print_html(stderr, f); 1157 fclose(f); 1158 f = NULL; 1159 } 1160 1161 return (f); 1162 1163 ouch: 1164 if (url != URL) 1165 fetchFreeURL(url); 1166 if (purl) 1167 fetchFreeURL(purl); 1168 if (conn != NULL) 1169 _fetch_close(conn); 1170 return (NULL); 1171 } 1172 1173 1174 /***************************************************************************** 1175 * Entry points 1176 */ 1177 1178 /* 1179 * Retrieve and stat a file by HTTP 1180 */ 1181 FILE * 1182 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags) 1183 { 1184 return (_http_request(URL, "GET", us, _http_get_proxy(flags), flags)); 1185 } 1186 1187 /* 1188 * Retrieve a file by HTTP 1189 */ 1190 FILE * 1191 fetchGetHTTP(struct url *URL, const char *flags) 1192 { 1193 return (fetchXGetHTTP(URL, NULL, flags)); 1194 } 1195 1196 /* 1197 * Store a file by HTTP 1198 */ 1199 /* ARGSUSED0 */ 1200 FILE * 1201 fetchPutHTTP(struct url *URL __unused, const char *flags __unused) 1202 { 1203 warnx("fetchPutHTTP(): not implemented"); 1204 return (NULL); 1205 } 1206 1207 /* 1208 * Get an HTTP document's metadata 1209 */ 1210 int 1211 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) 1212 { 1213 FILE *f; 1214 1215 f = _http_request(URL, "HEAD", us, _http_get_proxy(flags), flags); 1216 if (f == NULL) 1217 return (-1); 1218 fclose(f); 1219 return (0); 1220 } 1221 1222 /* 1223 * List a directory 1224 */ 1225 /* ARGSUSED0 */ 1226 struct url_ent * 1227 fetchListHTTP(struct url *url __unused, 
const char *flags __unused) 1228 { 1229 warnx("fetchListHTTP(): not implemented"); 1230 return (NULL); 1231 } 1232