xref: /netbsd-src/external/bsd/fetch/dist/libfetch/http.c (revision 466a16a118933bd295a8a104f095714fadf9cf68)
1 /*	$NetBSD: http.c,v 1.1.1.2 2008/10/07 15:55:20 joerg Exp $	*/
2 /*-
3  * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4  * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5  * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer
13  *    in this position and unchanged.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
32  */
33 
34 /*
35  * The following copyright applies to the base64 code:
36  *
37  *-
38  * Copyright 1997 Massachusetts Institute of Technology
39  *
40  * Permission to use, copy, modify, and distribute this software and
41  * its documentation for any purpose and without fee is hereby
42  * granted, provided that both the above copyright notice and this
43  * permission notice appear in all copies, that both the above
44  * copyright notice and this permission notice appear in all
45  * supporting documentation, and that the name of M.I.T. not be used
46  * in advertising or publicity pertaining to distribution of the
47  * software without specific, written prior permission.  M.I.T. makes
48  * no representations about the suitability of this software for any
49  * purpose.  It is provided "as is" without express or implied
50  * warranty.
51  *
52  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
53  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  */
65 
66 #ifdef __linux__
67 /* Keep this down to Linux, it can create surprises elsewhere. */
68 #define _GNU_SOURCE
69 #endif
70 
71 #if HAVE_CONFIG_H
72 #include "config.h"
73 #endif
74 #ifndef NETBSD
75 #include <nbcompat.h>
76 #endif
77 
78 #include <sys/types.h>
79 #include <sys/socket.h>
80 
81 #include <ctype.h>
82 #include <errno.h>
83 #include <locale.h>
84 #include <stdarg.h>
85 #ifndef NETBSD
86 #include <nbcompat/netdb.h>
87 #include <nbcompat/stdio.h>
88 #else
89 #include <netdb.h>
90 #include <stdio.h>
91 #endif
92 #include <stdlib.h>
93 #include <string.h>
94 #include <time.h>
95 #include <unistd.h>
96 
97 #include <arpa/inet.h>
98 
99 #include <netinet/in.h>
100 #include <netinet/tcp.h>
101 
102 #include "fetch.h"
103 #include "common.h"
104 #include "httperr.h"
105 
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
/* returned by http_get_reply() when the status line is malformed */
#define HTTP_PROTOCOL_ERROR	999

/* True if xyz is one of the redirect codes listed above */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for every client (4xx) and server (5xx) error status.  Both
 * bounds are inclusive so that 400 itself is classified as an error.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
127 
128 
129 /*****************************************************************************
130  * I/O functions for decoding chunked streams
131  */
132 
133 struct httpio
134 {
135 	conn_t		*conn;		/* connection */
136 	int		 chunked;	/* chunked mode */
137 	char		*buf;		/* chunk buffer */
138 	size_t		 bufsize;	/* size of chunk buffer */
139 	ssize_t		 buflen;	/* amount of data currently in buffer */
140 	int		 bufpos;	/* current read offset in buffer */
141 	int		 eof;		/* end-of-file flag */
142 	int		 error;		/* error flag */
143 	size_t		 chunksize;	/* remaining size of current chunk */
144 };
145 
146 /*
147  * Get next chunk header
148  */
149 static int
150 http_new_chunk(struct httpio *io)
151 {
152 	char *p;
153 
154 	if (fetch_getln(io->conn) == -1)
155 		return (-1);
156 
157 	if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
158 		return (-1);
159 
160 	for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
161 		if (*p == ';')
162 			break;
163 		if (!isxdigit((unsigned char)*p))
164 			return (-1);
165 		if (isdigit((unsigned char)*p)) {
166 			io->chunksize = io->chunksize * 16 +
167 			    *p - '0';
168 		} else {
169 			io->chunksize = io->chunksize * 16 +
170 			    10 + tolower((unsigned char)*p) - 'a';
171 		}
172 	}
173 
174 	return (io->chunksize);
175 }
176 
177 /*
178  * Grow the input buffer to at least len bytes
179  */
180 static int
181 http_growbuf(struct httpio *io, size_t len)
182 {
183 	char *tmp;
184 
185 	if (io->bufsize >= len)
186 		return (0);
187 
188 	if ((tmp = realloc(io->buf, len)) == NULL)
189 		return (-1);
190 	io->buf = tmp;
191 	io->bufsize = len;
192 	return (0);
193 }
194 
195 /*
196  * Fill the input buffer, do chunk decoding on the fly
197  */
198 static int
199 http_fillbuf(struct httpio *io, size_t len)
200 {
201 	if (io->error)
202 		return (-1);
203 	if (io->eof)
204 		return (0);
205 
206 	if (io->chunked == 0) {
207 		if (http_growbuf(io, len) == -1)
208 			return (-1);
209 		if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
210 			io->error = 1;
211 			return (-1);
212 		}
213 		io->bufpos = 0;
214 		return (io->buflen);
215 	}
216 
217 	if (io->chunksize == 0) {
218 		switch (http_new_chunk(io)) {
219 		case -1:
220 			io->error = 1;
221 			return (-1);
222 		case 0:
223 			io->eof = 1;
224 			return (0);
225 		}
226 	}
227 
228 	if (len > io->chunksize)
229 		len = io->chunksize;
230 	if (http_growbuf(io, len) == -1)
231 		return (-1);
232 	if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
233 		io->error = 1;
234 		return (-1);
235 	}
236 	io->chunksize -= io->buflen;
237 
238 	if (io->chunksize == 0) {
239 		char endl[2];
240 		ssize_t len2;
241 
242 		len2 = fetch_read(io->conn, endl, 2);
243 		if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
244 			return (-1);
245 		if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
246 			return (-1);
247 	}
248 
249 	io->bufpos = 0;
250 
251 	return (io->buflen);
252 }
253 
254 /*
255  * Read function
256  */
257 static ssize_t
258 http_readfn(void *v, void *buf, size_t len)
259 {
260 	struct httpio *io = (struct httpio *)v;
261 	size_t l, pos;
262 
263 	if (io->error)
264 		return (-1);
265 	if (io->eof)
266 		return (0);
267 
268 	for (pos = 0; len > 0; pos += l, len -= l) {
269 		/* empty buffer */
270 		if (!io->buf || io->bufpos == io->buflen)
271 			if (http_fillbuf(io, len) < 1)
272 				break;
273 		l = io->buflen - io->bufpos;
274 		if (len < l)
275 			l = len;
276 		memcpy((char *)buf + pos, io->buf + io->bufpos, l);
277 		io->bufpos += l;
278 	}
279 
280 	if (!pos && io->error)
281 		return (-1);
282 	return (pos);
283 }
284 
285 /*
286  * Write function
287  */
288 static ssize_t
289 http_writefn(void *v, const void *buf, size_t len)
290 {
291 	struct httpio *io = (struct httpio *)v;
292 
293 	return (fetch_write(io->conn, buf, len));
294 }
295 
296 /*
297  * Close function
298  */
299 static void
300 http_closefn(void *v)
301 {
302 	struct httpio *io = (struct httpio *)v;
303 
304 	fetch_close(io->conn);
305 	if (io->buf)
306 		free(io->buf);
307 	free(io);
308 }
309 
310 /*
311  * Wrap a file descriptor up
312  */
313 static fetchIO *
314 http_funopen(conn_t *conn, int chunked)
315 {
316 	struct httpio *io;
317 	fetchIO *f;
318 
319 	if ((io = calloc(1, sizeof(*io))) == NULL) {
320 		fetch_syserr();
321 		return (NULL);
322 	}
323 	io->conn = conn;
324 	io->chunked = chunked;
325 	f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
326 	if (f == NULL) {
327 		fetch_syserr();
328 		free(io);
329 		return (NULL);
330 	}
331 	return (f);
332 }
333 
334 
335 /*****************************************************************************
336  * Helper functions for talking to the server and parsing its replies
337  */
338 
/*
 * Header types.  Values at or below hdr_end stop the header loop in
 * http_request(); everything above it identifies a header line.
 */
typedef enum {
	hdr_syserror		= -2,
	hdr_error		= -1,
	hdr_end			= 0,
	hdr_unknown		= 1,
	hdr_content_length	= 2,
	hdr_content_range	= 3,
	hdr_last_modified	= 4,
	hdr_location		= 5,
	hdr_transfer_encoding	= 6,
	hdr_www_authenticate	= 7
} hdr_t;

/*
 * Names of interesting headers.  The table is terminated by the
 * hdr_unknown entry, which is what http_next_header() scans for.
 */
static struct {
	hdr_t		 num;
	const char	*name;
} hdr_names[] = {
	{ hdr_content_length,	 "Content-Length" },
	{ hdr_content_range,	 "Content-Range" },
	{ hdr_last_modified,	 "Last-Modified" },
	{ hdr_location,		 "Location" },
	{ hdr_transfer_encoding, "Transfer-Encoding" },
	{ hdr_www_authenticate,	 "WWW-Authenticate" },
	{ hdr_unknown,		 NULL },
};
366 
367 /*
368  * Send a formatted line; optionally echo to terminal
369  */
370 static int
371 http_cmd(conn_t *conn, const char *fmt, ...)
372 {
373 	va_list ap;
374 	size_t len;
375 	char *msg;
376 	int r;
377 
378 	va_start(ap, fmt);
379 	len = vasprintf(&msg, fmt, ap);
380 	va_end(ap);
381 
382 	if (msg == NULL) {
383 		errno = ENOMEM;
384 		fetch_syserr();
385 		return (-1);
386 	}
387 
388 	r = fetch_putln(conn, msg, len);
389 	free(msg);
390 
391 	if (r == -1) {
392 		fetch_syserr();
393 		return (-1);
394 	}
395 
396 	return (0);
397 }
398 
399 /*
400  * Get and parse status line
401  */
402 static int
403 http_get_reply(conn_t *conn)
404 {
405 	char *p;
406 
407 	if (fetch_getln(conn) == -1)
408 		return (-1);
409 	/*
410 	 * A valid status line looks like "HTTP/m.n xyz reason" where m
411 	 * and n are the major and minor protocol version numbers and xyz
412 	 * is the reply code.
413 	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
414 	 * just one) that do not send a version number, so we can't rely
415 	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
416 	 * We don't care about the reason phrase.
417 	 */
418 	if (strncmp(conn->buf, "HTTP", 4) != 0)
419 		return (HTTP_PROTOCOL_ERROR);
420 	p = conn->buf + 4;
421 	if (*p == '/') {
422 		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
423 			return (HTTP_PROTOCOL_ERROR);
424 		p += 4;
425 	}
426 	if (*p != ' ' ||
427 	    !isdigit((unsigned char)p[1]) ||
428 	    !isdigit((unsigned char)p[2]) ||
429 	    !isdigit((unsigned char)p[3]))
430 		return (HTTP_PROTOCOL_ERROR);
431 
432 	conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
433 	return (conn->err);
434 }
435 
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header line.
 * The name comparison ignores case and the name must be followed
 * directly by a colon.  Returns NULL when the line is a different
 * header; otherwise a pointer to the first non-whitespace character
 * after the colon (possibly the terminating NUL).
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Advance only while the characters agree.  Stepping past a
	 * mismatching pair would let a name that differs only in its
	 * last character be accepted.
	 */
	while (*str && *hdr &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		str++;
		hdr++;
	}
	if (*str || *hdr != ':')
		return (NULL);
	/* step over the colon and any whitespace that follows it */
	do {
		hdr++;
	} while (isspace((unsigned char)*hdr));
	return (hdr);
}
452 
453 /*
454  * Get the next header and return the appropriate symbolic code.
455  */
456 static hdr_t
457 http_next_header(conn_t *conn, const char **p)
458 {
459 	int i;
460 
461 	if (fetch_getln(conn) == -1)
462 		return (hdr_syserror);
463 	while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
464 		conn->buflen--;
465 	conn->buf[conn->buflen] = '\0';
466 	if (conn->buflen == 0)
467 		return (hdr_end);
468 	/*
469 	 * We could check for malformed headers but we don't really care.
470 	 * A valid header starts with a token immediately followed by a
471 	 * colon; a token is any sequence of non-control, non-whitespace
472 	 * characters except "()<>@,;:\\\"{}".
473 	 */
474 	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
475 		if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
476 			return (hdr_names[i].num);
477 	return (hdr_unknown);
478 }
479 
/*
 * Parse a last-modified header
 *
 * Only the "Sun, 06 Nov 1994 08:49:37 GMT" form is understood.  The
 * LC_TIME locale is switched to "C" while parsing so that day and month
 * names are matched in English, and is restored afterwards.
 *
 * Returns 0 and stores the time in *mtime on success; returns -1 if the
 * string does not parse or the current locale name cannot be saved.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	const char *cur;
	char *saved, *r;
	struct tm tm;
	size_t n;

	/*
	 * Keep a private, properly terminated copy of the locale name:
	 * the string returned by setlocale() may be overwritten by the
	 * next call and has no fixed maximum length.
	 */
	saved = NULL;
	cur = setlocale(LC_TIME, NULL);
	if (cur != NULL) {
		n = strlen(cur) + 1;
		if ((saved = malloc(n)) == NULL)
			return (-1);
		memcpy(saved, cur, n);
	}

	setlocale(LC_TIME, "C");
	/* strptime() only sets the fields named in the format */
	memset(&tm, 0, sizeof(tm));
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	if (saved != NULL) {
		setlocale(LC_TIME, saved);
		free(saved);
	}
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
499 
/*
 * Parse a content-length header
 *
 * p must consist of one or more decimal digits and nothing else.
 * Returns 0 and stores the value in *length on success; returns -1, and
 * leaves *length alone, if the string is empty, contains anything but
 * digits, or does not fit in off_t.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	/* largest off_t value; POSIX guarantees 8-bit bytes */
	const off_t half = (off_t)1 << (sizeof(off_t) * 8 - 2);
	const off_t maxlen = half - 1 + half;
	off_t len;
	int digit;

	if (!isdigit((unsigned char)*p))
		return (-1);
	for (len = 0; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* the next step would overflow: reject instead */
		if (len > (maxlen - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	*length = len;
	return (0);
}
515 
/*
 * Parse a possibly empty run of decimal digits at *pp, store the value
 * in *out and advance *pp past the digits.  Returns -1, without touching
 * *pp or *out, if the value does not fit in off_t.
 */
static int
http_range_number(const char **pp, off_t *out)
{
	/* largest off_t value; POSIX guarantees 8-bit bytes */
	const off_t half = (off_t)1 << (sizeof(off_t) * 8 - 2);
	const off_t maxval = half - 1 + half;
	const char *s;
	off_t val;
	int digit;

	val = 0;
	for (s = *pp; isdigit((unsigned char)*s); ++s) {
		digit = *s - '0';
		if (val > (maxval - digit) / 10)
			return (-1);
		val = val * 10 + digit;
	}
	*pp = s;
	*out = val;
	return (0);
}

/*
 * Parse a content-range header
 *
 * Accepts "bytes first-last/size" and "bytes * /size" (without the
 * space).  On success returns 0 and stores the first byte position in
 * *offset (-1 for the '*' form), the number of bytes in the range in
 * *length (0 for the '*' form) and the total size in *size.  Returns -1
 * on a syntax error, an inconsistent range, or a number that does not
 * fit in off_t.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_range_number(&p, &first) == -1 || *p != '-')
			return (-1);
		++p;
		if (http_range_number(&p, &last) == -1)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	++p;
	if (http_range_number(&p, &len) == -1)
		return (-1);
	/*
	 * The range must fit inside the total size.  Written without the
	 * "+ 1" so that last == maximum off_t cannot overflow.
	 */
	if (*p || len <= last - first)
		return (-1);
	if (first == -1)
		*length = 0;
	else
		*length = last - first + 1;
	*offset = first;
	*size = len;
	return (0);
}
552 
553 
554 /*****************************************************************************
555  * Helper functions for authorization
556  */
557 
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated, NUL-terminated, '='-padded string that the caller
 * must free().  Returns NULL if memory cannot be allocated.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Work on unsigned bytes: where plain char is signed, bytes of
	 * 0x80 and above would otherwise be sign-extended and corrupt
	 * every 6-bit group assembled from them.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	unsigned long t;
	size_t l;

	l = strlen(src);
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	/* full groups: three input bytes become four output characters */
	while (l >= 3) {
		t = ((unsigned long)in[0] << 16) |
		    ((unsigned long)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* one or two leftover bytes are padded with '=' */
	switch (l) {
	case 2:
		t = ((unsigned long)in[0] << 16) |
		    ((unsigned long)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned long)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	default:
		break;
	}

	*dst = '\0';
	return (str);
}
613 
614 /*
615  * Encode username and password
616  */
617 static int
618 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
619 {
620 	char *upw, *auth;
621 	int r;
622 
623 	if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
624 		return (-1);
625 	auth = http_base64(upw);
626 	free(upw);
627 	if (auth == NULL)
628 		return (-1);
629 	r = http_cmd(conn, "%s: Basic %s", hdr, auth);
630 	free(auth);
631 	return (r);
632 }
633 
634 /*
635  * Send an authorization header
636  */
637 static int
638 http_authorize(conn_t *conn, const char *hdr, const char *p)
639 {
640 	/* basic authorization */
641 	if (strncasecmp(p, "basic:", 6) == 0) {
642 		char *user, *pwd, *str;
643 		int r;
644 
645 		/* skip realm */
646 		for (p += 6; *p && *p != ':'; ++p)
647 			/* nothing */ ;
648 		if (!*p || strchr(++p, ':') == NULL)
649 			return (-1);
650 		if ((str = strdup(p)) == NULL)
651 			return (-1); /* XXX */
652 		user = str;
653 		pwd = strchr(str, ':');
654 		*pwd++ = '\0';
655 		r = http_basic_auth(conn, hdr, user, pwd);
656 		free(str);
657 		return (r);
658 	}
659 	return (-1);
660 }
661 
662 
663 /*****************************************************************************
664  * Helper functions for connecting to a server or proxy
665  */
666 
667 /*
668  * Connect to the correct HTTP server or proxy.
669  */
670 static conn_t *
671 http_connect(struct url *URL, struct url *purl, const char *flags)
672 {
673 	conn_t *conn;
674 	int af, verbose;
675 #ifdef TCP_NOPUSH
676 	int val;
677 #endif
678 
679 #ifdef INET6
680 	af = AF_UNSPEC;
681 #else
682 	af = AF_INET;
683 #endif
684 
685 	verbose = CHECK_FLAG('v');
686 	if (CHECK_FLAG('4'))
687 		af = AF_INET;
688 #ifdef INET6
689 	else if (CHECK_FLAG('6'))
690 		af = AF_INET6;
691 #endif
692 
693 	if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
694 		URL = purl;
695 	} else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
696 		/* can't talk http to an ftp server */
697 		/* XXX should set an error code */
698 		return (NULL);
699 	}
700 
701 	if ((conn = fetch_connect(URL->host, URL->port, af, verbose)) == NULL)
702 		/* fetch_connect() has already set an error code */
703 		return (NULL);
704 	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
705 	    fetch_ssl(conn, verbose) == -1) {
706 		fetch_close(conn);
707 		/* grrr */
708 #ifdef EAUTH
709 		errno = EAUTH;
710 #else
711 		errno = EPERM;
712 #endif
713 		fetch_syserr();
714 		return (NULL);
715 	}
716 
717 #ifdef TCP_NOPUSH
718 	val = 1;
719 	setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
720 #endif
721 
722 	return (conn);
723 }
724 
725 static struct url *
726 http_get_proxy(struct url * url, const char *flags)
727 {
728 	struct url *purl;
729 	char *p;
730 
731 	if (flags != NULL && strchr(flags, 'd') != NULL)
732 		return (NULL);
733 	if (fetch_no_proxy_match(url->host))
734 		return (NULL);
735 	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
736 	    *p && (purl = fetchParseURL(p))) {
737 		if (!*purl->scheme)
738 			strcpy(purl->scheme, SCHEME_HTTP);
739 		if (!purl->port)
740 			purl->port = fetch_default_proxy_port(purl->scheme);
741 		if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
742 			return (purl);
743 		fetchFreeURL(purl);
744 	}
745 	return (NULL);
746 }
747 
748 /*****************************************************************************
749  * Core
750  */
751 
752 /*
753  * Send a request and process the reply
754  *
755  * XXX This function is way too long, the do..while loop should be split
756  * XXX off into a separate function.
757  */
758 fetchIO *
759 http_request(struct url *URL, const char *op, struct url_stat *us,
760     struct url *purl, const char *flags)
761 {
762 	conn_t *conn;
763 	struct url *url, *new;
764 	int chunked, direct, need_auth, noredirect, verbose;
765 	int e, i, n, val;
766 	off_t offset, clength, length, size;
767 	time_t mtime;
768 	const char *p;
769 	fetchIO *f;
770 	hdr_t h;
771 	char hbuf[URL_HOSTLEN + 7], *host;
772 
773 	direct = CHECK_FLAG('d');
774 	noredirect = CHECK_FLAG('A');
775 	verbose = CHECK_FLAG('v');
776 
777 	if (direct && purl) {
778 		fetchFreeURL(purl);
779 		purl = NULL;
780 	}
781 
782 	/* try the provided URL first */
783 	url = URL;
784 
785 	/* if the A flag is set, we only get one try */
786 	n = noredirect ? 1 : MAX_REDIRECT;
787 	i = 0;
788 
789 	e = HTTP_PROTOCOL_ERROR;
790 	need_auth = 0;
791 	do {
792 		new = NULL;
793 		chunked = 0;
794 		offset = 0;
795 		clength = -1;
796 		length = -1;
797 		size = -1;
798 		mtime = 0;
799 
800 		/* check port */
801 		if (!url->port)
802 			url->port = fetch_default_port(url->scheme);
803 
804 		/* were we redirected to an FTP URL? */
805 		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
806 			if (strcmp(op, "GET") == 0)
807 				return (ftp_request(url, "RETR", NULL, us, purl, flags));
808 			else if (strcmp(op, "HEAD") == 0)
809 				return (ftp_request(url, "STAT", NULL, us, purl, flags));
810 		}
811 
812 		/* connect to server or proxy */
813 		if ((conn = http_connect(url, purl, flags)) == NULL)
814 			goto ouch;
815 
816 		host = url->host;
817 #ifdef INET6
818 		if (strchr(url->host, ':')) {
819 			snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
820 			host = hbuf;
821 		}
822 #endif
823 		if (url->port != fetch_default_port(url->scheme)) {
824 			if (host != hbuf) {
825 				strcpy(hbuf, host);
826 				host = hbuf;
827 			}
828 			snprintf(hbuf + strlen(hbuf),
829 			    sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
830 		}
831 
832 		/* send request */
833 		if (verbose)
834 			fetch_info("requesting %s://%s%s",
835 			    url->scheme, host, url->doc);
836 		if (purl) {
837 			http_cmd(conn, "%s %s://%s%s HTTP/1.1",
838 			    op, url->scheme, host, url->doc);
839 		} else {
840 			http_cmd(conn, "%s %s HTTP/1.1",
841 			    op, url->doc);
842 		}
843 
844 		/* virtual host */
845 		http_cmd(conn, "Host: %s", host);
846 
847 		/* proxy authorization */
848 		if (purl) {
849 			if (*purl->user || *purl->pwd)
850 				http_basic_auth(conn, "Proxy-Authorization",
851 				    purl->user, purl->pwd);
852 			else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
853 				http_authorize(conn, "Proxy-Authorization", p);
854 		}
855 
856 		/* server authorization */
857 		if (need_auth || *url->user || *url->pwd) {
858 			if (*url->user || *url->pwd)
859 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
860 			else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
861 				http_authorize(conn, "Authorization", p);
862 			else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
863 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
864 			} else {
865 				http_seterr(HTTP_NEED_AUTH);
866 				goto ouch;
867 			}
868 		}
869 
870 		/* other headers */
871 		if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
872 			if (strcasecmp(p, "auto") == 0)
873 				http_cmd(conn, "Referer: %s://%s%s",
874 				    url->scheme, host, url->doc);
875 			else
876 				http_cmd(conn, "Referer: %s", p);
877 		}
878 		if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
879 			http_cmd(conn, "User-Agent: %s", p);
880 		else
881 			http_cmd(conn, "User-Agent: %s ", _LIBFETCH_VER);
882 		if (url->offset > 0)
883 			http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset);
884 		http_cmd(conn, "Connection: close");
885 		http_cmd(conn, "");
886 
887 		/*
888 		 * Force the queued request to be dispatched.  Normally, one
889 		 * would do this with shutdown(2) but squid proxies can be
890 		 * configured to disallow such half-closed connections.  To
891 		 * be compatible with such configurations, fiddle with socket
892 		 * options to force the pending data to be written.
893 		 */
894 #ifdef TCP_NOPUSH
895 		val = 0;
896 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
897 			   sizeof(val));
898 #endif
899 		val = 1;
900 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
901 			   sizeof(val));
902 
903 		/* get reply */
904 		switch (http_get_reply(conn)) {
905 		case HTTP_OK:
906 		case HTTP_PARTIAL:
907 			/* fine */
908 			break;
909 		case HTTP_MOVED_PERM:
910 		case HTTP_MOVED_TEMP:
911 		case HTTP_SEE_OTHER:
912 			/*
913 			 * Not so fine, but we still have to read the
914 			 * headers to get the new location.
915 			 */
916 			break;
917 		case HTTP_NEED_AUTH:
918 			if (need_auth) {
919 				/*
920 				 * We already sent out authorization code,
921 				 * so there's nothing more we can do.
922 				 */
923 				http_seterr(conn->err);
924 				goto ouch;
925 			}
926 			/* try again, but send the password this time */
927 			if (verbose)
928 				fetch_info("server requires authorization");
929 			break;
930 		case HTTP_NEED_PROXY_AUTH:
931 			/*
932 			 * If we're talking to a proxy, we already sent
933 			 * our proxy authorization code, so there's
934 			 * nothing more we can do.
935 			 */
936 			http_seterr(conn->err);
937 			goto ouch;
938 		case HTTP_BAD_RANGE:
939 			/*
940 			 * This can happen if we ask for 0 bytes because
941 			 * we already have the whole file.  Consider this
942 			 * a success for now, and check sizes later.
943 			 */
944 			break;
945 		case HTTP_PROTOCOL_ERROR:
946 			/* fall through */
947 		case -1:
948 			fetch_syserr();
949 			goto ouch;
950 		default:
951 			http_seterr(conn->err);
952 			if (!verbose)
953 				goto ouch;
954 			/* fall through so we can get the full error message */
955 		}
956 
957 		/* get headers */
958 		do {
959 			switch ((h = http_next_header(conn, &p))) {
960 			case hdr_syserror:
961 				fetch_syserr();
962 				goto ouch;
963 			case hdr_error:
964 				http_seterr(HTTP_PROTOCOL_ERROR);
965 				goto ouch;
966 			case hdr_content_length:
967 				http_parse_length(p, &clength);
968 				break;
969 			case hdr_content_range:
970 				http_parse_range(p, &offset, &length, &size);
971 				break;
972 			case hdr_last_modified:
973 				http_parse_mtime(p, &mtime);
974 				break;
975 			case hdr_location:
976 				if (!HTTP_REDIRECT(conn->err))
977 					break;
978 				if (new)
979 					free(new);
980 				if (verbose)
981 					fetch_info("%d redirect to %s", conn->err, p);
982 				if (*p == '/')
983 					/* absolute path */
984 					new = fetchMakeURL(url->scheme, url->host, url->port, p,
985 					    url->user, url->pwd);
986 				else
987 					new = fetchParseURL(p);
988 				if (new == NULL) {
989 					/* XXX should set an error code */
990 					goto ouch;
991 				}
992 				if (!*new->user && !*new->pwd) {
993 					strcpy(new->user, url->user);
994 					strcpy(new->pwd, url->pwd);
995 				}
996 				new->offset = url->offset;
997 				new->length = url->length;
998 				break;
999 			case hdr_transfer_encoding:
1000 				/* XXX weak test*/
1001 				chunked = (strcasecmp(p, "chunked") == 0);
1002 				break;
1003 			case hdr_www_authenticate:
1004 				if (conn->err != HTTP_NEED_AUTH)
1005 					break;
1006 				/* if we were smarter, we'd check the method and realm */
1007 				break;
1008 			case hdr_end:
1009 				/* fall through */
1010 			case hdr_unknown:
1011 				/* ignore */
1012 				break;
1013 			}
1014 		} while (h > hdr_end);
1015 
1016 		/* we need to provide authentication */
1017 		if (conn->err == HTTP_NEED_AUTH) {
1018 			e = conn->err;
1019 			need_auth = 1;
1020 			fetch_close(conn);
1021 			conn = NULL;
1022 			continue;
1023 		}
1024 
1025 		/* requested range not satisfiable */
1026 		if (conn->err == HTTP_BAD_RANGE) {
1027 			if (url->offset == size && url->length == 0) {
1028 				/* asked for 0 bytes; fake it */
1029 				offset = url->offset;
1030 				conn->err = HTTP_OK;
1031 				break;
1032 			} else {
1033 				http_seterr(conn->err);
1034 				goto ouch;
1035 			}
1036 		}
1037 
1038 		/* we have a hit or an error */
1039 		if (conn->err == HTTP_OK || conn->err == HTTP_PARTIAL || HTTP_ERROR(conn->err))
1040 			break;
1041 
1042 		/* all other cases: we got a redirect */
1043 		e = conn->err;
1044 		need_auth = 0;
1045 		fetch_close(conn);
1046 		conn = NULL;
1047 		if (!new)
1048 			break;
1049 		if (url != URL)
1050 			fetchFreeURL(url);
1051 		url = new;
1052 	} while (++i < n);
1053 
1054 	/* we failed, or ran out of retries */
1055 	if (conn == NULL) {
1056 		http_seterr(e);
1057 		goto ouch;
1058 	}
1059 
1060 	/* check for inconsistencies */
1061 	if (clength != -1 && length != -1 && clength != length) {
1062 		http_seterr(HTTP_PROTOCOL_ERROR);
1063 		goto ouch;
1064 	}
1065 	if (clength == -1)
1066 		clength = length;
1067 	if (clength != -1)
1068 		length = offset + clength;
1069 	if (length != -1 && size != -1 && length != size) {
1070 		http_seterr(HTTP_PROTOCOL_ERROR);
1071 		goto ouch;
1072 	}
1073 	if (size == -1)
1074 		size = length;
1075 
1076 	/* fill in stats */
1077 	if (us) {
1078 		us->size = size;
1079 		us->atime = us->mtime = mtime;
1080 	}
1081 
1082 	/* too far? */
1083 	if (URL->offset > 0 && offset > URL->offset) {
1084 		http_seterr(HTTP_PROTOCOL_ERROR);
1085 		goto ouch;
1086 	}
1087 
1088 	/* report back real offset and size */
1089 	URL->offset = offset;
1090 	URL->length = clength;
1091 
1092 	/* wrap it up in a fetchIO */
1093 	if ((f = http_funopen(conn, chunked)) == NULL) {
1094 		fetch_syserr();
1095 		goto ouch;
1096 	}
1097 
1098 	if (url != URL)
1099 		fetchFreeURL(url);
1100 	if (purl)
1101 		fetchFreeURL(purl);
1102 
1103 	if (HTTP_ERROR(conn->err)) {
1104 		fetchIO_close(f);
1105 		f = NULL;
1106 	}
1107 
1108 	return (f);
1109 
1110 ouch:
1111 	if (url != URL)
1112 		fetchFreeURL(url);
1113 	if (purl)
1114 		fetchFreeURL(purl);
1115 	if (conn != NULL)
1116 		fetch_close(conn);
1117 	return (NULL);
1118 }
1119 
1120 
1121 /*****************************************************************************
1122  * Entry points
1123  */
1124 
1125 /*
1126  * Retrieve and stat a file by HTTP
1127  */
1128 fetchIO *
1129 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1130 {
1131 	return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1132 }
1133 
1134 /*
1135  * Retrieve a file by HTTP
1136  */
1137 fetchIO *
1138 fetchGetHTTP(struct url *URL, const char *flags)
1139 {
1140 	return (fetchXGetHTTP(URL, NULL, flags));
1141 }
1142 
1143 /*
1144  * Store a file by HTTP
1145  */
1146 fetchIO *
1147 fetchPutHTTP(struct url *URL, const char *flags)
1148 {
1149 	fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1150 	return (NULL);
1151 }
1152 
1153 /*
1154  * Get an HTTP document's metadata
1155  */
1156 int
1157 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1158 {
1159 	fetchIO *f;
1160 
1161 	f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1162 	if (f == NULL)
1163 		return (-1);
1164 	fetchIO_close(f);
1165 	return (0);
1166 }
1167 
/*
 * States of the HTML index parser (parse_index()).  States whose
 * handling is not described here are presumably the steps of matching
 * "href" and its value -- TODO confirm against parse_index().
 */
enum http_states {
	ST_NONE		= 0,	/* plain text, not in markup */
	ST_LT		= 1,	/* "<" found */
	ST_LTA		= 2,	/* "<a" found */
	ST_TAGA		= 3,	/* "<a " found */
	ST_H		= 4,	/* "<a h" found */
	ST_R		= 5,
	ST_E		= 6,
	ST_F		= 7,
	ST_HREF		= 8,
	ST_HREFQ	= 9,
	ST_TAG		= 10,	/* in a tag other than "<a" */
	ST_TAGAX	= 11,	/* in an unknown keyword in an a-tag */
	ST_TAGAQ	= 12	/* in a quoted argument in an a-tag */
};
1183 
1184 struct index_parser {
1185 	struct url_list *ue;
1186 	struct url *url;
1187 	enum http_states state;
1188 };
1189 
1190 static size_t
1191 parse_index(struct index_parser *parser, const char *buf, size_t len)
1192 {
1193 	char *end_attr, p = *buf;
1194 
1195 	switch (parser->state) {
1196 	case ST_NONE:
1197 		/* Plain text, not in markup */
1198 		if (p == '<')
1199 			parser->state = ST_LT;
1200 		return 1;
1201 	case ST_LT:
1202 		/* In tag -- "<" already found */
1203 		if (p == '>')
1204 			parser->state = ST_NONE;
1205 		else if (p == 'a' || p == 'A')
1206 			parser->state = ST_LTA;
1207 		else if (!isspace((unsigned char)p))
1208 			parser->state = ST_TAG;
1209 		return 1;
1210 	case ST_LTA:
1211 		/* In tag -- "<a" already found */
1212 		if (p == '>')
1213 			parser->state = ST_NONE;
1214 		else if (p == '"')
1215 			parser->state = ST_TAGAQ;
1216 		else if (isspace((unsigned char)p))
1217 			parser->state = ST_TAGA;
1218 		else
1219 			parser->state = ST_TAG;
1220 		return 1;
1221 	case ST_TAG:
1222 		/* In tag, but not "<a" -- disregard */
1223 		if (p == '>')
1224 			parser->state = ST_NONE;
1225 		return 1;
1226 	case ST_TAGA:
1227 		/* In a-tag -- "<a " already found */
1228 		if (p == '>')
1229 			parser->state = ST_NONE;
1230 		else if (p == '"')
1231 			parser->state = ST_TAGAQ;
1232 		else if (p == 'h' || p == 'H')
1233 			parser->state = ST_H;
1234 		else if (!isspace((unsigned char)p))
1235 			parser->state = ST_TAGAX;
1236 		return 1;
1237 	case ST_TAGAX:
1238 		/* In unknown keyword in a-tag */
1239 		if (p == '>')
1240 			parser->state = ST_NONE;
1241 		else if (p == '"')
1242 			parser->state = ST_TAGAQ;
1243 		else if (isspace((unsigned char)p))
1244 			parser->state = ST_TAGA;
1245 		return 1;
1246 	case ST_TAGAQ:
1247 		/* In a-tag, unknown argument for keys. */
1248 		if (p == '>')
1249 			parser->state = ST_NONE;
1250 		else if (p == '"')
1251 			parser->state = ST_TAGA;
1252 		return 1;
1253 	case ST_H:
1254 		/* In a-tag -- "<a h" already found */
1255 		if (p == '>')
1256 			parser->state = ST_NONE;
1257 		else if (p == '"')
1258 			parser->state = ST_TAGAQ;
1259 		else if (p == 'r' || p == 'R')
1260 			parser->state = ST_R;
1261 		else if (isspace((unsigned char)p))
1262 			parser->state = ST_TAGA;
1263 		else
1264 			parser->state = ST_TAGAX;
1265 		return 1;
1266 	case ST_R:
1267 		/* In a-tag -- "<a hr" already found */
1268 		if (p == '>')
1269 			parser->state = ST_NONE;
1270 		else if (p == '"')
1271 			parser->state = ST_TAGAQ;
1272 		else if (p == 'e' || p == 'E')
1273 			parser->state = ST_E;
1274 		else if (isspace((unsigned char)p))
1275 			parser->state = ST_TAGA;
1276 		else
1277 			parser->state = ST_TAGAX;
1278 		return 1;
1279 	case ST_E:
1280 		/* In a-tag -- "<a hre" already found */
1281 		if (p == '>')
1282 			parser->state = ST_NONE;
1283 		else if (p == '"')
1284 			parser->state = ST_TAGAQ;
1285 		else if (p == 'f' || p == 'F')
1286 			parser->state = ST_F;
1287 		else if (isspace((unsigned char)p))
1288 			parser->state = ST_TAGA;
1289 		else
1290 			parser->state = ST_TAGAX;
1291 		return 1;
1292 	case ST_F:
1293 		/* In a-tag -- "<a href" already found */
1294 		if (p == '>')
1295 			parser->state = ST_NONE;
1296 		else if (p == '"')
1297 			parser->state = ST_TAGAQ;
1298 		else if (p == '=')
1299 			parser->state = ST_HREF;
1300 		else if (!isspace((unsigned char)p))
1301 			parser->state = ST_TAGAX;
1302 		return 1;
1303 	case ST_HREF:
1304 		/* In a-tag -- "<a href=" already found */
1305 		if (p == '>')
1306 			parser->state = ST_NONE;
1307 		else if (p == '"')
1308 			parser->state = ST_HREFQ;
1309 		else if (!isspace((unsigned char)p))
1310 			parser->state = ST_TAGA;
1311 		return 1;
1312 	case ST_HREFQ:
1313 		/* In href of the a-tag */
1314 		end_attr = memchr(buf, '"', len);
1315 		if (end_attr == NULL)
1316 			return 0;
1317 		*end_attr = '\0';
1318 		parser->state = ST_TAGA;
1319 		fetch_add_entry(parser->ue, parser->url, buf, 1);
1320 		return end_attr + 1 - buf;
1321 	}
1322 	abort();
1323 }
1324 
1325 /*
1326  * List a directory
1327  */
1328 int
1329 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern, const char *flags)
1330 {
1331 	fetchIO *f;
1332 	char buf[2 * PATH_MAX];
1333 	size_t buf_len, processed, sum_processed;
1334 	ssize_t read_len;
1335 	struct index_parser state;
1336 
1337 	state.url = url;
1338 	state.state = ST_NONE;
1339 	state.ue = ue;
1340 
1341 	f = fetchGetHTTP(url, flags);
1342 	if (f == NULL)
1343 		return -1;
1344 
1345 	buf_len = 0;
1346 
1347 	while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1348 		buf_len += read_len;
1349 		sum_processed = 0;
1350 		do {
1351 			processed = parse_index(&state, buf + sum_processed, buf_len);
1352 			buf_len -= processed;
1353 			sum_processed += processed;
1354 		} while (processed != 0 && buf_len > 0);
1355 		memmove(buf, buf + sum_processed, buf_len);
1356 	}
1357 
1358 	fetchIO_close(f);
1359 	return read_len < 0 ? -1 : 0;
1360 }
1361