xref: /netbsd-src/external/bsd/fetch/dist/libfetch/http.c (revision ccd9df534e375a4366c5b55f23782053c7a98d82)
1 /*	$NetBSD: http.c,v 1.5 2024/02/02 22:19:05 christos Exp $	*/
2 /*-
3  * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4  * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5  * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer
13  *    in this position and unchanged.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
32  */
33 
34 /*
35  * The following copyright applies to the base64 code:
36  *
37  *-
38  * Copyright 1997 Massachusetts Institute of Technology
39  *
40  * Permission to use, copy, modify, and distribute this software and
41  * its documentation for any purpose and without fee is hereby
42  * granted, provided that both the above copyright notice and this
43  * permission notice appear in all copies, that both the above
44  * copyright notice and this permission notice appear in all
45  * supporting documentation, and that the name of M.I.T. not be used
46  * in advertising or publicity pertaining to distribution of the
47  * software without specific, written prior permission.  M.I.T. makes
48  * no representations about the suitability of this software for any
49  * purpose.  It is provided "as is" without express or implied
50  * warranty.
51  *
52  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
53  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  */
65 
66 #if defined(__linux__) || defined(__MINT__) || defined(__FreeBSD_kernel__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 /*
69    __FreeBSD_kernel__ is defined for GNU/kFreeBSD.
70    See http://glibc-bsd.alioth.debian.org/porting/PORTING .
71 */
72 #define _GNU_SOURCE
73 #endif
74 
75 #ifndef _REENTRANT
76 /* Needed for gmtime_r on Interix */
77 #define _REENTRANT
78 #endif
79 
80 #if HAVE_CONFIG_H
81 #include "config.h"
82 #endif
83 #ifndef NETBSD
84 #include <nbcompat.h>
85 #endif
86 
87 #include <sys/types.h>
88 #include <sys/socket.h>
89 
90 #include <ctype.h>
91 #include <errno.h>
92 #include <locale.h>
93 #include <stdarg.h>
94 #ifndef NETBSD
95 #include <nbcompat/stdio.h>
96 #else
97 #include <stdio.h>
98 #endif
99 #include <stdlib.h>
100 #include <string.h>
101 #include <time.h>
102 #include <unistd.h>
103 
104 #include <netinet/in.h>
105 #include <netinet/tcp.h>
106 
107 #ifndef NETBSD
108 #include <nbcompat/netdb.h>
109 #else
110 #include <netdb.h>
111 #endif
112 
113 #include <arpa/inet.h>
114 
115 #include "fetch.h"
116 #include "common.h"
117 #include "httperr.h"
118 
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
/* Not a real status code: returned when the status line cannot be parsed */
#define HTTP_PROTOCOL_ERROR	999

/* True for the reply codes that are treated as redirects */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for every 4xx and 5xx reply code.  The bounds are inclusive so
 * that 400 and 599 are classified like the codes between them.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
141 
142 
143 /*****************************************************************************
144  * I/O functions for decoding chunked streams
145  */
146 
147 struct httpio
148 {
149 	conn_t		*conn;		/* connection */
150 	int		 chunked;	/* chunked mode */
151 	int		 keep_alive;	/* keep-alive mode */
152 	char		*buf;		/* chunk buffer */
153 	size_t		 bufsize;	/* size of chunk buffer */
154 	ssize_t		 buflen;	/* amount of data currently in buffer */
155 	size_t		 bufpos;	/* current read offset in buffer */
156 	int		 eof;		/* end-of-file flag */
157 	int		 error;		/* error flag */
158 	size_t		 chunksize;	/* remaining size of current chunk */
159 	off_t		 contentlength;	/* remaining size of the content */
160 };
161 
162 /*
163  * Get next chunk header
164  */
165 static ssize_t
166 http_new_chunk(struct httpio *io)
167 {
168 	char *p;
169 
170 	if (fetch_getln(io->conn) == -1)
171 		return (-1);
172 
173 	if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
174 		return (-1);
175 
176 	for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
177 		if (*p == ';')
178 			break;
179 		if (!isxdigit((unsigned char)*p))
180 			return (-1);
181 		if (isdigit((unsigned char)*p)) {
182 			io->chunksize = io->chunksize * 16 +
183 			    *p - '0';
184 		} else {
185 			io->chunksize = io->chunksize * 16 +
186 			    10 + tolower((unsigned char)*p) - 'a';
187 		}
188 	}
189 
190 	return (io->chunksize);
191 }
192 
193 /*
194  * Grow the input buffer to at least len bytes
195  */
196 static int
197 http_growbuf(struct httpio *io, size_t len)
198 {
199 	char *tmp;
200 
201 	if (io->bufsize >= len)
202 		return (0);
203 
204 	if ((tmp = realloc(io->buf, len)) == NULL)
205 		return (-1);
206 	io->buf = tmp;
207 	io->bufsize = len;
208 	return (0);
209 }
210 
211 /*
212  * Fill the input buffer, do chunk decoding on the fly
213  */
214 static ssize_t
215 http_fillbuf(struct httpio *io, size_t len)
216 {
217 	if (io->error)
218 		return (-1);
219 	if (io->eof)
220 		return (0);
221 
222 	if (io->contentlength >= 0 && (off_t)len > io->contentlength)
223 		len = io->contentlength;
224 
225 	if (io->chunked == 0) {
226 		if (http_growbuf(io, len) == -1)
227 			return (-1);
228 		if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
229 			io->error = 1;
230 			return (-1);
231 		}
232 		if (io->contentlength)
233 			io->contentlength -= io->buflen;
234 		io->bufpos = 0;
235 		return (io->buflen);
236 	}
237 
238 	if (io->chunksize == 0) {
239 		switch (http_new_chunk(io)) {
240 		case -1:
241 			io->error = 1;
242 			return (-1);
243 		case 0:
244 			io->eof = 1;
245 			if (fetch_getln(io->conn) == -1)
246 				return (-1);
247 			return (0);
248 		}
249 	}
250 
251 	if (len > io->chunksize)
252 		len = io->chunksize;
253 	if (http_growbuf(io, len) == -1)
254 		return (-1);
255 	if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
256 		io->error = 1;
257 		return (-1);
258 	}
259 	io->chunksize -= io->buflen;
260 	if (io->contentlength >= 0)
261 		io->contentlength -= io->buflen;
262 
263 	if (io->chunksize == 0) {
264 		char endl[2];
265 		ssize_t len2;
266 
267 		len2 = fetch_read(io->conn, endl, 2);
268 		if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
269 			return (-1);
270 		if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
271 			return (-1);
272 	}
273 
274 	io->bufpos = 0;
275 
276 	return (io->buflen);
277 }
278 
279 /*
280  * Read function
281  */
282 static ssize_t
283 http_readfn(void *v, void *buf, size_t len)
284 {
285 	struct httpio *io = (struct httpio *)v;
286 	size_t l, pos;
287 
288 	if (io->error)
289 		return (-1);
290 	if (io->eof)
291 		return (0);
292 
293 	for (pos = 0; len > 0; pos += l, len -= l) {
294 		/* empty buffer */
295 		if (!io->buf || (ssize_t)io->bufpos == io->buflen)
296 			if (http_fillbuf(io, len) < 1)
297 				break;
298 		l = io->buflen - io->bufpos;
299 		if (len < l)
300 			l = len;
301 		memcpy((char *)buf + pos, io->buf + io->bufpos, l);
302 		io->bufpos += l;
303 	}
304 
305 	if (!pos && io->error)
306 		return (-1);
307 	return (pos);
308 }
309 
310 /*
311  * Write function
312  */
313 static ssize_t
314 http_writefn(void *v, const void *buf, size_t len)
315 {
316 	struct httpio *io = (struct httpio *)v;
317 
318 	return (fetch_write(io->conn, buf, len));
319 }
320 
321 /*
322  * Close function
323  */
324 static void
325 http_closefn(void *v)
326 {
327 	struct httpio *io = (struct httpio *)v;
328 
329 	if (io->keep_alive) {
330 		int val;
331 
332 		val = 0;
333 		setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
334 			   (socklen_t)sizeof(val));
335 			  fetch_cache_put(io->conn, fetch_close);
336 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
337 		val = 1;
338 		setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
339 		    sizeof(val));
340 #endif
341 	} else {
342 		fetch_close(io->conn);
343 	}
344 
345 	free(io->buf);
346 	free(io);
347 }
348 
349 /*
350  * Wrap a file descriptor up
351  */
352 static fetchIO *
353 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
354 {
355 	struct httpio *io;
356 	fetchIO *f;
357 
358 	if ((io = calloc(1, sizeof(*io))) == NULL) {
359 		fetch_syserr();
360 		return (NULL);
361 	}
362 	io->conn = conn;
363 	io->chunked = chunked;
364 	io->contentlength = clength;
365 	io->keep_alive = keep_alive;
366 	f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
367 	if (f == NULL) {
368 		fetch_syserr();
369 		free(io);
370 		return (NULL);
371 	}
372 	return (f);
373 }
374 
375 
376 /*****************************************************************************
377  * Helper functions for talking to the server and parsing its replies
378  */
379 
/*
 * Header types.  Negative values report failures, hdr_end marks the
 * empty line that ends the header block, and every value above
 * hdr_unknown identifies a header this file knows how to handle.
 */
typedef enum {
	hdr_syserror = -2,
	hdr_error = -1,
	hdr_end = 0,
	hdr_unknown = 1,
	hdr_connection,
	hdr_content_length,
	hdr_content_range,
	hdr_last_modified,
	hdr_location,
	hdr_transfer_encoding,
	hdr_www_authenticate
} hdr_t;

/*
 * Names of interesting headers.  The table is read-only and ends with
 * an hdr_unknown entry whose name is NULL.
 */
static const struct {
	hdr_t		 num;
	const char	*name;
} hdr_names[] = {
	{ hdr_connection,		"Connection" },
	{ hdr_content_length,		"Content-Length" },
	{ hdr_content_range,		"Content-Range" },
	{ hdr_last_modified,		"Last-Modified" },
	{ hdr_location,			"Location" },
	{ hdr_transfer_encoding,	"Transfer-Encoding" },
	{ hdr_www_authenticate,		"WWW-Authenticate" },
	{ hdr_unknown,			NULL },
};
409 
410 /*
411  * Send a formatted line; optionally echo to terminal
412  */
413 LIBFETCH_PRINTFLIKE(2, 3)
414 static int
415 http_cmd(conn_t *conn, const char *fmt, ...)
416 {
417 	va_list ap;
418 	size_t len;
419 	char *msg;
420 	ssize_t r;
421 
422 	va_start(ap, fmt);
423 	len = vasprintf(&msg, fmt, ap);
424 	va_end(ap);
425 
426 	if (msg == NULL) {
427 		errno = ENOMEM;
428 		fetch_syserr();
429 		return (-1);
430 	}
431 
432 	r = fetch_write(conn, msg, len);
433 	free(msg);
434 
435 	if (r == -1) {
436 		fetch_syserr();
437 		return (-1);
438 	}
439 
440 	return (0);
441 }
442 
443 /*
444  * Get and parse status line
445  */
446 static int
447 http_get_reply(conn_t *conn)
448 {
449 	char *p;
450 
451 	if (fetch_getln(conn) == -1)
452 		return (-1);
453 	/*
454 	 * A valid status line looks like "HTTP/m.n xyz reason" where m
455 	 * and n are the major and minor protocol version numbers and xyz
456 	 * is the reply code.
457 	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
458 	 * just one) that do not send a version number, so we can't rely
459 	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
460 	 * We don't care about the reason phrase.
461 	 */
462 	if (strncmp(conn->buf, "HTTP", 4) != 0)
463 		return (HTTP_PROTOCOL_ERROR);
464 	p = conn->buf + 4;
465 	if (*p == '/') {
466 		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
467 			return (HTTP_PROTOCOL_ERROR);
468 		p += 4;
469 	}
470 	if (*p != ' ' ||
471 	    !isdigit((unsigned char)p[1]) ||
472 	    !isdigit((unsigned char)p[2]) ||
473 	    !isdigit((unsigned char)p[3]))
474 		return (HTTP_PROTOCOL_ERROR);
475 
476 	conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
477 	return (conn->err);
478 }
479 
/*
 * Check a header line against a header name.
 *
 * str is the header name to look for (e.g. "Location") and hdr is the
 * raw header line.  The name is compared case-insensitively and must be
 * followed immediately by a colon.  On a match, returns a pointer to
 * the first non-whitespace character after the colon (which may be the
 * terminating NUL); otherwise returns NULL.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Never step past a mismatch: otherwise a line whose name differs
	 * from str only in its final character would be accepted.
	 */
	for (; *str != '\0'; ++str, ++hdr) {
		if (tolower((unsigned char)*str) !=
		    tolower((unsigned char)*hdr))
			return (NULL);
	}
	if (*hdr != ':')
		return (NULL);

	/* skip the colon and any whitespace that follows it */
	do {
		++hdr;
	} while (isspace((unsigned char)*hdr));
	return (hdr);
}
496 
497 /*
498  * Get the next header and return the appropriate symbolic code.
499  */
500 static hdr_t
501 http_next_header(conn_t *conn, const char **p)
502 {
503 	int i;
504 
505 	if (fetch_getln(conn) == -1)
506 		return (hdr_syserror);
507 	while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
508 		conn->buflen--;
509 	conn->buf[conn->buflen] = '\0';
510 	if (conn->buflen == 0)
511 		return (hdr_end);
512 	/*
513 	 * We could check for malformed headers but we don't really care.
514 	 * A valid header starts with a token immediately followed by a
515 	 * colon; a token is any sequence of non-control, non-whitespace
516 	 * characters except "()<>@,;:\\\"{}".
517 	 */
518 	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
519 		if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
520 			return (hdr_names[i].num);
521 	return (hdr_unknown);
522 }
523 
/*
 * Parse the value of a Last-Modified header.
 *
 * Only the "Sun, 06 Nov 1994 08:49:37 GMT" date form is understood.  The
 * parse is done in the C locale so that day and month names match no
 * matter what LC_TIME is currently set to.  Returns 0 and stores the
 * time in *mtime on success; returns -1 and leaves *mtime untouched if
 * the date cannot be parsed or the current locale cannot be saved.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	/*
	 * strptime() only fills in the fields named by the format, so
	 * start from a zeroed structure to keep timegm() away from
	 * indeterminate values.
	 */
	struct tm tm = { 0 };
	char *r;

#ifdef LC_C_LOCALE
	r = strptime_l(p, "%a, %d %b %Y %H:%M:%S GMT", &tm, LC_C_LOCALE);
#else
	const char *current;
	char *locale;

	/* setlocale() may return NULL, which must not reach strdup() */
	current = setlocale(LC_TIME, NULL);
	if (current == NULL)
		return (-1);
	locale = strdup(current);
	if (locale == NULL)
		return (-1);

	setlocale(LC_TIME, "C");
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, locale);
	free(locale);
#endif
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
553 
/*
 * Parse the value of a Content-Length header.
 *
 * The value must consist of one or more decimal digits and nothing
 * else.  Returns 0 and stores the result in *length on success.  Returns
 * -1 and leaves *length untouched if the value is empty, contains any
 * other character, or does not fit in an off_t.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	/* largest positive off_t, built without overflowing the shift */
	const off_t off_max =
	    ((((off_t)1 << (sizeof(off_t) * 8 - 2)) - 1) * 2) + 1;
	off_t len;
	int digit;

	/* an empty value is not a length of zero */
	if (!isdigit((unsigned char)*p))
		return (-1);

	for (len = 0; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* would len * 10 + digit exceed off_max? */
		if (len > (off_max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	*length = len;
	return (0);
}
569 
/*
 * Parse a run of decimal digits starting at p into *value.
 *
 * Returns a pointer to the first character after the digits, or NULL if
 * the number does not fit in an off_t.  An empty run yields 0.
 */
static const char *
http_range_number(const char *p, off_t *value)
{
	/* largest positive off_t, built without overflowing the shift */
	const off_t off_max =
	    ((((off_t)1 << (sizeof(off_t) * 8 - 2)) - 1) * 2) + 1;
	off_t v;
	int digit;

	for (v = 0; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		if (v > (off_max - digit) / 10)
			return (NULL);
		v = v * 10 + digit;
	}
	*value = v;
	return (p);
}

/*
 * Parse the value of a Content-Range header.
 *
 * Accepted forms are "bytes <first>-<last>/<size>" and the
 * unsatisfied-range form "bytes * /<size>" (written here with a space
 * only to keep this comment well-formed).  On success returns 0 and
 * stores:
 *   *offset  first byte position, or -1 for the '*' form
 *   *length  number of bytes in the range, or 0 for the '*' form
 *   *size    complete length of the resource
 * Returns -1, storing nothing, if the value is malformed, a number does
 * not fit in an off_t, first > last, or the range holds more bytes than
 * the stated size.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if ((p = http_range_number(p, &first)) == NULL || *p != '-')
			return (-1);
		if ((p = http_range_number(p + 1, &last)) == NULL)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	if ((p = http_range_number(p + 1, &len)) == NULL || *p != '\0')
		return (-1);

	if (first == -1) {
		/* no range was given, so there is nothing to check */
		*length = 0;
	} else {
		/*
		 * Same test as "len < last - first + 1", written so the
		 * addition cannot overflow.
		 */
		if (last - first >= len)
			return (-1);
		*length = last - first + 1;
	}
	*offset = first;
	*size = len;
	return (0);
}
606 
607 
608 /*****************************************************************************
609  * Helper functions for authorization
610  */
611 
/*
 * Base64-encode a NUL-terminated string.
 *
 * Returns a newly allocated, NUL-terminated string padded with '=' to a
 * multiple of four characters, or NULL if memory cannot be allocated.
 * The caller is responsible for freeing the result.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Read the input as unsigned bytes: where plain char is signed,
	 * bytes >= 0x80 would otherwise be sign-extended and corrupt the
	 * whole 24-bit group.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	/* full groups: 3 input bytes become 4 output characters */
	for (; l >= 3; in += 3, l -= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		*dst++ = base64[(t >> 18) & 0x3f];
		*dst++ = base64[(t >> 12) & 0x3f];
		*dst++ = base64[(t >> 6) & 0x3f];
		*dst++ = base64[t & 0x3f];
	}

	/* trailing partial group, padded with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		*dst++ = base64[(t >> 18) & 0x3f];
		*dst++ = base64[(t >> 12) & 0x3f];
		*dst++ = base64[(t >> 6) & 0x3f];
		*dst++ = '=';
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		*dst++ = base64[(t >> 18) & 0x3f];
		*dst++ = base64[(t >> 12) & 0x3f];
		*dst++ = '=';
		*dst++ = '=';
		break;
	default:
		break;
	}

	*dst = '\0';
	return (str);
}
667 
668 /*
669  * Encode username and password
670  */
671 static int
672 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
673 {
674 	char *upw, *auth;
675 	int r;
676 
677 	if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
678 		return (-1);
679 	auth = http_base64(upw);
680 	free(upw);
681 	if (auth == NULL)
682 		return (-1);
683 	r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
684 	free(auth);
685 	return (r);
686 }
687 
688 /*
689  * Send an authorization header
690  */
691 static int
692 http_authorize(conn_t *conn, const char *hdr, const char *p)
693 {
694 	/* basic authorization */
695 	if (strncasecmp(p, "basic:", 6) == 0) {
696 		char *user, *pwd, *str;
697 		int r;
698 
699 		/* skip realm */
700 		for (p += 6; *p && *p != ':'; ++p)
701 			/* nothing */ ;
702 		if (!*p || strchr(++p, ':') == NULL)
703 			return (-1);
704 		if ((str = strdup(p)) == NULL)
705 			return (-1); /* XXX */
706 		user = str;
707 		pwd = strchr(str, ':');
708 		*pwd++ = '\0';
709 		r = http_basic_auth(conn, hdr, user, pwd);
710 		free(str);
711 		return (r);
712 	}
713 	return (-1);
714 }
715 
716 
717 /*****************************************************************************
718  * Helper functions for connecting to a server or proxy
719  */
720 
721 /*
722  * Connect to the correct HTTP server or proxy.
723  */
724 static conn_t *
725 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
726 {
727 	struct url *curl;
728 	conn_t *conn;
729 	hdr_t h;
730 	const char *p;
731 	int af, verbose;
732 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
733 	int val;
734 #endif
735 
736 	*cached = 0;
737 
738 #ifdef INET6
739 	af = AF_UNSPEC;
740 #else
741 	af = AF_INET;
742 #endif
743 
744 	verbose = CHECK_FLAG('v');
745 	if (CHECK_FLAG('4'))
746 		af = AF_INET;
747 #ifdef INET6
748 	else if (CHECK_FLAG('6'))
749 		af = AF_INET6;
750 #endif
751 
752 	curl = (purl != NULL) ? purl : URL;
753 	if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
754 		URL = purl;
755 	} else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
756 		/* can't talk http to an ftp server */
757 		/* XXX should set an error code */
758 		return (NULL);
759 	}
760 
761 	if ((conn = fetch_cache_get(curl, af)) != NULL) {
762 		*cached = 1;
763 		return (conn);
764 	}
765 
766 	if ((conn = fetch_connect(curl, af, verbose)) == NULL)
767 		/* fetch_connect() has already set an error code */
768 		return (NULL);
769 	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && purl) {
770 		http_cmd(conn, "CONNECT %s:%d HTTP/1.1\r\n",
771 				URL->host, URL->port);
772 		http_cmd(conn, "Host: %s:%d\r\n",
773 				URL->host, URL->port);
774 		http_cmd(conn, "\r\n");
775 		if (http_get_reply(conn) != HTTP_OK) {
776 			http_seterr(conn->err);
777 			goto ouch;
778 		}
779 		/* Read and discard the rest of the proxy response */
780 		if (fetch_getln(conn) < 0) {
781 			fetch_syserr();
782 			goto ouch;
783 		}
784 		do {
785 			switch ((h = http_next_header(conn, &p))) {
786 			case hdr_syserror:
787 				fetch_syserr();
788 				goto ouch;
789 			case hdr_error:
790 				http_seterr(HTTP_PROTOCOL_ERROR);
791 				goto ouch;
792 			default:
793 				/* ignore */ ;
794 			}
795 		} while (h < hdr_end);
796 	}
797 	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
798 	    fetch_ssl(conn, URL, verbose) == -1) {
799 		/* grrr */
800 #ifdef EAUTH
801 		errno = EAUTH;
802 #else
803 		errno = EPERM;
804 #endif
805 		goto ouch;
806 	}
807 
808 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
809 	val = 1;
810 	setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
811 #endif
812 
813 	return (conn);
814 ouch:
815 	fetch_close(conn);
816 	return (NULL);
817 }
818 
819 static struct url *
820 http_get_proxy(struct url * url, const char *flags)
821 {
822 	struct url *purl;
823 	char *p;
824 
825 	if (flags != NULL && strchr(flags, 'd') != NULL)
826 		return (NULL);
827 	if (fetch_no_proxy_match(url->host))
828 		return (NULL);
829 	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
830 	    *p && (purl = fetchParseURL(p))) {
831 		if (!*purl->scheme)
832 			strcpy(purl->scheme, SCHEME_HTTP);
833 		if (!purl->port)
834 			purl->port = fetch_default_proxy_port(purl->scheme);
835 		if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
836 			return (purl);
837 		fetchFreeURL(purl);
838 	}
839 	return (NULL);
840 }
841 
842 static void
843 set_if_modified_since(conn_t *conn, time_t last_modified)
844 {
845 	static const char weekdays[] = "SunMonTueWedThuFriSat";
846 	static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
847 	struct tm tm;
848 	char buf[80];
849 	gmtime_r(&last_modified, &tm);
850 	snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4ld %02d:%02d:%02d GMT",
851 	    weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
852 	    (long)tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
853 	http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
854 }
855 
856 
857 /*****************************************************************************
858  * Core
859  */
860 
861 /*
862  * Send a request and process the reply
863  *
864  * XXX This function is way too long, the do..while loop should be split
865  * XXX off into a separate function.
866  */
867 fetchIO *
868 http_request(struct url *URL, const char *op, struct url_stat *us,
869     struct url *purl, const char *flags)
870 {
871 	conn_t *conn;
872 	struct url *url, *new;
873 	int chunked, direct, if_modified_since, need_auth, noredirect;
874 	int keep_alive, verbose, cached;
875 	int e, i, n, val;
876 	off_t offset, clength, length, size;
877 	time_t mtime;
878 	const char *p;
879 	fetchIO *f;
880 	hdr_t h;
881 	char hbuf[URL_HOSTLEN + 7], *host;
882 
883 	direct = CHECK_FLAG('d');
884 	noredirect = CHECK_FLAG('A');
885 	verbose = CHECK_FLAG('v');
886 	if_modified_since = CHECK_FLAG('i');
887 	keep_alive = 0;
888 
889 	if (direct && purl) {
890 		fetchFreeURL(purl);
891 		purl = NULL;
892 	}
893 
894 	/* try the provided URL first */
895 	url = URL;
896 
897 	/* if the A flag is set, we only get one try */
898 	n = noredirect ? 1 : MAX_REDIRECT;
899 	i = 0;
900 
901 	e = HTTP_PROTOCOL_ERROR;
902 	need_auth = 0;
903 	do {
904 		new = NULL;
905 		chunked = 0;
906 		offset = 0;
907 		clength = -1;
908 		length = -1;
909 		size = -1;
910 		mtime = 0;
911 
912 		/* check port */
913 		if (!url->port)
914 			url->port = fetch_default_port(url->scheme);
915 
916 		/* were we redirected to an FTP URL? */
917 		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
918 			if (strcmp(op, "GET") == 0)
919 				return (ftp_request(url, "RETR", NULL, us, purl, flags));
920 			else if (strcmp(op, "HEAD") == 0)
921 				return (ftp_request(url, "STAT", NULL, us, purl, flags));
922 		}
923 
924 		/* connect to server or proxy */
925 		if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
926 			goto ouch;
927 
928 		host = url->host;
929 #ifdef INET6
930 		if (strchr(url->host, ':')) {
931 			snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
932 			host = hbuf;
933 		}
934 #endif
935 		if (url->port != fetch_default_port(url->scheme)) {
936 			if (host != hbuf) {
937 				strcpy(hbuf, host);
938 				host = hbuf;
939 			}
940 			snprintf(hbuf + strlen(hbuf),
941 			    sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
942 		}
943 
944 		/* send request */
945 		if (verbose)
946 			fetch_info("requesting %s://%s%s",
947 			    url->scheme, host, url->doc);
948 		if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
949 			http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
950 			    op, url->scheme, host, url->doc);
951 		} else {
952 			http_cmd(conn, "%s %s HTTP/1.1\r\n",
953 			    op, url->doc);
954 		}
955 
956 		if (if_modified_since && url->last_modified > 0)
957 			set_if_modified_since(conn, url->last_modified);
958 
959 		/* virtual host */
960 		http_cmd(conn, "Host: %s\r\n", host);
961 
962 		/* proxy authorization */
963 		if (purl) {
964 			if (*purl->user || *purl->pwd)
965 				http_basic_auth(conn, "Proxy-Authorization",
966 				    purl->user, purl->pwd);
967 			else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
968 				http_authorize(conn, "Proxy-Authorization", p);
969 		}
970 
971 		/* server authorization */
972 		if (need_auth || *url->user || *url->pwd) {
973 			if (*url->user || *url->pwd)
974 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
975 			else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
976 				http_authorize(conn, "Authorization", p);
977 			else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
978 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
979 			} else {
980 				http_seterr(HTTP_NEED_AUTH);
981 				goto ouch;
982 			}
983 		}
984 
985 		/* other headers */
986 		if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
987 			if (strcasecmp(p, "auto") == 0)
988 				http_cmd(conn, "Referer: %s://%s%s\r\n",
989 				    url->scheme, host, url->doc);
990 			else
991 				http_cmd(conn, "Referer: %s\r\n", p);
992 		}
993 		if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
994 			http_cmd(conn, "User-Agent: %s\r\n", p);
995 		else
996 			http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
997 		if (url->offset > 0)
998 			http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
999 		http_cmd(conn, "\r\n");
1000 
1001 		/*
1002 		 * Force the queued request to be dispatched.  Normally, one
1003 		 * would do this with shutdown(2) but squid proxies can be
1004 		 * configured to disallow such half-closed connections.  To
1005 		 * be compatible with such configurations, fiddle with socket
1006 		 * options to force the pending data to be written.
1007 		 */
1008 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
1009 		val = 0;
1010 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
1011 			   sizeof(val));
1012 #endif
1013 		val = 1;
1014 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
1015 		    (socklen_t)sizeof(val));
1016 
1017 		/* get reply */
1018 		switch (http_get_reply(conn)) {
1019 		case HTTP_OK:
1020 		case HTTP_PARTIAL:
1021 		case HTTP_NOT_MODIFIED:
1022 			/* fine */
1023 			break;
1024 		case HTTP_MOVED_PERM:
1025 		case HTTP_MOVED_TEMP:
1026 		case HTTP_SEE_OTHER:
1027 			/*
1028 			 * Not so fine, but we still have to read the
1029 			 * headers to get the new location.
1030 			 */
1031 			break;
1032 		case HTTP_NEED_AUTH:
1033 			if (need_auth) {
1034 				/*
1035 				 * We already sent out authorization code,
1036 				 * so there's nothing more we can do.
1037 				 */
1038 				http_seterr(conn->err);
1039 				goto ouch;
1040 			}
1041 			/* try again, but send the password this time */
1042 			if (verbose)
1043 				fetch_info("server requires authorization");
1044 			break;
1045 		case HTTP_NEED_PROXY_AUTH:
1046 			/*
1047 			 * If we're talking to a proxy, we already sent
1048 			 * our proxy authorization code, so there's
1049 			 * nothing more we can do.
1050 			 */
1051 			http_seterr(conn->err);
1052 			goto ouch;
1053 		case HTTP_BAD_RANGE:
1054 			/*
1055 			 * This can happen if we ask for 0 bytes because
1056 			 * we already have the whole file.  Consider this
1057 			 * a success for now, and check sizes later.
1058 			 */
1059 			break;
1060 		case HTTP_PROTOCOL_ERROR:
1061 			/* fall through */
1062 		case -1:
1063 			--i;
1064 			if (cached)
1065 				continue;
1066 			fetch_syserr();
1067 			goto ouch;
1068 		default:
1069 			http_seterr(conn->err);
1070 			if (!verbose)
1071 				goto ouch;
1072 			/* fall through so we can get the full error message */
1073 		}
1074 
1075 		/* get headers */
1076 		do {
1077 			switch ((h = http_next_header(conn, &p))) {
1078 			case hdr_syserror:
1079 				fetch_syserr();
1080 				goto ouch;
1081 			case hdr_error:
1082 				http_seterr(HTTP_PROTOCOL_ERROR);
1083 				goto ouch;
1084 			case hdr_connection:
1085 				/* XXX too weak? */
1086 				keep_alive = (strcasecmp(p, "keep-alive") == 0);
1087 				break;
1088 			case hdr_content_length:
1089 				http_parse_length(p, &clength);
1090 				break;
1091 			case hdr_content_range:
1092 				http_parse_range(p, &offset, &length, &size);
1093 				break;
1094 			case hdr_last_modified:
1095 				http_parse_mtime(p, &mtime);
1096 				break;
1097 			case hdr_location:
1098 				if (!HTTP_REDIRECT(conn->err))
1099 					break;
1100 				if (new)
1101 					free(new);
1102 				if (verbose)
1103 					fetch_info("%d redirect to %s", conn->err, p);
1104 				if (*p == '/')
1105 					/* absolute path */
1106 					new = fetchMakeURL(url->scheme, url->host, url->port, p,
1107 					    url->user, url->pwd);
1108 				else
1109 					new = fetchParseURL(p);
1110 				if (new == NULL) {
1111 					/* XXX should set an error code */
1112 					goto ouch;
1113 				}
1114 				if (!*new->user && !*new->pwd) {
1115 					strcpy(new->user, url->user);
1116 					strcpy(new->pwd, url->pwd);
1117 				}
1118 				new->offset = url->offset;
1119 				new->length = url->length;
1120 				break;
1121 			case hdr_transfer_encoding:
1122 				/* XXX weak test*/
1123 				chunked = (strcasecmp(p, "chunked") == 0);
1124 				break;
1125 			case hdr_www_authenticate:
1126 				if (conn->err != HTTP_NEED_AUTH)
1127 					break;
1128 				/* if we were smarter, we'd check the method and realm */
1129 				break;
1130 			case hdr_end:
1131 				/* fall through */
1132 			case hdr_unknown:
1133 				/* ignore */
1134 				break;
1135 			}
1136 		} while (h > hdr_end);
1137 
1138 		/* we need to provide authentication */
1139 		if (conn->err == HTTP_NEED_AUTH) {
1140 			e = conn->err;
1141 			need_auth = 1;
1142 			fetch_close(conn);
1143 			conn = NULL;
1144 			continue;
1145 		}
1146 
1147 		/* requested range not satisfiable */
1148 		if (conn->err == HTTP_BAD_RANGE) {
1149 			if (url->offset == size && url->length == 0) {
1150 				/* asked for 0 bytes; fake it */
1151 				offset = url->offset;
1152 				conn->err = HTTP_OK;
1153 				break;
1154 			} else {
1155 				http_seterr(conn->err);
1156 				goto ouch;
1157 			}
1158 		}
1159 
1160 		/* we have a hit or an error */
1161 		if (conn->err == HTTP_OK ||
1162 		    conn->err == HTTP_PARTIAL ||
1163 		    conn->err == HTTP_NOT_MODIFIED ||
1164 		    HTTP_ERROR(conn->err))
1165 			break;
1166 
1167 		/* all other cases: we got a redirect */
1168 		e = conn->err;
1169 		need_auth = 0;
1170 		fetch_close(conn);
1171 		conn = NULL;
1172 		if (!new)
1173 			break;
1174 		if (url != URL)
1175 			fetchFreeURL(url);
1176 		url = new;
1177 	} while (++i < n);
1178 
1179 	/* we failed, or ran out of retries */
1180 	if (conn == NULL) {
1181 		http_seterr(e);
1182 		goto ouch;
1183 	}
1184 
1185 	/* check for inconsistencies */
1186 	if (clength != -1 && length != -1 && clength != length) {
1187 		http_seterr(HTTP_PROTOCOL_ERROR);
1188 		goto ouch;
1189 	}
1190 	if (clength == -1)
1191 		clength = length;
1192 	if (clength != -1)
1193 		length = offset + clength;
1194 	if (length != -1 && size != -1 && length != size) {
1195 		http_seterr(HTTP_PROTOCOL_ERROR);
1196 		goto ouch;
1197 	}
1198 	if (size == -1)
1199 		size = length;
1200 
1201 	/* fill in stats */
1202 	if (us) {
1203 		us->size = size;
1204 		us->atime = us->mtime = mtime;
1205 	}
1206 
1207 	/* too far? */
1208 	if (URL->offset > 0 && offset > URL->offset) {
1209 		http_seterr(HTTP_PROTOCOL_ERROR);
1210 		goto ouch;
1211 	}
1212 
1213 	/* report back real offset and size */
1214 	URL->offset = offset;
1215 	URL->length = clength;
1216 
1217 	if (clength == -1 && !chunked)
1218 		keep_alive = 0;
1219 
1220 	if (conn->err == HTTP_NOT_MODIFIED) {
1221 		http_seterr(HTTP_NOT_MODIFIED);
1222 		if (keep_alive) {
1223 			fetch_cache_put(conn, fetch_close);
1224 			conn = NULL;
1225 		}
1226 		goto ouch;
1227 	}
1228 
1229 	/* wrap it up in a fetchIO */
1230 	if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1231 		fetch_syserr();
1232 		goto ouch;
1233 	}
1234 
1235 	if (url != URL)
1236 		fetchFreeURL(url);
1237 	if (purl)
1238 		fetchFreeURL(purl);
1239 
1240 	if (HTTP_ERROR(conn->err)) {
1241 
1242 		if (keep_alive) {
1243 			char buf[512];
1244 			do {
1245 			} while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1246 		}
1247 
1248 		fetchIO_close(f);
1249 		f = NULL;
1250 	}
1251 
1252 	return (f);
1253 
1254 ouch:
1255 	if (url != URL)
1256 		fetchFreeURL(url);
1257 	if (purl)
1258 		fetchFreeURL(purl);
1259 	if (conn != NULL)
1260 		fetch_close(conn);
1261 	return (NULL);
1262 }
1263 
1264 
1265 /*****************************************************************************
1266  * Entry points
1267  */
1268 
1269 /*
1270  * Retrieve and stat a file by HTTP
1271  */
1272 fetchIO *
1273 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1274 {
1275 	return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1276 }
1277 
1278 /*
1279  * Retrieve a file by HTTP
1280  */
1281 fetchIO *
1282 fetchGetHTTP(struct url *URL, const char *flags)
1283 {
1284 	return (fetchXGetHTTP(URL, NULL, flags));
1285 }
1286 
1287 /*
1288  * Store a file by HTTP
1289  */
1290 fetchIO *
1291 /*ARGSUSED*/
1292 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1293 {
1294 	fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1295 	return (NULL);
1296 }
1297 
1298 /*
1299  * Get an HTTP document's metadata
1300  */
1301 int
1302 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1303 {
1304 	fetchIO *f;
1305 
1306 	f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1307 	if (f == NULL)
1308 		return (-1);
1309 	fetchIO_close(f);
1310 	return (0);
1311 }
1312 
/*
 * States of the character-driven scanner implemented by parse_index().
 * The scanner only looks for double-quoted href values inside a-tags.
 */
enum http_states {
	ST_NONE = 0,	/* plain text, not in markup */
	ST_LT,		/* "<" seen */
	ST_LTA,		/* "<a" seen */
	ST_TAGA,	/* in an a-tag, "<a " seen */
	ST_H,		/* "<a h" seen */
	ST_R,		/* "<a hr" seen */
	ST_E,		/* "<a hre" seen */
	ST_F,		/* "<a href" seen */
	ST_HREF,	/* "<a href=" seen */
	ST_HREFQ,	/* inside the quoted href value */
	ST_TAG,		/* in a tag that is not "<a" */
	ST_TAGAX,	/* in an unknown keyword of an a-tag */
	ST_TAGAQ	/* in a quoted argument of an a-tag other than the href value */
};
1328 
1329 struct index_parser {
1330 	struct url_list *ue;
1331 	struct url *url;
1332 	enum http_states state;
1333 };
1334 
1335 static ssize_t
1336 parse_index(struct index_parser *parser, const char *buf, size_t len)
1337 {
1338 	char *end_attr, p = *buf;
1339 
1340 	switch (parser->state) {
1341 	case ST_NONE:
1342 		/* Plain text, not in markup */
1343 		if (p == '<')
1344 			parser->state = ST_LT;
1345 		return 1;
1346 	case ST_LT:
1347 		/* In tag -- "<" already found */
1348 		if (p == '>')
1349 			parser->state = ST_NONE;
1350 		else if (p == 'a' || p == 'A')
1351 			parser->state = ST_LTA;
1352 		else if (!isspace((unsigned char)p))
1353 			parser->state = ST_TAG;
1354 		return 1;
1355 	case ST_LTA:
1356 		/* In tag -- "<a" already found */
1357 		if (p == '>')
1358 			parser->state = ST_NONE;
1359 		else if (p == '"')
1360 			parser->state = ST_TAGAQ;
1361 		else if (isspace((unsigned char)p))
1362 			parser->state = ST_TAGA;
1363 		else
1364 			parser->state = ST_TAG;
1365 		return 1;
1366 	case ST_TAG:
1367 		/* In tag, but not "<a" -- disregard */
1368 		if (p == '>')
1369 			parser->state = ST_NONE;
1370 		return 1;
1371 	case ST_TAGA:
1372 		/* In a-tag -- "<a " already found */
1373 		if (p == '>')
1374 			parser->state = ST_NONE;
1375 		else if (p == '"')
1376 			parser->state = ST_TAGAQ;
1377 		else if (p == 'h' || p == 'H')
1378 			parser->state = ST_H;
1379 		else if (!isspace((unsigned char)p))
1380 			parser->state = ST_TAGAX;
1381 		return 1;
1382 	case ST_TAGAX:
1383 		/* In unknown keyword in a-tag */
1384 		if (p == '>')
1385 			parser->state = ST_NONE;
1386 		else if (p == '"')
1387 			parser->state = ST_TAGAQ;
1388 		else if (isspace((unsigned char)p))
1389 			parser->state = ST_TAGA;
1390 		return 1;
1391 	case ST_TAGAQ:
1392 		/* In a-tag, unknown argument for keys. */
1393 		if (p == '>')
1394 			parser->state = ST_NONE;
1395 		else if (p == '"')
1396 			parser->state = ST_TAGA;
1397 		return 1;
1398 	case ST_H:
1399 		/* In a-tag -- "<a h" already found */
1400 		if (p == '>')
1401 			parser->state = ST_NONE;
1402 		else if (p == '"')
1403 			parser->state = ST_TAGAQ;
1404 		else if (p == 'r' || p == 'R')
1405 			parser->state = ST_R;
1406 		else if (isspace((unsigned char)p))
1407 			parser->state = ST_TAGA;
1408 		else
1409 			parser->state = ST_TAGAX;
1410 		return 1;
1411 	case ST_R:
1412 		/* In a-tag -- "<a hr" already found */
1413 		if (p == '>')
1414 			parser->state = ST_NONE;
1415 		else if (p == '"')
1416 			parser->state = ST_TAGAQ;
1417 		else if (p == 'e' || p == 'E')
1418 			parser->state = ST_E;
1419 		else if (isspace((unsigned char)p))
1420 			parser->state = ST_TAGA;
1421 		else
1422 			parser->state = ST_TAGAX;
1423 		return 1;
1424 	case ST_E:
1425 		/* In a-tag -- "<a hre" already found */
1426 		if (p == '>')
1427 			parser->state = ST_NONE;
1428 		else if (p == '"')
1429 			parser->state = ST_TAGAQ;
1430 		else if (p == 'f' || p == 'F')
1431 			parser->state = ST_F;
1432 		else if (isspace((unsigned char)p))
1433 			parser->state = ST_TAGA;
1434 		else
1435 			parser->state = ST_TAGAX;
1436 		return 1;
1437 	case ST_F:
1438 		/* In a-tag -- "<a href" already found */
1439 		if (p == '>')
1440 			parser->state = ST_NONE;
1441 		else if (p == '"')
1442 			parser->state = ST_TAGAQ;
1443 		else if (p == '=')
1444 			parser->state = ST_HREF;
1445 		else if (!isspace((unsigned char)p))
1446 			parser->state = ST_TAGAX;
1447 		return 1;
1448 	case ST_HREF:
1449 		/* In a-tag -- "<a href=" already found */
1450 		if (p == '>')
1451 			parser->state = ST_NONE;
1452 		else if (p == '"')
1453 			parser->state = ST_HREFQ;
1454 		else if (!isspace((unsigned char)p))
1455 			parser->state = ST_TAGA;
1456 		return 1;
1457 	case ST_HREFQ:
1458 		/* In href of the a-tag */
1459 		end_attr = memchr(buf, '"', len);
1460 		if (end_attr == NULL)
1461 			return 0;
1462 		*end_attr = '\0';
1463 		parser->state = ST_TAGA;
1464 		if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1465 			return -1;
1466 		return end_attr + 1 - buf;
1467 	}
1468 	/* NOTREACHED */
1469 	abort();
1470 }
1471 
1472 struct http_index_cache {
1473 	struct http_index_cache *next;
1474 	struct url *location;
1475 	struct url_list ue;
1476 };
1477 
1478 static struct http_index_cache *index_cache;
1479 
1480 /*
1481  * List a directory
1482  */
1483 int
1484 /*ARGSUSED*/
1485 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern __unused, const char *flags)
1486 {
1487 	fetchIO *f;
1488 	char buf[2 * PATH_MAX];
1489 	size_t buf_len, sum_processed;
1490 	ssize_t read_len, processed;
1491 	struct index_parser state;
1492 	struct http_index_cache *cache = NULL;
1493 	int do_cache, ret;
1494 
1495 	do_cache = CHECK_FLAG('c');
1496 
1497 	if (do_cache) {
1498 		for (cache = index_cache; cache != NULL; cache = cache->next) {
1499 			if (strcmp(cache->location->scheme, url->scheme))
1500 				continue;
1501 			if (strcmp(cache->location->user, url->user))
1502 				continue;
1503 			if (strcmp(cache->location->pwd, url->pwd))
1504 				continue;
1505 			if (strcmp(cache->location->host, url->host))
1506 				continue;
1507 			if (cache->location->port != url->port)
1508 				continue;
1509 			if (strcmp(cache->location->doc, url->doc))
1510 				continue;
1511 			return fetchAppendURLList(ue, &cache->ue);
1512 		}
1513 
1514 		cache = malloc(sizeof(*cache));
1515 		fetchInitURLList(&cache->ue);
1516 		cache->location = fetchCopyURL(url);
1517 	}
1518 
1519 	f = fetchGetHTTP(url, flags);
1520 	if (f == NULL) {
1521 		if (do_cache) {
1522 			fetchFreeURLList(&cache->ue);
1523 			fetchFreeURL(cache->location);
1524 			free(cache);
1525 		}
1526 		return -1;
1527 	}
1528 
1529 	state.url = url;
1530 	state.state = ST_NONE;
1531 	if (do_cache) {
1532 		state.ue = &cache->ue;
1533 	} else {
1534 		state.ue = ue;
1535 	}
1536 
1537 	buf_len = 0;
1538 
1539 	while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1540 		buf_len += read_len;
1541 		sum_processed = 0;
1542 		do {
1543 			processed = parse_index(&state, buf + sum_processed, buf_len);
1544 			if (processed == -1)
1545 				break;
1546 			buf_len -= processed;
1547 			sum_processed += processed;
1548 		} while (processed != 0 && buf_len > 0);
1549 		if (processed == -1) {
1550 			read_len = -1;
1551 			break;
1552 		}
1553 		memmove(buf, buf + sum_processed, buf_len);
1554 	}
1555 
1556 	fetchIO_close(f);
1557 
1558 	ret = read_len < 0 ? -1 : 0;
1559 
1560 	if (do_cache) {
1561 		if (ret == 0) {
1562 			cache->next = index_cache;
1563 			index_cache = cache;
1564 		}
1565 
1566 		if (fetchAppendURLList(ue, &cache->ue))
1567 			ret = -1;
1568 	}
1569 
1570 	return ret;
1571 }
1572