xref: /netbsd-src/external/bsd/fetch/dist/libfetch/http.c (revision 63372caa2f74032c7c1cb34e7cd32f28ad65b703)
1 /*	$NetBSD: http.c,v 1.6 2024/09/01 15:07:31 christos Exp $	*/
2 /*-
3  * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4  * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5  * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer
13  *    in this position and unchanged.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
32  */
33 
34 /*
35  * The following copyright applies to the base64 code:
36  *
37  *-
38  * Copyright 1997 Massachusetts Institute of Technology
39  *
40  * Permission to use, copy, modify, and distribute this software and
41  * its documentation for any purpose and without fee is hereby
42  * granted, provided that both the above copyright notice and this
43  * permission notice appear in all copies, that both the above
44  * copyright notice and this permission notice appear in all
45  * supporting documentation, and that the name of M.I.T. not be used
46  * in advertising or publicity pertaining to distribution of the
47  * software without specific, written prior permission.  M.I.T. makes
48  * no representations about the suitability of this software for any
49  * purpose.  It is provided "as is" without express or implied
50  * warranty.
51  *
52  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
53  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  */
65 
66 #if defined(__linux__) || defined(__MINT__) || defined(__FreeBSD_kernel__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 /*
69    __FreeBSD_kernel__ is defined for GNU/kFreeBSD.
70    See http://glibc-bsd.alioth.debian.org/porting/PORTING .
71 */
72 #define _GNU_SOURCE
73 #endif
74 
75 #ifndef _REENTRANT
76 /* Needed for gmtime_r on Interix */
77 #define _REENTRANT
78 #endif
79 
80 #if HAVE_CONFIG_H
81 #include "config.h"
82 #endif
83 #ifndef NETBSD
84 #include <nbcompat.h>
85 #endif
86 
87 #include <sys/types.h>
88 #include <sys/socket.h>
89 
90 #include <ctype.h>
91 #include <errno.h>
92 #include <locale.h>
93 #include <stdarg.h>
94 #ifndef NETBSD
95 #include <nbcompat/stdio.h>
96 #else
97 #include <stdio.h>
98 #endif
99 #include <stdlib.h>
100 #include <string.h>
101 #include <time.h>
102 #include <unistd.h>
103 
104 #include <netinet/in.h>
105 #include <netinet/tcp.h>
106 
107 #ifndef NETBSD
108 #include <nbcompat/netdb.h>
109 #else
110 #include <netdb.h>
111 #endif
112 
113 #include <arpa/inet.h>
114 
115 #include "fetch.h"
116 #include "common.h"
117 #include "httperr.h"
118 
/* Upper bound on the number of redirects followed for a single request */
#define MAX_REDIRECT 5

/* Symbolic names for the HTTP status codes that get special treatment */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
/* Not a real status code: used internally for unparsable replies */
#define HTTP_PROTOCOL_ERROR	999

/* True when xyz is one of the redirect codes 301, 302, 307 or 303 */
#define HTTP_REDIRECT(xyz)				\
	((xyz) == HTTP_MOVED_PERM ||			\
	 (xyz) == HTTP_MOVED_TEMP ||			\
	 (xyz) == HTTP_TEMP_REDIRECT ||			\
	 (xyz) == HTTP_SEE_OTHER)

/*
 * True when xyz lies strictly between 400 and 599; note that both
 * bounds themselves are excluded.
 */
#define HTTP_ERROR(xyz)	((xyz) > 400 && (xyz) < 599)
141 
142 
143 /*****************************************************************************
144  * I/O functions for decoding chunked streams
145  */
146 
147 struct httpio
148 {
149 	conn_t		*conn;		/* connection */
150 	int		 chunked;	/* chunked mode */
151 	int		 keep_alive;	/* keep-alive mode */
152 	char		*buf;		/* chunk buffer */
153 	size_t		 bufsize;	/* size of chunk buffer */
154 	ssize_t		 buflen;	/* amount of data currently in buffer */
155 	size_t		 bufpos;	/* current read offset in buffer */
156 	int		 eof;		/* end-of-file flag */
157 	int		 error;		/* error flag */
158 	size_t		 chunksize;	/* remaining size of current chunk */
159 	off_t		 contentlength;	/* remaining size of the content */
160 };
161 
162 /*
163  * Get next chunk header
164  */
165 static ssize_t
166 http_new_chunk(struct httpio *io)
167 {
168 	char *p;
169 
170 	if (fetch_getln(io->conn) == -1)
171 		return (-1);
172 
173 	if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
174 		return (-1);
175 
176 	for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
177 		if (*p == ';')
178 			break;
179 		if (!isxdigit((unsigned char)*p))
180 			return (-1);
181 		if (isdigit((unsigned char)*p)) {
182 			io->chunksize = io->chunksize * 16 +
183 			    *p - '0';
184 		} else {
185 			io->chunksize = io->chunksize * 16 +
186 			    10 + tolower((unsigned char)*p) - 'a';
187 		}
188 	}
189 
190 	return (io->chunksize);
191 }
192 
193 /*
194  * Grow the input buffer to at least len bytes
195  */
196 static int
197 http_growbuf(struct httpio *io, size_t len)
198 {
199 	char *tmp;
200 
201 	if (io->bufsize >= len)
202 		return (0);
203 
204 	if ((tmp = realloc(io->buf, len)) == NULL)
205 		return (-1);
206 	io->buf = tmp;
207 	io->bufsize = len;
208 	return (0);
209 }
210 
211 /*
212  * Fill the input buffer, do chunk decoding on the fly
213  */
214 static ssize_t
215 http_fillbuf(struct httpio *io, size_t len)
216 {
217 	if (io->error)
218 		return (-1);
219 	if (io->eof)
220 		return (0);
221 
222 	if (io->contentlength >= 0 && (off_t)len > io->contentlength)
223 		len = io->contentlength;
224 
225 	if (io->chunked == 0) {
226 		if (http_growbuf(io, len) == -1)
227 			return (-1);
228 		if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
229 			io->error = 1;
230 			return (-1);
231 		}
232 		if (io->contentlength)
233 			io->contentlength -= io->buflen;
234 		io->bufpos = 0;
235 		return (io->buflen);
236 	}
237 
238 	if (io->chunksize == 0) {
239 		switch (http_new_chunk(io)) {
240 		case -1:
241 			io->error = 1;
242 			return (-1);
243 		case 0:
244 			io->eof = 1;
245 			if (fetch_getln(io->conn) == -1)
246 				return (-1);
247 			return (0);
248 		}
249 	}
250 
251 	if (len > io->chunksize)
252 		len = io->chunksize;
253 	if (http_growbuf(io, len) == -1)
254 		return (-1);
255 	if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
256 		io->error = 1;
257 		return (-1);
258 	}
259 	io->chunksize -= io->buflen;
260 	if (io->contentlength >= 0)
261 		io->contentlength -= io->buflen;
262 
263 	if (io->chunksize == 0) {
264 		char endl[2];
265 		ssize_t len2;
266 
267 		len2 = fetch_read(io->conn, endl, 2);
268 		if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
269 			return (-1);
270 		if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
271 			return (-1);
272 	}
273 
274 	io->bufpos = 0;
275 
276 	return (io->buflen);
277 }
278 
279 /*
280  * Read function
281  */
282 static ssize_t
283 http_readfn(void *v, void *buf, size_t len)
284 {
285 	struct httpio *io = (struct httpio *)v;
286 	size_t l, pos;
287 
288 	if (io->error)
289 		return (-1);
290 	if (io->eof)
291 		return (0);
292 
293 	for (pos = 0; len > 0; pos += l, len -= l) {
294 		/* empty buffer */
295 		if (!io->buf || (ssize_t)io->bufpos == io->buflen)
296 			if (http_fillbuf(io, len) < 1)
297 				break;
298 		l = io->buflen - io->bufpos;
299 		if (len < l)
300 			l = len;
301 		memcpy((char *)buf + pos, io->buf + io->bufpos, l);
302 		io->bufpos += l;
303 	}
304 
305 	if (!pos && io->error)
306 		return (-1);
307 	return (pos);
308 }
309 
310 /*
311  * Write function
312  */
313 static ssize_t
314 http_writefn(void *v, const void *buf, size_t len)
315 {
316 	struct httpio *io = (struct httpio *)v;
317 
318 	return (fetch_write(io->conn, buf, len));
319 }
320 
321 /*
322  * Close function
323  */
324 static void
325 http_closefn(void *v)
326 {
327 	struct httpio *io = (struct httpio *)v;
328 
329 	if (io->keep_alive) {
330 		int val;
331 
332 		val = 0;
333 		setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
334 			   (socklen_t)sizeof(val));
335 			  fetch_cache_put(io->conn, fetch_close);
336 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
337 		val = 1;
338 		setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
339 		    sizeof(val));
340 #endif
341 	} else {
342 		fetch_close(io->conn);
343 	}
344 
345 	free(io->buf);
346 	free(io);
347 }
348 
349 /*
350  * Wrap a file descriptor up
351  */
352 static fetchIO *
353 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
354 {
355 	struct httpio *io;
356 	fetchIO *f;
357 
358 	if ((io = calloc(1, sizeof(*io))) == NULL) {
359 		fetch_syserr();
360 		return (NULL);
361 	}
362 	io->conn = conn;
363 	io->chunked = chunked;
364 	io->contentlength = clength;
365 	io->keep_alive = keep_alive;
366 	f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
367 	if (f == NULL) {
368 		fetch_syserr();
369 		free(io);
370 		return (NULL);
371 	}
372 	return (f);
373 }
374 
375 
376 /*****************************************************************************
377  * Helper functions for talking to the server and parsing its replies
378  */
379 
/*
 * Result codes of the header reader: negative values are failures,
 * zero marks the end of the header section, positive values identify
 * the header that was read.
 */
typedef enum {
	hdr_syserror		= -2,
	hdr_error		= -1,
	hdr_end			= 0,
	hdr_unknown		= 1,
	hdr_connection		= 2,
	hdr_content_length	= 3,
	hdr_content_range	= 4,
	hdr_last_modified	= 5,
	hdr_location		= 6,
	hdr_transfer_encoding	= 7,
	hdr_www_authenticate	= 8
} hdr_t;
394 
395 /* Names of interesting headers */
396 static struct {
397 	hdr_t		 num;
398 	const char	*name;
399 } hdr_names[] = {
400 	{ hdr_connection,		"Connection" },
401 	{ hdr_content_length,		"Content-Length" },
402 	{ hdr_content_range,		"Content-Range" },
403 	{ hdr_last_modified,		"Last-Modified" },
404 	{ hdr_location,			"Location" },
405 	{ hdr_transfer_encoding,	"Transfer-Encoding" },
406 	{ hdr_www_authenticate,		"WWW-Authenticate" },
407 	{ hdr_unknown,			NULL },
408 };
409 
410 /*
411  * Send a formatted line; optionally echo to terminal
412  */
413 LIBFETCH_PRINTFLIKE(2, 3)
414 static int
415 http_cmd(conn_t *conn, const char *fmt, ...)
416 {
417 	va_list ap;
418 	size_t len;
419 	char *msg;
420 	ssize_t r;
421 
422 	va_start(ap, fmt);
423 	len = vasprintf(&msg, fmt, ap);
424 	va_end(ap);
425 
426 	if (msg == NULL) {
427 		errno = ENOMEM;
428 		fetch_syserr();
429 		return (-1);
430 	}
431 
432 	r = fetch_write(conn, msg, len);
433 	free(msg);
434 
435 	if (r == -1) {
436 		fetch_syserr();
437 		return (-1);
438 	}
439 
440 	return (0);
441 }
442 
443 /*
444  * Get and parse status line
445  */
446 static int
447 http_get_reply(conn_t *conn)
448 {
449 	char *p;
450 
451 	if (fetch_getln(conn) == -1)
452 		return (-1);
453 	/*
454 	 * A valid status line looks like "HTTP/m.n xyz reason" where m
455 	 * and n are the major and minor protocol version numbers and xyz
456 	 * is the reply code.
457 	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
458 	 * just one) that do not send a version number, so we can't rely
459 	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
460 	 * We don't care about the reason phrase.
461 	 */
462 	if (strncmp(conn->buf, "HTTP", 4) != 0)
463 		return (HTTP_PROTOCOL_ERROR);
464 	p = conn->buf + 4;
465 	if (*p == '/') {
466 		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
467 			return (HTTP_PROTOCOL_ERROR);
468 		p += 4;
469 	}
470 	if (*p != ' ' ||
471 	    !isdigit((unsigned char)p[1]) ||
472 	    !isdigit((unsigned char)p[2]) ||
473 	    !isdigit((unsigned char)p[3]))
474 		return (HTTP_PROTOCOL_ERROR);
475 
476 	conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
477 	return (conn->err);
478 }
479 
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header line.
 * The name is compared case-insensitively and must be followed
 * directly by a colon.  On a match the returned pointer addresses the
 * first character after the colon that is not whitespace (possibly the
 * terminating NUL); otherwise NULL is returned.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Compare first and advance afterwards, so that a mismatch in
	 * the very last character of the name is not skipped over.
	 */
	for (; *str != '\0'; ++str, ++hdr) {
		if (tolower((unsigned char)*str) !=
		    tolower((unsigned char)*hdr))
			return (NULL);
	}
	if (*hdr != ':')
		return (NULL);

	/* step over the colon and any whitespace that follows it */
	do {
		++hdr;
	} while (isspace((unsigned char)*hdr));

	return (hdr);
}
496 
497 /*
498  * Get the next header and return the appropriate symbolic code.
499  */
500 static hdr_t
501 http_next_header(conn_t *conn, const char **p)
502 {
503 	int i;
504 
505 	if (fetch_getln(conn) == -1)
506 		return (hdr_syserror);
507 	while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
508 		conn->buflen--;
509 	conn->buf[conn->buflen] = '\0';
510 	if (conn->buflen == 0)
511 		return (hdr_end);
512 	/*
513 	 * We could check for malformed headers but we don't really care.
514 	 * A valid header starts with a token immediately followed by a
515 	 * colon; a token is any sequence of non-control, non-whitespace
516 	 * characters except "()<>@,;:\\\"{}".
517 	 */
518 	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
519 		if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
520 			return (hdr_names[i].num);
521 	return (hdr_unknown);
522 }
523 
/*
 * Parse a last-modified header
 *
 * p is expected to hold a date of the form
 * "Sun, 06 Nov 1994 08:49:37 GMT".  It is parsed with the C locale's
 * day and month names; where no locale-aware strptime_l() is
 * available, the LC_TIME locale is switched to "C" for the duration of
 * the call and restored afterwards.
 *
 * Returns 0 and stores the time in *mtime on success, -1 if the date
 * cannot be parsed or the current locale cannot be saved.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	/* strptime() only stores the fields it parses, so start from zero */
	struct tm tm = {0};
	char *r;

#ifdef LC_C_LOCALE
	r = strptime_l(p, "%a, %d %b %Y %H:%M:%S GMT", &tm, LC_C_LOCALE);
#else
	const char *current;
	char *locale;

	/* setlocale() may fail; never hand a NULL pointer to strdup() */
	current = setlocale(LC_TIME, NULL);
	if (current == NULL)
		return (-1);
	locale = strdup(current);
	if (locale == NULL)
		return (-1);

	setlocale(LC_TIME, "C");
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, locale);
	free(locale);
#endif
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
553 
/*
 * Parse a content-length header
 *
 * p must consist of one or more decimal digits and nothing else.
 * Returns 0 and stores the value in *length on success.  Returns -1,
 * leaving *length untouched, if p is empty, contains any other
 * character, or denotes a value that does not fit into an off_t.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	off_t len, max;
	int digit;

	/*
	 * Largest off_t value, put together without overflowing
	 * (relies on 8-bit bytes, which POSIX guarantees).
	 */
	max = (off_t)1 << (sizeof(off_t) * 8 - 2);
	max = max - 1 + max;

	/* a value without any digit is malformed */
	if (!isdigit((unsigned char)*p))
		return (-1);

	for (len = 0; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* stop before len * 10 + digit would overflow */
		if (len > (max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	*length = len;
	return (0);
}
569 
/*
 * Read a non-empty run of decimal digits at *pp as an off_t.
 *
 * On success the value is stored in *out, *pp is moved past the digits
 * and 0 is returned.  Returns -1, changing nothing, if there is no
 * digit at *pp or the value does not fit into an off_t.
 */
static int
http_range_number(const char **pp, off_t *out)
{
	const char *s = *pp;
	off_t val, max;
	int digit;

	/*
	 * Largest off_t value, put together without overflowing
	 * (relies on 8-bit bytes, which POSIX guarantees).
	 */
	max = (off_t)1 << (sizeof(off_t) * 8 - 2);
	max = max - 1 + max;

	if (!isdigit((unsigned char)*s))
		return (-1);
	for (val = 0; isdigit((unsigned char)*s); ++s) {
		digit = *s - '0';
		if (val > (max - digit) / 10)
			return (-1);
		val = val * 10 + digit;
	}
	*pp = s;
	*out = val;
	return (0);
}

/*
 * Parse a content-range header
 *
 * Accepted forms are "bytes first-last/size" and "bytes * /size"
 * (written here with a space only to keep the comment intact; the
 * header has none); the unit is matched case-insensitively.  On
 * success 0 is returned and
 *   *offset = first, or -1 for the '*' form,
 *   *length = last - first + 1, or 0 for the '*' form,
 *   *size   = size.
 * Returns -1, storing nothing, if the header is malformed, a number
 * does not fit into an off_t, first is greater than last, or size is
 * smaller than the number of bytes in the range.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_range_number(&p, &first) == -1 || *p != '-')
			return (-1);
		++p;
		if (http_range_number(&p, &last) == -1)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	++p;
	if (http_range_number(&p, &len) == -1)
		return (-1);
	/*
	 * Same test as len < last - first + 1, written so that the
	 * addition cannot overflow.
	 */
	if (*p != '\0' || len <= last - first)
		return (-1);
	if (first == -1)
		*length = 0;
	else
		*length = last - first + 1;
	*offset = first;
	*size = len;
	return (0);
}
606 
607 
608 /*****************************************************************************
609  * Helper functions for authorization
610  */
611 
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src with the standard alphabet and
 * '=' padding.  Returns a newly allocated, NUL-terminated string that
 * the caller has to free, or NULL if the allocation fails.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Read the input as unsigned bytes: with a signed plain char,
	 * bytes of 0x80 and above would sign-extend and spill into the
	 * neighbouring 6-bit groups.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	/* four output characters per started group of three, plus NUL */
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* one or two bytes left over: emit a padded final group */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = '\0';
	return (str);
}
667 
668 /*
669  * Encode username and password
670  */
671 static int
672 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
673 {
674 	char *upw, *auth;
675 	int r;
676 
677 	if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
678 		return (-1);
679 	auth = http_base64(upw);
680 	free(upw);
681 	if (auth == NULL)
682 		return (-1);
683 	r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
684 	free(auth);
685 	return (r);
686 }
687 
688 /*
689  * Send an authorization header
690  */
691 static int
692 http_authorize(conn_t *conn, const char *hdr, const char *p)
693 {
694 	/* basic authorization */
695 	if (strncasecmp(p, "basic:", 6) == 0) {
696 		char *user, *pwd, *str;
697 		int r;
698 
699 		/* skip realm */
700 		for (p += 6; *p && *p != ':'; ++p)
701 			/* nothing */ ;
702 		if (!*p || strchr(++p, ':') == NULL)
703 			return (-1);
704 		if ((str = strdup(p)) == NULL)
705 			return (-1); /* XXX */
706 		user = str;
707 		pwd = strchr(str, ':');
708 		*pwd++ = '\0';
709 		r = http_basic_auth(conn, hdr, user, pwd);
710 		free(str);
711 		return (r);
712 	}
713 	return (-1);
714 }
715 
716 
717 /*****************************************************************************
718  * Helper functions for connecting to a server or proxy
719  */
720 
721 /*
722  * Connect to the correct HTTP server or proxy.
723  */
724 static conn_t *
725 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
726 {
727 	struct url *curl;
728 	conn_t *conn;
729 	hdr_t h;
730 	const char *p;
731 	int af, verbose;
732 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
733 	int val;
734 #endif
735 
736 	*cached = 0;
737 
738 #ifdef INET6
739 	af = AF_UNSPEC;
740 #else
741 	af = AF_INET;
742 #endif
743 
744 	verbose = CHECK_FLAG('v');
745 	if (CHECK_FLAG('4'))
746 		af = AF_INET;
747 #ifdef INET6
748 	else if (CHECK_FLAG('6'))
749 		af = AF_INET6;
750 #endif
751 
752 	curl = (purl != NULL) ? purl : URL;
753 	if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
754 		URL = purl;
755 	} else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
756 		/* can't talk http to an ftp server */
757 		/* XXX should set an error code */
758 		return (NULL);
759 	}
760 
761 	if ((conn = fetch_cache_get(curl, af)) != NULL) {
762 		*cached = 1;
763 		return (conn);
764 	}
765 
766 	if ((conn = fetch_connect(curl, af, verbose)) == NULL)
767 		/* fetch_connect() has already set an error code */
768 		return (NULL);
769 	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && purl) {
770 		http_cmd(conn, "CONNECT %s:%d HTTP/1.1\r\n",
771 				URL->host, URL->port);
772 		http_cmd(conn, "Host: %s:%d\r\n",
773 				URL->host, URL->port);
774 		/* proxy authorization */
775 		if (*purl->user || *purl->pwd)
776 			http_basic_auth(conn, "Proxy-Authorization",
777 			    purl->user, purl->pwd);
778 		else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
779 			http_authorize(conn, "Proxy-Authorization", p);
780 		http_cmd(conn, "\r\n");
781 		if (http_get_reply(conn) != HTTP_OK) {
782 			http_seterr(conn->err);
783 			goto ouch;
784 		}
785 		/* Read and discard the rest of the proxy response (if any) */
786 		do {
787 			switch ((h = http_next_header(conn, &p))) {
788 			case hdr_syserror:
789 				fetch_syserr();
790 				goto ouch;
791 			case hdr_error:
792 				http_seterr(HTTP_PROTOCOL_ERROR);
793 				goto ouch;
794 			default:
795 				/* ignore */ ;
796 			}
797 		} while (h > hdr_end);
798 	}
799 	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
800 	    fetch_ssl(conn, URL, verbose) == -1) {
801 		/* grrr */
802 #ifdef EAUTH
803 		errno = EAUTH;
804 #else
805 		errno = EPERM;
806 #endif
807 		goto ouch;
808 	}
809 
810 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
811 	val = 1;
812 	setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
813 #endif
814 
815 	return (conn);
816 ouch:
817 	fetch_close(conn);
818 	return (NULL);
819 }
820 
821 static struct url *
822 http_get_proxy(struct url * url, const char *flags)
823 {
824 	struct url *purl;
825 	char *p;
826 
827 	if (flags != NULL && strchr(flags, 'd') != NULL)
828 		return (NULL);
829 	if (fetch_no_proxy_match(url->host))
830 		return (NULL);
831 	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
832 	    *p && (purl = fetchParseURL(p))) {
833 		if (!*purl->scheme)
834 			strcpy(purl->scheme, SCHEME_HTTP);
835 		if (!purl->port)
836 			purl->port = fetch_default_proxy_port(purl->scheme);
837 		if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
838 			return (purl);
839 		fetchFreeURL(purl);
840 	}
841 	return (NULL);
842 }
843 
844 static void
845 set_if_modified_since(conn_t *conn, time_t last_modified)
846 {
847 	static const char weekdays[] = "SunMonTueWedThuFriSat";
848 	static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
849 	struct tm tm;
850 	char buf[80];
851 	gmtime_r(&last_modified, &tm);
852 	snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4ld %02d:%02d:%02d GMT",
853 	    weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
854 	    (long)tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
855 	http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
856 }
857 
858 
859 /*****************************************************************************
860  * Core
861  */
862 
863 /*
864  * Send a request and process the reply
865  *
866  * XXX This function is way too long, the do..while loop should be split
867  * XXX off into a separate function.
868  */
869 fetchIO *
870 http_request(struct url *URL, const char *op, struct url_stat *us,
871     struct url *purl, const char *flags)
872 {
873 	conn_t *conn;
874 	struct url *url, *new;
875 	int chunked, direct, if_modified_since, need_auth, noredirect;
876 	int keep_alive, verbose, cached;
877 	int e, i, n, val;
878 	off_t offset, clength, length, size;
879 	time_t mtime;
880 	const char *p;
881 	fetchIO *f;
882 	hdr_t h;
883 	char hbuf[URL_HOSTLEN + 7], *host;
884 
885 	direct = CHECK_FLAG('d');
886 	noredirect = CHECK_FLAG('A');
887 	verbose = CHECK_FLAG('v');
888 	if_modified_since = CHECK_FLAG('i');
889 	keep_alive = 0;
890 
891 	if (direct && purl) {
892 		fetchFreeURL(purl);
893 		purl = NULL;
894 	}
895 
896 	/* try the provided URL first */
897 	url = URL;
898 
899 	/* if the A flag is set, we only get one try */
900 	n = noredirect ? 1 : MAX_REDIRECT;
901 	i = 0;
902 
903 	e = HTTP_PROTOCOL_ERROR;
904 	need_auth = 0;
905 	do {
906 		new = NULL;
907 		chunked = 0;
908 		offset = 0;
909 		clength = -1;
910 		length = -1;
911 		size = -1;
912 		mtime = 0;
913 
914 		/* check port */
915 		if (!url->port)
916 			url->port = fetch_default_port(url->scheme);
917 
918 		/* were we redirected to an FTP URL? */
919 		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
920 			if (strcmp(op, "GET") == 0)
921 				return (ftp_request(url, "RETR", NULL, us, purl, flags));
922 			else if (strcmp(op, "HEAD") == 0)
923 				return (ftp_request(url, "STAT", NULL, us, purl, flags));
924 		}
925 
926 		/* connect to server or proxy */
927 		if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
928 			goto ouch;
929 
930 		host = url->host;
931 #ifdef INET6
932 		if (strchr(url->host, ':')) {
933 			snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
934 			host = hbuf;
935 		}
936 #endif
937 		if (url->port != fetch_default_port(url->scheme)) {
938 			if (host != hbuf) {
939 				strcpy(hbuf, host);
940 				host = hbuf;
941 			}
942 			snprintf(hbuf + strlen(hbuf),
943 			    sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
944 		}
945 
946 		/* send request */
947 		if (verbose)
948 			fetch_info("requesting %s://%s%s",
949 			    url->scheme, host, url->doc);
950 		if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
951 			http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
952 			    op, url->scheme, host, url->doc);
953 		} else {
954 			http_cmd(conn, "%s %s HTTP/1.1\r\n",
955 			    op, url->doc);
956 		}
957 
958 		if (if_modified_since && url->last_modified > 0)
959 			set_if_modified_since(conn, url->last_modified);
960 
961 		/* virtual host */
962 		http_cmd(conn, "Host: %s\r\n", host);
963 
964 		/* proxy authorization */
965 		if (purl) {
966 			if (*purl->user || *purl->pwd)
967 				http_basic_auth(conn, "Proxy-Authorization",
968 				    purl->user, purl->pwd);
969 			else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
970 				http_authorize(conn, "Proxy-Authorization", p);
971 		}
972 
973 		/* server authorization */
974 		if (need_auth || *url->user || *url->pwd) {
975 			if (*url->user || *url->pwd)
976 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
977 			else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
978 				http_authorize(conn, "Authorization", p);
979 			else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
980 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
981 			} else {
982 				http_seterr(HTTP_NEED_AUTH);
983 				goto ouch;
984 			}
985 		}
986 
987 		/* other headers */
988 		if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
989 			if (strcasecmp(p, "auto") == 0)
990 				http_cmd(conn, "Referer: %s://%s%s\r\n",
991 				    url->scheme, host, url->doc);
992 			else
993 				http_cmd(conn, "Referer: %s\r\n", p);
994 		}
995 		if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
996 			http_cmd(conn, "User-Agent: %s\r\n", p);
997 		else
998 			http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
999 		if (url->offset > 0)
1000 			http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
1001 		http_cmd(conn, "\r\n");
1002 
1003 		/*
1004 		 * Force the queued request to be dispatched.  Normally, one
1005 		 * would do this with shutdown(2) but squid proxies can be
1006 		 * configured to disallow such half-closed connections.  To
1007 		 * be compatible with such configurations, fiddle with socket
1008 		 * options to force the pending data to be written.
1009 		 */
1010 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
1011 		val = 0;
1012 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
1013 			   sizeof(val));
1014 #endif
1015 		val = 1;
1016 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
1017 		    (socklen_t)sizeof(val));
1018 
1019 		/* get reply */
1020 		switch (http_get_reply(conn)) {
1021 		case HTTP_OK:
1022 		case HTTP_PARTIAL:
1023 		case HTTP_NOT_MODIFIED:
1024 			/* fine */
1025 			break;
1026 		case HTTP_MOVED_PERM:
1027 		case HTTP_MOVED_TEMP:
1028 		case HTTP_SEE_OTHER:
1029 			/*
1030 			 * Not so fine, but we still have to read the
1031 			 * headers to get the new location.
1032 			 */
1033 			break;
1034 		case HTTP_NEED_AUTH:
1035 			if (need_auth) {
1036 				/*
1037 				 * We already sent out authorization code,
1038 				 * so there's nothing more we can do.
1039 				 */
1040 				http_seterr(conn->err);
1041 				goto ouch;
1042 			}
1043 			/* try again, but send the password this time */
1044 			if (verbose)
1045 				fetch_info("server requires authorization");
1046 			break;
1047 		case HTTP_NEED_PROXY_AUTH:
1048 			/*
1049 			 * If we're talking to a proxy, we already sent
1050 			 * our proxy authorization code, so there's
1051 			 * nothing more we can do.
1052 			 */
1053 			http_seterr(conn->err);
1054 			goto ouch;
1055 		case HTTP_BAD_RANGE:
1056 			/*
1057 			 * This can happen if we ask for 0 bytes because
1058 			 * we already have the whole file.  Consider this
1059 			 * a success for now, and check sizes later.
1060 			 */
1061 			break;
1062 		case HTTP_PROTOCOL_ERROR:
1063 			/* fall through */
1064 		case -1:
1065 			--i;
1066 			if (cached)
1067 				continue;
1068 			fetch_syserr();
1069 			goto ouch;
1070 		default:
1071 			http_seterr(conn->err);
1072 			if (!verbose)
1073 				goto ouch;
1074 			/* fall through so we can get the full error message */
1075 		}
1076 
1077 		/* get headers */
1078 		do {
1079 			switch ((h = http_next_header(conn, &p))) {
1080 			case hdr_syserror:
1081 				fetch_syserr();
1082 				goto ouch;
1083 			case hdr_error:
1084 				http_seterr(HTTP_PROTOCOL_ERROR);
1085 				goto ouch;
1086 			case hdr_connection:
1087 				/* XXX too weak? */
1088 				keep_alive = (strcasecmp(p, "keep-alive") == 0);
1089 				break;
1090 			case hdr_content_length:
1091 				http_parse_length(p, &clength);
1092 				break;
1093 			case hdr_content_range:
1094 				http_parse_range(p, &offset, &length, &size);
1095 				break;
1096 			case hdr_last_modified:
1097 				http_parse_mtime(p, &mtime);
1098 				break;
1099 			case hdr_location:
1100 				if (!HTTP_REDIRECT(conn->err))
1101 					break;
1102 				if (new)
1103 					free(new);
1104 				if (verbose)
1105 					fetch_info("%d redirect to %s", conn->err, p);
1106 				if (*p == '/')
1107 					/* absolute path */
1108 					new = fetchMakeURL(url->scheme, url->host, url->port, p,
1109 					    url->user, url->pwd);
1110 				else
1111 					new = fetchParseURL(p);
1112 				if (new == NULL) {
1113 					/* XXX should set an error code */
1114 					goto ouch;
1115 				}
1116 				if (!*new->user && !*new->pwd) {
1117 					strcpy(new->user, url->user);
1118 					strcpy(new->pwd, url->pwd);
1119 				}
1120 				new->offset = url->offset;
1121 				new->length = url->length;
1122 				break;
1123 			case hdr_transfer_encoding:
1124 				/* XXX weak test*/
1125 				chunked = (strcasecmp(p, "chunked") == 0);
1126 				break;
1127 			case hdr_www_authenticate:
1128 				if (conn->err != HTTP_NEED_AUTH)
1129 					break;
1130 				/* if we were smarter, we'd check the method and realm */
1131 				break;
1132 			case hdr_end:
1133 				/* fall through */
1134 			case hdr_unknown:
1135 				/* ignore */
1136 				break;
1137 			}
1138 		} while (h > hdr_end);
1139 
1140 		/* we need to provide authentication */
1141 		if (conn->err == HTTP_NEED_AUTH) {
1142 			e = conn->err;
1143 			need_auth = 1;
1144 			fetch_close(conn);
1145 			conn = NULL;
1146 			continue;
1147 		}
1148 
1149 		/* requested range not satisfiable */
1150 		if (conn->err == HTTP_BAD_RANGE) {
1151 			if (url->offset == size && url->length == 0) {
1152 				/* asked for 0 bytes; fake it */
1153 				offset = url->offset;
1154 				conn->err = HTTP_OK;
1155 				break;
1156 			} else {
1157 				http_seterr(conn->err);
1158 				goto ouch;
1159 			}
1160 		}
1161 
1162 		/* we have a hit or an error */
1163 		if (conn->err == HTTP_OK ||
1164 		    conn->err == HTTP_PARTIAL ||
1165 		    conn->err == HTTP_NOT_MODIFIED ||
1166 		    HTTP_ERROR(conn->err))
1167 			break;
1168 
1169 		/* all other cases: we got a redirect */
1170 		e = conn->err;
1171 		need_auth = 0;
1172 		fetch_close(conn);
1173 		conn = NULL;
1174 		if (!new)
1175 			break;
1176 		if (url != URL)
1177 			fetchFreeURL(url);
1178 		url = new;
1179 	} while (++i < n);
1180 
1181 	/* we failed, or ran out of retries */
1182 	if (conn == NULL) {
1183 		http_seterr(e);
1184 		goto ouch;
1185 	}
1186 
1187 	/* check for inconsistencies */
1188 	if (clength != -1 && length != -1 && clength != length) {
1189 		http_seterr(HTTP_PROTOCOL_ERROR);
1190 		goto ouch;
1191 	}
1192 	if (clength == -1)
1193 		clength = length;
1194 	if (clength != -1)
1195 		length = offset + clength;
1196 	if (length != -1 && size != -1 && length != size) {
1197 		http_seterr(HTTP_PROTOCOL_ERROR);
1198 		goto ouch;
1199 	}
1200 	if (size == -1)
1201 		size = length;
1202 
1203 	/* fill in stats */
1204 	if (us) {
1205 		us->size = size;
1206 		us->atime = us->mtime = mtime;
1207 	}
1208 
1209 	/* too far? */
1210 	if (URL->offset > 0 && offset > URL->offset) {
1211 		http_seterr(HTTP_PROTOCOL_ERROR);
1212 		goto ouch;
1213 	}
1214 
1215 	/* report back real offset and size */
1216 	URL->offset = offset;
1217 	URL->length = clength;
1218 
1219 	if (clength == -1 && !chunked)
1220 		keep_alive = 0;
1221 
1222 	if (conn->err == HTTP_NOT_MODIFIED) {
1223 		http_seterr(HTTP_NOT_MODIFIED);
1224 		if (keep_alive) {
1225 			fetch_cache_put(conn, fetch_close);
1226 			conn = NULL;
1227 		}
1228 		goto ouch;
1229 	}
1230 
1231 	/* wrap it up in a fetchIO */
1232 	if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1233 		fetch_syserr();
1234 		goto ouch;
1235 	}
1236 
1237 	if (url != URL)
1238 		fetchFreeURL(url);
1239 	if (purl)
1240 		fetchFreeURL(purl);
1241 
1242 	if (HTTP_ERROR(conn->err)) {
1243 
1244 		if (keep_alive) {
1245 			char buf[512];
1246 			do {
1247 			} while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1248 		}
1249 
1250 		fetchIO_close(f);
1251 		f = NULL;
1252 	}
1253 
1254 	return (f);
1255 
1256 ouch:
1257 	if (url != URL)
1258 		fetchFreeURL(url);
1259 	if (purl)
1260 		fetchFreeURL(purl);
1261 	if (conn != NULL)
1262 		fetch_close(conn);
1263 	return (NULL);
1264 }
1265 
1266 
1267 /*****************************************************************************
1268  * Entry points
1269  */
1270 
1271 /*
1272  * Retrieve and stat a file by HTTP
1273  */
1274 fetchIO *
1275 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1276 {
1277 	return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1278 }
1279 
1280 /*
1281  * Retrieve a file by HTTP
1282  */
1283 fetchIO *
1284 fetchGetHTTP(struct url *URL, const char *flags)
1285 {
1286 	return (fetchXGetHTTP(URL, NULL, flags));
1287 }
1288 
1289 /*
1290  * Store a file by HTTP
1291  */
1292 fetchIO *
1293 /*ARGSUSED*/
1294 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1295 {
1296 	fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1297 	return (NULL);
1298 }
1299 
1300 /*
1301  * Get an HTTP document's metadata
1302  */
1303 int
1304 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1305 {
1306 	fetchIO *f;
1307 
1308 	f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1309 	if (f == NULL)
1310 		return (-1);
1311 	fetchIO_close(f);
1312 	return (0);
1313 }
1314 
/*
 * States of the scanner implemented by parse_index(), which walks an
 * HTML document looking for the double-quoted href attribute of <a>
 * tags.
 */
1315 enum http_states {
1316 	ST_NONE,	/* plain text, not inside markup */
1317 	ST_LT,		/* "<" seen, tag name not started yet */
1318 	ST_LTA,		/* "<a" seen */
1319 	ST_TAGA,	/* inside an a-tag, between attributes */
1320 	ST_H,		/* "<a h" seen */
1321 	ST_R,		/* "<a hr" seen */
1322 	ST_E,		/* "<a hre" seen */
1323 	ST_F,		/* "<a href" seen, waiting for "=" */
1324 	ST_HREF,	/* "<a href=" seen, waiting for the opening quote */
1325 	ST_HREFQ,	/* inside the quoted href value */
1326 	ST_TAG,		/* inside a tag that is not an a-tag; skipped up to ">" */
1327 	ST_TAGAX,	/* inside an unrecognised attribute name of an a-tag */
1328 	ST_TAGAQ	/* inside a quoted string of an a-tag other than the href value */
1329 };
1330 
/*
 * Scanner context for parse_index(); it persists across calls so that
 * the scan can resume where the previous call stopped.
 */
1331 struct index_parser {
1332 	struct url_list *ue;	/* list handed to fetch_add_entry() for every href found */
1333 	struct url *url;	/* URL handed to fetch_add_entry() with each href; fetchListHTTP() sets it to the URL being listed */
1334 	enum http_states state;	/* current scanner state */
1335 };
1336 
1337 static ssize_t
1338 parse_index(struct index_parser *parser, const char *buf, size_t len)
1339 {
1340 	char *end_attr, p = *buf;
1341 
1342 	switch (parser->state) {
1343 	case ST_NONE:
1344 		/* Plain text, not in markup */
1345 		if (p == '<')
1346 			parser->state = ST_LT;
1347 		return 1;
1348 	case ST_LT:
1349 		/* In tag -- "<" already found */
1350 		if (p == '>')
1351 			parser->state = ST_NONE;
1352 		else if (p == 'a' || p == 'A')
1353 			parser->state = ST_LTA;
1354 		else if (!isspace((unsigned char)p))
1355 			parser->state = ST_TAG;
1356 		return 1;
1357 	case ST_LTA:
1358 		/* In tag -- "<a" already found */
1359 		if (p == '>')
1360 			parser->state = ST_NONE;
1361 		else if (p == '"')
1362 			parser->state = ST_TAGAQ;
1363 		else if (isspace((unsigned char)p))
1364 			parser->state = ST_TAGA;
1365 		else
1366 			parser->state = ST_TAG;
1367 		return 1;
1368 	case ST_TAG:
1369 		/* In tag, but not "<a" -- disregard */
1370 		if (p == '>')
1371 			parser->state = ST_NONE;
1372 		return 1;
1373 	case ST_TAGA:
1374 		/* In a-tag -- "<a " already found */
1375 		if (p == '>')
1376 			parser->state = ST_NONE;
1377 		else if (p == '"')
1378 			parser->state = ST_TAGAQ;
1379 		else if (p == 'h' || p == 'H')
1380 			parser->state = ST_H;
1381 		else if (!isspace((unsigned char)p))
1382 			parser->state = ST_TAGAX;
1383 		return 1;
1384 	case ST_TAGAX:
1385 		/* In unknown keyword in a-tag */
1386 		if (p == '>')
1387 			parser->state = ST_NONE;
1388 		else if (p == '"')
1389 			parser->state = ST_TAGAQ;
1390 		else if (isspace((unsigned char)p))
1391 			parser->state = ST_TAGA;
1392 		return 1;
1393 	case ST_TAGAQ:
1394 		/* In a-tag, unknown argument for keys. */
1395 		if (p == '>')
1396 			parser->state = ST_NONE;
1397 		else if (p == '"')
1398 			parser->state = ST_TAGA;
1399 		return 1;
1400 	case ST_H:
1401 		/* In a-tag -- "<a h" already found */
1402 		if (p == '>')
1403 			parser->state = ST_NONE;
1404 		else if (p == '"')
1405 			parser->state = ST_TAGAQ;
1406 		else if (p == 'r' || p == 'R')
1407 			parser->state = ST_R;
1408 		else if (isspace((unsigned char)p))
1409 			parser->state = ST_TAGA;
1410 		else
1411 			parser->state = ST_TAGAX;
1412 		return 1;
1413 	case ST_R:
1414 		/* In a-tag -- "<a hr" already found */
1415 		if (p == '>')
1416 			parser->state = ST_NONE;
1417 		else if (p == '"')
1418 			parser->state = ST_TAGAQ;
1419 		else if (p == 'e' || p == 'E')
1420 			parser->state = ST_E;
1421 		else if (isspace((unsigned char)p))
1422 			parser->state = ST_TAGA;
1423 		else
1424 			parser->state = ST_TAGAX;
1425 		return 1;
1426 	case ST_E:
1427 		/* In a-tag -- "<a hre" already found */
1428 		if (p == '>')
1429 			parser->state = ST_NONE;
1430 		else if (p == '"')
1431 			parser->state = ST_TAGAQ;
1432 		else if (p == 'f' || p == 'F')
1433 			parser->state = ST_F;
1434 		else if (isspace((unsigned char)p))
1435 			parser->state = ST_TAGA;
1436 		else
1437 			parser->state = ST_TAGAX;
1438 		return 1;
1439 	case ST_F:
1440 		/* In a-tag -- "<a href" already found */
1441 		if (p == '>')
1442 			parser->state = ST_NONE;
1443 		else if (p == '"')
1444 			parser->state = ST_TAGAQ;
1445 		else if (p == '=')
1446 			parser->state = ST_HREF;
1447 		else if (!isspace((unsigned char)p))
1448 			parser->state = ST_TAGAX;
1449 		return 1;
1450 	case ST_HREF:
1451 		/* In a-tag -- "<a href=" already found */
1452 		if (p == '>')
1453 			parser->state = ST_NONE;
1454 		else if (p == '"')
1455 			parser->state = ST_HREFQ;
1456 		else if (!isspace((unsigned char)p))
1457 			parser->state = ST_TAGA;
1458 		return 1;
1459 	case ST_HREFQ:
1460 		/* In href of the a-tag */
1461 		end_attr = memchr(buf, '"', len);
1462 		if (end_attr == NULL)
1463 			return 0;
1464 		*end_attr = '\0';
1465 		parser->state = ST_TAGA;
1466 		if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1467 			return -1;
1468 		return end_attr + 1 - buf;
1469 	}
1470 	/* NOTREACHED */
1471 	abort();
1472 }
1473 
/*
 * One cached directory listing, as stored by fetchListHTTP() when it is
 * called with the 'c' flag.  Entries form a singly linked list.
 */
1474 struct http_index_cache {
1475 	struct http_index_cache *next;	/* next cached listing, NULL at the end of the list */
1476 	struct url *location;	/* copy of the URL that was listed; lookup key */
1477 	struct url_list ue;	/* entries parsed from that URL */
1478 };
1479 
/*
 * Head of the list of cached directory listings.  fetchListHTTP()
 * searches it from the front and pushes new listings onto the front.
 */
1480 static struct http_index_cache *index_cache;
1481 
1482 /*
1483  * List a directory
1484  */
1485 int
1486 /*ARGSUSED*/
1487 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern __unused, const char *flags)
1488 {
1489 	fetchIO *f;
1490 	char buf[2 * PATH_MAX];
1491 	size_t buf_len, sum_processed;
1492 	ssize_t read_len, processed;
1493 	struct index_parser state;
1494 	struct http_index_cache *cache = NULL;
1495 	int do_cache, ret;
1496 
1497 	do_cache = CHECK_FLAG('c');
1498 
1499 	if (do_cache) {
1500 		for (cache = index_cache; cache != NULL; cache = cache->next) {
1501 			if (strcmp(cache->location->scheme, url->scheme))
1502 				continue;
1503 			if (strcmp(cache->location->user, url->user))
1504 				continue;
1505 			if (strcmp(cache->location->pwd, url->pwd))
1506 				continue;
1507 			if (strcmp(cache->location->host, url->host))
1508 				continue;
1509 			if (cache->location->port != url->port)
1510 				continue;
1511 			if (strcmp(cache->location->doc, url->doc))
1512 				continue;
1513 			return fetchAppendURLList(ue, &cache->ue);
1514 		}
1515 
1516 		cache = malloc(sizeof(*cache));
1517 		fetchInitURLList(&cache->ue);
1518 		cache->location = fetchCopyURL(url);
1519 	}
1520 
1521 	f = fetchGetHTTP(url, flags);
1522 	if (f == NULL) {
1523 		if (do_cache) {
1524 			fetchFreeURLList(&cache->ue);
1525 			fetchFreeURL(cache->location);
1526 			free(cache);
1527 		}
1528 		return -1;
1529 	}
1530 
1531 	state.url = url;
1532 	state.state = ST_NONE;
1533 	if (do_cache) {
1534 		state.ue = &cache->ue;
1535 	} else {
1536 		state.ue = ue;
1537 	}
1538 
1539 	buf_len = 0;
1540 
1541 	while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1542 		buf_len += read_len;
1543 		sum_processed = 0;
1544 		do {
1545 			processed = parse_index(&state, buf + sum_processed, buf_len);
1546 			if (processed == -1)
1547 				break;
1548 			buf_len -= processed;
1549 			sum_processed += processed;
1550 		} while (processed != 0 && buf_len > 0);
1551 		if (processed == -1) {
1552 			read_len = -1;
1553 			break;
1554 		}
1555 		memmove(buf, buf + sum_processed, buf_len);
1556 	}
1557 
1558 	fetchIO_close(f);
1559 
1560 	ret = read_len < 0 ? -1 : 0;
1561 
1562 	if (do_cache) {
1563 		if (ret == 0) {
1564 			cache->next = index_cache;
1565 			index_cache = cache;
1566 		}
1567 
1568 		if (fetchAppendURLList(ue, &cache->ue))
1569 			ret = -1;
1570 	}
1571 
1572 	return ret;
1573 }
1574