xref: /minix3/external/bsd/fetch/dist/libfetch/http.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1 /*	$NetBSD: http.c,v 1.3 2014/01/07 02:13:00 joerg Exp $	*/
2 /*-
3  * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4  * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5  * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer
13  *    in this position and unchanged.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
32  */
33 
34 /*
35  * The following copyright applies to the base64 code:
36  *
37  *-
38  * Copyright 1997 Massachusetts Institute of Technology
39  *
40  * Permission to use, copy, modify, and distribute this software and
41  * its documentation for any purpose and without fee is hereby
42  * granted, provided that both the above copyright notice and this
43  * permission notice appear in all copies, that both the above
44  * copyright notice and this permission notice appear in all
45  * supporting documentation, and that the name of M.I.T. not be used
46  * in advertising or publicity pertaining to distribution of the
47  * software without specific, written prior permission.  M.I.T. makes
48  * no representations about the suitability of this software for any
49  * purpose.  It is provided "as is" without express or implied
50  * warranty.
51  *
52  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
53  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  */
65 
66 #if defined(__linux__) || defined(__MINT__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 #define _GNU_SOURCE
69 #endif
70 
71 /* Needed for gmtime_r on Interix */
72 #define _REENTRANT
73 
74 #if HAVE_CONFIG_H
75 #include "config.h"
76 #endif
77 #ifndef NETBSD
78 #include <nbcompat.h>
79 #endif
80 
81 #include <sys/types.h>
82 #include <sys/socket.h>
83 
84 #include <ctype.h>
85 #include <errno.h>
86 #include <locale.h>
87 #include <stdarg.h>
88 #ifndef NETBSD
89 #include <nbcompat/stdio.h>
90 #else
91 #include <stdio.h>
92 #endif
93 #include <stdlib.h>
94 #include <string.h>
95 #include <time.h>
96 #include <unistd.h>
97 
98 #include <netinet/in.h>
99 #include <netinet/tcp.h>
100 
101 #ifndef NETBSD
102 #include <nbcompat/netdb.h>
103 #else
104 #include <netdb.h>
105 #endif
106 
107 #include <arpa/inet.h>
108 
109 #include "fetch.h"
110 #include "common.h"
111 #include "httperr.h"
112 
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/*
 * Symbolic names for reply codes we care about.  HTTP_PROTOCOL_ERROR is
 * not an HTTP status code: http_get_reply() returns it when the status
 * line cannot be parsed.
 */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
#define HTTP_PROTOCOL_ERROR	999

/* True for the four reply codes that are handled as redirects. */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for reply codes strictly between 400 and 599.  Both bounds are
 * exclusive, so 400 and 599 themselves do not match.
 */
#define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599)
135 
136 
137 /*****************************************************************************
138  * I/O functions for decoding chunked streams
139  */
140 
/*
 * Per-stream state handed to the fetchIO callbacks (http_readfn,
 * http_writefn, http_closefn).  Allocated zero-filled by http_funopen()
 * and released by http_closefn().
 */
struct httpio
{
	conn_t		*conn;		/* connection */
	int		 chunked;	/* chunked mode */
	int		 keep_alive;	/* keep-alive mode */
	char		*buf;		/* chunk buffer */
	size_t		 bufsize;	/* size of chunk buffer */
	/* signed because it receives the return value of fetch_read() */
	ssize_t		 buflen;	/* amount of data currently in buffer */
	size_t		 bufpos;	/* current read offset in buffer */
	int		 eof;		/* end-of-file flag */
	int		 error;		/* error flag */
	size_t		 chunksize;	/* remaining size of current chunk */
	/* a negative value means no limit is applied by http_fillbuf() */
	off_t		 contentlength;	/* remaining size of the content */
};
155 
156 /*
157  * Get next chunk header
158  */
159 static ssize_t
http_new_chunk(struct httpio * io)160 http_new_chunk(struct httpio *io)
161 {
162 	char *p;
163 
164 	if (fetch_getln(io->conn) == -1)
165 		return (-1);
166 
167 	if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
168 		return (-1);
169 
170 	for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
171 		if (*p == ';')
172 			break;
173 		if (!isxdigit((unsigned char)*p))
174 			return (-1);
175 		if (isdigit((unsigned char)*p)) {
176 			io->chunksize = io->chunksize * 16 +
177 			    *p - '0';
178 		} else {
179 			io->chunksize = io->chunksize * 16 +
180 			    10 + tolower((unsigned char)*p) - 'a';
181 		}
182 	}
183 
184 	return (io->chunksize);
185 }
186 
187 /*
188  * Grow the input buffer to at least len bytes
189  */
190 static int
http_growbuf(struct httpio * io,size_t len)191 http_growbuf(struct httpio *io, size_t len)
192 {
193 	char *tmp;
194 
195 	if (io->bufsize >= len)
196 		return (0);
197 
198 	if ((tmp = realloc(io->buf, len)) == NULL)
199 		return (-1);
200 	io->buf = tmp;
201 	io->bufsize = len;
202 	return (0);
203 }
204 
205 /*
206  * Fill the input buffer, do chunk decoding on the fly
207  */
208 static ssize_t
http_fillbuf(struct httpio * io,size_t len)209 http_fillbuf(struct httpio *io, size_t len)
210 {
211 	if (io->error)
212 		return (-1);
213 	if (io->eof)
214 		return (0);
215 
216 	if (io->contentlength >= 0 && (off_t)len > io->contentlength)
217 		len = io->contentlength;
218 
219 	if (io->chunked == 0) {
220 		if (http_growbuf(io, len) == -1)
221 			return (-1);
222 		if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
223 			io->error = 1;
224 			return (-1);
225 		}
226 		if (io->contentlength)
227 			io->contentlength -= io->buflen;
228 		io->bufpos = 0;
229 		return (io->buflen);
230 	}
231 
232 	if (io->chunksize == 0) {
233 		switch (http_new_chunk(io)) {
234 		case -1:
235 			io->error = 1;
236 			return (-1);
237 		case 0:
238 			io->eof = 1;
239 			if (fetch_getln(io->conn) == -1)
240 				return (-1);
241 			return (0);
242 		}
243 	}
244 
245 	if (len > io->chunksize)
246 		len = io->chunksize;
247 	if (http_growbuf(io, len) == -1)
248 		return (-1);
249 	if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
250 		io->error = 1;
251 		return (-1);
252 	}
253 	io->chunksize -= io->buflen;
254 	if (io->contentlength >= 0)
255 		io->contentlength -= io->buflen;
256 
257 	if (io->chunksize == 0) {
258 		char endl[2];
259 		ssize_t len2;
260 
261 		len2 = fetch_read(io->conn, endl, 2);
262 		if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
263 			return (-1);
264 		if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
265 			return (-1);
266 	}
267 
268 	io->bufpos = 0;
269 
270 	return (io->buflen);
271 }
272 
273 /*
274  * Read function
275  */
276 static ssize_t
http_readfn(void * v,void * buf,size_t len)277 http_readfn(void *v, void *buf, size_t len)
278 {
279 	struct httpio *io = (struct httpio *)v;
280 	size_t l, pos;
281 
282 	if (io->error)
283 		return (-1);
284 	if (io->eof)
285 		return (0);
286 
287 	for (pos = 0; len > 0; pos += l, len -= l) {
288 		/* empty buffer */
289 		if (!io->buf || (ssize_t)io->bufpos == io->buflen)
290 			if (http_fillbuf(io, len) < 1)
291 				break;
292 		l = io->buflen - io->bufpos;
293 		if (len < l)
294 			l = len;
295 		memcpy((char *)buf + pos, io->buf + io->bufpos, l);
296 		io->bufpos += l;
297 	}
298 
299 	if (!pos && io->error)
300 		return (-1);
301 	return (pos);
302 }
303 
304 /*
305  * Write function
306  */
307 static ssize_t
http_writefn(void * v,const void * buf,size_t len)308 http_writefn(void *v, const void *buf, size_t len)
309 {
310 	struct httpio *io = (struct httpio *)v;
311 
312 	return (fetch_write(io->conn, buf, len));
313 }
314 
315 /*
316  * Close function
317  */
318 static void
http_closefn(void * v)319 http_closefn(void *v)
320 {
321 	struct httpio *io = (struct httpio *)v;
322 
323 	if (io->keep_alive) {
324 		int val;
325 
326 		val = 0;
327 		setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
328 			   (socklen_t)sizeof(val));
329 			  fetch_cache_put(io->conn, fetch_close);
330 #ifdef TCP_NOPUSH
331 		val = 1;
332 		setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
333 		    sizeof(val));
334 #endif
335 	} else {
336 		fetch_close(io->conn);
337 	}
338 
339 	free(io->buf);
340 	free(io);
341 }
342 
343 /*
344  * Wrap a file descriptor up
345  */
346 static fetchIO *
http_funopen(conn_t * conn,int chunked,int keep_alive,off_t clength)347 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
348 {
349 	struct httpio *io;
350 	fetchIO *f;
351 
352 	if ((io = calloc(1, sizeof(*io))) == NULL) {
353 		fetch_syserr();
354 		return (NULL);
355 	}
356 	io->conn = conn;
357 	io->chunked = chunked;
358 	io->contentlength = clength;
359 	io->keep_alive = keep_alive;
360 	f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
361 	if (f == NULL) {
362 		fetch_syserr();
363 		free(io);
364 		return (NULL);
365 	}
366 	return (f);
367 }
368 
369 
370 /*****************************************************************************
371  * Helper functions for talking to the server and parsing its replies
372  */
373 
/*
 * Header types
 *
 * Negative values report failures, hdr_end (0) marks the empty line that
 * terminates the header section, and positive values identify a header
 * line.  The header loop in http_request() relies on this ordering and
 * keeps reading while the value is greater than hdr_end.
 */
typedef enum {
	hdr_syserror = -2,	/* reading the line failed */
	hdr_error = -1,		/* NOTE(review): not returned by the
				 * http_next_header() shown in this file */
	hdr_end = 0,		/* empty line: end of headers */
	hdr_unknown = 1,	/* a header we do not interpret */
	hdr_connection,
	hdr_content_length,
	hdr_content_range,
	hdr_last_modified,
	hdr_location,
	hdr_transfer_encoding,
	hdr_www_authenticate
} hdr_t;
388 
389 /* Names of interesting headers */
390 static struct {
391 	hdr_t		 num;
392 	const char	*name;
393 } hdr_names[] = {
394 	{ hdr_connection,		"Connection" },
395 	{ hdr_content_length,		"Content-Length" },
396 	{ hdr_content_range,		"Content-Range" },
397 	{ hdr_last_modified,		"Last-Modified" },
398 	{ hdr_location,			"Location" },
399 	{ hdr_transfer_encoding,	"Transfer-Encoding" },
400 	{ hdr_www_authenticate,		"WWW-Authenticate" },
401 	{ hdr_unknown,			NULL },
402 };
403 
404 /*
405  * Send a formatted line; optionally echo to terminal
406  */
407 __printflike(2, 3)
408 static int
http_cmd(conn_t * conn,const char * fmt,...)409 http_cmd(conn_t *conn, const char *fmt, ...)
410 {
411 	va_list ap;
412 	size_t len;
413 	char *msg;
414 	ssize_t r;
415 
416 	va_start(ap, fmt);
417 	len = vasprintf(&msg, fmt, ap);
418 	va_end(ap);
419 
420 	if (msg == NULL) {
421 		errno = ENOMEM;
422 		fetch_syserr();
423 		return (-1);
424 	}
425 
426 	r = fetch_write(conn, msg, len);
427 	free(msg);
428 
429 	if (r == -1) {
430 		fetch_syserr();
431 		return (-1);
432 	}
433 
434 	return (0);
435 }
436 
437 /*
438  * Get and parse status line
439  */
440 static int
http_get_reply(conn_t * conn)441 http_get_reply(conn_t *conn)
442 {
443 	char *p;
444 
445 	if (fetch_getln(conn) == -1)
446 		return (-1);
447 	/*
448 	 * A valid status line looks like "HTTP/m.n xyz reason" where m
449 	 * and n are the major and minor protocol version numbers and xyz
450 	 * is the reply code.
451 	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
452 	 * just one) that do not send a version number, so we can't rely
453 	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
454 	 * We don't care about the reason phrase.
455 	 */
456 	if (strncmp(conn->buf, "HTTP", 4) != 0)
457 		return (HTTP_PROTOCOL_ERROR);
458 	p = conn->buf + 4;
459 	if (*p == '/') {
460 		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
461 			return (HTTP_PROTOCOL_ERROR);
462 		p += 4;
463 	}
464 	if (*p != ' ' ||
465 	    !isdigit((unsigned char)p[1]) ||
466 	    !isdigit((unsigned char)p[2]) ||
467 	    !isdigit((unsigned char)p[3]))
468 		return (HTTP_PROTOCOL_ERROR);
469 
470 	conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
471 	return (conn->err);
472 }
473 
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header line.
 * The name is compared case-insensitively and must be followed
 * immediately by ':' in hdr.  On a match the returned pointer addresses
 * the first character after the colon and any whitespace that follows it
 * (possibly the terminating NUL); otherwise NULL is returned.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Compare first and advance afterwards, so that a mismatch on the
	 * last character of the name is not skipped over.  A short hdr is
	 * caught here too, because its NUL never equals a character of str.
	 */
	for (; *str != '\0'; ++str, ++hdr) {
		if (tolower((unsigned char)*str) !=
		    tolower((unsigned char)*hdr))
			return (NULL);
	}
	if (*hdr != ':')
		return (NULL);

	/* step over the colon and the whitespace before the value */
	do {
		++hdr;
	} while (isspace((unsigned char)*hdr));
	return (hdr);
}
490 
491 /*
492  * Get the next header and return the appropriate symbolic code.
493  */
494 static hdr_t
http_next_header(conn_t * conn,const char ** p)495 http_next_header(conn_t *conn, const char **p)
496 {
497 	int i;
498 
499 	if (fetch_getln(conn) == -1)
500 		return (hdr_syserror);
501 	while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
502 		conn->buflen--;
503 	conn->buf[conn->buflen] = '\0';
504 	if (conn->buflen == 0)
505 		return (hdr_end);
506 	/*
507 	 * We could check for malformed headers but we don't really care.
508 	 * A valid header starts with a token immediately followed by a
509 	 * colon; a token is any sequence of non-control, non-whitespace
510 	 * characters except "()<>@,;:\\\"{}".
511 	 */
512 	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
513 		if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
514 			return (hdr_names[i].num);
515 	return (hdr_unknown);
516 }
517 
/*
 * Parse a last-modified header
 *
 * Only the "Sun, 06 Nov 1994 08:49:37 GMT" form is understood.  Day and
 * month names have to be parsed in the "C" locale, so LC_TIME is
 * switched for the duration of the call and restored afterwards.  If the
 * current locale name cannot be saved in the local buffer the locale is
 * left alone and the date is parsed in whatever locale is active.
 *
 * Returns 0 and stores the time in *mtime on success, -1 if the string
 * does not parse.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	char locale[64], *r;
	const char *cur;
	struct tm tm;
	int n, switched;

	/* bounded, always-terminated copy of the current locale name */
	cur = setlocale(LC_TIME, NULL);
	n = (cur != NULL) ? snprintf(locale, sizeof(locale), "%s", cur) : -1;
	switched = (n >= 0 && (size_t)n < sizeof(locale));

	if (switched)
		setlocale(LC_TIME, "C");
	/* strptime() only fills in the fields named by the format */
	memset(&tm, 0, sizeof(tm));
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	if (switched)
		setlocale(LC_TIME, locale);
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
537 
/*
 * Parse a content-length header
 *
 * The value must consist of one or more decimal digits and nothing else.
 * Returns 0 and stores the number in *length on success.  Returns -1 and
 * leaves *length untouched if the value is empty, contains any other
 * character, or does not fit in an off_t.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	const unsigned long long umax = ~0ULL;
	unsigned long long acc;
	unsigned int digit;
	off_t len;

	if (!isdigit((unsigned char)*p))
		return (-1);
	for (acc = 0; isdigit((unsigned char)*p); ++p) {
		digit = (unsigned int)(*p - '0');
		/* the digits come from the peer: detect overflow up front */
		if (acc > (umax - digit) / 10)
			return (-1);
		acc = acc * 10 + digit;
	}
	if (*p != '\0')
		return (-1);

	/* reject values that do not survive the conversion to off_t */
	len = (off_t)acc;
	if (len < 0 || (unsigned long long)len != acc)
		return (-1);
	*length = len;
	return (0);
}
553 
/*
 * Parse one non-empty run of decimal digits starting at *pp.
 *
 * On success stores the value in *out, advances *pp past the digits and
 * returns 0.  Returns -1, changing nothing, if there is no digit at *pp
 * or the value does not fit in an off_t.
 */
static int
http_range_number(const char **pp, off_t *out)
{
	const unsigned long long umax = ~0ULL;
	unsigned long long acc = 0;
	const char *s = *pp;
	unsigned int digit;
	off_t val;

	if (!isdigit((unsigned char)*s))
		return (-1);
	for (; isdigit((unsigned char)*s); ++s) {
		digit = (unsigned int)(*s - '0');
		if (acc > (umax - digit) / 10)
			return (-1);
		acc = acc * 10 + digit;
	}
	val = (off_t)acc;
	if (val < 0 || (unsigned long long)val != acc)
		return (-1);
	*pp = s;
	*out = val;
	return (0);
}

/*
 * Parse a content-range header
 *
 * Accepts "bytes FIRST-LAST/SIZE" and "bytes" followed by "*" and
 * "/SIZE".  On success returns 0 and stores FIRST in *offset (-1 for the
 * "*" form), the number of bytes in the range in *length (0 for the "*"
 * form) and SIZE in *size.  Returns -1, storing nothing, if the syntax is
 * wrong, a number does not fit in an off_t, FIRST is greater than LAST,
 * or SIZE is smaller than the number of bytes the range describes.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_range_number(&p, &first) == -1 || *p != '-')
			return (-1);
		++p;
		if (http_range_number(&p, &last) == -1)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	++p;
	if (http_range_number(&p, &len) == -1 || *p != '\0')
		return (-1);
	/* same test as len < last - first + 1, but it cannot overflow */
	if (len - 1 < last - first)
		return (-1);
	if (first == -1)
		*length = 0;
	else
		*length = last - first + 1;
	*offset = first;
	*size = len;
	return (0);
}
590 
591 
592 /*****************************************************************************
593  * Helper functions for authorization
594  */
595 
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated, NUL-terminated string padded with '=' to a multiple
 * of four characters.  The caller frees the result.  Returns NULL if the
 * allocation fails.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Read the input as unsigned bytes: with a signed plain char, bytes
	 * >= 0x80 would sign-extend and corrupt the neighbouring bits.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	/* full groups: three input bytes become four output characters */
	for (; l >= 3; in += 3, l -= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		*dst++ = base64[(t >> 18) & 0x3f];
		*dst++ = base64[(t >> 12) & 0x3f];
		*dst++ = base64[(t >> 6) & 0x3f];
		*dst++ = base64[(t >> 0) & 0x3f];
	}

	/* trailing one or two bytes are padded with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		*dst++ = base64[(t >> 18) & 0x3f];
		*dst++ = base64[(t >> 12) & 0x3f];
		*dst++ = base64[(t >> 6) & 0x3f];
		*dst++ = '=';
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		*dst++ = base64[(t >> 18) & 0x3f];
		*dst++ = base64[(t >> 12) & 0x3f];
		*dst++ = '=';
		*dst++ = '=';
		break;
	default:
		break;
	}

	*dst = '\0';
	return (str);
}
651 
652 /*
653  * Encode username and password
654  */
655 static int
http_basic_auth(conn_t * conn,const char * hdr,const char * usr,const char * pwd)656 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
657 {
658 	char *upw, *auth;
659 	int r;
660 
661 	if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
662 		return (-1);
663 	auth = http_base64(upw);
664 	free(upw);
665 	if (auth == NULL)
666 		return (-1);
667 	r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
668 	free(auth);
669 	return (r);
670 }
671 
672 /*
673  * Send an authorization header
674  */
675 static int
http_authorize(conn_t * conn,const char * hdr,const char * p)676 http_authorize(conn_t *conn, const char *hdr, const char *p)
677 {
678 	/* basic authorization */
679 	if (strncasecmp(p, "basic:", 6) == 0) {
680 		char *user, *pwd, *str;
681 		int r;
682 
683 		/* skip realm */
684 		for (p += 6; *p && *p != ':'; ++p)
685 			/* nothing */ ;
686 		if (!*p || strchr(++p, ':') == NULL)
687 			return (-1);
688 		if ((str = strdup(p)) == NULL)
689 			return (-1); /* XXX */
690 		user = str;
691 		pwd = strchr(str, ':');
692 		*pwd++ = '\0';
693 		r = http_basic_auth(conn, hdr, user, pwd);
694 		free(str);
695 		return (r);
696 	}
697 	return (-1);
698 }
699 
700 
701 /*****************************************************************************
702  * Helper functions for connecting to a server or proxy
703  */
704 
705 /*
706  * Connect to the correct HTTP server or proxy.
707  */
708 static conn_t *
http_connect(struct url * URL,struct url * purl,const char * flags,int * cached)709 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
710 {
711 	conn_t *conn;
712 	int af, verbose;
713 #ifdef TCP_NOPUSH
714 	int val;
715 #endif
716 
717 	*cached = 1;
718 
719 #ifdef INET6
720 	af = AF_UNSPEC;
721 #else
722 	af = AF_INET;
723 #endif
724 
725 	verbose = CHECK_FLAG('v');
726 	if (CHECK_FLAG('4'))
727 		af = AF_INET;
728 #ifdef INET6
729 	else if (CHECK_FLAG('6'))
730 		af = AF_INET6;
731 #endif
732 
733 	if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
734 		URL = purl;
735 	} else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
736 		/* can't talk http to an ftp server */
737 		/* XXX should set an error code */
738 		return (NULL);
739 	}
740 
741 	if ((conn = fetch_cache_get(URL, af)) != NULL) {
742 		*cached = 1;
743 		return (conn);
744 	}
745 
746 	if ((conn = fetch_connect(URL, af, verbose)) == NULL)
747 		/* fetch_connect() has already set an error code */
748 		return (NULL);
749 	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
750 	    fetch_ssl(conn, verbose) == -1) {
751 		fetch_close(conn);
752 		/* grrr */
753 #ifdef EAUTH
754 		errno = EAUTH;
755 #else
756 		errno = EPERM;
757 #endif
758 		fetch_syserr();
759 		return (NULL);
760 	}
761 
762 #ifdef TCP_NOPUSH
763 	val = 1;
764 	setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
765 #endif
766 
767 	return (conn);
768 }
769 
770 static struct url *
http_get_proxy(struct url * url,const char * flags)771 http_get_proxy(struct url * url, const char *flags)
772 {
773 	struct url *purl;
774 	char *p;
775 
776 	if (flags != NULL && strchr(flags, 'd') != NULL)
777 		return (NULL);
778 	if (fetch_no_proxy_match(url->host))
779 		return (NULL);
780 	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
781 	    *p && (purl = fetchParseURL(p))) {
782 		if (!*purl->scheme)
783 			strcpy(purl->scheme, SCHEME_HTTP);
784 		if (!purl->port)
785 			purl->port = fetch_default_proxy_port(purl->scheme);
786 		if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
787 			return (purl);
788 		fetchFreeURL(purl);
789 	}
790 	return (NULL);
791 }
792 
793 static void
set_if_modified_since(conn_t * conn,time_t last_modified)794 set_if_modified_since(conn_t *conn, time_t last_modified)
795 {
796 	static const char weekdays[] = "SunMonTueWedThuFriSat";
797 	static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
798 	struct tm tm;
799 	char buf[80];
800 	gmtime_r(&last_modified, &tm);
801 	snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4d %02d:%02d:%02d GMT",
802 	    weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
803 	    tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
804 	http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
805 }
806 
807 
808 /*****************************************************************************
809  * Core
810  */
811 
812 /*
813  * Send a request and process the reply
814  *
815  * XXX This function is way too long, the do..while loop should be split
816  * XXX off into a separate function.
817  */
818 fetchIO *
http_request(struct url * URL,const char * op,struct url_stat * us,struct url * purl,const char * flags)819 http_request(struct url *URL, const char *op, struct url_stat *us,
820     struct url *purl, const char *flags)
821 {
822 	conn_t *conn;
823 	struct url *url, *new;
824 	int chunked, direct, if_modified_since, need_auth, noredirect;
825 	int keep_alive, verbose, cached;
826 	int e, i, n, val;
827 	off_t offset, clength, length, size;
828 	time_t mtime;
829 	const char *p;
830 	fetchIO *f;
831 	hdr_t h;
832 	char hbuf[URL_HOSTLEN + 7], *host;
833 
834 	direct = CHECK_FLAG('d');
835 	noredirect = CHECK_FLAG('A');
836 	verbose = CHECK_FLAG('v');
837 	if_modified_since = CHECK_FLAG('i');
838 	keep_alive = 0;
839 
840 	if (direct && purl) {
841 		fetchFreeURL(purl);
842 		purl = NULL;
843 	}
844 
845 	/* try the provided URL first */
846 	url = URL;
847 
848 	/* if the A flag is set, we only get one try */
849 	n = noredirect ? 1 : MAX_REDIRECT;
850 	i = 0;
851 
852 	e = HTTP_PROTOCOL_ERROR;
853 	need_auth = 0;
854 	do {
855 		new = NULL;
856 		chunked = 0;
857 		offset = 0;
858 		clength = -1;
859 		length = -1;
860 		size = -1;
861 		mtime = 0;
862 
863 		/* check port */
864 		if (!url->port)
865 			url->port = fetch_default_port(url->scheme);
866 
867 		/* were we redirected to an FTP URL? */
868 		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
869 			if (strcmp(op, "GET") == 0)
870 				return (ftp_request(url, "RETR", NULL, us, purl, flags));
871 			else if (strcmp(op, "HEAD") == 0)
872 				return (ftp_request(url, "STAT", NULL, us, purl, flags));
873 		}
874 
875 		/* connect to server or proxy */
876 		if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
877 			goto ouch;
878 
879 		host = url->host;
880 #ifdef INET6
881 		if (strchr(url->host, ':')) {
882 			snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
883 			host = hbuf;
884 		}
885 #endif
886 		if (url->port != fetch_default_port(url->scheme)) {
887 			if (host != hbuf) {
888 				strcpy(hbuf, host);
889 				host = hbuf;
890 			}
891 			snprintf(hbuf + strlen(hbuf),
892 			    sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
893 		}
894 
895 		/* send request */
896 		if (verbose)
897 			fetch_info("requesting %s://%s%s",
898 			    url->scheme, host, url->doc);
899 		if (purl) {
900 			http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
901 			    op, url->scheme, host, url->doc);
902 		} else {
903 			http_cmd(conn, "%s %s HTTP/1.1\r\n",
904 			    op, url->doc);
905 		}
906 
907 		if (if_modified_since && url->last_modified > 0)
908 			set_if_modified_since(conn, url->last_modified);
909 
910 		/* virtual host */
911 		http_cmd(conn, "Host: %s\r\n", host);
912 
913 		/* proxy authorization */
914 		if (purl) {
915 			if (*purl->user || *purl->pwd)
916 				http_basic_auth(conn, "Proxy-Authorization",
917 				    purl->user, purl->pwd);
918 			else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
919 				http_authorize(conn, "Proxy-Authorization", p);
920 		}
921 
922 		/* server authorization */
923 		if (need_auth || *url->user || *url->pwd) {
924 			if (*url->user || *url->pwd)
925 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
926 			else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
927 				http_authorize(conn, "Authorization", p);
928 			else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
929 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
930 			} else {
931 				http_seterr(HTTP_NEED_AUTH);
932 				goto ouch;
933 			}
934 		}
935 
936 		/* other headers */
937 		if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
938 			if (strcasecmp(p, "auto") == 0)
939 				http_cmd(conn, "Referer: %s://%s%s\r\n",
940 				    url->scheme, host, url->doc);
941 			else
942 				http_cmd(conn, "Referer: %s\r\n", p);
943 		}
944 		if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
945 			http_cmd(conn, "User-Agent: %s\r\n", p);
946 		else
947 			http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
948 		if (url->offset > 0)
949 			http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
950 		http_cmd(conn, "\r\n");
951 
952 		/*
953 		 * Force the queued request to be dispatched.  Normally, one
954 		 * would do this with shutdown(2) but squid proxies can be
955 		 * configured to disallow such half-closed connections.  To
956 		 * be compatible with such configurations, fiddle with socket
957 		 * options to force the pending data to be written.
958 		 */
959 #ifdef TCP_NOPUSH
960 		val = 0;
961 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
962 			   sizeof(val));
963 #endif
964 		val = 1;
965 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
966 		    (socklen_t)sizeof(val));
967 
968 		/* get reply */
969 		switch (http_get_reply(conn)) {
970 		case HTTP_OK:
971 		case HTTP_PARTIAL:
972 		case HTTP_NOT_MODIFIED:
973 			/* fine */
974 			break;
975 		case HTTP_MOVED_PERM:
976 		case HTTP_MOVED_TEMP:
977 		case HTTP_SEE_OTHER:
978 			/*
979 			 * Not so fine, but we still have to read the
980 			 * headers to get the new location.
981 			 */
982 			break;
983 		case HTTP_NEED_AUTH:
984 			if (need_auth) {
985 				/*
986 				 * We already sent out authorization code,
987 				 * so there's nothing more we can do.
988 				 */
989 				http_seterr(conn->err);
990 				goto ouch;
991 			}
992 			/* try again, but send the password this time */
993 			if (verbose)
994 				fetch_info("server requires authorization");
995 			break;
996 		case HTTP_NEED_PROXY_AUTH:
997 			/*
998 			 * If we're talking to a proxy, we already sent
999 			 * our proxy authorization code, so there's
1000 			 * nothing more we can do.
1001 			 */
1002 			http_seterr(conn->err);
1003 			goto ouch;
1004 		case HTTP_BAD_RANGE:
1005 			/*
1006 			 * This can happen if we ask for 0 bytes because
1007 			 * we already have the whole file.  Consider this
1008 			 * a success for now, and check sizes later.
1009 			 */
1010 			break;
1011 		case HTTP_PROTOCOL_ERROR:
1012 			/* fall through */
1013 		case -1:
1014 			--i;
1015 			if (cached)
1016 				continue;
1017 			fetch_syserr();
1018 			goto ouch;
1019 		default:
1020 			http_seterr(conn->err);
1021 			if (!verbose)
1022 				goto ouch;
1023 			/* fall through so we can get the full error message */
1024 		}
1025 
1026 		/* get headers */
1027 		do {
1028 			switch ((h = http_next_header(conn, &p))) {
1029 			case hdr_syserror:
1030 				fetch_syserr();
1031 				goto ouch;
1032 			case hdr_error:
1033 				http_seterr(HTTP_PROTOCOL_ERROR);
1034 				goto ouch;
1035 			case hdr_connection:
1036 				/* XXX too weak? */
1037 				keep_alive = (strcasecmp(p, "keep-alive") == 0);
1038 				break;
1039 			case hdr_content_length:
1040 				http_parse_length(p, &clength);
1041 				break;
1042 			case hdr_content_range:
1043 				http_parse_range(p, &offset, &length, &size);
1044 				break;
1045 			case hdr_last_modified:
1046 				http_parse_mtime(p, &mtime);
1047 				break;
1048 			case hdr_location:
1049 				if (!HTTP_REDIRECT(conn->err))
1050 					break;
1051 				if (new)
1052 					free(new);
1053 				if (verbose)
1054 					fetch_info("%d redirect to %s", conn->err, p);
1055 				if (*p == '/')
1056 					/* absolute path */
1057 					new = fetchMakeURL(url->scheme, url->host, url->port, p,
1058 					    url->user, url->pwd);
1059 				else
1060 					new = fetchParseURL(p);
1061 				if (new == NULL) {
1062 					/* XXX should set an error code */
1063 					goto ouch;
1064 				}
1065 				if (!*new->user && !*new->pwd) {
1066 					strcpy(new->user, url->user);
1067 					strcpy(new->pwd, url->pwd);
1068 				}
1069 				new->offset = url->offset;
1070 				new->length = url->length;
1071 				break;
1072 			case hdr_transfer_encoding:
1073 				/* XXX weak test*/
1074 				chunked = (strcasecmp(p, "chunked") == 0);
1075 				break;
1076 			case hdr_www_authenticate:
1077 				if (conn->err != HTTP_NEED_AUTH)
1078 					break;
1079 				/* if we were smarter, we'd check the method and realm */
1080 				break;
1081 			case hdr_end:
1082 				/* fall through */
1083 			case hdr_unknown:
1084 				/* ignore */
1085 				break;
1086 			}
1087 		} while (h > hdr_end);
1088 
1089 		/* we need to provide authentication */
1090 		if (conn->err == HTTP_NEED_AUTH) {
1091 			e = conn->err;
1092 			need_auth = 1;
1093 			fetch_close(conn);
1094 			conn = NULL;
1095 			continue;
1096 		}
1097 
1098 		/* requested range not satisfiable */
1099 		if (conn->err == HTTP_BAD_RANGE) {
1100 			if (url->offset == size && url->length == 0) {
1101 				/* asked for 0 bytes; fake it */
1102 				offset = url->offset;
1103 				conn->err = HTTP_OK;
1104 				break;
1105 			} else {
1106 				http_seterr(conn->err);
1107 				goto ouch;
1108 			}
1109 		}
1110 
1111 		/* we have a hit or an error */
1112 		if (conn->err == HTTP_OK ||
1113 		    conn->err == HTTP_PARTIAL ||
1114 		    conn->err == HTTP_NOT_MODIFIED ||
1115 		    HTTP_ERROR(conn->err))
1116 			break;
1117 
1118 		/* all other cases: we got a redirect */
1119 		e = conn->err;
1120 		need_auth = 0;
1121 		fetch_close(conn);
1122 		conn = NULL;
1123 		if (!new)
1124 			break;
1125 		if (url != URL)
1126 			fetchFreeURL(url);
1127 		url = new;
1128 	} while (++i < n);
1129 
1130 	/* we failed, or ran out of retries */
1131 	if (conn == NULL) {
1132 		http_seterr(e);
1133 		goto ouch;
1134 	}
1135 
1136 	/* check for inconsistencies */
1137 	if (clength != -1 && length != -1 && clength != length) {
1138 		http_seterr(HTTP_PROTOCOL_ERROR);
1139 		goto ouch;
1140 	}
1141 	if (clength == -1)
1142 		clength = length;
1143 	if (clength != -1)
1144 		length = offset + clength;
1145 	if (length != -1 && size != -1 && length != size) {
1146 		http_seterr(HTTP_PROTOCOL_ERROR);
1147 		goto ouch;
1148 	}
1149 	if (size == -1)
1150 		size = length;
1151 
1152 	/* fill in stats */
1153 	if (us) {
1154 		us->size = size;
1155 		us->atime = us->mtime = mtime;
1156 	}
1157 
1158 	/* too far? */
1159 	if (URL->offset > 0 && offset > URL->offset) {
1160 		http_seterr(HTTP_PROTOCOL_ERROR);
1161 		goto ouch;
1162 	}
1163 
1164 	/* report back real offset and size */
1165 	URL->offset = offset;
1166 	URL->length = clength;
1167 
1168 	if (clength == -1 && !chunked)
1169 		keep_alive = 0;
1170 
1171 	if (conn->err == HTTP_NOT_MODIFIED) {
1172 		http_seterr(HTTP_NOT_MODIFIED);
1173 		if (keep_alive) {
1174 			fetch_cache_put(conn, fetch_close);
1175 			conn = NULL;
1176 		}
1177 		goto ouch;
1178 	}
1179 
1180 	/* wrap it up in a fetchIO */
1181 	if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1182 		fetch_syserr();
1183 		goto ouch;
1184 	}
1185 
1186 	if (url != URL)
1187 		fetchFreeURL(url);
1188 	if (purl)
1189 		fetchFreeURL(purl);
1190 
1191 	if (HTTP_ERROR(conn->err)) {
1192 
1193 		if (keep_alive) {
1194 			char buf[512];
1195 			do {
1196 			} while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1197 		}
1198 
1199 		fetchIO_close(f);
1200 		f = NULL;
1201 	}
1202 
1203 	return (f);
1204 
1205 ouch:
1206 	if (url != URL)
1207 		fetchFreeURL(url);
1208 	if (purl)
1209 		fetchFreeURL(purl);
1210 	if (conn != NULL)
1211 		fetch_close(conn);
1212 	return (NULL);
1213 }
1214 
1215 
1216 /*****************************************************************************
1217  * Entry points
1218  */
1219 
1220 /*
1221  * Retrieve and stat a file by HTTP
1222  */
1223 fetchIO *
fetchXGetHTTP(struct url * URL,struct url_stat * us,const char * flags)1224 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1225 {
1226 	return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1227 }
1228 
1229 /*
1230  * Retrieve a file by HTTP
1231  */
1232 fetchIO *
fetchGetHTTP(struct url * URL,const char * flags)1233 fetchGetHTTP(struct url *URL, const char *flags)
1234 {
1235 	return (fetchXGetHTTP(URL, NULL, flags));
1236 }
1237 
1238 /*
1239  * Store a file by HTTP
1240  */
1241 fetchIO *
1242 /*ARGSUSED*/
fetchPutHTTP(struct url * URL __unused,const char * flags __unused)1243 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1244 {
1245 	fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1246 	return (NULL);
1247 }
1248 
1249 /*
1250  * Get an HTTP document's metadata
1251  */
1252 int
fetchStatHTTP(struct url * URL,struct url_stat * us,const char * flags)1253 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1254 {
1255 	fetchIO *f;
1256 
1257 	f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1258 	if (f == NULL)
1259 		return (-1);
1260 	fetchIO_close(f);
1261 	return (0);
1262 }
1263 
/*
 * States of the character-driven scanner that extracts href="..." values
 * from a-tags in an HTML directory index.
 */
enum http_states {
	ST_NONE,	/* plain text, not inside markup */
	ST_LT,		/* "<" seen */
	ST_LTA,		/* "<a" seen */
	ST_TAGA,	/* inside an a-tag, between attributes */
	ST_H,		/* "<a h" seen */
	ST_R,		/* "<a hr" seen */
	ST_E,		/* "<a hre" seen */
	ST_F,		/* "<a href" seen */
	ST_HREF,	/* "<a href=" seen */
	ST_HREFQ,	/* inside the quoted href value */
	ST_TAG,		/* inside a tag that is not an a-tag */
	ST_TAGAX,	/* inside an unrecognised keyword of an a-tag */
	ST_TAGAQ	/* inside a quoted value of an a-tag other than href */
};
1279 
1280 struct index_parser {
1281 	struct url_list *ue;
1282 	struct url *url;
1283 	enum http_states state;
1284 };
1285 
1286 static ssize_t
parse_index(struct index_parser * parser,const char * buf,size_t len)1287 parse_index(struct index_parser *parser, const char *buf, size_t len)
1288 {
1289 	char *end_attr, p = *buf;
1290 
1291 	switch (parser->state) {
1292 	case ST_NONE:
1293 		/* Plain text, not in markup */
1294 		if (p == '<')
1295 			parser->state = ST_LT;
1296 		return 1;
1297 	case ST_LT:
1298 		/* In tag -- "<" already found */
1299 		if (p == '>')
1300 			parser->state = ST_NONE;
1301 		else if (p == 'a' || p == 'A')
1302 			parser->state = ST_LTA;
1303 		else if (!isspace((unsigned char)p))
1304 			parser->state = ST_TAG;
1305 		return 1;
1306 	case ST_LTA:
1307 		/* In tag -- "<a" already found */
1308 		if (p == '>')
1309 			parser->state = ST_NONE;
1310 		else if (p == '"')
1311 			parser->state = ST_TAGAQ;
1312 		else if (isspace((unsigned char)p))
1313 			parser->state = ST_TAGA;
1314 		else
1315 			parser->state = ST_TAG;
1316 		return 1;
1317 	case ST_TAG:
1318 		/* In tag, but not "<a" -- disregard */
1319 		if (p == '>')
1320 			parser->state = ST_NONE;
1321 		return 1;
1322 	case ST_TAGA:
1323 		/* In a-tag -- "<a " already found */
1324 		if (p == '>')
1325 			parser->state = ST_NONE;
1326 		else if (p == '"')
1327 			parser->state = ST_TAGAQ;
1328 		else if (p == 'h' || p == 'H')
1329 			parser->state = ST_H;
1330 		else if (!isspace((unsigned char)p))
1331 			parser->state = ST_TAGAX;
1332 		return 1;
1333 	case ST_TAGAX:
1334 		/* In unknown keyword in a-tag */
1335 		if (p == '>')
1336 			parser->state = ST_NONE;
1337 		else if (p == '"')
1338 			parser->state = ST_TAGAQ;
1339 		else if (isspace((unsigned char)p))
1340 			parser->state = ST_TAGA;
1341 		return 1;
1342 	case ST_TAGAQ:
1343 		/* In a-tag, unknown argument for keys. */
1344 		if (p == '>')
1345 			parser->state = ST_NONE;
1346 		else if (p == '"')
1347 			parser->state = ST_TAGA;
1348 		return 1;
1349 	case ST_H:
1350 		/* In a-tag -- "<a h" already found */
1351 		if (p == '>')
1352 			parser->state = ST_NONE;
1353 		else if (p == '"')
1354 			parser->state = ST_TAGAQ;
1355 		else if (p == 'r' || p == 'R')
1356 			parser->state = ST_R;
1357 		else if (isspace((unsigned char)p))
1358 			parser->state = ST_TAGA;
1359 		else
1360 			parser->state = ST_TAGAX;
1361 		return 1;
1362 	case ST_R:
1363 		/* In a-tag -- "<a hr" already found */
1364 		if (p == '>')
1365 			parser->state = ST_NONE;
1366 		else if (p == '"')
1367 			parser->state = ST_TAGAQ;
1368 		else if (p == 'e' || p == 'E')
1369 			parser->state = ST_E;
1370 		else if (isspace((unsigned char)p))
1371 			parser->state = ST_TAGA;
1372 		else
1373 			parser->state = ST_TAGAX;
1374 		return 1;
1375 	case ST_E:
1376 		/* In a-tag -- "<a hre" already found */
1377 		if (p == '>')
1378 			parser->state = ST_NONE;
1379 		else if (p == '"')
1380 			parser->state = ST_TAGAQ;
1381 		else if (p == 'f' || p == 'F')
1382 			parser->state = ST_F;
1383 		else if (isspace((unsigned char)p))
1384 			parser->state = ST_TAGA;
1385 		else
1386 			parser->state = ST_TAGAX;
1387 		return 1;
1388 	case ST_F:
1389 		/* In a-tag -- "<a href" already found */
1390 		if (p == '>')
1391 			parser->state = ST_NONE;
1392 		else if (p == '"')
1393 			parser->state = ST_TAGAQ;
1394 		else if (p == '=')
1395 			parser->state = ST_HREF;
1396 		else if (!isspace((unsigned char)p))
1397 			parser->state = ST_TAGAX;
1398 		return 1;
1399 	case ST_HREF:
1400 		/* In a-tag -- "<a href=" already found */
1401 		if (p == '>')
1402 			parser->state = ST_NONE;
1403 		else if (p == '"')
1404 			parser->state = ST_HREFQ;
1405 		else if (!isspace((unsigned char)p))
1406 			parser->state = ST_TAGA;
1407 		return 1;
1408 	case ST_HREFQ:
1409 		/* In href of the a-tag */
1410 		end_attr = memchr(buf, '"', len);
1411 		if (end_attr == NULL)
1412 			return 0;
1413 		*end_attr = '\0';
1414 		parser->state = ST_TAGA;
1415 		if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1416 			return -1;
1417 		return end_attr + 1 - buf;
1418 	}
1419 	/* NOTREACHED */
1420 	abort();
1421 }
1422 
1423 struct http_index_cache {
1424 	struct http_index_cache *next;
1425 	struct url *location;
1426 	struct url_list ue;
1427 };
1428 
1429 static struct http_index_cache *index_cache;
1430 
1431 /*
1432  * List a directory
1433  */
1434 int
1435 /*ARGSUSED*/
fetchListHTTP(struct url_list * ue,struct url * url,const char * pattern __unused,const char * flags)1436 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern __unused, const char *flags)
1437 {
1438 	fetchIO *f;
1439 	char buf[2 * PATH_MAX];
1440 	size_t buf_len, sum_processed;
1441 	ssize_t read_len, processed;
1442 	struct index_parser state;
1443 	struct http_index_cache *cache = NULL;
1444 	int do_cache, ret;
1445 
1446 	do_cache = CHECK_FLAG('c');
1447 
1448 	if (do_cache) {
1449 		for (cache = index_cache; cache != NULL; cache = cache->next) {
1450 			if (strcmp(cache->location->scheme, url->scheme))
1451 				continue;
1452 			if (strcmp(cache->location->user, url->user))
1453 				continue;
1454 			if (strcmp(cache->location->pwd, url->pwd))
1455 				continue;
1456 			if (strcmp(cache->location->host, url->host))
1457 				continue;
1458 			if (cache->location->port != url->port)
1459 				continue;
1460 			if (strcmp(cache->location->doc, url->doc))
1461 				continue;
1462 			return fetchAppendURLList(ue, &cache->ue);
1463 		}
1464 
1465 		cache = malloc(sizeof(*cache));
1466 		fetchInitURLList(&cache->ue);
1467 		cache->location = fetchCopyURL(url);
1468 	}
1469 
1470 	f = fetchGetHTTP(url, flags);
1471 	if (f == NULL) {
1472 		if (do_cache) {
1473 			fetchFreeURLList(&cache->ue);
1474 			fetchFreeURL(cache->location);
1475 			free(cache);
1476 		}
1477 		return -1;
1478 	}
1479 
1480 	state.url = url;
1481 	state.state = ST_NONE;
1482 	if (do_cache) {
1483 		state.ue = &cache->ue;
1484 	} else {
1485 		state.ue = ue;
1486 	}
1487 
1488 	buf_len = 0;
1489 
1490 	while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1491 		buf_len += read_len;
1492 		sum_processed = 0;
1493 		do {
1494 			processed = parse_index(&state, buf + sum_processed, buf_len);
1495 			if (processed == -1)
1496 				break;
1497 			buf_len -= processed;
1498 			sum_processed += processed;
1499 		} while (processed != 0 && buf_len > 0);
1500 		if (processed == -1) {
1501 			read_len = -1;
1502 			break;
1503 		}
1504 		memmove(buf, buf + sum_processed, buf_len);
1505 	}
1506 
1507 	fetchIO_close(f);
1508 
1509 	ret = read_len < 0 ? -1 : 0;
1510 
1511 	if (do_cache) {
1512 		if (ret == 0) {
1513 			cache->next = index_cache;
1514 			index_cache = cache;
1515 		}
1516 
1517 		if (fetchAppendURLList(ue, &cache->ue))
1518 			ret = -1;
1519 	}
1520 
1521 	return ret;
1522 }
1523