xref: /dflybsd-src/lib/libfetch/http.c (revision 19b217afd6cebbfc1fc0639e368459f11526832c)
1 /*-
2  * Copyright (c) 2000-2011 Dag-Erling Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * The following copyright applies to the base64 code:
31  *
32  *-
33  * Copyright 1997 Massachusetts Institute of Technology
34  *
35  * Permission to use, copy, modify, and distribute this software and
36  * its documentation for any purpose and without fee is hereby
37  * granted, provided that both the above copyright notice and this
38  * permission notice appear in all copies, that both the above
39  * copyright notice and this permission notice appear in all
40  * supporting documentation, and that the name of M.I.T. not be used
41  * in advertising or publicity pertaining to distribution of the
42  * software without specific, written prior permission.  M.I.T. makes
43  * no representations about the suitability of this software for any
44  * purpose.  It is provided "as is" without express or implied
45  * warranty.
46  *
47  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
48  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
49  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
50  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
51  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
52  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
53  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
54  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
55  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
56  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
57  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  */
60 
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/time.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#ifdef WITH_SSL
#include <openssl/md5.h>
#define MD5Init(c) MD5_Init(c)
#define MD5Update(c, data, len) MD5_Update(c, data, len)
#define MD5Final(md, c) MD5_Final(md, c)
#else
#include <md5.h>
#endif

#include <netinet/in.h>
#include <netinet/tcp.h>

#include "fetch.h"
#include "common.h"
#include "httperr.h"
92 
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 20

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_USE_PROXY		305
#define HTTP_TEMP_REDIRECT	307
#define HTTP_PERM_REDIRECT	308
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
#define HTTP_PROTOCOL_ERROR	999

/*
 * True for every reply code that sends us to another location.  308
 * (permanent redirect) is listed alongside 301 so that it is treated as
 * a redirect as well.
 */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_PERM_REDIRECT \
			    || (xyz) == HTTP_USE_PROXY \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for the whole 4xx and 5xx ranges; the bounds are inclusive so
 * that 400 and 599 themselves count as errors.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
118 
119 
120 /*****************************************************************************
121  * I/O functions for decoding chunked streams
122  */
123 
124 struct httpio
125 {
126 	conn_t		*conn;		/* connection */
127 	int		 chunked;	/* chunked mode */
128 	char		*buf;		/* chunk buffer */
129 	size_t		 bufsize;	/* size of chunk buffer */
130 	ssize_t		 buflen;	/* amount of data currently in buffer */
131 	int		 bufpos;	/* current read offset in buffer */
132 	int		 eof;		/* end-of-file flag */
133 	int		 error;		/* error flag */
134 	size_t		 chunksize;	/* remaining size of current chunk */
135 #ifndef NDEBUG
136 	size_t		 total;
137 #endif
138 };
139 
140 static int http_cmd(conn_t *, const char *, ...) __printflike(2, 3);
141 
142 /*
143  * Get next chunk header
144  */
145 static int
146 http_new_chunk(struct httpio *io)
147 {
148 	char *p;
149 
150 	if (fetch_getln(io->conn) == -1)
151 		return (-1);
152 
153 	if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
154 		return (-1);
155 
156 	for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
157 		if (*p == ';')
158 			break;
159 		if (!isxdigit((unsigned char)*p))
160 			return (-1);
161 		if (isdigit((unsigned char)*p)) {
162 			io->chunksize = io->chunksize * 16 +
163 			    *p - '0';
164 		} else {
165 			io->chunksize = io->chunksize * 16 +
166 			    10 + tolower((unsigned char)*p) - 'a';
167 		}
168 	}
169 
170 #ifndef NDEBUG
171 	if (fetchDebug) {
172 		io->total += io->chunksize;
173 		if (io->chunksize == 0)
174 			fprintf(stderr, "%s(): end of last chunk\n", __func__);
175 		else
176 			fprintf(stderr, "%s(): new chunk: %lu (%lu)\n",
177 			    __func__, (unsigned long)io->chunksize,
178 			    (unsigned long)io->total);
179 	}
180 #endif
181 
182 	return (io->chunksize);
183 }
184 
185 /*
186  * Grow the input buffer to at least len bytes
187  */
188 static inline int
189 http_growbuf(struct httpio *io, size_t len)
190 {
191 	char *tmp;
192 
193 	if (io->bufsize >= len)
194 		return (0);
195 
196 	if ((tmp = realloc(io->buf, len)) == NULL)
197 		return (-1);
198 	io->buf = tmp;
199 	io->bufsize = len;
200 	return (0);
201 }
202 
203 /*
204  * Fill the input buffer, do chunk decoding on the fly
205  */
206 static int
207 http_fillbuf(struct httpio *io, size_t len)
208 {
209 	ssize_t nbytes;
210 
211 	if (io->error)
212 		return (-1);
213 	if (io->eof)
214 		return (0);
215 
216 	if (io->chunked == 0) {
217 		if (http_growbuf(io, len) == -1)
218 			return (-1);
219 		if ((nbytes = fetch_read(io->conn, io->buf, len)) == -1) {
220 			io->error = errno;
221 			return (-1);
222 		}
223 		io->buflen = nbytes;
224 		io->bufpos = 0;
225 		return (io->buflen);
226 	}
227 
228 	if (io->chunksize == 0) {
229 		switch (http_new_chunk(io)) {
230 		case -1:
231 			io->error = 1;
232 			return (-1);
233 		case 0:
234 			io->eof = 1;
235 			return (0);
236 		}
237 	}
238 
239 	if (len > io->chunksize)
240 		len = io->chunksize;
241 	if (http_growbuf(io, len) == -1)
242 		return (-1);
243 	if ((nbytes = fetch_read(io->conn, io->buf, len)) == -1) {
244 		io->error = errno;
245 		return (-1);
246 	}
247 	io->buflen = nbytes;
248 	io->chunksize -= io->buflen;
249 
250 	if (io->chunksize == 0) {
251 		char endl[2];
252 
253 		if (fetch_read(io->conn, endl, 2) != 2 ||
254 		    endl[0] != '\r' || endl[1] != '\n')
255 			return (-1);
256 	}
257 
258 	io->bufpos = 0;
259 
260 	return (io->buflen);
261 }
262 
263 /*
264  * Read function
265  */
266 static int
267 http_readfn(void *v, char *buf, int len)
268 {
269 	struct httpio *io = (struct httpio *)v;
270 	int l, pos;
271 
272 	if (io->error)
273 		return (-1);
274 	if (io->eof)
275 		return (0);
276 
277 	for (pos = 0; len > 0; pos += l, len -= l) {
278 		/* empty buffer */
279 		if (!io->buf || io->bufpos == io->buflen)
280 			if (http_fillbuf(io, len) < 1)
281 				break;
282 		l = io->buflen - io->bufpos;
283 		if (len < l)
284 			l = len;
285 		memcpy(buf + pos, io->buf + io->bufpos, l);
286 		io->bufpos += l;
287 	}
288 
289 	if (!pos && io->error) {
290 		if (io->error == EINTR)
291 			io->error = 0;
292 		return (-1);
293 	}
294 	return (pos);
295 }
296 
297 /*
298  * Write function
299  */
300 static int
301 http_writefn(void *v, const char *buf, int len)
302 {
303 	struct httpio *io = (struct httpio *)v;
304 
305 	return (fetch_write(io->conn, buf, len));
306 }
307 
308 /*
309  * Close function
310  */
311 static int
312 http_closefn(void *v)
313 {
314 	struct httpio *io = (struct httpio *)v;
315 	int r;
316 
317 	r = fetch_close(io->conn);
318 	if (io->buf)
319 		free(io->buf);
320 	free(io);
321 	return (r);
322 }
323 
324 /*
325  * Wrap a file descriptor up
326  */
327 static FILE *
328 http_funopen(conn_t *conn, int chunked)
329 {
330 	struct httpio *io;
331 	FILE *f;
332 
333 	if ((io = calloc(1, sizeof(*io))) == NULL) {
334 		fetch_syserr();
335 		return (NULL);
336 	}
337 	io->conn = conn;
338 	io->chunked = chunked;
339 	f = funopen(io, http_readfn, http_writefn, NULL, http_closefn);
340 	if (f == NULL) {
341 		fetch_syserr();
342 		free(io);
343 		return (NULL);
344 	}
345 	return (f);
346 }
347 
348 
349 /*****************************************************************************
350  * Helper functions for talking to the server and parsing its replies
351  */
352 
/*
 * Header types.  Values at or below zero are conditions rather than
 * headers; hdr_unknown stands for any header not listed in hdr_names.
 */
typedef enum {
	hdr_syserror = -2,
	hdr_error = -1,
	hdr_end = 0,
	hdr_unknown = 1,
	hdr_content_length,
	hdr_content_range,
	hdr_last_modified,
	hdr_location,
	hdr_transfer_encoding,
	hdr_www_authenticate,
	hdr_proxy_authenticate,
} hdr_t;

/*
 * Names of interesting headers.  The table ends with an hdr_unknown
 * entry whose name is NULL.
 */
static struct {
	hdr_t		 num;
	const char	*name;
} hdr_names[] = {
	{ .num = hdr_content_length,	 .name = "Content-Length" },
	{ .num = hdr_content_range,	 .name = "Content-Range" },
	{ .num = hdr_last_modified,	 .name = "Last-Modified" },
	{ .num = hdr_location,		 .name = "Location" },
	{ .num = hdr_transfer_encoding,	 .name = "Transfer-Encoding" },
	{ .num = hdr_www_authenticate,	 .name = "WWW-Authenticate" },
	{ .num = hdr_proxy_authenticate, .name = "Proxy-Authenticate" },
	{ .num = hdr_unknown,		 .name = NULL },
};
382 
383 /*
384  * Send a formatted line; optionally echo to terminal
385  */
386 static int
387 http_cmd(conn_t *conn, const char *fmt, ...)
388 {
389 	va_list ap;
390 	size_t len;
391 	char *msg;
392 	int r;
393 
394 	va_start(ap, fmt);
395 	len = vasprintf(&msg, fmt, ap);
396 	va_end(ap);
397 
398 	if (msg == NULL) {
399 		errno = ENOMEM;
400 		fetch_syserr();
401 		return (-1);
402 	}
403 
404 	r = fetch_putln(conn, msg, len);
405 	free(msg);
406 
407 	if (r == -1) {
408 		fetch_syserr();
409 		return (-1);
410 	}
411 
412 	return (0);
413 }
414 
415 /*
416  * Get and parse status line
417  */
418 static int
419 http_get_reply(conn_t *conn)
420 {
421 	char *p;
422 
423 	if (fetch_getln(conn) == -1)
424 		return (-1);
425 	/*
426 	 * A valid status line looks like "HTTP/m.n xyz reason" where m
427 	 * and n are the major and minor protocol version numbers and xyz
428 	 * is the reply code.
429 	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
430 	 * just one) that do not send a version number, so we can't rely
431 	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
432 	 * We don't care about the reason phrase.
433 	 */
434 	if (strncmp(conn->buf, "HTTP", 4) != 0)
435 		return (HTTP_PROTOCOL_ERROR);
436 	p = conn->buf + 4;
437 	if (*p == '/') {
438 		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
439 			return (HTTP_PROTOCOL_ERROR);
440 		p += 4;
441 	}
442 	if (*p != ' ' ||
443 	    !isdigit((unsigned char)p[1]) ||
444 	    !isdigit((unsigned char)p[2]) ||
445 	    !isdigit((unsigned char)p[3]))
446 		return (HTTP_PROTOCOL_ERROR);
447 
448 	conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
449 	return (conn->err);
450 }
451 
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header line.
 * The name is compared without regard to case and has to be followed
 * immediately by a colon.  Returns a pointer to the first character
 * after the colon that is not white space (possibly the terminating
 * NUL), or NULL if the line is not the wanted header.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Compare first and advance afterwards, so that a mismatch on
	 * the last character of the name is not skipped over.
	 */
	while (*str != '\0' && *hdr != '\0') {
		if (tolower((unsigned char)*str) !=
		    tolower((unsigned char)*hdr))
			return (NULL);
		str++;
		hdr++;
	}
	if (*str != '\0' || *hdr != ':')
		return (NULL);

	/* step over the colon and any white space that follows it */
	do {
		hdr++;
	} while (isspace((unsigned char)*hdr));
	return (hdr);
}
468 
469 
470 /*
471  * Get the next header and return the appropriate symbolic code.  We
472  * need to read one line ahead for checking for a continuation line
473  * belonging to the current header (continuation lines start with
474  * white space).
475  *
476  * We get called with a fresh line already in the conn buffer, either
477  * from the previous http_next_header() invocation, or, the first
478  * time, from a fetch_getln() performed by our caller.
479  *
 * This stops when we encounter an empty line (we don't read beyond the
 * header area).
 *
 * Note that the "headerbuf" is just a place to return the result. Its
 * contents are not used for the next call. This means that no cleanup
 * is needed when, e.g., doing another connection; just call the cleanup
 * when fully done to deallocate memory.
487  */
488 
/* Upper bound on the number of continuation lines read per header */
#define HTTP_MAX_CONT_LINES 10

/* Buffer in which a header is assembled from one or several lines */
typedef struct {
	char	*buf;		/* storage, NULL when nothing is allocated */
	size_t	 bufsize;	/* allocated size of buf */
	size_t	 buflen;	/* length of the text held in buf */
} http_headerbuf_t;

/* Put a header buffer into the empty state; nothing is freed */
static void
init_http_headerbuf(http_headerbuf_t *buf)
{
	buf->buflen = 0;
	buf->bufsize = 0;
	buf->buf = NULL;
}

/* Release the storage of a header buffer and reset it to empty */
static void
clean_http_headerbuf(http_headerbuf_t *buf)
{
	free(buf->buf);		/* free(NULL) is a no-op */
	init_http_headerbuf(buf);
}
514 
515 /* Remove whitespace at the end of the buffer */
516 static void
517 http_conn_trimright(conn_t *conn)
518 {
519 	while (conn->buflen &&
520 	       isspace((unsigned char)conn->buf[conn->buflen - 1]))
521 		conn->buflen--;
522 	conn->buf[conn->buflen] = '\0';
523 }
524 
525 static hdr_t
526 http_next_header(conn_t *conn, http_headerbuf_t *hbuf, const char **p)
527 {
528 	unsigned int i, len;
529 
530 	/*
531 	 * Have to do the stripping here because of the first line. So
532 	 * it's done twice for the subsequent lines. No big deal
533 	 */
534 	http_conn_trimright(conn);
535 	if (conn->buflen == 0)
536 		return (hdr_end);
537 
538 	/* Copy the line to the headerbuf */
539 	if (hbuf->bufsize < conn->buflen + 1) {
540 		if ((hbuf->buf = realloc(hbuf->buf, conn->buflen + 1)) == NULL)
541 			return (hdr_syserror);
542 		hbuf->bufsize = conn->buflen + 1;
543 	}
544 	strcpy(hbuf->buf, conn->buf);
545 	hbuf->buflen = conn->buflen;
546 
547 	/*
548 	 * Fetch possible continuation lines. Stop at 1st non-continuation
549 	 * and leave it in the conn buffer
550 	 */
551 	for (i = 0; i < HTTP_MAX_CONT_LINES; i++) {
552 		if (fetch_getln(conn) == -1)
553 			return (hdr_syserror);
554 
555 		/*
556 		 * Note: we carry on the idea from the previous version
557 		 * that a pure whitespace line is equivalent to an empty
558 		 * one (so it's not continuation and will be handled when
559 		 * we are called next)
560 		 */
561 		http_conn_trimright(conn);
562 		if (conn->buf[0] != ' ' && conn->buf[0] != "\t"[0])
563 			break;
564 
565 		/* Got a continuation line. Concatenate to previous */
566 		len = hbuf->buflen + conn->buflen;
567 		if (hbuf->bufsize < len + 1) {
568 			len *= 2;
569 			if ((hbuf->buf = realloc(hbuf->buf, len + 1)) == NULL)
570 				return (hdr_syserror);
571 			hbuf->bufsize = len + 1;
572 		}
573 		strcpy(hbuf->buf + hbuf->buflen, conn->buf);
574 		hbuf->buflen += conn->buflen;
575 	}
576 
577 	/*
578 	 * We could check for malformed headers but we don't really care.
579 	 * A valid header starts with a token immediately followed by a
580 	 * colon; a token is any sequence of non-control, non-whitespace
581 	 * characters except "()<>@,;:\\\"{}".
582 	 */
583 	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
584 		if ((*p = http_match(hdr_names[i].name, hbuf->buf)) != NULL)
585 			return (hdr_names[i].num);
586 
587 	return (hdr_unknown);
588 }
589 
590 /**************************
591  * [Proxy-]Authenticate header parsing
592  */
593 
/*
 * Copy a double-quoted string into obuf.
 *
 * cp points just past the opening '"'.  A backslash makes the character
 * after it literal.  The caller has to provide an obuf that is large
 * enough; it is always NUL-terminated, even on failure.  Returns a
 * pointer to the character after the closing quote, or NULL if the
 * input ends before a closing quote is found.
 */
static const char *
http_parse_headerstring(const char *cp, char *obuf)
{
	const char *in = cp;
	char *out = obuf;

	while (*in != '\0' && *in != '"') {
		if (*in == '\\') {
			/* drop the backslash, keep what follows */
			in++;
			if (*in == '\0')
				break;
		}
		*out++ = *in++;
	}
	*out = '\0';

	if (*in != '"')
		return (NULL);
	return (in + 1);
}
624 
/* Authentication schemes a challenge can name */
typedef enum {
	HTTPAS_UNKNOWN,
	HTTPAS_BASIC,
	HTTPAS_DIGEST
} http_auth_schemes_t;

/* One Basic or Digest challenge; the strings are heap-allocated or NULL */
typedef struct {
	http_auth_schemes_t scheme;
	char	*realm;
	char	*qop;
	char	*nonce;
	char	*opaque;
	char	*algo;
	int	 stale;
	int	 nc; /* Nonce count */
} http_auth_challenge_t;

/* Put a challenge into the empty state; nothing is freed */
static void
init_http_auth_challenge(http_auth_challenge_t *b)
{
	b->scheme = HTTPAS_UNKNOWN;
	b->realm = NULL;
	b->qop = NULL;
	b->nonce = NULL;
	b->opaque = NULL;
	b->algo = NULL;
	b->stale = 0;
	b->nc = 0;
}

/* Free the strings of a challenge and reset it to the empty state */
static void
clean_http_auth_challenge(http_auth_challenge_t *b)
{
	/* free(NULL) is a no-op, so unset fields need no test */
	free(b->realm);
	free(b->qop);
	free(b->nonce);
	free(b->opaque);
	free(b->algo);
	init_http_auth_challenge(b);
}
663 
664 /* Data holder for an array of challenges offered in an http response. */
665 #define MAX_CHALLENGES 10
666 typedef struct {
667 	http_auth_challenge_t *challenges[MAX_CHALLENGES];
668 	int	count; /* Number of parsed challenges in the array */
669 	int	valid; /* We did parse an authenticate header */
670 } http_auth_challenges_t;
671 
672 static void
673 init_http_auth_challenges(http_auth_challenges_t *cs)
674 {
675 	int i;
676 	for (i = 0; i < MAX_CHALLENGES; i++)
677 		cs->challenges[i] = NULL;
678 	cs->count = cs->valid = 0;
679 }
680 
681 static void
682 clean_http_auth_challenges(http_auth_challenges_t *cs)
683 {
684 	int i;
685 	/* We rely on non-zero pointers being allocated, not on the count */
686 	for (i = 0; i < MAX_CHALLENGES; i++) {
687 		if (cs->challenges[i] != NULL) {
688 			clean_http_auth_challenge(cs->challenges[i]);
689 			free(cs->challenges[i]);
690 		}
691 	}
692 	init_http_auth_challenges(cs);
693 }
694 
/*
 * Token types produced by http_header_lex().  The separators ',' and
 * '=' are returned as their own character value instead.
 */
typedef enum {
	HTTPHL_WORD = 256,
	HTTPHL_STRING = 257,
	HTTPHL_END = 258,
	HTTPHL_ERROR = 259
} http_header_lex_t;

/*
 * Read the next token from *cpp.
 *
 * Leading blanks and tabs are skipped.  The text of a word or of a
 * quoted string is stored in buf, which the caller must have made large
 * enough.  *cpp is moved past the token and the token type is returned;
 * after HTTPHL_ERROR (unterminated string) *cpp is NULL.
 */
static int
http_header_lex(const char **cpp, char *buf)
{
	const char *cur;
	size_t wordlen;

	cur = *cpp + strspn(*cpp, " \t");

	switch (*cur) {
	case '\0':
		*cpp = cur;
		return (HTTPHL_END);
	case ',':
	case '=':
		/* a separator is its own token */
		*cpp = cur + 1;
		return (*cur);
	case '"':
		*cpp = http_parse_headerstring(cur + 1, buf);
		return (*cpp != NULL ? HTTPHL_STRING : HTTPHL_ERROR);
	default:
		break;
	}

	/* anything else is a word that runs up to a separator or blank */
	wordlen = strcspn(cur, " \t,=");
	memcpy(buf, cur, wordlen);
	buf[wordlen] = '\0';
	*cpp = cur + wordlen;
	return (HTTPHL_WORD);
}
735 
736 /*
737  * Read challenges from http xxx-authenticate header and accumulate them
738  * in the challenges list structure.
739  *
740  * Headers with multiple challenges are specified by rfc2617, but
741  * servers (ie: squid) often send them in separate headers instead,
742  * which in turn is forbidden by the http spec (multiple headers with
743  * the same name are only allowed for pure comma-separated lists, see
744  * rfc2616 sec 4.2).
745  *
746  * We support both approaches anyway
747  */
748 static int
749 http_parse_authenticate(const char *cp, http_auth_challenges_t *cs)
750 {
751 	int ret = -1;
752 	http_header_lex_t lex;
753 	char *key = malloc(strlen(cp) + 1);
754 	char *value = malloc(strlen(cp) + 1);
755 	char *buf = malloc(strlen(cp) + 1);
756 
757 	if (key == NULL || value == NULL || buf == NULL) {
758 		fetch_syserr();
759 		goto out;
760 	}
761 
762 	/* In any case we've seen the header and we set the valid bit */
763 	cs->valid = 1;
764 
765 	/* Need word first */
766 	lex = http_header_lex(&cp, key);
767 	if (lex != HTTPHL_WORD)
768 		goto out;
769 
770 	/* Loop on challenges */
771 	for (; cs->count < MAX_CHALLENGES; cs->count++) {
772 		cs->challenges[cs->count] =
773 			malloc(sizeof(http_auth_challenge_t));
774 		if (cs->challenges[cs->count] == NULL) {
775 			fetch_syserr();
776 			goto out;
777 		}
778 		init_http_auth_challenge(cs->challenges[cs->count]);
779 		if (!strcasecmp(key, "basic")) {
780 			cs->challenges[cs->count]->scheme = HTTPAS_BASIC;
781 		} else if (!strcasecmp(key, "digest")) {
782 			cs->challenges[cs->count]->scheme = HTTPAS_DIGEST;
783 		} else {
784 			cs->challenges[cs->count]->scheme = HTTPAS_UNKNOWN;
785 			/*
786 			 * Continue parsing as basic or digest may
787 			 * follow, and the syntax is the same for
788 			 * all. We'll just ignore this one when
789 			 * looking at the list
790 			 */
791 		}
792 
793 		/* Loop on attributes */
794 		for (;;) {
795 			/* Key */
796 			lex = http_header_lex(&cp, key);
797 			if (lex != HTTPHL_WORD)
798 				goto out;
799 
800 			/* Equal sign */
801 			lex = http_header_lex(&cp, buf);
802 			if (lex != '=')
803 				goto out;
804 
805 			/* Value */
806 			lex = http_header_lex(&cp, value);
807 			if (lex != HTTPHL_WORD && lex != HTTPHL_STRING)
808 				goto out;
809 
810 			if (!strcasecmp(key, "realm"))
811 				cs->challenges[cs->count]->realm =
812 					strdup(value);
813 			else if (!strcasecmp(key, "qop"))
814 				cs->challenges[cs->count]->qop =
815 					strdup(value);
816 			else if (!strcasecmp(key, "nonce"))
817 				cs->challenges[cs->count]->nonce =
818 					strdup(value);
819 			else if (!strcasecmp(key, "opaque"))
820 				cs->challenges[cs->count]->opaque =
821 					strdup(value);
822 			else if (!strcasecmp(key, "algorithm"))
823 				cs->challenges[cs->count]->algo =
824 					strdup(value);
825 			else if (!strcasecmp(key, "stale"))
826 				cs->challenges[cs->count]->stale =
827 					strcasecmp(value, "no");
828 			/* Else ignore unknown attributes */
829 
830 			/* Comma or Next challenge or End */
831 			lex = http_header_lex(&cp, key);
832 			/*
833 			 * If we get a word here, this is the beginning of the
834 			 * next challenge. Break the attributes loop
835 			 */
836 			if (lex == HTTPHL_WORD)
837 				break;
838 
839 			if (lex == HTTPHL_END) {
840 				/* End while looking for ',' is normal exit */
841 				cs->count++;
842 				ret = 0;
843 				goto out;
844 			}
845 			/* Anything else is an error */
846 			if (lex != ',')
847 				goto out;
848 
849 		} /* End attributes loop */
850 	} /* End challenge loop */
851 
852 	/*
853 	 * Challenges max count exceeded. This really can't happen
854 	 * with normal data, something's fishy -> error
855 	 */
856 
857 out:
858 	if (key)
859 		free(key);
860 	if (value)
861 		free(value);
862 	if (buf)
863 		free(buf);
864 	return (ret);
865 }
866 
867 
/*
 * Parse a last-modified header
 *
 * p is the header value in the form "Sun, 06 Nov 1994 08:49:37 GMT".
 * The LC_TIME locale is switched to "C" while parsing so that day and
 * month names are matched in English, and is restored afterwards.
 * Returns 0 and stores the time in *mtime on success, -1 if the value
 * does not parse.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	char locale[64], *r;
	struct tm tm;

	/* snprintf() always terminates the copy, unlike strncpy() */
	snprintf(locale, sizeof(locale), "%s", setlocale(LC_TIME, NULL));
	setlocale(LC_TIME, "C");
	/* strptime() only sets the fields named in the format */
	memset(&tm, 0, sizeof(tm));
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, locale);
	if (r == NULL)
		return (-1);
	DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d "
		  "%02d:%02d:%02d]\n",
		  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		  tm.tm_hour, tm.tm_min, tm.tm_sec));
	*mtime = timegm(&tm);
	return (0);
}
891 
/*
 * Parse the value of a content-length header.
 *
 * The value has to consist of decimal digits only.  Returns 0 and
 * stores the number in *length on success, -1 if any other character
 * is found.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	off_t total = 0;

	/* isdigit() is false for the terminating NUL */
	while (isdigit((unsigned char)*p)) {
		total = total * 10 + (*p - '0');
		p++;
	}
	if (*p != '\0')
		return (-1);

	DEBUG(fprintf(stderr, "content length: [%lld]\n",
	    (long long)total));
	*length = total;
	return (0);
}
909 
/*
 * Parse the value of a content-range header.
 *
 * Accepted forms are "bytes first-last/size" and "bytes *" "/size".  On
 * success 0 is returned, *offset receives first (-1 for the '*' form),
 * *length the number of bytes in the range (0 for the '*' form) and
 * *size the total size.  Returns -1 if the value is malformed or
 * inconsistent.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, total;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;

	if (*p != '*') {
		first = 0;
		while (isdigit((unsigned char)*p)) {
			first = first * 10 + *p - '0';
			p++;
		}
		if (*p != '-')
			return (-1);
		p++;
		last = 0;
		while (isdigit((unsigned char)*p)) {
			last = last * 10 + *p - '0';
			p++;
		}
	} else {
		/* unspecified range */
		first = last = -1;
		p++;
	}

	if (first > last || *p != '/')
		return (-1);
	p++;

	total = 0;
	while (isdigit((unsigned char)*p)) {
		total = total * 10 + *p - '0';
		p++;
	}
	/* nothing may follow, and the range has to fit in the total */
	if (*p != '\0' || total < last - first + 1)
		return (-1);

	if (first != -1) {
		DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n",
		    (long long)first, (long long)last, (long long)total));
		*length = last - first + 1;
	} else {
		DEBUG(fprintf(stderr, "content range: [*/%lld]\n",
		    (long long)total));
		*length = 0;
	}
	*offset = first;
	*size = total;
	return (0);
}
951 
952 
953 /*****************************************************************************
954  * Helper functions for authorization
955  */
956 
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated, NUL-terminated string which the caller must free,
 * or NULL if memory cannot be allocated.  Output is padded with '='
 * to a multiple of four characters.
 *
 * The input is read as unsigned bytes: through plain (possibly signed)
 * char, a byte of 0x80 or above would be sign-extended and spill into
 * the neighbouring output characters.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	/* three input bytes give four output characters */
	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* one or two bytes left over: encode them and pad */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
1012 
1013 
/*
 * Authorization parameters taken from an environment value of the
 * form scheme:realm:user:pass.  Each field is heap-allocated or NULL.
 */
typedef struct {
	char	*scheme;
	char	*realm;
	char	*user;
	char	*password;
} http_auth_params_t;

/* Set every field to NULL; nothing is freed */
static void
init_http_auth_params(http_auth_params_t *s)
{
	s->scheme = NULL;
	s->realm = NULL;
	s->user = NULL;
	s->password = NULL;
}

/* Free every field and set it back to NULL */
static void
clean_http_auth_params(http_auth_params_t *s)
{
	/* free(NULL) is a no-op, so unset fields need no test */
	free(s->scheme);
	free(s->realm);
	free(s->user);
	free(s->password);
	init_http_auth_params(s);
}
1044 
1045 static int
1046 http_authfromenv(const char *p, http_auth_params_t *parms)
1047 {
1048 	int ret = -1;
1049 	char *v, *ve;
1050 	char *str = strdup(p);
1051 
1052 	if (str == NULL) {
1053 		fetch_syserr();
1054 		return (-1);
1055 	}
1056 	v = str;
1057 
1058 	if ((ve = strchr(v, ':')) == NULL)
1059 		goto out;
1060 
1061 	*ve = 0;
1062 	if ((parms->scheme = strdup(v)) == NULL) {
1063 		fetch_syserr();
1064 		goto out;
1065 	}
1066 	v = ve + 1;
1067 
1068 	if ((ve = strchr(v, ':')) == NULL)
1069 		goto out;
1070 
1071 	*ve = 0;
1072 	if ((parms->realm = strdup(v)) == NULL) {
1073 		fetch_syserr();
1074 		goto out;
1075 	}
1076 	v = ve + 1;
1077 
1078 	if ((ve = strchr(v, ':')) == NULL)
1079 		goto out;
1080 
1081 	*ve = 0;
1082 	if ((parms->user = strdup(v)) == NULL) {
1083 		fetch_syserr();
1084 		goto out;
1085 	}
1086 	v = ve + 1;
1087 
1088 
1089 	if ((parms->password = strdup(v)) == NULL) {
1090 		fetch_syserr();
1091 		goto out;
1092 	}
1093 	ret = 0;
1094 out:
1095 	if (ret == -1)
1096 		clean_http_auth_params(parms);
1097 	if (str)
1098 		free(str);
1099 	return (ret);
1100 }
1101 
1102 
/*
 * Digest response: the code to compute the digest follows the sample
 * implementation given in RFC 2617 (HTTP Digest Access Authentication).
 */
#define IN const
#define OUT

#define HASHLEN 16
typedef char HASH[HASHLEN];
#define HASHHEXLEN 32
typedef char HASHHEX[HASHHEXLEN+1];

static const char *hexchars = "0123456789abcdef";

/*
 * Write the HASHLEN bytes of Bin to Hex as HASHHEXLEN lower-case
 * hexadecimal digits followed by a NUL.
 */
static void
CvtHex(IN HASH Bin, OUT HASHHEX Hex)
{
	char *out = Hex;
	int i;

	for (i = 0; i < HASHLEN; i++) {
		unsigned char byte = (unsigned char)Bin[i];

		*out++ = hexchars[byte >> 4];	/* high nibble first */
		*out++ = hexchars[byte & 0xf];
	}
	*out = '\0';
}
1130 
1131 /* calculate H(A1) as per spec */
1132 static void
1133 DigestCalcHA1(
1134 	IN char * pszAlg,
1135 	IN char * pszUserName,
1136 	IN char * pszRealm,
1137 	IN char * pszPassword,
1138 	IN char * pszNonce,
1139 	IN char * pszCNonce,
1140 	OUT HASHHEX SessionKey
1141 	)
1142 {
1143 	MD5_CTX Md5Ctx;
1144 	HASH HA1;
1145 
1146 	MD5Init(&Md5Ctx);
1147 	MD5Update(&Md5Ctx, pszUserName, strlen(pszUserName));
1148 	MD5Update(&Md5Ctx, ":", 1);
1149 	MD5Update(&Md5Ctx, pszRealm, strlen(pszRealm));
1150 	MD5Update(&Md5Ctx, ":", 1);
1151 	MD5Update(&Md5Ctx, pszPassword, strlen(pszPassword));
1152 	MD5Final(HA1, &Md5Ctx);
1153 	if (strcasecmp(pszAlg, "md5-sess") == 0) {
1154 
1155 		MD5Init(&Md5Ctx);
1156 		MD5Update(&Md5Ctx, HA1, HASHLEN);
1157 		MD5Update(&Md5Ctx, ":", 1);
1158 		MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce));
1159 		MD5Update(&Md5Ctx, ":", 1);
1160 		MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce));
1161 		MD5Final(HA1, &Md5Ctx);
1162 	};
1163 	CvtHex(HA1, SessionKey);
1164 }
1165 
1166 /* calculate request-digest/response-digest as per HTTP Digest spec */
1167 static void
1168 DigestCalcResponse(
1169 	IN HASHHEX HA1,           /* H(A1) */
1170 	IN char * pszNonce,       /* nonce from server */
1171 	IN char * pszNonceCount,  /* 8 hex digits */
1172 	IN char * pszCNonce,      /* client nonce */
1173 	IN char * pszQop,         /* qop-value: "", "auth", "auth-int" */
1174 	IN char * pszMethod,      /* method from the request */
1175 	IN char * pszDigestUri,   /* requested URL */
1176 	IN HASHHEX HEntity,       /* H(entity body) if qop="auth-int" */
1177 	OUT HASHHEX Response      /* request-digest or response-digest */
1178 	)
1179 {
1180 /*	DEBUG(fprintf(stderr,
1181 		      "Calc: HA1[%s] Nonce[%s] qop[%s] method[%s] URI[%s]\n",
1182 		      HA1, pszNonce, pszQop, pszMethod, pszDigestUri));*/
1183 	MD5_CTX Md5Ctx;
1184 	HASH HA2;
1185 	HASH RespHash;
1186 	HASHHEX HA2Hex;
1187 
1188 	// calculate H(A2)
1189 	MD5Init(&Md5Ctx);
1190 	MD5Update(&Md5Ctx, pszMethod, strlen(pszMethod));
1191 	MD5Update(&Md5Ctx, ":", 1);
1192 	MD5Update(&Md5Ctx, pszDigestUri, strlen(pszDigestUri));
1193 	if (strcasecmp(pszQop, "auth-int") == 0) {
1194 		MD5Update(&Md5Ctx, ":", 1);
1195 		MD5Update(&Md5Ctx, HEntity, HASHHEXLEN);
1196 	};
1197 	MD5Final(HA2, &Md5Ctx);
1198 	CvtHex(HA2, HA2Hex);
1199 
1200 	// calculate response
1201 	MD5Init(&Md5Ctx);
1202 	MD5Update(&Md5Ctx, HA1, HASHHEXLEN);
1203 	MD5Update(&Md5Ctx, ":", 1);
1204 	MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce));
1205 	MD5Update(&Md5Ctx, ":", 1);
1206 	if (*pszQop) {
1207 		MD5Update(&Md5Ctx, pszNonceCount, strlen(pszNonceCount));
1208 		MD5Update(&Md5Ctx, ":", 1);
1209 		MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce));
1210 		MD5Update(&Md5Ctx, ":", 1);
1211 		MD5Update(&Md5Ctx, pszQop, strlen(pszQop));
1212 		MD5Update(&Md5Ctx, ":", 1);
1213 	};
1214 	MD5Update(&Md5Ctx, HA2Hex, HASHHEXLEN);
1215 	MD5Final(RespHash, &Md5Ctx);
1216 	CvtHex(RespHash, Response);
1217 }
1218 
1219 /*
1220  * Generate/Send a Digest authorization header
1221  * This looks like: [Proxy-]Authorization: credentials
1222  *
1223  *  credentials      = "Digest" digest-response
1224  *  digest-response  = 1#( username | realm | nonce | digest-uri
1225  *                      | response | [ algorithm ] | [cnonce] |
1226  *                      [opaque] | [message-qop] |
1227  *                          [nonce-count]  | [auth-param] )
1228  *  username         = "username" "=" username-value
1229  *  username-value   = quoted-string
1230  *  digest-uri       = "uri" "=" digest-uri-value
1231  *  digest-uri-value = request-uri   ; As specified by HTTP/1.1
1232  *  message-qop      = "qop" "=" qop-value
1233  *  cnonce           = "cnonce" "=" cnonce-value
1234  *  cnonce-value     = nonce-value
1235  *  nonce-count      = "nc" "=" nc-value
1236  *  nc-value         = 8LHEX
1237  *  response         = "response" "=" request-digest
1238  *  request-digest = <"> 32LHEX <">
1239  */
1240 static int
1241 http_digest_auth(conn_t *conn, const char *hdr, http_auth_challenge_t *c,
1242 		 http_auth_params_t *parms, struct url *url)
1243 {
1244 	int r;
1245 	char noncecount[10];
1246 	char cnonce[40];
1247 	char *options = NULL;
1248 
1249 	if (!c->realm || !c->nonce) {
1250 		DEBUG(fprintf(stderr, "realm/nonce not set in challenge\n"));
1251 		return(-1);
1252 	}
1253 	if (!c->algo)
1254 		c->algo = strdup("");
1255 
1256 	if (asprintf(&options, "%s%s%s%s",
1257 		     *c->algo? ",algorithm=" : "", c->algo,
1258 		     c->opaque? ",opaque=" : "", c->opaque?c->opaque:"")== -1)
1259 		return (-1);
1260 
1261 	if (!c->qop) {
1262 		c->qop = strdup("");
1263 		*noncecount = 0;
1264 		*cnonce = 0;
1265 	} else {
1266 		c->nc++;
1267 		sprintf(noncecount, "%08x", c->nc);
1268 		/* We don't try very hard with the cnonce ... */
1269 		sprintf(cnonce, "%x%lx", getpid(), (unsigned long)time(0));
1270 	}
1271 
1272 	HASHHEX HA1;
1273 	DigestCalcHA1(c->algo, parms->user, c->realm,
1274 		      parms->password, c->nonce, cnonce, HA1);
1275 	DEBUG(fprintf(stderr, "HA1: [%s]\n", HA1));
1276 	HASHHEX digest;
1277 	DigestCalcResponse(HA1, c->nonce, noncecount, cnonce, c->qop,
1278 			   "GET", url->doc, "", digest);
1279 
1280 	if (c->qop[0]) {
1281 		r = http_cmd(conn, "%s: Digest username=\"%s\",realm=\"%s\","
1282 			     "nonce=\"%s\",uri=\"%s\",response=\"%s\","
1283 			     "qop=\"auth\", cnonce=\"%s\", nc=%s%s",
1284 			     hdr, parms->user, c->realm,
1285 			     c->nonce, url->doc, digest,
1286 			     cnonce, noncecount, options);
1287 	} else {
1288 		r = http_cmd(conn, "%s: Digest username=\"%s\",realm=\"%s\","
1289 			     "nonce=\"%s\",uri=\"%s\",response=\"%s\"%s",
1290 			     hdr, parms->user, c->realm,
1291 			     c->nonce, url->doc, digest, options);
1292 	}
1293 	if (options)
1294 		free(options);
1295 	return (r);
1296 }
1297 
1298 /*
1299  * Encode username and password
1300  */
1301 static int
1302 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
1303 {
1304 	char *upw, *auth;
1305 	int r;
1306 
1307 	DEBUG(fprintf(stderr, "basic: usr: [%s]\n", usr));
1308 	DEBUG(fprintf(stderr, "basic: pwd: [%s]\n", pwd));
1309 	if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
1310 		return (-1);
1311 	auth = http_base64(upw);
1312 	free(upw);
1313 	if (auth == NULL)
1314 		return (-1);
1315 	r = http_cmd(conn, "%s: Basic %s", hdr, auth);
1316 	free(auth);
1317 	return (r);
1318 }
1319 
1320 /*
1321  * Chose the challenge to answer and call the appropriate routine to
1322  * produce the header.
1323  */
1324 static int
1325 http_authorize(conn_t *conn, const char *hdr, http_auth_challenges_t *cs,
1326 	       http_auth_params_t *parms, struct url *url)
1327 {
1328 	http_auth_challenge_t *digest = NULL;
1329 	int i;
1330 
1331 	/* If user or pass are null we're not happy */
1332 	if (!parms->user || !parms->password) {
1333 		DEBUG(fprintf(stderr, "NULL usr or pass\n"));
1334 		return (-1);
1335 	}
1336 
1337 	/* Look for a Digest */
1338 	for (i = 0; i < cs->count; i++) {
1339 		if (cs->challenges[i]->scheme == HTTPAS_DIGEST)
1340 			digest = cs->challenges[i];
1341 	}
1342 
1343 	/* Error if "Digest" was specified and there is no Digest challenge */
1344 	if (!digest && (parms->scheme &&
1345 			!strcasecmp(parms->scheme, "digest"))) {
1346 		DEBUG(fprintf(stderr,
1347 			      "Digest auth in env, not supported by peer\n"));
1348 		return (-1);
1349 	}
1350 	/*
1351 	 * If "basic" was specified in the environment, or there is no Digest
1352 	 * challenge, do the basic thing. Don't need a challenge for this,
1353 	 * so no need to check basic!=NULL
1354 	 */
1355 	if (!digest || (parms->scheme && !strcasecmp(parms->scheme,"basic")))
1356 		return (http_basic_auth(conn,hdr,parms->user,parms->password));
1357 
1358 	/* Else, prefer digest. We just checked that it's not NULL */
1359 	return (http_digest_auth(conn, hdr, digest, parms, url));
1360 }
1361 
1362 /*****************************************************************************
1363  * Helper functions for connecting to a server or proxy
1364  */
1365 
1366 /*
1367  * Connect to the correct HTTP server or proxy.
1368  */
1369 static conn_t *
1370 http_connect(struct url *URL, struct url *purl, const char *flags)
1371 {
1372 	conn_t *conn;
1373 	int verbose;
1374 	int af, val;
1375 
1376 #ifdef INET6
1377 	af = AF_UNSPEC;
1378 #else
1379 	af = AF_INET;
1380 #endif
1381 
1382 	verbose = CHECK_FLAG('v');
1383 	if (CHECK_FLAG('4'))
1384 		af = AF_INET;
1385 #ifdef INET6
1386 	else if (CHECK_FLAG('6'))
1387 		af = AF_INET6;
1388 #endif
1389 
1390 	if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
1391 		URL = purl;
1392 	} else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
1393 		/* can't talk http to an ftp server */
1394 		/* XXX should set an error code */
1395 		return (NULL);
1396 	}
1397 
1398 	if ((conn = fetch_connect(URL->host, URL->port, af, verbose)) == NULL)
1399 		/* fetch_connect() has already set an error code */
1400 		return (NULL);
1401 	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
1402 	    fetch_ssl(conn, verbose) == -1) {
1403 		fetch_close(conn);
1404 		/* grrr */
1405 		errno = EAUTH;
1406 		fetch_syserr();
1407 		return (NULL);
1408 	}
1409 
1410 	val = 1;
1411 	setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
1412 
1413 	return (conn);
1414 }
1415 
1416 static struct url *
1417 http_get_proxy(struct url * url, const char *flags)
1418 {
1419 	struct url *purl;
1420 	char *p;
1421 
1422 	if (flags != NULL && strchr(flags, 'd') != NULL)
1423 		return (NULL);
1424 	if (fetch_no_proxy_match(url->host))
1425 		return (NULL);
1426 	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
1427 	    *p && (purl = fetchParseURL(p))) {
1428 		if (!*purl->scheme)
1429 			strcpy(purl->scheme, SCHEME_HTTP);
1430 		if (!purl->port)
1431 			purl->port = fetch_default_proxy_port(purl->scheme);
1432 		if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
1433 			return (purl);
1434 		fetchFreeURL(purl);
1435 	}
1436 	return (NULL);
1437 }
1438 
/*
 * Copy the text of an HTML document from in to out, line by line,
 * leaving out the markup: everything from '<' up to the matching '>'
 * is dropped, and so is everything from "<!--" up to "-->".  Tag and
 * comment state carries over from one line to the next.  Trailing
 * whitespace is removed from every input line, and every input line
 * produces exactly one newline on output.
 */
static void
http_print_html(FILE *out, FILE *in)
{
	size_t len;
	char *line, *p, *q;
	int comment, tag;

	comment = tag = 0;
	while ((line = fgetln(in, &len)) != NULL) {
		while (len && isspace((unsigned char)line[len - 1]))
			--len;
		/* p: start of text not yet written; q: scan position */
		for (p = q = line; q < line + len; ++q) {
			if (comment && *q == '-') {
				/*
				 * The line returned by fgetln() is not
				 * NUL-terminated, so only a length-bounded
				 * comparison may be used on it.
				 */
				if (q + 2 < line + len &&
				    strncmp(q, "-->", 3) == 0) {
					tag = comment = 0;
					q += 2;
					/* text resumes after the comment */
					p = q + 1;
				}
			} else if (tag && !comment && *q == '>') {
				p = q + 1;
				tag = 0;
			} else if (!tag && *q == '<') {
				if (q > p)
					fwrite(p, q - p, 1, out);
				tag = 1;
				if (q + 3 < line + len &&
				    strncmp(q, "<!--", 4) == 0) {
					comment = 1;
					q += 3;
				}
			}
		}
		if (!tag && q > p)
			fwrite(p, q - p, 1, out);
		fputc('\n', out);
	}
}
1476 
1477 
1478 /*****************************************************************************
1479  * Core
1480  */
1481 
1482 /*
1483  * Send a request and process the reply
1484  *
1485  * XXX This function is way too long, the do..while loop should be split
1486  * XXX off into a separate function.
1487  */
1488 FILE *
1489 http_request(struct url *URL, const char *op, struct url_stat *us,
1490 	struct url *purl, const char *flags)
1491 {
1492 	char timebuf[80];
1493 	char hbuf[MAXHOSTNAMELEN + 7], *host;
1494 	conn_t *conn;
1495 	struct url *url, *new;
1496 	int chunked, direct, ims, noredirect, verbose;
1497 	int e, i, n, val;
1498 	off_t offset, clength, length, size;
1499 	time_t mtime;
1500 	const char *p;
1501 	FILE *f;
1502 	hdr_t h;
1503 	struct tm *timestruct;
1504 	http_headerbuf_t headerbuf;
1505 	http_auth_challenges_t server_challenges;
1506 	http_auth_challenges_t proxy_challenges;
1507 
1508 	/* The following calls don't allocate anything */
1509 	init_http_headerbuf(&headerbuf);
1510 	init_http_auth_challenges(&server_challenges);
1511 	init_http_auth_challenges(&proxy_challenges);
1512 
1513 	direct = CHECK_FLAG('d');
1514 	noredirect = CHECK_FLAG('A');
1515 	verbose = CHECK_FLAG('v');
1516 	ims = CHECK_FLAG('i');
1517 
1518 	if (direct && purl) {
1519 		fetchFreeURL(purl);
1520 		purl = NULL;
1521 	}
1522 
1523 	/* try the provided URL first */
1524 	url = URL;
1525 
1526 	n = MAX_REDIRECT;
1527 	i = 0;
1528 
1529 	e = HTTP_PROTOCOL_ERROR;
1530 	do {
1531 		new = NULL;
1532 		chunked = 0;
1533 		offset = 0;
1534 		clength = -1;
1535 		length = -1;
1536 		size = -1;
1537 		mtime = 0;
1538 
1539 		/* check port */
1540 		if (!url->port)
1541 			url->port = fetch_default_port(url->scheme);
1542 
1543 		/* were we redirected to an FTP URL? */
1544 		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
1545 			if (strcmp(op, "GET") == 0)
1546 				return (ftp_request(url, "RETR", us, purl, flags));
1547 			else if (strcmp(op, "HEAD") == 0)
1548 				return (ftp_request(url, "STAT", us, purl, flags));
1549 		}
1550 
1551 		/* connect to server or proxy */
1552 		if ((conn = http_connect(url, purl, flags)) == NULL)
1553 			goto ouch;
1554 
1555 		host = url->host;
1556 #ifdef INET6
1557 		if (strchr(url->host, ':')) {
1558 			snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
1559 			host = hbuf;
1560 		}
1561 #endif
1562 		if (url->port != fetch_default_port(url->scheme)) {
1563 			if (host != hbuf) {
1564 				strcpy(hbuf, host);
1565 				host = hbuf;
1566 			}
1567 			snprintf(hbuf + strlen(hbuf),
1568 			    sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
1569 		}
1570 
1571 		/* send request */
1572 		if (verbose)
1573 			fetch_info("requesting %s://%s%s",
1574 			    url->scheme, host, url->doc);
1575 		if (purl) {
1576 			http_cmd(conn, "%s %s://%s%s HTTP/1.1",
1577 			    op, url->scheme, host, url->doc);
1578 		} else {
1579 			http_cmd(conn, "%s %s HTTP/1.1",
1580 			    op, url->doc);
1581 		}
1582 
1583 		if (ims && url->ims_time) {
1584 			timestruct = gmtime((time_t *)&url->ims_time);
1585 			(void)strftime(timebuf, 80, "%a, %d %b %Y %T GMT",
1586 			    timestruct);
1587 			if (verbose)
1588 				fetch_info("If-Modified-Since: %s", timebuf);
1589 			http_cmd(conn, "If-Modified-Since: %s", timebuf);
1590 		}
1591 		/* virtual host */
1592 		http_cmd(conn, "Host: %s", host);
1593 
1594 		/*
1595 		 * Proxy authorization: we only send auth after we received
1596 		 * a 407 error. We do not first try basic anyway (changed
1597 		 * when support was added for digest-auth)
1598 		 */
1599 		if (purl && proxy_challenges.valid) {
1600 			http_auth_params_t aparams;
1601 			init_http_auth_params(&aparams);
1602 			if (*purl->user || *purl->pwd) {
1603 				aparams.user = purl->user ?
1604 					strdup(purl->user) : strdup("");
1605 				aparams.password = purl->pwd?
1606 					strdup(purl->pwd) : strdup("");
1607 			} else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL &&
1608 				   *p != '\0') {
1609 				if (http_authfromenv(p, &aparams) < 0) {
1610 					http_seterr(HTTP_NEED_PROXY_AUTH);
1611 					goto ouch;
1612 				}
1613 			}
1614 			http_authorize(conn, "Proxy-Authorization",
1615 				       &proxy_challenges, &aparams, url);
1616 			clean_http_auth_params(&aparams);
1617 		}
1618 
1619 		/*
1620 		 * Server authorization: we never send "a priori"
1621 		 * Basic auth, which used to be done if user/pass were
1622 		 * set in the url. This would be weird because we'd send the
1623 		 * password in the clear even if Digest is finally to be
1624 		 * used (it would have made more sense for the
1625 		 * pre-digest version to do this when Basic was specified
1626 		 * in the environment)
1627 		 */
1628 		if (server_challenges.valid) {
1629 			http_auth_params_t aparams;
1630 			init_http_auth_params(&aparams);
1631 			if (*url->user || *url->pwd) {
1632 				aparams.user = url->user ?
1633 					strdup(url->user) : strdup("");
1634 				aparams.password = url->pwd ?
1635 					strdup(url->pwd) : strdup("");
1636 			} else if ((p = getenv("HTTP_AUTH")) != NULL &&
1637 				   *p != '\0') {
1638 				if (http_authfromenv(p, &aparams) < 0) {
1639 					http_seterr(HTTP_NEED_AUTH);
1640 					goto ouch;
1641 				}
1642 			} else if (fetchAuthMethod &&
1643 				   fetchAuthMethod(url) == 0) {
1644 				aparams.user = url->user ?
1645 					strdup(url->user) : strdup("");
1646 				aparams.password = url->pwd ?
1647 					strdup(url->pwd) : strdup("");
1648 			} else {
1649 				http_seterr(HTTP_NEED_AUTH);
1650 				goto ouch;
1651 			}
1652 			http_authorize(conn, "Authorization",
1653 				       &server_challenges, &aparams, url);
1654 			clean_http_auth_params(&aparams);
1655 		}
1656 
1657 		/* other headers */
1658 		if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
1659 			if (strcasecmp(p, "auto") == 0)
1660 				http_cmd(conn, "Referer: %s://%s%s",
1661 				    url->scheme, host, url->doc);
1662 			else
1663 				http_cmd(conn, "Referer: %s", p);
1664 		}
1665 		if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
1666 			http_cmd(conn, "User-Agent: %s", p);
1667 		else
1668 			http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname());
1669 		if (url->offset > 0)
1670 			http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset);
1671 		http_cmd(conn, "Connection: close");
1672 		http_cmd(conn, "%s", "");
1673 
1674 		/*
1675 		 * Force the queued request to be dispatched.  Normally, one
1676 		 * would do this with shutdown(2) but squid proxies can be
1677 		 * configured to disallow such half-closed connections.  To
1678 		 * be compatible with such configurations, fiddle with socket
1679 		 * options to force the pending data to be written.
1680 		 */
1681 		val = 0;
1682 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
1683 			   sizeof(val));
1684 		val = 1;
1685 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
1686 			   sizeof(val));
1687 
1688 		/* get reply */
1689 		switch (http_get_reply(conn)) {
1690 		case HTTP_OK:
1691 		case HTTP_PARTIAL:
1692 		case HTTP_NOT_MODIFIED:
1693 			/* fine */
1694 			break;
1695 		case HTTP_MOVED_PERM:
1696 		case HTTP_MOVED_TEMP:
1697 		case HTTP_SEE_OTHER:
1698 		case HTTP_USE_PROXY:
1699 			/*
1700 			 * Not so fine, but we still have to read the
1701 			 * headers to get the new location.
1702 			 */
1703 			break;
1704 		case HTTP_NEED_AUTH:
1705 			if (server_challenges.valid) {
1706 				/*
1707 				 * We already sent out authorization code,
1708 				 * so there's nothing more we can do.
1709 				 */
1710 				http_seterr(conn->err);
1711 				goto ouch;
1712 			}
1713 			/* try again, but send the password this time */
1714 			if (verbose)
1715 				fetch_info("server requires authorization");
1716 			break;
1717 		case HTTP_NEED_PROXY_AUTH:
1718 			if (proxy_challenges.valid) {
1719 				/*
1720 				 * We already sent our proxy
1721 				 * authorization code, so there's
1722 				 * nothing more we can do. */
1723 				http_seterr(conn->err);
1724 				goto ouch;
1725 			}
1726 			/* try again, but send the password this time */
1727 			if (verbose)
1728 				fetch_info("proxy requires authorization");
1729 			break;
1730 		case HTTP_BAD_RANGE:
1731 			/*
1732 			 * This can happen if we ask for 0 bytes because
1733 			 * we already have the whole file.  Consider this
1734 			 * a success for now, and check sizes later.
1735 			 */
1736 			break;
1737 		case HTTP_PROTOCOL_ERROR:
1738 			/* fall through */
1739 		case -1:
1740 			fetch_syserr();
1741 			goto ouch;
1742 		default:
1743 			http_seterr(conn->err);
1744 			if (!verbose)
1745 				goto ouch;
1746 			/* fall through so we can get the full error message */
1747 		}
1748 
1749 		/* get headers. http_next_header expects one line readahead */
1750 		if (fetch_getln(conn) == -1) {
1751 		    fetch_syserr();
1752 		    goto ouch;
1753 		}
1754 		do {
1755 		    switch ((h = http_next_header(conn, &headerbuf, &p))) {
1756 			case hdr_syserror:
1757 				fetch_syserr();
1758 				goto ouch;
1759 			case hdr_error:
1760 				http_seterr(HTTP_PROTOCOL_ERROR);
1761 				goto ouch;
1762 			case hdr_content_length:
1763 				http_parse_length(p, &clength);
1764 				break;
1765 			case hdr_content_range:
1766 				http_parse_range(p, &offset, &length, &size);
1767 				break;
1768 			case hdr_last_modified:
1769 				http_parse_mtime(p, &mtime);
1770 				break;
1771 			case hdr_location:
1772 				if (!HTTP_REDIRECT(conn->err))
1773 					break;
1774 				/*
1775 				 * if the A flag is set, we don't follow
1776 				 * temporary redirects.
1777 				 */
1778 				if (noredirect &&
1779 				    conn->err != HTTP_MOVED_PERM &&
1780 				    conn->err != HTTP_PERM_REDIRECT &&
1781 				    conn->err != HTTP_USE_PROXY) {
1782 					n = 1;
1783 					break;
1784                                 }
1785 				if (new)
1786 					free(new);
1787 				if (verbose)
1788 					fetch_info("%d redirect to %s", conn->err, p);
1789 				if (*p == '/')
1790 					/* absolute path */
1791 					new = fetchMakeURL(url->scheme, url->host, url->port, p,
1792 					    url->user, url->pwd);
1793 				else
1794 					new = fetchParseURL(p);
1795 				if (new == NULL) {
1796 					/* XXX should set an error code */
1797 					DEBUG(fprintf(stderr, "failed to parse new URL\n"));
1798 					goto ouch;
1799 				}
1800 
1801 				/* Only copy credentials if the host matches */
1802 				if (!strcmp(new->host, url->host) && !*new->user && !*new->pwd) {
1803 					strcpy(new->user, url->user);
1804 					strcpy(new->pwd, url->pwd);
1805 				}
1806 				new->offset = url->offset;
1807 				new->length = url->length;
1808 				break;
1809 			case hdr_transfer_encoding:
1810 				/* XXX weak test*/
1811 				chunked = (strcasecmp(p, "chunked") == 0);
1812 				break;
1813 			case hdr_www_authenticate:
1814 				if (conn->err != HTTP_NEED_AUTH)
1815 					break;
1816 				if (http_parse_authenticate(p, &server_challenges) == 0)
1817 					++n;
1818 				break;
1819 			case hdr_proxy_authenticate:
1820 				if (conn->err != HTTP_NEED_PROXY_AUTH)
1821 					break;
1822 				if (http_parse_authenticate(p, &proxy_challenges) == 0)
1823 					++n;
1824 				break;
1825 			case hdr_end:
1826 				/* fall through */
1827 			case hdr_unknown:
1828 				/* ignore */
1829 				break;
1830 			}
1831 		} while (h > hdr_end);
1832 
1833 		/* we need to provide authentication */
1834 		if (conn->err == HTTP_NEED_AUTH ||
1835 		    conn->err == HTTP_NEED_PROXY_AUTH) {
1836 			e = conn->err;
1837 			if ((conn->err == HTTP_NEED_AUTH &&
1838 			     !server_challenges.valid) ||
1839 			    (conn->err == HTTP_NEED_PROXY_AUTH &&
1840 			     !proxy_challenges.valid)) {
1841 				/* 401/7 but no www/proxy-authenticate ?? */
1842 				DEBUG(fprintf(stderr, "401/7 and no auth header\n"));
1843 				goto ouch;
1844 			}
1845 			fetch_close(conn);
1846 			conn = NULL;
1847 			continue;
1848 		}
1849 
1850 		/* requested range not satisfiable */
1851 		if (conn->err == HTTP_BAD_RANGE) {
1852 			if (url->offset == size && url->length == 0) {
1853 				/* asked for 0 bytes; fake it */
1854 				offset = url->offset;
1855 				clength = -1;
1856 				conn->err = HTTP_OK;
1857 				break;
1858 			} else {
1859 				http_seterr(conn->err);
1860 				goto ouch;
1861 			}
1862 		}
1863 
1864 		/* we have a hit or an error */
1865 		if (conn->err == HTTP_OK
1866 		    || conn->err == HTTP_NOT_MODIFIED
1867 		    || conn->err == HTTP_PARTIAL
1868 		    || HTTP_ERROR(conn->err))
1869 			break;
1870 
1871 		/* all other cases: we got a redirect */
1872 		e = conn->err;
1873 		clean_http_auth_challenges(&server_challenges);
1874 		fetch_close(conn);
1875 		conn = NULL;
1876 		if (!new) {
1877 			DEBUG(fprintf(stderr, "redirect with no new location\n"));
1878 			break;
1879 		}
1880 		if (url != URL)
1881 			fetchFreeURL(url);
1882 		url = new;
1883 	} while (++i < n);
1884 
1885 	/* we failed, or ran out of retries */
1886 	if (conn == NULL) {
1887 		http_seterr(e);
1888 		goto ouch;
1889 	}
1890 
1891 	DEBUG(fprintf(stderr, "offset %lld, length %lld,"
1892 		  " size %lld, clength %lld\n",
1893 		  (long long)offset, (long long)length,
1894 		  (long long)size, (long long)clength));
1895 
1896 	if (conn->err == HTTP_NOT_MODIFIED) {
1897 		http_seterr(HTTP_NOT_MODIFIED);
1898 		return (NULL);
1899 	}
1900 
1901 	/* check for inconsistencies */
1902 	if (clength != -1 && length != -1 && clength != length) {
1903 		http_seterr(HTTP_PROTOCOL_ERROR);
1904 		goto ouch;
1905 	}
1906 	if (clength == -1)
1907 		clength = length;
1908 	if (clength != -1)
1909 		length = offset + clength;
1910 	if (length != -1 && size != -1 && length != size) {
1911 		http_seterr(HTTP_PROTOCOL_ERROR);
1912 		goto ouch;
1913 	}
1914 	if (size == -1)
1915 		size = length;
1916 
1917 	/* fill in stats */
1918 	if (us) {
1919 		us->size = size;
1920 		us->atime = us->mtime = mtime;
1921 	}
1922 
1923 	/* too far? */
1924 	if (URL->offset > 0 && offset > URL->offset) {
1925 		http_seterr(HTTP_PROTOCOL_ERROR);
1926 		goto ouch;
1927 	}
1928 
1929 	/* report back real offset and size */
1930 	URL->offset = offset;
1931 	URL->length = clength;
1932 
1933 	/* wrap it up in a FILE */
1934 	if ((f = http_funopen(conn, chunked)) == NULL) {
1935 		fetch_syserr();
1936 		goto ouch;
1937 	}
1938 
1939 	if (url != URL)
1940 		fetchFreeURL(url);
1941 	if (purl)
1942 		fetchFreeURL(purl);
1943 
1944 	if (HTTP_ERROR(conn->err)) {
1945 		http_print_html(stderr, f);
1946 		fclose(f);
1947 		f = NULL;
1948 	}
1949 	clean_http_headerbuf(&headerbuf);
1950 	clean_http_auth_challenges(&server_challenges);
1951 	clean_http_auth_challenges(&proxy_challenges);
1952 	return (f);
1953 
1954 ouch:
1955 	if (url != URL)
1956 		fetchFreeURL(url);
1957 	if (purl)
1958 		fetchFreeURL(purl);
1959 	if (conn != NULL)
1960 		fetch_close(conn);
1961 	clean_http_headerbuf(&headerbuf);
1962 	clean_http_auth_challenges(&server_challenges);
1963 	clean_http_auth_challenges(&proxy_challenges);
1964 	return (NULL);
1965 }
1966 
1967 
1968 /*****************************************************************************
1969  * Entry points
1970  */
1971 
/*
 * Retrieve and stat a file by HTTP: issues a GET request through the
 * proxy selected by http_get_proxy(), if any.
 */
FILE *
fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	struct url *proxy;

	proxy = http_get_proxy(URL, flags);
	return (http_request(URL, "GET", us, proxy, flags));
}
1980 
/*
 * Retrieve a file by HTTP, without collecting its metadata
 */
FILE *
fetchGetHTTP(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetHTTP(URL, NULL, flags);
	return (stream);
}
1989 
1990 /*
1991  * Store a file by HTTP
1992  */
1993 FILE *
1994 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1995 {
1996 	warnx("fetchPutHTTP(): not implemented");
1997 	return (NULL);
1998 }
1999 
/*
 * Get an HTTP document's metadata: issues a HEAD request, letting
 * http_request() fill in us, and discards the resulting stream.
 * Returns 0 on success and -1 on failure.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	struct url *proxy;
	FILE *reply;

	proxy = http_get_proxy(URL, flags);
	reply = http_request(URL, "HEAD", us, proxy, flags);
	if (reply != NULL) {
		fclose(reply);
		return (0);
	}
	return (-1);
}
2014 
2015 /*
2016  * List a directory
2017  */
2018 struct url_ent *
2019 fetchListHTTP(struct url *url __unused, const char *flags __unused)
2020 {
2021 	warnx("fetchListHTTP(): not implemented");
2022 	return (NULL);
2023 }
2024