1 /* $NetBSD: http.c,v 1.3 2014/01/07 02:13:00 joerg Exp $ */
2 /*-
3 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4 * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5 * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
32 */
33
34 /*
35 * The following copyright applies to the base64 code:
36 *
37 *-
38 * Copyright 1997 Massachusetts Institute of Technology
39 *
40 * Permission to use, copy, modify, and distribute this software and
41 * its documentation for any purpose and without fee is hereby
42 * granted, provided that both the above copyright notice and this
43 * permission notice appear in all copies, that both the above
44 * copyright notice and this permission notice appear in all
45 * supporting documentation, and that the name of M.I.T. not be used
46 * in advertising or publicity pertaining to distribution of the
47 * software without specific, written prior permission. M.I.T. makes
48 * no representations about the suitability of this software for any
49 * purpose. It is provided "as is" without express or implied
50 * warranty.
51 *
52 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
53 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 */
65
66 #if defined(__linux__) || defined(__MINT__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 #define _GNU_SOURCE
69 #endif
70
71 /* Needed for gmtime_r on Interix */
72 #define _REENTRANT
73
74 #if HAVE_CONFIG_H
75 #include "config.h"
76 #endif
77 #ifndef NETBSD
78 #include <nbcompat.h>
79 #endif
80
81 #include <sys/types.h>
82 #include <sys/socket.h>
83
84 #include <ctype.h>
85 #include <errno.h>
86 #include <locale.h>
87 #include <stdarg.h>
88 #ifndef NETBSD
89 #include <nbcompat/stdio.h>
90 #else
91 #include <stdio.h>
92 #endif
93 #include <stdlib.h>
94 #include <string.h>
95 #include <time.h>
96 #include <unistd.h>
97
98 #include <netinet/in.h>
99 #include <netinet/tcp.h>
100
101 #ifndef NETBSD
102 #include <nbcompat/netdb.h>
103 #else
104 #include <netdb.h>
105 #endif
106
107 #include <arpa/inet.h>
108
109 #include "fetch.h"
110 #include "common.h"
111 #include "httperr.h"
112
/* Upper bound on the number of requests issued while following redirects */
#define MAX_REDIRECT		5

/* Status codes that receive special treatment in this file */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
/* Pseudo status returned when the status line cannot be parsed */
#define HTTP_PROTOCOL_ERROR	999

/* Non-zero if xyz is one of the status codes treated as a redirect */
#define HTTP_REDIRECT(xyz)	((xyz) == HTTP_MOVED_PERM		\
				 || (xyz) == HTTP_MOVED_TEMP		\
				 || (xyz) == HTTP_TEMP_REDIRECT		\
				 || (xyz) == HTTP_SEE_OTHER)

/*
 * Non-zero for status codes 401 through 598.
 * NOTE(review): 400 and 599 are excluded by the strict comparisons;
 * confirm whether that is intentional.
 */
#define HTTP_ERROR(xyz)		((xyz) > 400 && (xyz) < 599)
135
136
137 /*****************************************************************************
138 * I/O functions for decoding chunked streams
139 */
140
141 struct httpio
142 {
143 conn_t *conn; /* connection */
144 int chunked; /* chunked mode */
145 int keep_alive; /* keep-alive mode */
146 char *buf; /* chunk buffer */
147 size_t bufsize; /* size of chunk buffer */
148 ssize_t buflen; /* amount of data currently in buffer */
149 size_t bufpos; /* current read offset in buffer */
150 int eof; /* end-of-file flag */
151 int error; /* error flag */
152 size_t chunksize; /* remaining size of current chunk */
153 off_t contentlength; /* remaining size of the content */
154 };
155
156 /*
157 * Get next chunk header
158 */
159 static ssize_t
http_new_chunk(struct httpio * io)160 http_new_chunk(struct httpio *io)
161 {
162 char *p;
163
164 if (fetch_getln(io->conn) == -1)
165 return (-1);
166
167 if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
168 return (-1);
169
170 for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
171 if (*p == ';')
172 break;
173 if (!isxdigit((unsigned char)*p))
174 return (-1);
175 if (isdigit((unsigned char)*p)) {
176 io->chunksize = io->chunksize * 16 +
177 *p - '0';
178 } else {
179 io->chunksize = io->chunksize * 16 +
180 10 + tolower((unsigned char)*p) - 'a';
181 }
182 }
183
184 return (io->chunksize);
185 }
186
187 /*
188 * Grow the input buffer to at least len bytes
189 */
190 static int
http_growbuf(struct httpio * io,size_t len)191 http_growbuf(struct httpio *io, size_t len)
192 {
193 char *tmp;
194
195 if (io->bufsize >= len)
196 return (0);
197
198 if ((tmp = realloc(io->buf, len)) == NULL)
199 return (-1);
200 io->buf = tmp;
201 io->bufsize = len;
202 return (0);
203 }
204
205 /*
206 * Fill the input buffer, do chunk decoding on the fly
207 */
208 static ssize_t
http_fillbuf(struct httpio * io,size_t len)209 http_fillbuf(struct httpio *io, size_t len)
210 {
211 if (io->error)
212 return (-1);
213 if (io->eof)
214 return (0);
215
216 if (io->contentlength >= 0 && (off_t)len > io->contentlength)
217 len = io->contentlength;
218
219 if (io->chunked == 0) {
220 if (http_growbuf(io, len) == -1)
221 return (-1);
222 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
223 io->error = 1;
224 return (-1);
225 }
226 if (io->contentlength)
227 io->contentlength -= io->buflen;
228 io->bufpos = 0;
229 return (io->buflen);
230 }
231
232 if (io->chunksize == 0) {
233 switch (http_new_chunk(io)) {
234 case -1:
235 io->error = 1;
236 return (-1);
237 case 0:
238 io->eof = 1;
239 if (fetch_getln(io->conn) == -1)
240 return (-1);
241 return (0);
242 }
243 }
244
245 if (len > io->chunksize)
246 len = io->chunksize;
247 if (http_growbuf(io, len) == -1)
248 return (-1);
249 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
250 io->error = 1;
251 return (-1);
252 }
253 io->chunksize -= io->buflen;
254 if (io->contentlength >= 0)
255 io->contentlength -= io->buflen;
256
257 if (io->chunksize == 0) {
258 char endl[2];
259 ssize_t len2;
260
261 len2 = fetch_read(io->conn, endl, 2);
262 if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
263 return (-1);
264 if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
265 return (-1);
266 }
267
268 io->bufpos = 0;
269
270 return (io->buflen);
271 }
272
273 /*
274 * Read function
275 */
276 static ssize_t
http_readfn(void * v,void * buf,size_t len)277 http_readfn(void *v, void *buf, size_t len)
278 {
279 struct httpio *io = (struct httpio *)v;
280 size_t l, pos;
281
282 if (io->error)
283 return (-1);
284 if (io->eof)
285 return (0);
286
287 for (pos = 0; len > 0; pos += l, len -= l) {
288 /* empty buffer */
289 if (!io->buf || (ssize_t)io->bufpos == io->buflen)
290 if (http_fillbuf(io, len) < 1)
291 break;
292 l = io->buflen - io->bufpos;
293 if (len < l)
294 l = len;
295 memcpy((char *)buf + pos, io->buf + io->bufpos, l);
296 io->bufpos += l;
297 }
298
299 if (!pos && io->error)
300 return (-1);
301 return (pos);
302 }
303
304 /*
305 * Write function
306 */
307 static ssize_t
http_writefn(void * v,const void * buf,size_t len)308 http_writefn(void *v, const void *buf, size_t len)
309 {
310 struct httpio *io = (struct httpio *)v;
311
312 return (fetch_write(io->conn, buf, len));
313 }
314
315 /*
316 * Close function
317 */
318 static void
http_closefn(void * v)319 http_closefn(void *v)
320 {
321 struct httpio *io = (struct httpio *)v;
322
323 if (io->keep_alive) {
324 int val;
325
326 val = 0;
327 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
328 (socklen_t)sizeof(val));
329 fetch_cache_put(io->conn, fetch_close);
330 #ifdef TCP_NOPUSH
331 val = 1;
332 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
333 sizeof(val));
334 #endif
335 } else {
336 fetch_close(io->conn);
337 }
338
339 free(io->buf);
340 free(io);
341 }
342
343 /*
344 * Wrap a file descriptor up
345 */
346 static fetchIO *
http_funopen(conn_t * conn,int chunked,int keep_alive,off_t clength)347 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
348 {
349 struct httpio *io;
350 fetchIO *f;
351
352 if ((io = calloc(1, sizeof(*io))) == NULL) {
353 fetch_syserr();
354 return (NULL);
355 }
356 io->conn = conn;
357 io->chunked = chunked;
358 io->contentlength = clength;
359 io->keep_alive = keep_alive;
360 f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
361 if (f == NULL) {
362 fetch_syserr();
363 free(io);
364 return (NULL);
365 }
366 return (f);
367 }
368
369
370 /*****************************************************************************
371 * Helper functions for talking to the server and parsing its replies
372 */
373
/*
 * Result codes of http_next_header(): negative values report failures,
 * hdr_end marks the empty line that ends the header block, and the
 * remaining values identify the header that was read.
 */
typedef enum {
	hdr_syserror		= -2,
	hdr_error		= -1,
	hdr_end			= 0,
	hdr_unknown		= 1,
	hdr_connection		= 2,
	hdr_content_length	= 3,
	hdr_content_range	= 4,
	hdr_last_modified	= 5,
	hdr_location		= 6,
	hdr_transfer_encoding	= 7,
	hdr_www_authenticate	= 8
} hdr_t;

/*
 * Header names this file cares about, mapped to their codes.  The table
 * is terminated by an entry whose code is hdr_unknown.
 */
static struct {
	hdr_t		 num;
	const char	*name;
} hdr_names[] = {
	{ hdr_connection,		"Connection" },
	{ hdr_content_length,		"Content-Length" },
	{ hdr_content_range,		"Content-Range" },
	{ hdr_last_modified,		"Last-Modified" },
	{ hdr_location,			"Location" },
	{ hdr_transfer_encoding,	"Transfer-Encoding" },
	{ hdr_www_authenticate,		"WWW-Authenticate" },
	{ hdr_unknown,			NULL },
};
403
404 /*
405 * Send a formatted line; optionally echo to terminal
406 */
407 __printflike(2, 3)
408 static int
http_cmd(conn_t * conn,const char * fmt,...)409 http_cmd(conn_t *conn, const char *fmt, ...)
410 {
411 va_list ap;
412 size_t len;
413 char *msg;
414 ssize_t r;
415
416 va_start(ap, fmt);
417 len = vasprintf(&msg, fmt, ap);
418 va_end(ap);
419
420 if (msg == NULL) {
421 errno = ENOMEM;
422 fetch_syserr();
423 return (-1);
424 }
425
426 r = fetch_write(conn, msg, len);
427 free(msg);
428
429 if (r == -1) {
430 fetch_syserr();
431 return (-1);
432 }
433
434 return (0);
435 }
436
437 /*
438 * Get and parse status line
439 */
440 static int
http_get_reply(conn_t * conn)441 http_get_reply(conn_t *conn)
442 {
443 char *p;
444
445 if (fetch_getln(conn) == -1)
446 return (-1);
447 /*
448 * A valid status line looks like "HTTP/m.n xyz reason" where m
449 * and n are the major and minor protocol version numbers and xyz
450 * is the reply code.
451 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
452 * just one) that do not send a version number, so we can't rely
453 * on finding one, but if we do, insist on it being 1.0 or 1.1.
454 * We don't care about the reason phrase.
455 */
456 if (strncmp(conn->buf, "HTTP", 4) != 0)
457 return (HTTP_PROTOCOL_ERROR);
458 p = conn->buf + 4;
459 if (*p == '/') {
460 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
461 return (HTTP_PROTOCOL_ERROR);
462 p += 4;
463 }
464 if (*p != ' ' ||
465 !isdigit((unsigned char)p[1]) ||
466 !isdigit((unsigned char)p[2]) ||
467 !isdigit((unsigned char)p[3]))
468 return (HTTP_PROTOCOL_ERROR);
469
470 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
471 return (conn->err);
472 }
473
/*
 * Check a header line against a header name.
 *
 * str is the header name to look for and hdr is the received line.  The
 * comparison ignores case and requires the name in hdr to be followed
 * immediately by a colon.
 *
 * Returns a pointer to the beginning of the value (the first character
 * after the colon that is not whitespace), or NULL if the names differ.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Advance only past characters that really matched; stepping over
	 * the first mismatch would let a name that differs in its final
	 * character be accepted.
	 */
	while (*str != '\0' && *hdr != '\0' &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str != '\0' || *hdr != ':')
		return (NULL);

	/* skip the colon, then any whitespace in front of the value */
	do {
		++hdr;
	} while (*hdr != '\0' && isspace((unsigned char)*hdr));
	return (hdr);
}
490
491 /*
492 * Get the next header and return the appropriate symbolic code.
493 */
494 static hdr_t
http_next_header(conn_t * conn,const char ** p)495 http_next_header(conn_t *conn, const char **p)
496 {
497 int i;
498
499 if (fetch_getln(conn) == -1)
500 return (hdr_syserror);
501 while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
502 conn->buflen--;
503 conn->buf[conn->buflen] = '\0';
504 if (conn->buflen == 0)
505 return (hdr_end);
506 /*
507 * We could check for malformed headers but we don't really care.
508 * A valid header starts with a token immediately followed by a
509 * colon; a token is any sequence of non-control, non-whitespace
510 * characters except "()<>@,;:\\\"{}".
511 */
512 for (i = 0; hdr_names[i].num != hdr_unknown; i++)
513 if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
514 return (hdr_names[i].num);
515 return (hdr_unknown);
516 }
517
/*
 * Parse a last-modified header.
 *
 * Only the "Sun, 06 Nov 1994 08:49:37 GMT" date form is understood.  The
 * date is parsed with LC_TIME temporarily switched to "C" so that day and
 * month names are matched in English; the previous setting is restored
 * afterwards.  If the current locale name cannot be saved, the locale is
 * left untouched and the date is parsed as is.
 *
 * Returns 0 and stores the time in *mtime, or -1 if the date could not
 * be parsed.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	char locale[64], *r;
	const char *cur;
	struct tm tm;
	int n, saved;

	/* keep a terminated copy; the pointer from setlocale() is volatile */
	saved = 0;
	cur = setlocale(LC_TIME, NULL);
	if (cur != NULL) {
		n = snprintf(locale, sizeof(locale), "%s", cur);
		saved = (n >= 0 && (size_t)n < sizeof(locale));
	}

	/* strptime() only fills in the fields it parses */
	memset(&tm, 0, sizeof(tm));

	if (saved)
		setlocale(LC_TIME, "C");
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	if (saved)
		setlocale(LC_TIME, locale);
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
537
/*
 * Parse a content-length header.
 *
 * p must consist of decimal digits only.  Returns 0 and stores the value
 * in *length, or -1 if p contains any other character or the value does
 * not fit in off_t.  An empty string yields a length of 0.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	/*
	 * Largest off_t value; assumes off_t is a signed type no wider
	 * than unsigned long long and that bytes have 8 bits.
	 */
	const off_t off_max = (off_t)(~0ULL >>
	    (8 * (sizeof(unsigned long long) - sizeof(off_t)) + 1));
	off_t len;
	int digit;

	for (len = 0; *p && isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* the value comes from the peer: reject it before it overflows */
		if (len > (off_max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	*length = len;
	return (0);
}
553
/*
 * Parse a run of decimal digits at *pp into *val and advance *pp past
 * them.  An empty run yields 0.  Returns 0 on success, or -1 (leaving
 * *pp and *val untouched) if the value does not fit in off_t.
 */
static int
http_range_number(const char **pp, off_t *val)
{
	/*
	 * Largest off_t value; assumes off_t is a signed type no wider
	 * than unsigned long long and that bytes have 8 bits.
	 */
	const off_t off_max = (off_t)(~0ULL >>
	    (8 * (sizeof(unsigned long long) - sizeof(off_t)) + 1));
	const char *p;
	off_t n;
	int digit;

	n = 0;
	for (p = *pp; *p && isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		if (n > (off_max - digit) / 10)
			return (-1);
		n = n * 10 + digit;
	}
	*pp = p;
	*val = n;
	return (0);
}

/*
 * Parse a content-range header.
 *
 * Accepts "bytes first-last/size" and "bytes * /size" (without the
 * space).  On success returns 0 and stores:
 *   *offset  first, or -1 for the "*" form
 *   *length  last - first + 1, or 0 for the "*" form
 *   *size    the number after the slash
 * Returns -1 if the value is malformed, inconsistent (first > last, or
 * size smaller than the range), or a number does not fit in off_t.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_range_number(&p, &first) == -1)
			return (-1);
		if (*p != '-')
			return (-1);
		++p;
		if (http_range_number(&p, &last) == -1)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	++p;
	if (http_range_number(&p, &len) == -1)
		return (-1);
	/* same test as len < last - first + 1, written so it cannot overflow */
	if (*p || len <= last - first)
		return (-1);
	if (first == -1)
		*length = 0;
	else
		*length = last - first + 1;
	*offset = first;
	*size = len;
	return (0);
}
590
591
592 /*****************************************************************************
593 * Helper functions for authorization
594 */
595
/*
 * Base64 encoding.
 *
 * Encodes the NUL-terminated string src and returns the result in a
 * newly allocated, NUL-terminated buffer that the caller must free().
 * Returns NULL if the buffer could not be allocated.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Work on unsigned bytes: with a signed plain char, bytes >= 0x80
	 * would sign-extend and corrupt the 24-bit group.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	/* four output characters per (partial) group of three, plus NUL */
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* encode the one or two leftover bytes and pad with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
651
652 /*
653 * Encode username and password
654 */
655 static int
http_basic_auth(conn_t * conn,const char * hdr,const char * usr,const char * pwd)656 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
657 {
658 char *upw, *auth;
659 int r;
660
661 if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
662 return (-1);
663 auth = http_base64(upw);
664 free(upw);
665 if (auth == NULL)
666 return (-1);
667 r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
668 free(auth);
669 return (r);
670 }
671
672 /*
673 * Send an authorization header
674 */
675 static int
http_authorize(conn_t * conn,const char * hdr,const char * p)676 http_authorize(conn_t *conn, const char *hdr, const char *p)
677 {
678 /* basic authorization */
679 if (strncasecmp(p, "basic:", 6) == 0) {
680 char *user, *pwd, *str;
681 int r;
682
683 /* skip realm */
684 for (p += 6; *p && *p != ':'; ++p)
685 /* nothing */ ;
686 if (!*p || strchr(++p, ':') == NULL)
687 return (-1);
688 if ((str = strdup(p)) == NULL)
689 return (-1); /* XXX */
690 user = str;
691 pwd = strchr(str, ':');
692 *pwd++ = '\0';
693 r = http_basic_auth(conn, hdr, user, pwd);
694 free(str);
695 return (r);
696 }
697 return (-1);
698 }
699
700
701 /*****************************************************************************
702 * Helper functions for connecting to a server or proxy
703 */
704
705 /*
706 * Connect to the correct HTTP server or proxy.
707 */
708 static conn_t *
http_connect(struct url * URL,struct url * purl,const char * flags,int * cached)709 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
710 {
711 conn_t *conn;
712 int af, verbose;
713 #ifdef TCP_NOPUSH
714 int val;
715 #endif
716
717 *cached = 1;
718
719 #ifdef INET6
720 af = AF_UNSPEC;
721 #else
722 af = AF_INET;
723 #endif
724
725 verbose = CHECK_FLAG('v');
726 if (CHECK_FLAG('4'))
727 af = AF_INET;
728 #ifdef INET6
729 else if (CHECK_FLAG('6'))
730 af = AF_INET6;
731 #endif
732
733 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
734 URL = purl;
735 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
736 /* can't talk http to an ftp server */
737 /* XXX should set an error code */
738 return (NULL);
739 }
740
741 if ((conn = fetch_cache_get(URL, af)) != NULL) {
742 *cached = 1;
743 return (conn);
744 }
745
746 if ((conn = fetch_connect(URL, af, verbose)) == NULL)
747 /* fetch_connect() has already set an error code */
748 return (NULL);
749 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
750 fetch_ssl(conn, verbose) == -1) {
751 fetch_close(conn);
752 /* grrr */
753 #ifdef EAUTH
754 errno = EAUTH;
755 #else
756 errno = EPERM;
757 #endif
758 fetch_syserr();
759 return (NULL);
760 }
761
762 #ifdef TCP_NOPUSH
763 val = 1;
764 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
765 #endif
766
767 return (conn);
768 }
769
770 static struct url *
http_get_proxy(struct url * url,const char * flags)771 http_get_proxy(struct url * url, const char *flags)
772 {
773 struct url *purl;
774 char *p;
775
776 if (flags != NULL && strchr(flags, 'd') != NULL)
777 return (NULL);
778 if (fetch_no_proxy_match(url->host))
779 return (NULL);
780 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
781 *p && (purl = fetchParseURL(p))) {
782 if (!*purl->scheme)
783 strcpy(purl->scheme, SCHEME_HTTP);
784 if (!purl->port)
785 purl->port = fetch_default_proxy_port(purl->scheme);
786 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
787 return (purl);
788 fetchFreeURL(purl);
789 }
790 return (NULL);
791 }
792
793 static void
set_if_modified_since(conn_t * conn,time_t last_modified)794 set_if_modified_since(conn_t *conn, time_t last_modified)
795 {
796 static const char weekdays[] = "SunMonTueWedThuFriSat";
797 static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
798 struct tm tm;
799 char buf[80];
800 gmtime_r(&last_modified, &tm);
801 snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4d %02d:%02d:%02d GMT",
802 weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
803 tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
804 http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
805 }
806
807
808 /*****************************************************************************
809 * Core
810 */
811
812 /*
813 * Send a request and process the reply
814 *
815 * XXX This function is way too long, the do..while loop should be split
816 * XXX off into a separate function.
817 */
818 fetchIO *
http_request(struct url * URL,const char * op,struct url_stat * us,struct url * purl,const char * flags)819 http_request(struct url *URL, const char *op, struct url_stat *us,
820 struct url *purl, const char *flags)
821 {
822 conn_t *conn;
823 struct url *url, *new;
824 int chunked, direct, if_modified_since, need_auth, noredirect;
825 int keep_alive, verbose, cached;
826 int e, i, n, val;
827 off_t offset, clength, length, size;
828 time_t mtime;
829 const char *p;
830 fetchIO *f;
831 hdr_t h;
832 char hbuf[URL_HOSTLEN + 7], *host;
833
834 direct = CHECK_FLAG('d');
835 noredirect = CHECK_FLAG('A');
836 verbose = CHECK_FLAG('v');
837 if_modified_since = CHECK_FLAG('i');
838 keep_alive = 0;
839
840 if (direct && purl) {
841 fetchFreeURL(purl);
842 purl = NULL;
843 }
844
845 /* try the provided URL first */
846 url = URL;
847
848 /* if the A flag is set, we only get one try */
849 n = noredirect ? 1 : MAX_REDIRECT;
850 i = 0;
851
852 e = HTTP_PROTOCOL_ERROR;
853 need_auth = 0;
854 do {
855 new = NULL;
856 chunked = 0;
857 offset = 0;
858 clength = -1;
859 length = -1;
860 size = -1;
861 mtime = 0;
862
863 /* check port */
864 if (!url->port)
865 url->port = fetch_default_port(url->scheme);
866
867 /* were we redirected to an FTP URL? */
868 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
869 if (strcmp(op, "GET") == 0)
870 return (ftp_request(url, "RETR", NULL, us, purl, flags));
871 else if (strcmp(op, "HEAD") == 0)
872 return (ftp_request(url, "STAT", NULL, us, purl, flags));
873 }
874
875 /* connect to server or proxy */
876 if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
877 goto ouch;
878
879 host = url->host;
880 #ifdef INET6
881 if (strchr(url->host, ':')) {
882 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
883 host = hbuf;
884 }
885 #endif
886 if (url->port != fetch_default_port(url->scheme)) {
887 if (host != hbuf) {
888 strcpy(hbuf, host);
889 host = hbuf;
890 }
891 snprintf(hbuf + strlen(hbuf),
892 sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
893 }
894
895 /* send request */
896 if (verbose)
897 fetch_info("requesting %s://%s%s",
898 url->scheme, host, url->doc);
899 if (purl) {
900 http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
901 op, url->scheme, host, url->doc);
902 } else {
903 http_cmd(conn, "%s %s HTTP/1.1\r\n",
904 op, url->doc);
905 }
906
907 if (if_modified_since && url->last_modified > 0)
908 set_if_modified_since(conn, url->last_modified);
909
910 /* virtual host */
911 http_cmd(conn, "Host: %s\r\n", host);
912
913 /* proxy authorization */
914 if (purl) {
915 if (*purl->user || *purl->pwd)
916 http_basic_auth(conn, "Proxy-Authorization",
917 purl->user, purl->pwd);
918 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
919 http_authorize(conn, "Proxy-Authorization", p);
920 }
921
922 /* server authorization */
923 if (need_auth || *url->user || *url->pwd) {
924 if (*url->user || *url->pwd)
925 http_basic_auth(conn, "Authorization", url->user, url->pwd);
926 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
927 http_authorize(conn, "Authorization", p);
928 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
929 http_basic_auth(conn, "Authorization", url->user, url->pwd);
930 } else {
931 http_seterr(HTTP_NEED_AUTH);
932 goto ouch;
933 }
934 }
935
936 /* other headers */
937 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
938 if (strcasecmp(p, "auto") == 0)
939 http_cmd(conn, "Referer: %s://%s%s\r\n",
940 url->scheme, host, url->doc);
941 else
942 http_cmd(conn, "Referer: %s\r\n", p);
943 }
944 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
945 http_cmd(conn, "User-Agent: %s\r\n", p);
946 else
947 http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
948 if (url->offset > 0)
949 http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
950 http_cmd(conn, "\r\n");
951
952 /*
953 * Force the queued request to be dispatched. Normally, one
954 * would do this with shutdown(2) but squid proxies can be
955 * configured to disallow such half-closed connections. To
956 * be compatible with such configurations, fiddle with socket
957 * options to force the pending data to be written.
958 */
959 #ifdef TCP_NOPUSH
960 val = 0;
961 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
962 sizeof(val));
963 #endif
964 val = 1;
965 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
966 (socklen_t)sizeof(val));
967
968 /* get reply */
969 switch (http_get_reply(conn)) {
970 case HTTP_OK:
971 case HTTP_PARTIAL:
972 case HTTP_NOT_MODIFIED:
973 /* fine */
974 break;
975 case HTTP_MOVED_PERM:
976 case HTTP_MOVED_TEMP:
977 case HTTP_SEE_OTHER:
978 /*
979 * Not so fine, but we still have to read the
980 * headers to get the new location.
981 */
982 break;
983 case HTTP_NEED_AUTH:
984 if (need_auth) {
985 /*
986 * We already sent out authorization code,
987 * so there's nothing more we can do.
988 */
989 http_seterr(conn->err);
990 goto ouch;
991 }
992 /* try again, but send the password this time */
993 if (verbose)
994 fetch_info("server requires authorization");
995 break;
996 case HTTP_NEED_PROXY_AUTH:
997 /*
998 * If we're talking to a proxy, we already sent
999 * our proxy authorization code, so there's
1000 * nothing more we can do.
1001 */
1002 http_seterr(conn->err);
1003 goto ouch;
1004 case HTTP_BAD_RANGE:
1005 /*
1006 * This can happen if we ask for 0 bytes because
1007 * we already have the whole file. Consider this
1008 * a success for now, and check sizes later.
1009 */
1010 break;
1011 case HTTP_PROTOCOL_ERROR:
1012 /* fall through */
1013 case -1:
1014 --i;
1015 if (cached)
1016 continue;
1017 fetch_syserr();
1018 goto ouch;
1019 default:
1020 http_seterr(conn->err);
1021 if (!verbose)
1022 goto ouch;
1023 /* fall through so we can get the full error message */
1024 }
1025
1026 /* get headers */
1027 do {
1028 switch ((h = http_next_header(conn, &p))) {
1029 case hdr_syserror:
1030 fetch_syserr();
1031 goto ouch;
1032 case hdr_error:
1033 http_seterr(HTTP_PROTOCOL_ERROR);
1034 goto ouch;
1035 case hdr_connection:
1036 /* XXX too weak? */
1037 keep_alive = (strcasecmp(p, "keep-alive") == 0);
1038 break;
1039 case hdr_content_length:
1040 http_parse_length(p, &clength);
1041 break;
1042 case hdr_content_range:
1043 http_parse_range(p, &offset, &length, &size);
1044 break;
1045 case hdr_last_modified:
1046 http_parse_mtime(p, &mtime);
1047 break;
1048 case hdr_location:
1049 if (!HTTP_REDIRECT(conn->err))
1050 break;
1051 if (new)
1052 free(new);
1053 if (verbose)
1054 fetch_info("%d redirect to %s", conn->err, p);
1055 if (*p == '/')
1056 /* absolute path */
1057 new = fetchMakeURL(url->scheme, url->host, url->port, p,
1058 url->user, url->pwd);
1059 else
1060 new = fetchParseURL(p);
1061 if (new == NULL) {
1062 /* XXX should set an error code */
1063 goto ouch;
1064 }
1065 if (!*new->user && !*new->pwd) {
1066 strcpy(new->user, url->user);
1067 strcpy(new->pwd, url->pwd);
1068 }
1069 new->offset = url->offset;
1070 new->length = url->length;
1071 break;
1072 case hdr_transfer_encoding:
1073 /* XXX weak test*/
1074 chunked = (strcasecmp(p, "chunked") == 0);
1075 break;
1076 case hdr_www_authenticate:
1077 if (conn->err != HTTP_NEED_AUTH)
1078 break;
1079 /* if we were smarter, we'd check the method and realm */
1080 break;
1081 case hdr_end:
1082 /* fall through */
1083 case hdr_unknown:
1084 /* ignore */
1085 break;
1086 }
1087 } while (h > hdr_end);
1088
1089 /* we need to provide authentication */
1090 if (conn->err == HTTP_NEED_AUTH) {
1091 e = conn->err;
1092 need_auth = 1;
1093 fetch_close(conn);
1094 conn = NULL;
1095 continue;
1096 }
1097
1098 /* requested range not satisfiable */
1099 if (conn->err == HTTP_BAD_RANGE) {
1100 if (url->offset == size && url->length == 0) {
1101 /* asked for 0 bytes; fake it */
1102 offset = url->offset;
1103 conn->err = HTTP_OK;
1104 break;
1105 } else {
1106 http_seterr(conn->err);
1107 goto ouch;
1108 }
1109 }
1110
1111 /* we have a hit or an error */
1112 if (conn->err == HTTP_OK ||
1113 conn->err == HTTP_PARTIAL ||
1114 conn->err == HTTP_NOT_MODIFIED ||
1115 HTTP_ERROR(conn->err))
1116 break;
1117
1118 /* all other cases: we got a redirect */
1119 e = conn->err;
1120 need_auth = 0;
1121 fetch_close(conn);
1122 conn = NULL;
1123 if (!new)
1124 break;
1125 if (url != URL)
1126 fetchFreeURL(url);
1127 url = new;
1128 } while (++i < n);
1129
1130 /* we failed, or ran out of retries */
1131 if (conn == NULL) {
1132 http_seterr(e);
1133 goto ouch;
1134 }
1135
1136 /* check for inconsistencies */
1137 if (clength != -1 && length != -1 && clength != length) {
1138 http_seterr(HTTP_PROTOCOL_ERROR);
1139 goto ouch;
1140 }
1141 if (clength == -1)
1142 clength = length;
1143 if (clength != -1)
1144 length = offset + clength;
1145 if (length != -1 && size != -1 && length != size) {
1146 http_seterr(HTTP_PROTOCOL_ERROR);
1147 goto ouch;
1148 }
1149 if (size == -1)
1150 size = length;
1151
1152 /* fill in stats */
1153 if (us) {
1154 us->size = size;
1155 us->atime = us->mtime = mtime;
1156 }
1157
1158 /* too far? */
1159 if (URL->offset > 0 && offset > URL->offset) {
1160 http_seterr(HTTP_PROTOCOL_ERROR);
1161 goto ouch;
1162 }
1163
1164 /* report back real offset and size */
1165 URL->offset = offset;
1166 URL->length = clength;
1167
1168 if (clength == -1 && !chunked)
1169 keep_alive = 0;
1170
1171 if (conn->err == HTTP_NOT_MODIFIED) {
1172 http_seterr(HTTP_NOT_MODIFIED);
1173 if (keep_alive) {
1174 fetch_cache_put(conn, fetch_close);
1175 conn = NULL;
1176 }
1177 goto ouch;
1178 }
1179
1180 /* wrap it up in a fetchIO */
1181 if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1182 fetch_syserr();
1183 goto ouch;
1184 }
1185
1186 if (url != URL)
1187 fetchFreeURL(url);
1188 if (purl)
1189 fetchFreeURL(purl);
1190
1191 if (HTTP_ERROR(conn->err)) {
1192
1193 if (keep_alive) {
1194 char buf[512];
1195 do {
1196 } while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1197 }
1198
1199 fetchIO_close(f);
1200 f = NULL;
1201 }
1202
1203 return (f);
1204
1205 ouch:
1206 if (url != URL)
1207 fetchFreeURL(url);
1208 if (purl)
1209 fetchFreeURL(purl);
1210 if (conn != NULL)
1211 fetch_close(conn);
1212 return (NULL);
1213 }
1214
1215
1216 /*****************************************************************************
1217 * Entry points
1218 */
1219
1220 /*
1221 * Retrieve and stat a file by HTTP
1222 */
1223 fetchIO *
fetchXGetHTTP(struct url * URL,struct url_stat * us,const char * flags)1224 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1225 {
1226 return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1227 }
1228
1229 /*
1230 * Retrieve a file by HTTP
1231 */
1232 fetchIO *
fetchGetHTTP(struct url * URL,const char * flags)1233 fetchGetHTTP(struct url *URL, const char *flags)
1234 {
1235 return (fetchXGetHTTP(URL, NULL, flags));
1236 }
1237
1238 /*
1239 * Store a file by HTTP
1240 */
1241 fetchIO *
1242 /*ARGSUSED*/
fetchPutHTTP(struct url * URL __unused,const char * flags __unused)1243 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1244 {
1245 fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1246 return (NULL);
1247 }
1248
1249 /*
1250 * Get an HTTP document's metadata
1251 */
1252 int
fetchStatHTTP(struct url * URL,struct url_stat * us,const char * flags)1253 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1254 {
1255 fetchIO *f;
1256
1257 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1258 if (f == NULL)
1259 return (-1);
1260 fetchIO_close(f);
1261 return (0);
1262 }
1263
/*
 * States of the character-driven scanner in parse_index(), which looks
 * for href="..." attributes of <a> tags in an HTML directory index.
 */
enum http_states {
	ST_NONE,	/* plain text, not inside any markup */
	ST_LT,		/* "<" seen */
	ST_LTA,		/* "<a" seen */
	ST_TAGA,	/* inside an a-tag, between attributes ("<a " seen) */
	ST_H,		/* "<a h" seen */
	ST_R,		/* "<a hr" seen */
	ST_E,		/* "<a hre" seen */
	ST_F,		/* "<a href" seen */
	ST_HREF,	/* "<a href=" seen */
	ST_HREFQ,	/* inside the double-quoted href value */
	ST_TAG,		/* inside a tag other than "<a"; skipped up to ">" */
	ST_TAGAX,	/* inside an unrecognised keyword of an a-tag */
	ST_TAGAQ	/* inside a double-quoted value other than href */
};
1279
/*
 * Scanner context handed to parse_index() on every call.
 */
struct index_parser {
	struct url_list *ue;	/* list that receives each href found */
	struct url *url;	/* URL passed to fetch_add_entry() with each href */
	enum http_states state;	/* current scanner state */
};
1285
1286 static ssize_t
parse_index(struct index_parser * parser,const char * buf,size_t len)1287 parse_index(struct index_parser *parser, const char *buf, size_t len)
1288 {
1289 char *end_attr, p = *buf;
1290
1291 switch (parser->state) {
1292 case ST_NONE:
1293 /* Plain text, not in markup */
1294 if (p == '<')
1295 parser->state = ST_LT;
1296 return 1;
1297 case ST_LT:
1298 /* In tag -- "<" already found */
1299 if (p == '>')
1300 parser->state = ST_NONE;
1301 else if (p == 'a' || p == 'A')
1302 parser->state = ST_LTA;
1303 else if (!isspace((unsigned char)p))
1304 parser->state = ST_TAG;
1305 return 1;
1306 case ST_LTA:
1307 /* In tag -- "<a" already found */
1308 if (p == '>')
1309 parser->state = ST_NONE;
1310 else if (p == '"')
1311 parser->state = ST_TAGAQ;
1312 else if (isspace((unsigned char)p))
1313 parser->state = ST_TAGA;
1314 else
1315 parser->state = ST_TAG;
1316 return 1;
1317 case ST_TAG:
1318 /* In tag, but not "<a" -- disregard */
1319 if (p == '>')
1320 parser->state = ST_NONE;
1321 return 1;
1322 case ST_TAGA:
1323 /* In a-tag -- "<a " already found */
1324 if (p == '>')
1325 parser->state = ST_NONE;
1326 else if (p == '"')
1327 parser->state = ST_TAGAQ;
1328 else if (p == 'h' || p == 'H')
1329 parser->state = ST_H;
1330 else if (!isspace((unsigned char)p))
1331 parser->state = ST_TAGAX;
1332 return 1;
1333 case ST_TAGAX:
1334 /* In unknown keyword in a-tag */
1335 if (p == '>')
1336 parser->state = ST_NONE;
1337 else if (p == '"')
1338 parser->state = ST_TAGAQ;
1339 else if (isspace((unsigned char)p))
1340 parser->state = ST_TAGA;
1341 return 1;
1342 case ST_TAGAQ:
1343 /* In a-tag, unknown argument for keys. */
1344 if (p == '>')
1345 parser->state = ST_NONE;
1346 else if (p == '"')
1347 parser->state = ST_TAGA;
1348 return 1;
1349 case ST_H:
1350 /* In a-tag -- "<a h" already found */
1351 if (p == '>')
1352 parser->state = ST_NONE;
1353 else if (p == '"')
1354 parser->state = ST_TAGAQ;
1355 else if (p == 'r' || p == 'R')
1356 parser->state = ST_R;
1357 else if (isspace((unsigned char)p))
1358 parser->state = ST_TAGA;
1359 else
1360 parser->state = ST_TAGAX;
1361 return 1;
1362 case ST_R:
1363 /* In a-tag -- "<a hr" already found */
1364 if (p == '>')
1365 parser->state = ST_NONE;
1366 else if (p == '"')
1367 parser->state = ST_TAGAQ;
1368 else if (p == 'e' || p == 'E')
1369 parser->state = ST_E;
1370 else if (isspace((unsigned char)p))
1371 parser->state = ST_TAGA;
1372 else
1373 parser->state = ST_TAGAX;
1374 return 1;
1375 case ST_E:
1376 /* In a-tag -- "<a hre" already found */
1377 if (p == '>')
1378 parser->state = ST_NONE;
1379 else if (p == '"')
1380 parser->state = ST_TAGAQ;
1381 else if (p == 'f' || p == 'F')
1382 parser->state = ST_F;
1383 else if (isspace((unsigned char)p))
1384 parser->state = ST_TAGA;
1385 else
1386 parser->state = ST_TAGAX;
1387 return 1;
1388 case ST_F:
1389 /* In a-tag -- "<a href" already found */
1390 if (p == '>')
1391 parser->state = ST_NONE;
1392 else if (p == '"')
1393 parser->state = ST_TAGAQ;
1394 else if (p == '=')
1395 parser->state = ST_HREF;
1396 else if (!isspace((unsigned char)p))
1397 parser->state = ST_TAGAX;
1398 return 1;
1399 case ST_HREF:
1400 /* In a-tag -- "<a href=" already found */
1401 if (p == '>')
1402 parser->state = ST_NONE;
1403 else if (p == '"')
1404 parser->state = ST_HREFQ;
1405 else if (!isspace((unsigned char)p))
1406 parser->state = ST_TAGA;
1407 return 1;
1408 case ST_HREFQ:
1409 /* In href of the a-tag */
1410 end_attr = memchr(buf, '"', len);
1411 if (end_attr == NULL)
1412 return 0;
1413 *end_attr = '\0';
1414 parser->state = ST_TAGA;
1415 if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1416 return -1;
1417 return end_attr + 1 - buf;
1418 }
1419 /* NOTREACHED */
1420 abort();
1421 }
1422
/*
 * One cached directory listing, as created by fetchListHTTP() when the
 * 'c' flag is given.
 */
struct http_index_cache {
	struct http_index_cache *next;	/* next entry in the index_cache list */
	struct url *location;		/* copy of the URL that was listed */
	struct url_list ue;		/* entries parsed from that listing */
};

/* Head of the singly linked list of cached listings; new entries are pushed at the front. */
static struct http_index_cache *index_cache;
1430
1431 /*
1432 * List a directory
1433 */
1434 int
1435 /*ARGSUSED*/
fetchListHTTP(struct url_list * ue,struct url * url,const char * pattern __unused,const char * flags)1436 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern __unused, const char *flags)
1437 {
1438 fetchIO *f;
1439 char buf[2 * PATH_MAX];
1440 size_t buf_len, sum_processed;
1441 ssize_t read_len, processed;
1442 struct index_parser state;
1443 struct http_index_cache *cache = NULL;
1444 int do_cache, ret;
1445
1446 do_cache = CHECK_FLAG('c');
1447
1448 if (do_cache) {
1449 for (cache = index_cache; cache != NULL; cache = cache->next) {
1450 if (strcmp(cache->location->scheme, url->scheme))
1451 continue;
1452 if (strcmp(cache->location->user, url->user))
1453 continue;
1454 if (strcmp(cache->location->pwd, url->pwd))
1455 continue;
1456 if (strcmp(cache->location->host, url->host))
1457 continue;
1458 if (cache->location->port != url->port)
1459 continue;
1460 if (strcmp(cache->location->doc, url->doc))
1461 continue;
1462 return fetchAppendURLList(ue, &cache->ue);
1463 }
1464
1465 cache = malloc(sizeof(*cache));
1466 fetchInitURLList(&cache->ue);
1467 cache->location = fetchCopyURL(url);
1468 }
1469
1470 f = fetchGetHTTP(url, flags);
1471 if (f == NULL) {
1472 if (do_cache) {
1473 fetchFreeURLList(&cache->ue);
1474 fetchFreeURL(cache->location);
1475 free(cache);
1476 }
1477 return -1;
1478 }
1479
1480 state.url = url;
1481 state.state = ST_NONE;
1482 if (do_cache) {
1483 state.ue = &cache->ue;
1484 } else {
1485 state.ue = ue;
1486 }
1487
1488 buf_len = 0;
1489
1490 while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1491 buf_len += read_len;
1492 sum_processed = 0;
1493 do {
1494 processed = parse_index(&state, buf + sum_processed, buf_len);
1495 if (processed == -1)
1496 break;
1497 buf_len -= processed;
1498 sum_processed += processed;
1499 } while (processed != 0 && buf_len > 0);
1500 if (processed == -1) {
1501 read_len = -1;
1502 break;
1503 }
1504 memmove(buf, buf + sum_processed, buf_len);
1505 }
1506
1507 fetchIO_close(f);
1508
1509 ret = read_len < 0 ? -1 : 0;
1510
1511 if (do_cache) {
1512 if (ret == 0) {
1513 cache->next = index_cache;
1514 index_cache = cache;
1515 }
1516
1517 if (fetchAppendURLList(ue, &cache->ue))
1518 ret = -1;
1519 }
1520
1521 return ret;
1522 }
1523