/* $NetBSD: fetch.c,v 1.1.1.8 2009/08/21 15:12:27 joerg Exp $ */
/*-
 * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: fetch.c,v 1.41 2007/12/19 00:26:36 des Exp $
 */
32fe618babSjoerg
33fe618babSjoerg #if HAVE_CONFIG_H
34fe618babSjoerg #include "config.h"
35fe618babSjoerg #endif
36fe618babSjoerg #ifndef NETBSD
37fe618babSjoerg #include <nbcompat.h>
38fe618babSjoerg #endif
39fe618babSjoerg
40fe618babSjoerg #include <ctype.h>
41fe618babSjoerg #include <errno.h>
42fe618babSjoerg #include <stdio.h>
43fe618babSjoerg #include <stdlib.h>
44fe618babSjoerg #include <string.h>
45fe618babSjoerg
46fe618babSjoerg #include "fetch.h"
47fe618babSjoerg #include "common.h"
48fe618babSjoerg
49fe618babSjoerg auth_t fetchAuthMethod;
50fe618babSjoerg int fetchLastErrCode;
51fe618babSjoerg char fetchLastErrString[MAXERRSTRING];
52fe618babSjoerg int fetchTimeout;
539da2cc5cSjoerg volatile int fetchRestartCalls = 1;
54fe618babSjoerg int fetchDebug;
55fe618babSjoerg
56fe618babSjoerg
57fe618babSjoerg /*** Local data **************************************************************/
58fe618babSjoerg
59fe618babSjoerg /*
60fe618babSjoerg * Error messages for parser errors
61fe618babSjoerg */
62fe618babSjoerg #define URL_MALFORMED 1
63fe618babSjoerg #define URL_BAD_SCHEME 2
64fe618babSjoerg #define URL_BAD_PORT 3
65fe618babSjoerg static struct fetcherr url_errlist[] = {
66fe618babSjoerg { URL_MALFORMED, FETCH_URL, "Malformed URL" },
67fe618babSjoerg { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" },
68fe618babSjoerg { URL_BAD_PORT, FETCH_URL, "Invalid server port" },
69fe618babSjoerg { -1, FETCH_UNKNOWN, "Unknown parser error" }
70fe618babSjoerg };
71fe618babSjoerg
72fe618babSjoerg
73fe618babSjoerg /*** Public API **************************************************************/
74fe618babSjoerg
75fe618babSjoerg /*
76fe618babSjoerg * Select the appropriate protocol for the URL scheme, and return a
77fe618babSjoerg * read-only stream connected to the document referenced by the URL.
78fe618babSjoerg * Also fill out the struct url_stat.
79fe618babSjoerg */
80fe618babSjoerg fetchIO *
fetchXGet(struct url * URL,struct url_stat * us,const char * flags)81fe618babSjoerg fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
82fe618babSjoerg {
83fe618babSjoerg
84fe618babSjoerg if (us != NULL) {
85fe618babSjoerg us->size = -1;
86fe618babSjoerg us->atime = us->mtime = 0;
87fe618babSjoerg }
88fe618babSjoerg if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
89fe618babSjoerg return (fetchXGetFile(URL, us, flags));
90fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
91fe618babSjoerg return (fetchXGetFTP(URL, us, flags));
92fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
93fe618babSjoerg return (fetchXGetHTTP(URL, us, flags));
94fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
95fe618babSjoerg return (fetchXGetHTTP(URL, us, flags));
96fe618babSjoerg url_seterr(URL_BAD_SCHEME);
97fe618babSjoerg return (NULL);
98fe618babSjoerg }
99fe618babSjoerg
100fe618babSjoerg /*
101fe618babSjoerg * Select the appropriate protocol for the URL scheme, and return a
102fe618babSjoerg * read-only stream connected to the document referenced by the URL.
103fe618babSjoerg */
104fe618babSjoerg fetchIO *
fetchGet(struct url * URL,const char * flags)105fe618babSjoerg fetchGet(struct url *URL, const char *flags)
106fe618babSjoerg {
107fe618babSjoerg return (fetchXGet(URL, NULL, flags));
108fe618babSjoerg }
109fe618babSjoerg
110fe618babSjoerg /*
111fe618babSjoerg * Select the appropriate protocol for the URL scheme, and return a
112fe618babSjoerg * write-only stream connected to the document referenced by the URL.
113fe618babSjoerg */
114fe618babSjoerg fetchIO *
fetchPut(struct url * URL,const char * flags)115fe618babSjoerg fetchPut(struct url *URL, const char *flags)
116fe618babSjoerg {
117fe618babSjoerg
118fe618babSjoerg if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
119fe618babSjoerg return (fetchPutFile(URL, flags));
120fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
121fe618babSjoerg return (fetchPutFTP(URL, flags));
122fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
123fe618babSjoerg return (fetchPutHTTP(URL, flags));
124fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
125fe618babSjoerg return (fetchPutHTTP(URL, flags));
126fe618babSjoerg url_seterr(URL_BAD_SCHEME);
127fe618babSjoerg return (NULL);
128fe618babSjoerg }
129fe618babSjoerg
130fe618babSjoerg /*
131fe618babSjoerg * Select the appropriate protocol for the URL scheme, and return the
132fe618babSjoerg * size of the document referenced by the URL if it exists.
133fe618babSjoerg */
134fe618babSjoerg int
fetchStat(struct url * URL,struct url_stat * us,const char * flags)135fe618babSjoerg fetchStat(struct url *URL, struct url_stat *us, const char *flags)
136fe618babSjoerg {
137fe618babSjoerg
138fe618babSjoerg if (us != NULL) {
139fe618babSjoerg us->size = -1;
140fe618babSjoerg us->atime = us->mtime = 0;
141fe618babSjoerg }
142fe618babSjoerg if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
143fe618babSjoerg return (fetchStatFile(URL, us, flags));
144fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
145fe618babSjoerg return (fetchStatFTP(URL, us, flags));
146fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
147fe618babSjoerg return (fetchStatHTTP(URL, us, flags));
148fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
149fe618babSjoerg return (fetchStatHTTP(URL, us, flags));
150fe618babSjoerg url_seterr(URL_BAD_SCHEME);
151fe618babSjoerg return (-1);
152fe618babSjoerg }
153fe618babSjoerg
154fe618babSjoerg /*
155fe618babSjoerg * Select the appropriate protocol for the URL scheme, and return a
156fe618babSjoerg * list of files in the directory pointed to by the URL.
157fe618babSjoerg */
158fe618babSjoerg int
fetchList(struct url_list * ue,struct url * URL,const char * pattern,const char * flags)159fe618babSjoerg fetchList(struct url_list *ue, struct url *URL, const char *pattern,
160fe618babSjoerg const char *flags)
161fe618babSjoerg {
162fe618babSjoerg
163fe618babSjoerg if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
164fe618babSjoerg return (fetchListFile(ue, URL, pattern, flags));
165fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
166fe618babSjoerg return (fetchListFTP(ue, URL, pattern, flags));
167fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
168fe618babSjoerg return (fetchListHTTP(ue, URL, pattern, flags));
169fe618babSjoerg else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
170fe618babSjoerg return (fetchListHTTP(ue, URL, pattern, flags));
171fe618babSjoerg url_seterr(URL_BAD_SCHEME);
172fe618babSjoerg return -1;
173fe618babSjoerg }
174fe618babSjoerg
175fe618babSjoerg /*
176fe618babSjoerg * Attempt to parse the given URL; if successful, call fetchXGet().
177fe618babSjoerg */
178fe618babSjoerg fetchIO *
fetchXGetURL(const char * URL,struct url_stat * us,const char * flags)179fe618babSjoerg fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
180fe618babSjoerg {
181fe618babSjoerg struct url *u;
182fe618babSjoerg fetchIO *f;
183fe618babSjoerg
184fe618babSjoerg if ((u = fetchParseURL(URL)) == NULL)
185fe618babSjoerg return (NULL);
186fe618babSjoerg
187fe618babSjoerg f = fetchXGet(u, us, flags);
188fe618babSjoerg
189fe618babSjoerg fetchFreeURL(u);
190fe618babSjoerg return (f);
191fe618babSjoerg }
192fe618babSjoerg
193fe618babSjoerg /*
194fe618babSjoerg * Attempt to parse the given URL; if successful, call fetchGet().
195fe618babSjoerg */
196fe618babSjoerg fetchIO *
fetchGetURL(const char * URL,const char * flags)197fe618babSjoerg fetchGetURL(const char *URL, const char *flags)
198fe618babSjoerg {
199fe618babSjoerg return (fetchXGetURL(URL, NULL, flags));
200fe618babSjoerg }
201fe618babSjoerg
202fe618babSjoerg /*
203fe618babSjoerg * Attempt to parse the given URL; if successful, call fetchPut().
204fe618babSjoerg */
205fe618babSjoerg fetchIO *
fetchPutURL(const char * URL,const char * flags)206fe618babSjoerg fetchPutURL(const char *URL, const char *flags)
207fe618babSjoerg {
208fe618babSjoerg struct url *u;
209fe618babSjoerg fetchIO *f;
210fe618babSjoerg
211fe618babSjoerg if ((u = fetchParseURL(URL)) == NULL)
212fe618babSjoerg return (NULL);
213fe618babSjoerg
214fe618babSjoerg f = fetchPut(u, flags);
215fe618babSjoerg
216fe618babSjoerg fetchFreeURL(u);
217fe618babSjoerg return (f);
218fe618babSjoerg }
219fe618babSjoerg
/*
 * String front end for fetchStat(): parse URL, query it, and release the
 * parsed URL again before returning.
 *
 * Returns -1 if the string cannot be parsed, otherwise whatever
 * fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int result;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	result = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (result);
}
237fe618babSjoerg
/*
 * String front end for fetchList(): parse URL, list it into ue, and
 * release the parsed URL again before returning.
 *
 * Returns -1 if the string cannot be parsed, otherwise whatever
 * fetchList() returned.
 */
int
fetchListURL(struct url_list *ue, const char *URL, const char *pattern,
    const char *flags)
{
	struct url *parsed;
	int result;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return -1;

	result = fetchList(ue, parsed, pattern, flags);
	fetchFreeURL(parsed);

	return result;
}
256fe618babSjoerg
257fe618babSjoerg /*
258fe618babSjoerg * Make a URL
259fe618babSjoerg */
260fe618babSjoerg struct url *
fetchMakeURL(const char * scheme,const char * host,int port,const char * doc,const char * user,const char * pwd)261fe618babSjoerg fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
262fe618babSjoerg const char *user, const char *pwd)
263fe618babSjoerg {
264fe618babSjoerg struct url *u;
265fe618babSjoerg
266fe618babSjoerg if (!scheme || (!host && !doc)) {
267fe618babSjoerg url_seterr(URL_MALFORMED);
268fe618babSjoerg return (NULL);
269fe618babSjoerg }
270fe618babSjoerg
271fe618babSjoerg if (port < 0 || port > 65535) {
272fe618babSjoerg url_seterr(URL_BAD_PORT);
273fe618babSjoerg return (NULL);
274fe618babSjoerg }
275fe618babSjoerg
276fe618babSjoerg /* allocate struct url */
277fe618babSjoerg if ((u = calloc(1, sizeof(*u))) == NULL) {
278fe618babSjoerg fetch_syserr();
279fe618babSjoerg return (NULL);
280fe618babSjoerg }
281fe618babSjoerg
282fe618babSjoerg if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
283fe618babSjoerg fetch_syserr();
284fe618babSjoerg free(u);
285fe618babSjoerg return (NULL);
286fe618babSjoerg }
287fe618babSjoerg
288fe618babSjoerg #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
289fe618babSjoerg seturl(scheme);
290fe618babSjoerg seturl(host);
291fe618babSjoerg seturl(user);
292fe618babSjoerg seturl(pwd);
293fe618babSjoerg #undef seturl
294fe618babSjoerg u->port = port;
295fe618babSjoerg
296fe618babSjoerg return (u);
297fe618babSjoerg }
298fe618babSjoerg
/*
 * Tell whether character x may appear in a URL document path as-is.
 *
 * Returns 1 for ASCII letters and digits, for the punctuation listed in
 * the table below, and for '%' (so already-quoted text is left alone);
 * returns 0 for everything else, including NUL.
 */
int
fetch_urlpath_safe(char x)
{
	static const char punct[] =
	    "$-_.+!*'(),"	/* always acceptable */
	    "?:@&=/;"		/* allowed in segment and path components */
	    "%";		/* something that is already quoted */
	const char *s;

	if (x >= '0' && x <= '9')
		return 1;
	if (x >= 'A' && x <= 'Z')
		return 1;
	if (x >= 'a' && x <= 'z')
		return 1;

	/* The scan stops at the terminator, so NUL itself never matches. */
	for (s = punct; *s != '\0'; ++s) {
		if (*s == x)
			return 1;
	}
	return 0;
}
333fe618babSjoerg
334fe618babSjoerg /*
335fe618babSjoerg * Copy an existing URL.
336fe618babSjoerg */
337fe618babSjoerg struct url *
fetchCopyURL(const struct url * src)338fe618babSjoerg fetchCopyURL(const struct url *src)
339fe618babSjoerg {
340fe618babSjoerg struct url *dst;
341fe618babSjoerg char *doc;
342fe618babSjoerg
343fe618babSjoerg /* allocate struct url */
344fe618babSjoerg if ((dst = malloc(sizeof(*dst))) == NULL) {
345fe618babSjoerg fetch_syserr();
346fe618babSjoerg return (NULL);
347fe618babSjoerg }
348fe618babSjoerg if ((doc = strdup(src->doc)) == NULL) {
349fe618babSjoerg fetch_syserr();
350fe618babSjoerg free(dst);
351fe618babSjoerg return (NULL);
352fe618babSjoerg }
353fe618babSjoerg *dst = *src;
354fe618babSjoerg dst->doc = doc;
355fe618babSjoerg
356fe618babSjoerg return dst;
357fe618babSjoerg }
358fe618babSjoerg
359fe618babSjoerg /*
360fe618babSjoerg * Split an URL into components. URL syntax is:
361fe618babSjoerg * [method:/][/[user[:pwd]@]host[:port]/][document]
362fe618babSjoerg * This almost, but not quite, RFC1738 URL syntax.
363fe618babSjoerg */
364fe618babSjoerg struct url *
fetchParseURL(const char * URL)365fe618babSjoerg fetchParseURL(const char *URL)
366fe618babSjoerg {
367fe618babSjoerg const char *p, *q;
368fe618babSjoerg struct url *u;
369fe618babSjoerg size_t i, count;
370fe618babSjoerg int pre_quoted;
371fe618babSjoerg
372fe618babSjoerg /* allocate struct url */
373fe618babSjoerg if ((u = calloc(1, sizeof(*u))) == NULL) {
374fe618babSjoerg fetch_syserr();
375fe618babSjoerg return (NULL);
376fe618babSjoerg }
377fe618babSjoerg
378fe618babSjoerg if (*URL == '/') {
379fe618babSjoerg pre_quoted = 0;
380fe618babSjoerg strcpy(u->scheme, SCHEME_FILE);
381fe618babSjoerg p = URL;
382fe618babSjoerg goto quote_doc;
383fe618babSjoerg }
384fe618babSjoerg if (strncmp(URL, "file:", 5) == 0) {
385fe618babSjoerg pre_quoted = 1;
386fe618babSjoerg strcpy(u->scheme, SCHEME_FILE);
387fe618babSjoerg URL += 5;
388fe618babSjoerg if (URL[0] != '/' || URL[1] != '/' || URL[2] != '/') {
389fe618babSjoerg url_seterr(URL_MALFORMED);
390fe618babSjoerg goto ouch;
391fe618babSjoerg }
392fe618babSjoerg p = URL + 2;
393fe618babSjoerg goto quote_doc;
394fe618babSjoerg }
395fe618babSjoerg if (strncmp(URL, "http:", 5) == 0 ||
396fe618babSjoerg strncmp(URL, "https:", 6) == 0) {
397fe618babSjoerg pre_quoted = 1;
398fe618babSjoerg if (URL[4] == ':') {
399fe618babSjoerg strcpy(u->scheme, SCHEME_HTTP);
400fe618babSjoerg URL += 5;
401fe618babSjoerg } else {
402fe618babSjoerg strcpy(u->scheme, SCHEME_HTTPS);
403fe618babSjoerg URL += 6;
404fe618babSjoerg }
405fe618babSjoerg
406fe618babSjoerg if (URL[0] != '/' || URL[1] != '/') {
407fe618babSjoerg url_seterr(URL_MALFORMED);
408fe618babSjoerg goto ouch;
409fe618babSjoerg }
410fe618babSjoerg URL += 2;
411fe618babSjoerg p = URL;
4128e658292Sjoerg goto find_user;
413fe618babSjoerg }
414fe618babSjoerg if (strncmp(URL, "ftp:", 4) == 0) {
415fe618babSjoerg pre_quoted = 1;
416fe618babSjoerg strcpy(u->scheme, SCHEME_FTP);
417fe618babSjoerg URL += 4;
418fe618babSjoerg if (URL[0] != '/' || URL[1] != '/') {
419fe618babSjoerg url_seterr(URL_MALFORMED);
420fe618babSjoerg goto ouch;
421fe618babSjoerg }
422fe618babSjoerg URL += 2;
423fe618babSjoerg p = URL;
424fe618babSjoerg goto find_user;
425fe618babSjoerg }
426fe618babSjoerg
427fe618babSjoerg url_seterr(URL_BAD_SCHEME);
428fe618babSjoerg goto ouch;
429fe618babSjoerg
430fe618babSjoerg find_user:
431fe618babSjoerg p = strpbrk(URL, "/@");
432fe618babSjoerg if (p != NULL && *p == '@') {
433fe618babSjoerg /* username */
434fe618babSjoerg for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) {
435fe618babSjoerg if (i < URL_USERLEN)
436fe618babSjoerg u->user[i++] = *q;
437fe618babSjoerg }
438fe618babSjoerg
439fe618babSjoerg /* password */
440fe618babSjoerg if (*q == ':') {
4418e658292Sjoerg for (q++, i = 0; (*q != '@'); q++)
442fe618babSjoerg if (i < URL_PWDLEN)
443fe618babSjoerg u->pwd[i++] = *q;
444fe618babSjoerg }
445fe618babSjoerg
446fe618babSjoerg p++;
447fe618babSjoerg } else {
448fe618babSjoerg p = URL;
449fe618babSjoerg }
450fe618babSjoerg
451fe618babSjoerg /* hostname */
452fe618babSjoerg #ifdef INET6
453fe618babSjoerg if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
454fe618babSjoerg (*++q == '\0' || *q == '/' || *q == ':')) {
455fe618babSjoerg if ((i = q - p - 2) > URL_HOSTLEN)
456fe618babSjoerg i = URL_HOSTLEN;
457fe618babSjoerg strncpy(u->host, ++p, i);
458fe618babSjoerg p = q;
459fe618babSjoerg } else
460fe618babSjoerg #endif
461fe618babSjoerg for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
462fe618babSjoerg if (i < URL_HOSTLEN)
463fe618babSjoerg u->host[i++] = *p;
464fe618babSjoerg
465fe618babSjoerg /* port */
466fe618babSjoerg if (*p == ':') {
467fe618babSjoerg for (q = ++p; *q && (*q != '/'); q++)
468fe618babSjoerg if (isdigit((unsigned char)*q))
469fe618babSjoerg u->port = u->port * 10 + (*q - '0');
470fe618babSjoerg else {
471fe618babSjoerg /* invalid port */
472fe618babSjoerg url_seterr(URL_BAD_PORT);
473fe618babSjoerg goto ouch;
474fe618babSjoerg }
475fe618babSjoerg p = q;
476fe618babSjoerg }
477fe618babSjoerg
478fe618babSjoerg /* document */
479fe618babSjoerg if (!*p)
480fe618babSjoerg p = "/";
481fe618babSjoerg
482fe618babSjoerg quote_doc:
483fe618babSjoerg count = 1;
484fe618babSjoerg for (i = 0; p[i] != '\0'; ++i) {
485fe618babSjoerg if ((!pre_quoted && p[i] == '%') ||
486fe618babSjoerg !fetch_urlpath_safe(p[i]))
487fe618babSjoerg count += 3;
488fe618babSjoerg else
489fe618babSjoerg ++count;
490fe618babSjoerg }
491fe618babSjoerg
492fe618babSjoerg if ((u->doc = malloc(count)) == NULL) {
493fe618babSjoerg fetch_syserr();
494fe618babSjoerg goto ouch;
495fe618babSjoerg }
496fe618babSjoerg for (i = 0; *p != '\0'; ++p) {
497fe618babSjoerg if ((!pre_quoted && *p == '%') ||
498fe618babSjoerg !fetch_urlpath_safe(*p)) {
499fe618babSjoerg u->doc[i++] = '%';
500fe618babSjoerg if ((unsigned char)*p < 160)
501fe618babSjoerg u->doc[i++] = '0' + ((unsigned char)*p) / 16;
502fe618babSjoerg else
503fe618babSjoerg u->doc[i++] = 'a' - 10 + ((unsigned char)*p) / 16;
504d4553746Sjoerg if ((unsigned char)*p % 16 < 10)
505fe618babSjoerg u->doc[i++] = '0' + ((unsigned char)*p) % 16;
506fe618babSjoerg else
507fe618babSjoerg u->doc[i++] = 'a' - 10 + ((unsigned char)*p) % 16;
508fe618babSjoerg } else
509fe618babSjoerg u->doc[i++] = *p;
510fe618babSjoerg }
511fe618babSjoerg u->doc[i] = '\0';
512fe618babSjoerg
513fe618babSjoerg return (u);
514fe618babSjoerg
515fe618babSjoerg ouch:
516fe618babSjoerg free(u);
517fe618babSjoerg return (NULL);
518fe618babSjoerg }
519fe618babSjoerg
520fe618babSjoerg /*
521fe618babSjoerg * Free a URL
522fe618babSjoerg */
523fe618babSjoerg void
fetchFreeURL(struct url * u)524fe618babSjoerg fetchFreeURL(struct url *u)
525fe618babSjoerg {
526fe618babSjoerg free(u->doc);
527fe618babSjoerg free(u);
528fe618babSjoerg }
529fe618babSjoerg
/*
 * Convert one hexadecimal digit character to its numeric value (0..15).
 * Upper- and lower-case letters are both accepted.  The result is only
 * meaningful for hexadecimal digits; fetchUnquotePath() checks with
 * isxdigit() before calling.
 */
static char
xdigit2digit(char digit)
{
	char lower = tolower((unsigned char)digit);

	return (lower >= 'a' && lower <= 'f') ?
	    lower - 'a' + 10 : lower - '0';
}
541fe618babSjoerg
542fe618babSjoerg /*
543fe618babSjoerg * Unquote whole URL.
544fe618babSjoerg * Skips optional parts like query or fragment identifier.
545fe618babSjoerg */
546fe618babSjoerg char *
fetchUnquotePath(struct url * url)547fe618babSjoerg fetchUnquotePath(struct url *url)
548fe618babSjoerg {
549fe618babSjoerg char *unquoted;
550fe618babSjoerg const char *iter;
551fe618babSjoerg size_t i;
552fe618babSjoerg
553fe618babSjoerg if ((unquoted = malloc(strlen(url->doc) + 1)) == NULL)
554fe618babSjoerg return NULL;
555fe618babSjoerg
556fe618babSjoerg for (i = 0, iter = url->doc; *iter != '\0'; ++iter) {
557fe618babSjoerg if (*iter == '#' || *iter == '?')
558fe618babSjoerg break;
559fe618babSjoerg if (iter[0] != '%' ||
560fe618babSjoerg !isxdigit((unsigned char)iter[1]) ||
561fe618babSjoerg !isxdigit((unsigned char)iter[2])) {
562fe618babSjoerg unquoted[i++] = *iter;
563fe618babSjoerg continue;
564fe618babSjoerg }
565fe618babSjoerg unquoted[i++] = xdigit2digit(iter[1]) * 16 +
566fe618babSjoerg xdigit2digit(iter[2]);
567fe618babSjoerg iter += 2;
568fe618babSjoerg }
569fe618babSjoerg unquoted[i] = '\0';
570fe618babSjoerg return unquoted;
571fe618babSjoerg }
572fe618babSjoerg
573fe618babSjoerg
/*
 * Return a newly allocated copy of the file name component of a URL: the
 * part of the unquoted path after its last '/'.  If the unquoted path has
 * no '/', the whole unquoted path is returned.  Returns NULL if memory
 * cannot be allocated; the caller frees the result.
 */
char *
fetchUnquoteFilename(struct url *url)
{
	char *path, *name;
	const char *slash;

	path = fetchUnquotePath(url);
	if (path == NULL)
		return NULL;

	slash = strrchr(path, '/');
	if (slash != NULL) {
		name = strdup(slash + 1);
		free(path);
		return name;
	}

	return path;
}
592fe618babSjoerg
593fe618babSjoerg char *
fetchStringifyURL(const struct url * url)594fe618babSjoerg fetchStringifyURL(const struct url *url)
595fe618babSjoerg {
596fe618babSjoerg size_t total;
597fe618babSjoerg char *doc;
598fe618babSjoerg
599fe618babSjoerg /* scheme :// user : pwd @ host :port doc */
600fe618babSjoerg total = strlen(url->scheme) + 3 + strlen(url->user) + 1 +
601fe618babSjoerg strlen(url->pwd) + 1 + strlen(url->host) + 6 + strlen(url->doc) + 1;
602fe618babSjoerg if ((doc = malloc(total)) == NULL)
603fe618babSjoerg return NULL;
604fe618babSjoerg if (url->port != 0)
605fe618babSjoerg snprintf(doc, total, "%s%s%s%s%s%s%s:%d%s",
606fe618babSjoerg url->scheme,
607fe618babSjoerg url->scheme[0] != '\0' ? "://" : "",
608fe618babSjoerg url->user,
609fe618babSjoerg url->pwd[0] != '\0' ? ":" : "",
610fe618babSjoerg url->pwd,
611fe618babSjoerg url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
612fe618babSjoerg url->host,
613fe618babSjoerg (int)url->port,
614fe618babSjoerg url->doc);
615fe618babSjoerg else {
616fe618babSjoerg snprintf(doc, total, "%s%s%s%s%s%s%s%s",
617fe618babSjoerg url->scheme,
618fe618babSjoerg url->scheme[0] != '\0' ? "://" : "",
619fe618babSjoerg url->user,
620fe618babSjoerg url->pwd[0] != '\0' ? ":" : "",
621fe618babSjoerg url->pwd,
622fe618babSjoerg url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
623fe618babSjoerg url->host,
624fe618babSjoerg url->doc);
625fe618babSjoerg }
626fe618babSjoerg return doc;
627fe618babSjoerg }
628