14ca1ab94SDag-Erling Smørgrav /*- 25e53a4f9SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 35e53a4f9SPedro F. Giffuni * 4578153f1SDag-Erling Smørgrav * Copyright (c) 1998-2004 Dag-Erling Smørgrav 54ca1ab94SDag-Erling Smørgrav * All rights reserved. 64ca1ab94SDag-Erling Smørgrav * 74ca1ab94SDag-Erling Smørgrav * Redistribution and use in source and binary forms, with or without 84ca1ab94SDag-Erling Smørgrav * modification, are permitted provided that the following conditions 94ca1ab94SDag-Erling Smørgrav * are met: 104ca1ab94SDag-Erling Smørgrav * 1. Redistributions of source code must retain the above copyright 114ca1ab94SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer 124ca1ab94SDag-Erling Smørgrav * in this position and unchanged. 134ca1ab94SDag-Erling Smørgrav * 2. Redistributions in binary form must reproduce the above copyright 144ca1ab94SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer in the 154ca1ab94SDag-Erling Smørgrav * documentation and/or other materials provided with the distribution. 164ca1ab94SDag-Erling Smørgrav * 3. The name of the author may not be used to endorse or promote products 174ca1ab94SDag-Erling Smørgrav * derived from this software without specific prior written permission 184ca1ab94SDag-Erling Smørgrav * 194ca1ab94SDag-Erling Smørgrav * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 204ca1ab94SDag-Erling Smørgrav * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 214ca1ab94SDag-Erling Smørgrav * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 224ca1ab94SDag-Erling Smørgrav * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 234ca1ab94SDag-Erling Smørgrav * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 244ca1ab94SDag-Erling Smørgrav * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 254ca1ab94SDag-Erling Smørgrav * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 264ca1ab94SDag-Erling Smørgrav * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 274ca1ab94SDag-Erling Smørgrav * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 284ca1ab94SDag-Erling Smørgrav * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 294ca1ab94SDag-Erling Smørgrav */ 304ca1ab94SDag-Erling Smørgrav 314ca1ab94SDag-Erling Smørgrav #include <sys/param.h> 324ca1ab94SDag-Erling Smørgrav 338d9de5b1SDag-Erling Smørgrav #include <netinet/in.h> 348d9de5b1SDag-Erling Smørgrav 358d9de5b1SDag-Erling Smørgrav #include <errno.h> 364ca1ab94SDag-Erling Smørgrav #include <ctype.h> 374ca1ab94SDag-Erling Smørgrav #include <stdio.h> 384ca1ab94SDag-Erling Smørgrav #include <stdlib.h> 394ca1ab94SDag-Erling Smørgrav #include <string.h> 404ca1ab94SDag-Erling Smørgrav 414ca1ab94SDag-Erling Smørgrav #include "fetch.h" 42d8acd8dcSDag-Erling Smørgrav #include "common.h" 434ca1ab94SDag-Erling Smørgrav 446490b215SDag-Erling Smørgrav auth_t fetchAuthMethod; 450fba3a00SDag-Erling Smørgrav int fetchLastErrCode; 46ba101983SDag-Erling Smørgrav char fetchLastErrString[MAXERRSTRING]; 47fc6e9e65SDag-Erling Smørgrav int fetchTimeout; 48a1bb3f48SDag-Erling Smørgrav int fetchRestartCalls = 1; 497eb2f34dSDag-Erling Smørgrav int fetchDebug; 500fba3a00SDag-Erling Smørgrav 510fba3a00SDag-Erling Smørgrav 52d8acd8dcSDag-Erling Smørgrav /*** Local data **************************************************************/ 53d8acd8dcSDag-Erling Smørgrav 54d8acd8dcSDag-Erling Smørgrav /* 55d8acd8dcSDag-Erling Smørgrav * Error messages for parser errors 56d8acd8dcSDag-Erling Smørgrav */ 57d8acd8dcSDag-Erling Smørgrav #define URL_MALFORMED 1 58d8acd8dcSDag-Erling Smørgrav #define URL_BAD_SCHEME 2 59d8acd8dcSDag-Erling Smørgrav #define URL_BAD_PORT 3 60a1b37df2SDag-Erling Smørgrav static struct fetcherr url_errlist[] = { 61d8acd8dcSDag-Erling Smørgrav { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 62d8acd8dcSDag-Erling Smørgrav { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 63d8acd8dcSDag-Erling Smørgrav { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 64d8acd8dcSDag-Erling Smørgrav { -1, FETCH_UNKNOWN, "Unknown parser error" } 65d8acd8dcSDag-Erling Smørgrav }; 66d8acd8dcSDag-Erling Smørgrav 67d8acd8dcSDag-Erling Smørgrav 68d8acd8dcSDag-Erling Smørgrav /*** Public API **************************************************************/ 694ca1ab94SDag-Erling Smørgrav 70842a95ccSDag-Erling Smørgrav /* 71842a95ccSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 72842a95ccSDag-Erling Smørgrav * read-only stream connected to the document referenced by the URL. 731a5faa10SDag-Erling Smørgrav * Also fill out the struct url_stat. 74842a95ccSDag-Erling Smørgrav */ 75ecc91352SDag-Erling Smørgrav FILE * 7638c7e4a6SArchie Cobbs fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 77ecc91352SDag-Erling Smørgrav { 78c97925adSHajimu UMEMOTO 7902e6bec1SDag-Erling Smørgrav if (us != NULL) { 8002e6bec1SDag-Erling Smørgrav us->size = -1; 8102e6bec1SDag-Erling Smørgrav us->atime = us->mtime = 0; 8202e6bec1SDag-Erling Smørgrav } 838d9de5b1SDag-Erling Smørgrav if (strcmp(URL->scheme, SCHEME_FILE) == 0) 84e19e6098SDag-Erling Smørgrav return (fetchXGetFile(URL, us, flags)); 858d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 86111e2510SDag-Erling Smørgrav return (fetchXGetFTP(URL, us, flags)); 878d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 88e19e6098SDag-Erling Smørgrav return (fetchXGetHTTP(URL, us, flags)); 898d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 90111e2510SDag-Erling Smørgrav return (fetchXGetHTTP(URL, us, flags)); 91a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_SCHEME); 92e19e6098SDag-Erling Smørgrav return (NULL); 93d8acd8dcSDag-Erling Smørgrav } 94ecc91352SDag-Erling Smørgrav 95842a95ccSDag-Erling Smørgrav /* 96842a95ccSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 971a5faa10SDag-Erling Smørgrav * read-only stream connected to the document referenced by the URL. 981a5faa10SDag-Erling Smørgrav */ 991a5faa10SDag-Erling Smørgrav FILE * 10038c7e4a6SArchie Cobbs fetchGet(struct url *URL, const char *flags) 1011a5faa10SDag-Erling Smørgrav { 102e19e6098SDag-Erling Smørgrav return (fetchXGet(URL, NULL, flags)); 1031a5faa10SDag-Erling Smørgrav } 1041a5faa10SDag-Erling Smørgrav 1051a5faa10SDag-Erling Smørgrav /* 1061a5faa10SDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 107842a95ccSDag-Erling Smørgrav * write-only stream connected to the document referenced by the URL. 108842a95ccSDag-Erling Smørgrav */ 109ecc91352SDag-Erling Smørgrav FILE * 11038c7e4a6SArchie Cobbs fetchPut(struct url *URL, const char *flags) 111ecc91352SDag-Erling Smørgrav { 112c97925adSHajimu UMEMOTO 1138d9de5b1SDag-Erling Smørgrav if (strcmp(URL->scheme, SCHEME_FILE) == 0) 114e19e6098SDag-Erling Smørgrav return (fetchPutFile(URL, flags)); 1158d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 116111e2510SDag-Erling Smørgrav return (fetchPutFTP(URL, flags)); 1178d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 118e19e6098SDag-Erling Smørgrav return (fetchPutHTTP(URL, flags)); 1198d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 120111e2510SDag-Erling Smørgrav return (fetchPutHTTP(URL, flags)); 121a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_SCHEME); 122e19e6098SDag-Erling Smørgrav return (NULL); 123d8acd8dcSDag-Erling Smørgrav } 124d8acd8dcSDag-Erling Smørgrav 125d8acd8dcSDag-Erling Smørgrav /* 126d8acd8dcSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return the 127d8acd8dcSDag-Erling Smørgrav * size of the document referenced by the URL if it exists. 128d8acd8dcSDag-Erling Smørgrav */ 129d8acd8dcSDag-Erling Smørgrav int 13038c7e4a6SArchie Cobbs fetchStat(struct url *URL, struct url_stat *us, const char *flags) 131d8acd8dcSDag-Erling Smørgrav { 132c97925adSHajimu UMEMOTO 13302e6bec1SDag-Erling Smørgrav if (us != NULL) { 13402e6bec1SDag-Erling Smørgrav us->size = -1; 13502e6bec1SDag-Erling Smørgrav us->atime = us->mtime = 0; 13602e6bec1SDag-Erling Smørgrav } 1378d9de5b1SDag-Erling Smørgrav if (strcmp(URL->scheme, SCHEME_FILE) == 0) 138e19e6098SDag-Erling Smørgrav return (fetchStatFile(URL, us, flags)); 1398d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 140e19e6098SDag-Erling Smørgrav return (fetchStatFTP(URL, us, flags)); 1418d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 142111e2510SDag-Erling Smørgrav return (fetchStatHTTP(URL, us, flags)); 1438d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 144111e2510SDag-Erling Smørgrav return (fetchStatHTTP(URL, us, flags)); 145a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_SCHEME); 146e19e6098SDag-Erling Smørgrav return (-1); 147ecc91352SDag-Erling Smørgrav } 148ecc91352SDag-Erling Smørgrav 149842a95ccSDag-Erling Smørgrav /* 150ce71b736SDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 151ce71b736SDag-Erling Smørgrav * list of files in the directory pointed to by the URL. 152ce71b736SDag-Erling Smørgrav */ 153ce71b736SDag-Erling Smørgrav struct url_ent * 15438c7e4a6SArchie Cobbs fetchList(struct url *URL, const char *flags) 155ce71b736SDag-Erling Smørgrav { 156c97925adSHajimu UMEMOTO 1578d9de5b1SDag-Erling Smørgrav if (strcmp(URL->scheme, SCHEME_FILE) == 0) 158e19e6098SDag-Erling Smørgrav return (fetchListFile(URL, flags)); 1598d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 160e19e6098SDag-Erling Smørgrav return (fetchListFTP(URL, flags)); 1618d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 162111e2510SDag-Erling Smørgrav return (fetchListHTTP(URL, flags)); 1638d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 164111e2510SDag-Erling Smørgrav return (fetchListHTTP(URL, flags)); 165a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_SCHEME); 166e19e6098SDag-Erling Smørgrav return (NULL); 167ce71b736SDag-Erling Smørgrav } 168ce71b736SDag-Erling Smørgrav 169ce71b736SDag-Erling Smørgrav /* 1701a5faa10SDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchXGet(). 171842a95ccSDag-Erling Smørgrav */ 1724ca1ab94SDag-Erling Smørgrav FILE * 17338c7e4a6SArchie Cobbs fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 1744ca1ab94SDag-Erling Smørgrav { 175d8acd8dcSDag-Erling Smørgrav struct url *u; 1764ca1ab94SDag-Erling Smørgrav FILE *f; 1774ca1ab94SDag-Erling Smørgrav 1784ca1ab94SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 179e19e6098SDag-Erling Smørgrav return (NULL); 1804ca1ab94SDag-Erling Smørgrav 1811a5faa10SDag-Erling Smørgrav f = fetchXGet(u, us, flags); 1824ca1ab94SDag-Erling Smørgrav 18360245e42SDag-Erling Smørgrav fetchFreeURL(u); 184e19e6098SDag-Erling Smørgrav return (f); 1854ca1ab94SDag-Erling Smørgrav } 1864ca1ab94SDag-Erling Smørgrav 1871a5faa10SDag-Erling Smørgrav /* 1881a5faa10SDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchGet(). 1891a5faa10SDag-Erling Smørgrav */ 1901a5faa10SDag-Erling Smørgrav FILE * 19138c7e4a6SArchie Cobbs fetchGetURL(const char *URL, const char *flags) 1921a5faa10SDag-Erling Smørgrav { 193e19e6098SDag-Erling Smørgrav return (fetchXGetURL(URL, NULL, flags)); 1941a5faa10SDag-Erling Smørgrav } 1954ca1ab94SDag-Erling Smørgrav 196842a95ccSDag-Erling Smørgrav /* 197842a95ccSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchPut(). 198842a95ccSDag-Erling Smørgrav */ 1994ca1ab94SDag-Erling Smørgrav FILE * 20038c7e4a6SArchie Cobbs fetchPutURL(const char *URL, const char *flags) 2014ca1ab94SDag-Erling Smørgrav { 202d8acd8dcSDag-Erling Smørgrav struct url *u; 2034ca1ab94SDag-Erling Smørgrav FILE *f; 2044ca1ab94SDag-Erling Smørgrav 2054ca1ab94SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 206e19e6098SDag-Erling Smørgrav return (NULL); 2074ca1ab94SDag-Erling Smørgrav 208ecc91352SDag-Erling Smørgrav f = fetchPut(u, flags); 2094ca1ab94SDag-Erling Smørgrav 21060245e42SDag-Erling Smørgrav fetchFreeURL(u); 211e19e6098SDag-Erling Smørgrav return (f); 2124ca1ab94SDag-Erling Smørgrav } 2134ca1ab94SDag-Erling Smørgrav 2144ca1ab94SDag-Erling Smørgrav /* 215d8acd8dcSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchStat(). 216d8acd8dcSDag-Erling Smørgrav */ 217d8acd8dcSDag-Erling Smørgrav int 21838c7e4a6SArchie Cobbs fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 219d8acd8dcSDag-Erling Smørgrav { 220d8acd8dcSDag-Erling Smørgrav struct url *u; 221d8acd8dcSDag-Erling Smørgrav int s; 222d8acd8dcSDag-Erling Smørgrav 223d8acd8dcSDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 224e19e6098SDag-Erling Smørgrav return (-1); 225d8acd8dcSDag-Erling Smørgrav 226d8acd8dcSDag-Erling Smørgrav s = fetchStat(u, us, flags); 227d8acd8dcSDag-Erling Smørgrav 22860245e42SDag-Erling Smørgrav fetchFreeURL(u); 229e19e6098SDag-Erling Smørgrav return (s); 230d8acd8dcSDag-Erling Smørgrav } 231d8acd8dcSDag-Erling Smørgrav 232d8acd8dcSDag-Erling Smørgrav /* 233ce71b736SDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchList(). 234ce71b736SDag-Erling Smørgrav */ 235ce71b736SDag-Erling Smørgrav struct url_ent * 23638c7e4a6SArchie Cobbs fetchListURL(const char *URL, const char *flags) 237ce71b736SDag-Erling Smørgrav { 238ce71b736SDag-Erling Smørgrav struct url *u; 239ce71b736SDag-Erling Smørgrav struct url_ent *ue; 240ce71b736SDag-Erling Smørgrav 241ce71b736SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 242e19e6098SDag-Erling Smørgrav return (NULL); 243ce71b736SDag-Erling Smørgrav 244ce71b736SDag-Erling Smørgrav ue = fetchList(u, flags); 245ce71b736SDag-Erling Smørgrav 24660245e42SDag-Erling Smørgrav fetchFreeURL(u); 247e19e6098SDag-Erling Smørgrav return (ue); 248ce71b736SDag-Erling Smørgrav } 249ce71b736SDag-Erling Smørgrav 250ce71b736SDag-Erling Smørgrav /* 2519a964d6aSDag-Erling Smørgrav * Make a URL 2529a964d6aSDag-Erling Smørgrav */ 2539a964d6aSDag-Erling Smørgrav struct url * 25438c7e4a6SArchie Cobbs fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 25538c7e4a6SArchie Cobbs const char *user, const char *pwd) 2569a964d6aSDag-Erling Smørgrav { 2579a964d6aSDag-Erling Smørgrav struct url *u; 2589a964d6aSDag-Erling Smørgrav 2599a964d6aSDag-Erling Smørgrav if (!scheme || (!host && !doc)) { 260a1b37df2SDag-Erling Smørgrav url_seterr(URL_MALFORMED); 261e19e6098SDag-Erling Smørgrav return (NULL); 2629a964d6aSDag-Erling Smørgrav } 2639a964d6aSDag-Erling Smørgrav 2649a964d6aSDag-Erling Smørgrav if (port < 0 || port > 65535) { 265a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_PORT); 266e19e6098SDag-Erling Smørgrav return (NULL); 2679a964d6aSDag-Erling Smørgrav } 2689a964d6aSDag-Erling Smørgrav 2699a964d6aSDag-Erling Smørgrav /* allocate struct url */ 270930105c1SDag-Erling Smørgrav if ((u = calloc(1, sizeof(*u))) == NULL) { 271a1b37df2SDag-Erling Smørgrav fetch_syserr(); 272e19e6098SDag-Erling Smørgrav return (NULL); 2739a964d6aSDag-Erling Smørgrav } 2745f04ebd4SDag-Erling Smørgrav u->netrcfd = -1; 2759a964d6aSDag-Erling Smørgrav 2769a964d6aSDag-Erling Smørgrav if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 277a1b37df2SDag-Erling Smørgrav fetch_syserr(); 2789a964d6aSDag-Erling Smørgrav free(u); 279e19e6098SDag-Erling Smørgrav return (NULL); 2809a964d6aSDag-Erling Smørgrav } 2819a964d6aSDag-Erling Smørgrav 282930105c1SDag-Erling Smørgrav #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 2839a964d6aSDag-Erling Smørgrav seturl(scheme); 2849a964d6aSDag-Erling Smørgrav seturl(host); 2859a964d6aSDag-Erling Smørgrav seturl(user); 2869a964d6aSDag-Erling Smørgrav seturl(pwd); 2879a964d6aSDag-Erling Smørgrav #undef seturl 2889a964d6aSDag-Erling Smørgrav u->port = port; 2899a964d6aSDag-Erling Smørgrav 290e19e6098SDag-Erling Smørgrav return (u); 2919a964d6aSDag-Erling Smørgrav } 2929a964d6aSDag-Erling Smørgrav 2939a964d6aSDag-Erling Smørgrav /* 2940fa39199SEd Maste * Return value of the given hex digit. 2950fa39199SEd Maste */ 2960fa39199SEd Maste static int 2970fa39199SEd Maste fetch_hexval(char ch) 2980fa39199SEd Maste { 2990fa39199SEd Maste 3000fa39199SEd Maste if (ch >= '0' && ch <= '9') 3010fa39199SEd Maste return (ch - '0'); 3020fa39199SEd Maste else if (ch >= 'a' && ch <= 'f') 3030fa39199SEd Maste return (ch - 'a' + 10); 3040fa39199SEd Maste else if (ch >= 'A' && ch <= 'F') 3050fa39199SEd Maste return (ch - 'A' + 10); 3060fa39199SEd Maste return (-1); 3070fa39199SEd Maste } 3080fa39199SEd Maste 3090fa39199SEd Maste /* 3100fa39199SEd Maste * Decode percent-encoded URL component from src into dst, stopping at end 3110fa39199SEd Maste * of string, or at @ or : separators. Returns a pointer to the unhandled 3120fa39199SEd Maste * part of the input string (null terminator, @, or :). No terminator is 3130fa39199SEd Maste * written to dst (it is the caller's responsibility). 3140fa39199SEd Maste */ 3150fa39199SEd Maste static const char * 3160fa39199SEd Maste fetch_pctdecode(char *dst, const char *src, size_t dlen) 3170fa39199SEd Maste { 3180fa39199SEd Maste int d1, d2; 3190fa39199SEd Maste char c; 3200fa39199SEd Maste const char *s; 3210fa39199SEd Maste 3220fa39199SEd Maste for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) { 3230fa39199SEd Maste if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 && 3240fa39199SEd Maste (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) { 3250fa39199SEd Maste c = d1 << 4 | d2; 3260fa39199SEd Maste s += 2; 32783372bdaSEd Maste } else if (s[0] == '%') { 32883372bdaSEd Maste /* Invalid escape sequence. */ 32983372bdaSEd Maste return (NULL); 3300fa39199SEd Maste } else { 3310fa39199SEd Maste c = *s; 3320fa39199SEd Maste } 3330fa39199SEd Maste if (dlen-- > 0) 3340fa39199SEd Maste *dst++ = c; 3356fb3f994SGordon Tetlow else 3366fb3f994SGordon Tetlow return (NULL); 3370fa39199SEd Maste } 3380fa39199SEd Maste return (s); 3390fa39199SEd Maste } 3400fa39199SEd Maste 3410fa39199SEd Maste /* 3424ca1ab94SDag-Erling Smørgrav * Split an URL into components. URL syntax is: 34359769ab1SDag-Erling Smørgrav * [method:/][/[user[:pwd]@]host[:port]/][document] 3444ca1ab94SDag-Erling Smørgrav * This almost, but not quite, RFC1738 URL syntax. 3454ca1ab94SDag-Erling Smørgrav */ 346d8acd8dcSDag-Erling Smørgrav struct url * 34738c7e4a6SArchie Cobbs fetchParseURL(const char *URL) 3484ca1ab94SDag-Erling Smørgrav { 34938c7e4a6SArchie Cobbs char *doc; 35038c7e4a6SArchie Cobbs const char *p, *q; 351d8acd8dcSDag-Erling Smørgrav struct url *u; 3528d9de5b1SDag-Erling Smørgrav int i, n; 3534ca1ab94SDag-Erling Smørgrav 354d8acd8dcSDag-Erling Smørgrav /* allocate struct url */ 355930105c1SDag-Erling Smørgrav if ((u = calloc(1, sizeof(*u))) == NULL) { 356a1b37df2SDag-Erling Smørgrav fetch_syserr(); 357e19e6098SDag-Erling Smørgrav return (NULL); 358d8acd8dcSDag-Erling Smørgrav } 3595f04ebd4SDag-Erling Smørgrav u->netrcfd = -1; 3604ca1ab94SDag-Erling Smørgrav 3614ca1ab94SDag-Erling Smørgrav /* scheme name */ 36259769ab1SDag-Erling Smørgrav if ((p = strstr(URL, ":/"))) { 3638d9de5b1SDag-Erling Smørgrav if (p - URL > URL_SCHEMELEN) 3648d9de5b1SDag-Erling Smørgrav goto ouch; 3658d9de5b1SDag-Erling Smørgrav for (i = 0; URL + i < p; i++) 3668d9de5b1SDag-Erling Smørgrav u->scheme[i] = tolower((unsigned char)URL[i]); 36759769ab1SDag-Erling Smørgrav URL = ++p; 36859769ab1SDag-Erling Smørgrav /* 36959769ab1SDag-Erling Smørgrav * Only one slash: no host, leave slash as part of document 37059769ab1SDag-Erling Smørgrav * Two slashes: host follows, strip slashes 37159769ab1SDag-Erling Smørgrav */ 37259769ab1SDag-Erling Smørgrav if (URL[1] == '/') 37359769ab1SDag-Erling Smørgrav URL = (p += 2); 3741ba84976SDag-Erling Smørgrav } else { 3751ba84976SDag-Erling Smørgrav p = URL; 376d8acd8dcSDag-Erling Smørgrav } 3775b2ad516SDag-Erling Smørgrav if (!*URL || *URL == '/' || *URL == '.' || 37873b3e4dfSStefan Eßer (u->scheme[0] == '\0' && 37973b3e4dfSStefan Eßer strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 3804ca1ab94SDag-Erling Smørgrav goto nohost; 3814ca1ab94SDag-Erling Smørgrav 3824ca1ab94SDag-Erling Smørgrav p = strpbrk(URL, "/@"); 3830fba3a00SDag-Erling Smørgrav if (p && *p == '@') { 3844ca1ab94SDag-Erling Smørgrav /* username */ 3850fa39199SEd Maste q = fetch_pctdecode(u->user, URL, URL_USERLEN); 3866fb3f994SGordon Tetlow if (q == NULL) 3876fb3f994SGordon Tetlow goto ouch; 3884ca1ab94SDag-Erling Smørgrav 3894ca1ab94SDag-Erling Smørgrav /* password */ 3906fb3f994SGordon Tetlow if (*q == ':') { 3919bc22394STim Kientzle q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 3926fb3f994SGordon Tetlow if (q == NULL) 3936fb3f994SGordon Tetlow goto ouch; 3946fb3f994SGordon Tetlow } 3954ca1ab94SDag-Erling Smørgrav p++; 396ab39353eSDag-Erling Smørgrav } else { 397ab39353eSDag-Erling Smørgrav p = URL; 398ab39353eSDag-Erling Smørgrav } 3994ca1ab94SDag-Erling Smørgrav 4004ca1ab94SDag-Erling Smørgrav /* hostname */ 4018d9de5b1SDag-Erling Smørgrav if (*p == '[') { 4028cd71a0eSKa Ho Ng q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef."); 4038d9de5b1SDag-Erling Smørgrav if (*q++ != ']') 4048d9de5b1SDag-Erling Smørgrav goto ouch; 40508a49957SDag-Erling Smørgrav } else { 4068d9de5b1SDag-Erling Smørgrav /* valid characters in a DNS name */ 4078d9de5b1SDag-Erling Smørgrav q = p + strspn(p, "-." "0123456789" 4088d9de5b1SDag-Erling Smørgrav "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_" 4098d9de5b1SDag-Erling Smørgrav "abcdefghijklmnopqrstuvwxyz"); 41008a49957SDag-Erling Smørgrav } 4118d9de5b1SDag-Erling Smørgrav if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN) 4128d9de5b1SDag-Erling Smørgrav goto ouch; 4138d9de5b1SDag-Erling Smørgrav for (i = 0; p + i < q; i++) 4148d9de5b1SDag-Erling Smørgrav u->host[i] = tolower((unsigned char)p[i]); 4158d9de5b1SDag-Erling Smørgrav u->host[i] = '\0'; 4168d9de5b1SDag-Erling Smørgrav p = q; 4174ca1ab94SDag-Erling Smørgrav 4184ca1ab94SDag-Erling Smørgrav /* port */ 4194ca1ab94SDag-Erling Smørgrav if (*p == ':') { 4208d9de5b1SDag-Erling Smørgrav for (n = 0, q = ++p; *q && (*q != '/'); q++) { 4218d9de5b1SDag-Erling Smørgrav if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) { 4228d9de5b1SDag-Erling Smørgrav n = n * 10 + (*q - '0'); 4238d9de5b1SDag-Erling Smørgrav } else { 424d8acd8dcSDag-Erling Smørgrav /* invalid port */ 425a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_PORT); 426d8acd8dcSDag-Erling Smørgrav goto ouch; 427d8acd8dcSDag-Erling Smørgrav } 4288d9de5b1SDag-Erling Smørgrav } 429fb860ed0SKa Ho Ng if (p != q && (n < 1 || n > IPPORT_MAX)) 4308d9de5b1SDag-Erling Smørgrav goto ouch; 4318d9de5b1SDag-Erling Smørgrav u->port = n; 432551858f0SDag-Erling Smørgrav p = q; 4334ca1ab94SDag-Erling Smørgrav } 4344ca1ab94SDag-Erling Smørgrav 4354ca1ab94SDag-Erling Smørgrav nohost: 4364ca1ab94SDag-Erling Smørgrav /* document */ 43760245e42SDag-Erling Smørgrav if (!*p) 43860245e42SDag-Erling Smørgrav p = "/"; 43960245e42SDag-Erling Smørgrav 4408d9de5b1SDag-Erling Smørgrav if (strcmp(u->scheme, SCHEME_HTTP) == 0 || 4418d9de5b1SDag-Erling Smørgrav strcmp(u->scheme, SCHEME_HTTPS) == 0) { 44223fe6d7aSDag-Erling Smørgrav const char hexnums[] = "0123456789abcdef"; 44323fe6d7aSDag-Erling Smørgrav 444f9c2053bSDag-Erling Smørgrav /* percent-escape whitespace. */ 445f9c2053bSDag-Erling Smørgrav if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 446a1b37df2SDag-Erling Smørgrav fetch_syserr(); 44723fe6d7aSDag-Erling Smørgrav goto ouch; 44823fe6d7aSDag-Erling Smørgrav } 449f9c2053bSDag-Erling Smørgrav u->doc = doc; 450*1af7d5f3SPietro Cerutti /* fragments are reserved for client-side processing, see 451*1af7d5f3SPietro Cerutti * https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1 452*1af7d5f3SPietro Cerutti */ 453*1af7d5f3SPietro Cerutti while (*p != '\0' && *p != '#') { 454facd9827SDag-Erling Smørgrav if (!isspace((unsigned char)*p)) { 45523fe6d7aSDag-Erling Smørgrav *doc++ = *p++; 45623fe6d7aSDag-Erling Smørgrav } else { 45723fe6d7aSDag-Erling Smørgrav *doc++ = '%'; 45823fe6d7aSDag-Erling Smørgrav *doc++ = hexnums[((unsigned int)*p) >> 4]; 45923fe6d7aSDag-Erling Smørgrav *doc++ = hexnums[((unsigned int)*p) & 0xf]; 46023fe6d7aSDag-Erling Smørgrav p++; 46123fe6d7aSDag-Erling Smørgrav } 46223fe6d7aSDag-Erling Smørgrav } 46323fe6d7aSDag-Erling Smørgrav *doc = '\0'; 46423fe6d7aSDag-Erling Smørgrav } else if ((u->doc = strdup(p)) == NULL) { 465a1b37df2SDag-Erling Smørgrav fetch_syserr(); 4664ca1ab94SDag-Erling Smørgrav goto ouch; 467d8acd8dcSDag-Erling Smørgrav } 4684ca1ab94SDag-Erling Smørgrav 469c5712d6dSDag-Erling Smørgrav DEBUGF("scheme: \"%s\"\n" 47008a49957SDag-Erling Smørgrav "user: \"%s\"\n" 47108a49957SDag-Erling Smørgrav "password: \"%s\"\n" 47208a49957SDag-Erling Smørgrav "host: \"%s\"\n" 47308a49957SDag-Erling Smørgrav "port: \"%d\"\n" 47408a49957SDag-Erling Smørgrav "document: \"%s\"\n", 4754ca1ab94SDag-Erling Smørgrav u->scheme, u->user, u->pwd, 476c5712d6dSDag-Erling Smørgrav u->host, u->port, u->doc); 4774ca1ab94SDag-Erling Smørgrav 478e19e6098SDag-Erling Smørgrav return (u); 4794ca1ab94SDag-Erling Smørgrav 4804ca1ab94SDag-Erling Smørgrav ouch: 4814ca1ab94SDag-Erling Smørgrav free(u); 482e19e6098SDag-Erling Smørgrav return (NULL); 4834ca1ab94SDag-Erling Smørgrav } 48460245e42SDag-Erling Smørgrav 48560245e42SDag-Erling Smørgrav /* 48660245e42SDag-Erling Smørgrav * Free a URL 48760245e42SDag-Erling Smørgrav */ 48860245e42SDag-Erling Smørgrav void 48960245e42SDag-Erling Smørgrav fetchFreeURL(struct url *u) 49060245e42SDag-Erling Smørgrav { 49160245e42SDag-Erling Smørgrav free(u->doc); 49260245e42SDag-Erling Smørgrav free(u); 49360245e42SDag-Erling Smørgrav } 494