1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1998-2004 Dag-Erling Smørgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 * 30 * $FreeBSD: head/lib/libfetch/fetch.c 357579 2020-02-05 16:55:00Z emaste $ 31 */ 32 33 #include <sys/cdefs.h> 34 35 #include <sys/param.h> 36 37 #include <netinet/in.h> 38 39 #include <errno.h> 40 #include <ctype.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 45 #include "fetch.h" 46 #include "common.h" 47 48 auth_t fetchAuthMethod; 49 int fetchLastErrCode; 50 char fetchLastErrString[MAXERRSTRING]; 51 int fetchTimeout; 52 int fetchRestartCalls = 1; 53 int fetchDebug; 54 55 56 /*** Local data **************************************************************/ 57 58 /* 59 * Error messages for parser errors 60 */ 61 #define URL_MALFORMED 1 62 #define URL_BAD_SCHEME 2 63 #define URL_BAD_PORT 3 64 static struct fetcherr url_errlist[] = { 65 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 66 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 67 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 68 { -1, FETCH_UNKNOWN, "Unknown parser error" } 69 }; 70 71 72 /*** Public API **************************************************************/ 73 74 /* 75 * Select the appropriate protocol for the URL scheme, and return a 76 * read-only stream connected to the document referenced by the URL. 77 * Also fill out the struct url_stat. 
78 */ 79 FILE * 80 fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 81 { 82 83 if (us != NULL) { 84 us->size = -1; 85 us->atime = us->mtime = 0; 86 } 87 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 88 return (fetchXGetFile(URL, us, flags)); 89 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 90 return (fetchXGetFTP(URL, us, flags)); 91 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 92 return (fetchXGetHTTP(URL, us, flags)); 93 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 94 return (fetchXGetHTTP(URL, us, flags)); 95 url_seterr(URL_BAD_SCHEME); 96 return (NULL); 97 } 98 99 /* 100 * Select the appropriate protocol for the URL scheme, and return a 101 * read-only stream connected to the document referenced by the URL. 102 */ 103 FILE * 104 fetchGet(struct url *URL, const char *flags) 105 { 106 return (fetchXGet(URL, NULL, flags)); 107 } 108 109 /* 110 * Select the appropriate protocol for the URL scheme, and return a 111 * write-only stream connected to the document referenced by the URL. 112 */ 113 FILE * 114 fetchPut(struct url *URL, const char *flags) 115 { 116 117 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 118 return (fetchPutFile(URL, flags)); 119 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 120 return (fetchPutFTP(URL, flags)); 121 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 122 return (fetchPutHTTP(URL, flags)); 123 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 124 return (fetchPutHTTP(URL, flags)); 125 url_seterr(URL_BAD_SCHEME); 126 return (NULL); 127 } 128 129 /* 130 * Select the appropriate protocol for the URL scheme, and return the 131 * size of the document referenced by the URL if it exists. 
132 */ 133 int 134 fetchStat(struct url *URL, struct url_stat *us, const char *flags) 135 { 136 137 if (us != NULL) { 138 us->size = -1; 139 us->atime = us->mtime = 0; 140 } 141 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 142 return (fetchStatFile(URL, us, flags)); 143 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 144 return (fetchStatFTP(URL, us, flags)); 145 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 146 return (fetchStatHTTP(URL, us, flags)); 147 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 148 return (fetchStatHTTP(URL, us, flags)); 149 url_seterr(URL_BAD_SCHEME); 150 return (-1); 151 } 152 153 /* 154 * Select the appropriate protocol for the URL scheme, and return a 155 * list of files in the directory pointed to by the URL. 156 */ 157 struct url_ent * 158 fetchList(struct url *URL, const char *flags) 159 { 160 161 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 162 return (fetchListFile(URL, flags)); 163 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 164 return (fetchListFTP(URL, flags)); 165 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 166 return (fetchListHTTP(URL, flags)); 167 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 168 return (fetchListHTTP(URL, flags)); 169 url_seterr(URL_BAD_SCHEME); 170 return (NULL); 171 } 172 173 /* 174 * Attempt to parse the given URL; if successful, call fetchXGet(). 175 */ 176 FILE * 177 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 178 { 179 struct url *u; 180 FILE *f; 181 182 if ((u = fetchParseURL(URL)) == NULL) 183 return (NULL); 184 185 f = fetchXGet(u, us, flags); 186 187 fetchFreeURL(u); 188 return (f); 189 } 190 191 /* 192 * Attempt to parse the given URL; if successful, call fetchGet(). 193 */ 194 FILE * 195 fetchGetURL(const char *URL, const char *flags) 196 { 197 return (fetchXGetURL(URL, NULL, flags)); 198 } 199 200 /* 201 * Attempt to parse the given URL; if successful, call fetchPut(). 
202 */ 203 FILE * 204 fetchPutURL(const char *URL, const char *flags) 205 { 206 struct url *u; 207 FILE *f; 208 209 if ((u = fetchParseURL(URL)) == NULL) 210 return (NULL); 211 212 f = fetchPut(u, flags); 213 214 fetchFreeURL(u); 215 return (f); 216 } 217 218 /* 219 * Attempt to parse the given URL; if successful, call fetchStat(). 220 */ 221 int 222 fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 223 { 224 struct url *u; 225 int s; 226 227 if ((u = fetchParseURL(URL)) == NULL) 228 return (-1); 229 230 s = fetchStat(u, us, flags); 231 232 fetchFreeURL(u); 233 return (s); 234 } 235 236 /* 237 * Attempt to parse the given URL; if successful, call fetchList(). 238 */ 239 struct url_ent * 240 fetchListURL(const char *URL, const char *flags) 241 { 242 struct url *u; 243 struct url_ent *ue; 244 245 if ((u = fetchParseURL(URL)) == NULL) 246 return (NULL); 247 248 ue = fetchList(u, flags); 249 250 fetchFreeURL(u); 251 return (ue); 252 } 253 254 /* 255 * Make a URL 256 */ 257 struct url * 258 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 259 const char *user, const char *pwd) 260 { 261 struct url *u; 262 263 if (!scheme || (!host && !doc)) { 264 url_seterr(URL_MALFORMED); 265 return (NULL); 266 } 267 268 if (port < 0 || port > 65535) { 269 url_seterr(URL_BAD_PORT); 270 return (NULL); 271 } 272 273 /* allocate struct url */ 274 if ((u = calloc(1, sizeof(*u))) == NULL) { 275 fetch_syserr(); 276 return (NULL); 277 } 278 u->netrcfd = -1; 279 280 if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 281 fetch_syserr(); 282 free(u); 283 return (NULL); 284 } 285 286 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 287 seturl(scheme); 288 seturl(host); 289 seturl(user); 290 seturl(pwd); 291 #undef seturl 292 u->port = port; 293 294 return (u); 295 } 296 297 /* 298 * Return value of the given hex digit. 
/*
 * Map one hexadecimal digit (either case) to its numeric value 0..15.
 * Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{

	if ('0' <= ch && ch <= '9')
		return (ch - '0');
	if ('a' <= ch && ch <= 'f')
		return (10 + (ch - 'a'));
	if ('A' <= ch && ch <= 'F')
		return (10 + (ch - 'A'));
	return (-1);
}

/*
 * Copy a URL component from src to dst, turning each %XX escape into
 * the byte it encodes.  Copying stops at the end of the string or at
 * the first unescaped '@' or ':'; a pointer to that stopping character
 * is returned so the caller can see which one it was.
 *
 * At most dlen bytes are stored and dst is NOT terminated; that is left
 * to the caller.  NULL is returned when an escape is malformed, when an
 * escape encodes a zero byte (%00), or when the output needs more than
 * dlen bytes.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *in = src;
	size_t room = dlen;

	while (*in != '\0' && *in != '@' && *in != ':') {
		char out;

		if (*in != '%') {
			/* ordinary character, copied as is */
			out = *in;
			in += 1;
		} else {
			int hi, lo;

			/*
			 * The second digit is only inspected once the first
			 * one is known to be valid, so a '%' right before
			 * the terminator never reads past it.
			 */
			hi = fetch_hexval(in[1]);
			if (hi < 0)
				return (NULL);
			lo = fetch_hexval(in[2]);
			if (lo < 0 || (hi == 0 && lo == 0))
				return (NULL);
			out = hi << 4 | lo;
			in += 3;
		}

		if (room == 0)
			return (NULL);
		room--;
		*dst++ = out;
	}

	return (in);
}
349 */ 350 struct url * 351 fetchParseURL(const char *URL) 352 { 353 char *doc; 354 const char *p, *q; 355 struct url *u; 356 int i, n; 357 358 /* allocate struct url */ 359 if ((u = calloc(1, sizeof(*u))) == NULL) { 360 fetch_syserr(); 361 return (NULL); 362 } 363 u->netrcfd = -1; 364 365 /* scheme name */ 366 if ((p = strstr(URL, ":/"))) { 367 if (p - URL > URL_SCHEMELEN) 368 goto ouch; 369 for (i = 0; URL + i < p; i++) 370 u->scheme[i] = tolower((unsigned char)URL[i]); 371 URL = ++p; 372 /* 373 * Only one slash: no host, leave slash as part of document 374 * Two slashes: host follows, strip slashes 375 */ 376 if (URL[1] == '/') 377 URL = (p += 2); 378 } else { 379 p = URL; 380 } 381 if (!*URL || *URL == '/' || *URL == '.' || 382 (u->scheme[0] == '\0' && 383 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 384 goto nohost; 385 386 p = strpbrk(URL, "/@"); 387 if (p && *p == '@') { 388 /* username */ 389 q = fetch_pctdecode(u->user, URL, URL_USERLEN); 390 if (q == NULL) 391 goto ouch; 392 393 /* password */ 394 if (*q == ':') { 395 q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 396 if (q == NULL) 397 goto ouch; 398 } 399 p++; 400 } else { 401 p = URL; 402 } 403 404 /* hostname */ 405 if (*p == '[') { 406 q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef"); 407 if (*q++ != ']') 408 goto ouch; 409 } else { 410 /* valid characters in a DNS name */ 411 q = p + strspn(p, "-." 
"0123456789" 412 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_" 413 "abcdefghijklmnopqrstuvwxyz"); 414 } 415 if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN) 416 goto ouch; 417 for (i = 0; p + i < q; i++) 418 u->host[i] = tolower((unsigned char)p[i]); 419 u->host[i] = '\0'; 420 p = q; 421 422 /* port */ 423 if (*p == ':') { 424 for (n = 0, q = ++p; *q && (*q != '/'); q++) { 425 if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) { 426 n = n * 10 + (*q - '0'); 427 } else { 428 /* invalid port */ 429 url_seterr(URL_BAD_PORT); 430 goto ouch; 431 } 432 } 433 if (n < 1 || n > IPPORT_MAX) 434 goto ouch; 435 u->port = n; 436 p = q; 437 } 438 439 nohost: 440 /* document */ 441 if (!*p) 442 p = "/"; 443 444 if (strcmp(u->scheme, SCHEME_HTTP) == 0 || 445 strcmp(u->scheme, SCHEME_HTTPS) == 0) { 446 const char hexnums[] = "0123456789abcdef"; 447 448 /* percent-escape whitespace. */ 449 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 450 fetch_syserr(); 451 goto ouch; 452 } 453 u->doc = doc; 454 /* fragments are reserved for client-side processing, see 455 * https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1 456 */ 457 while (*p != '\0' && *p != '#') { 458 if (!isspace((unsigned char)*p)) { 459 *doc++ = *p++; 460 } else { 461 *doc++ = '%'; 462 *doc++ = hexnums[((unsigned int)*p) >> 4]; 463 *doc++ = hexnums[((unsigned int)*p) & 0xf]; 464 p++; 465 } 466 } 467 *doc = '\0'; 468 } else if ((u->doc = strdup(p)) == NULL) { 469 fetch_syserr(); 470 goto ouch; 471 } 472 473 DEBUGF("scheme: \"%s\"\n" 474 "user: \"%s\"\n" 475 "password: \"%s\"\n" 476 "host: \"%s\"\n" 477 "port: \"%d\"\n" 478 "document: \"%s\"\n", 479 u->scheme, u->user, u->pwd, 480 u->host, u->port, u->doc); 481 482 return (u); 483 484 ouch: 485 free(u); 486 return (NULL); 487 } 488 489 /* 490 * Free a URL 491 */ 492 void 493 fetchFreeURL(struct url *u) 494 { 495 free(u->doc); 496 free(u); 497 } 498