/*	$NetBSD: fetch.c,v 1.1.1.7 2009/04/04 23:26:04 joerg Exp $	*/
/*-
 * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * 30 * $FreeBSD: fetch.c,v 1.41 2007/12/19 00:26:36 des Exp $ 31 */ 32 33 #if HAVE_CONFIG_H 34 #include "config.h" 35 #endif 36 #ifndef NETBSD 37 #include <nbcompat.h> 38 #endif 39 40 #include <ctype.h> 41 #include <errno.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 46 #include "fetch.h" 47 #include "common.h" 48 49 auth_t fetchAuthMethod; 50 int fetchLastErrCode; 51 char fetchLastErrString[MAXERRSTRING]; 52 int fetchTimeout; 53 volatile int fetchRestartCalls = 1; 54 int fetchDebug; 55 56 57 /*** Local data **************************************************************/ 58 59 /* 60 * Error messages for parser errors 61 */ 62 #define URL_MALFORMED 1 63 #define URL_BAD_SCHEME 2 64 #define URL_BAD_PORT 3 65 static struct fetcherr url_errlist[] = { 66 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 67 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 68 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 69 { -1, FETCH_UNKNOWN, "Unknown parser error" } 70 }; 71 72 73 /*** Public API **************************************************************/ 74 75 /* 76 * Select the appropriate protocol for the URL scheme, and return a 77 * read-only stream connected to the document referenced by the URL. 78 * Also fill out the struct url_stat. 
79 */ 80 fetchIO * 81 fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 82 { 83 int direct; 84 85 direct = CHECK_FLAG('d'); 86 if (us != NULL) { 87 us->size = -1; 88 us->atime = us->mtime = 0; 89 } 90 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 91 return (fetchXGetFile(URL, us, flags)); 92 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 93 return (fetchXGetFTP(URL, us, flags)); 94 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 95 return (fetchXGetHTTP(URL, us, flags)); 96 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 97 return (fetchXGetHTTP(URL, us, flags)); 98 url_seterr(URL_BAD_SCHEME); 99 return (NULL); 100 } 101 102 /* 103 * Select the appropriate protocol for the URL scheme, and return a 104 * read-only stream connected to the document referenced by the URL. 105 */ 106 fetchIO * 107 fetchGet(struct url *URL, const char *flags) 108 { 109 return (fetchXGet(URL, NULL, flags)); 110 } 111 112 /* 113 * Select the appropriate protocol for the URL scheme, and return a 114 * write-only stream connected to the document referenced by the URL. 115 */ 116 fetchIO * 117 fetchPut(struct url *URL, const char *flags) 118 { 119 int direct; 120 121 direct = CHECK_FLAG('d'); 122 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 123 return (fetchPutFile(URL, flags)); 124 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 125 return (fetchPutFTP(URL, flags)); 126 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 127 return (fetchPutHTTP(URL, flags)); 128 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 129 return (fetchPutHTTP(URL, flags)); 130 url_seterr(URL_BAD_SCHEME); 131 return (NULL); 132 } 133 134 /* 135 * Select the appropriate protocol for the URL scheme, and return the 136 * size of the document referenced by the URL if it exists. 
137 */ 138 int 139 fetchStat(struct url *URL, struct url_stat *us, const char *flags) 140 { 141 int direct; 142 143 direct = CHECK_FLAG('d'); 144 if (us != NULL) { 145 us->size = -1; 146 us->atime = us->mtime = 0; 147 } 148 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 149 return (fetchStatFile(URL, us, flags)); 150 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 151 return (fetchStatFTP(URL, us, flags)); 152 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 153 return (fetchStatHTTP(URL, us, flags)); 154 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 155 return (fetchStatHTTP(URL, us, flags)); 156 url_seterr(URL_BAD_SCHEME); 157 return (-1); 158 } 159 160 /* 161 * Select the appropriate protocol for the URL scheme, and return a 162 * list of files in the directory pointed to by the URL. 163 */ 164 int 165 fetchList(struct url_list *ue, struct url *URL, const char *pattern, 166 const char *flags) 167 { 168 int direct; 169 170 direct = CHECK_FLAG('d'); 171 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 172 return (fetchListFile(ue, URL, pattern, flags)); 173 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 174 return (fetchListFTP(ue, URL, pattern, flags)); 175 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 176 return (fetchListHTTP(ue, URL, pattern, flags)); 177 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 178 return (fetchListHTTP(ue, URL, pattern, flags)); 179 url_seterr(URL_BAD_SCHEME); 180 return -1; 181 } 182 183 /* 184 * Attempt to parse the given URL; if successful, call fetchXGet(). 185 */ 186 fetchIO * 187 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 188 { 189 struct url *u; 190 fetchIO *f; 191 192 if ((u = fetchParseURL(URL)) == NULL) 193 return (NULL); 194 195 f = fetchXGet(u, us, flags); 196 197 fetchFreeURL(u); 198 return (f); 199 } 200 201 /* 202 * Attempt to parse the given URL; if successful, call fetchGet(). 
203 */ 204 fetchIO * 205 fetchGetURL(const char *URL, const char *flags) 206 { 207 return (fetchXGetURL(URL, NULL, flags)); 208 } 209 210 /* 211 * Attempt to parse the given URL; if successful, call fetchPut(). 212 */ 213 fetchIO * 214 fetchPutURL(const char *URL, const char *flags) 215 { 216 struct url *u; 217 fetchIO *f; 218 219 if ((u = fetchParseURL(URL)) == NULL) 220 return (NULL); 221 222 f = fetchPut(u, flags); 223 224 fetchFreeURL(u); 225 return (f); 226 } 227 228 /* 229 * Attempt to parse the given URL; if successful, call fetchStat(). 230 */ 231 int 232 fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 233 { 234 struct url *u; 235 int s; 236 237 if ((u = fetchParseURL(URL)) == NULL) 238 return (-1); 239 240 s = fetchStat(u, us, flags); 241 242 fetchFreeURL(u); 243 return (s); 244 } 245 246 /* 247 * Attempt to parse the given URL; if successful, call fetchList(). 248 */ 249 int 250 fetchListURL(struct url_list *ue, const char *URL, const char *pattern, 251 const char *flags) 252 { 253 struct url *u; 254 int rv; 255 256 if ((u = fetchParseURL(URL)) == NULL) 257 return -1; 258 259 rv = fetchList(ue, u, pattern, flags); 260 261 fetchFreeURL(u); 262 return rv; 263 } 264 265 /* 266 * Make a URL 267 */ 268 struct url * 269 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 270 const char *user, const char *pwd) 271 { 272 struct url *u; 273 274 if (!scheme || (!host && !doc)) { 275 url_seterr(URL_MALFORMED); 276 return (NULL); 277 } 278 279 if (port < 0 || port > 65535) { 280 url_seterr(URL_BAD_PORT); 281 return (NULL); 282 } 283 284 /* allocate struct url */ 285 if ((u = calloc(1, sizeof(*u))) == NULL) { 286 fetch_syserr(); 287 return (NULL); 288 } 289 290 if ((u->doc = strdup(doc ? 
doc : "/")) == NULL) { 291 fetch_syserr(); 292 free(u); 293 return (NULL); 294 } 295 296 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 297 seturl(scheme); 298 seturl(host); 299 seturl(user); 300 seturl(pwd); 301 #undef seturl 302 u->port = port; 303 304 return (u); 305 } 306 307 int 308 fetch_urlpath_safe(char x) 309 { 310 if ((x >= '0' && x <= '9') || (x >= 'A' && x <= 'Z') || 311 (x >= 'a' && x <= 'z')) 312 return 1; 313 314 switch (x) { 315 case '$': 316 case '-': 317 case '_': 318 case '.': 319 case '+': 320 case '!': 321 case '*': 322 case '\'': 323 case '(': 324 case ')': 325 case ',': 326 /* The following are allowed in segment and path components: */ 327 case '?': 328 case ':': 329 case '@': 330 case '&': 331 case '=': 332 case '/': 333 case ';': 334 /* If something is already quoted... */ 335 case '%': 336 return 1; 337 default: 338 return 0; 339 } 340 } 341 342 /* 343 * Copy an existing URL. 344 */ 345 struct url * 346 fetchCopyURL(const struct url *src) 347 { 348 struct url *dst; 349 char *doc; 350 351 /* allocate struct url */ 352 if ((dst = malloc(sizeof(*dst))) == NULL) { 353 fetch_syserr(); 354 return (NULL); 355 } 356 if ((doc = strdup(src->doc)) == NULL) { 357 fetch_syserr(); 358 free(dst); 359 return (NULL); 360 } 361 *dst = *src; 362 dst->doc = doc; 363 364 return dst; 365 } 366 367 /* 368 * Split an URL into components. URL syntax is: 369 * [method:/][/[user[:pwd]@]host[:port]/][document] 370 * This almost, but not quite, RFC1738 URL syntax. 
371 */ 372 struct url * 373 fetchParseURL(const char *URL) 374 { 375 const char *p, *q; 376 struct url *u; 377 size_t i, count; 378 int pre_quoted; 379 380 /* allocate struct url */ 381 if ((u = calloc(1, sizeof(*u))) == NULL) { 382 fetch_syserr(); 383 return (NULL); 384 } 385 386 if (*URL == '/') { 387 pre_quoted = 0; 388 strcpy(u->scheme, SCHEME_FILE); 389 p = URL; 390 goto quote_doc; 391 } 392 if (strncmp(URL, "file:", 5) == 0) { 393 pre_quoted = 1; 394 strcpy(u->scheme, SCHEME_FILE); 395 URL += 5; 396 if (URL[0] != '/' || URL[1] != '/' || URL[2] != '/') { 397 url_seterr(URL_MALFORMED); 398 goto ouch; 399 } 400 p = URL + 2; 401 goto quote_doc; 402 } 403 if (strncmp(URL, "http:", 5) == 0 || 404 strncmp(URL, "https:", 6) == 0) { 405 pre_quoted = 1; 406 if (URL[4] == ':') { 407 strcpy(u->scheme, SCHEME_HTTP); 408 URL += 5; 409 } else { 410 strcpy(u->scheme, SCHEME_HTTPS); 411 URL += 6; 412 } 413 414 if (URL[0] != '/' || URL[1] != '/') { 415 url_seterr(URL_MALFORMED); 416 goto ouch; 417 } 418 URL += 2; 419 p = URL; 420 goto find_user; 421 } 422 if (strncmp(URL, "ftp:", 4) == 0) { 423 pre_quoted = 1; 424 strcpy(u->scheme, SCHEME_FTP); 425 URL += 4; 426 if (URL[0] != '/' || URL[1] != '/') { 427 url_seterr(URL_MALFORMED); 428 goto ouch; 429 } 430 URL += 2; 431 p = URL; 432 goto find_user; 433 } 434 435 url_seterr(URL_BAD_SCHEME); 436 goto ouch; 437 438 find_user: 439 p = strpbrk(URL, "/@"); 440 if (p != NULL && *p == '@') { 441 /* username */ 442 for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) { 443 if (i < URL_USERLEN) 444 u->user[i++] = *q; 445 } 446 447 /* password */ 448 if (*q == ':') { 449 for (q++, i = 0; (*q != '@'); q++) 450 if (i < URL_PWDLEN) 451 u->pwd[i++] = *q; 452 } 453 454 p++; 455 } else { 456 p = URL; 457 } 458 459 /* hostname */ 460 #ifdef INET6 461 if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 462 (*++q == '\0' || *q == '/' || *q == ':')) { 463 if ((i = q - p - 2) > URL_HOSTLEN) 464 i = URL_HOSTLEN; 465 strncpy(u->host, ++p, i); 466 p = 
q; 467 } else 468 #endif 469 for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 470 if (i < URL_HOSTLEN) 471 u->host[i++] = *p; 472 473 /* port */ 474 if (*p == ':') { 475 for (q = ++p; *q && (*q != '/'); q++) 476 if (isdigit((unsigned char)*q)) 477 u->port = u->port * 10 + (*q - '0'); 478 else { 479 /* invalid port */ 480 url_seterr(URL_BAD_PORT); 481 goto ouch; 482 } 483 p = q; 484 } 485 486 /* document */ 487 if (!*p) 488 p = "/"; 489 490 quote_doc: 491 count = 1; 492 for (i = 0; p[i] != '\0'; ++i) { 493 if ((!pre_quoted && p[i] == '%') || 494 !fetch_urlpath_safe(p[i])) 495 count += 3; 496 else 497 ++count; 498 } 499 500 if ((u->doc = malloc(count)) == NULL) { 501 fetch_syserr(); 502 goto ouch; 503 } 504 for (i = 0; *p != '\0'; ++p) { 505 if ((!pre_quoted && *p == '%') || 506 !fetch_urlpath_safe(*p)) { 507 u->doc[i++] = '%'; 508 if ((unsigned char)*p < 160) 509 u->doc[i++] = '0' + ((unsigned char)*p) / 16; 510 else 511 u->doc[i++] = 'a' - 10 + ((unsigned char)*p) / 16; 512 if ((unsigned char)*p % 16 < 10) 513 u->doc[i++] = '0' + ((unsigned char)*p) % 16; 514 else 515 u->doc[i++] = 'a' - 10 + ((unsigned char)*p) % 16; 516 } else 517 u->doc[i++] = *p; 518 } 519 u->doc[i] = '\0'; 520 521 return (u); 522 523 ouch: 524 free(u); 525 return (NULL); 526 } 527 528 /* 529 * Free a URL 530 */ 531 void 532 fetchFreeURL(struct url *u) 533 { 534 free(u->doc); 535 free(u); 536 } 537 538 static char 539 xdigit2digit(char digit) 540 { 541 digit = tolower((unsigned char)digit); 542 if (digit >= 'a' && digit <= 'f') 543 digit = digit - 'a' + 10; 544 else 545 digit = digit - '0'; 546 547 return digit; 548 } 549 550 /* 551 * Unquote whole URL. 552 * Skips optional parts like query or fragment identifier. 
553 */ 554 char * 555 fetchUnquotePath(struct url *url) 556 { 557 char *unquoted; 558 const char *iter; 559 size_t i; 560 561 if ((unquoted = malloc(strlen(url->doc) + 1)) == NULL) 562 return NULL; 563 564 for (i = 0, iter = url->doc; *iter != '\0'; ++iter) { 565 if (*iter == '#' || *iter == '?') 566 break; 567 if (iter[0] != '%' || 568 !isxdigit((unsigned char)iter[1]) || 569 !isxdigit((unsigned char)iter[2])) { 570 unquoted[i++] = *iter; 571 continue; 572 } 573 unquoted[i++] = xdigit2digit(iter[1]) * 16 + 574 xdigit2digit(iter[2]); 575 iter += 2; 576 } 577 unquoted[i] = '\0'; 578 return unquoted; 579 } 580 581 582 /* 583 * Extract the file name component of a URL. 584 */ 585 char * 586 fetchUnquoteFilename(struct url *url) 587 { 588 char *unquoted, *filename; 589 const char *last_slash; 590 591 if ((unquoted = fetchUnquotePath(url)) == NULL) 592 return NULL; 593 594 if ((last_slash = strrchr(unquoted, '/')) == NULL) 595 return unquoted; 596 filename = strdup(last_slash + 1); 597 free(unquoted); 598 return filename; 599 } 600 601 char * 602 fetchStringifyURL(const struct url *url) 603 { 604 size_t total; 605 char *doc; 606 607 /* scheme :// user : pwd @ host :port doc */ 608 total = strlen(url->scheme) + 3 + strlen(url->user) + 1 + 609 strlen(url->pwd) + 1 + strlen(url->host) + 6 + strlen(url->doc) + 1; 610 if ((doc = malloc(total)) == NULL) 611 return NULL; 612 if (url->port != 0) 613 snprintf(doc, total, "%s%s%s%s%s%s%s:%d%s", 614 url->scheme, 615 url->scheme[0] != '\0' ? "://" : "", 616 url->user, 617 url->pwd[0] != '\0' ? ":" : "", 618 url->pwd, 619 url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "", 620 url->host, 621 (int)url->port, 622 url->doc); 623 else { 624 snprintf(doc, total, "%s%s%s%s%s%s%s%s", 625 url->scheme, 626 url->scheme[0] != '\0' ? "://" : "", 627 url->user, 628 url->pwd[0] != '\0' ? ":" : "", 629 url->pwd, 630 url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "", 631 url->host, 632 url->doc); 633 } 634 return doc; 635 } 636