1 /* $OpenBSD: cgi.c,v 1.86 2017/02/22 16:16:35 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "main.h" 38 #include "manconf.h" 39 #include "mansearch.h" 40 #include "cgi.h" 41 42 /* 43 * A query as passed to the search function. 44 */ 45 struct query { 46 char *manpath; /* desired manual directory */ 47 char *arch; /* architecture */ 48 char *sec; /* manual section */ 49 char *query; /* unparsed query expression */ 50 int equal; /* match whole names, not substrings */ 51 }; 52 53 struct req { 54 struct query q; 55 char **p; /* array of available manpaths */ 56 size_t psz; /* number of available manpaths */ 57 int isquery; /* QUERY_STRING used, not PATH_INFO */ 58 }; 59 60 enum focus { 61 FOCUS_NONE = 0, 62 FOCUS_QUERY 63 }; 64 65 static void html_print(const char *); 66 static void html_putchar(char); 67 static int http_decode(char *); 68 static void parse_manpath_conf(struct req *); 69 static void parse_path_info(struct req *req, const char *path); 70 static void parse_query_string(struct req *, const char *); 71 static void pg_error_badrequest(const char *); 72 static void pg_error_internal(void); 73 static void pg_index(const struct req *); 74 static void pg_noresult(const struct req *, const char *); 75 static void pg_search(const struct req *); 76 static void pg_searchres(const struct req *, 77 struct manpage *, size_t); 78 static void pg_show(struct req *, const char *); 79 static void resp_begin_html(int, const char *); 80 static void resp_begin_http(int, const char *); 81 static void resp_catman(const struct req *, const char *); 82 static void resp_copy(const char *); 83 static void resp_end_html(void); 84 static void resp_format(const struct req *, const char *); 85 static void resp_searchform(const struct req *, enum focus); 86 static void resp_show(const struct req *, const char *); 87 static void set_query_attr(char **, char **); 88 static int validate_filename(const char *); 89 static int validate_manpath(const struct req *, const char *); 90 static int validate_urifrag(const char *); 91 92 static const char *scriptname = SCRIPT_NAME; 93 94 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 95 static const char *const sec_numbers[] = { 96 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 97 }; 98 static const char *const sec_names[] = { 99 "All Sections", 100 "1 - General Commands", 101 "2 - System Calls", 102 "3 - Library Functions", 103 "3p - Perl Library", 104 "4 - Device Drivers", 105 "5 - File Formats", 106 "6 - Games", 107 "7 - Miscellaneous Information", 108 "8 - System Manager\'s Manual", 109 "9 - Kernel Developer\'s Manual" 110 }; 111 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 112 113 static const char *const arch_names[] = { 114 "amd64", "alpha", "armv7", "arm64", 115 "hppa", "i386", "landisk", 116 "loongson", "luna88k", "macppc", "mips64", 117 "octeon", "sgi", "socppc", "sparc64", 118 "amiga", "arc", "armish", "arm32", 119 "atari", "aviion", "beagle", "cats", 120 "hppa64", "hp300", 121 "ia64", "mac68k", "mvme68k", "mvme88k", 122 "mvmeppc", "palm", "pc532", "pegasos", 123 "pmax", "powerpc", "solbourne", "sparc", 124 "sun3", "vax", "wgrisc", "x68k", 125 "zaurus" 126 }; 127 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 128 129 /* 130 * Print a character, escaping HTML along the way. 131 * This will pass non-ASCII straight to output: be warned! 132 */ 133 static void 134 html_putchar(char c) 135 { 136 137 switch (c) { 138 case ('"'): 139 printf("""); 140 break; 141 case ('&'): 142 printf("&"); 143 break; 144 case ('>'): 145 printf(">"); 146 break; 147 case ('<'): 148 printf("<"); 149 break; 150 default: 151 putchar((unsigned char)c); 152 break; 153 } 154 } 155 156 /* 157 * Call through to html_putchar(). 158 * Accepts NULL strings. 159 */ 160 static void 161 html_print(const char *p) 162 { 163 164 if (NULL == p) 165 return; 166 while ('\0' != *p) 167 html_putchar(*p++); 168 } 169 170 /* 171 * Transfer the responsibility for the allocated string *val 172 * to the query structure. 173 */ 174 static void 175 set_query_attr(char **attr, char **val) 176 { 177 178 free(*attr); 179 if (**val == '\0') { 180 *attr = NULL; 181 free(*val); 182 } else 183 *attr = *val; 184 *val = NULL; 185 } 186 187 /* 188 * Parse the QUERY_STRING for key-value pairs 189 * and store the values into the query structure. 190 */ 191 static void 192 parse_query_string(struct req *req, const char *qs) 193 { 194 char *key, *val; 195 size_t keysz, valsz; 196 197 req->isquery = 1; 198 req->q.manpath = NULL; 199 req->q.arch = NULL; 200 req->q.sec = NULL; 201 req->q.query = NULL; 202 req->q.equal = 1; 203 204 key = val = NULL; 205 while (*qs != '\0') { 206 207 /* Parse one key. */ 208 209 keysz = strcspn(qs, "=;&"); 210 key = mandoc_strndup(qs, keysz); 211 qs += keysz; 212 if (*qs != '=') 213 goto next; 214 215 /* Parse one value. */ 216 217 valsz = strcspn(++qs, ";&"); 218 val = mandoc_strndup(qs, valsz); 219 qs += valsz; 220 221 /* Decode and catch encoding errors. */ 222 223 if ( ! (http_decode(key) && http_decode(val))) 224 goto next; 225 226 /* Handle key-value pairs. */ 227 228 if ( ! strcmp(key, "query")) 229 set_query_attr(&req->q.query, &val); 230 231 else if ( ! strcmp(key, "apropos")) 232 req->q.equal = !strcmp(val, "0"); 233 234 else if ( ! strcmp(key, "manpath")) { 235 #ifdef COMPAT_OLDURI 236 if ( ! strncmp(val, "OpenBSD ", 8)) { 237 val[7] = '-'; 238 if ('C' == val[8]) 239 val[8] = 'c'; 240 } 241 #endif 242 set_query_attr(&req->q.manpath, &val); 243 } 244 245 else if ( ! (strcmp(key, "sec") 246 #ifdef COMPAT_OLDURI 247 && strcmp(key, "sektion") 248 #endif 249 )) { 250 if ( ! strcmp(val, "0")) 251 *val = '\0'; 252 set_query_attr(&req->q.sec, &val); 253 } 254 255 else if ( ! strcmp(key, "arch")) { 256 if ( ! strcmp(val, "default")) 257 *val = '\0'; 258 set_query_attr(&req->q.arch, &val); 259 } 260 261 /* 262 * The key must be freed in any case. 263 * The val may have been handed over to the query 264 * structure, in which case it is now NULL. 265 */ 266 next: 267 free(key); 268 key = NULL; 269 free(val); 270 val = NULL; 271 272 if (*qs != '\0') 273 qs++; 274 } 275 } 276 277 /* 278 * HTTP-decode a string. The standard explanation is that this turns 279 * "%4e+foo" into "n foo" in the regular way. This is done in-place 280 * over the allocated string. 281 */ 282 static int 283 http_decode(char *p) 284 { 285 char hex[3]; 286 char *q; 287 int c; 288 289 hex[2] = '\0'; 290 291 q = p; 292 for ( ; '\0' != *p; p++, q++) { 293 if ('%' == *p) { 294 if ('\0' == (hex[0] = *(p + 1))) 295 return 0; 296 if ('\0' == (hex[1] = *(p + 2))) 297 return 0; 298 if (1 != sscanf(hex, "%x", &c)) 299 return 0; 300 if ('\0' == c) 301 return 0; 302 303 *q = (char)c; 304 p += 2; 305 } else 306 *q = '+' == *p ? ' ' : *p; 307 } 308 309 *q = '\0'; 310 return 1; 311 } 312 313 static void 314 resp_begin_http(int code, const char *msg) 315 { 316 317 if (200 != code) 318 printf("Status: %d %s\r\n", code, msg); 319 320 printf("Content-Type: text/html; charset=utf-8\r\n" 321 "Cache-Control: no-cache\r\n" 322 "Pragma: no-cache\r\n" 323 "\r\n"); 324 325 fflush(stdout); 326 } 327 328 static void 329 resp_copy(const char *filename) 330 { 331 char buf[4096]; 332 ssize_t sz; 333 int fd; 334 335 if ((fd = open(filename, O_RDONLY)) != -1) { 336 fflush(stdout); 337 while ((sz = read(fd, buf, sizeof(buf))) > 0) 338 write(STDOUT_FILENO, buf, sz); 339 close(fd); 340 } 341 } 342 343 static void 344 resp_begin_html(int code, const char *msg) 345 { 346 347 resp_begin_http(code, msg); 348 349 printf("<!DOCTYPE html>\n" 350 "<html>\n" 351 "<head>\n" 352 " <meta charset=\"UTF-8\"/>\n" 353 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 354 " type=\"text/css\" media=\"all\">\n" 355 " <title>%s</title>\n" 356 "</head>\n" 357 "<body>\n", 358 CSS_DIR, CUSTOMIZE_TITLE); 359 360 resp_copy(MAN_DIR "/header.html"); 361 } 362 363 static void 364 resp_end_html(void) 365 { 366 367 resp_copy(MAN_DIR "/footer.html"); 368 369 puts("</body>\n" 370 "</html>"); 371 } 372 373 static void 374 resp_searchform(const struct req *req, enum focus focus) 375 { 376 int i; 377 378 printf("<form action=\"/%s\" method=\"get\">\n" 379 " <fieldset>\n" 380 " <legend>Manual Page Search Parameters</legend>\n", 381 scriptname); 382 383 /* Write query input box. */ 384 385 printf(" <input type=\"text\" name=\"query\" value=\""); 386 if (req->q.query != NULL) 387 html_print(req->q.query); 388 printf( "\" size=\"40\""); 389 if (focus == FOCUS_QUERY) 390 printf(" autofocus"); 391 puts(">"); 392 393 /* Write submission buttons. */ 394 395 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 396 "man</button>\n" 397 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 398 "apropos</button>\n" 399 " <br/>\n"); 400 401 /* Write section selector. */ 402 403 puts(" <select name=\"sec\">"); 404 for (i = 0; i < sec_MAX; i++) { 405 printf(" <option value=\"%s\"", sec_numbers[i]); 406 if (NULL != req->q.sec && 407 0 == strcmp(sec_numbers[i], req->q.sec)) 408 printf(" selected=\"selected\""); 409 printf(">%s</option>\n", sec_names[i]); 410 } 411 puts(" </select>"); 412 413 /* Write architecture selector. */ 414 415 printf( " <select name=\"arch\">\n" 416 " <option value=\"default\""); 417 if (NULL == req->q.arch) 418 printf(" selected=\"selected\""); 419 puts(">All Architectures</option>"); 420 for (i = 0; i < arch_MAX; i++) { 421 printf(" <option value=\"%s\"", arch_names[i]); 422 if (NULL != req->q.arch && 423 0 == strcmp(arch_names[i], req->q.arch)) 424 printf(" selected=\"selected\""); 425 printf(">%s</option>\n", arch_names[i]); 426 } 427 puts(" </select>"); 428 429 /* Write manpath selector. */ 430 431 if (req->psz > 1) { 432 puts(" <select name=\"manpath\">"); 433 for (i = 0; i < (int)req->psz; i++) { 434 printf(" <option "); 435 if (strcmp(req->q.manpath, req->p[i]) == 0) 436 printf("selected=\"selected\" "); 437 printf("value=\""); 438 html_print(req->p[i]); 439 printf("\">"); 440 html_print(req->p[i]); 441 puts("</option>"); 442 } 443 puts(" </select>"); 444 } 445 446 puts(" </fieldset>\n" 447 "</form>"); 448 } 449 450 static int 451 validate_urifrag(const char *frag) 452 { 453 454 while ('\0' != *frag) { 455 if ( ! (isalnum((unsigned char)*frag) || 456 '-' == *frag || '.' == *frag || 457 '/' == *frag || '_' == *frag)) 458 return 0; 459 frag++; 460 } 461 return 1; 462 } 463 464 static int 465 validate_manpath(const struct req *req, const char* manpath) 466 { 467 size_t i; 468 469 for (i = 0; i < req->psz; i++) 470 if ( ! strcmp(manpath, req->p[i])) 471 return 1; 472 473 return 0; 474 } 475 476 static int 477 validate_filename(const char *file) 478 { 479 480 if ('.' == file[0] && '/' == file[1]) 481 file += 2; 482 483 return ! (strstr(file, "../") || strstr(file, "/..") || 484 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 485 } 486 487 static void 488 pg_index(const struct req *req) 489 { 490 491 resp_begin_html(200, NULL); 492 resp_searchform(req, FOCUS_QUERY); 493 printf("<p>\n" 494 "This web interface is documented in the\n" 495 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 496 "manual, and the\n" 497 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 498 "manual explains the query syntax.\n" 499 "</p>\n", 500 scriptname, *scriptname == '\0' ? "" : "/", 501 scriptname, *scriptname == '\0' ? "" : "/"); 502 resp_end_html(); 503 } 504 505 static void 506 pg_noresult(const struct req *req, const char *msg) 507 { 508 resp_begin_html(200, NULL); 509 resp_searchform(req, FOCUS_QUERY); 510 puts("<p>"); 511 puts(msg); 512 puts("</p>"); 513 resp_end_html(); 514 } 515 516 static void 517 pg_error_badrequest(const char *msg) 518 { 519 520 resp_begin_html(400, "Bad Request"); 521 puts("<h1>Bad Request</h1>\n" 522 "<p>\n"); 523 puts(msg); 524 printf("Try again from the\n" 525 "<a href=\"/%s\">main page</a>.\n" 526 "</p>", scriptname); 527 resp_end_html(); 528 } 529 530 static void 531 pg_error_internal(void) 532 { 533 resp_begin_html(500, "Internal Server Error"); 534 puts("<p>Internal Server Error</p>"); 535 resp_end_html(); 536 } 537 538 static void 539 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 540 { 541 char *arch, *archend; 542 const char *sec; 543 size_t i, iuse; 544 int archprio, archpriouse; 545 int prio, priouse; 546 547 for (i = 0; i < sz; i++) { 548 if (validate_filename(r[i].file)) 549 continue; 550 warnx("invalid filename %s in %s database", 551 r[i].file, req->q.manpath); 552 pg_error_internal(); 553 return; 554 } 555 556 if (req->isquery && sz == 1) { 557 /* 558 * If we have just one result, then jump there now 559 * without any delay. 560 */ 561 printf("Status: 303 See Other\r\n"); 562 printf("Location: http://%s/%s%s%s/%s", 563 HTTP_HOST, scriptname, 564 *scriptname == '\0' ? "" : "/", 565 req->q.manpath, r[0].file); 566 printf("\r\n" 567 "Content-Type: text/html; charset=utf-8\r\n" 568 "\r\n"); 569 return; 570 } 571 572 resp_begin_html(200, NULL); 573 resp_searchform(req, 574 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 575 576 if (sz > 1) { 577 puts("<table class=\"results\">"); 578 for (i = 0; i < sz; i++) { 579 printf(" <tr>\n" 580 " <td>" 581 "<a class=\"Xr\" href=\"/%s%s%s/%s\">", 582 scriptname, *scriptname == '\0' ? "" : "/", 583 req->q.manpath, r[i].file); 584 html_print(r[i].names); 585 printf("</a></td>\n" 586 " <td><span class=\"Nd\">"); 587 html_print(r[i].output); 588 puts("</span></td>\n" 589 " </tr>"); 590 } 591 puts("</table>"); 592 } 593 594 /* 595 * In man(1) mode, show one of the pages 596 * even if more than one is found. 597 */ 598 599 if (req->q.equal || sz == 1) { 600 puts("<hr>"); 601 iuse = 0; 602 priouse = 20; 603 archpriouse = 3; 604 for (i = 0; i < sz; i++) { 605 sec = r[i].file; 606 sec += strcspn(sec, "123456789"); 607 if (sec[0] == '\0') 608 continue; 609 prio = sec_prios[sec[0] - '1']; 610 if (sec[1] != '/') 611 prio += 10; 612 if (req->q.arch == NULL) { 613 archprio = 614 ((arch = strchr(sec + 1, '/')) 615 == NULL) ? 3 : 616 ((archend = strchr(arch + 1, '/')) 617 == NULL) ? 0 : 618 strncmp(arch, "amd64/", 619 archend - arch) ? 2 : 1; 620 if (archprio < archpriouse) { 621 archpriouse = archprio; 622 priouse = prio; 623 iuse = i; 624 continue; 625 } 626 if (archprio > archpriouse) 627 continue; 628 } 629 if (prio >= priouse) 630 continue; 631 priouse = prio; 632 iuse = i; 633 } 634 resp_show(req, r[iuse].file); 635 } 636 637 resp_end_html(); 638 } 639 640 static void 641 resp_catman(const struct req *req, const char *file) 642 { 643 FILE *f; 644 char *p; 645 size_t sz; 646 ssize_t len; 647 int i; 648 int italic, bold; 649 650 if ((f = fopen(file, "r")) == NULL) { 651 puts("<p>You specified an invalid manual file.</p>"); 652 return; 653 } 654 655 puts("<div class=\"catman\">\n" 656 "<pre>"); 657 658 p = NULL; 659 sz = 0; 660 661 while ((len = getline(&p, &sz, f)) != -1) { 662 bold = italic = 0; 663 for (i = 0; i < len - 1; i++) { 664 /* 665 * This means that the catpage is out of state. 666 * Ignore it and keep going (although the 667 * catpage is bogus). 668 */ 669 670 if ('\b' == p[i] || '\n' == p[i]) 671 continue; 672 673 /* 674 * Print a regular character. 675 * Close out any bold/italic scopes. 676 * If we're in back-space mode, make sure we'll 677 * have something to enter when we backspace. 678 */ 679 680 if ('\b' != p[i + 1]) { 681 if (italic) 682 printf("</i>"); 683 if (bold) 684 printf("</b>"); 685 italic = bold = 0; 686 html_putchar(p[i]); 687 continue; 688 } else if (i + 2 >= len) 689 continue; 690 691 /* Italic mode. */ 692 693 if ('_' == p[i]) { 694 if (bold) 695 printf("</b>"); 696 if ( ! italic) 697 printf("<i>"); 698 bold = 0; 699 italic = 1; 700 i += 2; 701 html_putchar(p[i]); 702 continue; 703 } 704 705 /* 706 * Handle funny behaviour troff-isms. 707 * These grok'd from the original man2html.c. 708 */ 709 710 if (('+' == p[i] && 'o' == p[i + 2]) || 711 ('o' == p[i] && '+' == p[i + 2]) || 712 ('|' == p[i] && '=' == p[i + 2]) || 713 ('=' == p[i] && '|' == p[i + 2]) || 714 ('*' == p[i] && '=' == p[i + 2]) || 715 ('=' == p[i] && '*' == p[i + 2]) || 716 ('*' == p[i] && '|' == p[i + 2]) || 717 ('|' == p[i] && '*' == p[i + 2])) { 718 if (italic) 719 printf("</i>"); 720 if (bold) 721 printf("</b>"); 722 italic = bold = 0; 723 putchar('*'); 724 i += 2; 725 continue; 726 } else if (('|' == p[i] && '-' == p[i + 2]) || 727 ('-' == p[i] && '|' == p[i + 1]) || 728 ('+' == p[i] && '-' == p[i + 1]) || 729 ('-' == p[i] && '+' == p[i + 1]) || 730 ('+' == p[i] && '|' == p[i + 1]) || 731 ('|' == p[i] && '+' == p[i + 1])) { 732 if (italic) 733 printf("</i>"); 734 if (bold) 735 printf("</b>"); 736 italic = bold = 0; 737 putchar('+'); 738 i += 2; 739 continue; 740 } 741 742 /* Bold mode. */ 743 744 if (italic) 745 printf("</i>"); 746 if ( ! bold) 747 printf("<b>"); 748 bold = 1; 749 italic = 0; 750 i += 2; 751 html_putchar(p[i]); 752 } 753 754 /* 755 * Clean up the last character. 756 * We can get to a newline; don't print that. 757 */ 758 759 if (italic) 760 printf("</i>"); 761 if (bold) 762 printf("</b>"); 763 764 if (i == len - 1 && p[i] != '\n') 765 html_putchar(p[i]); 766 767 putchar('\n'); 768 } 769 free(p); 770 771 puts("</pre>\n" 772 "</div>"); 773 774 fclose(f); 775 } 776 777 static void 778 resp_format(const struct req *req, const char *file) 779 { 780 struct manoutput conf; 781 struct mparse *mp; 782 struct roff_man *man; 783 void *vp; 784 int fd; 785 int usepath; 786 787 if (-1 == (fd = open(file, O_RDONLY, 0))) { 788 puts("<p>You specified an invalid manual file.</p>"); 789 return; 790 } 791 792 mchars_alloc(); 793 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1, 794 MANDOCLEVEL_BADARG, NULL, req->q.manpath); 795 mparse_readfd(mp, fd, file); 796 close(fd); 797 798 memset(&conf, 0, sizeof(conf)); 799 conf.fragment = 1; 800 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 801 usepath = strcmp(req->q.manpath, req->p[0]); 802 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S", 803 usepath ? req->q.manpath : "", usepath ? "/" : ""); 804 805 mparse_result(mp, &man, NULL); 806 if (man == NULL) { 807 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 808 pg_error_internal(); 809 mparse_free(mp); 810 mchars_free(); 811 return; 812 } 813 814 vp = html_alloc(&conf); 815 816 if (man->macroset == MACROSET_MDOC) { 817 mdoc_validate(man); 818 html_mdoc(vp, man); 819 } else { 820 man_validate(man); 821 html_man(vp, man); 822 } 823 824 html_free(vp); 825 mparse_free(mp); 826 mchars_free(); 827 free(conf.man); 828 free(conf.style); 829 } 830 831 static void 832 resp_show(const struct req *req, const char *file) 833 { 834 835 if ('.' == file[0] && '/' == file[1]) 836 file += 2; 837 838 if ('c' == *file) 839 resp_catman(req, file); 840 else 841 resp_format(req, file); 842 } 843 844 static void 845 pg_show(struct req *req, const char *fullpath) 846 { 847 char *manpath; 848 const char *file; 849 850 if ((file = strchr(fullpath, '/')) == NULL) { 851 pg_error_badrequest( 852 "You did not specify a page to show."); 853 return; 854 } 855 manpath = mandoc_strndup(fullpath, file - fullpath); 856 file++; 857 858 if ( ! validate_manpath(req, manpath)) { 859 pg_error_badrequest( 860 "You specified an invalid manpath."); 861 free(manpath); 862 return; 863 } 864 865 /* 866 * Begin by chdir()ing into the manpath. 867 * This way we can pick up the database files, which are 868 * relative to the manpath root. 869 */ 870 871 if (chdir(manpath) == -1) { 872 warn("chdir %s", manpath); 873 pg_error_internal(); 874 free(manpath); 875 return; 876 } 877 free(manpath); 878 879 if ( ! validate_filename(file)) { 880 pg_error_badrequest( 881 "You specified an invalid manual file."); 882 return; 883 } 884 885 resp_begin_html(200, NULL); 886 resp_searchform(req, FOCUS_NONE); 887 resp_show(req, file); 888 resp_end_html(); 889 } 890 891 static void 892 pg_search(const struct req *req) 893 { 894 struct mansearch search; 895 struct manpaths paths; 896 struct manpage *res; 897 char **argv; 898 char *query, *rp, *wp; 899 size_t ressz; 900 int argc; 901 902 /* 903 * Begin by chdir()ing into the root of the manpath. 904 * This way we can pick up the database files, which are 905 * relative to the manpath root. 906 */ 907 908 if (chdir(req->q.manpath) == -1) { 909 warn("chdir %s", req->q.manpath); 910 pg_error_internal(); 911 return; 912 } 913 914 search.arch = req->q.arch; 915 search.sec = req->q.sec; 916 search.outkey = "Nd"; 917 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 918 search.firstmatch = 1; 919 920 paths.sz = 1; 921 paths.paths = mandoc_malloc(sizeof(char *)); 922 paths.paths[0] = mandoc_strdup("."); 923 924 /* 925 * Break apart at spaces with backslash-escaping. 926 */ 927 928 argc = 0; 929 argv = NULL; 930 rp = query = mandoc_strdup(req->q.query); 931 for (;;) { 932 while (isspace((unsigned char)*rp)) 933 rp++; 934 if (*rp == '\0') 935 break; 936 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 937 argv[argc++] = wp = rp; 938 for (;;) { 939 if (isspace((unsigned char)*rp)) { 940 *wp = '\0'; 941 rp++; 942 break; 943 } 944 if (rp[0] == '\\' && rp[1] != '\0') 945 rp++; 946 if (wp != rp) 947 *wp = *rp; 948 if (*rp == '\0') 949 break; 950 wp++; 951 rp++; 952 } 953 } 954 955 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz)) 956 pg_noresult(req, "You entered an invalid query."); 957 else if (0 == ressz) 958 pg_noresult(req, "No results found."); 959 else 960 pg_searchres(req, res, ressz); 961 962 free(query); 963 mansearch_free(res, ressz); 964 free(paths.paths[0]); 965 free(paths.paths); 966 } 967 968 int 969 main(void) 970 { 971 struct req req; 972 struct itimerval itimer; 973 const char *path; 974 const char *querystring; 975 int i; 976 977 /* 978 * The "rpath" pledge could be revoked after mparse_readfd() 979 * if the file desciptor to "/footer.html" would be opened 980 * up front, but it's probably not worth the complication 981 * of the code it would cause: it would require scattering 982 * pledge() calls in multiple low-level resp_*() functions. 983 */ 984 985 if (pledge("stdio rpath", NULL) == -1) { 986 warn("pledge"); 987 pg_error_internal(); 988 return EXIT_FAILURE; 989 } 990 991 /* Poor man's ReDoS mitigation. */ 992 993 itimer.it_value.tv_sec = 2; 994 itimer.it_value.tv_usec = 0; 995 itimer.it_interval.tv_sec = 2; 996 itimer.it_interval.tv_usec = 0; 997 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 998 warn("setitimer"); 999 pg_error_internal(); 1000 return EXIT_FAILURE; 1001 } 1002 1003 /* 1004 * First we change directory into the MAN_DIR so that 1005 * subsequent scanning for manpath directories is rooted 1006 * relative to the same position. 1007 */ 1008 1009 if (chdir(MAN_DIR) == -1) { 1010 warn("MAN_DIR: %s", MAN_DIR); 1011 pg_error_internal(); 1012 return EXIT_FAILURE; 1013 } 1014 1015 memset(&req, 0, sizeof(struct req)); 1016 req.q.equal = 1; 1017 parse_manpath_conf(&req); 1018 1019 /* Parse the path info and the query string. */ 1020 1021 if ((path = getenv("PATH_INFO")) == NULL) 1022 path = ""; 1023 else if (*path == '/') 1024 path++; 1025 1026 if (*path != '\0') { 1027 parse_path_info(&req, path); 1028 if (req.q.manpath == NULL || access(path, F_OK) == -1) 1029 path = ""; 1030 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1031 parse_query_string(&req, querystring); 1032 1033 /* Validate parsed data and add defaults. */ 1034 1035 if (req.q.manpath == NULL) 1036 req.q.manpath = mandoc_strdup(req.p[0]); 1037 else if ( ! validate_manpath(&req, req.q.manpath)) { 1038 pg_error_badrequest( 1039 "You specified an invalid manpath."); 1040 return EXIT_FAILURE; 1041 } 1042 1043 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1044 pg_error_badrequest( 1045 "You specified an invalid architecture."); 1046 return EXIT_FAILURE; 1047 } 1048 1049 /* Dispatch to the three different pages. */ 1050 1051 if ('\0' != *path) 1052 pg_show(&req, path); 1053 else if (NULL != req.q.query) 1054 pg_search(&req); 1055 else 1056 pg_index(&req); 1057 1058 free(req.q.manpath); 1059 free(req.q.arch); 1060 free(req.q.sec); 1061 free(req.q.query); 1062 for (i = 0; i < (int)req.psz; i++) 1063 free(req.p[i]); 1064 free(req.p); 1065 return EXIT_SUCCESS; 1066 } 1067 1068 /* 1069 * If PATH_INFO is not a file name, translate it to a query. 1070 */ 1071 static void 1072 parse_path_info(struct req *req, const char *path) 1073 { 1074 char *dir[4]; 1075 int i; 1076 1077 req->isquery = 0; 1078 req->q.equal = 1; 1079 req->q.manpath = mandoc_strdup(path); 1080 req->q.arch = NULL; 1081 1082 /* Mandatory manual page name. */ 1083 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1084 req->q.query = req->q.manpath; 1085 req->q.manpath = NULL; 1086 } else 1087 *req->q.query++ = '\0'; 1088 1089 /* Optional trailing section. */ 1090 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1091 if(isdigit((unsigned char)req->q.sec[1])) { 1092 *req->q.sec++ = '\0'; 1093 req->q.sec = mandoc_strdup(req->q.sec); 1094 } else 1095 req->q.sec = NULL; 1096 } 1097 1098 /* Handle the case of name[.section] only. */ 1099 if (req->q.manpath == NULL) 1100 return; 1101 req->q.query = mandoc_strdup(req->q.query); 1102 1103 /* Split directory components. */ 1104 dir[i = 0] = req->q.manpath; 1105 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) { 1106 if (++i == 3) { 1107 pg_error_badrequest( 1108 "You specified too many directory components."); 1109 exit(EXIT_FAILURE); 1110 } 1111 *dir[i]++ = '\0'; 1112 } 1113 1114 /* Optional manpath. */ 1115 if ((i = validate_manpath(req, req->q.manpath)) == 0) 1116 req->q.manpath = NULL; 1117 else if (dir[1] == NULL) 1118 return; 1119 1120 /* Optional section. */ 1121 if (strncmp(dir[i], "man", 3) == 0) { 1122 free(req->q.sec); 1123 req->q.sec = mandoc_strdup(dir[i++] + 3); 1124 } 1125 if (dir[i] == NULL) { 1126 if (req->q.manpath == NULL) 1127 free(dir[0]); 1128 return; 1129 } 1130 if (dir[i + 1] != NULL) { 1131 pg_error_badrequest( 1132 "You specified an invalid directory component."); 1133 exit(EXIT_FAILURE); 1134 } 1135 1136 /* Optional architecture. */ 1137 if (i) { 1138 req->q.arch = mandoc_strdup(dir[i]); 1139 if (req->q.manpath == NULL) 1140 free(dir[0]); 1141 } else 1142 req->q.arch = dir[0]; 1143 } 1144 1145 /* 1146 * Scan for indexable paths. 1147 */ 1148 static void 1149 parse_manpath_conf(struct req *req) 1150 { 1151 FILE *fp; 1152 char *dp; 1153 size_t dpsz; 1154 ssize_t len; 1155 1156 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1157 warn("%s/manpath.conf", MAN_DIR); 1158 pg_error_internal(); 1159 exit(EXIT_FAILURE); 1160 } 1161 1162 dp = NULL; 1163 dpsz = 0; 1164 1165 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1166 if (dp[len - 1] == '\n') 1167 dp[--len] = '\0'; 1168 req->p = mandoc_realloc(req->p, 1169 (req->psz + 1) * sizeof(char *)); 1170 if ( ! validate_urifrag(dp)) { 1171 warnx("%s/manpath.conf contains " 1172 "unsafe path \"%s\"", MAN_DIR, dp); 1173 pg_error_internal(); 1174 exit(EXIT_FAILURE); 1175 } 1176 if (strchr(dp, '/') != NULL) { 1177 warnx("%s/manpath.conf contains " 1178 "path with slash \"%s\"", MAN_DIR, dp); 1179 pg_error_internal(); 1180 exit(EXIT_FAILURE); 1181 } 1182 req->p[req->psz++] = dp; 1183 dp = NULL; 1184 dpsz = 0; 1185 } 1186 free(dp); 1187 1188 if (req->p == NULL) { 1189 warnx("%s/manpath.conf is empty", MAN_DIR); 1190 pg_error_internal(); 1191 exit(EXIT_FAILURE); 1192 } 1193 } 1194