1 /* $OpenBSD: cgi.c,v 1.115 2021/10/24 21:24:16 deraadt Exp $ */ 2 /* 3 * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de> 4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the man.cgi(8) program. 19 */ 20 #include <sys/types.h> 21 #include <sys/time.h> 22 23 #include <ctype.h> 24 #include <err.h> 25 #include <errno.h> 26 #include <fcntl.h> 27 #include <limits.h> 28 #include <stdint.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "mandoc_aux.h" 35 #include "mandoc.h" 36 #include "roff.h" 37 #include "mdoc.h" 38 #include "man.h" 39 #include "mandoc_parse.h" 40 #include "main.h" 41 #include "manconf.h" 42 #include "mansearch.h" 43 #include "cgi.h" 44 45 /* 46 * A query as passed to the search function. 47 */ 48 struct query { 49 char *manpath; /* desired manual directory */ 50 char *arch; /* architecture */ 51 char *sec; /* manual section */ 52 char *query; /* unparsed query expression */ 53 int equal; /* match whole names, not substrings */ 54 }; 55 56 struct req { 57 struct query q; 58 char **p; /* array of available manpaths */ 59 size_t psz; /* number of available manpaths */ 60 int isquery; /* QUERY_STRING used, not PATH_INFO */ 61 }; 62 63 enum focus { 64 FOCUS_NONE = 0, 65 FOCUS_QUERY 66 }; 67 68 static void html_print(const char *); 69 static void html_putchar(char); 70 static int http_decode(char *); 71 static void http_encode(const char *); 72 static void parse_manpath_conf(struct req *); 73 static void parse_path_info(struct req *, const char *); 74 static void parse_query_string(struct req *, const char *); 75 static void pg_error_badrequest(const char *); 76 static void pg_error_internal(void); 77 static void pg_index(const struct req *); 78 static void pg_noresult(const struct req *, int, const char *, 79 const char *); 80 static void pg_redirect(const struct req *, const char *); 81 static void pg_search(const struct req *); 82 static void pg_searchres(const struct req *, 83 struct manpage *, size_t); 84 static void pg_show(struct req *, const char *); 85 static void resp_begin_html(int, const char *, const char *); 86 static void resp_begin_http(int, const char *); 87 static void resp_catman(const struct req *, const char *); 88 static void resp_copy(const char *); 89 static void resp_end_html(void); 90 static void resp_format(const struct req *, const char *); 91 static void resp_searchform(const struct req *, enum focus); 92 static void resp_show(const struct req *, const char *); 93 static void set_query_attr(char **, char **); 94 static int validate_arch(const char *); 95 static int validate_filename(const char *); 96 static int validate_manpath(const struct req *, const char *); 97 static int validate_urifrag(const char *); 98 99 static const char *scriptname = SCRIPT_NAME; 100 101 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 102 static const char *const sec_numbers[] = { 103 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 104 }; 105 static const char *const sec_names[] = { 106 "All Sections", 107 "1 - General Commands", 108 "2 - System Calls", 109 "3 - Library Functions", 110 "3p - Perl Library", 111 "4 - Device Drivers", 112 "5 - File Formats", 113 "6 - Games", 114 "7 - Miscellaneous Information", 115 "8 - System Manager\'s Manual", 116 "9 - Kernel Developer\'s Manual" 117 }; 118 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 119 120 static const char *const arch_names[] = { 121 "amd64", "alpha", "armv7", "arm64", 122 "hppa", "i386", "landisk", "loongson", 123 "luna88k", "macppc", "mips64", "octeon", 124 "powerpc64", "riscv64", "sparc64", 125 126 "amiga", "arc", "armish", "arm32", 127 "atari", "aviion", "beagle", "cats", 128 "hppa64", "hp300", 129 "ia64", "mac68k", "mvme68k", "mvme88k", 130 "mvmeppc", "palm", "pc532", "pegasos", 131 "pmax", "powerpc", "sgi", "socppc", 132 "solbourne", "sparc", 133 "sun3", "vax", "wgrisc", "x68k", 134 "zaurus" 135 }; 136 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 137 138 /* 139 * Print a character, escaping HTML along the way. 140 * This will pass non-ASCII straight to output: be warned! 141 */ 142 static void 143 html_putchar(char c) 144 { 145 146 switch (c) { 147 case '"': 148 printf("""); 149 break; 150 case '&': 151 printf("&"); 152 break; 153 case '>': 154 printf(">"); 155 break; 156 case '<': 157 printf("<"); 158 break; 159 default: 160 putchar((unsigned char)c); 161 break; 162 } 163 } 164 165 /* 166 * Call through to html_putchar(). 167 * Accepts NULL strings. 168 */ 169 static void 170 html_print(const char *p) 171 { 172 173 if (NULL == p) 174 return; 175 while ('\0' != *p) 176 html_putchar(*p++); 177 } 178 179 /* 180 * Transfer the responsibility for the allocated string *val 181 * to the query structure. 182 */ 183 static void 184 set_query_attr(char **attr, char **val) 185 { 186 187 free(*attr); 188 if (**val == '\0') { 189 *attr = NULL; 190 free(*val); 191 } else 192 *attr = *val; 193 *val = NULL; 194 } 195 196 /* 197 * Parse the QUERY_STRING for key-value pairs 198 * and store the values into the query structure. 199 */ 200 static void 201 parse_query_string(struct req *req, const char *qs) 202 { 203 char *key, *val; 204 size_t keysz, valsz; 205 206 req->isquery = 1; 207 req->q.manpath = NULL; 208 req->q.arch = NULL; 209 req->q.sec = NULL; 210 req->q.query = NULL; 211 req->q.equal = 1; 212 213 key = val = NULL; 214 while (*qs != '\0') { 215 216 /* Parse one key. */ 217 218 keysz = strcspn(qs, "=;&"); 219 key = mandoc_strndup(qs, keysz); 220 qs += keysz; 221 if (*qs != '=') 222 goto next; 223 224 /* Parse one value. */ 225 226 valsz = strcspn(++qs, ";&"); 227 val = mandoc_strndup(qs, valsz); 228 qs += valsz; 229 230 /* Decode and catch encoding errors. */ 231 232 if ( ! (http_decode(key) && http_decode(val))) 233 goto next; 234 235 /* Handle key-value pairs. */ 236 237 if ( ! strcmp(key, "query")) 238 set_query_attr(&req->q.query, &val); 239 240 else if ( ! strcmp(key, "apropos")) 241 req->q.equal = !strcmp(val, "0"); 242 243 else if ( ! strcmp(key, "manpath")) { 244 #ifdef COMPAT_OLDURI 245 if ( ! strncmp(val, "OpenBSD ", 8)) { 246 val[7] = '-'; 247 if ('C' == val[8]) 248 val[8] = 'c'; 249 } 250 #endif 251 set_query_attr(&req->q.manpath, &val); 252 } 253 254 else if ( ! (strcmp(key, "sec") 255 #ifdef COMPAT_OLDURI 256 && strcmp(key, "sektion") 257 #endif 258 )) { 259 if ( ! strcmp(val, "0")) 260 *val = '\0'; 261 set_query_attr(&req->q.sec, &val); 262 } 263 264 else if ( ! strcmp(key, "arch")) { 265 if ( ! strcmp(val, "default")) 266 *val = '\0'; 267 set_query_attr(&req->q.arch, &val); 268 } 269 270 /* 271 * The key must be freed in any case. 272 * The val may have been handed over to the query 273 * structure, in which case it is now NULL. 274 */ 275 next: 276 free(key); 277 key = NULL; 278 free(val); 279 val = NULL; 280 281 if (*qs != '\0') 282 qs++; 283 } 284 } 285 286 /* 287 * HTTP-decode a string. The standard explanation is that this turns 288 * "%4e+foo" into "n foo" in the regular way. This is done in-place 289 * over the allocated string. 290 */ 291 static int 292 http_decode(char *p) 293 { 294 char hex[3]; 295 char *q; 296 int c; 297 298 hex[2] = '\0'; 299 300 q = p; 301 for ( ; '\0' != *p; p++, q++) { 302 if ('%' == *p) { 303 if ('\0' == (hex[0] = *(p + 1))) 304 return 0; 305 if ('\0' == (hex[1] = *(p + 2))) 306 return 0; 307 if (1 != sscanf(hex, "%x", &c)) 308 return 0; 309 if ('\0' == c) 310 return 0; 311 312 *q = (char)c; 313 p += 2; 314 } else 315 *q = '+' == *p ? ' ' : *p; 316 } 317 318 *q = '\0'; 319 return 1; 320 } 321 322 static void 323 http_encode(const char *p) 324 { 325 for (; *p != '\0'; p++) { 326 if (isalnum((unsigned char)*p) == 0 && 327 strchr("-._~", *p) == NULL) 328 printf("%%%2.2X", (unsigned char)*p); 329 else 330 putchar(*p); 331 } 332 } 333 334 static void 335 resp_begin_http(int code, const char *msg) 336 { 337 338 if (200 != code) 339 printf("Status: %d %s\r\n", code, msg); 340 341 printf("Content-Type: text/html; charset=utf-8\r\n" 342 "Cache-Control: no-cache\r\n" 343 "Content-Security-Policy: default-src 'none'; " 344 "style-src 'self' 'unsafe-inline'\r\n" 345 "Pragma: no-cache\r\n" 346 "\r\n"); 347 348 fflush(stdout); 349 } 350 351 static void 352 resp_copy(const char *filename) 353 { 354 char buf[4096]; 355 ssize_t sz; 356 int fd; 357 358 if ((fd = open(filename, O_RDONLY)) != -1) { 359 fflush(stdout); 360 while ((sz = read(fd, buf, sizeof(buf))) > 0) 361 write(STDOUT_FILENO, buf, sz); 362 close(fd); 363 } 364 } 365 366 static void 367 resp_begin_html(int code, const char *msg, const char *file) 368 { 369 const char *name, *sec, *cp; 370 int namesz, secsz; 371 372 resp_begin_http(code, msg); 373 374 printf("<!DOCTYPE html>\n" 375 "<html>\n" 376 "<head>\n" 377 " <meta charset=\"UTF-8\"/>\n" 378 " <meta name=\"viewport\"" 379 " content=\"width=device-width, initial-scale=1.0\">\n" 380 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 381 " type=\"text/css\" media=\"all\">\n" 382 " <title>", 383 CSS_DIR); 384 if (file != NULL) { 385 cp = strrchr(file, '/'); 386 name = cp == NULL ? file : cp + 1; 387 cp = strrchr(name, '.'); 388 namesz = cp == NULL ? strlen(name) : cp - name; 389 sec = NULL; 390 if (cp != NULL && cp[1] != '0') { 391 sec = cp + 1; 392 secsz = strlen(sec); 393 } else if (name - file > 1) { 394 for (cp = name - 2; cp >= file; cp--) { 395 if (*cp < '1' || *cp > '9') 396 continue; 397 sec = cp; 398 secsz = name - cp - 1; 399 break; 400 } 401 } 402 printf("%.*s", namesz, name); 403 if (sec != NULL) 404 printf("(%.*s)", secsz, sec); 405 fputs(" - ", stdout); 406 } 407 printf("%s</title>\n" 408 "</head>\n" 409 "<body>\n", 410 CUSTOMIZE_TITLE); 411 412 resp_copy(MAN_DIR "/header.html"); 413 } 414 415 static void 416 resp_end_html(void) 417 { 418 419 resp_copy(MAN_DIR "/footer.html"); 420 421 puts("</body>\n" 422 "</html>"); 423 } 424 425 static void 426 resp_searchform(const struct req *req, enum focus focus) 427 { 428 int i; 429 430 printf("<form action=\"/%s\" method=\"get\" " 431 "autocomplete=\"off\" autocapitalize=\"none\">\n" 432 " <fieldset>\n" 433 " <legend>Manual Page Search Parameters</legend>\n", 434 scriptname); 435 436 /* Write query input box. */ 437 438 printf(" <input type=\"search\" name=\"query\" value=\""); 439 if (req->q.query != NULL) 440 html_print(req->q.query); 441 printf( "\" size=\"40\""); 442 if (focus == FOCUS_QUERY) 443 printf(" autofocus"); 444 puts(">"); 445 446 /* Write submission buttons. */ 447 448 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 449 "man</button>\n" 450 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 451 "apropos</button>\n" 452 " <br/>\n"); 453 454 /* Write section selector. */ 455 456 puts(" <select name=\"sec\">"); 457 for (i = 0; i < sec_MAX; i++) { 458 printf(" <option value=\"%s\"", sec_numbers[i]); 459 if (NULL != req->q.sec && 460 0 == strcmp(sec_numbers[i], req->q.sec)) 461 printf(" selected=\"selected\""); 462 printf(">%s</option>\n", sec_names[i]); 463 } 464 puts(" </select>"); 465 466 /* Write architecture selector. */ 467 468 printf( " <select name=\"arch\">\n" 469 " <option value=\"default\""); 470 if (NULL == req->q.arch) 471 printf(" selected=\"selected\""); 472 puts(">All Architectures</option>"); 473 for (i = 0; i < arch_MAX; i++) { 474 printf(" <option"); 475 if (NULL != req->q.arch && 476 0 == strcmp(arch_names[i], req->q.arch)) 477 printf(" selected=\"selected\""); 478 printf(">%s</option>\n", arch_names[i]); 479 } 480 puts(" </select>"); 481 482 /* Write manpath selector. */ 483 484 if (req->psz > 1) { 485 puts(" <select name=\"manpath\">"); 486 for (i = 0; i < (int)req->psz; i++) { 487 printf(" <option"); 488 if (strcmp(req->q.manpath, req->p[i]) == 0) 489 printf(" selected=\"selected\""); 490 printf(">"); 491 html_print(req->p[i]); 492 puts("</option>"); 493 } 494 puts(" </select>"); 495 } 496 497 puts(" </fieldset>\n" 498 "</form>"); 499 } 500 501 static int 502 validate_urifrag(const char *frag) 503 { 504 505 while ('\0' != *frag) { 506 if ( ! (isalnum((unsigned char)*frag) || 507 '-' == *frag || '.' == *frag || 508 '/' == *frag || '_' == *frag)) 509 return 0; 510 frag++; 511 } 512 return 1; 513 } 514 515 static int 516 validate_manpath(const struct req *req, const char* manpath) 517 { 518 size_t i; 519 520 for (i = 0; i < req->psz; i++) 521 if ( ! strcmp(manpath, req->p[i])) 522 return 1; 523 524 return 0; 525 } 526 527 static int 528 validate_arch(const char *arch) 529 { 530 int i; 531 532 for (i = 0; i < arch_MAX; i++) 533 if (strcmp(arch, arch_names[i]) == 0) 534 return 1; 535 536 return 0; 537 } 538 539 static int 540 validate_filename(const char *file) 541 { 542 543 if ('.' == file[0] && '/' == file[1]) 544 file += 2; 545 546 return ! (strstr(file, "../") || strstr(file, "/..") || 547 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 548 } 549 550 static void 551 pg_index(const struct req *req) 552 { 553 554 resp_begin_html(200, NULL, NULL); 555 resp_searchform(req, FOCUS_QUERY); 556 printf("<p>\n" 557 "This web interface is documented in the\n" 558 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 559 "manual, and the\n" 560 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 561 "manual explains the query syntax.\n" 562 "</p>\n", 563 scriptname, *scriptname == '\0' ? "" : "/", 564 scriptname, *scriptname == '\0' ? "" : "/"); 565 resp_end_html(); 566 } 567 568 static void 569 pg_noresult(const struct req *req, int code, const char *http_msg, 570 const char *user_msg) 571 { 572 resp_begin_html(code, http_msg, NULL); 573 resp_searchform(req, FOCUS_QUERY); 574 puts("<p>"); 575 puts(user_msg); 576 puts("</p>"); 577 resp_end_html(); 578 } 579 580 static void 581 pg_error_badrequest(const char *msg) 582 { 583 584 resp_begin_html(400, "Bad Request", NULL); 585 puts("<h1>Bad Request</h1>\n" 586 "<p>\n"); 587 puts(msg); 588 printf("Try again from the\n" 589 "<a href=\"/%s\">main page</a>.\n" 590 "</p>", scriptname); 591 resp_end_html(); 592 } 593 594 static void 595 pg_error_internal(void) 596 { 597 resp_begin_html(500, "Internal Server Error", NULL); 598 puts("<p>Internal Server Error</p>"); 599 resp_end_html(); 600 } 601 602 static void 603 pg_redirect(const struct req *req, const char *name) 604 { 605 printf("Status: 303 See Other\r\n" 606 "Location: /"); 607 if (*scriptname != '\0') 608 printf("%s/", scriptname); 609 if (strcmp(req->q.manpath, req->p[0])) 610 printf("%s/", req->q.manpath); 611 if (req->q.arch != NULL) 612 printf("%s/", req->q.arch); 613 http_encode(name); 614 if (req->q.sec != NULL) { 615 putchar('.'); 616 http_encode(req->q.sec); 617 } 618 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 619 } 620 621 static void 622 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 623 { 624 char *arch, *archend; 625 const char *sec; 626 size_t i, iuse; 627 int archprio, archpriouse; 628 int prio, priouse; 629 630 for (i = 0; i < sz; i++) { 631 if (validate_filename(r[i].file)) 632 continue; 633 warnx("invalid filename %s in %s database", 634 r[i].file, req->q.manpath); 635 pg_error_internal(); 636 return; 637 } 638 639 if (req->isquery && sz == 1) { 640 /* 641 * If we have just one result, then jump there now 642 * without any delay. 643 */ 644 printf("Status: 303 See Other\r\n" 645 "Location: /"); 646 if (*scriptname != '\0') 647 printf("%s/", scriptname); 648 if (strcmp(req->q.manpath, req->p[0])) 649 printf("%s/", req->q.manpath); 650 printf("%s\r\n" 651 "Content-Type: text/html; charset=utf-8\r\n\r\n", 652 r[0].file); 653 return; 654 } 655 656 /* 657 * In man(1) mode, show one of the pages 658 * even if more than one is found. 659 */ 660 661 iuse = 0; 662 if (req->q.equal || sz == 1) { 663 priouse = 20; 664 archpriouse = 3; 665 for (i = 0; i < sz; i++) { 666 sec = r[i].file; 667 sec += strcspn(sec, "123456789"); 668 if (sec[0] == '\0') 669 continue; 670 prio = sec_prios[sec[0] - '1']; 671 if (sec[1] != '/') 672 prio += 10; 673 if (req->q.arch == NULL) { 674 archprio = 675 ((arch = strchr(sec + 1, '/')) 676 == NULL) ? 3 : 677 ((archend = strchr(arch + 1, '/')) 678 == NULL) ? 0 : 679 strncmp(arch, "amd64/", 680 archend - arch) ? 2 : 1; 681 if (archprio < archpriouse) { 682 archpriouse = archprio; 683 priouse = prio; 684 iuse = i; 685 continue; 686 } 687 if (archprio > archpriouse) 688 continue; 689 } 690 if (prio >= priouse) 691 continue; 692 priouse = prio; 693 iuse = i; 694 } 695 resp_begin_html(200, NULL, r[iuse].file); 696 } else 697 resp_begin_html(200, NULL, NULL); 698 699 resp_searchform(req, 700 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 701 702 if (sz > 1) { 703 puts("<table class=\"results\">"); 704 for (i = 0; i < sz; i++) { 705 printf(" <tr>\n" 706 " <td>" 707 "<a class=\"Xr\" href=\"/"); 708 if (*scriptname != '\0') 709 printf("%s/", scriptname); 710 if (strcmp(req->q.manpath, req->p[0])) 711 printf("%s/", req->q.manpath); 712 printf("%s\">", r[i].file); 713 html_print(r[i].names); 714 printf("</a></td>\n" 715 " <td><span class=\"Nd\">"); 716 html_print(r[i].output); 717 puts("</span></td>\n" 718 " </tr>"); 719 } 720 puts("</table>"); 721 } 722 723 if (req->q.equal || sz == 1) { 724 puts("<hr>"); 725 resp_show(req, r[iuse].file); 726 } 727 728 resp_end_html(); 729 } 730 731 static void 732 resp_catman(const struct req *req, const char *file) 733 { 734 FILE *f; 735 char *p; 736 size_t sz; 737 ssize_t len; 738 int i; 739 int italic, bold; 740 741 if ((f = fopen(file, "r")) == NULL) { 742 puts("<p>You specified an invalid manual file.</p>"); 743 return; 744 } 745 746 puts("<div class=\"catman\">\n" 747 "<pre>"); 748 749 p = NULL; 750 sz = 0; 751 752 while ((len = getline(&p, &sz, f)) != -1) { 753 bold = italic = 0; 754 for (i = 0; i < len - 1; i++) { 755 /* 756 * This means that the catpage is out of state. 757 * Ignore it and keep going (although the 758 * catpage is bogus). 759 */ 760 761 if ('\b' == p[i] || '\n' == p[i]) 762 continue; 763 764 /* 765 * Print a regular character. 766 * Close out any bold/italic scopes. 767 * If we're in back-space mode, make sure we'll 768 * have something to enter when we backspace. 769 */ 770 771 if ('\b' != p[i + 1]) { 772 if (italic) 773 printf("</i>"); 774 if (bold) 775 printf("</b>"); 776 italic = bold = 0; 777 html_putchar(p[i]); 778 continue; 779 } else if (i + 2 >= len) 780 continue; 781 782 /* Italic mode. */ 783 784 if ('_' == p[i]) { 785 if (bold) 786 printf("</b>"); 787 if ( ! italic) 788 printf("<i>"); 789 bold = 0; 790 italic = 1; 791 i += 2; 792 html_putchar(p[i]); 793 continue; 794 } 795 796 /* 797 * Handle funny behaviour troff-isms. 798 * These grok'd from the original man2html.c. 799 */ 800 801 if (('+' == p[i] && 'o' == p[i + 2]) || 802 ('o' == p[i] && '+' == p[i + 2]) || 803 ('|' == p[i] && '=' == p[i + 2]) || 804 ('=' == p[i] && '|' == p[i + 2]) || 805 ('*' == p[i] && '=' == p[i + 2]) || 806 ('=' == p[i] && '*' == p[i + 2]) || 807 ('*' == p[i] && '|' == p[i + 2]) || 808 ('|' == p[i] && '*' == p[i + 2])) { 809 if (italic) 810 printf("</i>"); 811 if (bold) 812 printf("</b>"); 813 italic = bold = 0; 814 putchar('*'); 815 i += 2; 816 continue; 817 } else if (('|' == p[i] && '-' == p[i + 2]) || 818 ('-' == p[i] && '|' == p[i + 1]) || 819 ('+' == p[i] && '-' == p[i + 1]) || 820 ('-' == p[i] && '+' == p[i + 1]) || 821 ('+' == p[i] && '|' == p[i + 1]) || 822 ('|' == p[i] && '+' == p[i + 1])) { 823 if (italic) 824 printf("</i>"); 825 if (bold) 826 printf("</b>"); 827 italic = bold = 0; 828 putchar('+'); 829 i += 2; 830 continue; 831 } 832 833 /* Bold mode. */ 834 835 if (italic) 836 printf("</i>"); 837 if ( ! bold) 838 printf("<b>"); 839 bold = 1; 840 italic = 0; 841 i += 2; 842 html_putchar(p[i]); 843 } 844 845 /* 846 * Clean up the last character. 847 * We can get to a newline; don't print that. 848 */ 849 850 if (italic) 851 printf("</i>"); 852 if (bold) 853 printf("</b>"); 854 855 if (i == len - 1 && p[i] != '\n') 856 html_putchar(p[i]); 857 858 putchar('\n'); 859 } 860 free(p); 861 862 puts("</pre>\n" 863 "</div>"); 864 865 fclose(f); 866 } 867 868 static void 869 resp_format(const struct req *req, const char *file) 870 { 871 struct manoutput conf; 872 struct mparse *mp; 873 struct roff_meta *meta; 874 void *vp; 875 int fd; 876 int usepath; 877 878 if (-1 == (fd = open(file, O_RDONLY))) { 879 puts("<p>You specified an invalid manual file.</p>"); 880 return; 881 } 882 883 mchars_alloc(); 884 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 885 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 886 mparse_readfd(mp, fd, file); 887 close(fd); 888 meta = mparse_result(mp); 889 890 memset(&conf, 0, sizeof(conf)); 891 conf.fragment = 1; 892 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 893 usepath = strcmp(req->q.manpath, req->p[0]); 894 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 895 scriptname, *scriptname == '\0' ? "" : "/", 896 usepath ? req->q.manpath : "", usepath ? "/" : ""); 897 898 vp = html_alloc(&conf); 899 if (meta->macroset == MACROSET_MDOC) 900 html_mdoc(vp, meta); 901 else 902 html_man(vp, meta); 903 904 html_free(vp); 905 mparse_free(mp); 906 mchars_free(); 907 free(conf.man); 908 free(conf.style); 909 } 910 911 static void 912 resp_show(const struct req *req, const char *file) 913 { 914 915 if ('.' == file[0] && '/' == file[1]) 916 file += 2; 917 918 if ('c' == *file) 919 resp_catman(req, file); 920 else 921 resp_format(req, file); 922 } 923 924 static void 925 pg_show(struct req *req, const char *fullpath) 926 { 927 char *manpath; 928 const char *file; 929 930 if ((file = strchr(fullpath, '/')) == NULL) { 931 pg_error_badrequest( 932 "You did not specify a page to show."); 933 return; 934 } 935 manpath = mandoc_strndup(fullpath, file - fullpath); 936 file++; 937 938 if ( ! validate_manpath(req, manpath)) { 939 pg_error_badrequest( 940 "You specified an invalid manpath."); 941 free(manpath); 942 return; 943 } 944 945 /* 946 * Begin by chdir()ing into the manpath. 947 * This way we can pick up the database files, which are 948 * relative to the manpath root. 949 */ 950 951 if (chdir(manpath) == -1) { 952 warn("chdir %s", manpath); 953 pg_error_internal(); 954 free(manpath); 955 return; 956 } 957 free(manpath); 958 959 if ( ! validate_filename(file)) { 960 pg_error_badrequest( 961 "You specified an invalid manual file."); 962 return; 963 } 964 965 resp_begin_html(200, NULL, file); 966 resp_searchform(req, FOCUS_NONE); 967 resp_show(req, file); 968 resp_end_html(); 969 } 970 971 static void 972 pg_search(const struct req *req) 973 { 974 struct mansearch search; 975 struct manpaths paths; 976 struct manpage *res; 977 char **argv; 978 char *query, *rp, *wp; 979 size_t ressz; 980 int argc; 981 982 /* 983 * Begin by chdir()ing into the root of the manpath. 984 * This way we can pick up the database files, which are 985 * relative to the manpath root. 986 */ 987 988 if (chdir(req->q.manpath) == -1) { 989 warn("chdir %s", req->q.manpath); 990 pg_error_internal(); 991 return; 992 } 993 994 search.arch = req->q.arch; 995 search.sec = req->q.sec; 996 search.outkey = "Nd"; 997 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 998 search.firstmatch = 1; 999 1000 paths.sz = 1; 1001 paths.paths = mandoc_malloc(sizeof(char *)); 1002 paths.paths[0] = mandoc_strdup("."); 1003 1004 /* 1005 * Break apart at spaces with backslash-escaping. 1006 */ 1007 1008 argc = 0; 1009 argv = NULL; 1010 rp = query = mandoc_strdup(req->q.query); 1011 for (;;) { 1012 while (isspace((unsigned char)*rp)) 1013 rp++; 1014 if (*rp == '\0') 1015 break; 1016 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 1017 argv[argc++] = wp = rp; 1018 for (;;) { 1019 if (isspace((unsigned char)*rp)) { 1020 *wp = '\0'; 1021 rp++; 1022 break; 1023 } 1024 if (rp[0] == '\\' && rp[1] != '\0') 1025 rp++; 1026 if (wp != rp) 1027 *wp = *rp; 1028 if (*rp == '\0') 1029 break; 1030 wp++; 1031 rp++; 1032 } 1033 } 1034 1035 res = NULL; 1036 ressz = 0; 1037 if (req->isquery && req->q.equal && argc == 1) 1038 pg_redirect(req, argv[0]); 1039 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1040 pg_noresult(req, 400, "Bad Request", 1041 "You entered an invalid query."); 1042 else if (ressz == 0) 1043 pg_noresult(req, 404, "Not Found", "No results found."); 1044 else 1045 pg_searchres(req, res, ressz); 1046 1047 free(query); 1048 mansearch_free(res, ressz); 1049 free(paths.paths[0]); 1050 free(paths.paths); 1051 } 1052 1053 int 1054 main(void) 1055 { 1056 struct req req; 1057 struct itimerval itimer; 1058 const char *path; 1059 const char *querystring; 1060 int i; 1061 1062 /* 1063 * The "rpath" pledge could be revoked after mparse_readfd() 1064 * if the file desciptor to "/footer.html" would be opened 1065 * up front, but it's probably not worth the complication 1066 * of the code it would cause: it would require scattering 1067 * pledge() calls in multiple low-level resp_*() functions. 1068 */ 1069 1070 if (pledge("stdio rpath", NULL) == -1) { 1071 warn("pledge"); 1072 pg_error_internal(); 1073 return EXIT_FAILURE; 1074 } 1075 1076 /* Poor man's ReDoS mitigation. */ 1077 1078 itimer.it_value.tv_sec = 2; 1079 itimer.it_value.tv_usec = 0; 1080 itimer.it_interval.tv_sec = 2; 1081 itimer.it_interval.tv_usec = 0; 1082 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1083 warn("setitimer"); 1084 pg_error_internal(); 1085 return EXIT_FAILURE; 1086 } 1087 1088 /* 1089 * First we change directory into the MAN_DIR so that 1090 * subsequent scanning for manpath directories is rooted 1091 * relative to the same position. 1092 */ 1093 1094 if (chdir(MAN_DIR) == -1) { 1095 warn("MAN_DIR: %s", MAN_DIR); 1096 pg_error_internal(); 1097 return EXIT_FAILURE; 1098 } 1099 1100 memset(&req, 0, sizeof(struct req)); 1101 req.q.equal = 1; 1102 parse_manpath_conf(&req); 1103 1104 /* Parse the path info and the query string. */ 1105 1106 if ((path = getenv("PATH_INFO")) == NULL) 1107 path = ""; 1108 else if (*path == '/') 1109 path++; 1110 1111 if (*path != '\0') { 1112 parse_path_info(&req, path); 1113 if (req.q.manpath == NULL || req.q.sec == NULL || 1114 *req.q.query == '\0' || access(path, F_OK) == -1) 1115 path = ""; 1116 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1117 parse_query_string(&req, querystring); 1118 1119 /* Validate parsed data and add defaults. */ 1120 1121 if (req.q.manpath == NULL) 1122 req.q.manpath = mandoc_strdup(req.p[0]); 1123 else if ( ! validate_manpath(&req, req.q.manpath)) { 1124 pg_error_badrequest( 1125 "You specified an invalid manpath."); 1126 return EXIT_FAILURE; 1127 } 1128 1129 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1130 pg_error_badrequest( 1131 "You specified an invalid architecture."); 1132 return EXIT_FAILURE; 1133 } 1134 1135 /* Dispatch to the three different pages. */ 1136 1137 if ('\0' != *path) 1138 pg_show(&req, path); 1139 else if (NULL != req.q.query) 1140 pg_search(&req); 1141 else 1142 pg_index(&req); 1143 1144 free(req.q.manpath); 1145 free(req.q.arch); 1146 free(req.q.sec); 1147 free(req.q.query); 1148 for (i = 0; i < (int)req.psz; i++) 1149 free(req.p[i]); 1150 free(req.p); 1151 return EXIT_SUCCESS; 1152 } 1153 1154 /* 1155 * Translate PATH_INFO to a query. 1156 */ 1157 static void 1158 parse_path_info(struct req *req, const char *path) 1159 { 1160 const char *name, *sec, *end; 1161 1162 req->isquery = 0; 1163 req->q.equal = 1; 1164 req->q.manpath = NULL; 1165 req->q.arch = NULL; 1166 1167 /* Mandatory manual page name. */ 1168 if ((name = strrchr(path, '/')) == NULL) 1169 name = path; 1170 else 1171 name++; 1172 1173 /* Optional trailing section. */ 1174 sec = strrchr(name, '.'); 1175 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1176 req->q.query = mandoc_strndup(name, sec - name - 1); 1177 req->q.sec = mandoc_strdup(sec); 1178 } else { 1179 req->q.query = mandoc_strdup(name); 1180 req->q.sec = NULL; 1181 } 1182 1183 /* Handle the case of name[.section] only. */ 1184 if (name == path) 1185 return; 1186 1187 /* Optional manpath. */ 1188 end = strchr(path, '/'); 1189 req->q.manpath = mandoc_strndup(path, end - path); 1190 if (validate_manpath(req, req->q.manpath)) { 1191 path = end + 1; 1192 if (name == path) 1193 return; 1194 } else { 1195 free(req->q.manpath); 1196 req->q.manpath = NULL; 1197 } 1198 1199 /* Optional section. */ 1200 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1201 path += 3; 1202 end = strchr(path, '/'); 1203 free(req->q.sec); 1204 req->q.sec = mandoc_strndup(path, end - path); 1205 path = end + 1; 1206 if (name == path) 1207 return; 1208 } 1209 1210 /* Optional architecture. */ 1211 end = strchr(path, '/'); 1212 if (end + 1 != name) { 1213 pg_error_badrequest( 1214 "You specified too many directory components."); 1215 exit(EXIT_FAILURE); 1216 } 1217 req->q.arch = mandoc_strndup(path, end - path); 1218 if (validate_arch(req->q.arch) == 0) { 1219 pg_error_badrequest( 1220 "You specified an invalid directory component."); 1221 exit(EXIT_FAILURE); 1222 } 1223 } 1224 1225 /* 1226 * Scan for indexable paths. 1227 */ 1228 static void 1229 parse_manpath_conf(struct req *req) 1230 { 1231 FILE *fp; 1232 char *dp; 1233 size_t dpsz; 1234 ssize_t len; 1235 1236 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1237 warn("%s/manpath.conf", MAN_DIR); 1238 pg_error_internal(); 1239 exit(EXIT_FAILURE); 1240 } 1241 1242 dp = NULL; 1243 dpsz = 0; 1244 1245 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1246 if (dp[len - 1] == '\n') 1247 dp[--len] = '\0'; 1248 req->p = mandoc_realloc(req->p, 1249 (req->psz + 1) * sizeof(char *)); 1250 if ( ! validate_urifrag(dp)) { 1251 warnx("%s/manpath.conf contains " 1252 "unsafe path \"%s\"", MAN_DIR, dp); 1253 pg_error_internal(); 1254 exit(EXIT_FAILURE); 1255 } 1256 if (strchr(dp, '/') != NULL) { 1257 warnx("%s/manpath.conf contains " 1258 "path with slash \"%s\"", MAN_DIR, dp); 1259 pg_error_internal(); 1260 exit(EXIT_FAILURE); 1261 } 1262 req->p[req->psz++] = dp; 1263 dp = NULL; 1264 dpsz = 0; 1265 } 1266 free(dp); 1267 1268 if (req->p == NULL) { 1269 warnx("%s/manpath.conf is empty", MAN_DIR); 1270 pg_error_internal(); 1271 exit(EXIT_FAILURE); 1272 } 1273 } 1274