1 /* $OpenBSD: cgi.c,v 1.113 2021/05/01 16:11:17 visa Exp $ */ 2 /* 3 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de> 4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the man.cgi(8) program. 19 */ 20 #include <sys/types.h> 21 #include <sys/time.h> 22 23 #include <ctype.h> 24 #include <err.h> 25 #include <errno.h> 26 #include <fcntl.h> 27 #include <limits.h> 28 #include <stdint.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "mandoc_aux.h" 35 #include "mandoc.h" 36 #include "roff.h" 37 #include "mdoc.h" 38 #include "man.h" 39 #include "mandoc_parse.h" 40 #include "main.h" 41 #include "manconf.h" 42 #include "mansearch.h" 43 #include "cgi.h" 44 45 /* 46 * A query as passed to the search function. 47 */ 48 struct query { 49 char *manpath; /* desired manual directory */ 50 char *arch; /* architecture */ 51 char *sec; /* manual section */ 52 char *query; /* unparsed query expression */ 53 int equal; /* match whole names, not substrings */ 54 }; 55 56 struct req { 57 struct query q; 58 char **p; /* array of available manpaths */ 59 size_t psz; /* number of available manpaths */ 60 int isquery; /* QUERY_STRING used, not PATH_INFO */ 61 }; 62 63 enum focus { 64 FOCUS_NONE = 0, 65 FOCUS_QUERY 66 }; 67 68 static void html_print(const char *); 69 static void html_putchar(char); 70 static int http_decode(char *); 71 static void http_encode(const char *); 72 static void parse_manpath_conf(struct req *); 73 static void parse_path_info(struct req *, const char *); 74 static void parse_query_string(struct req *, const char *); 75 static void pg_error_badrequest(const char *); 76 static void pg_error_internal(void); 77 static void pg_index(const struct req *); 78 static void pg_noresult(const struct req *, int, const char *, 79 const char *); 80 static void pg_redirect(const struct req *, const char *); 81 static void pg_search(const struct req *); 82 static void pg_searchres(const struct req *, 83 struct manpage *, size_t); 84 static void pg_show(struct req *, const char *); 85 static void resp_begin_html(int, const char *, const char *); 86 static void resp_begin_http(int, const char *); 87 static void resp_catman(const struct req *, const char *); 88 static void resp_copy(const char *); 89 static void resp_end_html(void); 90 static void resp_format(const struct req *, const char *); 91 static void resp_searchform(const struct req *, enum focus); 92 static void resp_show(const struct req *, const char *); 93 static void set_query_attr(char **, char **); 94 static int validate_arch(const char *); 95 static int validate_filename(const char *); 96 static int validate_manpath(const struct req *, const char *); 97 static int validate_urifrag(const char *); 98 99 static const char *scriptname = SCRIPT_NAME; 100 101 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 102 static const char *const sec_numbers[] = { 103 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 104 }; 105 static const char *const sec_names[] = { 106 "All Sections", 107 "1 - General Commands", 108 "2 - System Calls", 109 "3 - Library Functions", 110 "3p - Perl Library", 111 "4 - Device Drivers", 112 "5 - File Formats", 113 "6 - Games", 114 "7 - Miscellaneous Information", 115 "8 - System Manager\'s Manual", 116 "9 - Kernel Developer\'s Manual" 117 }; 118 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 119 120 static const char *const arch_names[] = { 121 "amd64", "alpha", "armv7", "arm64", 122 "hppa", "i386", "landisk", "loongson", 123 "luna88k", "macppc", "mips64", "octeon", 124 "powerpc64", "riscv64", "sparc64", 125 126 "amiga", "arc", "armish", "arm32", 127 "atari", "aviion", "beagle", "cats", 128 "hppa64", "hp300", 129 "ia64", "mac68k", "mvme68k", "mvme88k", 130 "mvmeppc", "palm", "pc532", "pegasos", 131 "pmax", "powerpc", "sgi", "socppc", 132 "solbourne", "sparc", 133 "sun3", "vax", "wgrisc", "x68k", 134 "zaurus" 135 }; 136 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 137 138 /* 139 * Print a character, escaping HTML along the way. 140 * This will pass non-ASCII straight to output: be warned! 141 */ 142 static void 143 html_putchar(char c) 144 { 145 146 switch (c) { 147 case '"': 148 printf("""); 149 break; 150 case '&': 151 printf("&"); 152 break; 153 case '>': 154 printf(">"); 155 break; 156 case '<': 157 printf("<"); 158 break; 159 default: 160 putchar((unsigned char)c); 161 break; 162 } 163 } 164 165 /* 166 * Call through to html_putchar(). 167 * Accepts NULL strings. 168 */ 169 static void 170 html_print(const char *p) 171 { 172 173 if (NULL == p) 174 return; 175 while ('\0' != *p) 176 html_putchar(*p++); 177 } 178 179 /* 180 * Transfer the responsibility for the allocated string *val 181 * to the query structure. 182 */ 183 static void 184 set_query_attr(char **attr, char **val) 185 { 186 187 free(*attr); 188 if (**val == '\0') { 189 *attr = NULL; 190 free(*val); 191 } else 192 *attr = *val; 193 *val = NULL; 194 } 195 196 /* 197 * Parse the QUERY_STRING for key-value pairs 198 * and store the values into the query structure. 199 */ 200 static void 201 parse_query_string(struct req *req, const char *qs) 202 { 203 char *key, *val; 204 size_t keysz, valsz; 205 206 req->isquery = 1; 207 req->q.manpath = NULL; 208 req->q.arch = NULL; 209 req->q.sec = NULL; 210 req->q.query = NULL; 211 req->q.equal = 1; 212 213 key = val = NULL; 214 while (*qs != '\0') { 215 216 /* Parse one key. */ 217 218 keysz = strcspn(qs, "=;&"); 219 key = mandoc_strndup(qs, keysz); 220 qs += keysz; 221 if (*qs != '=') 222 goto next; 223 224 /* Parse one value. */ 225 226 valsz = strcspn(++qs, ";&"); 227 val = mandoc_strndup(qs, valsz); 228 qs += valsz; 229 230 /* Decode and catch encoding errors. */ 231 232 if ( ! (http_decode(key) && http_decode(val))) 233 goto next; 234 235 /* Handle key-value pairs. */ 236 237 if ( ! strcmp(key, "query")) 238 set_query_attr(&req->q.query, &val); 239 240 else if ( ! strcmp(key, "apropos")) 241 req->q.equal = !strcmp(val, "0"); 242 243 else if ( ! strcmp(key, "manpath")) { 244 #ifdef COMPAT_OLDURI 245 if ( ! strncmp(val, "OpenBSD ", 8)) { 246 val[7] = '-'; 247 if ('C' == val[8]) 248 val[8] = 'c'; 249 } 250 #endif 251 set_query_attr(&req->q.manpath, &val); 252 } 253 254 else if ( ! (strcmp(key, "sec") 255 #ifdef COMPAT_OLDURI 256 && strcmp(key, "sektion") 257 #endif 258 )) { 259 if ( ! strcmp(val, "0")) 260 *val = '\0'; 261 set_query_attr(&req->q.sec, &val); 262 } 263 264 else if ( ! strcmp(key, "arch")) { 265 if ( ! strcmp(val, "default")) 266 *val = '\0'; 267 set_query_attr(&req->q.arch, &val); 268 } 269 270 /* 271 * The key must be freed in any case. 272 * The val may have been handed over to the query 273 * structure, in which case it is now NULL. 274 */ 275 next: 276 free(key); 277 key = NULL; 278 free(val); 279 val = NULL; 280 281 if (*qs != '\0') 282 qs++; 283 } 284 } 285 286 /* 287 * HTTP-decode a string. The standard explanation is that this turns 288 * "%4e+foo" into "n foo" in the regular way. This is done in-place 289 * over the allocated string. 290 */ 291 static int 292 http_decode(char *p) 293 { 294 char hex[3]; 295 char *q; 296 int c; 297 298 hex[2] = '\0'; 299 300 q = p; 301 for ( ; '\0' != *p; p++, q++) { 302 if ('%' == *p) { 303 if ('\0' == (hex[0] = *(p + 1))) 304 return 0; 305 if ('\0' == (hex[1] = *(p + 2))) 306 return 0; 307 if (1 != sscanf(hex, "%x", &c)) 308 return 0; 309 if ('\0' == c) 310 return 0; 311 312 *q = (char)c; 313 p += 2; 314 } else 315 *q = '+' == *p ? ' ' : *p; 316 } 317 318 *q = '\0'; 319 return 1; 320 } 321 322 static void 323 http_encode(const char *p) 324 { 325 for (; *p != '\0'; p++) { 326 if (isalnum((unsigned char)*p) == 0 && 327 strchr("-._~", *p) == NULL) 328 printf("%%%2.2X", (unsigned char)*p); 329 else 330 putchar(*p); 331 } 332 } 333 334 static void 335 resp_begin_http(int code, const char *msg) 336 { 337 338 if (200 != code) 339 printf("Status: %d %s\r\n", code, msg); 340 341 printf("Content-Type: text/html; charset=utf-8\r\n" 342 "Cache-Control: no-cache\r\n" 343 "Content-Security-Policy: default-src 'none'; " 344 "style-src 'self' 'unsafe-inline'\r\n" 345 "Pragma: no-cache\r\n" 346 "\r\n"); 347 348 fflush(stdout); 349 } 350 351 static void 352 resp_copy(const char *filename) 353 { 354 char buf[4096]; 355 ssize_t sz; 356 int fd; 357 358 if ((fd = open(filename, O_RDONLY)) != -1) { 359 fflush(stdout); 360 while ((sz = read(fd, buf, sizeof(buf))) > 0) 361 write(STDOUT_FILENO, buf, sz); 362 close(fd); 363 } 364 } 365 366 static void 367 resp_begin_html(int code, const char *msg, const char *file) 368 { 369 char *cp; 370 371 resp_begin_http(code, msg); 372 373 printf("<!DOCTYPE html>\n" 374 "<html>\n" 375 "<head>\n" 376 " <meta charset=\"UTF-8\"/>\n" 377 " <meta name=\"viewport\"" 378 " content=\"width=device-width, initial-scale=1.0\">\n" 379 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 380 " type=\"text/css\" media=\"all\">\n" 381 " <title>", 382 CSS_DIR); 383 if (file != NULL) { 384 if ((cp = strrchr(file, '/')) != NULL) 385 file = cp + 1; 386 if ((cp = strrchr(file, '.')) != NULL) { 387 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 388 } else 389 printf("%s - ", file); 390 } 391 printf("%s</title>\n" 392 "</head>\n" 393 "<body>\n", 394 CUSTOMIZE_TITLE); 395 396 resp_copy(MAN_DIR "/header.html"); 397 } 398 399 static void 400 resp_end_html(void) 401 { 402 403 resp_copy(MAN_DIR "/footer.html"); 404 405 puts("</body>\n" 406 "</html>"); 407 } 408 409 static void 410 resp_searchform(const struct req *req, enum focus focus) 411 { 412 int i; 413 414 printf("<form action=\"/%s\" method=\"get\" " 415 "autocomplete=\"off\" autocapitalize=\"none\">\n" 416 " <fieldset>\n" 417 " <legend>Manual Page Search Parameters</legend>\n", 418 scriptname); 419 420 /* Write query input box. */ 421 422 printf(" <input type=\"search\" name=\"query\" value=\""); 423 if (req->q.query != NULL) 424 html_print(req->q.query); 425 printf( "\" size=\"40\""); 426 if (focus == FOCUS_QUERY) 427 printf(" autofocus"); 428 puts(">"); 429 430 /* Write submission buttons. */ 431 432 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 433 "man</button>\n" 434 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 435 "apropos</button>\n" 436 " <br/>\n"); 437 438 /* Write section selector. */ 439 440 puts(" <select name=\"sec\">"); 441 for (i = 0; i < sec_MAX; i++) { 442 printf(" <option value=\"%s\"", sec_numbers[i]); 443 if (NULL != req->q.sec && 444 0 == strcmp(sec_numbers[i], req->q.sec)) 445 printf(" selected=\"selected\""); 446 printf(">%s</option>\n", sec_names[i]); 447 } 448 puts(" </select>"); 449 450 /* Write architecture selector. */ 451 452 printf( " <select name=\"arch\">\n" 453 " <option value=\"default\""); 454 if (NULL == req->q.arch) 455 printf(" selected=\"selected\""); 456 puts(">All Architectures</option>"); 457 for (i = 0; i < arch_MAX; i++) { 458 printf(" <option"); 459 if (NULL != req->q.arch && 460 0 == strcmp(arch_names[i], req->q.arch)) 461 printf(" selected=\"selected\""); 462 printf(">%s</option>\n", arch_names[i]); 463 } 464 puts(" </select>"); 465 466 /* Write manpath selector. */ 467 468 if (req->psz > 1) { 469 puts(" <select name=\"manpath\">"); 470 for (i = 0; i < (int)req->psz; i++) { 471 printf(" <option"); 472 if (strcmp(req->q.manpath, req->p[i]) == 0) 473 printf(" selected=\"selected\""); 474 printf(">"); 475 html_print(req->p[i]); 476 puts("</option>"); 477 } 478 puts(" </select>"); 479 } 480 481 puts(" </fieldset>\n" 482 "</form>"); 483 } 484 485 static int 486 validate_urifrag(const char *frag) 487 { 488 489 while ('\0' != *frag) { 490 if ( ! (isalnum((unsigned char)*frag) || 491 '-' == *frag || '.' == *frag || 492 '/' == *frag || '_' == *frag)) 493 return 0; 494 frag++; 495 } 496 return 1; 497 } 498 499 static int 500 validate_manpath(const struct req *req, const char* manpath) 501 { 502 size_t i; 503 504 for (i = 0; i < req->psz; i++) 505 if ( ! strcmp(manpath, req->p[i])) 506 return 1; 507 508 return 0; 509 } 510 511 static int 512 validate_arch(const char *arch) 513 { 514 int i; 515 516 for (i = 0; i < arch_MAX; i++) 517 if (strcmp(arch, arch_names[i]) == 0) 518 return 1; 519 520 return 0; 521 } 522 523 static int 524 validate_filename(const char *file) 525 { 526 527 if ('.' == file[0] && '/' == file[1]) 528 file += 2; 529 530 return ! (strstr(file, "../") || strstr(file, "/..") || 531 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 532 } 533 534 static void 535 pg_index(const struct req *req) 536 { 537 538 resp_begin_html(200, NULL, NULL); 539 resp_searchform(req, FOCUS_QUERY); 540 printf("<p>\n" 541 "This web interface is documented in the\n" 542 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 543 "manual, and the\n" 544 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 545 "manual explains the query syntax.\n" 546 "</p>\n", 547 scriptname, *scriptname == '\0' ? "" : "/", 548 scriptname, *scriptname == '\0' ? "" : "/"); 549 resp_end_html(); 550 } 551 552 static void 553 pg_noresult(const struct req *req, int code, const char *http_msg, 554 const char *user_msg) 555 { 556 resp_begin_html(code, http_msg, NULL); 557 resp_searchform(req, FOCUS_QUERY); 558 puts("<p>"); 559 puts(user_msg); 560 puts("</p>"); 561 resp_end_html(); 562 } 563 564 static void 565 pg_error_badrequest(const char *msg) 566 { 567 568 resp_begin_html(400, "Bad Request", NULL); 569 puts("<h1>Bad Request</h1>\n" 570 "<p>\n"); 571 puts(msg); 572 printf("Try again from the\n" 573 "<a href=\"/%s\">main page</a>.\n" 574 "</p>", scriptname); 575 resp_end_html(); 576 } 577 578 static void 579 pg_error_internal(void) 580 { 581 resp_begin_html(500, "Internal Server Error", NULL); 582 puts("<p>Internal Server Error</p>"); 583 resp_end_html(); 584 } 585 586 static void 587 pg_redirect(const struct req *req, const char *name) 588 { 589 printf("Status: 303 See Other\r\n" 590 "Location: /"); 591 if (*scriptname != '\0') 592 printf("%s/", scriptname); 593 if (strcmp(req->q.manpath, req->p[0])) 594 printf("%s/", req->q.manpath); 595 if (req->q.arch != NULL) 596 printf("%s/", req->q.arch); 597 http_encode(name); 598 if (req->q.sec != NULL) { 599 putchar('.'); 600 http_encode(req->q.sec); 601 } 602 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 603 } 604 605 static void 606 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 607 { 608 char *arch, *archend; 609 const char *sec; 610 size_t i, iuse; 611 int archprio, archpriouse; 612 int prio, priouse; 613 614 for (i = 0; i < sz; i++) { 615 if (validate_filename(r[i].file)) 616 continue; 617 warnx("invalid filename %s in %s database", 618 r[i].file, req->q.manpath); 619 pg_error_internal(); 620 return; 621 } 622 623 if (req->isquery && sz == 1) { 624 /* 625 * If we have just one result, then jump there now 626 * without any delay. 627 */ 628 printf("Status: 303 See Other\r\n" 629 "Location: /"); 630 if (*scriptname != '\0') 631 printf("%s/", scriptname); 632 if (strcmp(req->q.manpath, req->p[0])) 633 printf("%s/", req->q.manpath); 634 printf("%s\r\n" 635 "Content-Type: text/html; charset=utf-8\r\n\r\n", 636 r[0].file); 637 return; 638 } 639 640 /* 641 * In man(1) mode, show one of the pages 642 * even if more than one is found. 643 */ 644 645 iuse = 0; 646 if (req->q.equal || sz == 1) { 647 priouse = 20; 648 archpriouse = 3; 649 for (i = 0; i < sz; i++) { 650 sec = r[i].file; 651 sec += strcspn(sec, "123456789"); 652 if (sec[0] == '\0') 653 continue; 654 prio = sec_prios[sec[0] - '1']; 655 if (sec[1] != '/') 656 prio += 10; 657 if (req->q.arch == NULL) { 658 archprio = 659 ((arch = strchr(sec + 1, '/')) 660 == NULL) ? 3 : 661 ((archend = strchr(arch + 1, '/')) 662 == NULL) ? 0 : 663 strncmp(arch, "amd64/", 664 archend - arch) ? 2 : 1; 665 if (archprio < archpriouse) { 666 archpriouse = archprio; 667 priouse = prio; 668 iuse = i; 669 continue; 670 } 671 if (archprio > archpriouse) 672 continue; 673 } 674 if (prio >= priouse) 675 continue; 676 priouse = prio; 677 iuse = i; 678 } 679 resp_begin_html(200, NULL, r[iuse].file); 680 } else 681 resp_begin_html(200, NULL, NULL); 682 683 resp_searchform(req, 684 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 685 686 if (sz > 1) { 687 puts("<table class=\"results\">"); 688 for (i = 0; i < sz; i++) { 689 printf(" <tr>\n" 690 " <td>" 691 "<a class=\"Xr\" href=\"/"); 692 if (*scriptname != '\0') 693 printf("%s/", scriptname); 694 if (strcmp(req->q.manpath, req->p[0])) 695 printf("%s/", req->q.manpath); 696 printf("%s\">", r[i].file); 697 html_print(r[i].names); 698 printf("</a></td>\n" 699 " <td><span class=\"Nd\">"); 700 html_print(r[i].output); 701 puts("</span></td>\n" 702 " </tr>"); 703 } 704 puts("</table>"); 705 } 706 707 if (req->q.equal || sz == 1) { 708 puts("<hr>"); 709 resp_show(req, r[iuse].file); 710 } 711 712 resp_end_html(); 713 } 714 715 static void 716 resp_catman(const struct req *req, const char *file) 717 { 718 FILE *f; 719 char *p; 720 size_t sz; 721 ssize_t len; 722 int i; 723 int italic, bold; 724 725 if ((f = fopen(file, "r")) == NULL) { 726 puts("<p>You specified an invalid manual file.</p>"); 727 return; 728 } 729 730 puts("<div class=\"catman\">\n" 731 "<pre>"); 732 733 p = NULL; 734 sz = 0; 735 736 while ((len = getline(&p, &sz, f)) != -1) { 737 bold = italic = 0; 738 for (i = 0; i < len - 1; i++) { 739 /* 740 * This means that the catpage is out of state. 741 * Ignore it and keep going (although the 742 * catpage is bogus). 743 */ 744 745 if ('\b' == p[i] || '\n' == p[i]) 746 continue; 747 748 /* 749 * Print a regular character. 750 * Close out any bold/italic scopes. 751 * If we're in back-space mode, make sure we'll 752 * have something to enter when we backspace. 753 */ 754 755 if ('\b' != p[i + 1]) { 756 if (italic) 757 printf("</i>"); 758 if (bold) 759 printf("</b>"); 760 italic = bold = 0; 761 html_putchar(p[i]); 762 continue; 763 } else if (i + 2 >= len) 764 continue; 765 766 /* Italic mode. */ 767 768 if ('_' == p[i]) { 769 if (bold) 770 printf("</b>"); 771 if ( ! italic) 772 printf("<i>"); 773 bold = 0; 774 italic = 1; 775 i += 2; 776 html_putchar(p[i]); 777 continue; 778 } 779 780 /* 781 * Handle funny behaviour troff-isms. 782 * These grok'd from the original man2html.c. 783 */ 784 785 if (('+' == p[i] && 'o' == p[i + 2]) || 786 ('o' == p[i] && '+' == p[i + 2]) || 787 ('|' == p[i] && '=' == p[i + 2]) || 788 ('=' == p[i] && '|' == p[i + 2]) || 789 ('*' == p[i] && '=' == p[i + 2]) || 790 ('=' == p[i] && '*' == p[i + 2]) || 791 ('*' == p[i] && '|' == p[i + 2]) || 792 ('|' == p[i] && '*' == p[i + 2])) { 793 if (italic) 794 printf("</i>"); 795 if (bold) 796 printf("</b>"); 797 italic = bold = 0; 798 putchar('*'); 799 i += 2; 800 continue; 801 } else if (('|' == p[i] && '-' == p[i + 2]) || 802 ('-' == p[i] && '|' == p[i + 1]) || 803 ('+' == p[i] && '-' == p[i + 1]) || 804 ('-' == p[i] && '+' == p[i + 1]) || 805 ('+' == p[i] && '|' == p[i + 1]) || 806 ('|' == p[i] && '+' == p[i + 1])) { 807 if (italic) 808 printf("</i>"); 809 if (bold) 810 printf("</b>"); 811 italic = bold = 0; 812 putchar('+'); 813 i += 2; 814 continue; 815 } 816 817 /* Bold mode. */ 818 819 if (italic) 820 printf("</i>"); 821 if ( ! bold) 822 printf("<b>"); 823 bold = 1; 824 italic = 0; 825 i += 2; 826 html_putchar(p[i]); 827 } 828 829 /* 830 * Clean up the last character. 831 * We can get to a newline; don't print that. 832 */ 833 834 if (italic) 835 printf("</i>"); 836 if (bold) 837 printf("</b>"); 838 839 if (i == len - 1 && p[i] != '\n') 840 html_putchar(p[i]); 841 842 putchar('\n'); 843 } 844 free(p); 845 846 puts("</pre>\n" 847 "</div>"); 848 849 fclose(f); 850 } 851 852 static void 853 resp_format(const struct req *req, const char *file) 854 { 855 struct manoutput conf; 856 struct mparse *mp; 857 struct roff_meta *meta; 858 void *vp; 859 int fd; 860 int usepath; 861 862 if (-1 == (fd = open(file, O_RDONLY, 0))) { 863 puts("<p>You specified an invalid manual file.</p>"); 864 return; 865 } 866 867 mchars_alloc(); 868 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 869 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 870 mparse_readfd(mp, fd, file); 871 close(fd); 872 meta = mparse_result(mp); 873 874 memset(&conf, 0, sizeof(conf)); 875 conf.fragment = 1; 876 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 877 usepath = strcmp(req->q.manpath, req->p[0]); 878 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 879 scriptname, *scriptname == '\0' ? "" : "/", 880 usepath ? req->q.manpath : "", usepath ? "/" : ""); 881 882 vp = html_alloc(&conf); 883 if (meta->macroset == MACROSET_MDOC) 884 html_mdoc(vp, meta); 885 else 886 html_man(vp, meta); 887 888 html_free(vp); 889 mparse_free(mp); 890 mchars_free(); 891 free(conf.man); 892 free(conf.style); 893 } 894 895 static void 896 resp_show(const struct req *req, const char *file) 897 { 898 899 if ('.' == file[0] && '/' == file[1]) 900 file += 2; 901 902 if ('c' == *file) 903 resp_catman(req, file); 904 else 905 resp_format(req, file); 906 } 907 908 static void 909 pg_show(struct req *req, const char *fullpath) 910 { 911 char *manpath; 912 const char *file; 913 914 if ((file = strchr(fullpath, '/')) == NULL) { 915 pg_error_badrequest( 916 "You did not specify a page to show."); 917 return; 918 } 919 manpath = mandoc_strndup(fullpath, file - fullpath); 920 file++; 921 922 if ( ! validate_manpath(req, manpath)) { 923 pg_error_badrequest( 924 "You specified an invalid manpath."); 925 free(manpath); 926 return; 927 } 928 929 /* 930 * Begin by chdir()ing into the manpath. 931 * This way we can pick up the database files, which are 932 * relative to the manpath root. 933 */ 934 935 if (chdir(manpath) == -1) { 936 warn("chdir %s", manpath); 937 pg_error_internal(); 938 free(manpath); 939 return; 940 } 941 free(manpath); 942 943 if ( ! validate_filename(file)) { 944 pg_error_badrequest( 945 "You specified an invalid manual file."); 946 return; 947 } 948 949 resp_begin_html(200, NULL, file); 950 resp_searchform(req, FOCUS_NONE); 951 resp_show(req, file); 952 resp_end_html(); 953 } 954 955 static void 956 pg_search(const struct req *req) 957 { 958 struct mansearch search; 959 struct manpaths paths; 960 struct manpage *res; 961 char **argv; 962 char *query, *rp, *wp; 963 size_t ressz; 964 int argc; 965 966 /* 967 * Begin by chdir()ing into the root of the manpath. 968 * This way we can pick up the database files, which are 969 * relative to the manpath root. 970 */ 971 972 if (chdir(req->q.manpath) == -1) { 973 warn("chdir %s", req->q.manpath); 974 pg_error_internal(); 975 return; 976 } 977 978 search.arch = req->q.arch; 979 search.sec = req->q.sec; 980 search.outkey = "Nd"; 981 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 982 search.firstmatch = 1; 983 984 paths.sz = 1; 985 paths.paths = mandoc_malloc(sizeof(char *)); 986 paths.paths[0] = mandoc_strdup("."); 987 988 /* 989 * Break apart at spaces with backslash-escaping. 990 */ 991 992 argc = 0; 993 argv = NULL; 994 rp = query = mandoc_strdup(req->q.query); 995 for (;;) { 996 while (isspace((unsigned char)*rp)) 997 rp++; 998 if (*rp == '\0') 999 break; 1000 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 1001 argv[argc++] = wp = rp; 1002 for (;;) { 1003 if (isspace((unsigned char)*rp)) { 1004 *wp = '\0'; 1005 rp++; 1006 break; 1007 } 1008 if (rp[0] == '\\' && rp[1] != '\0') 1009 rp++; 1010 if (wp != rp) 1011 *wp = *rp; 1012 if (*rp == '\0') 1013 break; 1014 wp++; 1015 rp++; 1016 } 1017 } 1018 1019 res = NULL; 1020 ressz = 0; 1021 if (req->isquery && req->q.equal && argc == 1) 1022 pg_redirect(req, argv[0]); 1023 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1024 pg_noresult(req, 400, "Bad Request", 1025 "You entered an invalid query."); 1026 else if (ressz == 0) 1027 pg_noresult(req, 404, "Not Found", "No results found."); 1028 else 1029 pg_searchres(req, res, ressz); 1030 1031 free(query); 1032 mansearch_free(res, ressz); 1033 free(paths.paths[0]); 1034 free(paths.paths); 1035 } 1036 1037 int 1038 main(void) 1039 { 1040 struct req req; 1041 struct itimerval itimer; 1042 const char *path; 1043 const char *querystring; 1044 int i; 1045 1046 /* 1047 * The "rpath" pledge could be revoked after mparse_readfd() 1048 * if the file desciptor to "/footer.html" would be opened 1049 * up front, but it's probably not worth the complication 1050 * of the code it would cause: it would require scattering 1051 * pledge() calls in multiple low-level resp_*() functions. 1052 */ 1053 1054 if (pledge("stdio rpath", NULL) == -1) { 1055 warn("pledge"); 1056 pg_error_internal(); 1057 return EXIT_FAILURE; 1058 } 1059 1060 /* Poor man's ReDoS mitigation. */ 1061 1062 itimer.it_value.tv_sec = 2; 1063 itimer.it_value.tv_usec = 0; 1064 itimer.it_interval.tv_sec = 2; 1065 itimer.it_interval.tv_usec = 0; 1066 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1067 warn("setitimer"); 1068 pg_error_internal(); 1069 return EXIT_FAILURE; 1070 } 1071 1072 /* 1073 * First we change directory into the MAN_DIR so that 1074 * subsequent scanning for manpath directories is rooted 1075 * relative to the same position. 1076 */ 1077 1078 if (chdir(MAN_DIR) == -1) { 1079 warn("MAN_DIR: %s", MAN_DIR); 1080 pg_error_internal(); 1081 return EXIT_FAILURE; 1082 } 1083 1084 memset(&req, 0, sizeof(struct req)); 1085 req.q.equal = 1; 1086 parse_manpath_conf(&req); 1087 1088 /* Parse the path info and the query string. */ 1089 1090 if ((path = getenv("PATH_INFO")) == NULL) 1091 path = ""; 1092 else if (*path == '/') 1093 path++; 1094 1095 if (*path != '\0') { 1096 parse_path_info(&req, path); 1097 if (req.q.manpath == NULL || req.q.sec == NULL || 1098 *req.q.query == '\0' || access(path, F_OK) == -1) 1099 path = ""; 1100 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1101 parse_query_string(&req, querystring); 1102 1103 /* Validate parsed data and add defaults. */ 1104 1105 if (req.q.manpath == NULL) 1106 req.q.manpath = mandoc_strdup(req.p[0]); 1107 else if ( ! validate_manpath(&req, req.q.manpath)) { 1108 pg_error_badrequest( 1109 "You specified an invalid manpath."); 1110 return EXIT_FAILURE; 1111 } 1112 1113 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1114 pg_error_badrequest( 1115 "You specified an invalid architecture."); 1116 return EXIT_FAILURE; 1117 } 1118 1119 /* Dispatch to the three different pages. */ 1120 1121 if ('\0' != *path) 1122 pg_show(&req, path); 1123 else if (NULL != req.q.query) 1124 pg_search(&req); 1125 else 1126 pg_index(&req); 1127 1128 free(req.q.manpath); 1129 free(req.q.arch); 1130 free(req.q.sec); 1131 free(req.q.query); 1132 for (i = 0; i < (int)req.psz; i++) 1133 free(req.p[i]); 1134 free(req.p); 1135 return EXIT_SUCCESS; 1136 } 1137 1138 /* 1139 * Translate PATH_INFO to a query. 1140 */ 1141 static void 1142 parse_path_info(struct req *req, const char *path) 1143 { 1144 const char *name, *sec, *end; 1145 1146 req->isquery = 0; 1147 req->q.equal = 1; 1148 req->q.manpath = NULL; 1149 req->q.arch = NULL; 1150 1151 /* Mandatory manual page name. */ 1152 if ((name = strrchr(path, '/')) == NULL) 1153 name = path; 1154 else 1155 name++; 1156 1157 /* Optional trailing section. */ 1158 sec = strrchr(name, '.'); 1159 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1160 req->q.query = mandoc_strndup(name, sec - name - 1); 1161 req->q.sec = mandoc_strdup(sec); 1162 } else { 1163 req->q.query = mandoc_strdup(name); 1164 req->q.sec = NULL; 1165 } 1166 1167 /* Handle the case of name[.section] only. */ 1168 if (name == path) 1169 return; 1170 1171 /* Optional manpath. */ 1172 end = strchr(path, '/'); 1173 req->q.manpath = mandoc_strndup(path, end - path); 1174 if (validate_manpath(req, req->q.manpath)) { 1175 path = end + 1; 1176 if (name == path) 1177 return; 1178 } else { 1179 free(req->q.manpath); 1180 req->q.manpath = NULL; 1181 } 1182 1183 /* Optional section. */ 1184 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1185 path += 3; 1186 end = strchr(path, '/'); 1187 free(req->q.sec); 1188 req->q.sec = mandoc_strndup(path, end - path); 1189 path = end + 1; 1190 if (name == path) 1191 return; 1192 } 1193 1194 /* Optional architecture. */ 1195 end = strchr(path, '/'); 1196 if (end + 1 != name) { 1197 pg_error_badrequest( 1198 "You specified too many directory components."); 1199 exit(EXIT_FAILURE); 1200 } 1201 req->q.arch = mandoc_strndup(path, end - path); 1202 if (validate_arch(req->q.arch) == 0) { 1203 pg_error_badrequest( 1204 "You specified an invalid directory component."); 1205 exit(EXIT_FAILURE); 1206 } 1207 } 1208 1209 /* 1210 * Scan for indexable paths. 1211 */ 1212 static void 1213 parse_manpath_conf(struct req *req) 1214 { 1215 FILE *fp; 1216 char *dp; 1217 size_t dpsz; 1218 ssize_t len; 1219 1220 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1221 warn("%s/manpath.conf", MAN_DIR); 1222 pg_error_internal(); 1223 exit(EXIT_FAILURE); 1224 } 1225 1226 dp = NULL; 1227 dpsz = 0; 1228 1229 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1230 if (dp[len - 1] == '\n') 1231 dp[--len] = '\0'; 1232 req->p = mandoc_realloc(req->p, 1233 (req->psz + 1) * sizeof(char *)); 1234 if ( ! validate_urifrag(dp)) { 1235 warnx("%s/manpath.conf contains " 1236 "unsafe path \"%s\"", MAN_DIR, dp); 1237 pg_error_internal(); 1238 exit(EXIT_FAILURE); 1239 } 1240 if (strchr(dp, '/') != NULL) { 1241 warnx("%s/manpath.conf contains " 1242 "path with slash \"%s\"", MAN_DIR, dp); 1243 pg_error_internal(); 1244 exit(EXIT_FAILURE); 1245 } 1246 req->p[req->psz++] = dp; 1247 dp = NULL; 1248 dpsz = 0; 1249 } 1250 free(dp); 1251 1252 if (req->p == NULL) { 1253 warnx("%s/manpath.conf is empty", MAN_DIR); 1254 pg_error_internal(); 1255 exit(EXIT_FAILURE); 1256 } 1257 } 1258