1 /* $OpenBSD: cgi.c,v 1.109 2020/01/10 15:20:49 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "mandoc_parse.h" 38 #include "main.h" 39 #include "manconf.h" 40 #include "mansearch.h" 41 #include "cgi.h" 42 43 /* 44 * A query as passed to the search function. 45 */ 46 struct query { 47 char *manpath; /* desired manual directory */ 48 char *arch; /* architecture */ 49 char *sec; /* manual section */ 50 char *query; /* unparsed query expression */ 51 int equal; /* match whole names, not substrings */ 52 }; 53 54 struct req { 55 struct query q; 56 char **p; /* array of available manpaths */ 57 size_t psz; /* number of available manpaths */ 58 int isquery; /* QUERY_STRING used, not PATH_INFO */ 59 }; 60 61 enum focus { 62 FOCUS_NONE = 0, 63 FOCUS_QUERY 64 }; 65 66 static void html_print(const char *); 67 static void html_putchar(char); 68 static int http_decode(char *); 69 static void http_encode(const char *p); 70 static void parse_manpath_conf(struct req *); 71 static void parse_path_info(struct req *req, const char *path); 72 static void parse_query_string(struct req *, const char *); 73 static void pg_error_badrequest(const char *); 74 static void pg_error_internal(void); 75 static void pg_index(const struct req *); 76 static void pg_noresult(const struct req *, int, const char *, 77 const char *); 78 static void pg_redirect(const struct req *, const char *); 79 static void pg_search(const struct req *); 80 static void pg_searchres(const struct req *, 81 struct manpage *, size_t); 82 static void pg_show(struct req *, const char *); 83 static void resp_begin_html(int, const char *, const char *); 84 static void resp_begin_http(int, const char *); 85 static void resp_catman(const struct req *, const char *); 86 static void resp_copy(const char *); 87 static void resp_end_html(void); 88 static void resp_format(const struct req *, const char *); 89 static void resp_searchform(const struct req *, enum focus); 90 static void resp_show(const struct req *, const char *); 91 static void set_query_attr(char **, char **); 92 static int validate_arch(const char *); 93 static int validate_filename(const char *); 94 static int validate_manpath(const struct req *, const char *); 95 static int validate_urifrag(const char *); 96 97 static const char *scriptname = SCRIPT_NAME; 98 99 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 100 static const char *const sec_numbers[] = { 101 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 102 }; 103 static const char *const sec_names[] = { 104 "All Sections", 105 "1 - General Commands", 106 "2 - System Calls", 107 "3 - Library Functions", 108 "3p - Perl Library", 109 "4 - Device Drivers", 110 "5 - File Formats", 111 "6 - Games", 112 "7 - Miscellaneous Information", 113 "8 - System Manager\'s Manual", 114 "9 - Kernel Developer\'s Manual" 115 }; 116 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 117 118 static const char *const arch_names[] = { 119 "amd64", "alpha", "armv7", "arm64", 120 "hppa", "i386", "landisk", 121 "loongson", "luna88k", "macppc", "mips64", 122 "octeon", "sgi", "socppc", "sparc64", 123 "amiga", "arc", "armish", "arm32", 124 "atari", "aviion", "beagle", "cats", 125 "hppa64", "hp300", 126 "ia64", "mac68k", "mvme68k", "mvme88k", 127 "mvmeppc", "palm", "pc532", "pegasos", 128 "pmax", "powerpc", "solbourne", "sparc", 129 "sun3", "vax", "wgrisc", "x68k", 130 "zaurus" 131 }; 132 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 133 134 /* 135 * Print a character, escaping HTML along the way. 136 * This will pass non-ASCII straight to output: be warned! 137 */ 138 static void 139 html_putchar(char c) 140 { 141 142 switch (c) { 143 case '"': 144 printf("""); 145 break; 146 case '&': 147 printf("&"); 148 break; 149 case '>': 150 printf(">"); 151 break; 152 case '<': 153 printf("<"); 154 break; 155 default: 156 putchar((unsigned char)c); 157 break; 158 } 159 } 160 161 /* 162 * Call through to html_putchar(). 163 * Accepts NULL strings. 164 */ 165 static void 166 html_print(const char *p) 167 { 168 169 if (NULL == p) 170 return; 171 while ('\0' != *p) 172 html_putchar(*p++); 173 } 174 175 /* 176 * Transfer the responsibility for the allocated string *val 177 * to the query structure. 178 */ 179 static void 180 set_query_attr(char **attr, char **val) 181 { 182 183 free(*attr); 184 if (**val == '\0') { 185 *attr = NULL; 186 free(*val); 187 } else 188 *attr = *val; 189 *val = NULL; 190 } 191 192 /* 193 * Parse the QUERY_STRING for key-value pairs 194 * and store the values into the query structure. 195 */ 196 static void 197 parse_query_string(struct req *req, const char *qs) 198 { 199 char *key, *val; 200 size_t keysz, valsz; 201 202 req->isquery = 1; 203 req->q.manpath = NULL; 204 req->q.arch = NULL; 205 req->q.sec = NULL; 206 req->q.query = NULL; 207 req->q.equal = 1; 208 209 key = val = NULL; 210 while (*qs != '\0') { 211 212 /* Parse one key. */ 213 214 keysz = strcspn(qs, "=;&"); 215 key = mandoc_strndup(qs, keysz); 216 qs += keysz; 217 if (*qs != '=') 218 goto next; 219 220 /* Parse one value. */ 221 222 valsz = strcspn(++qs, ";&"); 223 val = mandoc_strndup(qs, valsz); 224 qs += valsz; 225 226 /* Decode and catch encoding errors. */ 227 228 if ( ! (http_decode(key) && http_decode(val))) 229 goto next; 230 231 /* Handle key-value pairs. */ 232 233 if ( ! strcmp(key, "query")) 234 set_query_attr(&req->q.query, &val); 235 236 else if ( ! strcmp(key, "apropos")) 237 req->q.equal = !strcmp(val, "0"); 238 239 else if ( ! strcmp(key, "manpath")) { 240 #ifdef COMPAT_OLDURI 241 if ( ! strncmp(val, "OpenBSD ", 8)) { 242 val[7] = '-'; 243 if ('C' == val[8]) 244 val[8] = 'c'; 245 } 246 #endif 247 set_query_attr(&req->q.manpath, &val); 248 } 249 250 else if ( ! (strcmp(key, "sec") 251 #ifdef COMPAT_OLDURI 252 && strcmp(key, "sektion") 253 #endif 254 )) { 255 if ( ! strcmp(val, "0")) 256 *val = '\0'; 257 set_query_attr(&req->q.sec, &val); 258 } 259 260 else if ( ! strcmp(key, "arch")) { 261 if ( ! strcmp(val, "default")) 262 *val = '\0'; 263 set_query_attr(&req->q.arch, &val); 264 } 265 266 /* 267 * The key must be freed in any case. 268 * The val may have been handed over to the query 269 * structure, in which case it is now NULL. 270 */ 271 next: 272 free(key); 273 key = NULL; 274 free(val); 275 val = NULL; 276 277 if (*qs != '\0') 278 qs++; 279 } 280 } 281 282 /* 283 * HTTP-decode a string. The standard explanation is that this turns 284 * "%4e+foo" into "n foo" in the regular way. This is done in-place 285 * over the allocated string. 286 */ 287 static int 288 http_decode(char *p) 289 { 290 char hex[3]; 291 char *q; 292 int c; 293 294 hex[2] = '\0'; 295 296 q = p; 297 for ( ; '\0' != *p; p++, q++) { 298 if ('%' == *p) { 299 if ('\0' == (hex[0] = *(p + 1))) 300 return 0; 301 if ('\0' == (hex[1] = *(p + 2))) 302 return 0; 303 if (1 != sscanf(hex, "%x", &c)) 304 return 0; 305 if ('\0' == c) 306 return 0; 307 308 *q = (char)c; 309 p += 2; 310 } else 311 *q = '+' == *p ? ' ' : *p; 312 } 313 314 *q = '\0'; 315 return 1; 316 } 317 318 static void 319 http_encode(const char *p) 320 { 321 for (; *p != '\0'; p++) { 322 if (isalnum((unsigned char)*p) == 0 && 323 strchr("-._~", *p) == NULL) 324 printf("%%%2.2X", (unsigned char)*p); 325 else 326 putchar(*p); 327 } 328 } 329 330 static void 331 resp_begin_http(int code, const char *msg) 332 { 333 334 if (200 != code) 335 printf("Status: %d %s\r\n", code, msg); 336 337 printf("Content-Type: text/html; charset=utf-8\r\n" 338 "Cache-Control: no-cache\r\n" 339 "Content-Security-Policy: default-src 'none'; " 340 "style-src 'self' 'unsafe-inline'\r\n" 341 "Pragma: no-cache\r\n" 342 "\r\n"); 343 344 fflush(stdout); 345 } 346 347 static void 348 resp_copy(const char *filename) 349 { 350 char buf[4096]; 351 ssize_t sz; 352 int fd; 353 354 if ((fd = open(filename, O_RDONLY)) != -1) { 355 fflush(stdout); 356 while ((sz = read(fd, buf, sizeof(buf))) > 0) 357 write(STDOUT_FILENO, buf, sz); 358 close(fd); 359 } 360 } 361 362 static void 363 resp_begin_html(int code, const char *msg, const char *file) 364 { 365 char *cp; 366 367 resp_begin_http(code, msg); 368 369 printf("<!DOCTYPE html>\n" 370 "<html>\n" 371 "<head>\n" 372 " <meta charset=\"UTF-8\"/>\n" 373 " <meta name=\"viewport\"" 374 " content=\"width=device-width, initial-scale=1.0\">\n" 375 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 376 " type=\"text/css\" media=\"all\">\n" 377 " <title>", 378 CSS_DIR); 379 if (file != NULL) { 380 if ((cp = strrchr(file, '/')) != NULL) 381 file = cp + 1; 382 if ((cp = strrchr(file, '.')) != NULL) { 383 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 384 } else 385 printf("%s - ", file); 386 } 387 printf("%s</title>\n" 388 "</head>\n" 389 "<body>\n", 390 CUSTOMIZE_TITLE); 391 392 resp_copy(MAN_DIR "/header.html"); 393 } 394 395 static void 396 resp_end_html(void) 397 { 398 399 resp_copy(MAN_DIR "/footer.html"); 400 401 puts("</body>\n" 402 "</html>"); 403 } 404 405 static void 406 resp_searchform(const struct req *req, enum focus focus) 407 { 408 int i; 409 410 printf("<form action=\"/%s\" method=\"get\" " 411 "autocomplete=\"off\" autocapitalize=\"none\">\n" 412 " <fieldset>\n" 413 " <legend>Manual Page Search Parameters</legend>\n", 414 scriptname); 415 416 /* Write query input box. */ 417 418 printf(" <input type=\"search\" name=\"query\" value=\""); 419 if (req->q.query != NULL) 420 html_print(req->q.query); 421 printf( "\" size=\"40\""); 422 if (focus == FOCUS_QUERY) 423 printf(" autofocus"); 424 puts(">"); 425 426 /* Write submission buttons. */ 427 428 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 429 "man</button>\n" 430 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 431 "apropos</button>\n" 432 " <br/>\n"); 433 434 /* Write section selector. */ 435 436 puts(" <select name=\"sec\">"); 437 for (i = 0; i < sec_MAX; i++) { 438 printf(" <option value=\"%s\"", sec_numbers[i]); 439 if (NULL != req->q.sec && 440 0 == strcmp(sec_numbers[i], req->q.sec)) 441 printf(" selected=\"selected\""); 442 printf(">%s</option>\n", sec_names[i]); 443 } 444 puts(" </select>"); 445 446 /* Write architecture selector. */ 447 448 printf( " <select name=\"arch\">\n" 449 " <option value=\"default\""); 450 if (NULL == req->q.arch) 451 printf(" selected=\"selected\""); 452 puts(">All Architectures</option>"); 453 for (i = 0; i < arch_MAX; i++) { 454 printf(" <option"); 455 if (NULL != req->q.arch && 456 0 == strcmp(arch_names[i], req->q.arch)) 457 printf(" selected=\"selected\""); 458 printf(">%s</option>\n", arch_names[i]); 459 } 460 puts(" </select>"); 461 462 /* Write manpath selector. */ 463 464 if (req->psz > 1) { 465 puts(" <select name=\"manpath\">"); 466 for (i = 0; i < (int)req->psz; i++) { 467 printf(" <option"); 468 if (strcmp(req->q.manpath, req->p[i]) == 0) 469 printf(" selected=\"selected\""); 470 printf(">"); 471 html_print(req->p[i]); 472 puts("</option>"); 473 } 474 puts(" </select>"); 475 } 476 477 puts(" </fieldset>\n" 478 "</form>"); 479 } 480 481 static int 482 validate_urifrag(const char *frag) 483 { 484 485 while ('\0' != *frag) { 486 if ( ! (isalnum((unsigned char)*frag) || 487 '-' == *frag || '.' == *frag || 488 '/' == *frag || '_' == *frag)) 489 return 0; 490 frag++; 491 } 492 return 1; 493 } 494 495 static int 496 validate_manpath(const struct req *req, const char* manpath) 497 { 498 size_t i; 499 500 for (i = 0; i < req->psz; i++) 501 if ( ! strcmp(manpath, req->p[i])) 502 return 1; 503 504 return 0; 505 } 506 507 static int 508 validate_arch(const char *arch) 509 { 510 int i; 511 512 for (i = 0; i < arch_MAX; i++) 513 if (strcmp(arch, arch_names[i]) == 0) 514 return 1; 515 516 return 0; 517 } 518 519 static int 520 validate_filename(const char *file) 521 { 522 523 if ('.' == file[0] && '/' == file[1]) 524 file += 2; 525 526 return ! (strstr(file, "../") || strstr(file, "/..") || 527 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 528 } 529 530 static void 531 pg_index(const struct req *req) 532 { 533 534 resp_begin_html(200, NULL, NULL); 535 resp_searchform(req, FOCUS_QUERY); 536 printf("<p>\n" 537 "This web interface is documented in the\n" 538 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 539 "manual, and the\n" 540 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 541 "manual explains the query syntax.\n" 542 "</p>\n", 543 scriptname, *scriptname == '\0' ? "" : "/", 544 scriptname, *scriptname == '\0' ? "" : "/"); 545 resp_end_html(); 546 } 547 548 static void 549 pg_noresult(const struct req *req, int code, const char *http_msg, 550 const char *user_msg) 551 { 552 resp_begin_html(code, http_msg, NULL); 553 resp_searchform(req, FOCUS_QUERY); 554 puts("<p>"); 555 puts(user_msg); 556 puts("</p>"); 557 resp_end_html(); 558 } 559 560 static void 561 pg_error_badrequest(const char *msg) 562 { 563 564 resp_begin_html(400, "Bad Request", NULL); 565 puts("<h1>Bad Request</h1>\n" 566 "<p>\n"); 567 puts(msg); 568 printf("Try again from the\n" 569 "<a href=\"/%s\">main page</a>.\n" 570 "</p>", scriptname); 571 resp_end_html(); 572 } 573 574 static void 575 pg_error_internal(void) 576 { 577 resp_begin_html(500, "Internal Server Error", NULL); 578 puts("<p>Internal Server Error</p>"); 579 resp_end_html(); 580 } 581 582 static void 583 pg_redirect(const struct req *req, const char *name) 584 { 585 printf("Status: 303 See Other\r\n" 586 "Location: /"); 587 if (*scriptname != '\0') 588 printf("%s/", scriptname); 589 if (strcmp(req->q.manpath, req->p[0])) 590 printf("%s/", req->q.manpath); 591 if (req->q.arch != NULL) 592 printf("%s/", req->q.arch); 593 http_encode(name); 594 if (req->q.sec != NULL) { 595 putchar('.'); 596 http_encode(req->q.sec); 597 } 598 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 599 } 600 601 static void 602 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 603 { 604 char *arch, *archend; 605 const char *sec; 606 size_t i, iuse; 607 int archprio, archpriouse; 608 int prio, priouse; 609 610 for (i = 0; i < sz; i++) { 611 if (validate_filename(r[i].file)) 612 continue; 613 warnx("invalid filename %s in %s database", 614 r[i].file, req->q.manpath); 615 pg_error_internal(); 616 return; 617 } 618 619 if (req->isquery && sz == 1) { 620 /* 621 * If we have just one result, then jump there now 622 * without any delay. 623 */ 624 printf("Status: 303 See Other\r\n" 625 "Location: /"); 626 if (*scriptname != '\0') 627 printf("%s/", scriptname); 628 if (strcmp(req->q.manpath, req->p[0])) 629 printf("%s/", req->q.manpath); 630 printf("%s\r\n" 631 "Content-Type: text/html; charset=utf-8\r\n\r\n", 632 r[0].file); 633 return; 634 } 635 636 /* 637 * In man(1) mode, show one of the pages 638 * even if more than one is found. 639 */ 640 641 iuse = 0; 642 if (req->q.equal || sz == 1) { 643 priouse = 20; 644 archpriouse = 3; 645 for (i = 0; i < sz; i++) { 646 sec = r[i].file; 647 sec += strcspn(sec, "123456789"); 648 if (sec[0] == '\0') 649 continue; 650 prio = sec_prios[sec[0] - '1']; 651 if (sec[1] != '/') 652 prio += 10; 653 if (req->q.arch == NULL) { 654 archprio = 655 ((arch = strchr(sec + 1, '/')) 656 == NULL) ? 3 : 657 ((archend = strchr(arch + 1, '/')) 658 == NULL) ? 0 : 659 strncmp(arch, "amd64/", 660 archend - arch) ? 2 : 1; 661 if (archprio < archpriouse) { 662 archpriouse = archprio; 663 priouse = prio; 664 iuse = i; 665 continue; 666 } 667 if (archprio > archpriouse) 668 continue; 669 } 670 if (prio >= priouse) 671 continue; 672 priouse = prio; 673 iuse = i; 674 } 675 resp_begin_html(200, NULL, r[iuse].file); 676 } else 677 resp_begin_html(200, NULL, NULL); 678 679 resp_searchform(req, 680 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 681 682 if (sz > 1) { 683 puts("<table class=\"results\">"); 684 for (i = 0; i < sz; i++) { 685 printf(" <tr>\n" 686 " <td>" 687 "<a class=\"Xr\" href=\"/"); 688 if (*scriptname != '\0') 689 printf("%s/", scriptname); 690 if (strcmp(req->q.manpath, req->p[0])) 691 printf("%s/", req->q.manpath); 692 printf("%s\">", r[i].file); 693 html_print(r[i].names); 694 printf("</a></td>\n" 695 " <td><span class=\"Nd\">"); 696 html_print(r[i].output); 697 puts("</span></td>\n" 698 " </tr>"); 699 } 700 puts("</table>"); 701 } 702 703 if (req->q.equal || sz == 1) { 704 puts("<hr>"); 705 resp_show(req, r[iuse].file); 706 } 707 708 resp_end_html(); 709 } 710 711 static void 712 resp_catman(const struct req *req, const char *file) 713 { 714 FILE *f; 715 char *p; 716 size_t sz; 717 ssize_t len; 718 int i; 719 int italic, bold; 720 721 if ((f = fopen(file, "r")) == NULL) { 722 puts("<p>You specified an invalid manual file.</p>"); 723 return; 724 } 725 726 puts("<div class=\"catman\">\n" 727 "<pre>"); 728 729 p = NULL; 730 sz = 0; 731 732 while ((len = getline(&p, &sz, f)) != -1) { 733 bold = italic = 0; 734 for (i = 0; i < len - 1; i++) { 735 /* 736 * This means that the catpage is out of state. 737 * Ignore it and keep going (although the 738 * catpage is bogus). 739 */ 740 741 if ('\b' == p[i] || '\n' == p[i]) 742 continue; 743 744 /* 745 * Print a regular character. 746 * Close out any bold/italic scopes. 747 * If we're in back-space mode, make sure we'll 748 * have something to enter when we backspace. 749 */ 750 751 if ('\b' != p[i + 1]) { 752 if (italic) 753 printf("</i>"); 754 if (bold) 755 printf("</b>"); 756 italic = bold = 0; 757 html_putchar(p[i]); 758 continue; 759 } else if (i + 2 >= len) 760 continue; 761 762 /* Italic mode. */ 763 764 if ('_' == p[i]) { 765 if (bold) 766 printf("</b>"); 767 if ( ! italic) 768 printf("<i>"); 769 bold = 0; 770 italic = 1; 771 i += 2; 772 html_putchar(p[i]); 773 continue; 774 } 775 776 /* 777 * Handle funny behaviour troff-isms. 778 * These grok'd from the original man2html.c. 779 */ 780 781 if (('+' == p[i] && 'o' == p[i + 2]) || 782 ('o' == p[i] && '+' == p[i + 2]) || 783 ('|' == p[i] && '=' == p[i + 2]) || 784 ('=' == p[i] && '|' == p[i + 2]) || 785 ('*' == p[i] && '=' == p[i + 2]) || 786 ('=' == p[i] && '*' == p[i + 2]) || 787 ('*' == p[i] && '|' == p[i + 2]) || 788 ('|' == p[i] && '*' == p[i + 2])) { 789 if (italic) 790 printf("</i>"); 791 if (bold) 792 printf("</b>"); 793 italic = bold = 0; 794 putchar('*'); 795 i += 2; 796 continue; 797 } else if (('|' == p[i] && '-' == p[i + 2]) || 798 ('-' == p[i] && '|' == p[i + 1]) || 799 ('+' == p[i] && '-' == p[i + 1]) || 800 ('-' == p[i] && '+' == p[i + 1]) || 801 ('+' == p[i] && '|' == p[i + 1]) || 802 ('|' == p[i] && '+' == p[i + 1])) { 803 if (italic) 804 printf("</i>"); 805 if (bold) 806 printf("</b>"); 807 italic = bold = 0; 808 putchar('+'); 809 i += 2; 810 continue; 811 } 812 813 /* Bold mode. */ 814 815 if (italic) 816 printf("</i>"); 817 if ( ! bold) 818 printf("<b>"); 819 bold = 1; 820 italic = 0; 821 i += 2; 822 html_putchar(p[i]); 823 } 824 825 /* 826 * Clean up the last character. 827 * We can get to a newline; don't print that. 828 */ 829 830 if (italic) 831 printf("</i>"); 832 if (bold) 833 printf("</b>"); 834 835 if (i == len - 1 && p[i] != '\n') 836 html_putchar(p[i]); 837 838 putchar('\n'); 839 } 840 free(p); 841 842 puts("</pre>\n" 843 "</div>"); 844 845 fclose(f); 846 } 847 848 static void 849 resp_format(const struct req *req, const char *file) 850 { 851 struct manoutput conf; 852 struct mparse *mp; 853 struct roff_meta *meta; 854 void *vp; 855 int fd; 856 int usepath; 857 858 if (-1 == (fd = open(file, O_RDONLY, 0))) { 859 puts("<p>You specified an invalid manual file.</p>"); 860 return; 861 } 862 863 mchars_alloc(); 864 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 865 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 866 mparse_readfd(mp, fd, file); 867 close(fd); 868 meta = mparse_result(mp); 869 870 memset(&conf, 0, sizeof(conf)); 871 conf.fragment = 1; 872 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 873 usepath = strcmp(req->q.manpath, req->p[0]); 874 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 875 scriptname, *scriptname == '\0' ? "" : "/", 876 usepath ? req->q.manpath : "", usepath ? "/" : ""); 877 878 vp = html_alloc(&conf); 879 if (meta->macroset == MACROSET_MDOC) 880 html_mdoc(vp, meta); 881 else 882 html_man(vp, meta); 883 884 html_free(vp); 885 mparse_free(mp); 886 mchars_free(); 887 free(conf.man); 888 free(conf.style); 889 } 890 891 static void 892 resp_show(const struct req *req, const char *file) 893 { 894 895 if ('.' == file[0] && '/' == file[1]) 896 file += 2; 897 898 if ('c' == *file) 899 resp_catman(req, file); 900 else 901 resp_format(req, file); 902 } 903 904 static void 905 pg_show(struct req *req, const char *fullpath) 906 { 907 char *manpath; 908 const char *file; 909 910 if ((file = strchr(fullpath, '/')) == NULL) { 911 pg_error_badrequest( 912 "You did not specify a page to show."); 913 return; 914 } 915 manpath = mandoc_strndup(fullpath, file - fullpath); 916 file++; 917 918 if ( ! validate_manpath(req, manpath)) { 919 pg_error_badrequest( 920 "You specified an invalid manpath."); 921 free(manpath); 922 return; 923 } 924 925 /* 926 * Begin by chdir()ing into the manpath. 927 * This way we can pick up the database files, which are 928 * relative to the manpath root. 929 */ 930 931 if (chdir(manpath) == -1) { 932 warn("chdir %s", manpath); 933 pg_error_internal(); 934 free(manpath); 935 return; 936 } 937 free(manpath); 938 939 if ( ! validate_filename(file)) { 940 pg_error_badrequest( 941 "You specified an invalid manual file."); 942 return; 943 } 944 945 resp_begin_html(200, NULL, file); 946 resp_searchform(req, FOCUS_NONE); 947 resp_show(req, file); 948 resp_end_html(); 949 } 950 951 static void 952 pg_search(const struct req *req) 953 { 954 struct mansearch search; 955 struct manpaths paths; 956 struct manpage *res; 957 char **argv; 958 char *query, *rp, *wp; 959 size_t ressz; 960 int argc; 961 962 /* 963 * Begin by chdir()ing into the root of the manpath. 964 * This way we can pick up the database files, which are 965 * relative to the manpath root. 966 */ 967 968 if (chdir(req->q.manpath) == -1) { 969 warn("chdir %s", req->q.manpath); 970 pg_error_internal(); 971 return; 972 } 973 974 search.arch = req->q.arch; 975 search.sec = req->q.sec; 976 search.outkey = "Nd"; 977 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 978 search.firstmatch = 1; 979 980 paths.sz = 1; 981 paths.paths = mandoc_malloc(sizeof(char *)); 982 paths.paths[0] = mandoc_strdup("."); 983 984 /* 985 * Break apart at spaces with backslash-escaping. 986 */ 987 988 argc = 0; 989 argv = NULL; 990 rp = query = mandoc_strdup(req->q.query); 991 for (;;) { 992 while (isspace((unsigned char)*rp)) 993 rp++; 994 if (*rp == '\0') 995 break; 996 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 997 argv[argc++] = wp = rp; 998 for (;;) { 999 if (isspace((unsigned char)*rp)) { 1000 *wp = '\0'; 1001 rp++; 1002 break; 1003 } 1004 if (rp[0] == '\\' && rp[1] != '\0') 1005 rp++; 1006 if (wp != rp) 1007 *wp = *rp; 1008 if (*rp == '\0') 1009 break; 1010 wp++; 1011 rp++; 1012 } 1013 } 1014 1015 res = NULL; 1016 ressz = 0; 1017 if (req->isquery && req->q.equal && argc == 1) 1018 pg_redirect(req, argv[0]); 1019 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1020 pg_noresult(req, 400, "Bad Request", 1021 "You entered an invalid query."); 1022 else if (ressz == 0) 1023 pg_noresult(req, 404, "Not Found", "No results found."); 1024 else 1025 pg_searchres(req, res, ressz); 1026 1027 free(query); 1028 mansearch_free(res, ressz); 1029 free(paths.paths[0]); 1030 free(paths.paths); 1031 } 1032 1033 int 1034 main(void) 1035 { 1036 struct req req; 1037 struct itimerval itimer; 1038 const char *path; 1039 const char *querystring; 1040 int i; 1041 1042 /* 1043 * The "rpath" pledge could be revoked after mparse_readfd() 1044 * if the file desciptor to "/footer.html" would be opened 1045 * up front, but it's probably not worth the complication 1046 * of the code it would cause: it would require scattering 1047 * pledge() calls in multiple low-level resp_*() functions. 1048 */ 1049 1050 if (pledge("stdio rpath", NULL) == -1) { 1051 warn("pledge"); 1052 pg_error_internal(); 1053 return EXIT_FAILURE; 1054 } 1055 1056 /* Poor man's ReDoS mitigation. */ 1057 1058 itimer.it_value.tv_sec = 2; 1059 itimer.it_value.tv_usec = 0; 1060 itimer.it_interval.tv_sec = 2; 1061 itimer.it_interval.tv_usec = 0; 1062 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1063 warn("setitimer"); 1064 pg_error_internal(); 1065 return EXIT_FAILURE; 1066 } 1067 1068 /* 1069 * First we change directory into the MAN_DIR so that 1070 * subsequent scanning for manpath directories is rooted 1071 * relative to the same position. 1072 */ 1073 1074 if (chdir(MAN_DIR) == -1) { 1075 warn("MAN_DIR: %s", MAN_DIR); 1076 pg_error_internal(); 1077 return EXIT_FAILURE; 1078 } 1079 1080 memset(&req, 0, sizeof(struct req)); 1081 req.q.equal = 1; 1082 parse_manpath_conf(&req); 1083 1084 /* Parse the path info and the query string. */ 1085 1086 if ((path = getenv("PATH_INFO")) == NULL) 1087 path = ""; 1088 else if (*path == '/') 1089 path++; 1090 1091 if (*path != '\0') { 1092 parse_path_info(&req, path); 1093 if (req.q.manpath == NULL || req.q.sec == NULL || 1094 *req.q.query == '\0' || access(path, F_OK) == -1) 1095 path = ""; 1096 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1097 parse_query_string(&req, querystring); 1098 1099 /* Validate parsed data and add defaults. */ 1100 1101 if (req.q.manpath == NULL) 1102 req.q.manpath = mandoc_strdup(req.p[0]); 1103 else if ( ! validate_manpath(&req, req.q.manpath)) { 1104 pg_error_badrequest( 1105 "You specified an invalid manpath."); 1106 return EXIT_FAILURE; 1107 } 1108 1109 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1110 pg_error_badrequest( 1111 "You specified an invalid architecture."); 1112 return EXIT_FAILURE; 1113 } 1114 1115 /* Dispatch to the three different pages. */ 1116 1117 if ('\0' != *path) 1118 pg_show(&req, path); 1119 else if (NULL != req.q.query) 1120 pg_search(&req); 1121 else 1122 pg_index(&req); 1123 1124 free(req.q.manpath); 1125 free(req.q.arch); 1126 free(req.q.sec); 1127 free(req.q.query); 1128 for (i = 0; i < (int)req.psz; i++) 1129 free(req.p[i]); 1130 free(req.p); 1131 return EXIT_SUCCESS; 1132 } 1133 1134 /* 1135 * Translate PATH_INFO to a query. 1136 */ 1137 static void 1138 parse_path_info(struct req *req, const char *path) 1139 { 1140 const char *name, *sec, *end; 1141 1142 req->isquery = 0; 1143 req->q.equal = 1; 1144 req->q.manpath = NULL; 1145 req->q.arch = NULL; 1146 1147 /* Mandatory manual page name. */ 1148 if ((name = strrchr(path, '/')) == NULL) 1149 name = path; 1150 else 1151 name++; 1152 1153 /* Optional trailing section. */ 1154 sec = strrchr(name, '.'); 1155 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1156 req->q.query = mandoc_strndup(name, sec - name - 1); 1157 req->q.sec = mandoc_strdup(sec); 1158 } else { 1159 req->q.query = mandoc_strdup(name); 1160 req->q.sec = NULL; 1161 } 1162 1163 /* Handle the case of name[.section] only. */ 1164 if (name == path) 1165 return; 1166 1167 /* Optional manpath. */ 1168 end = strchr(path, '/'); 1169 req->q.manpath = mandoc_strndup(path, end - path); 1170 if (validate_manpath(req, req->q.manpath)) { 1171 path = end + 1; 1172 if (name == path) 1173 return; 1174 } else { 1175 free(req->q.manpath); 1176 req->q.manpath = NULL; 1177 } 1178 1179 /* Optional section. */ 1180 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1181 path += 3; 1182 end = strchr(path, '/'); 1183 free(req->q.sec); 1184 req->q.sec = mandoc_strndup(path, end - path); 1185 path = end + 1; 1186 if (name == path) 1187 return; 1188 } 1189 1190 /* Optional architecture. */ 1191 end = strchr(path, '/'); 1192 if (end + 1 != name) { 1193 pg_error_badrequest( 1194 "You specified too many directory components."); 1195 exit(EXIT_FAILURE); 1196 } 1197 req->q.arch = mandoc_strndup(path, end - path); 1198 if (validate_arch(req->q.arch) == 0) { 1199 pg_error_badrequest( 1200 "You specified an invalid directory component."); 1201 exit(EXIT_FAILURE); 1202 } 1203 } 1204 1205 /* 1206 * Scan for indexable paths. 1207 */ 1208 static void 1209 parse_manpath_conf(struct req *req) 1210 { 1211 FILE *fp; 1212 char *dp; 1213 size_t dpsz; 1214 ssize_t len; 1215 1216 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1217 warn("%s/manpath.conf", MAN_DIR); 1218 pg_error_internal(); 1219 exit(EXIT_FAILURE); 1220 } 1221 1222 dp = NULL; 1223 dpsz = 0; 1224 1225 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1226 if (dp[len - 1] == '\n') 1227 dp[--len] = '\0'; 1228 req->p = mandoc_realloc(req->p, 1229 (req->psz + 1) * sizeof(char *)); 1230 if ( ! validate_urifrag(dp)) { 1231 warnx("%s/manpath.conf contains " 1232 "unsafe path \"%s\"", MAN_DIR, dp); 1233 pg_error_internal(); 1234 exit(EXIT_FAILURE); 1235 } 1236 if (strchr(dp, '/') != NULL) { 1237 warnx("%s/manpath.conf contains " 1238 "path with slash \"%s\"", MAN_DIR, dp); 1239 pg_error_internal(); 1240 exit(EXIT_FAILURE); 1241 } 1242 req->p[req->psz++] = dp; 1243 dp = NULL; 1244 dpsz = 0; 1245 } 1246 free(dp); 1247 1248 if (req->p == NULL) { 1249 warnx("%s/manpath.conf is empty", MAN_DIR); 1250 pg_error_internal(); 1251 exit(EXIT_FAILURE); 1252 } 1253 } 1254