1 /* $OpenBSD: cgi.c,v 1.111 2020/06/29 19:16:59 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de> 4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the man.cgi(8) program. 19 */ 20 #include <sys/types.h> 21 #include <sys/time.h> 22 23 #include <ctype.h> 24 #include <err.h> 25 #include <errno.h> 26 #include <fcntl.h> 27 #include <limits.h> 28 #include <stdint.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "mandoc_aux.h" 35 #include "mandoc.h" 36 #include "roff.h" 37 #include "mdoc.h" 38 #include "man.h" 39 #include "mandoc_parse.h" 40 #include "main.h" 41 #include "manconf.h" 42 #include "mansearch.h" 43 #include "cgi.h" 44 45 /* 46 * A query as passed to the search function. 47 */ 48 struct query { 49 char *manpath; /* desired manual directory */ 50 char *arch; /* architecture */ 51 char *sec; /* manual section */ 52 char *query; /* unparsed query expression */ 53 int equal; /* match whole names, not substrings */ 54 }; 55 56 struct req { 57 struct query q; 58 char **p; /* array of available manpaths */ 59 size_t psz; /* number of available manpaths */ 60 int isquery; /* QUERY_STRING used, not PATH_INFO */ 61 }; 62 63 enum focus { 64 FOCUS_NONE = 0, 65 FOCUS_QUERY 66 }; 67 68 static void html_print(const char *); 69 static void html_putchar(char); 70 static int http_decode(char *); 71 static void http_encode(const char *); 72 static void parse_manpath_conf(struct req *); 73 static void parse_path_info(struct req *, const char *); 74 static void parse_query_string(struct req *, const char *); 75 static void pg_error_badrequest(const char *); 76 static void pg_error_internal(void); 77 static void pg_index(const struct req *); 78 static void pg_noresult(const struct req *, int, const char *, 79 const char *); 80 static void pg_redirect(const struct req *, const char *); 81 static void pg_search(const struct req *); 82 static void pg_searchres(const struct req *, 83 struct manpage *, size_t); 84 static void pg_show(struct req *, const char *); 85 static void resp_begin_html(int, const char *, const char *); 86 static void resp_begin_http(int, const char *); 87 static void resp_catman(const struct req *, const char *); 88 static void resp_copy(const char *); 89 static void resp_end_html(void); 90 static void resp_format(const struct req *, const char *); 91 static void resp_searchform(const struct req *, enum focus); 92 static void resp_show(const struct req *, const char *); 93 static void set_query_attr(char **, char **); 94 static int validate_arch(const char *); 95 static int validate_filename(const char *); 96 static int validate_manpath(const struct req *, const char *); 97 static int validate_urifrag(const char *); 98 99 static const char *scriptname = SCRIPT_NAME; 100 101 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 102 static const char *const sec_numbers[] = { 103 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 104 }; 105 static const char *const sec_names[] = { 106 "All Sections", 107 "1 - General Commands", 108 "2 - System Calls", 109 "3 - Library Functions", 110 "3p - Perl Library", 111 "4 - Device Drivers", 112 "5 - File Formats", 113 "6 - Games", 114 "7 - Miscellaneous Information", 115 "8 - System Manager\'s Manual", 116 "9 - Kernel Developer\'s Manual" 117 }; 118 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 119 120 static const char *const arch_names[] = { 121 "amd64", "alpha", "armv7", "arm64", 122 "hppa", "i386", "landisk", "loongson", 123 "luna88k", "macppc", "mips64", "octeon", 124 "powerpc64", "sgi", "socppc", "sparc64", 125 126 "amiga", "arc", "armish", "arm32", 127 "atari", "aviion", "beagle", "cats", 128 "hppa64", "hp300", 129 "ia64", "mac68k", "mvme68k", "mvme88k", 130 "mvmeppc", "palm", "pc532", "pegasos", 131 "pmax", "powerpc", "solbourne", "sparc", 132 "sun3", "vax", "wgrisc", "x68k", 133 "zaurus" 134 }; 135 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 136 137 /* 138 * Print a character, escaping HTML along the way. 139 * This will pass non-ASCII straight to output: be warned! 140 */ 141 static void 142 html_putchar(char c) 143 { 144 145 switch (c) { 146 case '"': 147 printf("""); 148 break; 149 case '&': 150 printf("&"); 151 break; 152 case '>': 153 printf(">"); 154 break; 155 case '<': 156 printf("<"); 157 break; 158 default: 159 putchar((unsigned char)c); 160 break; 161 } 162 } 163 164 /* 165 * Call through to html_putchar(). 166 * Accepts NULL strings. 167 */ 168 static void 169 html_print(const char *p) 170 { 171 172 if (NULL == p) 173 return; 174 while ('\0' != *p) 175 html_putchar(*p++); 176 } 177 178 /* 179 * Transfer the responsibility for the allocated string *val 180 * to the query structure. 181 */ 182 static void 183 set_query_attr(char **attr, char **val) 184 { 185 186 free(*attr); 187 if (**val == '\0') { 188 *attr = NULL; 189 free(*val); 190 } else 191 *attr = *val; 192 *val = NULL; 193 } 194 195 /* 196 * Parse the QUERY_STRING for key-value pairs 197 * and store the values into the query structure. 198 */ 199 static void 200 parse_query_string(struct req *req, const char *qs) 201 { 202 char *key, *val; 203 size_t keysz, valsz; 204 205 req->isquery = 1; 206 req->q.manpath = NULL; 207 req->q.arch = NULL; 208 req->q.sec = NULL; 209 req->q.query = NULL; 210 req->q.equal = 1; 211 212 key = val = NULL; 213 while (*qs != '\0') { 214 215 /* Parse one key. */ 216 217 keysz = strcspn(qs, "=;&"); 218 key = mandoc_strndup(qs, keysz); 219 qs += keysz; 220 if (*qs != '=') 221 goto next; 222 223 /* Parse one value. */ 224 225 valsz = strcspn(++qs, ";&"); 226 val = mandoc_strndup(qs, valsz); 227 qs += valsz; 228 229 /* Decode and catch encoding errors. */ 230 231 if ( ! (http_decode(key) && http_decode(val))) 232 goto next; 233 234 /* Handle key-value pairs. */ 235 236 if ( ! strcmp(key, "query")) 237 set_query_attr(&req->q.query, &val); 238 239 else if ( ! strcmp(key, "apropos")) 240 req->q.equal = !strcmp(val, "0"); 241 242 else if ( ! strcmp(key, "manpath")) { 243 #ifdef COMPAT_OLDURI 244 if ( ! strncmp(val, "OpenBSD ", 8)) { 245 val[7] = '-'; 246 if ('C' == val[8]) 247 val[8] = 'c'; 248 } 249 #endif 250 set_query_attr(&req->q.manpath, &val); 251 } 252 253 else if ( ! (strcmp(key, "sec") 254 #ifdef COMPAT_OLDURI 255 && strcmp(key, "sektion") 256 #endif 257 )) { 258 if ( ! strcmp(val, "0")) 259 *val = '\0'; 260 set_query_attr(&req->q.sec, &val); 261 } 262 263 else if ( ! strcmp(key, "arch")) { 264 if ( ! strcmp(val, "default")) 265 *val = '\0'; 266 set_query_attr(&req->q.arch, &val); 267 } 268 269 /* 270 * The key must be freed in any case. 271 * The val may have been handed over to the query 272 * structure, in which case it is now NULL. 273 */ 274 next: 275 free(key); 276 key = NULL; 277 free(val); 278 val = NULL; 279 280 if (*qs != '\0') 281 qs++; 282 } 283 } 284 285 /* 286 * HTTP-decode a string. The standard explanation is that this turns 287 * "%4e+foo" into "n foo" in the regular way. This is done in-place 288 * over the allocated string. 289 */ 290 static int 291 http_decode(char *p) 292 { 293 char hex[3]; 294 char *q; 295 int c; 296 297 hex[2] = '\0'; 298 299 q = p; 300 for ( ; '\0' != *p; p++, q++) { 301 if ('%' == *p) { 302 if ('\0' == (hex[0] = *(p + 1))) 303 return 0; 304 if ('\0' == (hex[1] = *(p + 2))) 305 return 0; 306 if (1 != sscanf(hex, "%x", &c)) 307 return 0; 308 if ('\0' == c) 309 return 0; 310 311 *q = (char)c; 312 p += 2; 313 } else 314 *q = '+' == *p ? ' ' : *p; 315 } 316 317 *q = '\0'; 318 return 1; 319 } 320 321 static void 322 http_encode(const char *p) 323 { 324 for (; *p != '\0'; p++) { 325 if (isalnum((unsigned char)*p) == 0 && 326 strchr("-._~", *p) == NULL) 327 printf("%%%2.2X", (unsigned char)*p); 328 else 329 putchar(*p); 330 } 331 } 332 333 static void 334 resp_begin_http(int code, const char *msg) 335 { 336 337 if (200 != code) 338 printf("Status: %d %s\r\n", code, msg); 339 340 printf("Content-Type: text/html; charset=utf-8\r\n" 341 "Cache-Control: no-cache\r\n" 342 "Content-Security-Policy: default-src 'none'; " 343 "style-src 'self' 'unsafe-inline'\r\n" 344 "Pragma: no-cache\r\n" 345 "\r\n"); 346 347 fflush(stdout); 348 } 349 350 static void 351 resp_copy(const char *filename) 352 { 353 char buf[4096]; 354 ssize_t sz; 355 int fd; 356 357 if ((fd = open(filename, O_RDONLY)) != -1) { 358 fflush(stdout); 359 while ((sz = read(fd, buf, sizeof(buf))) > 0) 360 write(STDOUT_FILENO, buf, sz); 361 close(fd); 362 } 363 } 364 365 static void 366 resp_begin_html(int code, const char *msg, const char *file) 367 { 368 char *cp; 369 370 resp_begin_http(code, msg); 371 372 printf("<!DOCTYPE html>\n" 373 "<html>\n" 374 "<head>\n" 375 " <meta charset=\"UTF-8\"/>\n" 376 " <meta name=\"viewport\"" 377 " content=\"width=device-width, initial-scale=1.0\">\n" 378 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 379 " type=\"text/css\" media=\"all\">\n" 380 " <title>", 381 CSS_DIR); 382 if (file != NULL) { 383 if ((cp = strrchr(file, '/')) != NULL) 384 file = cp + 1; 385 if ((cp = strrchr(file, '.')) != NULL) { 386 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 387 } else 388 printf("%s - ", file); 389 } 390 printf("%s</title>\n" 391 "</head>\n" 392 "<body>\n", 393 CUSTOMIZE_TITLE); 394 395 resp_copy(MAN_DIR "/header.html"); 396 } 397 398 static void 399 resp_end_html(void) 400 { 401 402 resp_copy(MAN_DIR "/footer.html"); 403 404 puts("</body>\n" 405 "</html>"); 406 } 407 408 static void 409 resp_searchform(const struct req *req, enum focus focus) 410 { 411 int i; 412 413 printf("<form action=\"/%s\" method=\"get\" " 414 "autocomplete=\"off\" autocapitalize=\"none\">\n" 415 " <fieldset>\n" 416 " <legend>Manual Page Search Parameters</legend>\n", 417 scriptname); 418 419 /* Write query input box. */ 420 421 printf(" <input type=\"search\" name=\"query\" value=\""); 422 if (req->q.query != NULL) 423 html_print(req->q.query); 424 printf( "\" size=\"40\""); 425 if (focus == FOCUS_QUERY) 426 printf(" autofocus"); 427 puts(">"); 428 429 /* Write submission buttons. */ 430 431 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 432 "man</button>\n" 433 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 434 "apropos</button>\n" 435 " <br/>\n"); 436 437 /* Write section selector. */ 438 439 puts(" <select name=\"sec\">"); 440 for (i = 0; i < sec_MAX; i++) { 441 printf(" <option value=\"%s\"", sec_numbers[i]); 442 if (NULL != req->q.sec && 443 0 == strcmp(sec_numbers[i], req->q.sec)) 444 printf(" selected=\"selected\""); 445 printf(">%s</option>\n", sec_names[i]); 446 } 447 puts(" </select>"); 448 449 /* Write architecture selector. */ 450 451 printf( " <select name=\"arch\">\n" 452 " <option value=\"default\""); 453 if (NULL == req->q.arch) 454 printf(" selected=\"selected\""); 455 puts(">All Architectures</option>"); 456 for (i = 0; i < arch_MAX; i++) { 457 printf(" <option"); 458 if (NULL != req->q.arch && 459 0 == strcmp(arch_names[i], req->q.arch)) 460 printf(" selected=\"selected\""); 461 printf(">%s</option>\n", arch_names[i]); 462 } 463 puts(" </select>"); 464 465 /* Write manpath selector. */ 466 467 if (req->psz > 1) { 468 puts(" <select name=\"manpath\">"); 469 for (i = 0; i < (int)req->psz; i++) { 470 printf(" <option"); 471 if (strcmp(req->q.manpath, req->p[i]) == 0) 472 printf(" selected=\"selected\""); 473 printf(">"); 474 html_print(req->p[i]); 475 puts("</option>"); 476 } 477 puts(" </select>"); 478 } 479 480 puts(" </fieldset>\n" 481 "</form>"); 482 } 483 484 static int 485 validate_urifrag(const char *frag) 486 { 487 488 while ('\0' != *frag) { 489 if ( ! (isalnum((unsigned char)*frag) || 490 '-' == *frag || '.' == *frag || 491 '/' == *frag || '_' == *frag)) 492 return 0; 493 frag++; 494 } 495 return 1; 496 } 497 498 static int 499 validate_manpath(const struct req *req, const char* manpath) 500 { 501 size_t i; 502 503 for (i = 0; i < req->psz; i++) 504 if ( ! strcmp(manpath, req->p[i])) 505 return 1; 506 507 return 0; 508 } 509 510 static int 511 validate_arch(const char *arch) 512 { 513 int i; 514 515 for (i = 0; i < arch_MAX; i++) 516 if (strcmp(arch, arch_names[i]) == 0) 517 return 1; 518 519 return 0; 520 } 521 522 static int 523 validate_filename(const char *file) 524 { 525 526 if ('.' == file[0] && '/' == file[1]) 527 file += 2; 528 529 return ! (strstr(file, "../") || strstr(file, "/..") || 530 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 531 } 532 533 static void 534 pg_index(const struct req *req) 535 { 536 537 resp_begin_html(200, NULL, NULL); 538 resp_searchform(req, FOCUS_QUERY); 539 printf("<p>\n" 540 "This web interface is documented in the\n" 541 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 542 "manual, and the\n" 543 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 544 "manual explains the query syntax.\n" 545 "</p>\n", 546 scriptname, *scriptname == '\0' ? "" : "/", 547 scriptname, *scriptname == '\0' ? "" : "/"); 548 resp_end_html(); 549 } 550 551 static void 552 pg_noresult(const struct req *req, int code, const char *http_msg, 553 const char *user_msg) 554 { 555 resp_begin_html(code, http_msg, NULL); 556 resp_searchform(req, FOCUS_QUERY); 557 puts("<p>"); 558 puts(user_msg); 559 puts("</p>"); 560 resp_end_html(); 561 } 562 563 static void 564 pg_error_badrequest(const char *msg) 565 { 566 567 resp_begin_html(400, "Bad Request", NULL); 568 puts("<h1>Bad Request</h1>\n" 569 "<p>\n"); 570 puts(msg); 571 printf("Try again from the\n" 572 "<a href=\"/%s\">main page</a>.\n" 573 "</p>", scriptname); 574 resp_end_html(); 575 } 576 577 static void 578 pg_error_internal(void) 579 { 580 resp_begin_html(500, "Internal Server Error", NULL); 581 puts("<p>Internal Server Error</p>"); 582 resp_end_html(); 583 } 584 585 static void 586 pg_redirect(const struct req *req, const char *name) 587 { 588 printf("Status: 303 See Other\r\n" 589 "Location: /"); 590 if (*scriptname != '\0') 591 printf("%s/", scriptname); 592 if (strcmp(req->q.manpath, req->p[0])) 593 printf("%s/", req->q.manpath); 594 if (req->q.arch != NULL) 595 printf("%s/", req->q.arch); 596 http_encode(name); 597 if (req->q.sec != NULL) { 598 putchar('.'); 599 http_encode(req->q.sec); 600 } 601 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 602 } 603 604 static void 605 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 606 { 607 char *arch, *archend; 608 const char *sec; 609 size_t i, iuse; 610 int archprio, archpriouse; 611 int prio, priouse; 612 613 for (i = 0; i < sz; i++) { 614 if (validate_filename(r[i].file)) 615 continue; 616 warnx("invalid filename %s in %s database", 617 r[i].file, req->q.manpath); 618 pg_error_internal(); 619 return; 620 } 621 622 if (req->isquery && sz == 1) { 623 /* 624 * If we have just one result, then jump there now 625 * without any delay. 626 */ 627 printf("Status: 303 See Other\r\n" 628 "Location: /"); 629 if (*scriptname != '\0') 630 printf("%s/", scriptname); 631 if (strcmp(req->q.manpath, req->p[0])) 632 printf("%s/", req->q.manpath); 633 printf("%s\r\n" 634 "Content-Type: text/html; charset=utf-8\r\n\r\n", 635 r[0].file); 636 return; 637 } 638 639 /* 640 * In man(1) mode, show one of the pages 641 * even if more than one is found. 642 */ 643 644 iuse = 0; 645 if (req->q.equal || sz == 1) { 646 priouse = 20; 647 archpriouse = 3; 648 for (i = 0; i < sz; i++) { 649 sec = r[i].file; 650 sec += strcspn(sec, "123456789"); 651 if (sec[0] == '\0') 652 continue; 653 prio = sec_prios[sec[0] - '1']; 654 if (sec[1] != '/') 655 prio += 10; 656 if (req->q.arch == NULL) { 657 archprio = 658 ((arch = strchr(sec + 1, '/')) 659 == NULL) ? 3 : 660 ((archend = strchr(arch + 1, '/')) 661 == NULL) ? 0 : 662 strncmp(arch, "amd64/", 663 archend - arch) ? 2 : 1; 664 if (archprio < archpriouse) { 665 archpriouse = archprio; 666 priouse = prio; 667 iuse = i; 668 continue; 669 } 670 if (archprio > archpriouse) 671 continue; 672 } 673 if (prio >= priouse) 674 continue; 675 priouse = prio; 676 iuse = i; 677 } 678 resp_begin_html(200, NULL, r[iuse].file); 679 } else 680 resp_begin_html(200, NULL, NULL); 681 682 resp_searchform(req, 683 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 684 685 if (sz > 1) { 686 puts("<table class=\"results\">"); 687 for (i = 0; i < sz; i++) { 688 printf(" <tr>\n" 689 " <td>" 690 "<a class=\"Xr\" href=\"/"); 691 if (*scriptname != '\0') 692 printf("%s/", scriptname); 693 if (strcmp(req->q.manpath, req->p[0])) 694 printf("%s/", req->q.manpath); 695 printf("%s\">", r[i].file); 696 html_print(r[i].names); 697 printf("</a></td>\n" 698 " <td><span class=\"Nd\">"); 699 html_print(r[i].output); 700 puts("</span></td>\n" 701 " </tr>"); 702 } 703 puts("</table>"); 704 } 705 706 if (req->q.equal || sz == 1) { 707 puts("<hr>"); 708 resp_show(req, r[iuse].file); 709 } 710 711 resp_end_html(); 712 } 713 714 static void 715 resp_catman(const struct req *req, const char *file) 716 { 717 FILE *f; 718 char *p; 719 size_t sz; 720 ssize_t len; 721 int i; 722 int italic, bold; 723 724 if ((f = fopen(file, "r")) == NULL) { 725 puts("<p>You specified an invalid manual file.</p>"); 726 return; 727 } 728 729 puts("<div class=\"catman\">\n" 730 "<pre>"); 731 732 p = NULL; 733 sz = 0; 734 735 while ((len = getline(&p, &sz, f)) != -1) { 736 bold = italic = 0; 737 for (i = 0; i < len - 1; i++) { 738 /* 739 * This means that the catpage is out of state. 740 * Ignore it and keep going (although the 741 * catpage is bogus). 742 */ 743 744 if ('\b' == p[i] || '\n' == p[i]) 745 continue; 746 747 /* 748 * Print a regular character. 749 * Close out any bold/italic scopes. 750 * If we're in back-space mode, make sure we'll 751 * have something to enter when we backspace. 752 */ 753 754 if ('\b' != p[i + 1]) { 755 if (italic) 756 printf("</i>"); 757 if (bold) 758 printf("</b>"); 759 italic = bold = 0; 760 html_putchar(p[i]); 761 continue; 762 } else if (i + 2 >= len) 763 continue; 764 765 /* Italic mode. */ 766 767 if ('_' == p[i]) { 768 if (bold) 769 printf("</b>"); 770 if ( ! italic) 771 printf("<i>"); 772 bold = 0; 773 italic = 1; 774 i += 2; 775 html_putchar(p[i]); 776 continue; 777 } 778 779 /* 780 * Handle funny behaviour troff-isms. 781 * These grok'd from the original man2html.c. 782 */ 783 784 if (('+' == p[i] && 'o' == p[i + 2]) || 785 ('o' == p[i] && '+' == p[i + 2]) || 786 ('|' == p[i] && '=' == p[i + 2]) || 787 ('=' == p[i] && '|' == p[i + 2]) || 788 ('*' == p[i] && '=' == p[i + 2]) || 789 ('=' == p[i] && '*' == p[i + 2]) || 790 ('*' == p[i] && '|' == p[i + 2]) || 791 ('|' == p[i] && '*' == p[i + 2])) { 792 if (italic) 793 printf("</i>"); 794 if (bold) 795 printf("</b>"); 796 italic = bold = 0; 797 putchar('*'); 798 i += 2; 799 continue; 800 } else if (('|' == p[i] && '-' == p[i + 2]) || 801 ('-' == p[i] && '|' == p[i + 1]) || 802 ('+' == p[i] && '-' == p[i + 1]) || 803 ('-' == p[i] && '+' == p[i + 1]) || 804 ('+' == p[i] && '|' == p[i + 1]) || 805 ('|' == p[i] && '+' == p[i + 1])) { 806 if (italic) 807 printf("</i>"); 808 if (bold) 809 printf("</b>"); 810 italic = bold = 0; 811 putchar('+'); 812 i += 2; 813 continue; 814 } 815 816 /* Bold mode. */ 817 818 if (italic) 819 printf("</i>"); 820 if ( ! bold) 821 printf("<b>"); 822 bold = 1; 823 italic = 0; 824 i += 2; 825 html_putchar(p[i]); 826 } 827 828 /* 829 * Clean up the last character. 830 * We can get to a newline; don't print that. 831 */ 832 833 if (italic) 834 printf("</i>"); 835 if (bold) 836 printf("</b>"); 837 838 if (i == len - 1 && p[i] != '\n') 839 html_putchar(p[i]); 840 841 putchar('\n'); 842 } 843 free(p); 844 845 puts("</pre>\n" 846 "</div>"); 847 848 fclose(f); 849 } 850 851 static void 852 resp_format(const struct req *req, const char *file) 853 { 854 struct manoutput conf; 855 struct mparse *mp; 856 struct roff_meta *meta; 857 void *vp; 858 int fd; 859 int usepath; 860 861 if (-1 == (fd = open(file, O_RDONLY, 0))) { 862 puts("<p>You specified an invalid manual file.</p>"); 863 return; 864 } 865 866 mchars_alloc(); 867 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 868 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 869 mparse_readfd(mp, fd, file); 870 close(fd); 871 meta = mparse_result(mp); 872 873 memset(&conf, 0, sizeof(conf)); 874 conf.fragment = 1; 875 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 876 usepath = strcmp(req->q.manpath, req->p[0]); 877 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 878 scriptname, *scriptname == '\0' ? "" : "/", 879 usepath ? req->q.manpath : "", usepath ? "/" : ""); 880 881 vp = html_alloc(&conf); 882 if (meta->macroset == MACROSET_MDOC) 883 html_mdoc(vp, meta); 884 else 885 html_man(vp, meta); 886 887 html_free(vp); 888 mparse_free(mp); 889 mchars_free(); 890 free(conf.man); 891 free(conf.style); 892 } 893 894 static void 895 resp_show(const struct req *req, const char *file) 896 { 897 898 if ('.' == file[0] && '/' == file[1]) 899 file += 2; 900 901 if ('c' == *file) 902 resp_catman(req, file); 903 else 904 resp_format(req, file); 905 } 906 907 static void 908 pg_show(struct req *req, const char *fullpath) 909 { 910 char *manpath; 911 const char *file; 912 913 if ((file = strchr(fullpath, '/')) == NULL) { 914 pg_error_badrequest( 915 "You did not specify a page to show."); 916 return; 917 } 918 manpath = mandoc_strndup(fullpath, file - fullpath); 919 file++; 920 921 if ( ! validate_manpath(req, manpath)) { 922 pg_error_badrequest( 923 "You specified an invalid manpath."); 924 free(manpath); 925 return; 926 } 927 928 /* 929 * Begin by chdir()ing into the manpath. 930 * This way we can pick up the database files, which are 931 * relative to the manpath root. 932 */ 933 934 if (chdir(manpath) == -1) { 935 warn("chdir %s", manpath); 936 pg_error_internal(); 937 free(manpath); 938 return; 939 } 940 free(manpath); 941 942 if ( ! validate_filename(file)) { 943 pg_error_badrequest( 944 "You specified an invalid manual file."); 945 return; 946 } 947 948 resp_begin_html(200, NULL, file); 949 resp_searchform(req, FOCUS_NONE); 950 resp_show(req, file); 951 resp_end_html(); 952 } 953 954 static void 955 pg_search(const struct req *req) 956 { 957 struct mansearch search; 958 struct manpaths paths; 959 struct manpage *res; 960 char **argv; 961 char *query, *rp, *wp; 962 size_t ressz; 963 int argc; 964 965 /* 966 * Begin by chdir()ing into the root of the manpath. 967 * This way we can pick up the database files, which are 968 * relative to the manpath root. 969 */ 970 971 if (chdir(req->q.manpath) == -1) { 972 warn("chdir %s", req->q.manpath); 973 pg_error_internal(); 974 return; 975 } 976 977 search.arch = req->q.arch; 978 search.sec = req->q.sec; 979 search.outkey = "Nd"; 980 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 981 search.firstmatch = 1; 982 983 paths.sz = 1; 984 paths.paths = mandoc_malloc(sizeof(char *)); 985 paths.paths[0] = mandoc_strdup("."); 986 987 /* 988 * Break apart at spaces with backslash-escaping. 989 */ 990 991 argc = 0; 992 argv = NULL; 993 rp = query = mandoc_strdup(req->q.query); 994 for (;;) { 995 while (isspace((unsigned char)*rp)) 996 rp++; 997 if (*rp == '\0') 998 break; 999 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 1000 argv[argc++] = wp = rp; 1001 for (;;) { 1002 if (isspace((unsigned char)*rp)) { 1003 *wp = '\0'; 1004 rp++; 1005 break; 1006 } 1007 if (rp[0] == '\\' && rp[1] != '\0') 1008 rp++; 1009 if (wp != rp) 1010 *wp = *rp; 1011 if (*rp == '\0') 1012 break; 1013 wp++; 1014 rp++; 1015 } 1016 } 1017 1018 res = NULL; 1019 ressz = 0; 1020 if (req->isquery && req->q.equal && argc == 1) 1021 pg_redirect(req, argv[0]); 1022 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1023 pg_noresult(req, 400, "Bad Request", 1024 "You entered an invalid query."); 1025 else if (ressz == 0) 1026 pg_noresult(req, 404, "Not Found", "No results found."); 1027 else 1028 pg_searchres(req, res, ressz); 1029 1030 free(query); 1031 mansearch_free(res, ressz); 1032 free(paths.paths[0]); 1033 free(paths.paths); 1034 } 1035 1036 int 1037 main(void) 1038 { 1039 struct req req; 1040 struct itimerval itimer; 1041 const char *path; 1042 const char *querystring; 1043 int i; 1044 1045 /* 1046 * The "rpath" pledge could be revoked after mparse_readfd() 1047 * if the file desciptor to "/footer.html" would be opened 1048 * up front, but it's probably not worth the complication 1049 * of the code it would cause: it would require scattering 1050 * pledge() calls in multiple low-level resp_*() functions. 1051 */ 1052 1053 if (pledge("stdio rpath", NULL) == -1) { 1054 warn("pledge"); 1055 pg_error_internal(); 1056 return EXIT_FAILURE; 1057 } 1058 1059 /* Poor man's ReDoS mitigation. */ 1060 1061 itimer.it_value.tv_sec = 2; 1062 itimer.it_value.tv_usec = 0; 1063 itimer.it_interval.tv_sec = 2; 1064 itimer.it_interval.tv_usec = 0; 1065 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1066 warn("setitimer"); 1067 pg_error_internal(); 1068 return EXIT_FAILURE; 1069 } 1070 1071 /* 1072 * First we change directory into the MAN_DIR so that 1073 * subsequent scanning for manpath directories is rooted 1074 * relative to the same position. 1075 */ 1076 1077 if (chdir(MAN_DIR) == -1) { 1078 warn("MAN_DIR: %s", MAN_DIR); 1079 pg_error_internal(); 1080 return EXIT_FAILURE; 1081 } 1082 1083 memset(&req, 0, sizeof(struct req)); 1084 req.q.equal = 1; 1085 parse_manpath_conf(&req); 1086 1087 /* Parse the path info and the query string. */ 1088 1089 if ((path = getenv("PATH_INFO")) == NULL) 1090 path = ""; 1091 else if (*path == '/') 1092 path++; 1093 1094 if (*path != '\0') { 1095 parse_path_info(&req, path); 1096 if (req.q.manpath == NULL || req.q.sec == NULL || 1097 *req.q.query == '\0' || access(path, F_OK) == -1) 1098 path = ""; 1099 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1100 parse_query_string(&req, querystring); 1101 1102 /* Validate parsed data and add defaults. */ 1103 1104 if (req.q.manpath == NULL) 1105 req.q.manpath = mandoc_strdup(req.p[0]); 1106 else if ( ! validate_manpath(&req, req.q.manpath)) { 1107 pg_error_badrequest( 1108 "You specified an invalid manpath."); 1109 return EXIT_FAILURE; 1110 } 1111 1112 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1113 pg_error_badrequest( 1114 "You specified an invalid architecture."); 1115 return EXIT_FAILURE; 1116 } 1117 1118 /* Dispatch to the three different pages. */ 1119 1120 if ('\0' != *path) 1121 pg_show(&req, path); 1122 else if (NULL != req.q.query) 1123 pg_search(&req); 1124 else 1125 pg_index(&req); 1126 1127 free(req.q.manpath); 1128 free(req.q.arch); 1129 free(req.q.sec); 1130 free(req.q.query); 1131 for (i = 0; i < (int)req.psz; i++) 1132 free(req.p[i]); 1133 free(req.p); 1134 return EXIT_SUCCESS; 1135 } 1136 1137 /* 1138 * Translate PATH_INFO to a query. 1139 */ 1140 static void 1141 parse_path_info(struct req *req, const char *path) 1142 { 1143 const char *name, *sec, *end; 1144 1145 req->isquery = 0; 1146 req->q.equal = 1; 1147 req->q.manpath = NULL; 1148 req->q.arch = NULL; 1149 1150 /* Mandatory manual page name. */ 1151 if ((name = strrchr(path, '/')) == NULL) 1152 name = path; 1153 else 1154 name++; 1155 1156 /* Optional trailing section. */ 1157 sec = strrchr(name, '.'); 1158 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1159 req->q.query = mandoc_strndup(name, sec - name - 1); 1160 req->q.sec = mandoc_strdup(sec); 1161 } else { 1162 req->q.query = mandoc_strdup(name); 1163 req->q.sec = NULL; 1164 } 1165 1166 /* Handle the case of name[.section] only. */ 1167 if (name == path) 1168 return; 1169 1170 /* Optional manpath. */ 1171 end = strchr(path, '/'); 1172 req->q.manpath = mandoc_strndup(path, end - path); 1173 if (validate_manpath(req, req->q.manpath)) { 1174 path = end + 1; 1175 if (name == path) 1176 return; 1177 } else { 1178 free(req->q.manpath); 1179 req->q.manpath = NULL; 1180 } 1181 1182 /* Optional section. */ 1183 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1184 path += 3; 1185 end = strchr(path, '/'); 1186 free(req->q.sec); 1187 req->q.sec = mandoc_strndup(path, end - path); 1188 path = end + 1; 1189 if (name == path) 1190 return; 1191 } 1192 1193 /* Optional architecture. */ 1194 end = strchr(path, '/'); 1195 if (end + 1 != name) { 1196 pg_error_badrequest( 1197 "You specified too many directory components."); 1198 exit(EXIT_FAILURE); 1199 } 1200 req->q.arch = mandoc_strndup(path, end - path); 1201 if (validate_arch(req->q.arch) == 0) { 1202 pg_error_badrequest( 1203 "You specified an invalid directory component."); 1204 exit(EXIT_FAILURE); 1205 } 1206 } 1207 1208 /* 1209 * Scan for indexable paths. 1210 */ 1211 static void 1212 parse_manpath_conf(struct req *req) 1213 { 1214 FILE *fp; 1215 char *dp; 1216 size_t dpsz; 1217 ssize_t len; 1218 1219 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1220 warn("%s/manpath.conf", MAN_DIR); 1221 pg_error_internal(); 1222 exit(EXIT_FAILURE); 1223 } 1224 1225 dp = NULL; 1226 dpsz = 0; 1227 1228 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1229 if (dp[len - 1] == '\n') 1230 dp[--len] = '\0'; 1231 req->p = mandoc_realloc(req->p, 1232 (req->psz + 1) * sizeof(char *)); 1233 if ( ! validate_urifrag(dp)) { 1234 warnx("%s/manpath.conf contains " 1235 "unsafe path \"%s\"", MAN_DIR, dp); 1236 pg_error_internal(); 1237 exit(EXIT_FAILURE); 1238 } 1239 if (strchr(dp, '/') != NULL) { 1240 warnx("%s/manpath.conf contains " 1241 "path with slash \"%s\"", MAN_DIR, dp); 1242 pg_error_internal(); 1243 exit(EXIT_FAILURE); 1244 } 1245 req->p[req->psz++] = dp; 1246 dp = NULL; 1247 dpsz = 0; 1248 } 1249 free(dp); 1250 1251 if (req->p == NULL) { 1252 warnx("%s/manpath.conf is empty", MAN_DIR); 1253 pg_error_internal(); 1254 exit(EXIT_FAILURE); 1255 } 1256 } 1257