1 /* $OpenBSD: cgi.c,v 1.110 2020/04/03 11:34:19 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de> 4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the man.cgi(8) program. 19 */ 20 #include <sys/types.h> 21 #include <sys/time.h> 22 23 #include <ctype.h> 24 #include <err.h> 25 #include <errno.h> 26 #include <fcntl.h> 27 #include <limits.h> 28 #include <stdint.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "mandoc_aux.h" 35 #include "mandoc.h" 36 #include "roff.h" 37 #include "mdoc.h" 38 #include "man.h" 39 #include "mandoc_parse.h" 40 #include "main.h" 41 #include "manconf.h" 42 #include "mansearch.h" 43 #include "cgi.h" 44 45 /* 46 * A query as passed to the search function. 47 */ 48 struct query { 49 char *manpath; /* desired manual directory */ 50 char *arch; /* architecture */ 51 char *sec; /* manual section */ 52 char *query; /* unparsed query expression */ 53 int equal; /* match whole names, not substrings */ 54 }; 55 56 struct req { 57 struct query q; 58 char **p; /* array of available manpaths */ 59 size_t psz; /* number of available manpaths */ 60 int isquery; /* QUERY_STRING used, not PATH_INFO */ 61 }; 62 63 enum focus { 64 FOCUS_NONE = 0, 65 FOCUS_QUERY 66 }; 67 68 static void html_print(const char *); 69 static void html_putchar(char); 70 static int http_decode(char *); 71 static void http_encode(const char *); 72 static void parse_manpath_conf(struct req *); 73 static void parse_path_info(struct req *, const char *); 74 static void parse_query_string(struct req *, const char *); 75 static void pg_error_badrequest(const char *); 76 static void pg_error_internal(void); 77 static void pg_index(const struct req *); 78 static void pg_noresult(const struct req *, int, const char *, 79 const char *); 80 static void pg_redirect(const struct req *, const char *); 81 static void pg_search(const struct req *); 82 static void pg_searchres(const struct req *, 83 struct manpage *, size_t); 84 static void pg_show(struct req *, const char *); 85 static void resp_begin_html(int, const char *, const char *); 86 static void resp_begin_http(int, const char *); 87 static void resp_catman(const struct req *, const char *); 88 static void resp_copy(const char *); 89 static void resp_end_html(void); 90 static void resp_format(const struct req *, const char *); 91 static void resp_searchform(const struct req *, enum focus); 92 static void resp_show(const struct req *, const char *); 93 static void set_query_attr(char **, char **); 94 static int validate_arch(const char *); 95 static int validate_filename(const char *); 96 static int validate_manpath(const struct req *, const char *); 97 static int validate_urifrag(const char *); 98 99 static const char *scriptname = SCRIPT_NAME; 100 101 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 102 static const char *const sec_numbers[] = { 103 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 104 }; 105 static const char *const sec_names[] = { 106 "All Sections", 107 "1 - General Commands", 108 "2 - System Calls", 109 "3 - Library Functions", 110 "3p - Perl Library", 111 "4 - Device Drivers", 112 "5 - File Formats", 113 "6 - Games", 114 "7 - Miscellaneous Information", 115 "8 - System Manager\'s Manual", 116 "9 - Kernel Developer\'s Manual" 117 }; 118 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 119 120 static const char *const arch_names[] = { 121 "amd64", "alpha", "armv7", "arm64", 122 "hppa", "i386", "landisk", 123 "loongson", "luna88k", "macppc", "mips64", 124 "octeon", "sgi", "socppc", "sparc64", 125 "amiga", "arc", "armish", "arm32", 126 "atari", "aviion", "beagle", "cats", 127 "hppa64", "hp300", 128 "ia64", "mac68k", "mvme68k", "mvme88k", 129 "mvmeppc", "palm", "pc532", "pegasos", 130 "pmax", "powerpc", "solbourne", "sparc", 131 "sun3", "vax", "wgrisc", "x68k", 132 "zaurus" 133 }; 134 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 135 136 /* 137 * Print a character, escaping HTML along the way. 138 * This will pass non-ASCII straight to output: be warned! 139 */ 140 static void 141 html_putchar(char c) 142 { 143 144 switch (c) { 145 case '"': 146 printf("""); 147 break; 148 case '&': 149 printf("&"); 150 break; 151 case '>': 152 printf(">"); 153 break; 154 case '<': 155 printf("<"); 156 break; 157 default: 158 putchar((unsigned char)c); 159 break; 160 } 161 } 162 163 /* 164 * Call through to html_putchar(). 165 * Accepts NULL strings. 166 */ 167 static void 168 html_print(const char *p) 169 { 170 171 if (NULL == p) 172 return; 173 while ('\0' != *p) 174 html_putchar(*p++); 175 } 176 177 /* 178 * Transfer the responsibility for the allocated string *val 179 * to the query structure. 180 */ 181 static void 182 set_query_attr(char **attr, char **val) 183 { 184 185 free(*attr); 186 if (**val == '\0') { 187 *attr = NULL; 188 free(*val); 189 } else 190 *attr = *val; 191 *val = NULL; 192 } 193 194 /* 195 * Parse the QUERY_STRING for key-value pairs 196 * and store the values into the query structure. 197 */ 198 static void 199 parse_query_string(struct req *req, const char *qs) 200 { 201 char *key, *val; 202 size_t keysz, valsz; 203 204 req->isquery = 1; 205 req->q.manpath = NULL; 206 req->q.arch = NULL; 207 req->q.sec = NULL; 208 req->q.query = NULL; 209 req->q.equal = 1; 210 211 key = val = NULL; 212 while (*qs != '\0') { 213 214 /* Parse one key. */ 215 216 keysz = strcspn(qs, "=;&"); 217 key = mandoc_strndup(qs, keysz); 218 qs += keysz; 219 if (*qs != '=') 220 goto next; 221 222 /* Parse one value. */ 223 224 valsz = strcspn(++qs, ";&"); 225 val = mandoc_strndup(qs, valsz); 226 qs += valsz; 227 228 /* Decode and catch encoding errors. */ 229 230 if ( ! (http_decode(key) && http_decode(val))) 231 goto next; 232 233 /* Handle key-value pairs. */ 234 235 if ( ! strcmp(key, "query")) 236 set_query_attr(&req->q.query, &val); 237 238 else if ( ! strcmp(key, "apropos")) 239 req->q.equal = !strcmp(val, "0"); 240 241 else if ( ! strcmp(key, "manpath")) { 242 #ifdef COMPAT_OLDURI 243 if ( ! strncmp(val, "OpenBSD ", 8)) { 244 val[7] = '-'; 245 if ('C' == val[8]) 246 val[8] = 'c'; 247 } 248 #endif 249 set_query_attr(&req->q.manpath, &val); 250 } 251 252 else if ( ! (strcmp(key, "sec") 253 #ifdef COMPAT_OLDURI 254 && strcmp(key, "sektion") 255 #endif 256 )) { 257 if ( ! strcmp(val, "0")) 258 *val = '\0'; 259 set_query_attr(&req->q.sec, &val); 260 } 261 262 else if ( ! strcmp(key, "arch")) { 263 if ( ! strcmp(val, "default")) 264 *val = '\0'; 265 set_query_attr(&req->q.arch, &val); 266 } 267 268 /* 269 * The key must be freed in any case. 270 * The val may have been handed over to the query 271 * structure, in which case it is now NULL. 272 */ 273 next: 274 free(key); 275 key = NULL; 276 free(val); 277 val = NULL; 278 279 if (*qs != '\0') 280 qs++; 281 } 282 } 283 284 /* 285 * HTTP-decode a string. The standard explanation is that this turns 286 * "%4e+foo" into "n foo" in the regular way. This is done in-place 287 * over the allocated string. 288 */ 289 static int 290 http_decode(char *p) 291 { 292 char hex[3]; 293 char *q; 294 int c; 295 296 hex[2] = '\0'; 297 298 q = p; 299 for ( ; '\0' != *p; p++, q++) { 300 if ('%' == *p) { 301 if ('\0' == (hex[0] = *(p + 1))) 302 return 0; 303 if ('\0' == (hex[1] = *(p + 2))) 304 return 0; 305 if (1 != sscanf(hex, "%x", &c)) 306 return 0; 307 if ('\0' == c) 308 return 0; 309 310 *q = (char)c; 311 p += 2; 312 } else 313 *q = '+' == *p ? ' ' : *p; 314 } 315 316 *q = '\0'; 317 return 1; 318 } 319 320 static void 321 http_encode(const char *p) 322 { 323 for (; *p != '\0'; p++) { 324 if (isalnum((unsigned char)*p) == 0 && 325 strchr("-._~", *p) == NULL) 326 printf("%%%2.2X", (unsigned char)*p); 327 else 328 putchar(*p); 329 } 330 } 331 332 static void 333 resp_begin_http(int code, const char *msg) 334 { 335 336 if (200 != code) 337 printf("Status: %d %s\r\n", code, msg); 338 339 printf("Content-Type: text/html; charset=utf-8\r\n" 340 "Cache-Control: no-cache\r\n" 341 "Content-Security-Policy: default-src 'none'; " 342 "style-src 'self' 'unsafe-inline'\r\n" 343 "Pragma: no-cache\r\n" 344 "\r\n"); 345 346 fflush(stdout); 347 } 348 349 static void 350 resp_copy(const char *filename) 351 { 352 char buf[4096]; 353 ssize_t sz; 354 int fd; 355 356 if ((fd = open(filename, O_RDONLY)) != -1) { 357 fflush(stdout); 358 while ((sz = read(fd, buf, sizeof(buf))) > 0) 359 write(STDOUT_FILENO, buf, sz); 360 close(fd); 361 } 362 } 363 364 static void 365 resp_begin_html(int code, const char *msg, const char *file) 366 { 367 char *cp; 368 369 resp_begin_http(code, msg); 370 371 printf("<!DOCTYPE html>\n" 372 "<html>\n" 373 "<head>\n" 374 " <meta charset=\"UTF-8\"/>\n" 375 " <meta name=\"viewport\"" 376 " content=\"width=device-width, initial-scale=1.0\">\n" 377 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 378 " type=\"text/css\" media=\"all\">\n" 379 " <title>", 380 CSS_DIR); 381 if (file != NULL) { 382 if ((cp = strrchr(file, '/')) != NULL) 383 file = cp + 1; 384 if ((cp = strrchr(file, '.')) != NULL) { 385 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 386 } else 387 printf("%s - ", file); 388 } 389 printf("%s</title>\n" 390 "</head>\n" 391 "<body>\n", 392 CUSTOMIZE_TITLE); 393 394 resp_copy(MAN_DIR "/header.html"); 395 } 396 397 static void 398 resp_end_html(void) 399 { 400 401 resp_copy(MAN_DIR "/footer.html"); 402 403 puts("</body>\n" 404 "</html>"); 405 } 406 407 static void 408 resp_searchform(const struct req *req, enum focus focus) 409 { 410 int i; 411 412 printf("<form action=\"/%s\" method=\"get\" " 413 "autocomplete=\"off\" autocapitalize=\"none\">\n" 414 " <fieldset>\n" 415 " <legend>Manual Page Search Parameters</legend>\n", 416 scriptname); 417 418 /* Write query input box. */ 419 420 printf(" <input type=\"search\" name=\"query\" value=\""); 421 if (req->q.query != NULL) 422 html_print(req->q.query); 423 printf( "\" size=\"40\""); 424 if (focus == FOCUS_QUERY) 425 printf(" autofocus"); 426 puts(">"); 427 428 /* Write submission buttons. */ 429 430 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 431 "man</button>\n" 432 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 433 "apropos</button>\n" 434 " <br/>\n"); 435 436 /* Write section selector. */ 437 438 puts(" <select name=\"sec\">"); 439 for (i = 0; i < sec_MAX; i++) { 440 printf(" <option value=\"%s\"", sec_numbers[i]); 441 if (NULL != req->q.sec && 442 0 == strcmp(sec_numbers[i], req->q.sec)) 443 printf(" selected=\"selected\""); 444 printf(">%s</option>\n", sec_names[i]); 445 } 446 puts(" </select>"); 447 448 /* Write architecture selector. */ 449 450 printf( " <select name=\"arch\">\n" 451 " <option value=\"default\""); 452 if (NULL == req->q.arch) 453 printf(" selected=\"selected\""); 454 puts(">All Architectures</option>"); 455 for (i = 0; i < arch_MAX; i++) { 456 printf(" <option"); 457 if (NULL != req->q.arch && 458 0 == strcmp(arch_names[i], req->q.arch)) 459 printf(" selected=\"selected\""); 460 printf(">%s</option>\n", arch_names[i]); 461 } 462 puts(" </select>"); 463 464 /* Write manpath selector. */ 465 466 if (req->psz > 1) { 467 puts(" <select name=\"manpath\">"); 468 for (i = 0; i < (int)req->psz; i++) { 469 printf(" <option"); 470 if (strcmp(req->q.manpath, req->p[i]) == 0) 471 printf(" selected=\"selected\""); 472 printf(">"); 473 html_print(req->p[i]); 474 puts("</option>"); 475 } 476 puts(" </select>"); 477 } 478 479 puts(" </fieldset>\n" 480 "</form>"); 481 } 482 483 static int 484 validate_urifrag(const char *frag) 485 { 486 487 while ('\0' != *frag) { 488 if ( ! (isalnum((unsigned char)*frag) || 489 '-' == *frag || '.' == *frag || 490 '/' == *frag || '_' == *frag)) 491 return 0; 492 frag++; 493 } 494 return 1; 495 } 496 497 static int 498 validate_manpath(const struct req *req, const char* manpath) 499 { 500 size_t i; 501 502 for (i = 0; i < req->psz; i++) 503 if ( ! strcmp(manpath, req->p[i])) 504 return 1; 505 506 return 0; 507 } 508 509 static int 510 validate_arch(const char *arch) 511 { 512 int i; 513 514 for (i = 0; i < arch_MAX; i++) 515 if (strcmp(arch, arch_names[i]) == 0) 516 return 1; 517 518 return 0; 519 } 520 521 static int 522 validate_filename(const char *file) 523 { 524 525 if ('.' == file[0] && '/' == file[1]) 526 file += 2; 527 528 return ! (strstr(file, "../") || strstr(file, "/..") || 529 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 530 } 531 532 static void 533 pg_index(const struct req *req) 534 { 535 536 resp_begin_html(200, NULL, NULL); 537 resp_searchform(req, FOCUS_QUERY); 538 printf("<p>\n" 539 "This web interface is documented in the\n" 540 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 541 "manual, and the\n" 542 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 543 "manual explains the query syntax.\n" 544 "</p>\n", 545 scriptname, *scriptname == '\0' ? "" : "/", 546 scriptname, *scriptname == '\0' ? "" : "/"); 547 resp_end_html(); 548 } 549 550 static void 551 pg_noresult(const struct req *req, int code, const char *http_msg, 552 const char *user_msg) 553 { 554 resp_begin_html(code, http_msg, NULL); 555 resp_searchform(req, FOCUS_QUERY); 556 puts("<p>"); 557 puts(user_msg); 558 puts("</p>"); 559 resp_end_html(); 560 } 561 562 static void 563 pg_error_badrequest(const char *msg) 564 { 565 566 resp_begin_html(400, "Bad Request", NULL); 567 puts("<h1>Bad Request</h1>\n" 568 "<p>\n"); 569 puts(msg); 570 printf("Try again from the\n" 571 "<a href=\"/%s\">main page</a>.\n" 572 "</p>", scriptname); 573 resp_end_html(); 574 } 575 576 static void 577 pg_error_internal(void) 578 { 579 resp_begin_html(500, "Internal Server Error", NULL); 580 puts("<p>Internal Server Error</p>"); 581 resp_end_html(); 582 } 583 584 static void 585 pg_redirect(const struct req *req, const char *name) 586 { 587 printf("Status: 303 See Other\r\n" 588 "Location: /"); 589 if (*scriptname != '\0') 590 printf("%s/", scriptname); 591 if (strcmp(req->q.manpath, req->p[0])) 592 printf("%s/", req->q.manpath); 593 if (req->q.arch != NULL) 594 printf("%s/", req->q.arch); 595 http_encode(name); 596 if (req->q.sec != NULL) { 597 putchar('.'); 598 http_encode(req->q.sec); 599 } 600 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 601 } 602 603 static void 604 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 605 { 606 char *arch, *archend; 607 const char *sec; 608 size_t i, iuse; 609 int archprio, archpriouse; 610 int prio, priouse; 611 612 for (i = 0; i < sz; i++) { 613 if (validate_filename(r[i].file)) 614 continue; 615 warnx("invalid filename %s in %s database", 616 r[i].file, req->q.manpath); 617 pg_error_internal(); 618 return; 619 } 620 621 if (req->isquery && sz == 1) { 622 /* 623 * If we have just one result, then jump there now 624 * without any delay. 625 */ 626 printf("Status: 303 See Other\r\n" 627 "Location: /"); 628 if (*scriptname != '\0') 629 printf("%s/", scriptname); 630 if (strcmp(req->q.manpath, req->p[0])) 631 printf("%s/", req->q.manpath); 632 printf("%s\r\n" 633 "Content-Type: text/html; charset=utf-8\r\n\r\n", 634 r[0].file); 635 return; 636 } 637 638 /* 639 * In man(1) mode, show one of the pages 640 * even if more than one is found. 641 */ 642 643 iuse = 0; 644 if (req->q.equal || sz == 1) { 645 priouse = 20; 646 archpriouse = 3; 647 for (i = 0; i < sz; i++) { 648 sec = r[i].file; 649 sec += strcspn(sec, "123456789"); 650 if (sec[0] == '\0') 651 continue; 652 prio = sec_prios[sec[0] - '1']; 653 if (sec[1] != '/') 654 prio += 10; 655 if (req->q.arch == NULL) { 656 archprio = 657 ((arch = strchr(sec + 1, '/')) 658 == NULL) ? 3 : 659 ((archend = strchr(arch + 1, '/')) 660 == NULL) ? 0 : 661 strncmp(arch, "amd64/", 662 archend - arch) ? 2 : 1; 663 if (archprio < archpriouse) { 664 archpriouse = archprio; 665 priouse = prio; 666 iuse = i; 667 continue; 668 } 669 if (archprio > archpriouse) 670 continue; 671 } 672 if (prio >= priouse) 673 continue; 674 priouse = prio; 675 iuse = i; 676 } 677 resp_begin_html(200, NULL, r[iuse].file); 678 } else 679 resp_begin_html(200, NULL, NULL); 680 681 resp_searchform(req, 682 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 683 684 if (sz > 1) { 685 puts("<table class=\"results\">"); 686 for (i = 0; i < sz; i++) { 687 printf(" <tr>\n" 688 " <td>" 689 "<a class=\"Xr\" href=\"/"); 690 if (*scriptname != '\0') 691 printf("%s/", scriptname); 692 if (strcmp(req->q.manpath, req->p[0])) 693 printf("%s/", req->q.manpath); 694 printf("%s\">", r[i].file); 695 html_print(r[i].names); 696 printf("</a></td>\n" 697 " <td><span class=\"Nd\">"); 698 html_print(r[i].output); 699 puts("</span></td>\n" 700 " </tr>"); 701 } 702 puts("</table>"); 703 } 704 705 if (req->q.equal || sz == 1) { 706 puts("<hr>"); 707 resp_show(req, r[iuse].file); 708 } 709 710 resp_end_html(); 711 } 712 713 static void 714 resp_catman(const struct req *req, const char *file) 715 { 716 FILE *f; 717 char *p; 718 size_t sz; 719 ssize_t len; 720 int i; 721 int italic, bold; 722 723 if ((f = fopen(file, "r")) == NULL) { 724 puts("<p>You specified an invalid manual file.</p>"); 725 return; 726 } 727 728 puts("<div class=\"catman\">\n" 729 "<pre>"); 730 731 p = NULL; 732 sz = 0; 733 734 while ((len = getline(&p, &sz, f)) != -1) { 735 bold = italic = 0; 736 for (i = 0; i < len - 1; i++) { 737 /* 738 * This means that the catpage is out of state. 739 * Ignore it and keep going (although the 740 * catpage is bogus). 741 */ 742 743 if ('\b' == p[i] || '\n' == p[i]) 744 continue; 745 746 /* 747 * Print a regular character. 748 * Close out any bold/italic scopes. 749 * If we're in back-space mode, make sure we'll 750 * have something to enter when we backspace. 751 */ 752 753 if ('\b' != p[i + 1]) { 754 if (italic) 755 printf("</i>"); 756 if (bold) 757 printf("</b>"); 758 italic = bold = 0; 759 html_putchar(p[i]); 760 continue; 761 } else if (i + 2 >= len) 762 continue; 763 764 /* Italic mode. */ 765 766 if ('_' == p[i]) { 767 if (bold) 768 printf("</b>"); 769 if ( ! italic) 770 printf("<i>"); 771 bold = 0; 772 italic = 1; 773 i += 2; 774 html_putchar(p[i]); 775 continue; 776 } 777 778 /* 779 * Handle funny behaviour troff-isms. 780 * These grok'd from the original man2html.c. 781 */ 782 783 if (('+' == p[i] && 'o' == p[i + 2]) || 784 ('o' == p[i] && '+' == p[i + 2]) || 785 ('|' == p[i] && '=' == p[i + 2]) || 786 ('=' == p[i] && '|' == p[i + 2]) || 787 ('*' == p[i] && '=' == p[i + 2]) || 788 ('=' == p[i] && '*' == p[i + 2]) || 789 ('*' == p[i] && '|' == p[i + 2]) || 790 ('|' == p[i] && '*' == p[i + 2])) { 791 if (italic) 792 printf("</i>"); 793 if (bold) 794 printf("</b>"); 795 italic = bold = 0; 796 putchar('*'); 797 i += 2; 798 continue; 799 } else if (('|' == p[i] && '-' == p[i + 2]) || 800 ('-' == p[i] && '|' == p[i + 1]) || 801 ('+' == p[i] && '-' == p[i + 1]) || 802 ('-' == p[i] && '+' == p[i + 1]) || 803 ('+' == p[i] && '|' == p[i + 1]) || 804 ('|' == p[i] && '+' == p[i + 1])) { 805 if (italic) 806 printf("</i>"); 807 if (bold) 808 printf("</b>"); 809 italic = bold = 0; 810 putchar('+'); 811 i += 2; 812 continue; 813 } 814 815 /* Bold mode. */ 816 817 if (italic) 818 printf("</i>"); 819 if ( ! bold) 820 printf("<b>"); 821 bold = 1; 822 italic = 0; 823 i += 2; 824 html_putchar(p[i]); 825 } 826 827 /* 828 * Clean up the last character. 829 * We can get to a newline; don't print that. 830 */ 831 832 if (italic) 833 printf("</i>"); 834 if (bold) 835 printf("</b>"); 836 837 if (i == len - 1 && p[i] != '\n') 838 html_putchar(p[i]); 839 840 putchar('\n'); 841 } 842 free(p); 843 844 puts("</pre>\n" 845 "</div>"); 846 847 fclose(f); 848 } 849 850 static void 851 resp_format(const struct req *req, const char *file) 852 { 853 struct manoutput conf; 854 struct mparse *mp; 855 struct roff_meta *meta; 856 void *vp; 857 int fd; 858 int usepath; 859 860 if (-1 == (fd = open(file, O_RDONLY, 0))) { 861 puts("<p>You specified an invalid manual file.</p>"); 862 return; 863 } 864 865 mchars_alloc(); 866 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 867 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 868 mparse_readfd(mp, fd, file); 869 close(fd); 870 meta = mparse_result(mp); 871 872 memset(&conf, 0, sizeof(conf)); 873 conf.fragment = 1; 874 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 875 usepath = strcmp(req->q.manpath, req->p[0]); 876 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 877 scriptname, *scriptname == '\0' ? "" : "/", 878 usepath ? req->q.manpath : "", usepath ? "/" : ""); 879 880 vp = html_alloc(&conf); 881 if (meta->macroset == MACROSET_MDOC) 882 html_mdoc(vp, meta); 883 else 884 html_man(vp, meta); 885 886 html_free(vp); 887 mparse_free(mp); 888 mchars_free(); 889 free(conf.man); 890 free(conf.style); 891 } 892 893 static void 894 resp_show(const struct req *req, const char *file) 895 { 896 897 if ('.' == file[0] && '/' == file[1]) 898 file += 2; 899 900 if ('c' == *file) 901 resp_catman(req, file); 902 else 903 resp_format(req, file); 904 } 905 906 static void 907 pg_show(struct req *req, const char *fullpath) 908 { 909 char *manpath; 910 const char *file; 911 912 if ((file = strchr(fullpath, '/')) == NULL) { 913 pg_error_badrequest( 914 "You did not specify a page to show."); 915 return; 916 } 917 manpath = mandoc_strndup(fullpath, file - fullpath); 918 file++; 919 920 if ( ! validate_manpath(req, manpath)) { 921 pg_error_badrequest( 922 "You specified an invalid manpath."); 923 free(manpath); 924 return; 925 } 926 927 /* 928 * Begin by chdir()ing into the manpath. 929 * This way we can pick up the database files, which are 930 * relative to the manpath root. 931 */ 932 933 if (chdir(manpath) == -1) { 934 warn("chdir %s", manpath); 935 pg_error_internal(); 936 free(manpath); 937 return; 938 } 939 free(manpath); 940 941 if ( ! validate_filename(file)) { 942 pg_error_badrequest( 943 "You specified an invalid manual file."); 944 return; 945 } 946 947 resp_begin_html(200, NULL, file); 948 resp_searchform(req, FOCUS_NONE); 949 resp_show(req, file); 950 resp_end_html(); 951 } 952 953 static void 954 pg_search(const struct req *req) 955 { 956 struct mansearch search; 957 struct manpaths paths; 958 struct manpage *res; 959 char **argv; 960 char *query, *rp, *wp; 961 size_t ressz; 962 int argc; 963 964 /* 965 * Begin by chdir()ing into the root of the manpath. 966 * This way we can pick up the database files, which are 967 * relative to the manpath root. 968 */ 969 970 if (chdir(req->q.manpath) == -1) { 971 warn("chdir %s", req->q.manpath); 972 pg_error_internal(); 973 return; 974 } 975 976 search.arch = req->q.arch; 977 search.sec = req->q.sec; 978 search.outkey = "Nd"; 979 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 980 search.firstmatch = 1; 981 982 paths.sz = 1; 983 paths.paths = mandoc_malloc(sizeof(char *)); 984 paths.paths[0] = mandoc_strdup("."); 985 986 /* 987 * Break apart at spaces with backslash-escaping. 988 */ 989 990 argc = 0; 991 argv = NULL; 992 rp = query = mandoc_strdup(req->q.query); 993 for (;;) { 994 while (isspace((unsigned char)*rp)) 995 rp++; 996 if (*rp == '\0') 997 break; 998 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 999 argv[argc++] = wp = rp; 1000 for (;;) { 1001 if (isspace((unsigned char)*rp)) { 1002 *wp = '\0'; 1003 rp++; 1004 break; 1005 } 1006 if (rp[0] == '\\' && rp[1] != '\0') 1007 rp++; 1008 if (wp != rp) 1009 *wp = *rp; 1010 if (*rp == '\0') 1011 break; 1012 wp++; 1013 rp++; 1014 } 1015 } 1016 1017 res = NULL; 1018 ressz = 0; 1019 if (req->isquery && req->q.equal && argc == 1) 1020 pg_redirect(req, argv[0]); 1021 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1022 pg_noresult(req, 400, "Bad Request", 1023 "You entered an invalid query."); 1024 else if (ressz == 0) 1025 pg_noresult(req, 404, "Not Found", "No results found."); 1026 else 1027 pg_searchres(req, res, ressz); 1028 1029 free(query); 1030 mansearch_free(res, ressz); 1031 free(paths.paths[0]); 1032 free(paths.paths); 1033 } 1034 1035 int 1036 main(void) 1037 { 1038 struct req req; 1039 struct itimerval itimer; 1040 const char *path; 1041 const char *querystring; 1042 int i; 1043 1044 /* 1045 * The "rpath" pledge could be revoked after mparse_readfd() 1046 * if the file desciptor to "/footer.html" would be opened 1047 * up front, but it's probably not worth the complication 1048 * of the code it would cause: it would require scattering 1049 * pledge() calls in multiple low-level resp_*() functions. 1050 */ 1051 1052 if (pledge("stdio rpath", NULL) == -1) { 1053 warn("pledge"); 1054 pg_error_internal(); 1055 return EXIT_FAILURE; 1056 } 1057 1058 /* Poor man's ReDoS mitigation. */ 1059 1060 itimer.it_value.tv_sec = 2; 1061 itimer.it_value.tv_usec = 0; 1062 itimer.it_interval.tv_sec = 2; 1063 itimer.it_interval.tv_usec = 0; 1064 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1065 warn("setitimer"); 1066 pg_error_internal(); 1067 return EXIT_FAILURE; 1068 } 1069 1070 /* 1071 * First we change directory into the MAN_DIR so that 1072 * subsequent scanning for manpath directories is rooted 1073 * relative to the same position. 1074 */ 1075 1076 if (chdir(MAN_DIR) == -1) { 1077 warn("MAN_DIR: %s", MAN_DIR); 1078 pg_error_internal(); 1079 return EXIT_FAILURE; 1080 } 1081 1082 memset(&req, 0, sizeof(struct req)); 1083 req.q.equal = 1; 1084 parse_manpath_conf(&req); 1085 1086 /* Parse the path info and the query string. */ 1087 1088 if ((path = getenv("PATH_INFO")) == NULL) 1089 path = ""; 1090 else if (*path == '/') 1091 path++; 1092 1093 if (*path != '\0') { 1094 parse_path_info(&req, path); 1095 if (req.q.manpath == NULL || req.q.sec == NULL || 1096 *req.q.query == '\0' || access(path, F_OK) == -1) 1097 path = ""; 1098 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1099 parse_query_string(&req, querystring); 1100 1101 /* Validate parsed data and add defaults. */ 1102 1103 if (req.q.manpath == NULL) 1104 req.q.manpath = mandoc_strdup(req.p[0]); 1105 else if ( ! validate_manpath(&req, req.q.manpath)) { 1106 pg_error_badrequest( 1107 "You specified an invalid manpath."); 1108 return EXIT_FAILURE; 1109 } 1110 1111 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1112 pg_error_badrequest( 1113 "You specified an invalid architecture."); 1114 return EXIT_FAILURE; 1115 } 1116 1117 /* Dispatch to the three different pages. */ 1118 1119 if ('\0' != *path) 1120 pg_show(&req, path); 1121 else if (NULL != req.q.query) 1122 pg_search(&req); 1123 else 1124 pg_index(&req); 1125 1126 free(req.q.manpath); 1127 free(req.q.arch); 1128 free(req.q.sec); 1129 free(req.q.query); 1130 for (i = 0; i < (int)req.psz; i++) 1131 free(req.p[i]); 1132 free(req.p); 1133 return EXIT_SUCCESS; 1134 } 1135 1136 /* 1137 * Translate PATH_INFO to a query. 1138 */ 1139 static void 1140 parse_path_info(struct req *req, const char *path) 1141 { 1142 const char *name, *sec, *end; 1143 1144 req->isquery = 0; 1145 req->q.equal = 1; 1146 req->q.manpath = NULL; 1147 req->q.arch = NULL; 1148 1149 /* Mandatory manual page name. */ 1150 if ((name = strrchr(path, '/')) == NULL) 1151 name = path; 1152 else 1153 name++; 1154 1155 /* Optional trailing section. */ 1156 sec = strrchr(name, '.'); 1157 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1158 req->q.query = mandoc_strndup(name, sec - name - 1); 1159 req->q.sec = mandoc_strdup(sec); 1160 } else { 1161 req->q.query = mandoc_strdup(name); 1162 req->q.sec = NULL; 1163 } 1164 1165 /* Handle the case of name[.section] only. */ 1166 if (name == path) 1167 return; 1168 1169 /* Optional manpath. */ 1170 end = strchr(path, '/'); 1171 req->q.manpath = mandoc_strndup(path, end - path); 1172 if (validate_manpath(req, req->q.manpath)) { 1173 path = end + 1; 1174 if (name == path) 1175 return; 1176 } else { 1177 free(req->q.manpath); 1178 req->q.manpath = NULL; 1179 } 1180 1181 /* Optional section. */ 1182 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1183 path += 3; 1184 end = strchr(path, '/'); 1185 free(req->q.sec); 1186 req->q.sec = mandoc_strndup(path, end - path); 1187 path = end + 1; 1188 if (name == path) 1189 return; 1190 } 1191 1192 /* Optional architecture. */ 1193 end = strchr(path, '/'); 1194 if (end + 1 != name) { 1195 pg_error_badrequest( 1196 "You specified too many directory components."); 1197 exit(EXIT_FAILURE); 1198 } 1199 req->q.arch = mandoc_strndup(path, end - path); 1200 if (validate_arch(req->q.arch) == 0) { 1201 pg_error_badrequest( 1202 "You specified an invalid directory component."); 1203 exit(EXIT_FAILURE); 1204 } 1205 } 1206 1207 /* 1208 * Scan for indexable paths. 1209 */ 1210 static void 1211 parse_manpath_conf(struct req *req) 1212 { 1213 FILE *fp; 1214 char *dp; 1215 size_t dpsz; 1216 ssize_t len; 1217 1218 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1219 warn("%s/manpath.conf", MAN_DIR); 1220 pg_error_internal(); 1221 exit(EXIT_FAILURE); 1222 } 1223 1224 dp = NULL; 1225 dpsz = 0; 1226 1227 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1228 if (dp[len - 1] == '\n') 1229 dp[--len] = '\0'; 1230 req->p = mandoc_realloc(req->p, 1231 (req->psz + 1) * sizeof(char *)); 1232 if ( ! validate_urifrag(dp)) { 1233 warnx("%s/manpath.conf contains " 1234 "unsafe path \"%s\"", MAN_DIR, dp); 1235 pg_error_internal(); 1236 exit(EXIT_FAILURE); 1237 } 1238 if (strchr(dp, '/') != NULL) { 1239 warnx("%s/manpath.conf contains " 1240 "path with slash \"%s\"", MAN_DIR, dp); 1241 pg_error_internal(); 1242 exit(EXIT_FAILURE); 1243 } 1244 req->p[req->psz++] = dp; 1245 dp = NULL; 1246 dpsz = 0; 1247 } 1248 free(dp); 1249 1250 if (req->p == NULL) { 1251 warnx("%s/manpath.conf is empty", MAN_DIR); 1252 pg_error_internal(); 1253 exit(EXIT_FAILURE); 1254 } 1255 } 1256