1 /* $OpenBSD: cgi.c,v 1.94 2017/06/24 14:38:27 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "main.h" 38 #include "manconf.h" 39 #include "mansearch.h" 40 #include "cgi.h" 41 42 /* 43 * A query as passed to the search function. 44 */ 45 struct query { 46 char *manpath; /* desired manual directory */ 47 char *arch; /* architecture */ 48 char *sec; /* manual section */ 49 char *query; /* unparsed query expression */ 50 int equal; /* match whole names, not substrings */ 51 }; 52 53 struct req { 54 struct query q; 55 char **p; /* array of available manpaths */ 56 size_t psz; /* number of available manpaths */ 57 int isquery; /* QUERY_STRING used, not PATH_INFO */ 58 }; 59 60 enum focus { 61 FOCUS_NONE = 0, 62 FOCUS_QUERY 63 }; 64 65 static void html_print(const char *); 66 static void html_putchar(char); 67 static int http_decode(char *); 68 static void parse_manpath_conf(struct req *); 69 static void parse_path_info(struct req *req, const char *path); 70 static void parse_query_string(struct req *, const char *); 71 static void pg_error_badrequest(const char *); 72 static void pg_error_internal(void); 73 static void pg_index(const struct req *); 74 static void pg_noresult(const struct req *, const char *); 75 static void pg_redirect(const struct req *, const char *); 76 static void pg_search(const struct req *); 77 static void pg_searchres(const struct req *, 78 struct manpage *, size_t); 79 static void pg_show(struct req *, const char *); 80 static void resp_begin_html(int, const char *, const char *); 81 static void resp_begin_http(int, const char *); 82 static void resp_catman(const struct req *, const char *); 83 static void resp_copy(const char *); 84 static void resp_end_html(void); 85 static void resp_format(const struct req *, const char *); 86 static void resp_searchform(const struct req *, enum focus); 87 static void resp_show(const struct req *, const char *); 88 static void set_query_attr(char **, char **); 89 static int validate_filename(const char *); 90 static int validate_manpath(const struct req *, const char *); 91 static int validate_urifrag(const char *); 92 93 static const char *scriptname = SCRIPT_NAME; 94 95 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 96 static const char *const sec_numbers[] = { 97 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 98 }; 99 static const char *const sec_names[] = { 100 "All Sections", 101 "1 - General Commands", 102 "2 - System Calls", 103 "3 - Library Functions", 104 "3p - Perl Library", 105 "4 - Device Drivers", 106 "5 - File Formats", 107 "6 - Games", 108 "7 - Miscellaneous Information", 109 "8 - System Manager\'s Manual", 110 "9 - Kernel Developer\'s Manual" 111 }; 112 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 113 114 static const char *const arch_names[] = { 115 "amd64", "alpha", "armv7", "arm64", 116 "hppa", "i386", "landisk", 117 "loongson", "luna88k", "macppc", "mips64", 118 "octeon", "sgi", "socppc", "sparc64", 119 "amiga", "arc", "armish", "arm32", 120 "atari", "aviion", "beagle", "cats", 121 "hppa64", "hp300", 122 "ia64", "mac68k", "mvme68k", "mvme88k", 123 "mvmeppc", "palm", "pc532", "pegasos", 124 "pmax", "powerpc", "solbourne", "sparc", 125 "sun3", "vax", "wgrisc", "x68k", 126 "zaurus" 127 }; 128 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 129 130 /* 131 * Print a character, escaping HTML along the way. 132 * This will pass non-ASCII straight to output: be warned! 133 */ 134 static void 135 html_putchar(char c) 136 { 137 138 switch (c) { 139 case '"': 140 printf("""); 141 break; 142 case '&': 143 printf("&"); 144 break; 145 case '>': 146 printf(">"); 147 break; 148 case '<': 149 printf("<"); 150 break; 151 default: 152 putchar((unsigned char)c); 153 break; 154 } 155 } 156 157 /* 158 * Call through to html_putchar(). 159 * Accepts NULL strings. 160 */ 161 static void 162 html_print(const char *p) 163 { 164 165 if (NULL == p) 166 return; 167 while ('\0' != *p) 168 html_putchar(*p++); 169 } 170 171 /* 172 * Transfer the responsibility for the allocated string *val 173 * to the query structure. 174 */ 175 static void 176 set_query_attr(char **attr, char **val) 177 { 178 179 free(*attr); 180 if (**val == '\0') { 181 *attr = NULL; 182 free(*val); 183 } else 184 *attr = *val; 185 *val = NULL; 186 } 187 188 /* 189 * Parse the QUERY_STRING for key-value pairs 190 * and store the values into the query structure. 191 */ 192 static void 193 parse_query_string(struct req *req, const char *qs) 194 { 195 char *key, *val; 196 size_t keysz, valsz; 197 198 req->isquery = 1; 199 req->q.manpath = NULL; 200 req->q.arch = NULL; 201 req->q.sec = NULL; 202 req->q.query = NULL; 203 req->q.equal = 1; 204 205 key = val = NULL; 206 while (*qs != '\0') { 207 208 /* Parse one key. */ 209 210 keysz = strcspn(qs, "=;&"); 211 key = mandoc_strndup(qs, keysz); 212 qs += keysz; 213 if (*qs != '=') 214 goto next; 215 216 /* Parse one value. */ 217 218 valsz = strcspn(++qs, ";&"); 219 val = mandoc_strndup(qs, valsz); 220 qs += valsz; 221 222 /* Decode and catch encoding errors. */ 223 224 if ( ! (http_decode(key) && http_decode(val))) 225 goto next; 226 227 /* Handle key-value pairs. */ 228 229 if ( ! strcmp(key, "query")) 230 set_query_attr(&req->q.query, &val); 231 232 else if ( ! strcmp(key, "apropos")) 233 req->q.equal = !strcmp(val, "0"); 234 235 else if ( ! strcmp(key, "manpath")) { 236 #ifdef COMPAT_OLDURI 237 if ( ! strncmp(val, "OpenBSD ", 8)) { 238 val[7] = '-'; 239 if ('C' == val[8]) 240 val[8] = 'c'; 241 } 242 #endif 243 set_query_attr(&req->q.manpath, &val); 244 } 245 246 else if ( ! (strcmp(key, "sec") 247 #ifdef COMPAT_OLDURI 248 && strcmp(key, "sektion") 249 #endif 250 )) { 251 if ( ! strcmp(val, "0")) 252 *val = '\0'; 253 set_query_attr(&req->q.sec, &val); 254 } 255 256 else if ( ! strcmp(key, "arch")) { 257 if ( ! strcmp(val, "default")) 258 *val = '\0'; 259 set_query_attr(&req->q.arch, &val); 260 } 261 262 /* 263 * The key must be freed in any case. 264 * The val may have been handed over to the query 265 * structure, in which case it is now NULL. 266 */ 267 next: 268 free(key); 269 key = NULL; 270 free(val); 271 val = NULL; 272 273 if (*qs != '\0') 274 qs++; 275 } 276 } 277 278 /* 279 * HTTP-decode a string. The standard explanation is that this turns 280 * "%4e+foo" into "n foo" in the regular way. This is done in-place 281 * over the allocated string. 282 */ 283 static int 284 http_decode(char *p) 285 { 286 char hex[3]; 287 char *q; 288 int c; 289 290 hex[2] = '\0'; 291 292 q = p; 293 for ( ; '\0' != *p; p++, q++) { 294 if ('%' == *p) { 295 if ('\0' == (hex[0] = *(p + 1))) 296 return 0; 297 if ('\0' == (hex[1] = *(p + 2))) 298 return 0; 299 if (1 != sscanf(hex, "%x", &c)) 300 return 0; 301 if ('\0' == c) 302 return 0; 303 304 *q = (char)c; 305 p += 2; 306 } else 307 *q = '+' == *p ? ' ' : *p; 308 } 309 310 *q = '\0'; 311 return 1; 312 } 313 314 static void 315 resp_begin_http(int code, const char *msg) 316 { 317 318 if (200 != code) 319 printf("Status: %d %s\r\n", code, msg); 320 321 printf("Content-Type: text/html; charset=utf-8\r\n" 322 "Cache-Control: no-cache\r\n" 323 "Pragma: no-cache\r\n" 324 "\r\n"); 325 326 fflush(stdout); 327 } 328 329 static void 330 resp_copy(const char *filename) 331 { 332 char buf[4096]; 333 ssize_t sz; 334 int fd; 335 336 if ((fd = open(filename, O_RDONLY)) != -1) { 337 fflush(stdout); 338 while ((sz = read(fd, buf, sizeof(buf))) > 0) 339 write(STDOUT_FILENO, buf, sz); 340 close(fd); 341 } 342 } 343 344 static void 345 resp_begin_html(int code, const char *msg, const char *file) 346 { 347 char *cp; 348 349 resp_begin_http(code, msg); 350 351 printf("<!DOCTYPE html>\n" 352 "<html>\n" 353 "<head>\n" 354 " <meta charset=\"UTF-8\"/>\n" 355 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 356 " type=\"text/css\" media=\"all\">\n" 357 " <title>", 358 CSS_DIR); 359 if (file != NULL) { 360 if ((cp = strrchr(file, '/')) != NULL) 361 file = cp + 1; 362 if ((cp = strrchr(file, '.')) != NULL) { 363 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 364 } else 365 printf("%s - ", file); 366 } 367 printf("%s</title>\n" 368 "</head>\n" 369 "<body>\n", 370 CUSTOMIZE_TITLE); 371 372 resp_copy(MAN_DIR "/header.html"); 373 } 374 375 static void 376 resp_end_html(void) 377 { 378 379 resp_copy(MAN_DIR "/footer.html"); 380 381 puts("</body>\n" 382 "</html>"); 383 } 384 385 static void 386 resp_searchform(const struct req *req, enum focus focus) 387 { 388 int i; 389 390 printf("<form action=\"/%s\" method=\"get\">\n" 391 " <fieldset>\n" 392 " <legend>Manual Page Search Parameters</legend>\n", 393 scriptname); 394 395 /* Write query input box. */ 396 397 printf(" <input type=\"text\" name=\"query\" value=\""); 398 if (req->q.query != NULL) 399 html_print(req->q.query); 400 printf( "\" size=\"40\""); 401 if (focus == FOCUS_QUERY) 402 printf(" autofocus"); 403 puts(">"); 404 405 /* Write submission buttons. */ 406 407 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 408 "man</button>\n" 409 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 410 "apropos</button>\n" 411 " <br/>\n"); 412 413 /* Write section selector. */ 414 415 puts(" <select name=\"sec\">"); 416 for (i = 0; i < sec_MAX; i++) { 417 printf(" <option value=\"%s\"", sec_numbers[i]); 418 if (NULL != req->q.sec && 419 0 == strcmp(sec_numbers[i], req->q.sec)) 420 printf(" selected=\"selected\""); 421 printf(">%s</option>\n", sec_names[i]); 422 } 423 puts(" </select>"); 424 425 /* Write architecture selector. */ 426 427 printf( " <select name=\"arch\">\n" 428 " <option value=\"default\""); 429 if (NULL == req->q.arch) 430 printf(" selected=\"selected\""); 431 puts(">All Architectures</option>"); 432 for (i = 0; i < arch_MAX; i++) { 433 printf(" <option value=\"%s\"", arch_names[i]); 434 if (NULL != req->q.arch && 435 0 == strcmp(arch_names[i], req->q.arch)) 436 printf(" selected=\"selected\""); 437 printf(">%s</option>\n", arch_names[i]); 438 } 439 puts(" </select>"); 440 441 /* Write manpath selector. */ 442 443 if (req->psz > 1) { 444 puts(" <select name=\"manpath\">"); 445 for (i = 0; i < (int)req->psz; i++) { 446 printf(" <option "); 447 if (strcmp(req->q.manpath, req->p[i]) == 0) 448 printf("selected=\"selected\" "); 449 printf("value=\""); 450 html_print(req->p[i]); 451 printf("\">"); 452 html_print(req->p[i]); 453 puts("</option>"); 454 } 455 puts(" </select>"); 456 } 457 458 puts(" </fieldset>\n" 459 "</form>"); 460 } 461 462 static int 463 validate_urifrag(const char *frag) 464 { 465 466 while ('\0' != *frag) { 467 if ( ! (isalnum((unsigned char)*frag) || 468 '-' == *frag || '.' == *frag || 469 '/' == *frag || '_' == *frag)) 470 return 0; 471 frag++; 472 } 473 return 1; 474 } 475 476 static int 477 validate_manpath(const struct req *req, const char* manpath) 478 { 479 size_t i; 480 481 for (i = 0; i < req->psz; i++) 482 if ( ! strcmp(manpath, req->p[i])) 483 return 1; 484 485 return 0; 486 } 487 488 static int 489 validate_filename(const char *file) 490 { 491 492 if ('.' == file[0] && '/' == file[1]) 493 file += 2; 494 495 return ! (strstr(file, "../") || strstr(file, "/..") || 496 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 497 } 498 499 static void 500 pg_index(const struct req *req) 501 { 502 503 resp_begin_html(200, NULL, NULL); 504 resp_searchform(req, FOCUS_QUERY); 505 printf("<p>\n" 506 "This web interface is documented in the\n" 507 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 508 "manual, and the\n" 509 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 510 "manual explains the query syntax.\n" 511 "</p>\n", 512 scriptname, *scriptname == '\0' ? "" : "/", 513 scriptname, *scriptname == '\0' ? "" : "/"); 514 resp_end_html(); 515 } 516 517 static void 518 pg_noresult(const struct req *req, const char *msg) 519 { 520 resp_begin_html(200, NULL, NULL); 521 resp_searchform(req, FOCUS_QUERY); 522 puts("<p>"); 523 puts(msg); 524 puts("</p>"); 525 resp_end_html(); 526 } 527 528 static void 529 pg_error_badrequest(const char *msg) 530 { 531 532 resp_begin_html(400, "Bad Request", NULL); 533 puts("<h1>Bad Request</h1>\n" 534 "<p>\n"); 535 puts(msg); 536 printf("Try again from the\n" 537 "<a href=\"/%s\">main page</a>.\n" 538 "</p>", scriptname); 539 resp_end_html(); 540 } 541 542 static void 543 pg_error_internal(void) 544 { 545 resp_begin_html(500, "Internal Server Error", NULL); 546 puts("<p>Internal Server Error</p>"); 547 resp_end_html(); 548 } 549 550 static void 551 pg_redirect(const struct req *req, const char *name) 552 { 553 printf("Status: 303 See Other\r\n" 554 "Location: /"); 555 if (*scriptname != '\0') 556 printf("%s/", scriptname); 557 if (strcmp(req->q.manpath, req->p[0])) 558 printf("%s/", req->q.manpath); 559 if (req->q.arch != NULL) 560 printf("%s/", req->q.arch); 561 printf("%s", name); 562 if (req->q.sec != NULL) 563 printf(".%s", req->q.sec); 564 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 565 } 566 567 static void 568 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 569 { 570 char *arch, *archend; 571 const char *sec; 572 size_t i, iuse; 573 int archprio, archpriouse; 574 int prio, priouse; 575 576 for (i = 0; i < sz; i++) { 577 if (validate_filename(r[i].file)) 578 continue; 579 warnx("invalid filename %s in %s database", 580 r[i].file, req->q.manpath); 581 pg_error_internal(); 582 return; 583 } 584 585 if (req->isquery && sz == 1) { 586 /* 587 * If we have just one result, then jump there now 588 * without any delay. 589 */ 590 printf("Status: 303 See Other\r\n" 591 "Location: /"); 592 if (*scriptname != '\0') 593 printf("%s/", scriptname); 594 if (strcmp(req->q.manpath, req->p[0])) 595 printf("%s/", req->q.manpath); 596 printf("%s\r\n" 597 "Content-Type: text/html; charset=utf-8\r\n\r\n", 598 r[0].file); 599 return; 600 } 601 602 /* 603 * In man(1) mode, show one of the pages 604 * even if more than one is found. 605 */ 606 607 iuse = 0; 608 if (req->q.equal || sz == 1) { 609 priouse = 20; 610 archpriouse = 3; 611 for (i = 0; i < sz; i++) { 612 sec = r[i].file; 613 sec += strcspn(sec, "123456789"); 614 if (sec[0] == '\0') 615 continue; 616 prio = sec_prios[sec[0] - '1']; 617 if (sec[1] != '/') 618 prio += 10; 619 if (req->q.arch == NULL) { 620 archprio = 621 ((arch = strchr(sec + 1, '/')) 622 == NULL) ? 3 : 623 ((archend = strchr(arch + 1, '/')) 624 == NULL) ? 0 : 625 strncmp(arch, "amd64/", 626 archend - arch) ? 2 : 1; 627 if (archprio < archpriouse) { 628 archpriouse = archprio; 629 priouse = prio; 630 iuse = i; 631 continue; 632 } 633 if (archprio > archpriouse) 634 continue; 635 } 636 if (prio >= priouse) 637 continue; 638 priouse = prio; 639 iuse = i; 640 } 641 resp_begin_html(200, NULL, r[iuse].file); 642 } else 643 resp_begin_html(200, NULL, NULL); 644 645 resp_searchform(req, 646 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 647 648 if (sz > 1) { 649 puts("<table class=\"results\">"); 650 for (i = 0; i < sz; i++) { 651 printf(" <tr>\n" 652 " <td>" 653 "<a class=\"Xr\" href=\"/"); 654 if (*scriptname != '\0') 655 printf("%s/", scriptname); 656 if (strcmp(req->q.manpath, req->p[0])) 657 printf("%s/", req->q.manpath); 658 printf("%s\">", r[i].file); 659 html_print(r[i].names); 660 printf("</a></td>\n" 661 " <td><span class=\"Nd\">"); 662 html_print(r[i].output); 663 puts("</span></td>\n" 664 " </tr>"); 665 } 666 puts("</table>"); 667 } 668 669 if (req->q.equal || sz == 1) { 670 puts("<hr>"); 671 resp_show(req, r[iuse].file); 672 } 673 674 resp_end_html(); 675 } 676 677 static void 678 resp_catman(const struct req *req, const char *file) 679 { 680 FILE *f; 681 char *p; 682 size_t sz; 683 ssize_t len; 684 int i; 685 int italic, bold; 686 687 if ((f = fopen(file, "r")) == NULL) { 688 puts("<p>You specified an invalid manual file.</p>"); 689 return; 690 } 691 692 puts("<div class=\"catman\">\n" 693 "<pre>"); 694 695 p = NULL; 696 sz = 0; 697 698 while ((len = getline(&p, &sz, f)) != -1) { 699 bold = italic = 0; 700 for (i = 0; i < len - 1; i++) { 701 /* 702 * This means that the catpage is out of state. 703 * Ignore it and keep going (although the 704 * catpage is bogus). 705 */ 706 707 if ('\b' == p[i] || '\n' == p[i]) 708 continue; 709 710 /* 711 * Print a regular character. 712 * Close out any bold/italic scopes. 713 * If we're in back-space mode, make sure we'll 714 * have something to enter when we backspace. 715 */ 716 717 if ('\b' != p[i + 1]) { 718 if (italic) 719 printf("</i>"); 720 if (bold) 721 printf("</b>"); 722 italic = bold = 0; 723 html_putchar(p[i]); 724 continue; 725 } else if (i + 2 >= len) 726 continue; 727 728 /* Italic mode. */ 729 730 if ('_' == p[i]) { 731 if (bold) 732 printf("</b>"); 733 if ( ! italic) 734 printf("<i>"); 735 bold = 0; 736 italic = 1; 737 i += 2; 738 html_putchar(p[i]); 739 continue; 740 } 741 742 /* 743 * Handle funny behaviour troff-isms. 744 * These grok'd from the original man2html.c. 745 */ 746 747 if (('+' == p[i] && 'o' == p[i + 2]) || 748 ('o' == p[i] && '+' == p[i + 2]) || 749 ('|' == p[i] && '=' == p[i + 2]) || 750 ('=' == p[i] && '|' == p[i + 2]) || 751 ('*' == p[i] && '=' == p[i + 2]) || 752 ('=' == p[i] && '*' == p[i + 2]) || 753 ('*' == p[i] && '|' == p[i + 2]) || 754 ('|' == p[i] && '*' == p[i + 2])) { 755 if (italic) 756 printf("</i>"); 757 if (bold) 758 printf("</b>"); 759 italic = bold = 0; 760 putchar('*'); 761 i += 2; 762 continue; 763 } else if (('|' == p[i] && '-' == p[i + 2]) || 764 ('-' == p[i] && '|' == p[i + 1]) || 765 ('+' == p[i] && '-' == p[i + 1]) || 766 ('-' == p[i] && '+' == p[i + 1]) || 767 ('+' == p[i] && '|' == p[i + 1]) || 768 ('|' == p[i] && '+' == p[i + 1])) { 769 if (italic) 770 printf("</i>"); 771 if (bold) 772 printf("</b>"); 773 italic = bold = 0; 774 putchar('+'); 775 i += 2; 776 continue; 777 } 778 779 /* Bold mode. */ 780 781 if (italic) 782 printf("</i>"); 783 if ( ! bold) 784 printf("<b>"); 785 bold = 1; 786 italic = 0; 787 i += 2; 788 html_putchar(p[i]); 789 } 790 791 /* 792 * Clean up the last character. 793 * We can get to a newline; don't print that. 794 */ 795 796 if (italic) 797 printf("</i>"); 798 if (bold) 799 printf("</b>"); 800 801 if (i == len - 1 && p[i] != '\n') 802 html_putchar(p[i]); 803 804 putchar('\n'); 805 } 806 free(p); 807 808 puts("</pre>\n" 809 "</div>"); 810 811 fclose(f); 812 } 813 814 static void 815 resp_format(const struct req *req, const char *file) 816 { 817 struct manoutput conf; 818 struct mparse *mp; 819 struct roff_man *man; 820 void *vp; 821 int fd; 822 int usepath; 823 824 if (-1 == (fd = open(file, O_RDONLY, 0))) { 825 puts("<p>You specified an invalid manual file.</p>"); 826 return; 827 } 828 829 mchars_alloc(); 830 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1, 831 MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath); 832 mparse_readfd(mp, fd, file); 833 close(fd); 834 835 memset(&conf, 0, sizeof(conf)); 836 conf.fragment = 1; 837 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 838 usepath = strcmp(req->q.manpath, req->p[0]); 839 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 840 scriptname, *scriptname == '\0' ? "" : "/", 841 usepath ? req->q.manpath : "", usepath ? "/" : ""); 842 843 mparse_result(mp, &man, NULL); 844 if (man == NULL) { 845 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 846 pg_error_internal(); 847 mparse_free(mp); 848 mchars_free(); 849 return; 850 } 851 852 vp = html_alloc(&conf); 853 854 if (man->macroset == MACROSET_MDOC) { 855 mdoc_validate(man); 856 html_mdoc(vp, man); 857 } else { 858 man_validate(man); 859 html_man(vp, man); 860 } 861 862 html_free(vp); 863 mparse_free(mp); 864 mchars_free(); 865 free(conf.man); 866 free(conf.style); 867 } 868 869 static void 870 resp_show(const struct req *req, const char *file) 871 { 872 873 if ('.' == file[0] && '/' == file[1]) 874 file += 2; 875 876 if ('c' == *file) 877 resp_catman(req, file); 878 else 879 resp_format(req, file); 880 } 881 882 static void 883 pg_show(struct req *req, const char *fullpath) 884 { 885 char *manpath; 886 const char *file; 887 888 if ((file = strchr(fullpath, '/')) == NULL) { 889 pg_error_badrequest( 890 "You did not specify a page to show."); 891 return; 892 } 893 manpath = mandoc_strndup(fullpath, file - fullpath); 894 file++; 895 896 if ( ! validate_manpath(req, manpath)) { 897 pg_error_badrequest( 898 "You specified an invalid manpath."); 899 free(manpath); 900 return; 901 } 902 903 /* 904 * Begin by chdir()ing into the manpath. 905 * This way we can pick up the database files, which are 906 * relative to the manpath root. 907 */ 908 909 if (chdir(manpath) == -1) { 910 warn("chdir %s", manpath); 911 pg_error_internal(); 912 free(manpath); 913 return; 914 } 915 free(manpath); 916 917 if ( ! validate_filename(file)) { 918 pg_error_badrequest( 919 "You specified an invalid manual file."); 920 return; 921 } 922 923 resp_begin_html(200, NULL, file); 924 resp_searchform(req, FOCUS_NONE); 925 resp_show(req, file); 926 resp_end_html(); 927 } 928 929 static void 930 pg_search(const struct req *req) 931 { 932 struct mansearch search; 933 struct manpaths paths; 934 struct manpage *res; 935 char **argv; 936 char *query, *rp, *wp; 937 size_t ressz; 938 int argc; 939 940 /* 941 * Begin by chdir()ing into the root of the manpath. 942 * This way we can pick up the database files, which are 943 * relative to the manpath root. 944 */ 945 946 if (chdir(req->q.manpath) == -1) { 947 warn("chdir %s", req->q.manpath); 948 pg_error_internal(); 949 return; 950 } 951 952 search.arch = req->q.arch; 953 search.sec = req->q.sec; 954 search.outkey = "Nd"; 955 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 956 search.firstmatch = 1; 957 958 paths.sz = 1; 959 paths.paths = mandoc_malloc(sizeof(char *)); 960 paths.paths[0] = mandoc_strdup("."); 961 962 /* 963 * Break apart at spaces with backslash-escaping. 964 */ 965 966 argc = 0; 967 argv = NULL; 968 rp = query = mandoc_strdup(req->q.query); 969 for (;;) { 970 while (isspace((unsigned char)*rp)) 971 rp++; 972 if (*rp == '\0') 973 break; 974 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 975 argv[argc++] = wp = rp; 976 for (;;) { 977 if (isspace((unsigned char)*rp)) { 978 *wp = '\0'; 979 rp++; 980 break; 981 } 982 if (rp[0] == '\\' && rp[1] != '\0') 983 rp++; 984 if (wp != rp) 985 *wp = *rp; 986 if (*rp == '\0') 987 break; 988 wp++; 989 rp++; 990 } 991 } 992 993 res = NULL; 994 ressz = 0; 995 if (req->isquery && req->q.equal && argc == 1) 996 pg_redirect(req, argv[0]); 997 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 998 pg_noresult(req, "You entered an invalid query."); 999 else if (ressz == 0) 1000 pg_noresult(req, "No results found."); 1001 else 1002 pg_searchres(req, res, ressz); 1003 1004 free(query); 1005 mansearch_free(res, ressz); 1006 free(paths.paths[0]); 1007 free(paths.paths); 1008 } 1009 1010 int 1011 main(void) 1012 { 1013 struct req req; 1014 struct itimerval itimer; 1015 const char *path; 1016 const char *querystring; 1017 int i; 1018 1019 /* 1020 * The "rpath" pledge could be revoked after mparse_readfd() 1021 * if the file desciptor to "/footer.html" would be opened 1022 * up front, but it's probably not worth the complication 1023 * of the code it would cause: it would require scattering 1024 * pledge() calls in multiple low-level resp_*() functions. 1025 */ 1026 1027 if (pledge("stdio rpath", NULL) == -1) { 1028 warn("pledge"); 1029 pg_error_internal(); 1030 return EXIT_FAILURE; 1031 } 1032 1033 /* Poor man's ReDoS mitigation. */ 1034 1035 itimer.it_value.tv_sec = 2; 1036 itimer.it_value.tv_usec = 0; 1037 itimer.it_interval.tv_sec = 2; 1038 itimer.it_interval.tv_usec = 0; 1039 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1040 warn("setitimer"); 1041 pg_error_internal(); 1042 return EXIT_FAILURE; 1043 } 1044 1045 /* 1046 * First we change directory into the MAN_DIR so that 1047 * subsequent scanning for manpath directories is rooted 1048 * relative to the same position. 1049 */ 1050 1051 if (chdir(MAN_DIR) == -1) { 1052 warn("MAN_DIR: %s", MAN_DIR); 1053 pg_error_internal(); 1054 return EXIT_FAILURE; 1055 } 1056 1057 memset(&req, 0, sizeof(struct req)); 1058 req.q.equal = 1; 1059 parse_manpath_conf(&req); 1060 1061 /* Parse the path info and the query string. */ 1062 1063 if ((path = getenv("PATH_INFO")) == NULL) 1064 path = ""; 1065 else if (*path == '/') 1066 path++; 1067 1068 if (*path != '\0') { 1069 parse_path_info(&req, path); 1070 if (req.q.manpath == NULL || req.q.sec == NULL || 1071 *req.q.query == '\0' || access(path, F_OK) == -1) 1072 path = ""; 1073 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1074 parse_query_string(&req, querystring); 1075 1076 /* Validate parsed data and add defaults. */ 1077 1078 if (req.q.manpath == NULL) 1079 req.q.manpath = mandoc_strdup(req.p[0]); 1080 else if ( ! validate_manpath(&req, req.q.manpath)) { 1081 pg_error_badrequest( 1082 "You specified an invalid manpath."); 1083 return EXIT_FAILURE; 1084 } 1085 1086 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1087 pg_error_badrequest( 1088 "You specified an invalid architecture."); 1089 return EXIT_FAILURE; 1090 } 1091 1092 /* Dispatch to the three different pages. */ 1093 1094 if ('\0' != *path) 1095 pg_show(&req, path); 1096 else if (NULL != req.q.query) 1097 pg_search(&req); 1098 else 1099 pg_index(&req); 1100 1101 free(req.q.manpath); 1102 free(req.q.arch); 1103 free(req.q.sec); 1104 free(req.q.query); 1105 for (i = 0; i < (int)req.psz; i++) 1106 free(req.p[i]); 1107 free(req.p); 1108 return EXIT_SUCCESS; 1109 } 1110 1111 /* 1112 * If PATH_INFO is not a file name, translate it to a query. 1113 */ 1114 static void 1115 parse_path_info(struct req *req, const char *path) 1116 { 1117 char *dir[4]; 1118 int i; 1119 1120 req->isquery = 0; 1121 req->q.equal = 1; 1122 req->q.manpath = mandoc_strdup(path); 1123 req->q.arch = NULL; 1124 1125 /* Mandatory manual page name. */ 1126 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1127 req->q.query = req->q.manpath; 1128 req->q.manpath = NULL; 1129 } else 1130 *req->q.query++ = '\0'; 1131 1132 /* Optional trailing section. */ 1133 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1134 if(isdigit((unsigned char)req->q.sec[1])) { 1135 *req->q.sec++ = '\0'; 1136 req->q.sec = mandoc_strdup(req->q.sec); 1137 } else 1138 req->q.sec = NULL; 1139 } 1140 1141 /* Handle the case of name[.section] only. */ 1142 if (req->q.manpath == NULL) 1143 return; 1144 req->q.query = mandoc_strdup(req->q.query); 1145 1146 /* Split directory components. */ 1147 dir[i = 0] = req->q.manpath; 1148 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) { 1149 if (++i == 3) { 1150 pg_error_badrequest( 1151 "You specified too many directory components."); 1152 exit(EXIT_FAILURE); 1153 } 1154 *dir[i]++ = '\0'; 1155 } 1156 1157 /* Optional manpath. */ 1158 if ((i = validate_manpath(req, req->q.manpath)) == 0) 1159 req->q.manpath = NULL; 1160 else if (dir[1] == NULL) 1161 return; 1162 1163 /* Optional section. */ 1164 if (strncmp(dir[i], "man", 3) == 0) { 1165 free(req->q.sec); 1166 req->q.sec = mandoc_strdup(dir[i++] + 3); 1167 } 1168 if (dir[i] == NULL) { 1169 if (req->q.manpath == NULL) 1170 free(dir[0]); 1171 return; 1172 } 1173 if (dir[i + 1] != NULL) { 1174 pg_error_badrequest( 1175 "You specified an invalid directory component."); 1176 exit(EXIT_FAILURE); 1177 } 1178 1179 /* Optional architecture. */ 1180 if (i) { 1181 req->q.arch = mandoc_strdup(dir[i]); 1182 if (req->q.manpath == NULL) 1183 free(dir[0]); 1184 } else 1185 req->q.arch = dir[0]; 1186 } 1187 1188 /* 1189 * Scan for indexable paths. 1190 */ 1191 static void 1192 parse_manpath_conf(struct req *req) 1193 { 1194 FILE *fp; 1195 char *dp; 1196 size_t dpsz; 1197 ssize_t len; 1198 1199 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1200 warn("%s/manpath.conf", MAN_DIR); 1201 pg_error_internal(); 1202 exit(EXIT_FAILURE); 1203 } 1204 1205 dp = NULL; 1206 dpsz = 0; 1207 1208 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1209 if (dp[len - 1] == '\n') 1210 dp[--len] = '\0'; 1211 req->p = mandoc_realloc(req->p, 1212 (req->psz + 1) * sizeof(char *)); 1213 if ( ! validate_urifrag(dp)) { 1214 warnx("%s/manpath.conf contains " 1215 "unsafe path \"%s\"", MAN_DIR, dp); 1216 pg_error_internal(); 1217 exit(EXIT_FAILURE); 1218 } 1219 if (strchr(dp, '/') != NULL) { 1220 warnx("%s/manpath.conf contains " 1221 "path with slash \"%s\"", MAN_DIR, dp); 1222 pg_error_internal(); 1223 exit(EXIT_FAILURE); 1224 } 1225 req->p[req->psz++] = dp; 1226 dp = NULL; 1227 dpsz = 0; 1228 } 1229 free(dp); 1230 1231 if (req->p == NULL) { 1232 warnx("%s/manpath.conf is empty", MAN_DIR); 1233 pg_error_internal(); 1234 exit(EXIT_FAILURE); 1235 } 1236 } 1237