1 /* $OpenBSD: cgi.c,v 1.99 2018/10/19 21:10:00 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "main.h" 38 #include "manconf.h" 39 #include "mansearch.h" 40 #include "cgi.h" 41 42 /* 43 * A query as passed to the search function. 44 */ 45 struct query { 46 char *manpath; /* desired manual directory */ 47 char *arch; /* architecture */ 48 char *sec; /* manual section */ 49 char *query; /* unparsed query expression */ 50 int equal; /* match whole names, not substrings */ 51 }; 52 53 struct req { 54 struct query q; 55 char **p; /* array of available manpaths */ 56 size_t psz; /* number of available manpaths */ 57 int isquery; /* QUERY_STRING used, not PATH_INFO */ 58 }; 59 60 enum focus { 61 FOCUS_NONE = 0, 62 FOCUS_QUERY 63 }; 64 65 static void html_print(const char *); 66 static void html_putchar(char); 67 static int http_decode(char *); 68 static void http_encode(const char *p); 69 static void parse_manpath_conf(struct req *); 70 static void parse_path_info(struct req *req, const char *path); 71 static void parse_query_string(struct req *, const char *); 72 static void pg_error_badrequest(const char *); 73 static void pg_error_internal(void); 74 static void pg_index(const struct req *); 75 static void pg_noresult(const struct req *, const char *); 76 static void pg_redirect(const struct req *, const char *); 77 static void pg_search(const struct req *); 78 static void pg_searchres(const struct req *, 79 struct manpage *, size_t); 80 static void pg_show(struct req *, const char *); 81 static void resp_begin_html(int, const char *, const char *); 82 static void resp_begin_http(int, const char *); 83 static void resp_catman(const struct req *, const char *); 84 static void resp_copy(const char *); 85 static void resp_end_html(void); 86 static void resp_format(const struct req *, const char *); 87 static void resp_searchform(const struct req *, enum focus); 88 static void resp_show(const struct req *, const char *); 89 static void set_query_attr(char **, char **); 90 static int validate_arch(const char *); 91 static int validate_filename(const char *); 92 static int validate_manpath(const struct req *, const char *); 93 static int validate_urifrag(const char *); 94 95 static const char *scriptname = SCRIPT_NAME; 96 97 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 98 static const char *const sec_numbers[] = { 99 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 100 }; 101 static const char *const sec_names[] = { 102 "All Sections", 103 "1 - General Commands", 104 "2 - System Calls", 105 "3 - Library Functions", 106 "3p - Perl Library", 107 "4 - Device Drivers", 108 "5 - File Formats", 109 "6 - Games", 110 "7 - Miscellaneous Information", 111 "8 - System Manager\'s Manual", 112 "9 - Kernel Developer\'s Manual" 113 }; 114 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 115 116 static const char *const arch_names[] = { 117 "amd64", "alpha", "armv7", "arm64", 118 "hppa", "i386", "landisk", 119 "loongson", "luna88k", "macppc", "mips64", 120 "octeon", "sgi", "socppc", "sparc64", 121 "amiga", "arc", "armish", "arm32", 122 "atari", "aviion", "beagle", "cats", 123 "hppa64", "hp300", 124 "ia64", "mac68k", "mvme68k", "mvme88k", 125 "mvmeppc", "palm", "pc532", "pegasos", 126 "pmax", "powerpc", "solbourne", "sparc", 127 "sun3", "vax", "wgrisc", "x68k", 128 "zaurus" 129 }; 130 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 131 132 /* 133 * Print a character, escaping HTML along the way. 134 * This will pass non-ASCII straight to output: be warned! 135 */ 136 static void 137 html_putchar(char c) 138 { 139 140 switch (c) { 141 case '"': 142 printf("""); 143 break; 144 case '&': 145 printf("&"); 146 break; 147 case '>': 148 printf(">"); 149 break; 150 case '<': 151 printf("<"); 152 break; 153 default: 154 putchar((unsigned char)c); 155 break; 156 } 157 } 158 159 /* 160 * Call through to html_putchar(). 161 * Accepts NULL strings. 162 */ 163 static void 164 html_print(const char *p) 165 { 166 167 if (NULL == p) 168 return; 169 while ('\0' != *p) 170 html_putchar(*p++); 171 } 172 173 /* 174 * Transfer the responsibility for the allocated string *val 175 * to the query structure. 176 */ 177 static void 178 set_query_attr(char **attr, char **val) 179 { 180 181 free(*attr); 182 if (**val == '\0') { 183 *attr = NULL; 184 free(*val); 185 } else 186 *attr = *val; 187 *val = NULL; 188 } 189 190 /* 191 * Parse the QUERY_STRING for key-value pairs 192 * and store the values into the query structure. 193 */ 194 static void 195 parse_query_string(struct req *req, const char *qs) 196 { 197 char *key, *val; 198 size_t keysz, valsz; 199 200 req->isquery = 1; 201 req->q.manpath = NULL; 202 req->q.arch = NULL; 203 req->q.sec = NULL; 204 req->q.query = NULL; 205 req->q.equal = 1; 206 207 key = val = NULL; 208 while (*qs != '\0') { 209 210 /* Parse one key. */ 211 212 keysz = strcspn(qs, "=;&"); 213 key = mandoc_strndup(qs, keysz); 214 qs += keysz; 215 if (*qs != '=') 216 goto next; 217 218 /* Parse one value. */ 219 220 valsz = strcspn(++qs, ";&"); 221 val = mandoc_strndup(qs, valsz); 222 qs += valsz; 223 224 /* Decode and catch encoding errors. */ 225 226 if ( ! (http_decode(key) && http_decode(val))) 227 goto next; 228 229 /* Handle key-value pairs. */ 230 231 if ( ! strcmp(key, "query")) 232 set_query_attr(&req->q.query, &val); 233 234 else if ( ! strcmp(key, "apropos")) 235 req->q.equal = !strcmp(val, "0"); 236 237 else if ( ! strcmp(key, "manpath")) { 238 #ifdef COMPAT_OLDURI 239 if ( ! strncmp(val, "OpenBSD ", 8)) { 240 val[7] = '-'; 241 if ('C' == val[8]) 242 val[8] = 'c'; 243 } 244 #endif 245 set_query_attr(&req->q.manpath, &val); 246 } 247 248 else if ( ! (strcmp(key, "sec") 249 #ifdef COMPAT_OLDURI 250 && strcmp(key, "sektion") 251 #endif 252 )) { 253 if ( ! strcmp(val, "0")) 254 *val = '\0'; 255 set_query_attr(&req->q.sec, &val); 256 } 257 258 else if ( ! strcmp(key, "arch")) { 259 if ( ! strcmp(val, "default")) 260 *val = '\0'; 261 set_query_attr(&req->q.arch, &val); 262 } 263 264 /* 265 * The key must be freed in any case. 266 * The val may have been handed over to the query 267 * structure, in which case it is now NULL. 268 */ 269 next: 270 free(key); 271 key = NULL; 272 free(val); 273 val = NULL; 274 275 if (*qs != '\0') 276 qs++; 277 } 278 } 279 280 /* 281 * HTTP-decode a string. The standard explanation is that this turns 282 * "%4e+foo" into "n foo" in the regular way. This is done in-place 283 * over the allocated string. 284 */ 285 static int 286 http_decode(char *p) 287 { 288 char hex[3]; 289 char *q; 290 int c; 291 292 hex[2] = '\0'; 293 294 q = p; 295 for ( ; '\0' != *p; p++, q++) { 296 if ('%' == *p) { 297 if ('\0' == (hex[0] = *(p + 1))) 298 return 0; 299 if ('\0' == (hex[1] = *(p + 2))) 300 return 0; 301 if (1 != sscanf(hex, "%x", &c)) 302 return 0; 303 if ('\0' == c) 304 return 0; 305 306 *q = (char)c; 307 p += 2; 308 } else 309 *q = '+' == *p ? ' ' : *p; 310 } 311 312 *q = '\0'; 313 return 1; 314 } 315 316 static void 317 http_encode(const char *p) 318 { 319 for (; *p != '\0'; p++) { 320 if (isalnum((unsigned char)*p) == 0 && 321 strchr("-._~", *p) == NULL) 322 printf("%%%02.2X", (unsigned char)*p); 323 else 324 putchar(*p); 325 } 326 } 327 328 static void 329 resp_begin_http(int code, const char *msg) 330 { 331 332 if (200 != code) 333 printf("Status: %d %s\r\n", code, msg); 334 335 printf("Content-Type: text/html; charset=utf-8\r\n" 336 "Cache-Control: no-cache\r\n" 337 "Pragma: no-cache\r\n" 338 "\r\n"); 339 340 fflush(stdout); 341 } 342 343 static void 344 resp_copy(const char *filename) 345 { 346 char buf[4096]; 347 ssize_t sz; 348 int fd; 349 350 if ((fd = open(filename, O_RDONLY)) != -1) { 351 fflush(stdout); 352 while ((sz = read(fd, buf, sizeof(buf))) > 0) 353 write(STDOUT_FILENO, buf, sz); 354 close(fd); 355 } 356 } 357 358 static void 359 resp_begin_html(int code, const char *msg, const char *file) 360 { 361 char *cp; 362 363 resp_begin_http(code, msg); 364 365 printf("<!DOCTYPE html>\n" 366 "<html>\n" 367 "<head>\n" 368 " <meta charset=\"UTF-8\"/>\n" 369 " <meta name=\"viewport\"" 370 " content=\"width=device-width, initial-scale=1.0\">\n" 371 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 372 " type=\"text/css\" media=\"all\">\n" 373 " <title>", 374 CSS_DIR); 375 if (file != NULL) { 376 if ((cp = strrchr(file, '/')) != NULL) 377 file = cp + 1; 378 if ((cp = strrchr(file, '.')) != NULL) { 379 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 380 } else 381 printf("%s - ", file); 382 } 383 printf("%s</title>\n" 384 "</head>\n" 385 "<body>\n", 386 CUSTOMIZE_TITLE); 387 388 resp_copy(MAN_DIR "/header.html"); 389 } 390 391 static void 392 resp_end_html(void) 393 { 394 395 resp_copy(MAN_DIR "/footer.html"); 396 397 puts("</body>\n" 398 "</html>"); 399 } 400 401 static void 402 resp_searchform(const struct req *req, enum focus focus) 403 { 404 int i; 405 406 printf("<form action=\"/%s\" method=\"get\">\n" 407 " <fieldset>\n" 408 " <legend>Manual Page Search Parameters</legend>\n", 409 scriptname); 410 411 /* Write query input box. */ 412 413 printf(" <input type=\"search\" name=\"query\" value=\""); 414 if (req->q.query != NULL) 415 html_print(req->q.query); 416 printf( "\" size=\"40\""); 417 if (focus == FOCUS_QUERY) 418 printf(" autofocus"); 419 puts(">"); 420 421 /* Write submission buttons. */ 422 423 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 424 "man</button>\n" 425 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 426 "apropos</button>\n" 427 " <br/>\n"); 428 429 /* Write section selector. */ 430 431 puts(" <select name=\"sec\">"); 432 for (i = 0; i < sec_MAX; i++) { 433 printf(" <option value=\"%s\"", sec_numbers[i]); 434 if (NULL != req->q.sec && 435 0 == strcmp(sec_numbers[i], req->q.sec)) 436 printf(" selected=\"selected\""); 437 printf(">%s</option>\n", sec_names[i]); 438 } 439 puts(" </select>"); 440 441 /* Write architecture selector. */ 442 443 printf( " <select name=\"arch\">\n" 444 " <option value=\"default\""); 445 if (NULL == req->q.arch) 446 printf(" selected=\"selected\""); 447 puts(">All Architectures</option>"); 448 for (i = 0; i < arch_MAX; i++) { 449 printf(" <option"); 450 if (NULL != req->q.arch && 451 0 == strcmp(arch_names[i], req->q.arch)) 452 printf(" selected=\"selected\""); 453 printf(">%s</option>\n", arch_names[i]); 454 } 455 puts(" </select>"); 456 457 /* Write manpath selector. */ 458 459 if (req->psz > 1) { 460 puts(" <select name=\"manpath\">"); 461 for (i = 0; i < (int)req->psz; i++) { 462 printf(" <option"); 463 if (strcmp(req->q.manpath, req->p[i]) == 0) 464 printf(" selected=\"selected\""); 465 printf(">"); 466 html_print(req->p[i]); 467 puts("</option>"); 468 } 469 puts(" </select>"); 470 } 471 472 puts(" </fieldset>\n" 473 "</form>"); 474 } 475 476 static int 477 validate_urifrag(const char *frag) 478 { 479 480 while ('\0' != *frag) { 481 if ( ! (isalnum((unsigned char)*frag) || 482 '-' == *frag || '.' == *frag || 483 '/' == *frag || '_' == *frag)) 484 return 0; 485 frag++; 486 } 487 return 1; 488 } 489 490 static int 491 validate_manpath(const struct req *req, const char* manpath) 492 { 493 size_t i; 494 495 for (i = 0; i < req->psz; i++) 496 if ( ! strcmp(manpath, req->p[i])) 497 return 1; 498 499 return 0; 500 } 501 502 static int 503 validate_arch(const char *arch) 504 { 505 int i; 506 507 for (i = 0; i < arch_MAX; i++) 508 if (strcmp(arch, arch_names[i]) == 0) 509 return 1; 510 511 return 0; 512 } 513 514 static int 515 validate_filename(const char *file) 516 { 517 518 if ('.' == file[0] && '/' == file[1]) 519 file += 2; 520 521 return ! (strstr(file, "../") || strstr(file, "/..") || 522 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 523 } 524 525 static void 526 pg_index(const struct req *req) 527 { 528 529 resp_begin_html(200, NULL, NULL); 530 resp_searchform(req, FOCUS_QUERY); 531 printf("<p>\n" 532 "This web interface is documented in the\n" 533 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 534 "manual, and the\n" 535 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 536 "manual explains the query syntax.\n" 537 "</p>\n", 538 scriptname, *scriptname == '\0' ? "" : "/", 539 scriptname, *scriptname == '\0' ? "" : "/"); 540 resp_end_html(); 541 } 542 543 static void 544 pg_noresult(const struct req *req, const char *msg) 545 { 546 resp_begin_html(200, NULL, NULL); 547 resp_searchform(req, FOCUS_QUERY); 548 puts("<p>"); 549 puts(msg); 550 puts("</p>"); 551 resp_end_html(); 552 } 553 554 static void 555 pg_error_badrequest(const char *msg) 556 { 557 558 resp_begin_html(400, "Bad Request", NULL); 559 puts("<h1>Bad Request</h1>\n" 560 "<p>\n"); 561 puts(msg); 562 printf("Try again from the\n" 563 "<a href=\"/%s\">main page</a>.\n" 564 "</p>", scriptname); 565 resp_end_html(); 566 } 567 568 static void 569 pg_error_internal(void) 570 { 571 resp_begin_html(500, "Internal Server Error", NULL); 572 puts("<p>Internal Server Error</p>"); 573 resp_end_html(); 574 } 575 576 static void 577 pg_redirect(const struct req *req, const char *name) 578 { 579 printf("Status: 303 See Other\r\n" 580 "Location: /"); 581 if (*scriptname != '\0') 582 printf("%s/", scriptname); 583 if (strcmp(req->q.manpath, req->p[0])) 584 printf("%s/", req->q.manpath); 585 if (req->q.arch != NULL) 586 printf("%s/", req->q.arch); 587 http_encode(name); 588 if (req->q.sec != NULL) { 589 putchar('.'); 590 http_encode(req->q.sec); 591 } 592 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 593 } 594 595 static void 596 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 597 { 598 char *arch, *archend; 599 const char *sec; 600 size_t i, iuse; 601 int archprio, archpriouse; 602 int prio, priouse; 603 604 for (i = 0; i < sz; i++) { 605 if (validate_filename(r[i].file)) 606 continue; 607 warnx("invalid filename %s in %s database", 608 r[i].file, req->q.manpath); 609 pg_error_internal(); 610 return; 611 } 612 613 if (req->isquery && sz == 1) { 614 /* 615 * If we have just one result, then jump there now 616 * without any delay. 617 */ 618 printf("Status: 303 See Other\r\n" 619 "Location: /"); 620 if (*scriptname != '\0') 621 printf("%s/", scriptname); 622 if (strcmp(req->q.manpath, req->p[0])) 623 printf("%s/", req->q.manpath); 624 printf("%s\r\n" 625 "Content-Type: text/html; charset=utf-8\r\n\r\n", 626 r[0].file); 627 return; 628 } 629 630 /* 631 * In man(1) mode, show one of the pages 632 * even if more than one is found. 633 */ 634 635 iuse = 0; 636 if (req->q.equal || sz == 1) { 637 priouse = 20; 638 archpriouse = 3; 639 for (i = 0; i < sz; i++) { 640 sec = r[i].file; 641 sec += strcspn(sec, "123456789"); 642 if (sec[0] == '\0') 643 continue; 644 prio = sec_prios[sec[0] - '1']; 645 if (sec[1] != '/') 646 prio += 10; 647 if (req->q.arch == NULL) { 648 archprio = 649 ((arch = strchr(sec + 1, '/')) 650 == NULL) ? 3 : 651 ((archend = strchr(arch + 1, '/')) 652 == NULL) ? 0 : 653 strncmp(arch, "amd64/", 654 archend - arch) ? 2 : 1; 655 if (archprio < archpriouse) { 656 archpriouse = archprio; 657 priouse = prio; 658 iuse = i; 659 continue; 660 } 661 if (archprio > archpriouse) 662 continue; 663 } 664 if (prio >= priouse) 665 continue; 666 priouse = prio; 667 iuse = i; 668 } 669 resp_begin_html(200, NULL, r[iuse].file); 670 } else 671 resp_begin_html(200, NULL, NULL); 672 673 resp_searchform(req, 674 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 675 676 if (sz > 1) { 677 puts("<table class=\"results\">"); 678 for (i = 0; i < sz; i++) { 679 printf(" <tr>\n" 680 " <td>" 681 "<a class=\"Xr\" href=\"/"); 682 if (*scriptname != '\0') 683 printf("%s/", scriptname); 684 if (strcmp(req->q.manpath, req->p[0])) 685 printf("%s/", req->q.manpath); 686 printf("%s\">", r[i].file); 687 html_print(r[i].names); 688 printf("</a></td>\n" 689 " <td><span class=\"Nd\">"); 690 html_print(r[i].output); 691 puts("</span></td>\n" 692 " </tr>"); 693 } 694 puts("</table>"); 695 } 696 697 if (req->q.equal || sz == 1) { 698 puts("<hr>"); 699 resp_show(req, r[iuse].file); 700 } 701 702 resp_end_html(); 703 } 704 705 static void 706 resp_catman(const struct req *req, const char *file) 707 { 708 FILE *f; 709 char *p; 710 size_t sz; 711 ssize_t len; 712 int i; 713 int italic, bold; 714 715 if ((f = fopen(file, "r")) == NULL) { 716 puts("<p>You specified an invalid manual file.</p>"); 717 return; 718 } 719 720 puts("<div class=\"catman\">\n" 721 "<pre>"); 722 723 p = NULL; 724 sz = 0; 725 726 while ((len = getline(&p, &sz, f)) != -1) { 727 bold = italic = 0; 728 for (i = 0; i < len - 1; i++) { 729 /* 730 * This means that the catpage is out of state. 731 * Ignore it and keep going (although the 732 * catpage is bogus). 733 */ 734 735 if ('\b' == p[i] || '\n' == p[i]) 736 continue; 737 738 /* 739 * Print a regular character. 740 * Close out any bold/italic scopes. 741 * If we're in back-space mode, make sure we'll 742 * have something to enter when we backspace. 743 */ 744 745 if ('\b' != p[i + 1]) { 746 if (italic) 747 printf("</i>"); 748 if (bold) 749 printf("</b>"); 750 italic = bold = 0; 751 html_putchar(p[i]); 752 continue; 753 } else if (i + 2 >= len) 754 continue; 755 756 /* Italic mode. */ 757 758 if ('_' == p[i]) { 759 if (bold) 760 printf("</b>"); 761 if ( ! italic) 762 printf("<i>"); 763 bold = 0; 764 italic = 1; 765 i += 2; 766 html_putchar(p[i]); 767 continue; 768 } 769 770 /* 771 * Handle funny behaviour troff-isms. 772 * These grok'd from the original man2html.c. 773 */ 774 775 if (('+' == p[i] && 'o' == p[i + 2]) || 776 ('o' == p[i] && '+' == p[i + 2]) || 777 ('|' == p[i] && '=' == p[i + 2]) || 778 ('=' == p[i] && '|' == p[i + 2]) || 779 ('*' == p[i] && '=' == p[i + 2]) || 780 ('=' == p[i] && '*' == p[i + 2]) || 781 ('*' == p[i] && '|' == p[i + 2]) || 782 ('|' == p[i] && '*' == p[i + 2])) { 783 if (italic) 784 printf("</i>"); 785 if (bold) 786 printf("</b>"); 787 italic = bold = 0; 788 putchar('*'); 789 i += 2; 790 continue; 791 } else if (('|' == p[i] && '-' == p[i + 2]) || 792 ('-' == p[i] && '|' == p[i + 1]) || 793 ('+' == p[i] && '-' == p[i + 1]) || 794 ('-' == p[i] && '+' == p[i + 1]) || 795 ('+' == p[i] && '|' == p[i + 1]) || 796 ('|' == p[i] && '+' == p[i + 1])) { 797 if (italic) 798 printf("</i>"); 799 if (bold) 800 printf("</b>"); 801 italic = bold = 0; 802 putchar('+'); 803 i += 2; 804 continue; 805 } 806 807 /* Bold mode. */ 808 809 if (italic) 810 printf("</i>"); 811 if ( ! bold) 812 printf("<b>"); 813 bold = 1; 814 italic = 0; 815 i += 2; 816 html_putchar(p[i]); 817 } 818 819 /* 820 * Clean up the last character. 821 * We can get to a newline; don't print that. 822 */ 823 824 if (italic) 825 printf("</i>"); 826 if (bold) 827 printf("</b>"); 828 829 if (i == len - 1 && p[i] != '\n') 830 html_putchar(p[i]); 831 832 putchar('\n'); 833 } 834 free(p); 835 836 puts("</pre>\n" 837 "</div>"); 838 839 fclose(f); 840 } 841 842 static void 843 resp_format(const struct req *req, const char *file) 844 { 845 struct manoutput conf; 846 struct mparse *mp; 847 struct roff_man *man; 848 void *vp; 849 int fd; 850 int usepath; 851 852 if (-1 == (fd = open(file, O_RDONLY, 0))) { 853 puts("<p>You specified an invalid manual file.</p>"); 854 return; 855 } 856 857 mchars_alloc(); 858 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1, 859 MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath); 860 mparse_readfd(mp, fd, file); 861 close(fd); 862 863 memset(&conf, 0, sizeof(conf)); 864 conf.fragment = 1; 865 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 866 conf.toc = 1; 867 usepath = strcmp(req->q.manpath, req->p[0]); 868 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 869 scriptname, *scriptname == '\0' ? "" : "/", 870 usepath ? req->q.manpath : "", usepath ? "/" : ""); 871 872 mparse_result(mp, &man, NULL); 873 if (man == NULL) { 874 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 875 pg_error_internal(); 876 mparse_free(mp); 877 mchars_free(); 878 return; 879 } 880 881 vp = html_alloc(&conf); 882 883 if (man->macroset == MACROSET_MDOC) { 884 mdoc_validate(man); 885 html_mdoc(vp, man); 886 } else { 887 man_validate(man); 888 html_man(vp, man); 889 } 890 891 html_free(vp); 892 mparse_free(mp); 893 mchars_free(); 894 free(conf.man); 895 free(conf.style); 896 } 897 898 static void 899 resp_show(const struct req *req, const char *file) 900 { 901 902 if ('.' == file[0] && '/' == file[1]) 903 file += 2; 904 905 if ('c' == *file) 906 resp_catman(req, file); 907 else 908 resp_format(req, file); 909 } 910 911 static void 912 pg_show(struct req *req, const char *fullpath) 913 { 914 char *manpath; 915 const char *file; 916 917 if ((file = strchr(fullpath, '/')) == NULL) { 918 pg_error_badrequest( 919 "You did not specify a page to show."); 920 return; 921 } 922 manpath = mandoc_strndup(fullpath, file - fullpath); 923 file++; 924 925 if ( ! validate_manpath(req, manpath)) { 926 pg_error_badrequest( 927 "You specified an invalid manpath."); 928 free(manpath); 929 return; 930 } 931 932 /* 933 * Begin by chdir()ing into the manpath. 934 * This way we can pick up the database files, which are 935 * relative to the manpath root. 936 */ 937 938 if (chdir(manpath) == -1) { 939 warn("chdir %s", manpath); 940 pg_error_internal(); 941 free(manpath); 942 return; 943 } 944 free(manpath); 945 946 if ( ! validate_filename(file)) { 947 pg_error_badrequest( 948 "You specified an invalid manual file."); 949 return; 950 } 951 952 resp_begin_html(200, NULL, file); 953 resp_searchform(req, FOCUS_NONE); 954 resp_show(req, file); 955 resp_end_html(); 956 } 957 958 static void 959 pg_search(const struct req *req) 960 { 961 struct mansearch search; 962 struct manpaths paths; 963 struct manpage *res; 964 char **argv; 965 char *query, *rp, *wp; 966 size_t ressz; 967 int argc; 968 969 /* 970 * Begin by chdir()ing into the root of the manpath. 971 * This way we can pick up the database files, which are 972 * relative to the manpath root. 973 */ 974 975 if (chdir(req->q.manpath) == -1) { 976 warn("chdir %s", req->q.manpath); 977 pg_error_internal(); 978 return; 979 } 980 981 search.arch = req->q.arch; 982 search.sec = req->q.sec; 983 search.outkey = "Nd"; 984 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 985 search.firstmatch = 1; 986 987 paths.sz = 1; 988 paths.paths = mandoc_malloc(sizeof(char *)); 989 paths.paths[0] = mandoc_strdup("."); 990 991 /* 992 * Break apart at spaces with backslash-escaping. 993 */ 994 995 argc = 0; 996 argv = NULL; 997 rp = query = mandoc_strdup(req->q.query); 998 for (;;) { 999 while (isspace((unsigned char)*rp)) 1000 rp++; 1001 if (*rp == '\0') 1002 break; 1003 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 1004 argv[argc++] = wp = rp; 1005 for (;;) { 1006 if (isspace((unsigned char)*rp)) { 1007 *wp = '\0'; 1008 rp++; 1009 break; 1010 } 1011 if (rp[0] == '\\' && rp[1] != '\0') 1012 rp++; 1013 if (wp != rp) 1014 *wp = *rp; 1015 if (*rp == '\0') 1016 break; 1017 wp++; 1018 rp++; 1019 } 1020 } 1021 1022 res = NULL; 1023 ressz = 0; 1024 if (req->isquery && req->q.equal && argc == 1) 1025 pg_redirect(req, argv[0]); 1026 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1027 pg_noresult(req, "You entered an invalid query."); 1028 else if (ressz == 0) 1029 pg_noresult(req, "No results found."); 1030 else 1031 pg_searchres(req, res, ressz); 1032 1033 free(query); 1034 mansearch_free(res, ressz); 1035 free(paths.paths[0]); 1036 free(paths.paths); 1037 } 1038 1039 int 1040 main(void) 1041 { 1042 struct req req; 1043 struct itimerval itimer; 1044 const char *path; 1045 const char *querystring; 1046 int i; 1047 1048 /* 1049 * The "rpath" pledge could be revoked after mparse_readfd() 1050 * if the file desciptor to "/footer.html" would be opened 1051 * up front, but it's probably not worth the complication 1052 * of the code it would cause: it would require scattering 1053 * pledge() calls in multiple low-level resp_*() functions. 1054 */ 1055 1056 if (pledge("stdio rpath", NULL) == -1) { 1057 warn("pledge"); 1058 pg_error_internal(); 1059 return EXIT_FAILURE; 1060 } 1061 1062 /* Poor man's ReDoS mitigation. */ 1063 1064 itimer.it_value.tv_sec = 2; 1065 itimer.it_value.tv_usec = 0; 1066 itimer.it_interval.tv_sec = 2; 1067 itimer.it_interval.tv_usec = 0; 1068 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1069 warn("setitimer"); 1070 pg_error_internal(); 1071 return EXIT_FAILURE; 1072 } 1073 1074 /* 1075 * First we change directory into the MAN_DIR so that 1076 * subsequent scanning for manpath directories is rooted 1077 * relative to the same position. 1078 */ 1079 1080 if (chdir(MAN_DIR) == -1) { 1081 warn("MAN_DIR: %s", MAN_DIR); 1082 pg_error_internal(); 1083 return EXIT_FAILURE; 1084 } 1085 1086 memset(&req, 0, sizeof(struct req)); 1087 req.q.equal = 1; 1088 parse_manpath_conf(&req); 1089 1090 /* Parse the path info and the query string. */ 1091 1092 if ((path = getenv("PATH_INFO")) == NULL) 1093 path = ""; 1094 else if (*path == '/') 1095 path++; 1096 1097 if (*path != '\0') { 1098 parse_path_info(&req, path); 1099 if (req.q.manpath == NULL || req.q.sec == NULL || 1100 *req.q.query == '\0' || access(path, F_OK) == -1) 1101 path = ""; 1102 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1103 parse_query_string(&req, querystring); 1104 1105 /* Validate parsed data and add defaults. */ 1106 1107 if (req.q.manpath == NULL) 1108 req.q.manpath = mandoc_strdup(req.p[0]); 1109 else if ( ! validate_manpath(&req, req.q.manpath)) { 1110 pg_error_badrequest( 1111 "You specified an invalid manpath."); 1112 return EXIT_FAILURE; 1113 } 1114 1115 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1116 pg_error_badrequest( 1117 "You specified an invalid architecture."); 1118 return EXIT_FAILURE; 1119 } 1120 1121 /* Dispatch to the three different pages. */ 1122 1123 if ('\0' != *path) 1124 pg_show(&req, path); 1125 else if (NULL != req.q.query) 1126 pg_search(&req); 1127 else 1128 pg_index(&req); 1129 1130 free(req.q.manpath); 1131 free(req.q.arch); 1132 free(req.q.sec); 1133 free(req.q.query); 1134 for (i = 0; i < (int)req.psz; i++) 1135 free(req.p[i]); 1136 free(req.p); 1137 return EXIT_SUCCESS; 1138 } 1139 1140 /* 1141 * Translate PATH_INFO to a query. 1142 */ 1143 static void 1144 parse_path_info(struct req *req, const char *path) 1145 { 1146 const char *name, *sec, *end; 1147 1148 req->isquery = 0; 1149 req->q.equal = 1; 1150 req->q.manpath = NULL; 1151 req->q.arch = NULL; 1152 1153 /* Mandatory manual page name. */ 1154 if ((name = strrchr(path, '/')) == NULL) 1155 name = path; 1156 else 1157 name++; 1158 1159 /* Optional trailing section. */ 1160 sec = strrchr(name, '.'); 1161 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1162 req->q.query = mandoc_strndup(name, sec - name - 1); 1163 req->q.sec = mandoc_strdup(sec); 1164 } else { 1165 req->q.query = mandoc_strdup(name); 1166 req->q.sec = NULL; 1167 } 1168 1169 /* Handle the case of name[.section] only. */ 1170 if (name == path) 1171 return; 1172 1173 /* Optional manpath. */ 1174 end = strchr(path, '/'); 1175 req->q.manpath = mandoc_strndup(path, end - path); 1176 if (validate_manpath(req, req->q.manpath)) { 1177 path = end + 1; 1178 if (name == path) 1179 return; 1180 } else { 1181 free(req->q.manpath); 1182 req->q.manpath = NULL; 1183 } 1184 1185 /* Optional section. */ 1186 if (strncmp(path, "man", 3) == 0) { 1187 path += 3; 1188 end = strchr(path, '/'); 1189 free(req->q.sec); 1190 req->q.sec = mandoc_strndup(path, end - path); 1191 path = end + 1; 1192 if (name == path) 1193 return; 1194 } 1195 1196 /* Optional architecture. */ 1197 end = strchr(path, '/'); 1198 if (end + 1 != name) { 1199 pg_error_badrequest( 1200 "You specified too many directory components."); 1201 exit(EXIT_FAILURE); 1202 } 1203 req->q.arch = mandoc_strndup(path, end - path); 1204 if (validate_arch(req->q.arch) == 0) { 1205 pg_error_badrequest( 1206 "You specified an invalid directory component."); 1207 exit(EXIT_FAILURE); 1208 } 1209 } 1210 1211 /* 1212 * Scan for indexable paths. 1213 */ 1214 static void 1215 parse_manpath_conf(struct req *req) 1216 { 1217 FILE *fp; 1218 char *dp; 1219 size_t dpsz; 1220 ssize_t len; 1221 1222 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1223 warn("%s/manpath.conf", MAN_DIR); 1224 pg_error_internal(); 1225 exit(EXIT_FAILURE); 1226 } 1227 1228 dp = NULL; 1229 dpsz = 0; 1230 1231 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1232 if (dp[len - 1] == '\n') 1233 dp[--len] = '\0'; 1234 req->p = mandoc_realloc(req->p, 1235 (req->psz + 1) * sizeof(char *)); 1236 if ( ! validate_urifrag(dp)) { 1237 warnx("%s/manpath.conf contains " 1238 "unsafe path \"%s\"", MAN_DIR, dp); 1239 pg_error_internal(); 1240 exit(EXIT_FAILURE); 1241 } 1242 if (strchr(dp, '/') != NULL) { 1243 warnx("%s/manpath.conf contains " 1244 "path with slash \"%s\"", MAN_DIR, dp); 1245 pg_error_internal(); 1246 exit(EXIT_FAILURE); 1247 } 1248 req->p[req->psz++] = dp; 1249 dp = NULL; 1250 dpsz = 0; 1251 } 1252 free(dp); 1253 1254 if (req->p == NULL) { 1255 warnx("%s/manpath.conf is empty", MAN_DIR); 1256 pg_error_internal(); 1257 exit(EXIT_FAILURE); 1258 } 1259 } 1260