1 /* $OpenBSD: cgi.c,v 1.104 2019/03/06 12:32:10 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "mandoc_parse.h" 38 #include "main.h" 39 #include "manconf.h" 40 #include "mansearch.h" 41 #include "cgi.h" 42 43 /* 44 * A query as passed to the search function. 45 */ 46 struct query { 47 char *manpath; /* desired manual directory */ 48 char *arch; /* architecture */ 49 char *sec; /* manual section */ 50 char *query; /* unparsed query expression */ 51 int equal; /* match whole names, not substrings */ 52 }; 53 54 struct req { 55 struct query q; 56 char **p; /* array of available manpaths */ 57 size_t psz; /* number of available manpaths */ 58 int isquery; /* QUERY_STRING used, not PATH_INFO */ 59 }; 60 61 enum focus { 62 FOCUS_NONE = 0, 63 FOCUS_QUERY 64 }; 65 66 static void html_print(const char *); 67 static void html_putchar(char); 68 static int http_decode(char *); 69 static void http_encode(const char *p); 70 static void parse_manpath_conf(struct req *); 71 static void parse_path_info(struct req *req, const char *path); 72 static void parse_query_string(struct req *, const char *); 73 static void pg_error_badrequest(const char *); 74 static void pg_error_internal(void); 75 static void pg_index(const struct req *); 76 static void pg_noresult(const struct req *, const char *); 77 static void pg_redirect(const struct req *, const char *); 78 static void pg_search(const struct req *); 79 static void pg_searchres(const struct req *, 80 struct manpage *, size_t); 81 static void pg_show(struct req *, const char *); 82 static void resp_begin_html(int, const char *, const char *); 83 static void resp_begin_http(int, const char *); 84 static void resp_catman(const struct req *, const char *); 85 static void resp_copy(const char *); 86 static void resp_end_html(void); 87 static void resp_format(const struct req *, const char *); 88 static void resp_searchform(const struct req *, enum focus); 89 static void resp_show(const struct req *, const char *); 90 static void set_query_attr(char **, char **); 91 static int validate_arch(const char *); 92 static int validate_filename(const char *); 93 static int validate_manpath(const struct req *, const char *); 94 static int validate_urifrag(const char *); 95 96 static const char *scriptname = SCRIPT_NAME; 97 98 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 99 static const char *const sec_numbers[] = { 100 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 101 }; 102 static const char *const sec_names[] = { 103 "All Sections", 104 "1 - General Commands", 105 "2 - System Calls", 106 "3 - Library Functions", 107 "3p - Perl Library", 108 "4 - Device Drivers", 109 "5 - File Formats", 110 "6 - Games", 111 "7 - Miscellaneous Information", 112 "8 - System Manager\'s Manual", 113 "9 - Kernel Developer\'s Manual" 114 }; 115 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 116 117 static const char *const arch_names[] = { 118 "amd64", "alpha", "armv7", "arm64", 119 "hppa", "i386", "landisk", 120 "loongson", "luna88k", "macppc", "mips64", 121 "octeon", "sgi", "socppc", "sparc64", 122 "amiga", "arc", "armish", "arm32", 123 "atari", "aviion", "beagle", "cats", 124 "hppa64", "hp300", 125 "ia64", "mac68k", "mvme68k", "mvme88k", 126 "mvmeppc", "palm", "pc532", "pegasos", 127 "pmax", "powerpc", "solbourne", "sparc", 128 "sun3", "vax", "wgrisc", "x68k", 129 "zaurus" 130 }; 131 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 132 133 /* 134 * Print a character, escaping HTML along the way. 135 * This will pass non-ASCII straight to output: be warned! 136 */ 137 static void 138 html_putchar(char c) 139 { 140 141 switch (c) { 142 case '"': 143 printf("""); 144 break; 145 case '&': 146 printf("&"); 147 break; 148 case '>': 149 printf(">"); 150 break; 151 case '<': 152 printf("<"); 153 break; 154 default: 155 putchar((unsigned char)c); 156 break; 157 } 158 } 159 160 /* 161 * Call through to html_putchar(). 162 * Accepts NULL strings. 163 */ 164 static void 165 html_print(const char *p) 166 { 167 168 if (NULL == p) 169 return; 170 while ('\0' != *p) 171 html_putchar(*p++); 172 } 173 174 /* 175 * Transfer the responsibility for the allocated string *val 176 * to the query structure. 177 */ 178 static void 179 set_query_attr(char **attr, char **val) 180 { 181 182 free(*attr); 183 if (**val == '\0') { 184 *attr = NULL; 185 free(*val); 186 } else 187 *attr = *val; 188 *val = NULL; 189 } 190 191 /* 192 * Parse the QUERY_STRING for key-value pairs 193 * and store the values into the query structure. 194 */ 195 static void 196 parse_query_string(struct req *req, const char *qs) 197 { 198 char *key, *val; 199 size_t keysz, valsz; 200 201 req->isquery = 1; 202 req->q.manpath = NULL; 203 req->q.arch = NULL; 204 req->q.sec = NULL; 205 req->q.query = NULL; 206 req->q.equal = 1; 207 208 key = val = NULL; 209 while (*qs != '\0') { 210 211 /* Parse one key. */ 212 213 keysz = strcspn(qs, "=;&"); 214 key = mandoc_strndup(qs, keysz); 215 qs += keysz; 216 if (*qs != '=') 217 goto next; 218 219 /* Parse one value. */ 220 221 valsz = strcspn(++qs, ";&"); 222 val = mandoc_strndup(qs, valsz); 223 qs += valsz; 224 225 /* Decode and catch encoding errors. */ 226 227 if ( ! (http_decode(key) && http_decode(val))) 228 goto next; 229 230 /* Handle key-value pairs. */ 231 232 if ( ! strcmp(key, "query")) 233 set_query_attr(&req->q.query, &val); 234 235 else if ( ! strcmp(key, "apropos")) 236 req->q.equal = !strcmp(val, "0"); 237 238 else if ( ! strcmp(key, "manpath")) { 239 #ifdef COMPAT_OLDURI 240 if ( ! strncmp(val, "OpenBSD ", 8)) { 241 val[7] = '-'; 242 if ('C' == val[8]) 243 val[8] = 'c'; 244 } 245 #endif 246 set_query_attr(&req->q.manpath, &val); 247 } 248 249 else if ( ! (strcmp(key, "sec") 250 #ifdef COMPAT_OLDURI 251 && strcmp(key, "sektion") 252 #endif 253 )) { 254 if ( ! strcmp(val, "0")) 255 *val = '\0'; 256 set_query_attr(&req->q.sec, &val); 257 } 258 259 else if ( ! strcmp(key, "arch")) { 260 if ( ! strcmp(val, "default")) 261 *val = '\0'; 262 set_query_attr(&req->q.arch, &val); 263 } 264 265 /* 266 * The key must be freed in any case. 267 * The val may have been handed over to the query 268 * structure, in which case it is now NULL. 269 */ 270 next: 271 free(key); 272 key = NULL; 273 free(val); 274 val = NULL; 275 276 if (*qs != '\0') 277 qs++; 278 } 279 } 280 281 /* 282 * HTTP-decode a string. The standard explanation is that this turns 283 * "%4e+foo" into "n foo" in the regular way. This is done in-place 284 * over the allocated string. 285 */ 286 static int 287 http_decode(char *p) 288 { 289 char hex[3]; 290 char *q; 291 int c; 292 293 hex[2] = '\0'; 294 295 q = p; 296 for ( ; '\0' != *p; p++, q++) { 297 if ('%' == *p) { 298 if ('\0' == (hex[0] = *(p + 1))) 299 return 0; 300 if ('\0' == (hex[1] = *(p + 2))) 301 return 0; 302 if (1 != sscanf(hex, "%x", &c)) 303 return 0; 304 if ('\0' == c) 305 return 0; 306 307 *q = (char)c; 308 p += 2; 309 } else 310 *q = '+' == *p ? ' ' : *p; 311 } 312 313 *q = '\0'; 314 return 1; 315 } 316 317 static void 318 http_encode(const char *p) 319 { 320 for (; *p != '\0'; p++) { 321 if (isalnum((unsigned char)*p) == 0 && 322 strchr("-._~", *p) == NULL) 323 printf("%%%2.2X", (unsigned char)*p); 324 else 325 putchar(*p); 326 } 327 } 328 329 static void 330 resp_begin_http(int code, const char *msg) 331 { 332 333 if (200 != code) 334 printf("Status: %d %s\r\n", code, msg); 335 336 printf("Content-Type: text/html; charset=utf-8\r\n" 337 "Cache-Control: no-cache\r\n" 338 "Pragma: no-cache\r\n" 339 "\r\n"); 340 341 fflush(stdout); 342 } 343 344 static void 345 resp_copy(const char *filename) 346 { 347 char buf[4096]; 348 ssize_t sz; 349 int fd; 350 351 if ((fd = open(filename, O_RDONLY)) != -1) { 352 fflush(stdout); 353 while ((sz = read(fd, buf, sizeof(buf))) > 0) 354 write(STDOUT_FILENO, buf, sz); 355 close(fd); 356 } 357 } 358 359 static void 360 resp_begin_html(int code, const char *msg, const char *file) 361 { 362 char *cp; 363 364 resp_begin_http(code, msg); 365 366 printf("<!DOCTYPE html>\n" 367 "<html>\n" 368 "<head>\n" 369 " <meta charset=\"UTF-8\"/>\n" 370 " <meta name=\"viewport\"" 371 " content=\"width=device-width, initial-scale=1.0\">\n" 372 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 373 " type=\"text/css\" media=\"all\">\n" 374 " <title>", 375 CSS_DIR); 376 if (file != NULL) { 377 if ((cp = strrchr(file, '/')) != NULL) 378 file = cp + 1; 379 if ((cp = strrchr(file, '.')) != NULL) { 380 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 381 } else 382 printf("%s - ", file); 383 } 384 printf("%s</title>\n" 385 "</head>\n" 386 "<body>\n", 387 CUSTOMIZE_TITLE); 388 389 resp_copy(MAN_DIR "/header.html"); 390 } 391 392 static void 393 resp_end_html(void) 394 { 395 396 resp_copy(MAN_DIR "/footer.html"); 397 398 puts("</body>\n" 399 "</html>"); 400 } 401 402 static void 403 resp_searchform(const struct req *req, enum focus focus) 404 { 405 int i; 406 407 printf("<form action=\"/%s\" method=\"get\">\n" 408 " <fieldset>\n" 409 " <legend>Manual Page Search Parameters</legend>\n", 410 scriptname); 411 412 /* Write query input box. */ 413 414 printf(" <input type=\"search\" name=\"query\" value=\""); 415 if (req->q.query != NULL) 416 html_print(req->q.query); 417 printf( "\" size=\"40\""); 418 if (focus == FOCUS_QUERY) 419 printf(" autofocus"); 420 puts(">"); 421 422 /* Write submission buttons. */ 423 424 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 425 "man</button>\n" 426 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 427 "apropos</button>\n" 428 " <br/>\n"); 429 430 /* Write section selector. */ 431 432 puts(" <select name=\"sec\">"); 433 for (i = 0; i < sec_MAX; i++) { 434 printf(" <option value=\"%s\"", sec_numbers[i]); 435 if (NULL != req->q.sec && 436 0 == strcmp(sec_numbers[i], req->q.sec)) 437 printf(" selected=\"selected\""); 438 printf(">%s</option>\n", sec_names[i]); 439 } 440 puts(" </select>"); 441 442 /* Write architecture selector. */ 443 444 printf( " <select name=\"arch\">\n" 445 " <option value=\"default\""); 446 if (NULL == req->q.arch) 447 printf(" selected=\"selected\""); 448 puts(">All Architectures</option>"); 449 for (i = 0; i < arch_MAX; i++) { 450 printf(" <option"); 451 if (NULL != req->q.arch && 452 0 == strcmp(arch_names[i], req->q.arch)) 453 printf(" selected=\"selected\""); 454 printf(">%s</option>\n", arch_names[i]); 455 } 456 puts(" </select>"); 457 458 /* Write manpath selector. */ 459 460 if (req->psz > 1) { 461 puts(" <select name=\"manpath\">"); 462 for (i = 0; i < (int)req->psz; i++) { 463 printf(" <option"); 464 if (strcmp(req->q.manpath, req->p[i]) == 0) 465 printf(" selected=\"selected\""); 466 printf(">"); 467 html_print(req->p[i]); 468 puts("</option>"); 469 } 470 puts(" </select>"); 471 } 472 473 puts(" </fieldset>\n" 474 "</form>"); 475 } 476 477 static int 478 validate_urifrag(const char *frag) 479 { 480 481 while ('\0' != *frag) { 482 if ( ! (isalnum((unsigned char)*frag) || 483 '-' == *frag || '.' == *frag || 484 '/' == *frag || '_' == *frag)) 485 return 0; 486 frag++; 487 } 488 return 1; 489 } 490 491 static int 492 validate_manpath(const struct req *req, const char* manpath) 493 { 494 size_t i; 495 496 for (i = 0; i < req->psz; i++) 497 if ( ! strcmp(manpath, req->p[i])) 498 return 1; 499 500 return 0; 501 } 502 503 static int 504 validate_arch(const char *arch) 505 { 506 int i; 507 508 for (i = 0; i < arch_MAX; i++) 509 if (strcmp(arch, arch_names[i]) == 0) 510 return 1; 511 512 return 0; 513 } 514 515 static int 516 validate_filename(const char *file) 517 { 518 519 if ('.' == file[0] && '/' == file[1]) 520 file += 2; 521 522 return ! (strstr(file, "../") || strstr(file, "/..") || 523 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 524 } 525 526 static void 527 pg_index(const struct req *req) 528 { 529 530 resp_begin_html(200, NULL, NULL); 531 resp_searchform(req, FOCUS_QUERY); 532 printf("<p>\n" 533 "This web interface is documented in the\n" 534 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 535 "manual, and the\n" 536 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 537 "manual explains the query syntax.\n" 538 "</p>\n", 539 scriptname, *scriptname == '\0' ? "" : "/", 540 scriptname, *scriptname == '\0' ? "" : "/"); 541 resp_end_html(); 542 } 543 544 static void 545 pg_noresult(const struct req *req, const char *msg) 546 { 547 resp_begin_html(200, NULL, NULL); 548 resp_searchform(req, FOCUS_QUERY); 549 puts("<p>"); 550 puts(msg); 551 puts("</p>"); 552 resp_end_html(); 553 } 554 555 static void 556 pg_error_badrequest(const char *msg) 557 { 558 559 resp_begin_html(400, "Bad Request", NULL); 560 puts("<h1>Bad Request</h1>\n" 561 "<p>\n"); 562 puts(msg); 563 printf("Try again from the\n" 564 "<a href=\"/%s\">main page</a>.\n" 565 "</p>", scriptname); 566 resp_end_html(); 567 } 568 569 static void 570 pg_error_internal(void) 571 { 572 resp_begin_html(500, "Internal Server Error", NULL); 573 puts("<p>Internal Server Error</p>"); 574 resp_end_html(); 575 } 576 577 static void 578 pg_redirect(const struct req *req, const char *name) 579 { 580 printf("Status: 303 See Other\r\n" 581 "Location: /"); 582 if (*scriptname != '\0') 583 printf("%s/", scriptname); 584 if (strcmp(req->q.manpath, req->p[0])) 585 printf("%s/", req->q.manpath); 586 if (req->q.arch != NULL) 587 printf("%s/", req->q.arch); 588 http_encode(name); 589 if (req->q.sec != NULL) { 590 putchar('.'); 591 http_encode(req->q.sec); 592 } 593 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 594 } 595 596 static void 597 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 598 { 599 char *arch, *archend; 600 const char *sec; 601 size_t i, iuse; 602 int archprio, archpriouse; 603 int prio, priouse; 604 605 for (i = 0; i < sz; i++) { 606 if (validate_filename(r[i].file)) 607 continue; 608 warnx("invalid filename %s in %s database", 609 r[i].file, req->q.manpath); 610 pg_error_internal(); 611 return; 612 } 613 614 if (req->isquery && sz == 1) { 615 /* 616 * If we have just one result, then jump there now 617 * without any delay. 618 */ 619 printf("Status: 303 See Other\r\n" 620 "Location: /"); 621 if (*scriptname != '\0') 622 printf("%s/", scriptname); 623 if (strcmp(req->q.manpath, req->p[0])) 624 printf("%s/", req->q.manpath); 625 printf("%s\r\n" 626 "Content-Type: text/html; charset=utf-8\r\n\r\n", 627 r[0].file); 628 return; 629 } 630 631 /* 632 * In man(1) mode, show one of the pages 633 * even if more than one is found. 634 */ 635 636 iuse = 0; 637 if (req->q.equal || sz == 1) { 638 priouse = 20; 639 archpriouse = 3; 640 for (i = 0; i < sz; i++) { 641 sec = r[i].file; 642 sec += strcspn(sec, "123456789"); 643 if (sec[0] == '\0') 644 continue; 645 prio = sec_prios[sec[0] - '1']; 646 if (sec[1] != '/') 647 prio += 10; 648 if (req->q.arch == NULL) { 649 archprio = 650 ((arch = strchr(sec + 1, '/')) 651 == NULL) ? 3 : 652 ((archend = strchr(arch + 1, '/')) 653 == NULL) ? 0 : 654 strncmp(arch, "amd64/", 655 archend - arch) ? 2 : 1; 656 if (archprio < archpriouse) { 657 archpriouse = archprio; 658 priouse = prio; 659 iuse = i; 660 continue; 661 } 662 if (archprio > archpriouse) 663 continue; 664 } 665 if (prio >= priouse) 666 continue; 667 priouse = prio; 668 iuse = i; 669 } 670 resp_begin_html(200, NULL, r[iuse].file); 671 } else 672 resp_begin_html(200, NULL, NULL); 673 674 resp_searchform(req, 675 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 676 677 if (sz > 1) { 678 puts("<table class=\"results\">"); 679 for (i = 0; i < sz; i++) { 680 printf(" <tr>\n" 681 " <td>" 682 "<a class=\"Xr\" href=\"/"); 683 if (*scriptname != '\0') 684 printf("%s/", scriptname); 685 if (strcmp(req->q.manpath, req->p[0])) 686 printf("%s/", req->q.manpath); 687 printf("%s\">", r[i].file); 688 html_print(r[i].names); 689 printf("</a></td>\n" 690 " <td><span class=\"Nd\">"); 691 html_print(r[i].output); 692 puts("</span></td>\n" 693 " </tr>"); 694 } 695 puts("</table>"); 696 } 697 698 if (req->q.equal || sz == 1) { 699 puts("<hr>"); 700 resp_show(req, r[iuse].file); 701 } 702 703 resp_end_html(); 704 } 705 706 static void 707 resp_catman(const struct req *req, const char *file) 708 { 709 FILE *f; 710 char *p; 711 size_t sz; 712 ssize_t len; 713 int i; 714 int italic, bold; 715 716 if ((f = fopen(file, "r")) == NULL) { 717 puts("<p>You specified an invalid manual file.</p>"); 718 return; 719 } 720 721 puts("<div class=\"catman\">\n" 722 "<pre>"); 723 724 p = NULL; 725 sz = 0; 726 727 while ((len = getline(&p, &sz, f)) != -1) { 728 bold = italic = 0; 729 for (i = 0; i < len - 1; i++) { 730 /* 731 * This means that the catpage is out of state. 732 * Ignore it and keep going (although the 733 * catpage is bogus). 734 */ 735 736 if ('\b' == p[i] || '\n' == p[i]) 737 continue; 738 739 /* 740 * Print a regular character. 741 * Close out any bold/italic scopes. 742 * If we're in back-space mode, make sure we'll 743 * have something to enter when we backspace. 744 */ 745 746 if ('\b' != p[i + 1]) { 747 if (italic) 748 printf("</i>"); 749 if (bold) 750 printf("</b>"); 751 italic = bold = 0; 752 html_putchar(p[i]); 753 continue; 754 } else if (i + 2 >= len) 755 continue; 756 757 /* Italic mode. */ 758 759 if ('_' == p[i]) { 760 if (bold) 761 printf("</b>"); 762 if ( ! italic) 763 printf("<i>"); 764 bold = 0; 765 italic = 1; 766 i += 2; 767 html_putchar(p[i]); 768 continue; 769 } 770 771 /* 772 * Handle funny behaviour troff-isms. 773 * These grok'd from the original man2html.c. 774 */ 775 776 if (('+' == p[i] && 'o' == p[i + 2]) || 777 ('o' == p[i] && '+' == p[i + 2]) || 778 ('|' == p[i] && '=' == p[i + 2]) || 779 ('=' == p[i] && '|' == p[i + 2]) || 780 ('*' == p[i] && '=' == p[i + 2]) || 781 ('=' == p[i] && '*' == p[i + 2]) || 782 ('*' == p[i] && '|' == p[i + 2]) || 783 ('|' == p[i] && '*' == p[i + 2])) { 784 if (italic) 785 printf("</i>"); 786 if (bold) 787 printf("</b>"); 788 italic = bold = 0; 789 putchar('*'); 790 i += 2; 791 continue; 792 } else if (('|' == p[i] && '-' == p[i + 2]) || 793 ('-' == p[i] && '|' == p[i + 1]) || 794 ('+' == p[i] && '-' == p[i + 1]) || 795 ('-' == p[i] && '+' == p[i + 1]) || 796 ('+' == p[i] && '|' == p[i + 1]) || 797 ('|' == p[i] && '+' == p[i + 1])) { 798 if (italic) 799 printf("</i>"); 800 if (bold) 801 printf("</b>"); 802 italic = bold = 0; 803 putchar('+'); 804 i += 2; 805 continue; 806 } 807 808 /* Bold mode. */ 809 810 if (italic) 811 printf("</i>"); 812 if ( ! bold) 813 printf("<b>"); 814 bold = 1; 815 italic = 0; 816 i += 2; 817 html_putchar(p[i]); 818 } 819 820 /* 821 * Clean up the last character. 822 * We can get to a newline; don't print that. 823 */ 824 825 if (italic) 826 printf("</i>"); 827 if (bold) 828 printf("</b>"); 829 830 if (i == len - 1 && p[i] != '\n') 831 html_putchar(p[i]); 832 833 putchar('\n'); 834 } 835 free(p); 836 837 puts("</pre>\n" 838 "</div>"); 839 840 fclose(f); 841 } 842 843 static void 844 resp_format(const struct req *req, const char *file) 845 { 846 struct manoutput conf; 847 struct mparse *mp; 848 struct roff_meta *meta; 849 void *vp; 850 int fd; 851 int usepath; 852 853 if (-1 == (fd = open(file, O_RDONLY, 0))) { 854 puts("<p>You specified an invalid manual file.</p>"); 855 return; 856 } 857 858 mchars_alloc(); 859 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 860 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 861 mparse_readfd(mp, fd, file); 862 close(fd); 863 meta = mparse_result(mp); 864 865 memset(&conf, 0, sizeof(conf)); 866 conf.fragment = 1; 867 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 868 conf.toc = 1; 869 usepath = strcmp(req->q.manpath, req->p[0]); 870 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 871 scriptname, *scriptname == '\0' ? "" : "/", 872 usepath ? req->q.manpath : "", usepath ? "/" : ""); 873 874 vp = html_alloc(&conf); 875 if (meta->macroset == MACROSET_MDOC) 876 html_mdoc(vp, meta); 877 else 878 html_man(vp, meta); 879 880 html_free(vp); 881 mparse_free(mp); 882 mchars_free(); 883 free(conf.man); 884 free(conf.style); 885 } 886 887 static void 888 resp_show(const struct req *req, const char *file) 889 { 890 891 if ('.' == file[0] && '/' == file[1]) 892 file += 2; 893 894 if ('c' == *file) 895 resp_catman(req, file); 896 else 897 resp_format(req, file); 898 } 899 900 static void 901 pg_show(struct req *req, const char *fullpath) 902 { 903 char *manpath; 904 const char *file; 905 906 if ((file = strchr(fullpath, '/')) == NULL) { 907 pg_error_badrequest( 908 "You did not specify a page to show."); 909 return; 910 } 911 manpath = mandoc_strndup(fullpath, file - fullpath); 912 file++; 913 914 if ( ! validate_manpath(req, manpath)) { 915 pg_error_badrequest( 916 "You specified an invalid manpath."); 917 free(manpath); 918 return; 919 } 920 921 /* 922 * Begin by chdir()ing into the manpath. 923 * This way we can pick up the database files, which are 924 * relative to the manpath root. 925 */ 926 927 if (chdir(manpath) == -1) { 928 warn("chdir %s", manpath); 929 pg_error_internal(); 930 free(manpath); 931 return; 932 } 933 free(manpath); 934 935 if ( ! validate_filename(file)) { 936 pg_error_badrequest( 937 "You specified an invalid manual file."); 938 return; 939 } 940 941 resp_begin_html(200, NULL, file); 942 resp_searchform(req, FOCUS_NONE); 943 resp_show(req, file); 944 resp_end_html(); 945 } 946 947 static void 948 pg_search(const struct req *req) 949 { 950 struct mansearch search; 951 struct manpaths paths; 952 struct manpage *res; 953 char **argv; 954 char *query, *rp, *wp; 955 size_t ressz; 956 int argc; 957 958 /* 959 * Begin by chdir()ing into the root of the manpath. 960 * This way we can pick up the database files, which are 961 * relative to the manpath root. 962 */ 963 964 if (chdir(req->q.manpath) == -1) { 965 warn("chdir %s", req->q.manpath); 966 pg_error_internal(); 967 return; 968 } 969 970 search.arch = req->q.arch; 971 search.sec = req->q.sec; 972 search.outkey = "Nd"; 973 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 974 search.firstmatch = 1; 975 976 paths.sz = 1; 977 paths.paths = mandoc_malloc(sizeof(char *)); 978 paths.paths[0] = mandoc_strdup("."); 979 980 /* 981 * Break apart at spaces with backslash-escaping. 982 */ 983 984 argc = 0; 985 argv = NULL; 986 rp = query = mandoc_strdup(req->q.query); 987 for (;;) { 988 while (isspace((unsigned char)*rp)) 989 rp++; 990 if (*rp == '\0') 991 break; 992 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 993 argv[argc++] = wp = rp; 994 for (;;) { 995 if (isspace((unsigned char)*rp)) { 996 *wp = '\0'; 997 rp++; 998 break; 999 } 1000 if (rp[0] == '\\' && rp[1] != '\0') 1001 rp++; 1002 if (wp != rp) 1003 *wp = *rp; 1004 if (*rp == '\0') 1005 break; 1006 wp++; 1007 rp++; 1008 } 1009 } 1010 1011 res = NULL; 1012 ressz = 0; 1013 if (req->isquery && req->q.equal && argc == 1) 1014 pg_redirect(req, argv[0]); 1015 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1016 pg_noresult(req, "You entered an invalid query."); 1017 else if (ressz == 0) 1018 pg_noresult(req, "No results found."); 1019 else 1020 pg_searchres(req, res, ressz); 1021 1022 free(query); 1023 mansearch_free(res, ressz); 1024 free(paths.paths[0]); 1025 free(paths.paths); 1026 } 1027 1028 int 1029 main(void) 1030 { 1031 struct req req; 1032 struct itimerval itimer; 1033 const char *path; 1034 const char *querystring; 1035 int i; 1036 1037 /* 1038 * The "rpath" pledge could be revoked after mparse_readfd() 1039 * if the file desciptor to "/footer.html" would be opened 1040 * up front, but it's probably not worth the complication 1041 * of the code it would cause: it would require scattering 1042 * pledge() calls in multiple low-level resp_*() functions. 1043 */ 1044 1045 if (pledge("stdio rpath", NULL) == -1) { 1046 warn("pledge"); 1047 pg_error_internal(); 1048 return EXIT_FAILURE; 1049 } 1050 1051 /* Poor man's ReDoS mitigation. */ 1052 1053 itimer.it_value.tv_sec = 2; 1054 itimer.it_value.tv_usec = 0; 1055 itimer.it_interval.tv_sec = 2; 1056 itimer.it_interval.tv_usec = 0; 1057 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1058 warn("setitimer"); 1059 pg_error_internal(); 1060 return EXIT_FAILURE; 1061 } 1062 1063 /* 1064 * First we change directory into the MAN_DIR so that 1065 * subsequent scanning for manpath directories is rooted 1066 * relative to the same position. 1067 */ 1068 1069 if (chdir(MAN_DIR) == -1) { 1070 warn("MAN_DIR: %s", MAN_DIR); 1071 pg_error_internal(); 1072 return EXIT_FAILURE; 1073 } 1074 1075 memset(&req, 0, sizeof(struct req)); 1076 req.q.equal = 1; 1077 parse_manpath_conf(&req); 1078 1079 /* Parse the path info and the query string. */ 1080 1081 if ((path = getenv("PATH_INFO")) == NULL) 1082 path = ""; 1083 else if (*path == '/') 1084 path++; 1085 1086 if (*path != '\0') { 1087 parse_path_info(&req, path); 1088 if (req.q.manpath == NULL || req.q.sec == NULL || 1089 *req.q.query == '\0' || access(path, F_OK) == -1) 1090 path = ""; 1091 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1092 parse_query_string(&req, querystring); 1093 1094 /* Validate parsed data and add defaults. */ 1095 1096 if (req.q.manpath == NULL) 1097 req.q.manpath = mandoc_strdup(req.p[0]); 1098 else if ( ! validate_manpath(&req, req.q.manpath)) { 1099 pg_error_badrequest( 1100 "You specified an invalid manpath."); 1101 return EXIT_FAILURE; 1102 } 1103 1104 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1105 pg_error_badrequest( 1106 "You specified an invalid architecture."); 1107 return EXIT_FAILURE; 1108 } 1109 1110 /* Dispatch to the three different pages. */ 1111 1112 if ('\0' != *path) 1113 pg_show(&req, path); 1114 else if (NULL != req.q.query) 1115 pg_search(&req); 1116 else 1117 pg_index(&req); 1118 1119 free(req.q.manpath); 1120 free(req.q.arch); 1121 free(req.q.sec); 1122 free(req.q.query); 1123 for (i = 0; i < (int)req.psz; i++) 1124 free(req.p[i]); 1125 free(req.p); 1126 return EXIT_SUCCESS; 1127 } 1128 1129 /* 1130 * Translate PATH_INFO to a query. 1131 */ 1132 static void 1133 parse_path_info(struct req *req, const char *path) 1134 { 1135 const char *name, *sec, *end; 1136 1137 req->isquery = 0; 1138 req->q.equal = 1; 1139 req->q.manpath = NULL; 1140 req->q.arch = NULL; 1141 1142 /* Mandatory manual page name. */ 1143 if ((name = strrchr(path, '/')) == NULL) 1144 name = path; 1145 else 1146 name++; 1147 1148 /* Optional trailing section. */ 1149 sec = strrchr(name, '.'); 1150 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1151 req->q.query = mandoc_strndup(name, sec - name - 1); 1152 req->q.sec = mandoc_strdup(sec); 1153 } else { 1154 req->q.query = mandoc_strdup(name); 1155 req->q.sec = NULL; 1156 } 1157 1158 /* Handle the case of name[.section] only. */ 1159 if (name == path) 1160 return; 1161 1162 /* Optional manpath. */ 1163 end = strchr(path, '/'); 1164 req->q.manpath = mandoc_strndup(path, end - path); 1165 if (validate_manpath(req, req->q.manpath)) { 1166 path = end + 1; 1167 if (name == path) 1168 return; 1169 } else { 1170 free(req->q.manpath); 1171 req->q.manpath = NULL; 1172 } 1173 1174 /* Optional section. */ 1175 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1176 path += 3; 1177 end = strchr(path, '/'); 1178 free(req->q.sec); 1179 req->q.sec = mandoc_strndup(path, end - path); 1180 path = end + 1; 1181 if (name == path) 1182 return; 1183 } 1184 1185 /* Optional architecture. */ 1186 end = strchr(path, '/'); 1187 if (end + 1 != name) { 1188 pg_error_badrequest( 1189 "You specified too many directory components."); 1190 exit(EXIT_FAILURE); 1191 } 1192 req->q.arch = mandoc_strndup(path, end - path); 1193 if (validate_arch(req->q.arch) == 0) { 1194 pg_error_badrequest( 1195 "You specified an invalid directory component."); 1196 exit(EXIT_FAILURE); 1197 } 1198 } 1199 1200 /* 1201 * Scan for indexable paths. 1202 */ 1203 static void 1204 parse_manpath_conf(struct req *req) 1205 { 1206 FILE *fp; 1207 char *dp; 1208 size_t dpsz; 1209 ssize_t len; 1210 1211 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1212 warn("%s/manpath.conf", MAN_DIR); 1213 pg_error_internal(); 1214 exit(EXIT_FAILURE); 1215 } 1216 1217 dp = NULL; 1218 dpsz = 0; 1219 1220 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1221 if (dp[len - 1] == '\n') 1222 dp[--len] = '\0'; 1223 req->p = mandoc_realloc(req->p, 1224 (req->psz + 1) * sizeof(char *)); 1225 if ( ! validate_urifrag(dp)) { 1226 warnx("%s/manpath.conf contains " 1227 "unsafe path \"%s\"", MAN_DIR, dp); 1228 pg_error_internal(); 1229 exit(EXIT_FAILURE); 1230 } 1231 if (strchr(dp, '/') != NULL) { 1232 warnx("%s/manpath.conf contains " 1233 "path with slash \"%s\"", MAN_DIR, dp); 1234 pg_error_internal(); 1235 exit(EXIT_FAILURE); 1236 } 1237 req->p[req->psz++] = dp; 1238 dp = NULL; 1239 dpsz = 0; 1240 } 1241 free(dp); 1242 1243 if (req->p == NULL) { 1244 warnx("%s/manpath.conf is empty", MAN_DIR); 1245 pg_error_internal(); 1246 exit(EXIT_FAILURE); 1247 } 1248 } 1249