1 /* $OpenBSD: cgi.c,v 1.106 2019/10/01 17:54:04 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "mandoc_parse.h" 38 #include "main.h" 39 #include "manconf.h" 40 #include "mansearch.h" 41 #include "cgi.h" 42 43 /* 44 * A query as passed to the search function. 45 */ 46 struct query { 47 char *manpath; /* desired manual directory */ 48 char *arch; /* architecture */ 49 char *sec; /* manual section */ 50 char *query; /* unparsed query expression */ 51 int equal; /* match whole names, not substrings */ 52 }; 53 54 struct req { 55 struct query q; 56 char **p; /* array of available manpaths */ 57 size_t psz; /* number of available manpaths */ 58 int isquery; /* QUERY_STRING used, not PATH_INFO */ 59 }; 60 61 enum focus { 62 FOCUS_NONE = 0, 63 FOCUS_QUERY 64 }; 65 66 static void html_print(const char *); 67 static void html_putchar(char); 68 static int http_decode(char *); 69 static void http_encode(const char *p); 70 static void parse_manpath_conf(struct req *); 71 static void parse_path_info(struct req *req, const char *path); 72 static void parse_query_string(struct req *, const char *); 73 static void pg_error_badrequest(const char *); 74 static void pg_error_internal(void); 75 static void pg_index(const struct req *); 76 static void pg_noresult(const struct req *, int, const char *, 77 const char *); 78 static void pg_redirect(const struct req *, const char *); 79 static void pg_search(const struct req *); 80 static void pg_searchres(const struct req *, 81 struct manpage *, size_t); 82 static void pg_show(struct req *, const char *); 83 static void resp_begin_html(int, const char *, const char *); 84 static void resp_begin_http(int, const char *); 85 static void resp_catman(const struct req *, const char *); 86 static void resp_copy(const char *); 87 static void resp_end_html(void); 88 static void resp_format(const struct req *, const char *); 89 static void resp_searchform(const struct req *, enum focus); 90 static void resp_show(const struct req *, const char *); 91 static void set_query_attr(char **, char **); 92 static int validate_arch(const char *); 93 static int validate_filename(const char *); 94 static int validate_manpath(const struct req *, const char *); 95 static int validate_urifrag(const char *); 96 97 static const char *scriptname = SCRIPT_NAME; 98 99 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 100 static const char *const sec_numbers[] = { 101 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 102 }; 103 static const char *const sec_names[] = { 104 "All Sections", 105 "1 - General Commands", 106 "2 - System Calls", 107 "3 - Library Functions", 108 "3p - Perl Library", 109 "4 - Device Drivers", 110 "5 - File Formats", 111 "6 - Games", 112 "7 - Miscellaneous Information", 113 "8 - System Manager\'s Manual", 114 "9 - Kernel Developer\'s Manual" 115 }; 116 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 117 118 static const char *const arch_names[] = { 119 "amd64", "alpha", "armv7", "arm64", 120 "hppa", "i386", "landisk", 121 "loongson", "luna88k", "macppc", "mips64", 122 "octeon", "sgi", "socppc", "sparc64", 123 "amiga", "arc", "armish", "arm32", 124 "atari", "aviion", "beagle", "cats", 125 "hppa64", "hp300", 126 "ia64", "mac68k", "mvme68k", "mvme88k", 127 "mvmeppc", "palm", "pc532", "pegasos", 128 "pmax", "powerpc", "solbourne", "sparc", 129 "sun3", "vax", "wgrisc", "x68k", 130 "zaurus" 131 }; 132 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 133 134 /* 135 * Print a character, escaping HTML along the way. 136 * This will pass non-ASCII straight to output: be warned! 137 */ 138 static void 139 html_putchar(char c) 140 { 141 142 switch (c) { 143 case '"': 144 printf("""); 145 break; 146 case '&': 147 printf("&"); 148 break; 149 case '>': 150 printf(">"); 151 break; 152 case '<': 153 printf("<"); 154 break; 155 default: 156 putchar((unsigned char)c); 157 break; 158 } 159 } 160 161 /* 162 * Call through to html_putchar(). 163 * Accepts NULL strings. 164 */ 165 static void 166 html_print(const char *p) 167 { 168 169 if (NULL == p) 170 return; 171 while ('\0' != *p) 172 html_putchar(*p++); 173 } 174 175 /* 176 * Transfer the responsibility for the allocated string *val 177 * to the query structure. 178 */ 179 static void 180 set_query_attr(char **attr, char **val) 181 { 182 183 free(*attr); 184 if (**val == '\0') { 185 *attr = NULL; 186 free(*val); 187 } else 188 *attr = *val; 189 *val = NULL; 190 } 191 192 /* 193 * Parse the QUERY_STRING for key-value pairs 194 * and store the values into the query structure. 195 */ 196 static void 197 parse_query_string(struct req *req, const char *qs) 198 { 199 char *key, *val; 200 size_t keysz, valsz; 201 202 req->isquery = 1; 203 req->q.manpath = NULL; 204 req->q.arch = NULL; 205 req->q.sec = NULL; 206 req->q.query = NULL; 207 req->q.equal = 1; 208 209 key = val = NULL; 210 while (*qs != '\0') { 211 212 /* Parse one key. */ 213 214 keysz = strcspn(qs, "=;&"); 215 key = mandoc_strndup(qs, keysz); 216 qs += keysz; 217 if (*qs != '=') 218 goto next; 219 220 /* Parse one value. */ 221 222 valsz = strcspn(++qs, ";&"); 223 val = mandoc_strndup(qs, valsz); 224 qs += valsz; 225 226 /* Decode and catch encoding errors. */ 227 228 if ( ! (http_decode(key) && http_decode(val))) 229 goto next; 230 231 /* Handle key-value pairs. */ 232 233 if ( ! strcmp(key, "query")) 234 set_query_attr(&req->q.query, &val); 235 236 else if ( ! strcmp(key, "apropos")) 237 req->q.equal = !strcmp(val, "0"); 238 239 else if ( ! strcmp(key, "manpath")) { 240 #ifdef COMPAT_OLDURI 241 if ( ! strncmp(val, "OpenBSD ", 8)) { 242 val[7] = '-'; 243 if ('C' == val[8]) 244 val[8] = 'c'; 245 } 246 #endif 247 set_query_attr(&req->q.manpath, &val); 248 } 249 250 else if ( ! (strcmp(key, "sec") 251 #ifdef COMPAT_OLDURI 252 && strcmp(key, "sektion") 253 #endif 254 )) { 255 if ( ! strcmp(val, "0")) 256 *val = '\0'; 257 set_query_attr(&req->q.sec, &val); 258 } 259 260 else if ( ! strcmp(key, "arch")) { 261 if ( ! strcmp(val, "default")) 262 *val = '\0'; 263 set_query_attr(&req->q.arch, &val); 264 } 265 266 /* 267 * The key must be freed in any case. 268 * The val may have been handed over to the query 269 * structure, in which case it is now NULL. 270 */ 271 next: 272 free(key); 273 key = NULL; 274 free(val); 275 val = NULL; 276 277 if (*qs != '\0') 278 qs++; 279 } 280 } 281 282 /* 283 * HTTP-decode a string. The standard explanation is that this turns 284 * "%4e+foo" into "n foo" in the regular way. This is done in-place 285 * over the allocated string. 286 */ 287 static int 288 http_decode(char *p) 289 { 290 char hex[3]; 291 char *q; 292 int c; 293 294 hex[2] = '\0'; 295 296 q = p; 297 for ( ; '\0' != *p; p++, q++) { 298 if ('%' == *p) { 299 if ('\0' == (hex[0] = *(p + 1))) 300 return 0; 301 if ('\0' == (hex[1] = *(p + 2))) 302 return 0; 303 if (1 != sscanf(hex, "%x", &c)) 304 return 0; 305 if ('\0' == c) 306 return 0; 307 308 *q = (char)c; 309 p += 2; 310 } else 311 *q = '+' == *p ? ' ' : *p; 312 } 313 314 *q = '\0'; 315 return 1; 316 } 317 318 static void 319 http_encode(const char *p) 320 { 321 for (; *p != '\0'; p++) { 322 if (isalnum((unsigned char)*p) == 0 && 323 strchr("-._~", *p) == NULL) 324 printf("%%%2.2X", (unsigned char)*p); 325 else 326 putchar(*p); 327 } 328 } 329 330 static void 331 resp_begin_http(int code, const char *msg) 332 { 333 334 if (200 != code) 335 printf("Status: %d %s\r\n", code, msg); 336 337 printf("Content-Type: text/html; charset=utf-8\r\n" 338 "Cache-Control: no-cache\r\n" 339 "Pragma: no-cache\r\n" 340 "\r\n"); 341 342 fflush(stdout); 343 } 344 345 static void 346 resp_copy(const char *filename) 347 { 348 char buf[4096]; 349 ssize_t sz; 350 int fd; 351 352 if ((fd = open(filename, O_RDONLY)) != -1) { 353 fflush(stdout); 354 while ((sz = read(fd, buf, sizeof(buf))) > 0) 355 write(STDOUT_FILENO, buf, sz); 356 close(fd); 357 } 358 } 359 360 static void 361 resp_begin_html(int code, const char *msg, const char *file) 362 { 363 char *cp; 364 365 resp_begin_http(code, msg); 366 367 printf("<!DOCTYPE html>\n" 368 "<html>\n" 369 "<head>\n" 370 " <meta charset=\"UTF-8\"/>\n" 371 " <meta name=\"viewport\"" 372 " content=\"width=device-width, initial-scale=1.0\">\n" 373 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 374 " type=\"text/css\" media=\"all\">\n" 375 " <title>", 376 CSS_DIR); 377 if (file != NULL) { 378 if ((cp = strrchr(file, '/')) != NULL) 379 file = cp + 1; 380 if ((cp = strrchr(file, '.')) != NULL) { 381 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 382 } else 383 printf("%s - ", file); 384 } 385 printf("%s</title>\n" 386 "</head>\n" 387 "<body>\n", 388 CUSTOMIZE_TITLE); 389 390 resp_copy(MAN_DIR "/header.html"); 391 } 392 393 static void 394 resp_end_html(void) 395 { 396 397 resp_copy(MAN_DIR "/footer.html"); 398 399 puts("</body>\n" 400 "</html>"); 401 } 402 403 static void 404 resp_searchform(const struct req *req, enum focus focus) 405 { 406 int i; 407 408 printf("<form action=\"/%s\" method=\"get\">\n" 409 " <fieldset>\n" 410 " <legend>Manual Page Search Parameters</legend>\n", 411 scriptname); 412 413 /* Write query input box. */ 414 415 printf(" <input type=\"search\" name=\"query\" value=\""); 416 if (req->q.query != NULL) 417 html_print(req->q.query); 418 printf( "\" size=\"40\""); 419 if (focus == FOCUS_QUERY) 420 printf(" autofocus"); 421 puts(">"); 422 423 /* Write submission buttons. */ 424 425 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 426 "man</button>\n" 427 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 428 "apropos</button>\n" 429 " <br/>\n"); 430 431 /* Write section selector. */ 432 433 puts(" <select name=\"sec\">"); 434 for (i = 0; i < sec_MAX; i++) { 435 printf(" <option value=\"%s\"", sec_numbers[i]); 436 if (NULL != req->q.sec && 437 0 == strcmp(sec_numbers[i], req->q.sec)) 438 printf(" selected=\"selected\""); 439 printf(">%s</option>\n", sec_names[i]); 440 } 441 puts(" </select>"); 442 443 /* Write architecture selector. */ 444 445 printf( " <select name=\"arch\">\n" 446 " <option value=\"default\""); 447 if (NULL == req->q.arch) 448 printf(" selected=\"selected\""); 449 puts(">All Architectures</option>"); 450 for (i = 0; i < arch_MAX; i++) { 451 printf(" <option"); 452 if (NULL != req->q.arch && 453 0 == strcmp(arch_names[i], req->q.arch)) 454 printf(" selected=\"selected\""); 455 printf(">%s</option>\n", arch_names[i]); 456 } 457 puts(" </select>"); 458 459 /* Write manpath selector. */ 460 461 if (req->psz > 1) { 462 puts(" <select name=\"manpath\">"); 463 for (i = 0; i < (int)req->psz; i++) { 464 printf(" <option"); 465 if (strcmp(req->q.manpath, req->p[i]) == 0) 466 printf(" selected=\"selected\""); 467 printf(">"); 468 html_print(req->p[i]); 469 puts("</option>"); 470 } 471 puts(" </select>"); 472 } 473 474 puts(" </fieldset>\n" 475 "</form>"); 476 } 477 478 static int 479 validate_urifrag(const char *frag) 480 { 481 482 while ('\0' != *frag) { 483 if ( ! (isalnum((unsigned char)*frag) || 484 '-' == *frag || '.' == *frag || 485 '/' == *frag || '_' == *frag)) 486 return 0; 487 frag++; 488 } 489 return 1; 490 } 491 492 static int 493 validate_manpath(const struct req *req, const char* manpath) 494 { 495 size_t i; 496 497 for (i = 0; i < req->psz; i++) 498 if ( ! strcmp(manpath, req->p[i])) 499 return 1; 500 501 return 0; 502 } 503 504 static int 505 validate_arch(const char *arch) 506 { 507 int i; 508 509 for (i = 0; i < arch_MAX; i++) 510 if (strcmp(arch, arch_names[i]) == 0) 511 return 1; 512 513 return 0; 514 } 515 516 static int 517 validate_filename(const char *file) 518 { 519 520 if ('.' == file[0] && '/' == file[1]) 521 file += 2; 522 523 return ! (strstr(file, "../") || strstr(file, "/..") || 524 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 525 } 526 527 static void 528 pg_index(const struct req *req) 529 { 530 531 resp_begin_html(200, NULL, NULL); 532 resp_searchform(req, FOCUS_QUERY); 533 printf("<p>\n" 534 "This web interface is documented in the\n" 535 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 536 "manual, and the\n" 537 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 538 "manual explains the query syntax.\n" 539 "</p>\n", 540 scriptname, *scriptname == '\0' ? "" : "/", 541 scriptname, *scriptname == '\0' ? "" : "/"); 542 resp_end_html(); 543 } 544 545 static void 546 pg_noresult(const struct req *req, int code, const char *http_msg, 547 const char *user_msg) 548 { 549 resp_begin_html(code, http_msg, NULL); 550 resp_searchform(req, FOCUS_QUERY); 551 puts("<p>"); 552 puts(user_msg); 553 puts("</p>"); 554 resp_end_html(); 555 } 556 557 static void 558 pg_error_badrequest(const char *msg) 559 { 560 561 resp_begin_html(400, "Bad Request", NULL); 562 puts("<h1>Bad Request</h1>\n" 563 "<p>\n"); 564 puts(msg); 565 printf("Try again from the\n" 566 "<a href=\"/%s\">main page</a>.\n" 567 "</p>", scriptname); 568 resp_end_html(); 569 } 570 571 static void 572 pg_error_internal(void) 573 { 574 resp_begin_html(500, "Internal Server Error", NULL); 575 puts("<p>Internal Server Error</p>"); 576 resp_end_html(); 577 } 578 579 static void 580 pg_redirect(const struct req *req, const char *name) 581 { 582 printf("Status: 303 See Other\r\n" 583 "Location: /"); 584 if (*scriptname != '\0') 585 printf("%s/", scriptname); 586 if (strcmp(req->q.manpath, req->p[0])) 587 printf("%s/", req->q.manpath); 588 if (req->q.arch != NULL) 589 printf("%s/", req->q.arch); 590 http_encode(name); 591 if (req->q.sec != NULL) { 592 putchar('.'); 593 http_encode(req->q.sec); 594 } 595 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 596 } 597 598 static void 599 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 600 { 601 char *arch, *archend; 602 const char *sec; 603 size_t i, iuse; 604 int archprio, archpriouse; 605 int prio, priouse; 606 607 for (i = 0; i < sz; i++) { 608 if (validate_filename(r[i].file)) 609 continue; 610 warnx("invalid filename %s in %s database", 611 r[i].file, req->q.manpath); 612 pg_error_internal(); 613 return; 614 } 615 616 if (req->isquery && sz == 1) { 617 /* 618 * If we have just one result, then jump there now 619 * without any delay. 620 */ 621 printf("Status: 303 See Other\r\n" 622 "Location: /"); 623 if (*scriptname != '\0') 624 printf("%s/", scriptname); 625 if (strcmp(req->q.manpath, req->p[0])) 626 printf("%s/", req->q.manpath); 627 printf("%s\r\n" 628 "Content-Type: text/html; charset=utf-8\r\n\r\n", 629 r[0].file); 630 return; 631 } 632 633 /* 634 * In man(1) mode, show one of the pages 635 * even if more than one is found. 636 */ 637 638 iuse = 0; 639 if (req->q.equal || sz == 1) { 640 priouse = 20; 641 archpriouse = 3; 642 for (i = 0; i < sz; i++) { 643 sec = r[i].file; 644 sec += strcspn(sec, "123456789"); 645 if (sec[0] == '\0') 646 continue; 647 prio = sec_prios[sec[0] - '1']; 648 if (sec[1] != '/') 649 prio += 10; 650 if (req->q.arch == NULL) { 651 archprio = 652 ((arch = strchr(sec + 1, '/')) 653 == NULL) ? 3 : 654 ((archend = strchr(arch + 1, '/')) 655 == NULL) ? 0 : 656 strncmp(arch, "amd64/", 657 archend - arch) ? 2 : 1; 658 if (archprio < archpriouse) { 659 archpriouse = archprio; 660 priouse = prio; 661 iuse = i; 662 continue; 663 } 664 if (archprio > archpriouse) 665 continue; 666 } 667 if (prio >= priouse) 668 continue; 669 priouse = prio; 670 iuse = i; 671 } 672 resp_begin_html(200, NULL, r[iuse].file); 673 } else 674 resp_begin_html(200, NULL, NULL); 675 676 resp_searchform(req, 677 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 678 679 if (sz > 1) { 680 puts("<table class=\"results\">"); 681 for (i = 0; i < sz; i++) { 682 printf(" <tr>\n" 683 " <td>" 684 "<a class=\"Xr\" href=\"/"); 685 if (*scriptname != '\0') 686 printf("%s/", scriptname); 687 if (strcmp(req->q.manpath, req->p[0])) 688 printf("%s/", req->q.manpath); 689 printf("%s\">", r[i].file); 690 html_print(r[i].names); 691 printf("</a></td>\n" 692 " <td><span class=\"Nd\">"); 693 html_print(r[i].output); 694 puts("</span></td>\n" 695 " </tr>"); 696 } 697 puts("</table>"); 698 } 699 700 if (req->q.equal || sz == 1) { 701 puts("<hr>"); 702 resp_show(req, r[iuse].file); 703 } 704 705 resp_end_html(); 706 } 707 708 static void 709 resp_catman(const struct req *req, const char *file) 710 { 711 FILE *f; 712 char *p; 713 size_t sz; 714 ssize_t len; 715 int i; 716 int italic, bold; 717 718 if ((f = fopen(file, "r")) == NULL) { 719 puts("<p>You specified an invalid manual file.</p>"); 720 return; 721 } 722 723 puts("<div class=\"catman\">\n" 724 "<pre>"); 725 726 p = NULL; 727 sz = 0; 728 729 while ((len = getline(&p, &sz, f)) != -1) { 730 bold = italic = 0; 731 for (i = 0; i < len - 1; i++) { 732 /* 733 * This means that the catpage is out of state. 734 * Ignore it and keep going (although the 735 * catpage is bogus). 736 */ 737 738 if ('\b' == p[i] || '\n' == p[i]) 739 continue; 740 741 /* 742 * Print a regular character. 743 * Close out any bold/italic scopes. 744 * If we're in back-space mode, make sure we'll 745 * have something to enter when we backspace. 746 */ 747 748 if ('\b' != p[i + 1]) { 749 if (italic) 750 printf("</i>"); 751 if (bold) 752 printf("</b>"); 753 italic = bold = 0; 754 html_putchar(p[i]); 755 continue; 756 } else if (i + 2 >= len) 757 continue; 758 759 /* Italic mode. */ 760 761 if ('_' == p[i]) { 762 if (bold) 763 printf("</b>"); 764 if ( ! italic) 765 printf("<i>"); 766 bold = 0; 767 italic = 1; 768 i += 2; 769 html_putchar(p[i]); 770 continue; 771 } 772 773 /* 774 * Handle funny behaviour troff-isms. 775 * These grok'd from the original man2html.c. 776 */ 777 778 if (('+' == p[i] && 'o' == p[i + 2]) || 779 ('o' == p[i] && '+' == p[i + 2]) || 780 ('|' == p[i] && '=' == p[i + 2]) || 781 ('=' == p[i] && '|' == p[i + 2]) || 782 ('*' == p[i] && '=' == p[i + 2]) || 783 ('=' == p[i] && '*' == p[i + 2]) || 784 ('*' == p[i] && '|' == p[i + 2]) || 785 ('|' == p[i] && '*' == p[i + 2])) { 786 if (italic) 787 printf("</i>"); 788 if (bold) 789 printf("</b>"); 790 italic = bold = 0; 791 putchar('*'); 792 i += 2; 793 continue; 794 } else if (('|' == p[i] && '-' == p[i + 2]) || 795 ('-' == p[i] && '|' == p[i + 1]) || 796 ('+' == p[i] && '-' == p[i + 1]) || 797 ('-' == p[i] && '+' == p[i + 1]) || 798 ('+' == p[i] && '|' == p[i + 1]) || 799 ('|' == p[i] && '+' == p[i + 1])) { 800 if (italic) 801 printf("</i>"); 802 if (bold) 803 printf("</b>"); 804 italic = bold = 0; 805 putchar('+'); 806 i += 2; 807 continue; 808 } 809 810 /* Bold mode. */ 811 812 if (italic) 813 printf("</i>"); 814 if ( ! bold) 815 printf("<b>"); 816 bold = 1; 817 italic = 0; 818 i += 2; 819 html_putchar(p[i]); 820 } 821 822 /* 823 * Clean up the last character. 824 * We can get to a newline; don't print that. 825 */ 826 827 if (italic) 828 printf("</i>"); 829 if (bold) 830 printf("</b>"); 831 832 if (i == len - 1 && p[i] != '\n') 833 html_putchar(p[i]); 834 835 putchar('\n'); 836 } 837 free(p); 838 839 puts("</pre>\n" 840 "</div>"); 841 842 fclose(f); 843 } 844 845 static void 846 resp_format(const struct req *req, const char *file) 847 { 848 struct manoutput conf; 849 struct mparse *mp; 850 struct roff_meta *meta; 851 void *vp; 852 int fd; 853 int usepath; 854 855 if (-1 == (fd = open(file, O_RDONLY, 0))) { 856 puts("<p>You specified an invalid manual file.</p>"); 857 return; 858 } 859 860 mchars_alloc(); 861 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 862 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 863 mparse_readfd(mp, fd, file); 864 close(fd); 865 meta = mparse_result(mp); 866 867 memset(&conf, 0, sizeof(conf)); 868 conf.fragment = 1; 869 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 870 usepath = strcmp(req->q.manpath, req->p[0]); 871 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 872 scriptname, *scriptname == '\0' ? "" : "/", 873 usepath ? req->q.manpath : "", usepath ? "/" : ""); 874 875 vp = html_alloc(&conf); 876 if (meta->macroset == MACROSET_MDOC) 877 html_mdoc(vp, meta); 878 else 879 html_man(vp, meta); 880 881 html_free(vp); 882 mparse_free(mp); 883 mchars_free(); 884 free(conf.man); 885 free(conf.style); 886 } 887 888 static void 889 resp_show(const struct req *req, const char *file) 890 { 891 892 if ('.' == file[0] && '/' == file[1]) 893 file += 2; 894 895 if ('c' == *file) 896 resp_catman(req, file); 897 else 898 resp_format(req, file); 899 } 900 901 static void 902 pg_show(struct req *req, const char *fullpath) 903 { 904 char *manpath; 905 const char *file; 906 907 if ((file = strchr(fullpath, '/')) == NULL) { 908 pg_error_badrequest( 909 "You did not specify a page to show."); 910 return; 911 } 912 manpath = mandoc_strndup(fullpath, file - fullpath); 913 file++; 914 915 if ( ! validate_manpath(req, manpath)) { 916 pg_error_badrequest( 917 "You specified an invalid manpath."); 918 free(manpath); 919 return; 920 } 921 922 /* 923 * Begin by chdir()ing into the manpath. 924 * This way we can pick up the database files, which are 925 * relative to the manpath root. 926 */ 927 928 if (chdir(manpath) == -1) { 929 warn("chdir %s", manpath); 930 pg_error_internal(); 931 free(manpath); 932 return; 933 } 934 free(manpath); 935 936 if ( ! validate_filename(file)) { 937 pg_error_badrequest( 938 "You specified an invalid manual file."); 939 return; 940 } 941 942 resp_begin_html(200, NULL, file); 943 resp_searchform(req, FOCUS_NONE); 944 resp_show(req, file); 945 resp_end_html(); 946 } 947 948 static void 949 pg_search(const struct req *req) 950 { 951 struct mansearch search; 952 struct manpaths paths; 953 struct manpage *res; 954 char **argv; 955 char *query, *rp, *wp; 956 size_t ressz; 957 int argc; 958 959 /* 960 * Begin by chdir()ing into the root of the manpath. 961 * This way we can pick up the database files, which are 962 * relative to the manpath root. 963 */ 964 965 if (chdir(req->q.manpath) == -1) { 966 warn("chdir %s", req->q.manpath); 967 pg_error_internal(); 968 return; 969 } 970 971 search.arch = req->q.arch; 972 search.sec = req->q.sec; 973 search.outkey = "Nd"; 974 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 975 search.firstmatch = 1; 976 977 paths.sz = 1; 978 paths.paths = mandoc_malloc(sizeof(char *)); 979 paths.paths[0] = mandoc_strdup("."); 980 981 /* 982 * Break apart at spaces with backslash-escaping. 983 */ 984 985 argc = 0; 986 argv = NULL; 987 rp = query = mandoc_strdup(req->q.query); 988 for (;;) { 989 while (isspace((unsigned char)*rp)) 990 rp++; 991 if (*rp == '\0') 992 break; 993 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 994 argv[argc++] = wp = rp; 995 for (;;) { 996 if (isspace((unsigned char)*rp)) { 997 *wp = '\0'; 998 rp++; 999 break; 1000 } 1001 if (rp[0] == '\\' && rp[1] != '\0') 1002 rp++; 1003 if (wp != rp) 1004 *wp = *rp; 1005 if (*rp == '\0') 1006 break; 1007 wp++; 1008 rp++; 1009 } 1010 } 1011 1012 res = NULL; 1013 ressz = 0; 1014 if (req->isquery && req->q.equal && argc == 1) 1015 pg_redirect(req, argv[0]); 1016 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1017 pg_noresult(req, 400, "Bad Request", 1018 "You entered an invalid query."); 1019 else if (ressz == 0) 1020 pg_noresult(req, 404, "Not Found", "No results found."); 1021 else 1022 pg_searchres(req, res, ressz); 1023 1024 free(query); 1025 mansearch_free(res, ressz); 1026 free(paths.paths[0]); 1027 free(paths.paths); 1028 } 1029 1030 int 1031 main(void) 1032 { 1033 struct req req; 1034 struct itimerval itimer; 1035 const char *path; 1036 const char *querystring; 1037 int i; 1038 1039 /* 1040 * The "rpath" pledge could be revoked after mparse_readfd() 1041 * if the file desciptor to "/footer.html" would be opened 1042 * up front, but it's probably not worth the complication 1043 * of the code it would cause: it would require scattering 1044 * pledge() calls in multiple low-level resp_*() functions. 1045 */ 1046 1047 if (pledge("stdio rpath", NULL) == -1) { 1048 warn("pledge"); 1049 pg_error_internal(); 1050 return EXIT_FAILURE; 1051 } 1052 1053 /* Poor man's ReDoS mitigation. */ 1054 1055 itimer.it_value.tv_sec = 2; 1056 itimer.it_value.tv_usec = 0; 1057 itimer.it_interval.tv_sec = 2; 1058 itimer.it_interval.tv_usec = 0; 1059 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1060 warn("setitimer"); 1061 pg_error_internal(); 1062 return EXIT_FAILURE; 1063 } 1064 1065 /* 1066 * First we change directory into the MAN_DIR so that 1067 * subsequent scanning for manpath directories is rooted 1068 * relative to the same position. 1069 */ 1070 1071 if (chdir(MAN_DIR) == -1) { 1072 warn("MAN_DIR: %s", MAN_DIR); 1073 pg_error_internal(); 1074 return EXIT_FAILURE; 1075 } 1076 1077 memset(&req, 0, sizeof(struct req)); 1078 req.q.equal = 1; 1079 parse_manpath_conf(&req); 1080 1081 /* Parse the path info and the query string. */ 1082 1083 if ((path = getenv("PATH_INFO")) == NULL) 1084 path = ""; 1085 else if (*path == '/') 1086 path++; 1087 1088 if (*path != '\0') { 1089 parse_path_info(&req, path); 1090 if (req.q.manpath == NULL || req.q.sec == NULL || 1091 *req.q.query == '\0' || access(path, F_OK) == -1) 1092 path = ""; 1093 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1094 parse_query_string(&req, querystring); 1095 1096 /* Validate parsed data and add defaults. */ 1097 1098 if (req.q.manpath == NULL) 1099 req.q.manpath = mandoc_strdup(req.p[0]); 1100 else if ( ! validate_manpath(&req, req.q.manpath)) { 1101 pg_error_badrequest( 1102 "You specified an invalid manpath."); 1103 return EXIT_FAILURE; 1104 } 1105 1106 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1107 pg_error_badrequest( 1108 "You specified an invalid architecture."); 1109 return EXIT_FAILURE; 1110 } 1111 1112 /* Dispatch to the three different pages. */ 1113 1114 if ('\0' != *path) 1115 pg_show(&req, path); 1116 else if (NULL != req.q.query) 1117 pg_search(&req); 1118 else 1119 pg_index(&req); 1120 1121 free(req.q.manpath); 1122 free(req.q.arch); 1123 free(req.q.sec); 1124 free(req.q.query); 1125 for (i = 0; i < (int)req.psz; i++) 1126 free(req.p[i]); 1127 free(req.p); 1128 return EXIT_SUCCESS; 1129 } 1130 1131 /* 1132 * Translate PATH_INFO to a query. 1133 */ 1134 static void 1135 parse_path_info(struct req *req, const char *path) 1136 { 1137 const char *name, *sec, *end; 1138 1139 req->isquery = 0; 1140 req->q.equal = 1; 1141 req->q.manpath = NULL; 1142 req->q.arch = NULL; 1143 1144 /* Mandatory manual page name. */ 1145 if ((name = strrchr(path, '/')) == NULL) 1146 name = path; 1147 else 1148 name++; 1149 1150 /* Optional trailing section. */ 1151 sec = strrchr(name, '.'); 1152 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1153 req->q.query = mandoc_strndup(name, sec - name - 1); 1154 req->q.sec = mandoc_strdup(sec); 1155 } else { 1156 req->q.query = mandoc_strdup(name); 1157 req->q.sec = NULL; 1158 } 1159 1160 /* Handle the case of name[.section] only. */ 1161 if (name == path) 1162 return; 1163 1164 /* Optional manpath. */ 1165 end = strchr(path, '/'); 1166 req->q.manpath = mandoc_strndup(path, end - path); 1167 if (validate_manpath(req, req->q.manpath)) { 1168 path = end + 1; 1169 if (name == path) 1170 return; 1171 } else { 1172 free(req->q.manpath); 1173 req->q.manpath = NULL; 1174 } 1175 1176 /* Optional section. */ 1177 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1178 path += 3; 1179 end = strchr(path, '/'); 1180 free(req->q.sec); 1181 req->q.sec = mandoc_strndup(path, end - path); 1182 path = end + 1; 1183 if (name == path) 1184 return; 1185 } 1186 1187 /* Optional architecture. */ 1188 end = strchr(path, '/'); 1189 if (end + 1 != name) { 1190 pg_error_badrequest( 1191 "You specified too many directory components."); 1192 exit(EXIT_FAILURE); 1193 } 1194 req->q.arch = mandoc_strndup(path, end - path); 1195 if (validate_arch(req->q.arch) == 0) { 1196 pg_error_badrequest( 1197 "You specified an invalid directory component."); 1198 exit(EXIT_FAILURE); 1199 } 1200 } 1201 1202 /* 1203 * Scan for indexable paths. 1204 */ 1205 static void 1206 parse_manpath_conf(struct req *req) 1207 { 1208 FILE *fp; 1209 char *dp; 1210 size_t dpsz; 1211 ssize_t len; 1212 1213 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1214 warn("%s/manpath.conf", MAN_DIR); 1215 pg_error_internal(); 1216 exit(EXIT_FAILURE); 1217 } 1218 1219 dp = NULL; 1220 dpsz = 0; 1221 1222 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1223 if (dp[len - 1] == '\n') 1224 dp[--len] = '\0'; 1225 req->p = mandoc_realloc(req->p, 1226 (req->psz + 1) * sizeof(char *)); 1227 if ( ! validate_urifrag(dp)) { 1228 warnx("%s/manpath.conf contains " 1229 "unsafe path \"%s\"", MAN_DIR, dp); 1230 pg_error_internal(); 1231 exit(EXIT_FAILURE); 1232 } 1233 if (strchr(dp, '/') != NULL) { 1234 warnx("%s/manpath.conf contains " 1235 "path with slash \"%s\"", MAN_DIR, dp); 1236 pg_error_internal(); 1237 exit(EXIT_FAILURE); 1238 } 1239 req->p[req->psz++] = dp; 1240 dp = NULL; 1241 dpsz = 0; 1242 } 1243 free(dp); 1244 1245 if (req->p == NULL) { 1246 warnx("%s/manpath.conf is empty", MAN_DIR); 1247 pg_error_internal(); 1248 exit(EXIT_FAILURE); 1249 } 1250 } 1251