1 /* $OpenBSD: cgi.c,v 1.71 2016/05/28 13:40:44 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "main.h" 38 #include "manconf.h" 39 #include "mansearch.h" 40 #include "cgi.h" 41 42 /* 43 * A query as passed to the search function. 44 */ 45 struct query { 46 char *manpath; /* desired manual directory */ 47 char *arch; /* architecture */ 48 char *sec; /* manual section */ 49 char *query; /* unparsed query expression */ 50 int equal; /* match whole names, not substrings */ 51 }; 52 53 struct req { 54 struct query q; 55 char **p; /* array of available manpaths */ 56 size_t psz; /* number of available manpaths */ 57 int isquery; /* QUERY_STRING used, not PATH_INFO */ 58 }; 59 60 enum focus { 61 FOCUS_NONE = 0, 62 FOCUS_QUERY 63 }; 64 65 static void html_print(const char *); 66 static void html_putchar(char); 67 static int http_decode(char *); 68 static void parse_manpath_conf(struct req *); 69 static void parse_path_info(struct req *req, const char *path); 70 static void parse_query_string(struct req *, const char *); 71 static void pg_error_badrequest(const char *); 72 static void pg_error_internal(void); 73 static void pg_index(const struct req *); 74 static void pg_noresult(const struct req *, const char *); 75 static void pg_search(const struct req *); 76 static void pg_searchres(const struct req *, 77 struct manpage *, size_t); 78 static void pg_show(struct req *, const char *); 79 static void resp_begin_html(int, const char *); 80 static void resp_begin_http(int, const char *); 81 static void resp_catman(const struct req *, const char *); 82 static void resp_copy(const char *); 83 static void resp_end_html(void); 84 static void resp_format(const struct req *, const char *); 85 static void resp_searchform(const struct req *, enum focus); 86 static void resp_show(const struct req *, const char *); 87 static void set_query_attr(char **, char **); 88 static int validate_filename(const char *); 89 static int validate_manpath(const struct req *, const char *); 90 static int validate_urifrag(const char *); 91 92 static const char *scriptname = SCRIPT_NAME; 93 94 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 95 static const char *const sec_numbers[] = { 96 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 97 }; 98 static const char *const sec_names[] = { 99 "All Sections", 100 "1 - General Commands", 101 "2 - System Calls", 102 "3 - Library Functions", 103 "3p - Perl Library", 104 "4 - Device Drivers", 105 "5 - File Formats", 106 "6 - Games", 107 "7 - Miscellaneous Information", 108 "8 - System Manager\'s Manual", 109 "9 - Kernel Developer\'s Manual" 110 }; 111 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 112 113 static const char *const arch_names[] = { 114 "amd64", "alpha", "armish", "armv7", 115 "hppa", "hppa64", "i386", "landisk", 116 "loongson", "luna88k", "macppc", "mips64", 117 "octeon", "sgi", "socppc", "sparc", 118 "sparc64", "zaurus", 119 "amiga", "arc", "arm32", "atari", 120 "aviion", "beagle", "cats", "hp300", 121 "ia64", "mac68k", "mvme68k", "mvme88k", 122 "mvmeppc", "palm", "pc532", "pegasos", 123 "pmax", "powerpc", "solbourne", "sun3", 124 "vax", "wgrisc", "x68k" 125 }; 126 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 127 128 /* 129 * Print a character, escaping HTML along the way. 130 * This will pass non-ASCII straight to output: be warned! 131 */ 132 static void 133 html_putchar(char c) 134 { 135 136 switch (c) { 137 case ('"'): 138 printf(""e;"); 139 break; 140 case ('&'): 141 printf("&"); 142 break; 143 case ('>'): 144 printf(">"); 145 break; 146 case ('<'): 147 printf("<"); 148 break; 149 default: 150 putchar((unsigned char)c); 151 break; 152 } 153 } 154 155 /* 156 * Call through to html_putchar(). 157 * Accepts NULL strings. 158 */ 159 static void 160 html_print(const char *p) 161 { 162 163 if (NULL == p) 164 return; 165 while ('\0' != *p) 166 html_putchar(*p++); 167 } 168 169 /* 170 * Transfer the responsibility for the allocated string *val 171 * to the query structure. 172 */ 173 static void 174 set_query_attr(char **attr, char **val) 175 { 176 177 free(*attr); 178 if (**val == '\0') { 179 *attr = NULL; 180 free(*val); 181 } else 182 *attr = *val; 183 *val = NULL; 184 } 185 186 /* 187 * Parse the QUERY_STRING for key-value pairs 188 * and store the values into the query structure. 189 */ 190 static void 191 parse_query_string(struct req *req, const char *qs) 192 { 193 char *key, *val; 194 size_t keysz, valsz; 195 196 req->isquery = 1; 197 req->q.manpath = NULL; 198 req->q.arch = NULL; 199 req->q.sec = NULL; 200 req->q.query = NULL; 201 req->q.equal = 1; 202 203 key = val = NULL; 204 while (*qs != '\0') { 205 206 /* Parse one key. */ 207 208 keysz = strcspn(qs, "=;&"); 209 key = mandoc_strndup(qs, keysz); 210 qs += keysz; 211 if (*qs != '=') 212 goto next; 213 214 /* Parse one value. */ 215 216 valsz = strcspn(++qs, ";&"); 217 val = mandoc_strndup(qs, valsz); 218 qs += valsz; 219 220 /* Decode and catch encoding errors. */ 221 222 if ( ! (http_decode(key) && http_decode(val))) 223 goto next; 224 225 /* Handle key-value pairs. */ 226 227 if ( ! strcmp(key, "query")) 228 set_query_attr(&req->q.query, &val); 229 230 else if ( ! strcmp(key, "apropos")) 231 req->q.equal = !strcmp(val, "0"); 232 233 else if ( ! strcmp(key, "manpath")) { 234 #ifdef COMPAT_OLDURI 235 if ( ! strncmp(val, "OpenBSD ", 8)) { 236 val[7] = '-'; 237 if ('C' == val[8]) 238 val[8] = 'c'; 239 } 240 #endif 241 set_query_attr(&req->q.manpath, &val); 242 } 243 244 else if ( ! (strcmp(key, "sec") 245 #ifdef COMPAT_OLDURI 246 && strcmp(key, "sektion") 247 #endif 248 )) { 249 if ( ! strcmp(val, "0")) 250 *val = '\0'; 251 set_query_attr(&req->q.sec, &val); 252 } 253 254 else if ( ! strcmp(key, "arch")) { 255 if ( ! strcmp(val, "default")) 256 *val = '\0'; 257 set_query_attr(&req->q.arch, &val); 258 } 259 260 /* 261 * The key must be freed in any case. 262 * The val may have been handed over to the query 263 * structure, in which case it is now NULL. 264 */ 265 next: 266 free(key); 267 key = NULL; 268 free(val); 269 val = NULL; 270 271 if (*qs != '\0') 272 qs++; 273 } 274 } 275 276 /* 277 * HTTP-decode a string. The standard explanation is that this turns 278 * "%4e+foo" into "n foo" in the regular way. This is done in-place 279 * over the allocated string. 280 */ 281 static int 282 http_decode(char *p) 283 { 284 char hex[3]; 285 char *q; 286 int c; 287 288 hex[2] = '\0'; 289 290 q = p; 291 for ( ; '\0' != *p; p++, q++) { 292 if ('%' == *p) { 293 if ('\0' == (hex[0] = *(p + 1))) 294 return 0; 295 if ('\0' == (hex[1] = *(p + 2))) 296 return 0; 297 if (1 != sscanf(hex, "%x", &c)) 298 return 0; 299 if ('\0' == c) 300 return 0; 301 302 *q = (char)c; 303 p += 2; 304 } else 305 *q = '+' == *p ? ' ' : *p; 306 } 307 308 *q = '\0'; 309 return 1; 310 } 311 312 static void 313 resp_begin_http(int code, const char *msg) 314 { 315 316 if (200 != code) 317 printf("Status: %d %s\r\n", code, msg); 318 319 printf("Content-Type: text/html; charset=utf-8\r\n" 320 "Cache-Control: no-cache\r\n" 321 "Pragma: no-cache\r\n" 322 "\r\n"); 323 324 fflush(stdout); 325 } 326 327 static void 328 resp_copy(const char *filename) 329 { 330 char buf[4096]; 331 ssize_t sz; 332 int fd; 333 334 if ((fd = open(filename, O_RDONLY)) != -1) { 335 fflush(stdout); 336 while ((sz = read(fd, buf, sizeof(buf))) > 0) 337 write(STDOUT_FILENO, buf, sz); 338 } 339 } 340 341 static void 342 resp_begin_html(int code, const char *msg) 343 { 344 345 resp_begin_http(code, msg); 346 347 printf("<!DOCTYPE html>\n" 348 "<html>\n" 349 "<head>\n" 350 "<meta charset=\"UTF-8\"/>\n" 351 "<link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 352 " type=\"text/css\" media=\"all\">\n" 353 "<title>%s</title>\n" 354 "</head>\n" 355 "<body>\n" 356 "<!-- Begin page content. //-->\n", 357 CSS_DIR, CUSTOMIZE_TITLE); 358 359 resp_copy(MAN_DIR "/header.html"); 360 } 361 362 static void 363 resp_end_html(void) 364 { 365 366 resp_copy(MAN_DIR "/footer.html"); 367 368 puts("</body>\n" 369 "</html>"); 370 } 371 372 static void 373 resp_searchform(const struct req *req, enum focus focus) 374 { 375 int i; 376 377 puts("<!-- Begin search form. //-->"); 378 printf("<div id=\"mancgi\">\n" 379 "<form action=\"/%s\" method=\"get\">\n" 380 "<fieldset>\n" 381 "<legend>Manual Page Search Parameters</legend>\n", 382 scriptname); 383 384 /* Write query input box. */ 385 386 printf("<input type=\"text\" name=\"query\" value=\""); 387 if (req->q.query != NULL) 388 html_print(req->q.query); 389 printf( "\" size=\"40\""); 390 if (focus == FOCUS_QUERY) 391 printf(" autofocus"); 392 puts(">"); 393 394 /* Write submission buttons. */ 395 396 printf( "<button type=\"submit\" name=\"apropos\" value=\"0\">" 397 "man</button>\n" 398 "<button type=\"submit\" name=\"apropos\" value=\"1\">" 399 "apropos</button>\n<br/>\n"); 400 401 /* Write section selector. */ 402 403 puts("<select name=\"sec\">"); 404 for (i = 0; i < sec_MAX; i++) { 405 printf("<option value=\"%s\"", sec_numbers[i]); 406 if (NULL != req->q.sec && 407 0 == strcmp(sec_numbers[i], req->q.sec)) 408 printf(" selected=\"selected\""); 409 printf(">%s</option>\n", sec_names[i]); 410 } 411 puts("</select>"); 412 413 /* Write architecture selector. */ 414 415 printf( "<select name=\"arch\">\n" 416 "<option value=\"default\""); 417 if (NULL == req->q.arch) 418 printf(" selected=\"selected\""); 419 puts(">All Architectures</option>"); 420 for (i = 0; i < arch_MAX; i++) { 421 printf("<option value=\"%s\"", arch_names[i]); 422 if (NULL != req->q.arch && 423 0 == strcmp(arch_names[i], req->q.arch)) 424 printf(" selected=\"selected\""); 425 printf(">%s</option>\n", arch_names[i]); 426 } 427 puts("</select>"); 428 429 /* Write manpath selector. */ 430 431 if (req->psz > 1) { 432 puts("<select name=\"manpath\">"); 433 for (i = 0; i < (int)req->psz; i++) { 434 printf("<option "); 435 if (strcmp(req->q.manpath, req->p[i]) == 0) 436 printf("selected=\"selected\" "); 437 printf("value=\""); 438 html_print(req->p[i]); 439 printf("\">"); 440 html_print(req->p[i]); 441 puts("</option>"); 442 } 443 puts("</select>"); 444 } 445 446 puts("</fieldset>\n" 447 "</form>\n" 448 "</div>"); 449 puts("<!-- End search form. //-->"); 450 } 451 452 static int 453 validate_urifrag(const char *frag) 454 { 455 456 while ('\0' != *frag) { 457 if ( ! (isalnum((unsigned char)*frag) || 458 '-' == *frag || '.' == *frag || 459 '/' == *frag || '_' == *frag)) 460 return 0; 461 frag++; 462 } 463 return 1; 464 } 465 466 static int 467 validate_manpath(const struct req *req, const char* manpath) 468 { 469 size_t i; 470 471 if ( ! strcmp(manpath, "mandoc")) 472 return 1; 473 474 for (i = 0; i < req->psz; i++) 475 if ( ! strcmp(manpath, req->p[i])) 476 return 1; 477 478 return 0; 479 } 480 481 static int 482 validate_filename(const char *file) 483 { 484 485 if ('.' == file[0] && '/' == file[1]) 486 file += 2; 487 488 return ! (strstr(file, "../") || strstr(file, "/..") || 489 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 490 } 491 492 static void 493 pg_index(const struct req *req) 494 { 495 496 resp_begin_html(200, NULL); 497 resp_searchform(req, FOCUS_QUERY); 498 printf("<p>\n" 499 "This web interface is documented in the\n" 500 "<a href=\"/%s%smandoc/man8/man.cgi.8\">man.cgi</a>\n" 501 "manual, and the\n" 502 "<a href=\"/%s%smandoc/man1/apropos.1\">apropos</a>\n" 503 "manual explains the query syntax.\n" 504 "</p>\n", 505 scriptname, *scriptname == '\0' ? "" : "/", 506 scriptname, *scriptname == '\0' ? "" : "/"); 507 resp_end_html(); 508 } 509 510 static void 511 pg_noresult(const struct req *req, const char *msg) 512 { 513 resp_begin_html(200, NULL); 514 resp_searchform(req, FOCUS_QUERY); 515 puts("<p>"); 516 puts(msg); 517 puts("</p>"); 518 resp_end_html(); 519 } 520 521 static void 522 pg_error_badrequest(const char *msg) 523 { 524 525 resp_begin_html(400, "Bad Request"); 526 puts("<h1>Bad Request</h1>\n" 527 "<p>\n"); 528 puts(msg); 529 printf("Try again from the\n" 530 "<a href=\"/%s\">main page</a>.\n" 531 "</p>", scriptname); 532 resp_end_html(); 533 } 534 535 static void 536 pg_error_internal(void) 537 { 538 resp_begin_html(500, "Internal Server Error"); 539 puts("<p>Internal Server Error</p>"); 540 resp_end_html(); 541 } 542 543 static void 544 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 545 { 546 char *arch, *archend; 547 const char *sec; 548 size_t i, iuse; 549 int archprio, archpriouse; 550 int prio, priouse; 551 552 for (i = 0; i < sz; i++) { 553 if (validate_filename(r[i].file)) 554 continue; 555 warnx("invalid filename %s in %s database", 556 r[i].file, req->q.manpath); 557 pg_error_internal(); 558 return; 559 } 560 561 if (req->isquery && sz == 1) { 562 /* 563 * If we have just one result, then jump there now 564 * without any delay. 565 */ 566 printf("Status: 303 See Other\r\n"); 567 printf("Location: http://%s/%s%s%s/%s", 568 HTTP_HOST, scriptname, 569 *scriptname == '\0' ? "" : "/", 570 req->q.manpath, r[0].file); 571 printf("\r\n" 572 "Content-Type: text/html; charset=utf-8\r\n" 573 "\r\n"); 574 return; 575 } 576 577 resp_begin_html(200, NULL); 578 resp_searchform(req, 579 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 580 581 if (sz > 1) { 582 puts("<div class=\"results\">"); 583 puts("<table>"); 584 585 for (i = 0; i < sz; i++) { 586 printf("<tr>\n" 587 "<td class=\"title\">\n" 588 "<a href=\"/%s%s%s/%s", 589 scriptname, *scriptname == '\0' ? "" : "/", 590 req->q.manpath, r[i].file); 591 printf("\">"); 592 html_print(r[i].names); 593 printf("</a>\n" 594 "</td>\n" 595 "<td class=\"desc\">"); 596 html_print(r[i].output); 597 puts("</td>\n" 598 "</tr>"); 599 } 600 601 puts("</table>\n" 602 "</div>"); 603 } 604 605 /* 606 * In man(1) mode, show one of the pages 607 * even if more than one is found. 608 */ 609 610 if (req->q.equal || sz == 1) { 611 puts("<hr>"); 612 iuse = 0; 613 priouse = 20; 614 archpriouse = 3; 615 for (i = 0; i < sz; i++) { 616 sec = r[i].file; 617 sec += strcspn(sec, "123456789"); 618 if (sec[0] == '\0') 619 continue; 620 prio = sec_prios[sec[0] - '1']; 621 if (sec[1] != '/') 622 prio += 10; 623 if (req->q.arch == NULL) { 624 archprio = 625 ((arch = strchr(sec + 1, '/')) 626 == NULL) ? 3 : 627 ((archend = strchr(arch + 1, '/')) 628 == NULL) ? 0 : 629 strncmp(arch, "amd64/", 630 archend - arch) ? 2 : 1; 631 if (archprio < archpriouse) { 632 archpriouse = archprio; 633 priouse = prio; 634 iuse = i; 635 continue; 636 } 637 if (archprio > archpriouse) 638 continue; 639 } 640 if (prio >= priouse) 641 continue; 642 priouse = prio; 643 iuse = i; 644 } 645 resp_show(req, r[iuse].file); 646 } 647 648 resp_end_html(); 649 } 650 651 static void 652 resp_catman(const struct req *req, const char *file) 653 { 654 FILE *f; 655 char *p; 656 size_t sz; 657 ssize_t len; 658 int i; 659 int italic, bold; 660 661 if ((f = fopen(file, "r")) == NULL) { 662 puts("<p>You specified an invalid manual file.</p>"); 663 return; 664 } 665 666 puts("<div class=\"catman\">\n" 667 "<pre>"); 668 669 p = NULL; 670 sz = 0; 671 672 while ((len = getline(&p, &sz, f)) != -1) { 673 bold = italic = 0; 674 for (i = 0; i < len - 1; i++) { 675 /* 676 * This means that the catpage is out of state. 677 * Ignore it and keep going (although the 678 * catpage is bogus). 679 */ 680 681 if ('\b' == p[i] || '\n' == p[i]) 682 continue; 683 684 /* 685 * Print a regular character. 686 * Close out any bold/italic scopes. 687 * If we're in back-space mode, make sure we'll 688 * have something to enter when we backspace. 689 */ 690 691 if ('\b' != p[i + 1]) { 692 if (italic) 693 printf("</i>"); 694 if (bold) 695 printf("</b>"); 696 italic = bold = 0; 697 html_putchar(p[i]); 698 continue; 699 } else if (i + 2 >= len) 700 continue; 701 702 /* Italic mode. */ 703 704 if ('_' == p[i]) { 705 if (bold) 706 printf("</b>"); 707 if ( ! italic) 708 printf("<i>"); 709 bold = 0; 710 italic = 1; 711 i += 2; 712 html_putchar(p[i]); 713 continue; 714 } 715 716 /* 717 * Handle funny behaviour troff-isms. 718 * These grok'd from the original man2html.c. 719 */ 720 721 if (('+' == p[i] && 'o' == p[i + 2]) || 722 ('o' == p[i] && '+' == p[i + 2]) || 723 ('|' == p[i] && '=' == p[i + 2]) || 724 ('=' == p[i] && '|' == p[i + 2]) || 725 ('*' == p[i] && '=' == p[i + 2]) || 726 ('=' == p[i] && '*' == p[i + 2]) || 727 ('*' == p[i] && '|' == p[i + 2]) || 728 ('|' == p[i] && '*' == p[i + 2])) { 729 if (italic) 730 printf("</i>"); 731 if (bold) 732 printf("</b>"); 733 italic = bold = 0; 734 putchar('*'); 735 i += 2; 736 continue; 737 } else if (('|' == p[i] && '-' == p[i + 2]) || 738 ('-' == p[i] && '|' == p[i + 1]) || 739 ('+' == p[i] && '-' == p[i + 1]) || 740 ('-' == p[i] && '+' == p[i + 1]) || 741 ('+' == p[i] && '|' == p[i + 1]) || 742 ('|' == p[i] && '+' == p[i + 1])) { 743 if (italic) 744 printf("</i>"); 745 if (bold) 746 printf("</b>"); 747 italic = bold = 0; 748 putchar('+'); 749 i += 2; 750 continue; 751 } 752 753 /* Bold mode. */ 754 755 if (italic) 756 printf("</i>"); 757 if ( ! bold) 758 printf("<b>"); 759 bold = 1; 760 italic = 0; 761 i += 2; 762 html_putchar(p[i]); 763 } 764 765 /* 766 * Clean up the last character. 767 * We can get to a newline; don't print that. 768 */ 769 770 if (italic) 771 printf("</i>"); 772 if (bold) 773 printf("</b>"); 774 775 if (i == len - 1 && p[i] != '\n') 776 html_putchar(p[i]); 777 778 putchar('\n'); 779 } 780 free(p); 781 782 puts("</pre>\n" 783 "</div>"); 784 785 fclose(f); 786 } 787 788 static void 789 resp_format(const struct req *req, const char *file) 790 { 791 struct manoutput conf; 792 struct mparse *mp; 793 struct roff_man *man; 794 void *vp; 795 int fd; 796 int usepath; 797 798 if (-1 == (fd = open(file, O_RDONLY, 0))) { 799 puts("<p>You specified an invalid manual file.</p>"); 800 return; 801 } 802 803 mchars_alloc(); 804 mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath); 805 mparse_readfd(mp, fd, file); 806 close(fd); 807 808 memset(&conf, 0, sizeof(conf)); 809 conf.fragment = 1; 810 usepath = strcmp(req->q.manpath, req->p[0]); 811 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S", 812 usepath ? req->q.manpath : "", usepath ? "/" : ""); 813 814 mparse_result(mp, &man, NULL); 815 if (man == NULL) { 816 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 817 pg_error_internal(); 818 mparse_free(mp); 819 mchars_free(); 820 return; 821 } 822 823 vp = html_alloc(&conf); 824 825 if (man->macroset == MACROSET_MDOC) { 826 mdoc_validate(man); 827 html_mdoc(vp, man); 828 } else { 829 man_validate(man); 830 html_man(vp, man); 831 } 832 833 html_free(vp); 834 mparse_free(mp); 835 mchars_free(); 836 free(conf.man); 837 } 838 839 static void 840 resp_show(const struct req *req, const char *file) 841 { 842 843 if ('.' == file[0] && '/' == file[1]) 844 file += 2; 845 846 if ('c' == *file) 847 resp_catman(req, file); 848 else 849 resp_format(req, file); 850 } 851 852 static void 853 pg_show(struct req *req, const char *fullpath) 854 { 855 char *manpath; 856 const char *file; 857 858 if ((file = strchr(fullpath, '/')) == NULL) { 859 pg_error_badrequest( 860 "You did not specify a page to show."); 861 return; 862 } 863 manpath = mandoc_strndup(fullpath, file - fullpath); 864 file++; 865 866 if ( ! validate_manpath(req, manpath)) { 867 pg_error_badrequest( 868 "You specified an invalid manpath."); 869 free(manpath); 870 return; 871 } 872 873 /* 874 * Begin by chdir()ing into the manpath. 875 * This way we can pick up the database files, which are 876 * relative to the manpath root. 877 */ 878 879 if (chdir(manpath) == -1) { 880 warn("chdir %s", manpath); 881 pg_error_internal(); 882 free(manpath); 883 return; 884 } 885 886 if (strcmp(manpath, "mandoc")) { 887 free(req->q.manpath); 888 req->q.manpath = manpath; 889 } else 890 free(manpath); 891 892 if ( ! validate_filename(file)) { 893 pg_error_badrequest( 894 "You specified an invalid manual file."); 895 return; 896 } 897 898 resp_begin_html(200, NULL); 899 resp_searchform(req, FOCUS_NONE); 900 resp_show(req, file); 901 resp_end_html(); 902 } 903 904 static void 905 pg_search(const struct req *req) 906 { 907 struct mansearch search; 908 struct manpaths paths; 909 struct manpage *res; 910 char **argv; 911 char *query, *rp, *wp; 912 size_t ressz; 913 int argc; 914 915 /* 916 * Begin by chdir()ing into the root of the manpath. 917 * This way we can pick up the database files, which are 918 * relative to the manpath root. 919 */ 920 921 if (chdir(req->q.manpath) == -1) { 922 warn("chdir %s", req->q.manpath); 923 pg_error_internal(); 924 return; 925 } 926 927 search.arch = req->q.arch; 928 search.sec = req->q.sec; 929 search.outkey = "Nd"; 930 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 931 search.firstmatch = 1; 932 933 paths.sz = 1; 934 paths.paths = mandoc_malloc(sizeof(char *)); 935 paths.paths[0] = mandoc_strdup("."); 936 937 /* 938 * Break apart at spaces with backslash-escaping. 939 */ 940 941 argc = 0; 942 argv = NULL; 943 rp = query = mandoc_strdup(req->q.query); 944 for (;;) { 945 while (isspace((unsigned char)*rp)) 946 rp++; 947 if (*rp == '\0') 948 break; 949 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 950 argv[argc++] = wp = rp; 951 for (;;) { 952 if (isspace((unsigned char)*rp)) { 953 *wp = '\0'; 954 rp++; 955 break; 956 } 957 if (rp[0] == '\\' && rp[1] != '\0') 958 rp++; 959 if (wp != rp) 960 *wp = *rp; 961 if (*rp == '\0') 962 break; 963 wp++; 964 rp++; 965 } 966 } 967 968 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz)) 969 pg_noresult(req, "You entered an invalid query."); 970 else if (0 == ressz) 971 pg_noresult(req, "No results found."); 972 else 973 pg_searchres(req, res, ressz); 974 975 free(query); 976 mansearch_free(res, ressz); 977 free(paths.paths[0]); 978 free(paths.paths); 979 } 980 981 int 982 main(void) 983 { 984 struct req req; 985 struct itimerval itimer; 986 const char *path; 987 const char *querystring; 988 int i; 989 990 /* Poor man's ReDoS mitigation. */ 991 992 itimer.it_value.tv_sec = 2; 993 itimer.it_value.tv_usec = 0; 994 itimer.it_interval.tv_sec = 2; 995 itimer.it_interval.tv_usec = 0; 996 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 997 warn("setitimer"); 998 pg_error_internal(); 999 return EXIT_FAILURE; 1000 } 1001 1002 /* 1003 * First we change directory into the MAN_DIR so that 1004 * subsequent scanning for manpath directories is rooted 1005 * relative to the same position. 1006 */ 1007 1008 if (chdir(MAN_DIR) == -1) { 1009 warn("MAN_DIR: %s", MAN_DIR); 1010 pg_error_internal(); 1011 return EXIT_FAILURE; 1012 } 1013 1014 memset(&req, 0, sizeof(struct req)); 1015 req.q.equal = 1; 1016 parse_manpath_conf(&req); 1017 1018 /* Parse the path info and the query string. */ 1019 1020 if ((path = getenv("PATH_INFO")) == NULL) 1021 path = ""; 1022 else if (*path == '/') 1023 path++; 1024 1025 if (*path != '\0') { 1026 parse_path_info(&req, path); 1027 if (access(path, F_OK) == -1) 1028 path = ""; 1029 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1030 parse_query_string(&req, querystring); 1031 1032 /* Validate parsed data and add defaults. */ 1033 1034 if (req.q.manpath == NULL) 1035 req.q.manpath = mandoc_strdup(req.p[0]); 1036 else if ( ! validate_manpath(&req, req.q.manpath)) { 1037 pg_error_badrequest( 1038 "You specified an invalid manpath."); 1039 return EXIT_FAILURE; 1040 } 1041 1042 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1043 pg_error_badrequest( 1044 "You specified an invalid architecture."); 1045 return EXIT_FAILURE; 1046 } 1047 1048 /* Dispatch to the three different pages. */ 1049 1050 if ('\0' != *path) 1051 pg_show(&req, path); 1052 else if (NULL != req.q.query) 1053 pg_search(&req); 1054 else 1055 pg_index(&req); 1056 1057 free(req.q.manpath); 1058 free(req.q.arch); 1059 free(req.q.sec); 1060 free(req.q.query); 1061 for (i = 0; i < (int)req.psz; i++) 1062 free(req.p[i]); 1063 free(req.p); 1064 return EXIT_SUCCESS; 1065 } 1066 1067 /* 1068 * If PATH_INFO is not a file name, translate it to a query. 1069 */ 1070 static void 1071 parse_path_info(struct req *req, const char *path) 1072 { 1073 char *dir; 1074 1075 req->isquery = 0; 1076 req->q.equal = 1; 1077 req->q.manpath = mandoc_strdup(path); 1078 1079 /* Mandatory manual page name. */ 1080 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1081 req->q.query = req->q.manpath; 1082 req->q.manpath = NULL; 1083 } else 1084 *req->q.query++ = '\0'; 1085 1086 /* Optional trailing section. */ 1087 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1088 if(isdigit((unsigned char)req->q.sec[1])) { 1089 *req->q.sec++ = '\0'; 1090 req->q.sec = mandoc_strdup(req->q.sec); 1091 } else 1092 req->q.sec = NULL; 1093 } 1094 1095 /* Handle the case of name[.section] only. */ 1096 if (req->q.manpath == NULL) { 1097 req->q.arch = NULL; 1098 return; 1099 } 1100 req->q.query = mandoc_strdup(req->q.query); 1101 1102 /* Optional architecture. */ 1103 dir = strrchr(req->q.manpath, '/'); 1104 if (dir != NULL && strncmp(dir + 1, "man", 3) != 0) { 1105 *dir++ = '\0'; 1106 req->q.arch = mandoc_strdup(dir); 1107 dir = strrchr(req->q.manpath, '/'); 1108 } else 1109 req->q.arch = NULL; 1110 1111 /* Optional directory name. */ 1112 if (dir != NULL && strncmp(dir + 1, "man", 3) == 0) { 1113 *dir++ = '\0'; 1114 free(req->q.sec); 1115 req->q.sec = mandoc_strdup(dir + 3); 1116 } 1117 } 1118 1119 /* 1120 * Scan for indexable paths. 1121 */ 1122 static void 1123 parse_manpath_conf(struct req *req) 1124 { 1125 FILE *fp; 1126 char *dp; 1127 size_t dpsz; 1128 ssize_t len; 1129 1130 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1131 warn("%s/manpath.conf", MAN_DIR); 1132 pg_error_internal(); 1133 exit(EXIT_FAILURE); 1134 } 1135 1136 dp = NULL; 1137 dpsz = 0; 1138 1139 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1140 if (dp[len - 1] == '\n') 1141 dp[--len] = '\0'; 1142 req->p = mandoc_realloc(req->p, 1143 (req->psz + 1) * sizeof(char *)); 1144 if ( ! validate_urifrag(dp)) { 1145 warnx("%s/manpath.conf contains " 1146 "unsafe path \"%s\"", MAN_DIR, dp); 1147 pg_error_internal(); 1148 exit(EXIT_FAILURE); 1149 } 1150 if (strchr(dp, '/') != NULL) { 1151 warnx("%s/manpath.conf contains " 1152 "path with slash \"%s\"", MAN_DIR, dp); 1153 pg_error_internal(); 1154 exit(EXIT_FAILURE); 1155 } 1156 req->p[req->psz++] = dp; 1157 dp = NULL; 1158 dpsz = 0; 1159 } 1160 free(dp); 1161 1162 if (req->p == NULL) { 1163 warnx("%s/manpath.conf is empty", MAN_DIR); 1164 pg_error_internal(); 1165 exit(EXIT_FAILURE); 1166 } 1167 } 1168