1 /* $OpenBSD: cgi.c,v 1.70 2016/04/29 10:45:06 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "main.h" 38 #include "manconf.h" 39 #include "mansearch.h" 40 #include "cgi.h" 41 42 /* 43 * A query as passed to the search function. 44 */ 45 struct query { 46 char *manpath; /* desired manual directory */ 47 char *arch; /* architecture */ 48 char *sec; /* manual section */ 49 char *query; /* unparsed query expression */ 50 int equal; /* match whole names, not substrings */ 51 }; 52 53 struct req { 54 struct query q; 55 char **p; /* array of available manpaths */ 56 size_t psz; /* number of available manpaths */ 57 int isquery; /* QUERY_STRING used, not PATH_INFO */ 58 }; 59 60 enum focus { 61 FOCUS_NONE = 0, 62 FOCUS_QUERY 63 }; 64 65 static void html_print(const char *); 66 static void html_putchar(char); 67 static int http_decode(char *); 68 static void parse_manpath_conf(struct req *); 69 static void parse_path_info(struct req *req, const char *path); 70 static void parse_query_string(struct req *, const char *); 71 static void pg_error_badrequest(const char *); 72 static void pg_error_internal(void); 73 static void pg_index(const struct req *); 74 static void pg_noresult(const struct req *, const char *); 75 static void pg_search(const struct req *); 76 static void pg_searchres(const struct req *, 77 struct manpage *, size_t); 78 static void pg_show(struct req *, const char *); 79 static void resp_begin_html(int, const char *); 80 static void resp_begin_http(int, const char *); 81 static void resp_catman(const struct req *, const char *); 82 static void resp_copy(const char *); 83 static void resp_end_html(void); 84 static void resp_format(const struct req *, const char *); 85 static void resp_searchform(const struct req *, enum focus); 86 static void resp_show(const struct req *, const char *); 87 static void set_query_attr(char **, char **); 88 static int validate_filename(const char *); 89 static int validate_manpath(const struct req *, const char *); 90 static int validate_urifrag(const char *); 91 92 static const char *scriptname = SCRIPT_NAME; 93 94 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 95 static const char *const sec_numbers[] = { 96 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 97 }; 98 static const char *const sec_names[] = { 99 "All Sections", 100 "1 - General Commands", 101 "2 - System Calls", 102 "3 - Library Functions", 103 "3p - Perl Library", 104 "4 - Device Drivers", 105 "5 - File Formats", 106 "6 - Games", 107 "7 - Miscellaneous Information", 108 "8 - System Manager\'s Manual", 109 "9 - Kernel Developer\'s Manual" 110 }; 111 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 112 113 static const char *const arch_names[] = { 114 "amd64", "alpha", "armish", "armv7", 115 "hppa", "hppa64", "i386", "landisk", 116 "loongson", "luna88k", "macppc", "mips64", 117 "octeon", "sgi", "socppc", "sparc", 118 "sparc64", "zaurus", 119 "amiga", "arc", "arm32", "atari", 120 "aviion", "beagle", "cats", "hp300", 121 "ia64", "mac68k", "mvme68k", "mvme88k", 122 "mvmeppc", "palm", "pc532", "pegasos", 123 "pmax", "powerpc", "solbourne", "sun3", 124 "vax", "wgrisc", "x68k" 125 }; 126 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 127 128 /* 129 * Print a character, escaping HTML along the way. 130 * This will pass non-ASCII straight to output: be warned! 131 */ 132 static void 133 html_putchar(char c) 134 { 135 136 switch (c) { 137 case ('"'): 138 printf(""e;"); 139 break; 140 case ('&'): 141 printf("&"); 142 break; 143 case ('>'): 144 printf(">"); 145 break; 146 case ('<'): 147 printf("<"); 148 break; 149 default: 150 putchar((unsigned char)c); 151 break; 152 } 153 } 154 155 /* 156 * Call through to html_putchar(). 157 * Accepts NULL strings. 158 */ 159 static void 160 html_print(const char *p) 161 { 162 163 if (NULL == p) 164 return; 165 while ('\0' != *p) 166 html_putchar(*p++); 167 } 168 169 /* 170 * Transfer the responsibility for the allocated string *val 171 * to the query structure. 172 */ 173 static void 174 set_query_attr(char **attr, char **val) 175 { 176 177 free(*attr); 178 if (**val == '\0') { 179 *attr = NULL; 180 free(*val); 181 } else 182 *attr = *val; 183 *val = NULL; 184 } 185 186 /* 187 * Parse the QUERY_STRING for key-value pairs 188 * and store the values into the query structure. 189 */ 190 static void 191 parse_query_string(struct req *req, const char *qs) 192 { 193 char *key, *val; 194 size_t keysz, valsz; 195 196 req->isquery = 1; 197 req->q.manpath = NULL; 198 req->q.arch = NULL; 199 req->q.sec = NULL; 200 req->q.query = NULL; 201 req->q.equal = 1; 202 203 key = val = NULL; 204 while (*qs != '\0') { 205 206 /* Parse one key. */ 207 208 keysz = strcspn(qs, "=;&"); 209 key = mandoc_strndup(qs, keysz); 210 qs += keysz; 211 if (*qs != '=') 212 goto next; 213 214 /* Parse one value. */ 215 216 valsz = strcspn(++qs, ";&"); 217 val = mandoc_strndup(qs, valsz); 218 qs += valsz; 219 220 /* Decode and catch encoding errors. */ 221 222 if ( ! (http_decode(key) && http_decode(val))) 223 goto next; 224 225 /* Handle key-value pairs. */ 226 227 if ( ! strcmp(key, "query")) 228 set_query_attr(&req->q.query, &val); 229 230 else if ( ! strcmp(key, "apropos")) 231 req->q.equal = !strcmp(val, "0"); 232 233 else if ( ! strcmp(key, "manpath")) { 234 #ifdef COMPAT_OLDURI 235 if ( ! strncmp(val, "OpenBSD ", 8)) { 236 val[7] = '-'; 237 if ('C' == val[8]) 238 val[8] = 'c'; 239 } 240 #endif 241 set_query_attr(&req->q.manpath, &val); 242 } 243 244 else if ( ! (strcmp(key, "sec") 245 #ifdef COMPAT_OLDURI 246 && strcmp(key, "sektion") 247 #endif 248 )) { 249 if ( ! strcmp(val, "0")) 250 *val = '\0'; 251 set_query_attr(&req->q.sec, &val); 252 } 253 254 else if ( ! strcmp(key, "arch")) { 255 if ( ! strcmp(val, "default")) 256 *val = '\0'; 257 set_query_attr(&req->q.arch, &val); 258 } 259 260 /* 261 * The key must be freed in any case. 262 * The val may have been handed over to the query 263 * structure, in which case it is now NULL. 264 */ 265 next: 266 free(key); 267 key = NULL; 268 free(val); 269 val = NULL; 270 271 if (*qs != '\0') 272 qs++; 273 } 274 } 275 276 /* 277 * HTTP-decode a string. The standard explanation is that this turns 278 * "%4e+foo" into "n foo" in the regular way. This is done in-place 279 * over the allocated string. 280 */ 281 static int 282 http_decode(char *p) 283 { 284 char hex[3]; 285 char *q; 286 int c; 287 288 hex[2] = '\0'; 289 290 q = p; 291 for ( ; '\0' != *p; p++, q++) { 292 if ('%' == *p) { 293 if ('\0' == (hex[0] = *(p + 1))) 294 return 0; 295 if ('\0' == (hex[1] = *(p + 2))) 296 return 0; 297 if (1 != sscanf(hex, "%x", &c)) 298 return 0; 299 if ('\0' == c) 300 return 0; 301 302 *q = (char)c; 303 p += 2; 304 } else 305 *q = '+' == *p ? ' ' : *p; 306 } 307 308 *q = '\0'; 309 return 1; 310 } 311 312 static void 313 resp_begin_http(int code, const char *msg) 314 { 315 316 if (200 != code) 317 printf("Status: %d %s\r\n", code, msg); 318 319 printf("Content-Type: text/html; charset=utf-8\r\n" 320 "Cache-Control: no-cache\r\n" 321 "Pragma: no-cache\r\n" 322 "\r\n"); 323 324 fflush(stdout); 325 } 326 327 static void 328 resp_copy(const char *filename) 329 { 330 char buf[4096]; 331 ssize_t sz; 332 int fd; 333 334 if ((fd = open(filename, O_RDONLY)) != -1) { 335 fflush(stdout); 336 while ((sz = read(fd, buf, sizeof(buf))) > 0) 337 write(STDOUT_FILENO, buf, sz); 338 } 339 } 340 341 static void 342 resp_begin_html(int code, const char *msg) 343 { 344 345 resp_begin_http(code, msg); 346 347 printf("<!DOCTYPE html>\n" 348 "<html>\n" 349 "<head>\n" 350 "<meta charset=\"UTF-8\"/>\n" 351 "<link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 352 " type=\"text/css\" media=\"all\">\n" 353 "<title>%s</title>\n" 354 "</head>\n" 355 "<body>\n" 356 "<!-- Begin page content. //-->\n", 357 CSS_DIR, CUSTOMIZE_TITLE); 358 359 resp_copy(MAN_DIR "/header.html"); 360 } 361 362 static void 363 resp_end_html(void) 364 { 365 366 resp_copy(MAN_DIR "/footer.html"); 367 368 puts("</body>\n" 369 "</html>"); 370 } 371 372 static void 373 resp_searchform(const struct req *req, enum focus focus) 374 { 375 int i; 376 377 puts("<!-- Begin search form. //-->"); 378 printf("<div id=\"mancgi\">\n" 379 "<form action=\"/%s\" method=\"get\">\n" 380 "<fieldset>\n" 381 "<legend>Manual Page Search Parameters</legend>\n", 382 scriptname); 383 384 /* Write query input box. */ 385 386 printf( "<table><tr><td>\n" 387 "<input type=\"text\" name=\"query\" value=\""); 388 if (req->q.query != NULL) 389 html_print(req->q.query); 390 printf( "\" size=\"40\""); 391 if (focus == FOCUS_QUERY) 392 printf(" autofocus"); 393 puts(">"); 394 395 /* Write submission and reset buttons. */ 396 397 printf( "<input type=\"submit\" value=\"Submit\">\n" 398 "<input type=\"reset\" value=\"Reset\">\n"); 399 400 /* Write show radio button */ 401 402 printf( "</td><td>\n" 403 "<input type=\"radio\" "); 404 if (req->q.equal) 405 printf("checked=\"checked\" "); 406 printf( "name=\"apropos\" id=\"show\" value=\"0\">\n" 407 "<label for=\"show\">Show named manual page</label>\n"); 408 409 /* Write section selector. */ 410 411 puts( "</td></tr><tr><td>\n" 412 "<select name=\"sec\">"); 413 for (i = 0; i < sec_MAX; i++) { 414 printf("<option value=\"%s\"", sec_numbers[i]); 415 if (NULL != req->q.sec && 416 0 == strcmp(sec_numbers[i], req->q.sec)) 417 printf(" selected=\"selected\""); 418 printf(">%s</option>\n", sec_names[i]); 419 } 420 puts("</select>"); 421 422 /* Write architecture selector. */ 423 424 printf( "<select name=\"arch\">\n" 425 "<option value=\"default\""); 426 if (NULL == req->q.arch) 427 printf(" selected=\"selected\""); 428 puts(">All Architectures</option>"); 429 for (i = 0; i < arch_MAX; i++) { 430 printf("<option value=\"%s\"", arch_names[i]); 431 if (NULL != req->q.arch && 432 0 == strcmp(arch_names[i], req->q.arch)) 433 printf(" selected=\"selected\""); 434 printf(">%s</option>\n", arch_names[i]); 435 } 436 puts("</select>"); 437 438 /* Write manpath selector. */ 439 440 if (req->psz > 1) { 441 puts("<select name=\"manpath\">"); 442 for (i = 0; i < (int)req->psz; i++) { 443 printf("<option "); 444 if (strcmp(req->q.manpath, req->p[i]) == 0) 445 printf("selected=\"selected\" "); 446 printf("value=\""); 447 html_print(req->p[i]); 448 printf("\">"); 449 html_print(req->p[i]); 450 puts("</option>"); 451 } 452 puts("</select>"); 453 } 454 455 /* Write search radio button */ 456 457 printf( "</td><td>\n" 458 "<input type=\"radio\" "); 459 if (0 == req->q.equal) 460 printf("checked=\"checked\" "); 461 printf( "name=\"apropos\" id=\"search\" value=\"1\">\n" 462 "<label for=\"search\">Search with apropos query</label>\n"); 463 464 puts("</td></tr></table>\n" 465 "</fieldset>\n" 466 "</form>\n" 467 "</div>"); 468 puts("<!-- End search form. //-->"); 469 } 470 471 static int 472 validate_urifrag(const char *frag) 473 { 474 475 while ('\0' != *frag) { 476 if ( ! (isalnum((unsigned char)*frag) || 477 '-' == *frag || '.' == *frag || 478 '/' == *frag || '_' == *frag)) 479 return 0; 480 frag++; 481 } 482 return 1; 483 } 484 485 static int 486 validate_manpath(const struct req *req, const char* manpath) 487 { 488 size_t i; 489 490 if ( ! strcmp(manpath, "mandoc")) 491 return 1; 492 493 for (i = 0; i < req->psz; i++) 494 if ( ! strcmp(manpath, req->p[i])) 495 return 1; 496 497 return 0; 498 } 499 500 static int 501 validate_filename(const char *file) 502 { 503 504 if ('.' == file[0] && '/' == file[1]) 505 file += 2; 506 507 return ! (strstr(file, "../") || strstr(file, "/..") || 508 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 509 } 510 511 static void 512 pg_index(const struct req *req) 513 { 514 515 resp_begin_html(200, NULL); 516 resp_searchform(req, FOCUS_QUERY); 517 printf("<p>\n" 518 "This web interface is documented in the\n" 519 "<a href=\"/%s%smandoc/man8/man.cgi.8\">man.cgi</a>\n" 520 "manual, and the\n" 521 "<a href=\"/%s%smandoc/man1/apropos.1\">apropos</a>\n" 522 "manual explains the query syntax.\n" 523 "</p>\n", 524 scriptname, *scriptname == '\0' ? "" : "/", 525 scriptname, *scriptname == '\0' ? "" : "/"); 526 resp_end_html(); 527 } 528 529 static void 530 pg_noresult(const struct req *req, const char *msg) 531 { 532 resp_begin_html(200, NULL); 533 resp_searchform(req, FOCUS_QUERY); 534 puts("<p>"); 535 puts(msg); 536 puts("</p>"); 537 resp_end_html(); 538 } 539 540 static void 541 pg_error_badrequest(const char *msg) 542 { 543 544 resp_begin_html(400, "Bad Request"); 545 puts("<h1>Bad Request</h1>\n" 546 "<p>\n"); 547 puts(msg); 548 printf("Try again from the\n" 549 "<a href=\"/%s\">main page</a>.\n" 550 "</p>", scriptname); 551 resp_end_html(); 552 } 553 554 static void 555 pg_error_internal(void) 556 { 557 resp_begin_html(500, "Internal Server Error"); 558 puts("<p>Internal Server Error</p>"); 559 resp_end_html(); 560 } 561 562 static void 563 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 564 { 565 char *arch, *archend; 566 const char *sec; 567 size_t i, iuse; 568 int archprio, archpriouse; 569 int prio, priouse; 570 571 for (i = 0; i < sz; i++) { 572 if (validate_filename(r[i].file)) 573 continue; 574 warnx("invalid filename %s in %s database", 575 r[i].file, req->q.manpath); 576 pg_error_internal(); 577 return; 578 } 579 580 if (req->isquery && sz == 1) { 581 /* 582 * If we have just one result, then jump there now 583 * without any delay. 584 */ 585 printf("Status: 303 See Other\r\n"); 586 printf("Location: http://%s/%s%s%s/%s", 587 HTTP_HOST, scriptname, 588 *scriptname == '\0' ? "" : "/", 589 req->q.manpath, r[0].file); 590 printf("\r\n" 591 "Content-Type: text/html; charset=utf-8\r\n" 592 "\r\n"); 593 return; 594 } 595 596 resp_begin_html(200, NULL); 597 resp_searchform(req, 598 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 599 600 if (sz > 1) { 601 puts("<div class=\"results\">"); 602 puts("<table>"); 603 604 for (i = 0; i < sz; i++) { 605 printf("<tr>\n" 606 "<td class=\"title\">\n" 607 "<a href=\"/%s%s%s/%s", 608 scriptname, *scriptname == '\0' ? "" : "/", 609 req->q.manpath, r[i].file); 610 printf("\">"); 611 html_print(r[i].names); 612 printf("</a>\n" 613 "</td>\n" 614 "<td class=\"desc\">"); 615 html_print(r[i].output); 616 puts("</td>\n" 617 "</tr>"); 618 } 619 620 puts("</table>\n" 621 "</div>"); 622 } 623 624 /* 625 * In man(1) mode, show one of the pages 626 * even if more than one is found. 627 */ 628 629 if (req->q.equal || sz == 1) { 630 puts("<hr>"); 631 iuse = 0; 632 priouse = 20; 633 archpriouse = 3; 634 for (i = 0; i < sz; i++) { 635 sec = r[i].file; 636 sec += strcspn(sec, "123456789"); 637 if (sec[0] == '\0') 638 continue; 639 prio = sec_prios[sec[0] - '1']; 640 if (sec[1] != '/') 641 prio += 10; 642 if (req->q.arch == NULL) { 643 archprio = 644 ((arch = strchr(sec + 1, '/')) 645 == NULL) ? 3 : 646 ((archend = strchr(arch + 1, '/')) 647 == NULL) ? 0 : 648 strncmp(arch, "amd64/", 649 archend - arch) ? 2 : 1; 650 if (archprio < archpriouse) { 651 archpriouse = archprio; 652 priouse = prio; 653 iuse = i; 654 continue; 655 } 656 if (archprio > archpriouse) 657 continue; 658 } 659 if (prio >= priouse) 660 continue; 661 priouse = prio; 662 iuse = i; 663 } 664 resp_show(req, r[iuse].file); 665 } 666 667 resp_end_html(); 668 } 669 670 static void 671 resp_catman(const struct req *req, const char *file) 672 { 673 FILE *f; 674 char *p; 675 size_t sz; 676 ssize_t len; 677 int i; 678 int italic, bold; 679 680 if ((f = fopen(file, "r")) == NULL) { 681 puts("<p>You specified an invalid manual file.</p>"); 682 return; 683 } 684 685 puts("<div class=\"catman\">\n" 686 "<pre>"); 687 688 p = NULL; 689 sz = 0; 690 691 while ((len = getline(&p, &sz, f)) != -1) { 692 bold = italic = 0; 693 for (i = 0; i < len - 1; i++) { 694 /* 695 * This means that the catpage is out of state. 696 * Ignore it and keep going (although the 697 * catpage is bogus). 698 */ 699 700 if ('\b' == p[i] || '\n' == p[i]) 701 continue; 702 703 /* 704 * Print a regular character. 705 * Close out any bold/italic scopes. 706 * If we're in back-space mode, make sure we'll 707 * have something to enter when we backspace. 708 */ 709 710 if ('\b' != p[i + 1]) { 711 if (italic) 712 printf("</i>"); 713 if (bold) 714 printf("</b>"); 715 italic = bold = 0; 716 html_putchar(p[i]); 717 continue; 718 } else if (i + 2 >= len) 719 continue; 720 721 /* Italic mode. */ 722 723 if ('_' == p[i]) { 724 if (bold) 725 printf("</b>"); 726 if ( ! italic) 727 printf("<i>"); 728 bold = 0; 729 italic = 1; 730 i += 2; 731 html_putchar(p[i]); 732 continue; 733 } 734 735 /* 736 * Handle funny behaviour troff-isms. 737 * These grok'd from the original man2html.c. 738 */ 739 740 if (('+' == p[i] && 'o' == p[i + 2]) || 741 ('o' == p[i] && '+' == p[i + 2]) || 742 ('|' == p[i] && '=' == p[i + 2]) || 743 ('=' == p[i] && '|' == p[i + 2]) || 744 ('*' == p[i] && '=' == p[i + 2]) || 745 ('=' == p[i] && '*' == p[i + 2]) || 746 ('*' == p[i] && '|' == p[i + 2]) || 747 ('|' == p[i] && '*' == p[i + 2])) { 748 if (italic) 749 printf("</i>"); 750 if (bold) 751 printf("</b>"); 752 italic = bold = 0; 753 putchar('*'); 754 i += 2; 755 continue; 756 } else if (('|' == p[i] && '-' == p[i + 2]) || 757 ('-' == p[i] && '|' == p[i + 1]) || 758 ('+' == p[i] && '-' == p[i + 1]) || 759 ('-' == p[i] && '+' == p[i + 1]) || 760 ('+' == p[i] && '|' == p[i + 1]) || 761 ('|' == p[i] && '+' == p[i + 1])) { 762 if (italic) 763 printf("</i>"); 764 if (bold) 765 printf("</b>"); 766 italic = bold = 0; 767 putchar('+'); 768 i += 2; 769 continue; 770 } 771 772 /* Bold mode. */ 773 774 if (italic) 775 printf("</i>"); 776 if ( ! bold) 777 printf("<b>"); 778 bold = 1; 779 italic = 0; 780 i += 2; 781 html_putchar(p[i]); 782 } 783 784 /* 785 * Clean up the last character. 786 * We can get to a newline; don't print that. 787 */ 788 789 if (italic) 790 printf("</i>"); 791 if (bold) 792 printf("</b>"); 793 794 if (i == len - 1 && p[i] != '\n') 795 html_putchar(p[i]); 796 797 putchar('\n'); 798 } 799 free(p); 800 801 puts("</pre>\n" 802 "</div>"); 803 804 fclose(f); 805 } 806 807 static void 808 resp_format(const struct req *req, const char *file) 809 { 810 struct manoutput conf; 811 struct mparse *mp; 812 struct roff_man *man; 813 void *vp; 814 int fd; 815 int usepath; 816 817 if (-1 == (fd = open(file, O_RDONLY, 0))) { 818 puts("<p>You specified an invalid manual file.</p>"); 819 return; 820 } 821 822 mchars_alloc(); 823 mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath); 824 mparse_readfd(mp, fd, file); 825 close(fd); 826 827 memset(&conf, 0, sizeof(conf)); 828 conf.fragment = 1; 829 usepath = strcmp(req->q.manpath, req->p[0]); 830 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S", 831 usepath ? req->q.manpath : "", usepath ? "/" : ""); 832 833 mparse_result(mp, &man, NULL); 834 if (man == NULL) { 835 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 836 pg_error_internal(); 837 mparse_free(mp); 838 mchars_free(); 839 return; 840 } 841 842 vp = html_alloc(&conf); 843 844 if (man->macroset == MACROSET_MDOC) { 845 mdoc_validate(man); 846 html_mdoc(vp, man); 847 } else { 848 man_validate(man); 849 html_man(vp, man); 850 } 851 852 html_free(vp); 853 mparse_free(mp); 854 mchars_free(); 855 free(conf.man); 856 } 857 858 static void 859 resp_show(const struct req *req, const char *file) 860 { 861 862 if ('.' == file[0] && '/' == file[1]) 863 file += 2; 864 865 if ('c' == *file) 866 resp_catman(req, file); 867 else 868 resp_format(req, file); 869 } 870 871 static void 872 pg_show(struct req *req, const char *fullpath) 873 { 874 char *manpath; 875 const char *file; 876 877 if ((file = strchr(fullpath, '/')) == NULL) { 878 pg_error_badrequest( 879 "You did not specify a page to show."); 880 return; 881 } 882 manpath = mandoc_strndup(fullpath, file - fullpath); 883 file++; 884 885 if ( ! validate_manpath(req, manpath)) { 886 pg_error_badrequest( 887 "You specified an invalid manpath."); 888 free(manpath); 889 return; 890 } 891 892 /* 893 * Begin by chdir()ing into the manpath. 894 * This way we can pick up the database files, which are 895 * relative to the manpath root. 896 */ 897 898 if (chdir(manpath) == -1) { 899 warn("chdir %s", manpath); 900 pg_error_internal(); 901 free(manpath); 902 return; 903 } 904 905 if (strcmp(manpath, "mandoc")) { 906 free(req->q.manpath); 907 req->q.manpath = manpath; 908 } else 909 free(manpath); 910 911 if ( ! validate_filename(file)) { 912 pg_error_badrequest( 913 "You specified an invalid manual file."); 914 return; 915 } 916 917 resp_begin_html(200, NULL); 918 resp_searchform(req, FOCUS_NONE); 919 resp_show(req, file); 920 resp_end_html(); 921 } 922 923 static void 924 pg_search(const struct req *req) 925 { 926 struct mansearch search; 927 struct manpaths paths; 928 struct manpage *res; 929 char **argv; 930 char *query, *rp, *wp; 931 size_t ressz; 932 int argc; 933 934 /* 935 * Begin by chdir()ing into the root of the manpath. 936 * This way we can pick up the database files, which are 937 * relative to the manpath root. 938 */ 939 940 if (chdir(req->q.manpath) == -1) { 941 warn("chdir %s", req->q.manpath); 942 pg_error_internal(); 943 return; 944 } 945 946 search.arch = req->q.arch; 947 search.sec = req->q.sec; 948 search.outkey = "Nd"; 949 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 950 search.firstmatch = 1; 951 952 paths.sz = 1; 953 paths.paths = mandoc_malloc(sizeof(char *)); 954 paths.paths[0] = mandoc_strdup("."); 955 956 /* 957 * Break apart at spaces with backslash-escaping. 958 */ 959 960 argc = 0; 961 argv = NULL; 962 rp = query = mandoc_strdup(req->q.query); 963 for (;;) { 964 while (isspace((unsigned char)*rp)) 965 rp++; 966 if (*rp == '\0') 967 break; 968 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 969 argv[argc++] = wp = rp; 970 for (;;) { 971 if (isspace((unsigned char)*rp)) { 972 *wp = '\0'; 973 rp++; 974 break; 975 } 976 if (rp[0] == '\\' && rp[1] != '\0') 977 rp++; 978 if (wp != rp) 979 *wp = *rp; 980 if (*rp == '\0') 981 break; 982 wp++; 983 rp++; 984 } 985 } 986 987 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz)) 988 pg_noresult(req, "You entered an invalid query."); 989 else if (0 == ressz) 990 pg_noresult(req, "No results found."); 991 else 992 pg_searchres(req, res, ressz); 993 994 free(query); 995 mansearch_free(res, ressz); 996 free(paths.paths[0]); 997 free(paths.paths); 998 } 999 1000 int 1001 main(void) 1002 { 1003 struct req req; 1004 struct itimerval itimer; 1005 const char *path; 1006 const char *querystring; 1007 int i; 1008 1009 /* Poor man's ReDoS mitigation. */ 1010 1011 itimer.it_value.tv_sec = 2; 1012 itimer.it_value.tv_usec = 0; 1013 itimer.it_interval.tv_sec = 2; 1014 itimer.it_interval.tv_usec = 0; 1015 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1016 warn("setitimer"); 1017 pg_error_internal(); 1018 return EXIT_FAILURE; 1019 } 1020 1021 /* 1022 * First we change directory into the MAN_DIR so that 1023 * subsequent scanning for manpath directories is rooted 1024 * relative to the same position. 1025 */ 1026 1027 if (chdir(MAN_DIR) == -1) { 1028 warn("MAN_DIR: %s", MAN_DIR); 1029 pg_error_internal(); 1030 return EXIT_FAILURE; 1031 } 1032 1033 memset(&req, 0, sizeof(struct req)); 1034 req.q.equal = 1; 1035 parse_manpath_conf(&req); 1036 1037 /* Parse the path info and the query string. */ 1038 1039 if ((path = getenv("PATH_INFO")) == NULL) 1040 path = ""; 1041 else if (*path == '/') 1042 path++; 1043 1044 if (*path != '\0') { 1045 parse_path_info(&req, path); 1046 if (access(path, F_OK) == -1) 1047 path = ""; 1048 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1049 parse_query_string(&req, querystring); 1050 1051 /* Validate parsed data and add defaults. */ 1052 1053 if (req.q.manpath == NULL) 1054 req.q.manpath = mandoc_strdup(req.p[0]); 1055 else if ( ! validate_manpath(&req, req.q.manpath)) { 1056 pg_error_badrequest( 1057 "You specified an invalid manpath."); 1058 return EXIT_FAILURE; 1059 } 1060 1061 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1062 pg_error_badrequest( 1063 "You specified an invalid architecture."); 1064 return EXIT_FAILURE; 1065 } 1066 1067 /* Dispatch to the three different pages. */ 1068 1069 if ('\0' != *path) 1070 pg_show(&req, path); 1071 else if (NULL != req.q.query) 1072 pg_search(&req); 1073 else 1074 pg_index(&req); 1075 1076 free(req.q.manpath); 1077 free(req.q.arch); 1078 free(req.q.sec); 1079 free(req.q.query); 1080 for (i = 0; i < (int)req.psz; i++) 1081 free(req.p[i]); 1082 free(req.p); 1083 return EXIT_SUCCESS; 1084 } 1085 1086 /* 1087 * If PATH_INFO is not a file name, translate it to a query. 1088 */ 1089 static void 1090 parse_path_info(struct req *req, const char *path) 1091 { 1092 char *dir; 1093 1094 req->isquery = 0; 1095 req->q.equal = 1; 1096 req->q.manpath = mandoc_strdup(path); 1097 1098 /* Mandatory manual page name. */ 1099 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1100 req->q.query = req->q.manpath; 1101 req->q.manpath = NULL; 1102 } else 1103 *req->q.query++ = '\0'; 1104 1105 /* Optional trailing section. */ 1106 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1107 if(isdigit((unsigned char)req->q.sec[1])) { 1108 *req->q.sec++ = '\0'; 1109 req->q.sec = mandoc_strdup(req->q.sec); 1110 } else 1111 req->q.sec = NULL; 1112 } 1113 1114 /* Handle the case of name[.section] only. */ 1115 if (req->q.manpath == NULL) { 1116 req->q.arch = NULL; 1117 return; 1118 } 1119 req->q.query = mandoc_strdup(req->q.query); 1120 1121 /* Optional architecture. */ 1122 dir = strrchr(req->q.manpath, '/'); 1123 if (dir != NULL && strncmp(dir + 1, "man", 3) != 0) { 1124 *dir++ = '\0'; 1125 req->q.arch = mandoc_strdup(dir); 1126 dir = strrchr(req->q.manpath, '/'); 1127 } else 1128 req->q.arch = NULL; 1129 1130 /* Optional directory name. */ 1131 if (dir != NULL && strncmp(dir + 1, "man", 3) == 0) { 1132 *dir++ = '\0'; 1133 free(req->q.sec); 1134 req->q.sec = mandoc_strdup(dir + 3); 1135 } 1136 } 1137 1138 /* 1139 * Scan for indexable paths. 1140 */ 1141 static void 1142 parse_manpath_conf(struct req *req) 1143 { 1144 FILE *fp; 1145 char *dp; 1146 size_t dpsz; 1147 ssize_t len; 1148 1149 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1150 warn("%s/manpath.conf", MAN_DIR); 1151 pg_error_internal(); 1152 exit(EXIT_FAILURE); 1153 } 1154 1155 dp = NULL; 1156 dpsz = 0; 1157 1158 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1159 if (dp[len - 1] == '\n') 1160 dp[--len] = '\0'; 1161 req->p = mandoc_realloc(req->p, 1162 (req->psz + 1) * sizeof(char *)); 1163 if ( ! validate_urifrag(dp)) { 1164 warnx("%s/manpath.conf contains " 1165 "unsafe path \"%s\"", MAN_DIR, dp); 1166 pg_error_internal(); 1167 exit(EXIT_FAILURE); 1168 } 1169 if (strchr(dp, '/') != NULL) { 1170 warnx("%s/manpath.conf contains " 1171 "path with slash \"%s\"", MAN_DIR, dp); 1172 pg_error_internal(); 1173 exit(EXIT_FAILURE); 1174 } 1175 req->p[req->psz++] = dp; 1176 dp = NULL; 1177 dpsz = 0; 1178 } 1179 free(dp); 1180 1181 if (req->p == NULL) { 1182 warnx("%s/manpath.conf is empty", MAN_DIR); 1183 pg_error_internal(); 1184 exit(EXIT_FAILURE); 1185 } 1186 } 1187