1 /* $OpenBSD: cgi.c,v 1.83 2017/01/21 01:20:29 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "main.h" 38 #include "manconf.h" 39 #include "mansearch.h" 40 #include "cgi.h" 41 42 /* 43 * A query as passed to the search function. 44 */ 45 struct query { 46 char *manpath; /* desired manual directory */ 47 char *arch; /* architecture */ 48 char *sec; /* manual section */ 49 char *query; /* unparsed query expression */ 50 int equal; /* match whole names, not substrings */ 51 }; 52 53 struct req { 54 struct query q; 55 char **p; /* array of available manpaths */ 56 size_t psz; /* number of available manpaths */ 57 int isquery; /* QUERY_STRING used, not PATH_INFO */ 58 }; 59 60 enum focus { 61 FOCUS_NONE = 0, 62 FOCUS_QUERY 63 }; 64 65 static void html_print(const char *); 66 static void html_putchar(char); 67 static int http_decode(char *); 68 static void parse_manpath_conf(struct req *); 69 static void parse_path_info(struct req *req, const char *path); 70 static void parse_query_string(struct req *, const char *); 71 static void pg_error_badrequest(const char *); 72 static void pg_error_internal(void); 73 static void pg_index(const struct req *); 74 static void pg_noresult(const struct req *, const char *); 75 static void pg_search(const struct req *); 76 static void pg_searchres(const struct req *, 77 struct manpage *, size_t); 78 static void pg_show(struct req *, const char *); 79 static void resp_begin_html(int, const char *); 80 static void resp_begin_http(int, const char *); 81 static void resp_catman(const struct req *, const char *); 82 static void resp_copy(const char *); 83 static void resp_end_html(void); 84 static void resp_format(const struct req *, const char *); 85 static void resp_searchform(const struct req *, enum focus); 86 static void resp_show(const struct req *, const char *); 87 static void set_query_attr(char **, char **); 88 static int validate_filename(const char *); 89 static int validate_manpath(const struct req *, const char *); 90 static int validate_urifrag(const char *); 91 92 static const char *scriptname = SCRIPT_NAME; 93 94 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 95 static const char *const sec_numbers[] = { 96 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 97 }; 98 static const char *const sec_names[] = { 99 "All Sections", 100 "1 - General Commands", 101 "2 - System Calls", 102 "3 - Library Functions", 103 "3p - Perl Library", 104 "4 - Device Drivers", 105 "5 - File Formats", 106 "6 - Games", 107 "7 - Miscellaneous Information", 108 "8 - System Manager\'s Manual", 109 "9 - Kernel Developer\'s Manual" 110 }; 111 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 112 113 static const char *const arch_names[] = { 114 "amd64", "alpha", "armv7", 115 "hppa", "i386", "landisk", 116 "loongson", "luna88k", "macppc", "mips64", 117 "octeon", "sgi", "socppc", "sparc64", 118 "amiga", "arc", "armish", "arm32", 119 "atari", "aviion", "beagle", "cats", 120 "hppa64", "hp300", 121 "ia64", "mac68k", "mvme68k", "mvme88k", 122 "mvmeppc", "palm", "pc532", "pegasos", 123 "pmax", "powerpc", "solbourne", "sparc", 124 "sun3", "vax", "wgrisc", "x68k", 125 "zaurus" 126 }; 127 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 128 129 /* 130 * Print a character, escaping HTML along the way. 131 * This will pass non-ASCII straight to output: be warned! 132 */ 133 static void 134 html_putchar(char c) 135 { 136 137 switch (c) { 138 case ('"'): 139 printf("""); 140 break; 141 case ('&'): 142 printf("&"); 143 break; 144 case ('>'): 145 printf(">"); 146 break; 147 case ('<'): 148 printf("<"); 149 break; 150 default: 151 putchar((unsigned char)c); 152 break; 153 } 154 } 155 156 /* 157 * Call through to html_putchar(). 158 * Accepts NULL strings. 159 */ 160 static void 161 html_print(const char *p) 162 { 163 164 if (NULL == p) 165 return; 166 while ('\0' != *p) 167 html_putchar(*p++); 168 } 169 170 /* 171 * Transfer the responsibility for the allocated string *val 172 * to the query structure. 173 */ 174 static void 175 set_query_attr(char **attr, char **val) 176 { 177 178 free(*attr); 179 if (**val == '\0') { 180 *attr = NULL; 181 free(*val); 182 } else 183 *attr = *val; 184 *val = NULL; 185 } 186 187 /* 188 * Parse the QUERY_STRING for key-value pairs 189 * and store the values into the query structure. 190 */ 191 static void 192 parse_query_string(struct req *req, const char *qs) 193 { 194 char *key, *val; 195 size_t keysz, valsz; 196 197 req->isquery = 1; 198 req->q.manpath = NULL; 199 req->q.arch = NULL; 200 req->q.sec = NULL; 201 req->q.query = NULL; 202 req->q.equal = 1; 203 204 key = val = NULL; 205 while (*qs != '\0') { 206 207 /* Parse one key. */ 208 209 keysz = strcspn(qs, "=;&"); 210 key = mandoc_strndup(qs, keysz); 211 qs += keysz; 212 if (*qs != '=') 213 goto next; 214 215 /* Parse one value. */ 216 217 valsz = strcspn(++qs, ";&"); 218 val = mandoc_strndup(qs, valsz); 219 qs += valsz; 220 221 /* Decode and catch encoding errors. */ 222 223 if ( ! (http_decode(key) && http_decode(val))) 224 goto next; 225 226 /* Handle key-value pairs. */ 227 228 if ( ! strcmp(key, "query")) 229 set_query_attr(&req->q.query, &val); 230 231 else if ( ! strcmp(key, "apropos")) 232 req->q.equal = !strcmp(val, "0"); 233 234 else if ( ! strcmp(key, "manpath")) { 235 #ifdef COMPAT_OLDURI 236 if ( ! strncmp(val, "OpenBSD ", 8)) { 237 val[7] = '-'; 238 if ('C' == val[8]) 239 val[8] = 'c'; 240 } 241 #endif 242 set_query_attr(&req->q.manpath, &val); 243 } 244 245 else if ( ! (strcmp(key, "sec") 246 #ifdef COMPAT_OLDURI 247 && strcmp(key, "sektion") 248 #endif 249 )) { 250 if ( ! strcmp(val, "0")) 251 *val = '\0'; 252 set_query_attr(&req->q.sec, &val); 253 } 254 255 else if ( ! strcmp(key, "arch")) { 256 if ( ! strcmp(val, "default")) 257 *val = '\0'; 258 set_query_attr(&req->q.arch, &val); 259 } 260 261 /* 262 * The key must be freed in any case. 263 * The val may have been handed over to the query 264 * structure, in which case it is now NULL. 265 */ 266 next: 267 free(key); 268 key = NULL; 269 free(val); 270 val = NULL; 271 272 if (*qs != '\0') 273 qs++; 274 } 275 } 276 277 /* 278 * HTTP-decode a string. The standard explanation is that this turns 279 * "%4e+foo" into "n foo" in the regular way. This is done in-place 280 * over the allocated string. 281 */ 282 static int 283 http_decode(char *p) 284 { 285 char hex[3]; 286 char *q; 287 int c; 288 289 hex[2] = '\0'; 290 291 q = p; 292 for ( ; '\0' != *p; p++, q++) { 293 if ('%' == *p) { 294 if ('\0' == (hex[0] = *(p + 1))) 295 return 0; 296 if ('\0' == (hex[1] = *(p + 2))) 297 return 0; 298 if (1 != sscanf(hex, "%x", &c)) 299 return 0; 300 if ('\0' == c) 301 return 0; 302 303 *q = (char)c; 304 p += 2; 305 } else 306 *q = '+' == *p ? ' ' : *p; 307 } 308 309 *q = '\0'; 310 return 1; 311 } 312 313 static void 314 resp_begin_http(int code, const char *msg) 315 { 316 317 if (200 != code) 318 printf("Status: %d %s\r\n", code, msg); 319 320 printf("Content-Type: text/html; charset=utf-8\r\n" 321 "Cache-Control: no-cache\r\n" 322 "Pragma: no-cache\r\n" 323 "\r\n"); 324 325 fflush(stdout); 326 } 327 328 static void 329 resp_copy(const char *filename) 330 { 331 char buf[4096]; 332 ssize_t sz; 333 int fd; 334 335 if ((fd = open(filename, O_RDONLY)) != -1) { 336 fflush(stdout); 337 while ((sz = read(fd, buf, sizeof(buf))) > 0) 338 write(STDOUT_FILENO, buf, sz); 339 close(fd); 340 } 341 } 342 343 static void 344 resp_begin_html(int code, const char *msg) 345 { 346 347 resp_begin_http(code, msg); 348 349 printf("<!DOCTYPE html>\n" 350 "<html>\n" 351 "<head>\n" 352 " <meta charset=\"UTF-8\"/>\n" 353 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 354 " type=\"text/css\" media=\"all\">\n" 355 " <title>%s</title>\n" 356 "</head>\n" 357 "<body>\n", 358 CSS_DIR, CUSTOMIZE_TITLE); 359 360 resp_copy(MAN_DIR "/header.html"); 361 } 362 363 static void 364 resp_end_html(void) 365 { 366 367 resp_copy(MAN_DIR "/footer.html"); 368 369 puts("</body>\n" 370 "</html>"); 371 } 372 373 static void 374 resp_searchform(const struct req *req, enum focus focus) 375 { 376 int i; 377 378 printf("<form action=\"/%s\" method=\"get\">\n" 379 " <fieldset>\n" 380 " <legend>Manual Page Search Parameters</legend>\n", 381 scriptname); 382 383 /* Write query input box. */ 384 385 printf(" <input type=\"text\" name=\"query\" value=\""); 386 if (req->q.query != NULL) 387 html_print(req->q.query); 388 printf( "\" size=\"40\""); 389 if (focus == FOCUS_QUERY) 390 printf(" autofocus"); 391 puts(">"); 392 393 /* Write submission buttons. */ 394 395 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 396 "man</button>\n" 397 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 398 "apropos</button>\n" 399 " <br/>\n"); 400 401 /* Write section selector. */ 402 403 puts(" <select name=\"sec\">"); 404 for (i = 0; i < sec_MAX; i++) { 405 printf(" <option value=\"%s\"", sec_numbers[i]); 406 if (NULL != req->q.sec && 407 0 == strcmp(sec_numbers[i], req->q.sec)) 408 printf(" selected=\"selected\""); 409 printf(">%s</option>\n", sec_names[i]); 410 } 411 puts(" </select>"); 412 413 /* Write architecture selector. */ 414 415 printf( " <select name=\"arch\">\n" 416 " <option value=\"default\""); 417 if (NULL == req->q.arch) 418 printf(" selected=\"selected\""); 419 puts(">All Architectures</option>"); 420 for (i = 0; i < arch_MAX; i++) { 421 printf(" <option value=\"%s\"", arch_names[i]); 422 if (NULL != req->q.arch && 423 0 == strcmp(arch_names[i], req->q.arch)) 424 printf(" selected=\"selected\""); 425 printf(">%s</option>\n", arch_names[i]); 426 } 427 puts(" </select>"); 428 429 /* Write manpath selector. */ 430 431 if (req->psz > 1) { 432 puts(" <select name=\"manpath\">"); 433 for (i = 0; i < (int)req->psz; i++) { 434 printf(" <option "); 435 if (strcmp(req->q.manpath, req->p[i]) == 0) 436 printf("selected=\"selected\" "); 437 printf("value=\""); 438 html_print(req->p[i]); 439 printf("\">"); 440 html_print(req->p[i]); 441 puts("</option>"); 442 } 443 puts(" </select>"); 444 } 445 446 puts(" </fieldset>\n" 447 "</form>"); 448 } 449 450 static int 451 validate_urifrag(const char *frag) 452 { 453 454 while ('\0' != *frag) { 455 if ( ! (isalnum((unsigned char)*frag) || 456 '-' == *frag || '.' == *frag || 457 '/' == *frag || '_' == *frag)) 458 return 0; 459 frag++; 460 } 461 return 1; 462 } 463 464 static int 465 validate_manpath(const struct req *req, const char* manpath) 466 { 467 size_t i; 468 469 for (i = 0; i < req->psz; i++) 470 if ( ! strcmp(manpath, req->p[i])) 471 return 1; 472 473 return 0; 474 } 475 476 static int 477 validate_filename(const char *file) 478 { 479 480 if ('.' == file[0] && '/' == file[1]) 481 file += 2; 482 483 return ! (strstr(file, "../") || strstr(file, "/..") || 484 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 485 } 486 487 static void 488 pg_index(const struct req *req) 489 { 490 491 resp_begin_html(200, NULL); 492 resp_searchform(req, FOCUS_QUERY); 493 printf("<p>\n" 494 "This web interface is documented in the\n" 495 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 496 "manual, and the\n" 497 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 498 "manual explains the query syntax.\n" 499 "</p>\n", 500 scriptname, *scriptname == '\0' ? "" : "/", 501 scriptname, *scriptname == '\0' ? "" : "/"); 502 resp_end_html(); 503 } 504 505 static void 506 pg_noresult(const struct req *req, const char *msg) 507 { 508 resp_begin_html(200, NULL); 509 resp_searchform(req, FOCUS_QUERY); 510 puts("<p>"); 511 puts(msg); 512 puts("</p>"); 513 resp_end_html(); 514 } 515 516 static void 517 pg_error_badrequest(const char *msg) 518 { 519 520 resp_begin_html(400, "Bad Request"); 521 puts("<h1>Bad Request</h1>\n" 522 "<p>\n"); 523 puts(msg); 524 printf("Try again from the\n" 525 "<a href=\"/%s\">main page</a>.\n" 526 "</p>", scriptname); 527 resp_end_html(); 528 } 529 530 static void 531 pg_error_internal(void) 532 { 533 resp_begin_html(500, "Internal Server Error"); 534 puts("<p>Internal Server Error</p>"); 535 resp_end_html(); 536 } 537 538 static void 539 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 540 { 541 char *arch, *archend; 542 const char *sec; 543 size_t i, iuse; 544 int archprio, archpriouse; 545 int prio, priouse; 546 547 for (i = 0; i < sz; i++) { 548 if (validate_filename(r[i].file)) 549 continue; 550 warnx("invalid filename %s in %s database", 551 r[i].file, req->q.manpath); 552 pg_error_internal(); 553 return; 554 } 555 556 if (req->isquery && sz == 1) { 557 /* 558 * If we have just one result, then jump there now 559 * without any delay. 560 */ 561 printf("Status: 303 See Other\r\n"); 562 printf("Location: http://%s/%s%s%s/%s", 563 HTTP_HOST, scriptname, 564 *scriptname == '\0' ? "" : "/", 565 req->q.manpath, r[0].file); 566 printf("\r\n" 567 "Content-Type: text/html; charset=utf-8\r\n" 568 "\r\n"); 569 return; 570 } 571 572 resp_begin_html(200, NULL); 573 resp_searchform(req, 574 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 575 576 if (sz > 1) { 577 puts("<table class=\"results\">"); 578 for (i = 0; i < sz; i++) { 579 printf(" <tr>\n" 580 " <td>" 581 "<a class=\"Xr\" href=\"/%s%s%s/%s\">", 582 scriptname, *scriptname == '\0' ? "" : "/", 583 req->q.manpath, r[i].file); 584 html_print(r[i].names); 585 printf("</a></td>\n" 586 " <td><span class=\"Nd\">"); 587 html_print(r[i].output); 588 puts("</span></td>\n" 589 " </tr>"); 590 } 591 puts("</table>"); 592 } 593 594 /* 595 * In man(1) mode, show one of the pages 596 * even if more than one is found. 597 */ 598 599 if (req->q.equal || sz == 1) { 600 puts("<hr>"); 601 iuse = 0; 602 priouse = 20; 603 archpriouse = 3; 604 for (i = 0; i < sz; i++) { 605 sec = r[i].file; 606 sec += strcspn(sec, "123456789"); 607 if (sec[0] == '\0') 608 continue; 609 prio = sec_prios[sec[0] - '1']; 610 if (sec[1] != '/') 611 prio += 10; 612 if (req->q.arch == NULL) { 613 archprio = 614 ((arch = strchr(sec + 1, '/')) 615 == NULL) ? 3 : 616 ((archend = strchr(arch + 1, '/')) 617 == NULL) ? 0 : 618 strncmp(arch, "amd64/", 619 archend - arch) ? 2 : 1; 620 if (archprio < archpriouse) { 621 archpriouse = archprio; 622 priouse = prio; 623 iuse = i; 624 continue; 625 } 626 if (archprio > archpriouse) 627 continue; 628 } 629 if (prio >= priouse) 630 continue; 631 priouse = prio; 632 iuse = i; 633 } 634 resp_show(req, r[iuse].file); 635 } 636 637 resp_end_html(); 638 } 639 640 static void 641 resp_catman(const struct req *req, const char *file) 642 { 643 FILE *f; 644 char *p; 645 size_t sz; 646 ssize_t len; 647 int i; 648 int italic, bold; 649 650 if ((f = fopen(file, "r")) == NULL) { 651 puts("<p>You specified an invalid manual file.</p>"); 652 return; 653 } 654 655 puts("<div class=\"catman\">\n" 656 "<pre>"); 657 658 p = NULL; 659 sz = 0; 660 661 while ((len = getline(&p, &sz, f)) != -1) { 662 bold = italic = 0; 663 for (i = 0; i < len - 1; i++) { 664 /* 665 * This means that the catpage is out of state. 666 * Ignore it and keep going (although the 667 * catpage is bogus). 668 */ 669 670 if ('\b' == p[i] || '\n' == p[i]) 671 continue; 672 673 /* 674 * Print a regular character. 675 * Close out any bold/italic scopes. 676 * If we're in back-space mode, make sure we'll 677 * have something to enter when we backspace. 678 */ 679 680 if ('\b' != p[i + 1]) { 681 if (italic) 682 printf("</i>"); 683 if (bold) 684 printf("</b>"); 685 italic = bold = 0; 686 html_putchar(p[i]); 687 continue; 688 } else if (i + 2 >= len) 689 continue; 690 691 /* Italic mode. */ 692 693 if ('_' == p[i]) { 694 if (bold) 695 printf("</b>"); 696 if ( ! italic) 697 printf("<i>"); 698 bold = 0; 699 italic = 1; 700 i += 2; 701 html_putchar(p[i]); 702 continue; 703 } 704 705 /* 706 * Handle funny behaviour troff-isms. 707 * These grok'd from the original man2html.c. 708 */ 709 710 if (('+' == p[i] && 'o' == p[i + 2]) || 711 ('o' == p[i] && '+' == p[i + 2]) || 712 ('|' == p[i] && '=' == p[i + 2]) || 713 ('=' == p[i] && '|' == p[i + 2]) || 714 ('*' == p[i] && '=' == p[i + 2]) || 715 ('=' == p[i] && '*' == p[i + 2]) || 716 ('*' == p[i] && '|' == p[i + 2]) || 717 ('|' == p[i] && '*' == p[i + 2])) { 718 if (italic) 719 printf("</i>"); 720 if (bold) 721 printf("</b>"); 722 italic = bold = 0; 723 putchar('*'); 724 i += 2; 725 continue; 726 } else if (('|' == p[i] && '-' == p[i + 2]) || 727 ('-' == p[i] && '|' == p[i + 1]) || 728 ('+' == p[i] && '-' == p[i + 1]) || 729 ('-' == p[i] && '+' == p[i + 1]) || 730 ('+' == p[i] && '|' == p[i + 1]) || 731 ('|' == p[i] && '+' == p[i + 1])) { 732 if (italic) 733 printf("</i>"); 734 if (bold) 735 printf("</b>"); 736 italic = bold = 0; 737 putchar('+'); 738 i += 2; 739 continue; 740 } 741 742 /* Bold mode. */ 743 744 if (italic) 745 printf("</i>"); 746 if ( ! bold) 747 printf("<b>"); 748 bold = 1; 749 italic = 0; 750 i += 2; 751 html_putchar(p[i]); 752 } 753 754 /* 755 * Clean up the last character. 756 * We can get to a newline; don't print that. 757 */ 758 759 if (italic) 760 printf("</i>"); 761 if (bold) 762 printf("</b>"); 763 764 if (i == len - 1 && p[i] != '\n') 765 html_putchar(p[i]); 766 767 putchar('\n'); 768 } 769 free(p); 770 771 puts("</pre>\n" 772 "</div>"); 773 774 fclose(f); 775 } 776 777 static void 778 resp_format(const struct req *req, const char *file) 779 { 780 struct manoutput conf; 781 struct mparse *mp; 782 struct roff_man *man; 783 void *vp; 784 int fd; 785 int usepath; 786 787 if (-1 == (fd = open(file, O_RDONLY, 0))) { 788 puts("<p>You specified an invalid manual file.</p>"); 789 return; 790 } 791 792 mchars_alloc(); 793 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1, 794 MANDOCLEVEL_BADARG, NULL, req->q.manpath); 795 mparse_readfd(mp, fd, file); 796 close(fd); 797 798 memset(&conf, 0, sizeof(conf)); 799 conf.fragment = 1; 800 usepath = strcmp(req->q.manpath, req->p[0]); 801 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S", 802 usepath ? req->q.manpath : "", usepath ? "/" : ""); 803 804 mparse_result(mp, &man, NULL); 805 if (man == NULL) { 806 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 807 pg_error_internal(); 808 mparse_free(mp); 809 mchars_free(); 810 return; 811 } 812 813 vp = html_alloc(&conf); 814 815 if (man->macroset == MACROSET_MDOC) { 816 mdoc_validate(man); 817 html_mdoc(vp, man); 818 } else { 819 man_validate(man); 820 html_man(vp, man); 821 } 822 823 html_free(vp); 824 mparse_free(mp); 825 mchars_free(); 826 free(conf.man); 827 } 828 829 static void 830 resp_show(const struct req *req, const char *file) 831 { 832 833 if ('.' == file[0] && '/' == file[1]) 834 file += 2; 835 836 if ('c' == *file) 837 resp_catman(req, file); 838 else 839 resp_format(req, file); 840 } 841 842 static void 843 pg_show(struct req *req, const char *fullpath) 844 { 845 char *manpath; 846 const char *file; 847 848 if ((file = strchr(fullpath, '/')) == NULL) { 849 pg_error_badrequest( 850 "You did not specify a page to show."); 851 return; 852 } 853 manpath = mandoc_strndup(fullpath, file - fullpath); 854 file++; 855 856 if ( ! validate_manpath(req, manpath)) { 857 pg_error_badrequest( 858 "You specified an invalid manpath."); 859 free(manpath); 860 return; 861 } 862 863 /* 864 * Begin by chdir()ing into the manpath. 865 * This way we can pick up the database files, which are 866 * relative to the manpath root. 867 */ 868 869 if (chdir(manpath) == -1) { 870 warn("chdir %s", manpath); 871 pg_error_internal(); 872 free(manpath); 873 return; 874 } 875 free(manpath); 876 877 if ( ! validate_filename(file)) { 878 pg_error_badrequest( 879 "You specified an invalid manual file."); 880 return; 881 } 882 883 resp_begin_html(200, NULL); 884 resp_searchform(req, FOCUS_NONE); 885 resp_show(req, file); 886 resp_end_html(); 887 } 888 889 static void 890 pg_search(const struct req *req) 891 { 892 struct mansearch search; 893 struct manpaths paths; 894 struct manpage *res; 895 char **argv; 896 char *query, *rp, *wp; 897 size_t ressz; 898 int argc; 899 900 /* 901 * Begin by chdir()ing into the root of the manpath. 902 * This way we can pick up the database files, which are 903 * relative to the manpath root. 904 */ 905 906 if (chdir(req->q.manpath) == -1) { 907 warn("chdir %s", req->q.manpath); 908 pg_error_internal(); 909 return; 910 } 911 912 search.arch = req->q.arch; 913 search.sec = req->q.sec; 914 search.outkey = "Nd"; 915 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 916 search.firstmatch = 1; 917 918 paths.sz = 1; 919 paths.paths = mandoc_malloc(sizeof(char *)); 920 paths.paths[0] = mandoc_strdup("."); 921 922 /* 923 * Break apart at spaces with backslash-escaping. 924 */ 925 926 argc = 0; 927 argv = NULL; 928 rp = query = mandoc_strdup(req->q.query); 929 for (;;) { 930 while (isspace((unsigned char)*rp)) 931 rp++; 932 if (*rp == '\0') 933 break; 934 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 935 argv[argc++] = wp = rp; 936 for (;;) { 937 if (isspace((unsigned char)*rp)) { 938 *wp = '\0'; 939 rp++; 940 break; 941 } 942 if (rp[0] == '\\' && rp[1] != '\0') 943 rp++; 944 if (wp != rp) 945 *wp = *rp; 946 if (*rp == '\0') 947 break; 948 wp++; 949 rp++; 950 } 951 } 952 953 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz)) 954 pg_noresult(req, "You entered an invalid query."); 955 else if (0 == ressz) 956 pg_noresult(req, "No results found."); 957 else 958 pg_searchres(req, res, ressz); 959 960 free(query); 961 mansearch_free(res, ressz); 962 free(paths.paths[0]); 963 free(paths.paths); 964 } 965 966 int 967 main(void) 968 { 969 struct req req; 970 struct itimerval itimer; 971 const char *path; 972 const char *querystring; 973 int i; 974 975 /* Poor man's ReDoS mitigation. */ 976 977 itimer.it_value.tv_sec = 2; 978 itimer.it_value.tv_usec = 0; 979 itimer.it_interval.tv_sec = 2; 980 itimer.it_interval.tv_usec = 0; 981 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 982 warn("setitimer"); 983 pg_error_internal(); 984 return EXIT_FAILURE; 985 } 986 987 /* 988 * First we change directory into the MAN_DIR so that 989 * subsequent scanning for manpath directories is rooted 990 * relative to the same position. 991 */ 992 993 if (chdir(MAN_DIR) == -1) { 994 warn("MAN_DIR: %s", MAN_DIR); 995 pg_error_internal(); 996 return EXIT_FAILURE; 997 } 998 999 memset(&req, 0, sizeof(struct req)); 1000 req.q.equal = 1; 1001 parse_manpath_conf(&req); 1002 1003 /* Parse the path info and the query string. */ 1004 1005 if ((path = getenv("PATH_INFO")) == NULL) 1006 path = ""; 1007 else if (*path == '/') 1008 path++; 1009 1010 if (*path != '\0') { 1011 parse_path_info(&req, path); 1012 if (req.q.manpath == NULL || access(path, F_OK) == -1) 1013 path = ""; 1014 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1015 parse_query_string(&req, querystring); 1016 1017 /* Validate parsed data and add defaults. */ 1018 1019 if (req.q.manpath == NULL) 1020 req.q.manpath = mandoc_strdup(req.p[0]); 1021 else if ( ! validate_manpath(&req, req.q.manpath)) { 1022 pg_error_badrequest( 1023 "You specified an invalid manpath."); 1024 return EXIT_FAILURE; 1025 } 1026 1027 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1028 pg_error_badrequest( 1029 "You specified an invalid architecture."); 1030 return EXIT_FAILURE; 1031 } 1032 1033 /* Dispatch to the three different pages. */ 1034 1035 if ('\0' != *path) 1036 pg_show(&req, path); 1037 else if (NULL != req.q.query) 1038 pg_search(&req); 1039 else 1040 pg_index(&req); 1041 1042 free(req.q.manpath); 1043 free(req.q.arch); 1044 free(req.q.sec); 1045 free(req.q.query); 1046 for (i = 0; i < (int)req.psz; i++) 1047 free(req.p[i]); 1048 free(req.p); 1049 return EXIT_SUCCESS; 1050 } 1051 1052 /* 1053 * If PATH_INFO is not a file name, translate it to a query. 1054 */ 1055 static void 1056 parse_path_info(struct req *req, const char *path) 1057 { 1058 char *dir[4]; 1059 int i; 1060 1061 req->isquery = 0; 1062 req->q.equal = 1; 1063 req->q.manpath = mandoc_strdup(path); 1064 req->q.arch = NULL; 1065 1066 /* Mandatory manual page name. */ 1067 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1068 req->q.query = req->q.manpath; 1069 req->q.manpath = NULL; 1070 } else 1071 *req->q.query++ = '\0'; 1072 1073 /* Optional trailing section. */ 1074 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1075 if(isdigit((unsigned char)req->q.sec[1])) { 1076 *req->q.sec++ = '\0'; 1077 req->q.sec = mandoc_strdup(req->q.sec); 1078 } else 1079 req->q.sec = NULL; 1080 } 1081 1082 /* Handle the case of name[.section] only. */ 1083 if (req->q.manpath == NULL) 1084 return; 1085 req->q.query = mandoc_strdup(req->q.query); 1086 1087 /* Split directory components. */ 1088 dir[i = 0] = req->q.manpath; 1089 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) { 1090 if (++i == 3) { 1091 pg_error_badrequest( 1092 "You specified too many directory components."); 1093 exit(EXIT_FAILURE); 1094 } 1095 *dir[i]++ = '\0'; 1096 } 1097 1098 /* Optional manpath. */ 1099 if ((i = validate_manpath(req, req->q.manpath)) == 0) 1100 req->q.manpath = NULL; 1101 else if (dir[1] == NULL) 1102 return; 1103 1104 /* Optional section. */ 1105 if (strncmp(dir[i], "man", 3) == 0) { 1106 free(req->q.sec); 1107 req->q.sec = mandoc_strdup(dir[i++] + 3); 1108 } 1109 if (dir[i] == NULL) { 1110 if (req->q.manpath == NULL) 1111 free(dir[0]); 1112 return; 1113 } 1114 if (dir[i + 1] != NULL) { 1115 pg_error_badrequest( 1116 "You specified an invalid directory component."); 1117 exit(EXIT_FAILURE); 1118 } 1119 1120 /* Optional architecture. */ 1121 if (i) { 1122 req->q.arch = mandoc_strdup(dir[i]); 1123 if (req->q.manpath == NULL) 1124 free(dir[0]); 1125 } else 1126 req->q.arch = dir[0]; 1127 } 1128 1129 /* 1130 * Scan for indexable paths. 1131 */ 1132 static void 1133 parse_manpath_conf(struct req *req) 1134 { 1135 FILE *fp; 1136 char *dp; 1137 size_t dpsz; 1138 ssize_t len; 1139 1140 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1141 warn("%s/manpath.conf", MAN_DIR); 1142 pg_error_internal(); 1143 exit(EXIT_FAILURE); 1144 } 1145 1146 dp = NULL; 1147 dpsz = 0; 1148 1149 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1150 if (dp[len - 1] == '\n') 1151 dp[--len] = '\0'; 1152 req->p = mandoc_realloc(req->p, 1153 (req->psz + 1) * sizeof(char *)); 1154 if ( ! validate_urifrag(dp)) { 1155 warnx("%s/manpath.conf contains " 1156 "unsafe path \"%s\"", MAN_DIR, dp); 1157 pg_error_internal(); 1158 exit(EXIT_FAILURE); 1159 } 1160 if (strchr(dp, '/') != NULL) { 1161 warnx("%s/manpath.conf contains " 1162 "path with slash \"%s\"", MAN_DIR, dp); 1163 pg_error_internal(); 1164 exit(EXIT_FAILURE); 1165 } 1166 req->p[req->psz++] = dp; 1167 dp = NULL; 1168 dpsz = 0; 1169 } 1170 free(dp); 1171 1172 if (req->p == NULL) { 1173 warnx("%s/manpath.conf is empty", MAN_DIR); 1174 pg_error_internal(); 1175 exit(EXIT_FAILURE); 1176 } 1177 } 1178