1 /* $OpenBSD: cgi.c,v 1.82 2017/01/19 13:55:49 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "main.h" 38 #include "manconf.h" 39 #include "mansearch.h" 40 #include "cgi.h" 41 42 /* 43 * A query as passed to the search function. 44 */ 45 struct query { 46 char *manpath; /* desired manual directory */ 47 char *arch; /* architecture */ 48 char *sec; /* manual section */ 49 char *query; /* unparsed query expression */ 50 int equal; /* match whole names, not substrings */ 51 }; 52 53 struct req { 54 struct query q; 55 char **p; /* array of available manpaths */ 56 size_t psz; /* number of available manpaths */ 57 int isquery; /* QUERY_STRING used, not PATH_INFO */ 58 }; 59 60 enum focus { 61 FOCUS_NONE = 0, 62 FOCUS_QUERY 63 }; 64 65 static void html_print(const char *); 66 static void html_putchar(char); 67 static int http_decode(char *); 68 static void parse_manpath_conf(struct req *); 69 static void parse_path_info(struct req *req, const char *path); 70 static void parse_query_string(struct req *, const char *); 71 static void pg_error_badrequest(const char *); 72 static void pg_error_internal(void); 73 static void pg_index(const struct req *); 74 static void pg_noresult(const struct req *, const char *); 75 static void pg_search(const struct req *); 76 static void pg_searchres(const struct req *, 77 struct manpage *, size_t); 78 static void pg_show(struct req *, const char *); 79 static void resp_begin_html(int, const char *); 80 static void resp_begin_http(int, const char *); 81 static void resp_catman(const struct req *, const char *); 82 static void resp_copy(const char *); 83 static void resp_end_html(void); 84 static void resp_format(const struct req *, const char *); 85 static void resp_searchform(const struct req *, enum focus); 86 static void resp_show(const struct req *, const char *); 87 static void set_query_attr(char **, char **); 88 static int validate_filename(const char *); 89 static int validate_manpath(const struct req *, const char *); 90 static int validate_urifrag(const char *); 91 92 static const char *scriptname = SCRIPT_NAME; 93 94 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 95 static const char *const sec_numbers[] = { 96 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 97 }; 98 static const char *const sec_names[] = { 99 "All Sections", 100 "1 - General Commands", 101 "2 - System Calls", 102 "3 - Library Functions", 103 "3p - Perl Library", 104 "4 - Device Drivers", 105 "5 - File Formats", 106 "6 - Games", 107 "7 - Miscellaneous Information", 108 "8 - System Manager\'s Manual", 109 "9 - Kernel Developer\'s Manual" 110 }; 111 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 112 113 static const char *const arch_names[] = { 114 "amd64", "alpha", "armv7", 115 "hppa", "i386", "landisk", 116 "loongson", "luna88k", "macppc", "mips64", 117 "octeon", "sgi", "socppc", "sparc64", 118 "amiga", "arc", "armish", "arm32", 119 "atari", "aviion", "beagle", "cats", 120 "hppa64", "hp300", 121 "ia64", "mac68k", "mvme68k", "mvme88k", 122 "mvmeppc", "palm", "pc532", "pegasos", 123 "pmax", "powerpc", "solbourne", "sparc", 124 "sun3", "vax", "wgrisc", "x68k", 125 "zaurus" 126 }; 127 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 128 129 /* 130 * Print a character, escaping HTML along the way. 131 * This will pass non-ASCII straight to output: be warned! 132 */ 133 static void 134 html_putchar(char c) 135 { 136 137 switch (c) { 138 case ('"'): 139 printf("""); 140 break; 141 case ('&'): 142 printf("&"); 143 break; 144 case ('>'): 145 printf(">"); 146 break; 147 case ('<'): 148 printf("<"); 149 break; 150 default: 151 putchar((unsigned char)c); 152 break; 153 } 154 } 155 156 /* 157 * Call through to html_putchar(). 158 * Accepts NULL strings. 159 */ 160 static void 161 html_print(const char *p) 162 { 163 164 if (NULL == p) 165 return; 166 while ('\0' != *p) 167 html_putchar(*p++); 168 } 169 170 /* 171 * Transfer the responsibility for the allocated string *val 172 * to the query structure. 173 */ 174 static void 175 set_query_attr(char **attr, char **val) 176 { 177 178 free(*attr); 179 if (**val == '\0') { 180 *attr = NULL; 181 free(*val); 182 } else 183 *attr = *val; 184 *val = NULL; 185 } 186 187 /* 188 * Parse the QUERY_STRING for key-value pairs 189 * and store the values into the query structure. 190 */ 191 static void 192 parse_query_string(struct req *req, const char *qs) 193 { 194 char *key, *val; 195 size_t keysz, valsz; 196 197 req->isquery = 1; 198 req->q.manpath = NULL; 199 req->q.arch = NULL; 200 req->q.sec = NULL; 201 req->q.query = NULL; 202 req->q.equal = 1; 203 204 key = val = NULL; 205 while (*qs != '\0') { 206 207 /* Parse one key. */ 208 209 keysz = strcspn(qs, "=;&"); 210 key = mandoc_strndup(qs, keysz); 211 qs += keysz; 212 if (*qs != '=') 213 goto next; 214 215 /* Parse one value. */ 216 217 valsz = strcspn(++qs, ";&"); 218 val = mandoc_strndup(qs, valsz); 219 qs += valsz; 220 221 /* Decode and catch encoding errors. */ 222 223 if ( ! (http_decode(key) && http_decode(val))) 224 goto next; 225 226 /* Handle key-value pairs. */ 227 228 if ( ! strcmp(key, "query")) 229 set_query_attr(&req->q.query, &val); 230 231 else if ( ! strcmp(key, "apropos")) 232 req->q.equal = !strcmp(val, "0"); 233 234 else if ( ! strcmp(key, "manpath")) { 235 #ifdef COMPAT_OLDURI 236 if ( ! strncmp(val, "OpenBSD ", 8)) { 237 val[7] = '-'; 238 if ('C' == val[8]) 239 val[8] = 'c'; 240 } 241 #endif 242 set_query_attr(&req->q.manpath, &val); 243 } 244 245 else if ( ! (strcmp(key, "sec") 246 #ifdef COMPAT_OLDURI 247 && strcmp(key, "sektion") 248 #endif 249 )) { 250 if ( ! strcmp(val, "0")) 251 *val = '\0'; 252 set_query_attr(&req->q.sec, &val); 253 } 254 255 else if ( ! strcmp(key, "arch")) { 256 if ( ! strcmp(val, "default")) 257 *val = '\0'; 258 set_query_attr(&req->q.arch, &val); 259 } 260 261 /* 262 * The key must be freed in any case. 263 * The val may have been handed over to the query 264 * structure, in which case it is now NULL. 265 */ 266 next: 267 free(key); 268 key = NULL; 269 free(val); 270 val = NULL; 271 272 if (*qs != '\0') 273 qs++; 274 } 275 } 276 277 /* 278 * HTTP-decode a string. The standard explanation is that this turns 279 * "%4e+foo" into "n foo" in the regular way. This is done in-place 280 * over the allocated string. 281 */ 282 static int 283 http_decode(char *p) 284 { 285 char hex[3]; 286 char *q; 287 int c; 288 289 hex[2] = '\0'; 290 291 q = p; 292 for ( ; '\0' != *p; p++, q++) { 293 if ('%' == *p) { 294 if ('\0' == (hex[0] = *(p + 1))) 295 return 0; 296 if ('\0' == (hex[1] = *(p + 2))) 297 return 0; 298 if (1 != sscanf(hex, "%x", &c)) 299 return 0; 300 if ('\0' == c) 301 return 0; 302 303 *q = (char)c; 304 p += 2; 305 } else 306 *q = '+' == *p ? ' ' : *p; 307 } 308 309 *q = '\0'; 310 return 1; 311 } 312 313 static void 314 resp_begin_http(int code, const char *msg) 315 { 316 317 if (200 != code) 318 printf("Status: %d %s\r\n", code, msg); 319 320 printf("Content-Type: text/html; charset=utf-8\r\n" 321 "Cache-Control: no-cache\r\n" 322 "Pragma: no-cache\r\n" 323 "\r\n"); 324 325 fflush(stdout); 326 } 327 328 static void 329 resp_copy(const char *filename) 330 { 331 char buf[4096]; 332 ssize_t sz; 333 int fd; 334 335 if ((fd = open(filename, O_RDONLY)) != -1) { 336 fflush(stdout); 337 while ((sz = read(fd, buf, sizeof(buf))) > 0) 338 write(STDOUT_FILENO, buf, sz); 339 close(fd); 340 } 341 } 342 343 static void 344 resp_begin_html(int code, const char *msg) 345 { 346 347 resp_begin_http(code, msg); 348 349 printf("<!DOCTYPE html>\n" 350 "<html>\n" 351 "<head>\n" 352 " <meta charset=\"UTF-8\"/>\n" 353 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 354 " type=\"text/css\" media=\"all\">\n" 355 " <title>%s</title>\n" 356 "</head>\n" 357 "<body>\n", 358 CSS_DIR, CUSTOMIZE_TITLE); 359 360 resp_copy(MAN_DIR "/header.html"); 361 } 362 363 static void 364 resp_end_html(void) 365 { 366 367 resp_copy(MAN_DIR "/footer.html"); 368 369 puts("</body>\n" 370 "</html>"); 371 } 372 373 static void 374 resp_searchform(const struct req *req, enum focus focus) 375 { 376 int i; 377 378 printf("<form action=\"/%s\" method=\"get\">\n" 379 " <fieldset>\n" 380 " <legend>Manual Page Search Parameters</legend>\n", 381 scriptname); 382 383 /* Write query input box. */ 384 385 printf(" <input type=\"text\" name=\"query\" value=\""); 386 if (req->q.query != NULL) 387 html_print(req->q.query); 388 printf( "\" size=\"40\""); 389 if (focus == FOCUS_QUERY) 390 printf(" autofocus"); 391 puts(">"); 392 393 /* Write submission buttons. */ 394 395 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 396 "man</button>\n" 397 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 398 "apropos</button>\n" 399 " <br/>\n"); 400 401 /* Write section selector. */ 402 403 puts(" <select name=\"sec\">"); 404 for (i = 0; i < sec_MAX; i++) { 405 printf(" <option value=\"%s\"", sec_numbers[i]); 406 if (NULL != req->q.sec && 407 0 == strcmp(sec_numbers[i], req->q.sec)) 408 printf(" selected=\"selected\""); 409 printf(">%s</option>\n", sec_names[i]); 410 } 411 puts(" </select>"); 412 413 /* Write architecture selector. */ 414 415 printf( " <select name=\"arch\">\n" 416 " <option value=\"default\""); 417 if (NULL == req->q.arch) 418 printf(" selected=\"selected\""); 419 puts(">All Architectures</option>"); 420 for (i = 0; i < arch_MAX; i++) { 421 printf(" <option value=\"%s\"", arch_names[i]); 422 if (NULL != req->q.arch && 423 0 == strcmp(arch_names[i], req->q.arch)) 424 printf(" selected=\"selected\""); 425 printf(">%s</option>\n", arch_names[i]); 426 } 427 puts(" </select>"); 428 429 /* Write manpath selector. */ 430 431 if (req->psz > 1) { 432 puts(" <select name=\"manpath\">"); 433 for (i = 0; i < (int)req->psz; i++) { 434 printf(" <option "); 435 if (strcmp(req->q.manpath, req->p[i]) == 0) 436 printf("selected=\"selected\" "); 437 printf("value=\""); 438 html_print(req->p[i]); 439 printf("\">"); 440 html_print(req->p[i]); 441 puts("</option>"); 442 } 443 puts(" </select>"); 444 } 445 446 puts(" </fieldset>\n" 447 "</form>"); 448 } 449 450 static int 451 validate_urifrag(const char *frag) 452 { 453 454 while ('\0' != *frag) { 455 if ( ! (isalnum((unsigned char)*frag) || 456 '-' == *frag || '.' == *frag || 457 '/' == *frag || '_' == *frag)) 458 return 0; 459 frag++; 460 } 461 return 1; 462 } 463 464 static int 465 validate_manpath(const struct req *req, const char* manpath) 466 { 467 size_t i; 468 469 for (i = 0; i < req->psz; i++) 470 if ( ! strcmp(manpath, req->p[i])) 471 return 1; 472 473 return 0; 474 } 475 476 static int 477 validate_filename(const char *file) 478 { 479 480 if ('.' == file[0] && '/' == file[1]) 481 file += 2; 482 483 return ! (strstr(file, "../") || strstr(file, "/..") || 484 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 485 } 486 487 static void 488 pg_index(const struct req *req) 489 { 490 491 resp_begin_html(200, NULL); 492 resp_searchform(req, FOCUS_QUERY); 493 printf("<p>\n" 494 "This web interface is documented in the\n" 495 "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 496 "manual, and the\n" 497 "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n" 498 "manual explains the query syntax.\n" 499 "</p>\n", 500 scriptname, *scriptname == '\0' ? "" : "/", 501 scriptname, *scriptname == '\0' ? "" : "/"); 502 resp_end_html(); 503 } 504 505 static void 506 pg_noresult(const struct req *req, const char *msg) 507 { 508 resp_begin_html(200, NULL); 509 resp_searchform(req, FOCUS_QUERY); 510 puts("<p>"); 511 puts(msg); 512 puts("</p>"); 513 resp_end_html(); 514 } 515 516 static void 517 pg_error_badrequest(const char *msg) 518 { 519 520 resp_begin_html(400, "Bad Request"); 521 puts("<h1>Bad Request</h1>\n" 522 "<p>\n"); 523 puts(msg); 524 printf("Try again from the\n" 525 "<a href=\"/%s\">main page</a>.\n" 526 "</p>", scriptname); 527 resp_end_html(); 528 } 529 530 static void 531 pg_error_internal(void) 532 { 533 resp_begin_html(500, "Internal Server Error"); 534 puts("<p>Internal Server Error</p>"); 535 resp_end_html(); 536 } 537 538 static void 539 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 540 { 541 char *arch, *archend; 542 const char *sec; 543 size_t i, iuse; 544 int archprio, archpriouse; 545 int prio, priouse; 546 547 for (i = 0; i < sz; i++) { 548 if (validate_filename(r[i].file)) 549 continue; 550 warnx("invalid filename %s in %s database", 551 r[i].file, req->q.manpath); 552 pg_error_internal(); 553 return; 554 } 555 556 if (req->isquery && sz == 1) { 557 /* 558 * If we have just one result, then jump there now 559 * without any delay. 560 */ 561 printf("Status: 303 See Other\r\n"); 562 printf("Location: http://%s/%s%s%s/%s", 563 HTTP_HOST, scriptname, 564 *scriptname == '\0' ? "" : "/", 565 req->q.manpath, r[0].file); 566 printf("\r\n" 567 "Content-Type: text/html; charset=utf-8\r\n" 568 "\r\n"); 569 return; 570 } 571 572 resp_begin_html(200, NULL); 573 resp_searchform(req, 574 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 575 576 if (sz > 1) { 577 puts("<div class=\"results\">"); 578 puts("<table>"); 579 580 for (i = 0; i < sz; i++) { 581 printf(" <tr>\n" 582 " <td class=\"title\">" 583 "<a href=\"/%s%s%s/%s", 584 scriptname, *scriptname == '\0' ? "" : "/", 585 req->q.manpath, r[i].file); 586 printf("\">"); 587 html_print(r[i].names); 588 printf("</a></td>\n" 589 " <td class=\"desc\">"); 590 html_print(r[i].output); 591 puts("</td>\n" 592 " </tr>"); 593 } 594 595 puts("</table>\n" 596 "</div>"); 597 } 598 599 /* 600 * In man(1) mode, show one of the pages 601 * even if more than one is found. 602 */ 603 604 if (req->q.equal || sz == 1) { 605 puts("<hr>"); 606 iuse = 0; 607 priouse = 20; 608 archpriouse = 3; 609 for (i = 0; i < sz; i++) { 610 sec = r[i].file; 611 sec += strcspn(sec, "123456789"); 612 if (sec[0] == '\0') 613 continue; 614 prio = sec_prios[sec[0] - '1']; 615 if (sec[1] != '/') 616 prio += 10; 617 if (req->q.arch == NULL) { 618 archprio = 619 ((arch = strchr(sec + 1, '/')) 620 == NULL) ? 3 : 621 ((archend = strchr(arch + 1, '/')) 622 == NULL) ? 0 : 623 strncmp(arch, "amd64/", 624 archend - arch) ? 2 : 1; 625 if (archprio < archpriouse) { 626 archpriouse = archprio; 627 priouse = prio; 628 iuse = i; 629 continue; 630 } 631 if (archprio > archpriouse) 632 continue; 633 } 634 if (prio >= priouse) 635 continue; 636 priouse = prio; 637 iuse = i; 638 } 639 resp_show(req, r[iuse].file); 640 } 641 642 resp_end_html(); 643 } 644 645 static void 646 resp_catman(const struct req *req, const char *file) 647 { 648 FILE *f; 649 char *p; 650 size_t sz; 651 ssize_t len; 652 int i; 653 int italic, bold; 654 655 if ((f = fopen(file, "r")) == NULL) { 656 puts("<p>You specified an invalid manual file.</p>"); 657 return; 658 } 659 660 puts("<div class=\"catman\">\n" 661 "<pre>"); 662 663 p = NULL; 664 sz = 0; 665 666 while ((len = getline(&p, &sz, f)) != -1) { 667 bold = italic = 0; 668 for (i = 0; i < len - 1; i++) { 669 /* 670 * This means that the catpage is out of state. 671 * Ignore it and keep going (although the 672 * catpage is bogus). 673 */ 674 675 if ('\b' == p[i] || '\n' == p[i]) 676 continue; 677 678 /* 679 * Print a regular character. 680 * Close out any bold/italic scopes. 681 * If we're in back-space mode, make sure we'll 682 * have something to enter when we backspace. 683 */ 684 685 if ('\b' != p[i + 1]) { 686 if (italic) 687 printf("</i>"); 688 if (bold) 689 printf("</b>"); 690 italic = bold = 0; 691 html_putchar(p[i]); 692 continue; 693 } else if (i + 2 >= len) 694 continue; 695 696 /* Italic mode. */ 697 698 if ('_' == p[i]) { 699 if (bold) 700 printf("</b>"); 701 if ( ! italic) 702 printf("<i>"); 703 bold = 0; 704 italic = 1; 705 i += 2; 706 html_putchar(p[i]); 707 continue; 708 } 709 710 /* 711 * Handle funny behaviour troff-isms. 712 * These grok'd from the original man2html.c. 713 */ 714 715 if (('+' == p[i] && 'o' == p[i + 2]) || 716 ('o' == p[i] && '+' == p[i + 2]) || 717 ('|' == p[i] && '=' == p[i + 2]) || 718 ('=' == p[i] && '|' == p[i + 2]) || 719 ('*' == p[i] && '=' == p[i + 2]) || 720 ('=' == p[i] && '*' == p[i + 2]) || 721 ('*' == p[i] && '|' == p[i + 2]) || 722 ('|' == p[i] && '*' == p[i + 2])) { 723 if (italic) 724 printf("</i>"); 725 if (bold) 726 printf("</b>"); 727 italic = bold = 0; 728 putchar('*'); 729 i += 2; 730 continue; 731 } else if (('|' == p[i] && '-' == p[i + 2]) || 732 ('-' == p[i] && '|' == p[i + 1]) || 733 ('+' == p[i] && '-' == p[i + 1]) || 734 ('-' == p[i] && '+' == p[i + 1]) || 735 ('+' == p[i] && '|' == p[i + 1]) || 736 ('|' == p[i] && '+' == p[i + 1])) { 737 if (italic) 738 printf("</i>"); 739 if (bold) 740 printf("</b>"); 741 italic = bold = 0; 742 putchar('+'); 743 i += 2; 744 continue; 745 } 746 747 /* Bold mode. */ 748 749 if (italic) 750 printf("</i>"); 751 if ( ! bold) 752 printf("<b>"); 753 bold = 1; 754 italic = 0; 755 i += 2; 756 html_putchar(p[i]); 757 } 758 759 /* 760 * Clean up the last character. 761 * We can get to a newline; don't print that. 762 */ 763 764 if (italic) 765 printf("</i>"); 766 if (bold) 767 printf("</b>"); 768 769 if (i == len - 1 && p[i] != '\n') 770 html_putchar(p[i]); 771 772 putchar('\n'); 773 } 774 free(p); 775 776 puts("</pre>\n" 777 "</div>"); 778 779 fclose(f); 780 } 781 782 static void 783 resp_format(const struct req *req, const char *file) 784 { 785 struct manoutput conf; 786 struct mparse *mp; 787 struct roff_man *man; 788 void *vp; 789 int fd; 790 int usepath; 791 792 if (-1 == (fd = open(file, O_RDONLY, 0))) { 793 puts("<p>You specified an invalid manual file.</p>"); 794 return; 795 } 796 797 mchars_alloc(); 798 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1, 799 MANDOCLEVEL_BADARG, NULL, req->q.manpath); 800 mparse_readfd(mp, fd, file); 801 close(fd); 802 803 memset(&conf, 0, sizeof(conf)); 804 conf.fragment = 1; 805 usepath = strcmp(req->q.manpath, req->p[0]); 806 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S", 807 usepath ? req->q.manpath : "", usepath ? "/" : ""); 808 809 mparse_result(mp, &man, NULL); 810 if (man == NULL) { 811 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 812 pg_error_internal(); 813 mparse_free(mp); 814 mchars_free(); 815 return; 816 } 817 818 vp = html_alloc(&conf); 819 820 if (man->macroset == MACROSET_MDOC) { 821 mdoc_validate(man); 822 html_mdoc(vp, man); 823 } else { 824 man_validate(man); 825 html_man(vp, man); 826 } 827 828 html_free(vp); 829 mparse_free(mp); 830 mchars_free(); 831 free(conf.man); 832 } 833 834 static void 835 resp_show(const struct req *req, const char *file) 836 { 837 838 if ('.' == file[0] && '/' == file[1]) 839 file += 2; 840 841 if ('c' == *file) 842 resp_catman(req, file); 843 else 844 resp_format(req, file); 845 } 846 847 static void 848 pg_show(struct req *req, const char *fullpath) 849 { 850 char *manpath; 851 const char *file; 852 853 if ((file = strchr(fullpath, '/')) == NULL) { 854 pg_error_badrequest( 855 "You did not specify a page to show."); 856 return; 857 } 858 manpath = mandoc_strndup(fullpath, file - fullpath); 859 file++; 860 861 if ( ! validate_manpath(req, manpath)) { 862 pg_error_badrequest( 863 "You specified an invalid manpath."); 864 free(manpath); 865 return; 866 } 867 868 /* 869 * Begin by chdir()ing into the manpath. 870 * This way we can pick up the database files, which are 871 * relative to the manpath root. 872 */ 873 874 if (chdir(manpath) == -1) { 875 warn("chdir %s", manpath); 876 pg_error_internal(); 877 free(manpath); 878 return; 879 } 880 free(manpath); 881 882 if ( ! validate_filename(file)) { 883 pg_error_badrequest( 884 "You specified an invalid manual file."); 885 return; 886 } 887 888 resp_begin_html(200, NULL); 889 resp_searchform(req, FOCUS_NONE); 890 resp_show(req, file); 891 resp_end_html(); 892 } 893 894 static void 895 pg_search(const struct req *req) 896 { 897 struct mansearch search; 898 struct manpaths paths; 899 struct manpage *res; 900 char **argv; 901 char *query, *rp, *wp; 902 size_t ressz; 903 int argc; 904 905 /* 906 * Begin by chdir()ing into the root of the manpath. 907 * This way we can pick up the database files, which are 908 * relative to the manpath root. 909 */ 910 911 if (chdir(req->q.manpath) == -1) { 912 warn("chdir %s", req->q.manpath); 913 pg_error_internal(); 914 return; 915 } 916 917 search.arch = req->q.arch; 918 search.sec = req->q.sec; 919 search.outkey = "Nd"; 920 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 921 search.firstmatch = 1; 922 923 paths.sz = 1; 924 paths.paths = mandoc_malloc(sizeof(char *)); 925 paths.paths[0] = mandoc_strdup("."); 926 927 /* 928 * Break apart at spaces with backslash-escaping. 929 */ 930 931 argc = 0; 932 argv = NULL; 933 rp = query = mandoc_strdup(req->q.query); 934 for (;;) { 935 while (isspace((unsigned char)*rp)) 936 rp++; 937 if (*rp == '\0') 938 break; 939 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 940 argv[argc++] = wp = rp; 941 for (;;) { 942 if (isspace((unsigned char)*rp)) { 943 *wp = '\0'; 944 rp++; 945 break; 946 } 947 if (rp[0] == '\\' && rp[1] != '\0') 948 rp++; 949 if (wp != rp) 950 *wp = *rp; 951 if (*rp == '\0') 952 break; 953 wp++; 954 rp++; 955 } 956 } 957 958 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz)) 959 pg_noresult(req, "You entered an invalid query."); 960 else if (0 == ressz) 961 pg_noresult(req, "No results found."); 962 else 963 pg_searchres(req, res, ressz); 964 965 free(query); 966 mansearch_free(res, ressz); 967 free(paths.paths[0]); 968 free(paths.paths); 969 } 970 971 int 972 main(void) 973 { 974 struct req req; 975 struct itimerval itimer; 976 const char *path; 977 const char *querystring; 978 int i; 979 980 /* Poor man's ReDoS mitigation. */ 981 982 itimer.it_value.tv_sec = 2; 983 itimer.it_value.tv_usec = 0; 984 itimer.it_interval.tv_sec = 2; 985 itimer.it_interval.tv_usec = 0; 986 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 987 warn("setitimer"); 988 pg_error_internal(); 989 return EXIT_FAILURE; 990 } 991 992 /* 993 * First we change directory into the MAN_DIR so that 994 * subsequent scanning for manpath directories is rooted 995 * relative to the same position. 996 */ 997 998 if (chdir(MAN_DIR) == -1) { 999 warn("MAN_DIR: %s", MAN_DIR); 1000 pg_error_internal(); 1001 return EXIT_FAILURE; 1002 } 1003 1004 memset(&req, 0, sizeof(struct req)); 1005 req.q.equal = 1; 1006 parse_manpath_conf(&req); 1007 1008 /* Parse the path info and the query string. */ 1009 1010 if ((path = getenv("PATH_INFO")) == NULL) 1011 path = ""; 1012 else if (*path == '/') 1013 path++; 1014 1015 if (*path != '\0') { 1016 parse_path_info(&req, path); 1017 if (req.q.manpath == NULL || access(path, F_OK) == -1) 1018 path = ""; 1019 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1020 parse_query_string(&req, querystring); 1021 1022 /* Validate parsed data and add defaults. */ 1023 1024 if (req.q.manpath == NULL) 1025 req.q.manpath = mandoc_strdup(req.p[0]); 1026 else if ( ! validate_manpath(&req, req.q.manpath)) { 1027 pg_error_badrequest( 1028 "You specified an invalid manpath."); 1029 return EXIT_FAILURE; 1030 } 1031 1032 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1033 pg_error_badrequest( 1034 "You specified an invalid architecture."); 1035 return EXIT_FAILURE; 1036 } 1037 1038 /* Dispatch to the three different pages. */ 1039 1040 if ('\0' != *path) 1041 pg_show(&req, path); 1042 else if (NULL != req.q.query) 1043 pg_search(&req); 1044 else 1045 pg_index(&req); 1046 1047 free(req.q.manpath); 1048 free(req.q.arch); 1049 free(req.q.sec); 1050 free(req.q.query); 1051 for (i = 0; i < (int)req.psz; i++) 1052 free(req.p[i]); 1053 free(req.p); 1054 return EXIT_SUCCESS; 1055 } 1056 1057 /* 1058 * If PATH_INFO is not a file name, translate it to a query. 1059 */ 1060 static void 1061 parse_path_info(struct req *req, const char *path) 1062 { 1063 char *dir[4]; 1064 int i; 1065 1066 req->isquery = 0; 1067 req->q.equal = 1; 1068 req->q.manpath = mandoc_strdup(path); 1069 req->q.arch = NULL; 1070 1071 /* Mandatory manual page name. */ 1072 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1073 req->q.query = req->q.manpath; 1074 req->q.manpath = NULL; 1075 } else 1076 *req->q.query++ = '\0'; 1077 1078 /* Optional trailing section. */ 1079 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1080 if(isdigit((unsigned char)req->q.sec[1])) { 1081 *req->q.sec++ = '\0'; 1082 req->q.sec = mandoc_strdup(req->q.sec); 1083 } else 1084 req->q.sec = NULL; 1085 } 1086 1087 /* Handle the case of name[.section] only. */ 1088 if (req->q.manpath == NULL) 1089 return; 1090 req->q.query = mandoc_strdup(req->q.query); 1091 1092 /* Split directory components. */ 1093 dir[i = 0] = req->q.manpath; 1094 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) { 1095 if (++i == 3) { 1096 pg_error_badrequest( 1097 "You specified too many directory components."); 1098 exit(EXIT_FAILURE); 1099 } 1100 *dir[i]++ = '\0'; 1101 } 1102 1103 /* Optional manpath. */ 1104 if ((i = validate_manpath(req, req->q.manpath)) == 0) 1105 req->q.manpath = NULL; 1106 else if (dir[1] == NULL) 1107 return; 1108 1109 /* Optional section. */ 1110 if (strncmp(dir[i], "man", 3) == 0) { 1111 free(req->q.sec); 1112 req->q.sec = mandoc_strdup(dir[i++] + 3); 1113 } 1114 if (dir[i] == NULL) { 1115 if (req->q.manpath == NULL) 1116 free(dir[0]); 1117 return; 1118 } 1119 if (dir[i + 1] != NULL) { 1120 pg_error_badrequest( 1121 "You specified an invalid directory component."); 1122 exit(EXIT_FAILURE); 1123 } 1124 1125 /* Optional architecture. */ 1126 if (i) { 1127 req->q.arch = mandoc_strdup(dir[i]); 1128 if (req->q.manpath == NULL) 1129 free(dir[0]); 1130 } else 1131 req->q.arch = dir[0]; 1132 } 1133 1134 /* 1135 * Scan for indexable paths. 1136 */ 1137 static void 1138 parse_manpath_conf(struct req *req) 1139 { 1140 FILE *fp; 1141 char *dp; 1142 size_t dpsz; 1143 ssize_t len; 1144 1145 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1146 warn("%s/manpath.conf", MAN_DIR); 1147 pg_error_internal(); 1148 exit(EXIT_FAILURE); 1149 } 1150 1151 dp = NULL; 1152 dpsz = 0; 1153 1154 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1155 if (dp[len - 1] == '\n') 1156 dp[--len] = '\0'; 1157 req->p = mandoc_realloc(req->p, 1158 (req->psz + 1) * sizeof(char *)); 1159 if ( ! validate_urifrag(dp)) { 1160 warnx("%s/manpath.conf contains " 1161 "unsafe path \"%s\"", MAN_DIR, dp); 1162 pg_error_internal(); 1163 exit(EXIT_FAILURE); 1164 } 1165 if (strchr(dp, '/') != NULL) { 1166 warnx("%s/manpath.conf contains " 1167 "path with slash \"%s\"", MAN_DIR, dp); 1168 pg_error_internal(); 1169 exit(EXIT_FAILURE); 1170 } 1171 req->p[req->psz++] = dp; 1172 dp = NULL; 1173 dpsz = 0; 1174 } 1175 free(dp); 1176 1177 if (req->p == NULL) { 1178 warnx("%s/manpath.conf is empty", MAN_DIR); 1179 pg_error_internal(); 1180 exit(EXIT_FAILURE); 1181 } 1182 } 1183