1 /* $Vendor-Id: cgi.c,v 1.39 2011/12/25 17:49:52 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/param.h> 22 #include <sys/wait.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <errno.h> 27 #include <dirent.h> 28 #include <fcntl.h> 29 #include <limits.h> 30 #include <regex.h> 31 #include <stdio.h> 32 #include <stdarg.h> 33 #include <stdint.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <unistd.h> 37 38 #include "apropos_db.h" 39 #include "mandoc.h" 40 #include "mdoc.h" 41 #include "man.h" 42 #include "main.h" 43 #include "manpath.h" 44 #include "mandocdb.h" 45 46 #ifdef __linux__ 47 # include <db_185.h> 48 #else 49 # include <db.h> 50 #endif 51 52 enum page { 53 PAGE_INDEX, 54 PAGE_SEARCH, 55 PAGE_SHOW, 56 PAGE__MAX 57 }; 58 59 struct paths { 60 char *name; 61 char *path; 62 }; 63 64 /* 65 * A query as passed to the search function. 66 */ 67 struct query { 68 const char *arch; /* architecture */ 69 const char *sec; /* manual section */ 70 const char *expr; /* unparsed expression string */ 71 int manroot; /* manroot index (or -1)*/ 72 int whatis; /* whether whatis mode */ 73 int legacy; /* whether legacy mode */ 74 }; 75 76 struct req { 77 struct query q; 78 struct paths *p; 79 size_t psz; 80 enum page page; 81 }; 82 83 static int atou(const char *, unsigned *); 84 static void catman(const struct req *, const char *); 85 static int cmp(const void *, const void *); 86 static void format(const struct req *, const char *); 87 static void html_print(const char *); 88 static void html_printquery(const struct req *); 89 static void html_putchar(char); 90 static int http_decode(char *); 91 static void http_parse(struct req *, char *); 92 static void http_print(const char *); 93 static void http_putchar(char); 94 static void http_printquery(const struct req *); 95 static int pathstop(DIR *); 96 static void pathgen(DIR *, char *, struct req *); 97 static void pg_index(const struct req *, char *); 98 static void pg_search(const struct req *, char *); 99 static void pg_show(const struct req *, char *); 100 static void resp_bad(void); 101 static void resp_baddb(void); 102 static void resp_error400(void); 103 static void resp_error404(const char *); 104 static void resp_begin_html(int, const char *); 105 static void resp_begin_http(int, const char *); 106 static void resp_end_html(void); 107 static void resp_index(const struct req *); 108 static void resp_search(struct res *, size_t, void *); 109 static void resp_searchform(const struct req *); 110 111 static const char *progname; /* cgi script name */ 112 static const char *cache; /* cache directory */ 113 static const char *css; /* css directory */ 114 static const char *host; /* hostname */ 115 116 static const char * const pages[PAGE__MAX] = { 117 "index", /* PAGE_INDEX */ 118 "search", /* PAGE_SEARCH */ 119 "show", /* PAGE_SHOW */ 120 }; 121 122 /* 123 * This is just OpenBSD's strtol(3) suggestion. 124 * I use it instead of strtonum(3) for portability's sake. 125 */ 126 static int 127 atou(const char *buf, unsigned *v) 128 { 129 char *ep; 130 long lval; 131 132 errno = 0; 133 lval = strtol(buf, &ep, 10); 134 if (buf[0] == '\0' || *ep != '\0') 135 return(0); 136 if ((errno == ERANGE && (lval == LONG_MAX || 137 lval == LONG_MIN)) || 138 (lval > INT_MAX || lval < 0)) 139 return(0); 140 141 *v = (unsigned int)lval; 142 return(1); 143 } 144 145 /* 146 * Print a character, escaping HTML along the way. 147 * This will pass non-ASCII straight to output: be warned! 148 */ 149 static void 150 html_putchar(char c) 151 { 152 153 switch (c) { 154 case ('"'): 155 printf(""e;"); 156 break; 157 case ('&'): 158 printf("&"); 159 break; 160 case ('>'): 161 printf(">"); 162 break; 163 case ('<'): 164 printf("<"); 165 break; 166 default: 167 putchar((unsigned char)c); 168 break; 169 } 170 } 171 static void 172 http_printquery(const struct req *req) 173 { 174 175 printf("&expr="); 176 http_print(req->q.expr ? req->q.expr : ""); 177 printf("&sec="); 178 http_print(req->q.sec ? req->q.sec : ""); 179 printf("&arch="); 180 http_print(req->q.arch ? req->q.arch : ""); 181 } 182 183 184 static void 185 html_printquery(const struct req *req) 186 { 187 188 printf("&expr="); 189 html_print(req->q.expr ? req->q.expr : ""); 190 printf("&sec="); 191 html_print(req->q.sec ? req->q.sec : ""); 192 printf("&arch="); 193 html_print(req->q.arch ? req->q.arch : ""); 194 } 195 196 static void 197 http_print(const char *p) 198 { 199 200 if (NULL == p) 201 return; 202 while ('\0' != *p) 203 http_putchar(*p++); 204 } 205 206 /* 207 * Call through to html_putchar(). 208 * Accepts NULL strings. 209 */ 210 static void 211 html_print(const char *p) 212 { 213 214 if (NULL == p) 215 return; 216 while ('\0' != *p) 217 html_putchar(*p++); 218 } 219 220 /* 221 * Parse out key-value pairs from an HTTP request variable. 222 * This can be either a cookie or a POST/GET string, although man.cgi 223 * uses only GET for simplicity. 224 */ 225 static void 226 http_parse(struct req *req, char *p) 227 { 228 char *key, *val, *manroot; 229 int i, legacy; 230 231 memset(&req->q, 0, sizeof(struct query)); 232 233 req->q.whatis = 1; 234 legacy = -1; 235 manroot = NULL; 236 237 while ('\0' != *p) { 238 key = p; 239 val = NULL; 240 241 p += (int)strcspn(p, ";&"); 242 if ('\0' != *p) 243 *p++ = '\0'; 244 if (NULL != (val = strchr(key, '='))) 245 *val++ = '\0'; 246 247 if ('\0' == *key || NULL == val || '\0' == *val) 248 continue; 249 250 /* Just abort handling. */ 251 252 if ( ! http_decode(key)) 253 break; 254 if (NULL != val && ! http_decode(val)) 255 break; 256 257 if (0 == strcmp(key, "expr")) 258 req->q.expr = val; 259 else if (0 == strcmp(key, "query")) 260 req->q.expr = val; 261 else if (0 == strcmp(key, "sec")) 262 req->q.sec = val; 263 else if (0 == strcmp(key, "sektion")) 264 req->q.sec = val; 265 else if (0 == strcmp(key, "arch")) 266 req->q.arch = val; 267 else if (0 == strcmp(key, "manpath")) 268 manroot = val; 269 else if (0 == strcmp(key, "apropos")) 270 legacy = 0 == strcmp(val, "0"); 271 else if (0 == strcmp(key, "op")) 272 req->q.whatis = 0 == strcasecmp(val, "whatis"); 273 } 274 275 /* Test for old man.cgi compatibility mode. */ 276 277 if (legacy == 0) { 278 req->q.whatis = 0; 279 req->q.legacy = 1; 280 } else if (legacy > 0) { 281 req->q.legacy = 1; 282 req->q.whatis = 1; 283 } 284 285 /* 286 * Section "0" means no section when in legacy mode. 287 * For some man.cgi scripts, "default" arch is none. 288 */ 289 290 if (req->q.legacy && NULL != req->q.sec) 291 if (0 == strcmp(req->q.sec, "0")) 292 req->q.sec = NULL; 293 if (req->q.legacy && NULL != req->q.arch) 294 if (0 == strcmp(req->q.arch, "default")) 295 req->q.arch = NULL; 296 297 /* Default to first manroot. */ 298 299 if (NULL != manroot) { 300 for (i = 0; i < (int)req->psz; i++) 301 if (0 == strcmp(req->p[i].name, manroot)) 302 break; 303 req->q.manroot = i < (int)req->psz ? i : -1; 304 } 305 } 306 307 static void 308 http_putchar(char c) 309 { 310 311 if (isalnum((unsigned char)c)) { 312 putchar((unsigned char)c); 313 return; 314 } else if (' ' == c) { 315 putchar('+'); 316 return; 317 } 318 printf("%%%.2x", c); 319 } 320 321 /* 322 * HTTP-decode a string. The standard explanation is that this turns 323 * "%4e+foo" into "n foo" in the regular way. This is done in-place 324 * over the allocated string. 325 */ 326 static int 327 http_decode(char *p) 328 { 329 char hex[3]; 330 int c; 331 332 hex[2] = '\0'; 333 334 for ( ; '\0' != *p; p++) { 335 if ('%' == *p) { 336 if ('\0' == (hex[0] = *(p + 1))) 337 return(0); 338 if ('\0' == (hex[1] = *(p + 2))) 339 return(0); 340 if (1 != sscanf(hex, "%x", &c)) 341 return(0); 342 if ('\0' == c) 343 return(0); 344 345 *p = (char)c; 346 memmove(p + 1, p + 3, strlen(p + 3) + 1); 347 } else 348 *p = '+' == *p ? ' ' : *p; 349 } 350 351 *p = '\0'; 352 return(1); 353 } 354 355 static void 356 resp_begin_http(int code, const char *msg) 357 { 358 359 if (200 != code) 360 printf("Status: %d %s\n", code, msg); 361 362 puts("Content-Type: text/html; charset=utf-8\n" 363 "Cache-Control: no-cache\n" 364 "Pragma: no-cache\n" 365 ""); 366 367 fflush(stdout); 368 } 369 370 static void 371 resp_begin_html(int code, const char *msg) 372 { 373 374 resp_begin_http(code, msg); 375 376 printf("<!DOCTYPE HTML PUBLIC " 377 " \"-//W3C//DTD HTML 4.01//EN\"" 378 " \"http://www.w3.org/TR/html4/strict.dtd\">\n" 379 "<HTML>\n" 380 "<HEAD>\n" 381 "<META HTTP-EQUIV=\"Content-Type\"" 382 " CONTENT=\"text/html; charset=utf-8\">\n" 383 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\"" 384 " TYPE=\"text/css\" media=\"all\">\n" 385 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\"" 386 " TYPE=\"text/css\" media=\"all\">\n" 387 "<TITLE>System Manpage Reference</TITLE>\n" 388 "</HEAD>\n" 389 "<BODY>\n" 390 "<!-- Begin page content. //-->\n", css, css); 391 } 392 393 static void 394 resp_end_html(void) 395 { 396 397 puts("</BODY>\n" 398 "</HTML>"); 399 } 400 401 static void 402 resp_searchform(const struct req *req) 403 { 404 int i; 405 406 puts("<!-- Begin search form. //-->"); 407 printf("<DIV ID=\"mancgi\">\n" 408 "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n" 409 "<FIELDSET>\n" 410 "<LEGEND>Search Parameters</LEGEND>\n" 411 "<INPUT TYPE=\"submit\" NAME=\"op\"" 412 " VALUE=\"Whatis\"> or \n" 413 "<INPUT TYPE=\"submit\" NAME=\"op\"" 414 " VALUE=\"apropos\"> for manuals satisfying \n" 415 "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"", 416 progname); 417 html_print(req->q.expr ? req->q.expr : ""); 418 printf("\">, section " 419 "<INPUT TYPE=\"text\"" 420 " SIZE=\"4\" NAME=\"sec\" VALUE=\""); 421 html_print(req->q.sec ? req->q.sec : ""); 422 printf("\">, arch " 423 "<INPUT TYPE=\"text\"" 424 " SIZE=\"8\" NAME=\"arch\" VALUE=\""); 425 html_print(req->q.arch ? req->q.arch : ""); 426 printf("\">"); 427 if (req->psz > 1) { 428 puts(", <SELECT NAME=\"manpath\">"); 429 for (i = 0; i < (int)req->psz; i++) { 430 printf("<OPTION %s VALUE=\"", 431 (i == req->q.manroot) || 432 (0 == i && -1 == req->q.manroot) ? 433 "SELECTED=\"selected\"" : ""); 434 html_print(req->p[i].name); 435 printf("\">"); 436 html_print(req->p[i].name); 437 puts("</OPTION>"); 438 } 439 puts("</SELECT>"); 440 } 441 puts(".\n" 442 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n" 443 "</FIELDSET>\n" 444 "</FORM>\n" 445 "</DIV>"); 446 puts("<!-- End search form. //-->"); 447 } 448 449 static void 450 resp_index(const struct req *req) 451 { 452 453 resp_begin_html(200, NULL); 454 resp_searchform(req); 455 resp_end_html(); 456 } 457 458 static void 459 resp_error400(void) 460 { 461 462 resp_begin_html(400, "Query Malformed"); 463 printf("<H1>Malformed Query</H1>\n" 464 "<P>\n" 465 "The query your entered was malformed.\n" 466 "Try again from the\n" 467 "<A HREF=\"%s/index.html\">main page</A>.\n" 468 "</P>", progname); 469 resp_end_html(); 470 } 471 472 static void 473 resp_error404(const char *page) 474 { 475 476 resp_begin_html(404, "Not Found"); 477 puts("<H1>Page Not Found</H1>\n" 478 "<P>\n" 479 "The page you're looking for, "); 480 printf("<B>"); 481 html_print(page); 482 printf("</B>,\n" 483 "could not be found.\n" 484 "Try searching from the\n" 485 "<A HREF=\"%s/index.html\">main page</A>.\n" 486 "</P>", progname); 487 resp_end_html(); 488 } 489 490 static void 491 resp_bad(void) 492 { 493 resp_begin_html(500, "Internal Server Error"); 494 puts("<P>Generic badness happened.</P>"); 495 resp_end_html(); 496 } 497 498 static void 499 resp_baddb(void) 500 { 501 502 resp_begin_html(500, "Internal Server Error"); 503 puts("<P>Your database is broken.</P>"); 504 resp_end_html(); 505 } 506 507 static void 508 resp_search(struct res *r, size_t sz, void *arg) 509 { 510 int i; 511 const struct req *req; 512 513 req = (const struct req *)arg; 514 515 if (sz > 0) 516 assert(req->q.manroot >= 0); 517 518 if (1 == sz) { 519 /* 520 * If we have just one result, then jump there now 521 * without any delay. 522 */ 523 puts("Status: 303 See Other"); 524 printf("Location: http://%s%s/show/%d/%u/%u.html?", 525 host, progname, req->q.manroot, 526 r[0].volume, r[0].rec); 527 http_printquery(req); 528 puts("\n" 529 "Content-Type: text/html; charset=utf-8\n"); 530 return; 531 } 532 533 qsort(r, sz, sizeof(struct res), cmp); 534 535 resp_begin_html(200, NULL); 536 resp_searchform(req); 537 538 puts("<DIV CLASS=\"results\">"); 539 540 if (0 == sz) { 541 printf("<P>\n" 542 "No %s results found.\n", 543 req->q.whatis ? "whatis" : "apropos"); 544 if (req->q.whatis) { 545 printf("(Try " 546 "<A HREF=\"%s/search.html?op=apropos", 547 progname); 548 html_printquery(req); 549 puts("\">apropos</A>?)"); 550 } 551 puts("</P>"); 552 puts("</DIV>"); 553 resp_end_html(); 554 return; 555 } 556 557 puts("<TABLE>"); 558 559 for (i = 0; i < (int)sz; i++) { 560 printf("<TR>\n" 561 "<TD CLASS=\"title\">\n" 562 "<A HREF=\"%s/show/%d/%u/%u.html?", 563 progname, req->q.manroot, 564 r[i].volume, r[i].rec); 565 html_printquery(req); 566 printf("\">"); 567 html_print(r[i].title); 568 putchar('('); 569 html_print(r[i].cat); 570 if (r[i].arch && '\0' != *r[i].arch) { 571 putchar('/'); 572 html_print(r[i].arch); 573 } 574 printf(")</A>\n" 575 "</TD>\n" 576 "<TD CLASS=\"desc\">"); 577 html_print(r[i].desc); 578 puts("</TD>\n" 579 "</TR>"); 580 } 581 582 puts("</TABLE>\n" 583 "</DIV>"); 584 resp_end_html(); 585 } 586 587 /* ARGSUSED */ 588 static void 589 pg_index(const struct req *req, char *path) 590 { 591 592 resp_index(req); 593 } 594 595 static void 596 catman(const struct req *req, const char *file) 597 { 598 FILE *f; 599 size_t len; 600 int i; 601 char *p; 602 int italic, bold; 603 604 if (NULL == (f = fopen(file, "r"))) { 605 resp_baddb(); 606 return; 607 } 608 609 resp_begin_html(200, NULL); 610 resp_searchform(req); 611 puts("<DIV CLASS=\"catman\">\n" 612 "<PRE>"); 613 614 while (NULL != (p = fgetln(f, &len))) { 615 bold = italic = 0; 616 for (i = 0; i < (int)len - 1; i++) { 617 /* 618 * This means that the catpage is out of state. 619 * Ignore it and keep going (although the 620 * catpage is bogus). 621 */ 622 623 if ('\b' == p[i] || '\n' == p[i]) 624 continue; 625 626 /* 627 * Print a regular character. 628 * Close out any bold/italic scopes. 629 * If we're in back-space mode, make sure we'll 630 * have something to enter when we backspace. 631 */ 632 633 if ('\b' != p[i + 1]) { 634 if (italic) 635 printf("</I>"); 636 if (bold) 637 printf("</B>"); 638 italic = bold = 0; 639 html_putchar(p[i]); 640 continue; 641 } else if (i + 2 >= (int)len) 642 continue; 643 644 /* Italic mode. */ 645 646 if ('_' == p[i]) { 647 if (bold) 648 printf("</B>"); 649 if ( ! italic) 650 printf("<I>"); 651 bold = 0; 652 italic = 1; 653 i += 2; 654 html_putchar(p[i]); 655 continue; 656 } 657 658 /* 659 * Handle funny behaviour troff-isms. 660 * These grok'd from the original man2html.c. 661 */ 662 663 if (('+' == p[i] && 'o' == p[i + 2]) || 664 ('o' == p[i] && '+' == p[i + 2]) || 665 ('|' == p[i] && '=' == p[i + 2]) || 666 ('=' == p[i] && '|' == p[i + 2]) || 667 ('*' == p[i] && '=' == p[i + 2]) || 668 ('=' == p[i] && '*' == p[i + 2]) || 669 ('*' == p[i] && '|' == p[i + 2]) || 670 ('|' == p[i] && '*' == p[i + 2])) { 671 if (italic) 672 printf("</I>"); 673 if (bold) 674 printf("</B>"); 675 italic = bold = 0; 676 putchar('*'); 677 i += 2; 678 continue; 679 } else if (('|' == p[i] && '-' == p[i + 2]) || 680 ('-' == p[i] && '|' == p[i + 1]) || 681 ('+' == p[i] && '-' == p[i + 1]) || 682 ('-' == p[i] && '+' == p[i + 1]) || 683 ('+' == p[i] && '|' == p[i + 1]) || 684 ('|' == p[i] && '+' == p[i + 1])) { 685 if (italic) 686 printf("</I>"); 687 if (bold) 688 printf("</B>"); 689 italic = bold = 0; 690 putchar('+'); 691 i += 2; 692 continue; 693 } 694 695 /* Bold mode. */ 696 697 if (italic) 698 printf("</I>"); 699 if ( ! bold) 700 printf("<B>"); 701 bold = 1; 702 italic = 0; 703 i += 2; 704 html_putchar(p[i]); 705 } 706 707 /* 708 * Clean up the last character. 709 * We can get to a newline; don't print that. 710 */ 711 712 if (italic) 713 printf("</I>"); 714 if (bold) 715 printf("</B>"); 716 717 if (i == (int)len - 1 && '\n' != p[i]) 718 html_putchar(p[i]); 719 720 putchar('\n'); 721 } 722 723 puts("</PRE>\n" 724 "</DIV>\n" 725 "</BODY>\n" 726 "</HTML>"); 727 728 fclose(f); 729 } 730 731 static void 732 format(const struct req *req, const char *file) 733 { 734 struct mparse *mp; 735 int fd; 736 struct mdoc *mdoc; 737 struct man *man; 738 void *vp; 739 enum mandoclevel rc; 740 char opts[MAXPATHLEN + 128]; 741 742 if (-1 == (fd = open(file, O_RDONLY, 0))) { 743 resp_baddb(); 744 return; 745 } 746 747 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); 748 rc = mparse_readfd(mp, fd, file); 749 close(fd); 750 751 if (rc >= MANDOCLEVEL_FATAL) { 752 resp_baddb(); 753 return; 754 } 755 756 snprintf(opts, sizeof(opts), "fragment," 757 "man=%s/search.html?sec=%%S&expr=%%N," 758 /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/, 759 progname); 760 761 mparse_result(mp, &mdoc, &man); 762 if (NULL == man && NULL == mdoc) { 763 resp_baddb(); 764 mparse_free(mp); 765 return; 766 } 767 768 resp_begin_html(200, NULL); 769 resp_searchform(req); 770 771 vp = html_alloc(opts); 772 773 if (NULL != mdoc) 774 html_mdoc(vp, mdoc); 775 else 776 html_man(vp, man); 777 778 puts("</BODY>\n" 779 "</HTML>"); 780 781 html_free(vp); 782 mparse_free(mp); 783 } 784 785 static void 786 pg_show(const struct req *req, char *path) 787 { 788 struct manpaths ps; 789 size_t sz; 790 char *sub; 791 char file[MAXPATHLEN]; 792 const char *cp; 793 int rc, catm; 794 unsigned int vol, rec, mr; 795 DB *idx; 796 DBT key, val; 797 798 idx = NULL; 799 800 /* Parse out mroot, volume, and record from the path. */ 801 802 if (NULL == path || NULL == (sub = strchr(path, '/'))) { 803 resp_error400(); 804 return; 805 } 806 *sub++ = '\0'; 807 if ( ! atou(path, &mr)) { 808 resp_error400(); 809 return; 810 } 811 path = sub; 812 if (NULL == (sub = strchr(path, '/'))) { 813 resp_error400(); 814 return; 815 } 816 *sub++ = '\0'; 817 if ( ! atou(path, &vol) || ! atou(sub, &rec)) { 818 resp_error400(); 819 return; 820 } else if (mr >= (unsigned int)req->psz) { 821 resp_error400(); 822 return; 823 } 824 825 /* 826 * Begin by chdir()ing into the manroot. 827 * This way we can pick up the database files, which are 828 * relative to the manpath root. 829 */ 830 831 if (-1 == chdir(req->p[(int)mr].path)) { 832 perror(req->p[(int)mr].path); 833 resp_baddb(); 834 return; 835 } 836 837 memset(&ps, 0, sizeof(struct manpaths)); 838 manpath_manconf(&ps, "etc/catman.conf"); 839 840 if (vol >= (unsigned int)ps.sz) { 841 resp_error400(); 842 goto out; 843 } 844 845 sz = strlcpy(file, ps.paths[vol], MAXPATHLEN); 846 assert(sz < MAXPATHLEN); 847 strlcat(file, "/", MAXPATHLEN); 848 strlcat(file, MANDOC_IDX, MAXPATHLEN); 849 850 /* Open the index recno(3) database. */ 851 852 idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL); 853 if (NULL == idx) { 854 perror(file); 855 resp_baddb(); 856 goto out; 857 } 858 859 key.data = &rec; 860 key.size = 4; 861 862 if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) { 863 rc < 0 ? resp_baddb() : resp_error400(); 864 goto out; 865 } else if (0 == val.size) { 866 resp_baddb(); 867 goto out; 868 } 869 870 cp = (char *)val.data; 871 catm = 'c' == *cp++; 872 873 if (NULL == memchr(cp, '\0', val.size - 1)) 874 resp_baddb(); 875 else { 876 file[(int)sz] = '\0'; 877 strlcat(file, "/", MAXPATHLEN); 878 strlcat(file, cp, MAXPATHLEN); 879 if (catm) 880 catman(req, file); 881 else 882 format(req, file); 883 } 884 out: 885 if (idx) 886 (*idx->close)(idx); 887 manpath_free(&ps); 888 } 889 890 static void 891 pg_search(const struct req *req, char *path) 892 { 893 size_t tt; 894 struct manpaths ps; 895 int i, sz, rc; 896 const char *ep, *start; 897 char **cp; 898 struct opts opt; 899 struct expr *expr; 900 901 if (req->q.manroot < 0 || 0 == req->psz) { 902 resp_search(NULL, 0, (void *)req); 903 return; 904 } 905 906 memset(&opt, 0, sizeof(struct opts)); 907 908 ep = req->q.expr; 909 opt.arch = req->q.arch; 910 opt.cat = req->q.sec; 911 rc = -1; 912 sz = 0; 913 cp = NULL; 914 915 /* 916 * Begin by chdir()ing into the root of the manpath. 917 * This way we can pick up the database files, which are 918 * relative to the manpath root. 919 */ 920 921 assert(req->q.manroot < (int)req->psz); 922 if (-1 == (chdir(req->p[req->q.manroot].path))) { 923 perror(req->p[req->q.manroot].path); 924 resp_search(NULL, 0, (void *)req); 925 return; 926 } 927 928 memset(&ps, 0, sizeof(struct manpaths)); 929 manpath_manconf(&ps, "etc/catman.conf"); 930 931 /* 932 * Poor man's tokenisation: just break apart by spaces. 933 * Yes, this is half-ass. But it works for now. 934 */ 935 936 while (ep && isspace((unsigned char)*ep)) 937 ep++; 938 939 while (ep && '\0' != *ep) { 940 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *)); 941 start = ep; 942 while ('\0' != *ep && ! isspace((unsigned char)*ep)) 943 ep++; 944 cp[sz] = mandoc_malloc((ep - start) + 1); 945 memcpy(cp[sz], start, ep - start); 946 cp[sz++][ep - start] = '\0'; 947 while (isspace((unsigned char)*ep)) 948 ep++; 949 } 950 951 /* 952 * Pump down into apropos backend. 953 * The resp_search() function is called with the results. 954 */ 955 956 expr = req->q.whatis ? 957 termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt); 958 959 if (NULL != expr) 960 rc = apropos_search 961 (ps.sz, ps.paths, &opt, 962 expr, tt, (void *)req, resp_search); 963 964 /* ...unless errors occured. */ 965 966 if (0 == rc) 967 resp_baddb(); 968 else if (-1 == rc) 969 resp_search(NULL, 0, (void *)req); 970 971 for (i = 0; i < sz; i++) 972 free(cp[i]); 973 974 free(cp); 975 exprfree(expr); 976 manpath_free(&ps); 977 } 978 979 int 980 main(void) 981 { 982 int i; 983 char buf[MAXPATHLEN]; 984 DIR *cwd; 985 struct req req; 986 char *p, *path, *subpath; 987 988 /* Scan our run-time environment. */ 989 990 if (NULL == (cache = getenv("CACHE_DIR"))) 991 cache = "/cache/man.cgi"; 992 993 if (NULL == (progname = getenv("SCRIPT_NAME"))) 994 progname = ""; 995 996 if (NULL == (css = getenv("CSS_DIR"))) 997 css = ""; 998 999 if (NULL == (host = getenv("HTTP_HOST"))) 1000 host = "localhost"; 1001 1002 /* 1003 * First we change directory into the cache directory so that 1004 * subsequent scanning for manpath directories is rooted 1005 * relative to the same position. 1006 */ 1007 1008 if (-1 == chdir(cache)) { 1009 perror(cache); 1010 resp_bad(); 1011 return(EXIT_FAILURE); 1012 } else if (NULL == (cwd = opendir(cache))) { 1013 perror(cache); 1014 resp_bad(); 1015 return(EXIT_FAILURE); 1016 } 1017 1018 memset(&req, 0, sizeof(struct req)); 1019 1020 strlcpy(buf, ".", MAXPATHLEN); 1021 pathgen(cwd, buf, &req); 1022 closedir(cwd); 1023 1024 /* Next parse out the query string. */ 1025 1026 if (NULL != (p = getenv("QUERY_STRING"))) 1027 http_parse(&req, p); 1028 1029 /* 1030 * Now juggle paths to extract information. 1031 * We want to extract our filetype (the file suffix), the 1032 * initial path component, then the trailing component(s). 1033 * Start with leading subpath component. 1034 */ 1035 1036 subpath = path = NULL; 1037 req.page = PAGE__MAX; 1038 1039 if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path) 1040 req.page = PAGE_INDEX; 1041 1042 if (NULL != path && '/' == *path && '\0' == *++path) 1043 req.page = PAGE_INDEX; 1044 1045 /* Strip file suffix. */ 1046 1047 if (NULL != path && NULL != (p = strrchr(path, '.'))) 1048 if (NULL != p && NULL == strchr(p, '/')) 1049 *p++ = '\0'; 1050 1051 /* Resolve subpath component. */ 1052 1053 if (NULL != path && NULL != (subpath = strchr(path, '/'))) 1054 *subpath++ = '\0'; 1055 1056 /* Map path into one we recognise. */ 1057 1058 if (NULL != path && '\0' != *path) 1059 for (i = 0; i < (int)PAGE__MAX; i++) 1060 if (0 == strcmp(pages[i], path)) { 1061 req.page = (enum page)i; 1062 break; 1063 } 1064 1065 /* Route pages. */ 1066 1067 switch (req.page) { 1068 case (PAGE_INDEX): 1069 pg_index(&req, subpath); 1070 break; 1071 case (PAGE_SEARCH): 1072 pg_search(&req, subpath); 1073 break; 1074 case (PAGE_SHOW): 1075 pg_show(&req, subpath); 1076 break; 1077 default: 1078 resp_error404(path); 1079 break; 1080 } 1081 1082 for (i = 0; i < (int)req.psz; i++) { 1083 free(req.p[i].path); 1084 free(req.p[i].name); 1085 } 1086 1087 free(req.p); 1088 return(EXIT_SUCCESS); 1089 } 1090 1091 static int 1092 cmp(const void *p1, const void *p2) 1093 { 1094 1095 return(strcasecmp(((const struct res *)p1)->title, 1096 ((const struct res *)p2)->title)); 1097 } 1098 1099 /* 1100 * Check to see if an "etc" path consists of a catman.conf file. If it 1101 * does, that means that the path contains a tree created by catman(8) 1102 * and should be used for indexing. 1103 */ 1104 static int 1105 pathstop(DIR *dir) 1106 { 1107 struct dirent *d; 1108 1109 while (NULL != (d = readdir(dir))) 1110 if (DT_REG == d->d_type) 1111 if (0 == strcmp(d->d_name, "catman.conf")) 1112 return(1); 1113 1114 return(0); 1115 } 1116 1117 /* 1118 * Scan for indexable paths. 1119 * This adds all paths with "etc/catman.conf" to the buffer. 1120 */ 1121 static void 1122 pathgen(DIR *dir, char *path, struct req *req) 1123 { 1124 struct dirent *d; 1125 char *cp; 1126 DIR *cd; 1127 int rc; 1128 size_t sz, ssz; 1129 1130 sz = strlcat(path, "/", MAXPATHLEN); 1131 if (sz >= MAXPATHLEN) { 1132 fprintf(stderr, "%s: Path too long", path); 1133 return; 1134 } 1135 1136 /* 1137 * First, scan for the "etc" directory. 1138 * If it's found, then see if it should cause us to stop. This 1139 * happens when a catman.conf is found in the directory. 1140 */ 1141 1142 rc = 0; 1143 while (0 == rc && NULL != (d = readdir(dir))) { 1144 if (DT_DIR != d->d_type || strcmp(d->d_name, "etc")) 1145 continue; 1146 1147 path[(int)sz] = '\0'; 1148 ssz = strlcat(path, d->d_name, MAXPATHLEN); 1149 1150 if (ssz >= MAXPATHLEN) { 1151 fprintf(stderr, "%s: Path too long", path); 1152 return; 1153 } else if (NULL == (cd = opendir(path))) { 1154 perror(path); 1155 return; 1156 } 1157 1158 rc = pathstop(cd); 1159 closedir(cd); 1160 } 1161 1162 if (rc > 0) { 1163 /* This also strips the trailing slash. */ 1164 path[(int)--sz] = '\0'; 1165 req->p = mandoc_realloc 1166 (req->p, 1167 (req->psz + 1) * sizeof(struct paths)); 1168 /* 1169 * Strip out the leading "./" unless we're just a ".", 1170 * in which case use an empty string as our name. 1171 */ 1172 req->p[(int)req->psz].path = mandoc_strdup(path); 1173 req->p[(int)req->psz].name = 1174 cp = mandoc_strdup(path + (1 == sz ? 1 : 2)); 1175 req->psz++; 1176 /* 1177 * The name is just the path with all the slashes taken 1178 * out of it. Simple but effective. 1179 */ 1180 for ( ; '\0' != *cp; cp++) 1181 if ('/' == *cp) 1182 *cp = ' '; 1183 return; 1184 } 1185 1186 /* 1187 * If no etc/catman.conf was found, recursively enter child 1188 * directory and continue scanning. 1189 */ 1190 1191 rewinddir(dir); 1192 while (NULL != (d = readdir(dir))) { 1193 if (DT_DIR != d->d_type || '.' == d->d_name[0]) 1194 continue; 1195 1196 path[(int)sz] = '\0'; 1197 ssz = strlcat(path, d->d_name, MAXPATHLEN); 1198 1199 if (ssz >= MAXPATHLEN) { 1200 fprintf(stderr, "%s: Path too long", path); 1201 return; 1202 } else if (NULL == (cd = opendir(path))) { 1203 perror(path); 1204 return; 1205 } 1206 1207 pathgen(cd, path, req); 1208 closedir(cd); 1209 } 1210 } 1211