1 /* $Vendor-Id: cgi.c,v 1.42 2012/03/24 01:46:25 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/param.h> 22 #include <sys/wait.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <errno.h> 27 #include <dirent.h> 28 #include <fcntl.h> 29 #include <limits.h> 30 #include <regex.h> 31 #include <stdio.h> 32 #include <stdarg.h> 33 #include <stdint.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <unistd.h> 37 38 #include "apropos_db.h" 39 #include "mandoc.h" 40 #include "mdoc.h" 41 #include "man.h" 42 #include "main.h" 43 #include "manpath.h" 44 #include "mandocdb.h" 45 46 #ifdef __linux__ 47 # include <db_185.h> 48 #else 49 # include <db.h> 50 #endif 51 52 enum page { 53 PAGE_INDEX, 54 PAGE_SEARCH, 55 PAGE_SHOW, 56 PAGE__MAX 57 }; 58 59 struct paths { 60 char *name; 61 char *path; 62 }; 63 64 /* 65 * A query as passed to the search function. 66 */ 67 struct query { 68 const char *arch; /* architecture */ 69 const char *sec; /* manual section */ 70 const char *expr; /* unparsed expression string */ 71 int manroot; /* manroot index (or -1)*/ 72 int legacy; /* whether legacy mode */ 73 }; 74 75 struct req { 76 struct query q; 77 struct paths *p; 78 size_t psz; 79 enum page page; 80 }; 81 82 static int atou(const char *, unsigned *); 83 static void catman(const struct req *, const char *); 84 static int cmp(const void *, const void *); 85 static void format(const struct req *, const char *); 86 static void html_print(const char *); 87 static void html_printquery(const struct req *); 88 static void html_putchar(char); 89 static int http_decode(char *); 90 static void http_parse(struct req *, char *); 91 static void http_print(const char *); 92 static void http_putchar(char); 93 static void http_printquery(const struct req *); 94 static int pathstop(DIR *); 95 static void pathgen(DIR *, char *, struct req *); 96 static void pg_index(const struct req *, char *); 97 static void pg_search(const struct req *, char *); 98 static void pg_show(const struct req *, char *); 99 static void resp_bad(void); 100 static void resp_baddb(void); 101 static void resp_error400(void); 102 static void resp_error404(const char *); 103 static void resp_begin_html(int, const char *); 104 static void resp_begin_http(int, const char *); 105 static void resp_end_html(void); 106 static void resp_index(const struct req *); 107 static void resp_search(struct res *, size_t, void *); 108 static void resp_searchform(const struct req *); 109 110 static const char *progname; /* cgi script name */ 111 static const char *cache; /* cache directory */ 112 static const char *css; /* css directory */ 113 static const char *host; /* hostname */ 114 115 static const char * const pages[PAGE__MAX] = { 116 "index", /* PAGE_INDEX */ 117 "search", /* PAGE_SEARCH */ 118 "show", /* PAGE_SHOW */ 119 }; 120 121 /* 122 * This is just OpenBSD's strtol(3) suggestion. 123 * I use it instead of strtonum(3) for portability's sake. 124 */ 125 static int 126 atou(const char *buf, unsigned *v) 127 { 128 char *ep; 129 long lval; 130 131 errno = 0; 132 lval = strtol(buf, &ep, 10); 133 if (buf[0] == '\0' || *ep != '\0') 134 return(0); 135 if ((errno == ERANGE && (lval == LONG_MAX || 136 lval == LONG_MIN)) || 137 (lval > INT_MAX || lval < 0)) 138 return(0); 139 140 *v = (unsigned int)lval; 141 return(1); 142 } 143 144 /* 145 * Print a character, escaping HTML along the way. 146 * This will pass non-ASCII straight to output: be warned! 147 */ 148 static void 149 html_putchar(char c) 150 { 151 152 switch (c) { 153 case ('"'): 154 printf(""e;"); 155 break; 156 case ('&'): 157 printf("&"); 158 break; 159 case ('>'): 160 printf(">"); 161 break; 162 case ('<'): 163 printf("<"); 164 break; 165 default: 166 putchar((unsigned char)c); 167 break; 168 } 169 } 170 static void 171 http_printquery(const struct req *req) 172 { 173 174 printf("&expr="); 175 http_print(req->q.expr ? req->q.expr : ""); 176 printf("&sec="); 177 http_print(req->q.sec ? req->q.sec : ""); 178 printf("&arch="); 179 http_print(req->q.arch ? req->q.arch : ""); 180 } 181 182 183 static void 184 html_printquery(const struct req *req) 185 { 186 187 printf("&expr="); 188 html_print(req->q.expr ? req->q.expr : ""); 189 printf("&sec="); 190 html_print(req->q.sec ? req->q.sec : ""); 191 printf("&arch="); 192 html_print(req->q.arch ? req->q.arch : ""); 193 } 194 195 static void 196 http_print(const char *p) 197 { 198 199 if (NULL == p) 200 return; 201 while ('\0' != *p) 202 http_putchar(*p++); 203 } 204 205 /* 206 * Call through to html_putchar(). 207 * Accepts NULL strings. 208 */ 209 static void 210 html_print(const char *p) 211 { 212 213 if (NULL == p) 214 return; 215 while ('\0' != *p) 216 html_putchar(*p++); 217 } 218 219 /* 220 * Parse out key-value pairs from an HTTP request variable. 221 * This can be either a cookie or a POST/GET string, although man.cgi 222 * uses only GET for simplicity. 223 */ 224 static void 225 http_parse(struct req *req, char *p) 226 { 227 char *key, *val, *manroot; 228 int i, legacy; 229 230 memset(&req->q, 0, sizeof(struct query)); 231 232 legacy = -1; 233 manroot = NULL; 234 235 while ('\0' != *p) { 236 key = p; 237 val = NULL; 238 239 p += (int)strcspn(p, ";&"); 240 if ('\0' != *p) 241 *p++ = '\0'; 242 if (NULL != (val = strchr(key, '='))) 243 *val++ = '\0'; 244 245 if ('\0' == *key || NULL == val || '\0' == *val) 246 continue; 247 248 /* Just abort handling. */ 249 250 if ( ! http_decode(key)) 251 break; 252 if (NULL != val && ! http_decode(val)) 253 break; 254 255 if (0 == strcmp(key, "expr")) 256 req->q.expr = val; 257 else if (0 == strcmp(key, "query")) 258 req->q.expr = val; 259 else if (0 == strcmp(key, "sec")) 260 req->q.sec = val; 261 else if (0 == strcmp(key, "sektion")) 262 req->q.sec = val; 263 else if (0 == strcmp(key, "arch")) 264 req->q.arch = val; 265 else if (0 == strcmp(key, "manpath")) 266 manroot = val; 267 else if (0 == strcmp(key, "apropos")) 268 legacy = 0 == strcmp(val, "0"); 269 } 270 271 /* Test for old man.cgi compatibility mode. */ 272 273 req->q.legacy = legacy > 0; 274 275 /* 276 * Section "0" means no section when in legacy mode. 277 * For some man.cgi scripts, "default" arch is none. 278 */ 279 280 if (req->q.legacy && NULL != req->q.sec) 281 if (0 == strcmp(req->q.sec, "0")) 282 req->q.sec = NULL; 283 if (req->q.legacy && NULL != req->q.arch) 284 if (0 == strcmp(req->q.arch, "default")) 285 req->q.arch = NULL; 286 287 /* Default to first manroot. */ 288 289 if (NULL != manroot) { 290 for (i = 0; i < (int)req->psz; i++) 291 if (0 == strcmp(req->p[i].name, manroot)) 292 break; 293 req->q.manroot = i < (int)req->psz ? i : -1; 294 } 295 } 296 297 static void 298 http_putchar(char c) 299 { 300 301 if (isalnum((unsigned char)c)) { 302 putchar((unsigned char)c); 303 return; 304 } else if (' ' == c) { 305 putchar('+'); 306 return; 307 } 308 printf("%%%.2x", c); 309 } 310 311 /* 312 * HTTP-decode a string. The standard explanation is that this turns 313 * "%4e+foo" into "n foo" in the regular way. This is done in-place 314 * over the allocated string. 315 */ 316 static int 317 http_decode(char *p) 318 { 319 char hex[3]; 320 int c; 321 322 hex[2] = '\0'; 323 324 for ( ; '\0' != *p; p++) { 325 if ('%' == *p) { 326 if ('\0' == (hex[0] = *(p + 1))) 327 return(0); 328 if ('\0' == (hex[1] = *(p + 2))) 329 return(0); 330 if (1 != sscanf(hex, "%x", &c)) 331 return(0); 332 if ('\0' == c) 333 return(0); 334 335 *p = (char)c; 336 memmove(p + 1, p + 3, strlen(p + 3) + 1); 337 } else 338 *p = '+' == *p ? ' ' : *p; 339 } 340 341 *p = '\0'; 342 return(1); 343 } 344 345 static void 346 resp_begin_http(int code, const char *msg) 347 { 348 349 if (200 != code) 350 printf("Status: %d %s\n", code, msg); 351 352 puts("Content-Type: text/html; charset=utf-8\n" 353 "Cache-Control: no-cache\n" 354 "Pragma: no-cache\n" 355 ""); 356 357 fflush(stdout); 358 } 359 360 static void 361 resp_begin_html(int code, const char *msg) 362 { 363 364 resp_begin_http(code, msg); 365 366 printf("<!DOCTYPE HTML PUBLIC " 367 " \"-//W3C//DTD HTML 4.01//EN\"" 368 " \"http://www.w3.org/TR/html4/strict.dtd\">\n" 369 "<HTML>\n" 370 "<HEAD>\n" 371 "<META HTTP-EQUIV=\"Content-Type\"" 372 " CONTENT=\"text/html; charset=utf-8\">\n" 373 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\"" 374 " TYPE=\"text/css\" media=\"all\">\n" 375 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\"" 376 " TYPE=\"text/css\" media=\"all\">\n" 377 "<TITLE>System Manpage Reference</TITLE>\n" 378 "</HEAD>\n" 379 "<BODY>\n" 380 "<!-- Begin page content. //-->\n", css, css); 381 } 382 383 static void 384 resp_end_html(void) 385 { 386 387 puts("</BODY>\n" 388 "</HTML>"); 389 } 390 391 static void 392 resp_searchform(const struct req *req) 393 { 394 int i; 395 396 puts("<!-- Begin search form. //-->"); 397 printf("<DIV ID=\"mancgi\">\n" 398 "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n" 399 "<FIELDSET>\n" 400 "<LEGEND>Search Parameters</LEGEND>\n" 401 "<INPUT TYPE=\"submit\" " 402 " VALUE=\"Search\"> for manuals satisfying \n" 403 "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"", 404 progname); 405 html_print(req->q.expr ? req->q.expr : ""); 406 printf("\">, section " 407 "<INPUT TYPE=\"text\"" 408 " SIZE=\"4\" NAME=\"sec\" VALUE=\""); 409 html_print(req->q.sec ? req->q.sec : ""); 410 printf("\">, arch " 411 "<INPUT TYPE=\"text\"" 412 " SIZE=\"8\" NAME=\"arch\" VALUE=\""); 413 html_print(req->q.arch ? req->q.arch : ""); 414 printf("\">"); 415 if (req->psz > 1) { 416 puts(", <SELECT NAME=\"manpath\">"); 417 for (i = 0; i < (int)req->psz; i++) { 418 printf("<OPTION %s VALUE=\"", 419 (i == req->q.manroot) || 420 (0 == i && -1 == req->q.manroot) ? 421 "SELECTED=\"selected\"" : ""); 422 html_print(req->p[i].name); 423 printf("\">"); 424 html_print(req->p[i].name); 425 puts("</OPTION>"); 426 } 427 puts("</SELECT>"); 428 } 429 puts(".\n" 430 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n" 431 "</FIELDSET>\n" 432 "</FORM>\n" 433 "</DIV>"); 434 puts("<!-- End search form. //-->"); 435 } 436 437 static void 438 resp_index(const struct req *req) 439 { 440 441 resp_begin_html(200, NULL); 442 resp_searchform(req); 443 resp_end_html(); 444 } 445 446 static void 447 resp_error400(void) 448 { 449 450 resp_begin_html(400, "Query Malformed"); 451 printf("<H1>Malformed Query</H1>\n" 452 "<P>\n" 453 "The query your entered was malformed.\n" 454 "Try again from the\n" 455 "<A HREF=\"%s/index.html\">main page</A>.\n" 456 "</P>", progname); 457 resp_end_html(); 458 } 459 460 static void 461 resp_error404(const char *page) 462 { 463 464 resp_begin_html(404, "Not Found"); 465 puts("<H1>Page Not Found</H1>\n" 466 "<P>\n" 467 "The page you're looking for, "); 468 printf("<B>"); 469 html_print(page); 470 printf("</B>,\n" 471 "could not be found.\n" 472 "Try searching from the\n" 473 "<A HREF=\"%s/index.html\">main page</A>.\n" 474 "</P>", progname); 475 resp_end_html(); 476 } 477 478 static void 479 resp_bad(void) 480 { 481 resp_begin_html(500, "Internal Server Error"); 482 puts("<P>Generic badness happened.</P>"); 483 resp_end_html(); 484 } 485 486 static void 487 resp_baddb(void) 488 { 489 490 resp_begin_html(500, "Internal Server Error"); 491 puts("<P>Your database is broken.</P>"); 492 resp_end_html(); 493 } 494 495 static void 496 resp_search(struct res *r, size_t sz, void *arg) 497 { 498 size_t i, matched; 499 const struct req *req; 500 501 req = (const struct req *)arg; 502 503 if (sz > 0) 504 assert(req->q.manroot >= 0); 505 506 for (matched = i = 0; i < sz; i++) 507 if (r[i].matched) 508 matched++; 509 510 if (1 == matched) { 511 for (i = 0; i < sz; i++) 512 if (r[i].matched) 513 break; 514 /* 515 * If we have just one result, then jump there now 516 * without any delay. 517 */ 518 puts("Status: 303 See Other"); 519 printf("Location: http://%s%s/show/%d/%u/%u.html?", 520 host, progname, req->q.manroot, 521 r[i].volume, r[i].rec); 522 http_printquery(req); 523 puts("\n" 524 "Content-Type: text/html; charset=utf-8\n"); 525 return; 526 } 527 528 resp_begin_html(200, NULL); 529 resp_searchform(req); 530 531 puts("<DIV CLASS=\"results\">"); 532 533 if (0 == matched) { 534 puts("<P>\n" 535 "No results found.\n" 536 "</P>\n" 537 "</DIV>"); 538 resp_end_html(); 539 return; 540 } 541 542 qsort(r, sz, sizeof(struct res), cmp); 543 544 puts("<TABLE>"); 545 546 for (i = 0; i < sz; i++) { 547 if ( ! r[i].matched) 548 continue; 549 printf("<TR>\n" 550 "<TD CLASS=\"title\">\n" 551 "<A HREF=\"%s/show/%d/%u/%u.html?", 552 progname, req->q.manroot, 553 r[i].volume, r[i].rec); 554 html_printquery(req); 555 printf("\">"); 556 html_print(r[i].title); 557 putchar('('); 558 html_print(r[i].cat); 559 if (r[i].arch && '\0' != *r[i].arch) { 560 putchar('/'); 561 html_print(r[i].arch); 562 } 563 printf(")</A>\n" 564 "</TD>\n" 565 "<TD CLASS=\"desc\">"); 566 html_print(r[i].desc); 567 puts("</TD>\n" 568 "</TR>"); 569 } 570 571 puts("</TABLE>\n" 572 "</DIV>"); 573 resp_end_html(); 574 } 575 576 /* ARGSUSED */ 577 static void 578 pg_index(const struct req *req, char *path) 579 { 580 581 resp_index(req); 582 } 583 584 static void 585 catman(const struct req *req, const char *file) 586 { 587 FILE *f; 588 size_t len; 589 int i; 590 char *p; 591 int italic, bold; 592 593 if (NULL == (f = fopen(file, "r"))) { 594 resp_baddb(); 595 return; 596 } 597 598 resp_begin_html(200, NULL); 599 resp_searchform(req); 600 puts("<DIV CLASS=\"catman\">\n" 601 "<PRE>"); 602 603 while (NULL != (p = fgetln(f, &len))) { 604 bold = italic = 0; 605 for (i = 0; i < (int)len - 1; i++) { 606 /* 607 * This means that the catpage is out of state. 608 * Ignore it and keep going (although the 609 * catpage is bogus). 610 */ 611 612 if ('\b' == p[i] || '\n' == p[i]) 613 continue; 614 615 /* 616 * Print a regular character. 617 * Close out any bold/italic scopes. 618 * If we're in back-space mode, make sure we'll 619 * have something to enter when we backspace. 620 */ 621 622 if ('\b' != p[i + 1]) { 623 if (italic) 624 printf("</I>"); 625 if (bold) 626 printf("</B>"); 627 italic = bold = 0; 628 html_putchar(p[i]); 629 continue; 630 } else if (i + 2 >= (int)len) 631 continue; 632 633 /* Italic mode. */ 634 635 if ('_' == p[i]) { 636 if (bold) 637 printf("</B>"); 638 if ( ! italic) 639 printf("<I>"); 640 bold = 0; 641 italic = 1; 642 i += 2; 643 html_putchar(p[i]); 644 continue; 645 } 646 647 /* 648 * Handle funny behaviour troff-isms. 649 * These grok'd from the original man2html.c. 650 */ 651 652 if (('+' == p[i] && 'o' == p[i + 2]) || 653 ('o' == p[i] && '+' == p[i + 2]) || 654 ('|' == p[i] && '=' == p[i + 2]) || 655 ('=' == p[i] && '|' == p[i + 2]) || 656 ('*' == p[i] && '=' == p[i + 2]) || 657 ('=' == p[i] && '*' == p[i + 2]) || 658 ('*' == p[i] && '|' == p[i + 2]) || 659 ('|' == p[i] && '*' == p[i + 2])) { 660 if (italic) 661 printf("</I>"); 662 if (bold) 663 printf("</B>"); 664 italic = bold = 0; 665 putchar('*'); 666 i += 2; 667 continue; 668 } else if (('|' == p[i] && '-' == p[i + 2]) || 669 ('-' == p[i] && '|' == p[i + 1]) || 670 ('+' == p[i] && '-' == p[i + 1]) || 671 ('-' == p[i] && '+' == p[i + 1]) || 672 ('+' == p[i] && '|' == p[i + 1]) || 673 ('|' == p[i] && '+' == p[i + 1])) { 674 if (italic) 675 printf("</I>"); 676 if (bold) 677 printf("</B>"); 678 italic = bold = 0; 679 putchar('+'); 680 i += 2; 681 continue; 682 } 683 684 /* Bold mode. */ 685 686 if (italic) 687 printf("</I>"); 688 if ( ! bold) 689 printf("<B>"); 690 bold = 1; 691 italic = 0; 692 i += 2; 693 html_putchar(p[i]); 694 } 695 696 /* 697 * Clean up the last character. 698 * We can get to a newline; don't print that. 699 */ 700 701 if (italic) 702 printf("</I>"); 703 if (bold) 704 printf("</B>"); 705 706 if (i == (int)len - 1 && '\n' != p[i]) 707 html_putchar(p[i]); 708 709 putchar('\n'); 710 } 711 712 puts("</PRE>\n" 713 "</DIV>\n" 714 "</BODY>\n" 715 "</HTML>"); 716 717 fclose(f); 718 } 719 720 static void 721 format(const struct req *req, const char *file) 722 { 723 struct mparse *mp; 724 int fd; 725 struct mdoc *mdoc; 726 struct man *man; 727 void *vp; 728 enum mandoclevel rc; 729 char opts[MAXPATHLEN + 128]; 730 731 if (-1 == (fd = open(file, O_RDONLY, 0))) { 732 resp_baddb(); 733 return; 734 } 735 736 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); 737 rc = mparse_readfd(mp, fd, file); 738 close(fd); 739 740 if (rc >= MANDOCLEVEL_FATAL) { 741 resp_baddb(); 742 return; 743 } 744 745 snprintf(opts, sizeof(opts), "fragment," 746 "man=%s/search.html?sec=%%S&expr=%%N," 747 /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/, 748 progname); 749 750 mparse_result(mp, &mdoc, &man); 751 if (NULL == man && NULL == mdoc) { 752 resp_baddb(); 753 mparse_free(mp); 754 return; 755 } 756 757 resp_begin_html(200, NULL); 758 resp_searchform(req); 759 760 vp = html_alloc(opts); 761 762 if (NULL != mdoc) 763 html_mdoc(vp, mdoc); 764 else 765 html_man(vp, man); 766 767 puts("</BODY>\n" 768 "</HTML>"); 769 770 html_free(vp); 771 mparse_free(mp); 772 } 773 774 static void 775 pg_show(const struct req *req, char *path) 776 { 777 struct manpaths ps; 778 size_t sz; 779 char *sub; 780 char file[MAXPATHLEN]; 781 const char *cp; 782 int rc, catm; 783 unsigned int vol, rec, mr; 784 DB *idx; 785 DBT key, val; 786 787 idx = NULL; 788 789 /* Parse out mroot, volume, and record from the path. */ 790 791 if (NULL == path || NULL == (sub = strchr(path, '/'))) { 792 resp_error400(); 793 return; 794 } 795 *sub++ = '\0'; 796 if ( ! atou(path, &mr)) { 797 resp_error400(); 798 return; 799 } 800 path = sub; 801 if (NULL == (sub = strchr(path, '/'))) { 802 resp_error400(); 803 return; 804 } 805 *sub++ = '\0'; 806 if ( ! atou(path, &vol) || ! atou(sub, &rec)) { 807 resp_error400(); 808 return; 809 } else if (mr >= (unsigned int)req->psz) { 810 resp_error400(); 811 return; 812 } 813 814 /* 815 * Begin by chdir()ing into the manroot. 816 * This way we can pick up the database files, which are 817 * relative to the manpath root. 818 */ 819 820 if (-1 == chdir(req->p[(int)mr].path)) { 821 perror(req->p[(int)mr].path); 822 resp_baddb(); 823 return; 824 } 825 826 memset(&ps, 0, sizeof(struct manpaths)); 827 manpath_manconf(&ps, "etc/catman.conf"); 828 829 if (vol >= (unsigned int)ps.sz) { 830 resp_error400(); 831 goto out; 832 } 833 834 sz = strlcpy(file, ps.paths[vol], MAXPATHLEN); 835 assert(sz < MAXPATHLEN); 836 strlcat(file, "/", MAXPATHLEN); 837 strlcat(file, MANDOC_IDX, MAXPATHLEN); 838 839 /* Open the index recno(3) database. */ 840 841 idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL); 842 if (NULL == idx) { 843 perror(file); 844 resp_baddb(); 845 goto out; 846 } 847 848 key.data = &rec; 849 key.size = 4; 850 851 if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) { 852 rc < 0 ? resp_baddb() : resp_error400(); 853 goto out; 854 } else if (0 == val.size) { 855 resp_baddb(); 856 goto out; 857 } 858 859 cp = (char *)val.data; 860 catm = 'c' == *cp++; 861 862 if (NULL == memchr(cp, '\0', val.size - 1)) 863 resp_baddb(); 864 else { 865 file[(int)sz] = '\0'; 866 strlcat(file, "/", MAXPATHLEN); 867 strlcat(file, cp, MAXPATHLEN); 868 if (catm) 869 catman(req, file); 870 else 871 format(req, file); 872 } 873 out: 874 if (idx) 875 (*idx->close)(idx); 876 manpath_free(&ps); 877 } 878 879 static void 880 pg_search(const struct req *req, char *path) 881 { 882 size_t tt, ressz; 883 struct manpaths ps; 884 int i, sz, rc; 885 const char *ep, *start; 886 struct res *res; 887 char **cp; 888 struct opts opt; 889 struct expr *expr; 890 891 if (req->q.manroot < 0 || 0 == req->psz) { 892 resp_search(NULL, 0, (void *)req); 893 return; 894 } 895 896 memset(&opt, 0, sizeof(struct opts)); 897 898 ep = req->q.expr; 899 opt.arch = req->q.arch; 900 opt.cat = req->q.sec; 901 rc = -1; 902 sz = 0; 903 cp = NULL; 904 ressz = 0; 905 res = NULL; 906 907 /* 908 * Begin by chdir()ing into the root of the manpath. 909 * This way we can pick up the database files, which are 910 * relative to the manpath root. 911 */ 912 913 assert(req->q.manroot < (int)req->psz); 914 if (-1 == (chdir(req->p[req->q.manroot].path))) { 915 perror(req->p[req->q.manroot].path); 916 resp_search(NULL, 0, (void *)req); 917 return; 918 } 919 920 memset(&ps, 0, sizeof(struct manpaths)); 921 manpath_manconf(&ps, "etc/catman.conf"); 922 923 /* 924 * Poor man's tokenisation: just break apart by spaces. 925 * Yes, this is half-ass. But it works for now. 926 */ 927 928 while (ep && isspace((unsigned char)*ep)) 929 ep++; 930 931 while (ep && '\0' != *ep) { 932 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *)); 933 start = ep; 934 while ('\0' != *ep && ! isspace((unsigned char)*ep)) 935 ep++; 936 cp[sz] = mandoc_malloc((ep - start) + 1); 937 memcpy(cp[sz], start, ep - start); 938 cp[sz++][ep - start] = '\0'; 939 while (isspace((unsigned char)*ep)) 940 ep++; 941 } 942 943 /* 944 * Pump down into apropos backend. 945 * The resp_search() function is called with the results. 946 */ 947 948 expr = req->q.legacy ? 949 termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt); 950 951 if (NULL != expr) 952 rc = apropos_search 953 (ps.sz, ps.paths, &opt, expr, tt, 954 (void *)req, &ressz, &res, resp_search); 955 956 /* ...unless errors occured. */ 957 958 if (0 == rc) 959 resp_baddb(); 960 else if (-1 == rc) 961 resp_search(NULL, 0, NULL); 962 963 for (i = 0; i < sz; i++) 964 free(cp[i]); 965 966 free(cp); 967 resfree(res, ressz); 968 exprfree(expr); 969 manpath_free(&ps); 970 } 971 972 int 973 main(void) 974 { 975 int i; 976 char buf[MAXPATHLEN]; 977 DIR *cwd; 978 struct req req; 979 char *p, *path, *subpath; 980 981 /* Scan our run-time environment. */ 982 983 if (NULL == (cache = getenv("CACHE_DIR"))) 984 cache = "/cache/man.cgi"; 985 986 if (NULL == (progname = getenv("SCRIPT_NAME"))) 987 progname = ""; 988 989 if (NULL == (css = getenv("CSS_DIR"))) 990 css = ""; 991 992 if (NULL == (host = getenv("HTTP_HOST"))) 993 host = "localhost"; 994 995 /* 996 * First we change directory into the cache directory so that 997 * subsequent scanning for manpath directories is rooted 998 * relative to the same position. 999 */ 1000 1001 if (-1 == chdir(cache)) { 1002 perror(cache); 1003 resp_bad(); 1004 return(EXIT_FAILURE); 1005 } else if (NULL == (cwd = opendir(cache))) { 1006 perror(cache); 1007 resp_bad(); 1008 return(EXIT_FAILURE); 1009 } 1010 1011 memset(&req, 0, sizeof(struct req)); 1012 1013 strlcpy(buf, ".", MAXPATHLEN); 1014 pathgen(cwd, buf, &req); 1015 closedir(cwd); 1016 1017 /* Next parse out the query string. */ 1018 1019 if (NULL != (p = getenv("QUERY_STRING"))) 1020 http_parse(&req, p); 1021 1022 /* 1023 * Now juggle paths to extract information. 1024 * We want to extract our filetype (the file suffix), the 1025 * initial path component, then the trailing component(s). 1026 * Start with leading subpath component. 1027 */ 1028 1029 subpath = path = NULL; 1030 req.page = PAGE__MAX; 1031 1032 if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path) 1033 req.page = PAGE_INDEX; 1034 1035 if (NULL != path && '/' == *path && '\0' == *++path) 1036 req.page = PAGE_INDEX; 1037 1038 /* Strip file suffix. */ 1039 1040 if (NULL != path && NULL != (p = strrchr(path, '.'))) 1041 if (NULL != p && NULL == strchr(p, '/')) 1042 *p++ = '\0'; 1043 1044 /* Resolve subpath component. */ 1045 1046 if (NULL != path && NULL != (subpath = strchr(path, '/'))) 1047 *subpath++ = '\0'; 1048 1049 /* Map path into one we recognise. */ 1050 1051 if (NULL != path && '\0' != *path) 1052 for (i = 0; i < (int)PAGE__MAX; i++) 1053 if (0 == strcmp(pages[i], path)) { 1054 req.page = (enum page)i; 1055 break; 1056 } 1057 1058 /* Route pages. */ 1059 1060 switch (req.page) { 1061 case (PAGE_INDEX): 1062 pg_index(&req, subpath); 1063 break; 1064 case (PAGE_SEARCH): 1065 pg_search(&req, subpath); 1066 break; 1067 case (PAGE_SHOW): 1068 pg_show(&req, subpath); 1069 break; 1070 default: 1071 resp_error404(path); 1072 break; 1073 } 1074 1075 for (i = 0; i < (int)req.psz; i++) { 1076 free(req.p[i].path); 1077 free(req.p[i].name); 1078 } 1079 1080 free(req.p); 1081 return(EXIT_SUCCESS); 1082 } 1083 1084 static int 1085 cmp(const void *p1, const void *p2) 1086 { 1087 1088 return(strcasecmp(((const struct res *)p1)->title, 1089 ((const struct res *)p2)->title)); 1090 } 1091 1092 /* 1093 * Check to see if an "etc" path consists of a catman.conf file. If it 1094 * does, that means that the path contains a tree created by catman(8) 1095 * and should be used for indexing. 1096 */ 1097 static int 1098 pathstop(DIR *dir) 1099 { 1100 struct dirent *d; 1101 1102 while (NULL != (d = readdir(dir))) 1103 if (DT_REG == d->d_type) 1104 if (0 == strcmp(d->d_name, "catman.conf")) 1105 return(1); 1106 1107 return(0); 1108 } 1109 1110 /* 1111 * Scan for indexable paths. 1112 * This adds all paths with "etc/catman.conf" to the buffer. 1113 */ 1114 static void 1115 pathgen(DIR *dir, char *path, struct req *req) 1116 { 1117 struct dirent *d; 1118 char *cp; 1119 DIR *cd; 1120 int rc; 1121 size_t sz, ssz; 1122 1123 sz = strlcat(path, "/", MAXPATHLEN); 1124 if (sz >= MAXPATHLEN) { 1125 fprintf(stderr, "%s: Path too long", path); 1126 return; 1127 } 1128 1129 /* 1130 * First, scan for the "etc" directory. 1131 * If it's found, then see if it should cause us to stop. This 1132 * happens when a catman.conf is found in the directory. 1133 */ 1134 1135 rc = 0; 1136 while (0 == rc && NULL != (d = readdir(dir))) { 1137 if (DT_DIR != d->d_type || strcmp(d->d_name, "etc")) 1138 continue; 1139 1140 path[(int)sz] = '\0'; 1141 ssz = strlcat(path, d->d_name, MAXPATHLEN); 1142 1143 if (ssz >= MAXPATHLEN) { 1144 fprintf(stderr, "%s: Path too long", path); 1145 return; 1146 } else if (NULL == (cd = opendir(path))) { 1147 perror(path); 1148 return; 1149 } 1150 1151 rc = pathstop(cd); 1152 closedir(cd); 1153 } 1154 1155 if (rc > 0) { 1156 /* This also strips the trailing slash. */ 1157 path[(int)--sz] = '\0'; 1158 req->p = mandoc_realloc 1159 (req->p, 1160 (req->psz + 1) * sizeof(struct paths)); 1161 /* 1162 * Strip out the leading "./" unless we're just a ".", 1163 * in which case use an empty string as our name. 1164 */ 1165 req->p[(int)req->psz].path = mandoc_strdup(path); 1166 req->p[(int)req->psz].name = 1167 cp = mandoc_strdup(path + (1 == sz ? 1 : 2)); 1168 req->psz++; 1169 /* 1170 * The name is just the path with all the slashes taken 1171 * out of it. Simple but effective. 1172 */ 1173 for ( ; '\0' != *cp; cp++) 1174 if ('/' == *cp) 1175 *cp = ' '; 1176 return; 1177 } 1178 1179 /* 1180 * If no etc/catman.conf was found, recursively enter child 1181 * directory and continue scanning. 1182 */ 1183 1184 rewinddir(dir); 1185 while (NULL != (d = readdir(dir))) { 1186 if (DT_DIR != d->d_type || '.' == d->d_name[0]) 1187 continue; 1188 1189 path[(int)sz] = '\0'; 1190 ssz = strlcat(path, d->d_name, MAXPATHLEN); 1191 1192 if (ssz >= MAXPATHLEN) { 1193 fprintf(stderr, "%s: Path too long", path); 1194 return; 1195 } else if (NULL == (cd = opendir(path))) { 1196 perror(path); 1197 return; 1198 } 1199 1200 pathgen(cd, path, req); 1201 closedir(cd); 1202 } 1203 } 1204