xref: /netbsd-src/external/bsd/mdocml/dist/cgi.c (revision a4ddc2c8fb9af816efe3b1c375a5530aef0e89e9)
1 /*	$Vendor-Id: cgi.c,v 1.39 2011/12/25 17:49:52 kristaps Exp $ */
2 /*
3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <sys/param.h>
22 #include <sys/wait.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <dirent.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <regex.h>
31 #include <stdio.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 
38 #include "apropos_db.h"
39 #include "mandoc.h"
40 #include "mdoc.h"
41 #include "man.h"
42 #include "main.h"
43 #include "manpath.h"
44 #include "mandocdb.h"
45 
46 #ifdef __linux__
47 # include <db_185.h>
48 #else
49 # include <db.h>
50 #endif
51 
/*
 * The pages this CGI can serve.
 * Values double as indices into the pages[] name table, and PAGE__MAX
 * is used by main() as the "no such page" marker.
 */
enum	page {
	PAGE_INDEX = 0,	/* the bare search form */
	PAGE_SEARCH,	/* search results */
	PAGE_SHOW,	/* one rendered manual */
	PAGE__MAX	/* number of pages */
};
58 
/*
 * A manpath root found beneath the cache directory by pathgen().
 */
struct	paths {
	char		*name;	/* name compared against the "manpath" key */
	char		*path;	/* directory that gets chdir(2)'d into */
};
63 
/*
 * The user's query, as filled in by http_parse() and handed to the
 * search and display functions.
 */
struct	query {
	const char	*arch;		/* architecture */
	const char	*sec;		/* manual section */
	const char	*expr;		/* unparsed expression string */
	int		 manroot;	/* manroot index (or -1) */
	int		 whatis;	/* whether whatis mode */
	int		 legacy;	/* whether legacy mode */
};
75 
76 struct	req {
77 	struct query	 q;
78 	struct paths	*p;
79 	size_t		 psz;
80 	enum page	 page;
81 };
82 
/* Small utilities. */
static	int		 atou(const char *, unsigned *);
static	int	 	 cmp(const void *, const void *);

/* Manual rendering. */
static	void		 catman(const struct req *, const char *);
static	void		 format(const struct req *, const char *);

/* HTML-escaped output. */
static	void		 html_print(const char *);
static	void		 html_printquery(const struct req *);
static	void		 html_putchar(char);

/* URL-encoded input and output. */
static	int 		 http_decode(char *);
static	void		 http_parse(struct req *, char *);
static	void		 http_print(const char *);
static	void		 http_printquery(const struct req *);
static	void 		 http_putchar(char);

/* Manpath discovery. */
static	void		 pathgen(DIR *, char *, struct req *);
static	int		 pathstop(DIR *);

/* Page handlers. */
static	void		 pg_index(const struct req *, char *);
static	void		 pg_search(const struct req *, char *);
static	void		 pg_show(const struct req *, char *);

/* Response writers. */
static	void		 resp_bad(void);
static	void		 resp_baddb(void);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_end_html(void);
static	void		 resp_error400(void);
static	void		 resp_error404(const char *);
static	void		 resp_index(const struct req *);
static	void		 resp_search(struct res *, size_t, void *);
static	void		 resp_searchform(const struct req *);
110 
/* Run-time settings; main() fills these in from the environment. */
static	const char	*progname;	/* SCRIPT_NAME: cgi script name */
static	const char	*cache;		/* CACHE_DIR: cache directory */
static	const char	*css;		/* CSS_DIR: css directory */
static	const char	*host;		/* HTTP_HOST: hostname */
115 
116 static	const char * const pages[PAGE__MAX] = {
117 	"index", /* PAGE_INDEX */
118 	"search", /* PAGE_SEARCH */
119 	"show", /* PAGE_SHOW */
120 };
121 
/*
 * Convert a decimal string to an unsigned integer, following the
 * strtol(3) idiom suggested by OpenBSD (used instead of strtonum(3)
 * for portability).
 * Returns 1 and stores the value in "v" on success.
 * Returns 0, leaving "v" untouched, if the string is empty, has
 * trailing garbage, is negative or does not fit into an int.
 */
static int
atou(const char *buf, unsigned *v)
{
	char		*end;
	long		 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Nothing to parse, or something left over. */
	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	/* Out of range for a long. */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	/* Out of range for a non-negative int. */
	if (num < 0 || num > INT_MAX)
		return(0);

	*v = (unsigned int)num;
	return(1);
}
144 
/*
 * Print a character, escaping HTML along the way.
 * The double quote, ampersand and angle brackets are written as
 * character entity references; everything else is written verbatim.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The entity is "quot": "&quote;" is not defined in HTML. */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
171 static void
172 http_printquery(const struct req *req)
173 {
174 
175 	printf("&expr=");
176 	http_print(req->q.expr ? req->q.expr : "");
177 	printf("&sec=");
178 	http_print(req->q.sec ? req->q.sec : "");
179 	printf("&arch=");
180 	http_print(req->q.arch ? req->q.arch : "");
181 }
182 
183 
184 static void
185 html_printquery(const struct req *req)
186 {
187 
188 	printf("&amp;expr=");
189 	html_print(req->q.expr ? req->q.expr : "");
190 	printf("&amp;sec=");
191 	html_print(req->q.sec ? req->q.sec : "");
192 	printf("&amp;arch=");
193 	html_print(req->q.arch ? req->q.arch : "");
194 }
195 
/*
 * Call through to http_putchar() for each character of the string.
 * Accepts NULL strings.
 */
static void
http_print(const char *p)
{
	const char	*cp;

	if (NULL == p)
		return;

	for (cp = p; '\0' != *cp; cp++)
		http_putchar(*cp);
}
205 
/*
 * Call through to html_putchar() for each character of the string.
 * Accepts NULL strings.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (NULL == p)
		return;

	for (cp = p; '\0' != *cp; cp++)
		html_putchar(*cp);
}
219 
220 /*
221  * Parse out key-value pairs from an HTTP request variable.
222  * This can be either a cookie or a POST/GET string, although man.cgi
223  * uses only GET for simplicity.
224  */
225 static void
226 http_parse(struct req *req, char *p)
227 {
228 	char            *key, *val, *manroot;
229 	int		 i, legacy;
230 
231 	memset(&req->q, 0, sizeof(struct query));
232 
233 	req->q.whatis = 1;
234 	legacy = -1;
235 	manroot = NULL;
236 
237 	while ('\0' != *p) {
238 		key = p;
239 		val = NULL;
240 
241 		p += (int)strcspn(p, ";&");
242 		if ('\0' != *p)
243 			*p++ = '\0';
244 		if (NULL != (val = strchr(key, '=')))
245 			*val++ = '\0';
246 
247 		if ('\0' == *key || NULL == val || '\0' == *val)
248 			continue;
249 
250 		/* Just abort handling. */
251 
252 		if ( ! http_decode(key))
253 			break;
254 		if (NULL != val && ! http_decode(val))
255 			break;
256 
257 		if (0 == strcmp(key, "expr"))
258 			req->q.expr = val;
259 		else if (0 == strcmp(key, "query"))
260 			req->q.expr = val;
261 		else if (0 == strcmp(key, "sec"))
262 			req->q.sec = val;
263 		else if (0 == strcmp(key, "sektion"))
264 			req->q.sec = val;
265 		else if (0 == strcmp(key, "arch"))
266 			req->q.arch = val;
267 		else if (0 == strcmp(key, "manpath"))
268 			manroot = val;
269 		else if (0 == strcmp(key, "apropos"))
270 			legacy = 0 == strcmp(val, "0");
271 		else if (0 == strcmp(key, "op"))
272 			req->q.whatis = 0 == strcasecmp(val, "whatis");
273 	}
274 
275 	/* Test for old man.cgi compatibility mode. */
276 
277 	if (legacy == 0) {
278 		req->q.whatis = 0;
279 		req->q.legacy = 1;
280 	} else if (legacy > 0) {
281 		req->q.legacy = 1;
282 		req->q.whatis = 1;
283 	}
284 
285 	/*
286 	 * Section "0" means no section when in legacy mode.
287 	 * For some man.cgi scripts, "default" arch is none.
288 	 */
289 
290 	if (req->q.legacy && NULL != req->q.sec)
291 		if (0 == strcmp(req->q.sec, "0"))
292 			req->q.sec = NULL;
293 	if (req->q.legacy && NULL != req->q.arch)
294 		if (0 == strcmp(req->q.arch, "default"))
295 			req->q.arch = NULL;
296 
297 	/* Default to first manroot. */
298 
299 	if (NULL != manroot) {
300 		for (i = 0; i < (int)req->psz; i++)
301 			if (0 == strcmp(req->p[i].name, manroot))
302 				break;
303 		req->q.manroot = i < (int)req->psz ? i : -1;
304 	}
305 }
306 
/*
 * Print a character, URL-encoding it along the way.
 * Alphanumerics are written as they are, a space becomes "+" and
 * every other byte becomes a percent sign and two hex digits.
 */
static void
http_putchar(char c)
{
	const unsigned char uc = (unsigned char)c;

	if (isalnum(uc))
		putchar(uc);
	else if (' ' == c)
		putchar('+');
	else
		/*
		 * Format the unsigned byte value: a negative char would
		 * be sign-extended and printed with eight hex digits.
		 */
		printf("%%%.2x", (unsigned int)uc);
}
320 
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way.  This is done in-place
 * over the allocated string.
 * Returns 1 on success.
 * Returns 0 if a percent sign is not followed by exactly two hex
 * digits or if the escape encodes a NUL byte; the string contents
 * should not be used in that case.
 */
static int
http_decode(char *p)
{
	char             hex[3];
	char		*dst;
	int              c;

	hex[2] = '\0';

	/* The output is never longer than the input: decode forward. */

	for (dst = p; '\0' != *p; p++, dst++) {
		if ('+' == *p) {
			*dst = ' ';
			continue;
		} else if ('%' != *p) {
			*dst = *p;
			continue;
		}

		/*
		 * Require two real hex digits.  The first test also
		 * stops us from reading beyond the end of the string.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		     ! isxdigit((unsigned char)p[2]))
			return(0);

		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL would silently truncate the value. */

		if (0 == c)
			return(0);

		*dst = (char)c;
		p += 2;
	}

	*dst = '\0';
	return(1);
}
354 
/*
 * Write the CGI response headers and the blank line ending them,
 * then flush standard output.
 * A "Status" header is only written for codes other than 200, so
 * "msg" is not looked at when the code is 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char headers[] =
		"Content-Type: text/html; charset=utf-8\n"
		"Cache-Control: no-cache\n"
		"Pragma: no-cache\n"
		"\n";

	if (200 != code)
		printf("Status: %d %s\n", code, msg);

	fputs(headers, stdout);
	fflush(stdout);
}
369 
370 static void
371 resp_begin_html(int code, const char *msg)
372 {
373 
374 	resp_begin_http(code, msg);
375 
376 	printf("<!DOCTYPE HTML PUBLIC "
377 	       " \"-//W3C//DTD HTML 4.01//EN\""
378 	       " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
379 	       "<HTML>\n"
380 	       "<HEAD>\n"
381 	       "<META HTTP-EQUIV=\"Content-Type\""
382 	       " CONTENT=\"text/html; charset=utf-8\">\n"
383 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
384 	       " TYPE=\"text/css\" media=\"all\">\n"
385 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
386 	       " TYPE=\"text/css\" media=\"all\">\n"
387 	       "<TITLE>System Manpage Reference</TITLE>\n"
388 	       "</HEAD>\n"
389 	       "<BODY>\n"
390 	       "<!-- Begin page content. //-->\n", css, css);
391 }
392 
/*
 * Close the body and the document opened by resp_begin_html().
 */
static void
resp_end_html(void)
{

	fputs("</BODY>\n"
	      "</HTML>\n", stdout);
}
400 
401 static void
402 resp_searchform(const struct req *req)
403 {
404 	int		 i;
405 
406 	puts("<!-- Begin search form. //-->");
407 	printf("<DIV ID=\"mancgi\">\n"
408 	       "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
409 	       "<FIELDSET>\n"
410 	       "<LEGEND>Search Parameters</LEGEND>\n"
411 	       "<INPUT TYPE=\"submit\" NAME=\"op\""
412 	       " VALUE=\"Whatis\"> or \n"
413 	       "<INPUT TYPE=\"submit\" NAME=\"op\""
414 	       " VALUE=\"apropos\"> for manuals satisfying \n"
415 	       "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
416 	       progname);
417 	html_print(req->q.expr ? req->q.expr : "");
418 	printf("\">, section "
419 	       "<INPUT TYPE=\"text\""
420 	       " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
421 	html_print(req->q.sec ? req->q.sec : "");
422 	printf("\">, arch "
423 	       "<INPUT TYPE=\"text\""
424 	       " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
425 	html_print(req->q.arch ? req->q.arch : "");
426 	printf("\">");
427 	if (req->psz > 1) {
428 		puts(", <SELECT NAME=\"manpath\">");
429 		for (i = 0; i < (int)req->psz; i++) {
430 			printf("<OPTION %s VALUE=\"",
431 				(i == req->q.manroot) ||
432 				(0 == i && -1 == req->q.manroot) ?
433 				"SELECTED=\"selected\"" : "");
434 			html_print(req->p[i].name);
435 			printf("\">");
436 			html_print(req->p[i].name);
437 			puts("</OPTION>");
438 		}
439 		puts("</SELECT>");
440 	}
441 	puts(".\n"
442 	     "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
443 	     "</FIELDSET>\n"
444 	     "</FORM>\n"
445 	     "</DIV>");
446 	puts("<!-- End search form. //-->");
447 }
448 
/*
 * Write the index page, which is nothing but the search form.
 */
static void
resp_index(const struct req *req)
{

	resp_begin_html(200, NULL);
	/* No results section: the form is the whole page. */
	resp_searchform(req);
	resp_end_html();
}
457 
458 static void
459 resp_error400(void)
460 {
461 
462 	resp_begin_html(400, "Query Malformed");
463 	printf("<H1>Malformed Query</H1>\n"
464 	       "<P>\n"
465 	       "The query your entered was malformed.\n"
466 	       "Try again from the\n"
467 	       "<A HREF=\"%s/index.html\">main page</A>.\n"
468 	       "</P>", progname);
469 	resp_end_html();
470 }
471 
472 static void
473 resp_error404(const char *page)
474 {
475 
476 	resp_begin_html(404, "Not Found");
477 	puts("<H1>Page Not Found</H1>\n"
478 	     "<P>\n"
479 	     "The page you're looking for, ");
480 	printf("<B>");
481 	html_print(page);
482 	printf("</B>,\n"
483 	       "could not be found.\n"
484 	       "Try searching from the\n"
485 	       "<A HREF=\"%s/index.html\">main page</A>.\n"
486 	       "</P>", progname);
487 	resp_end_html();
488 }
489 
/*
 * Write a "500 Internal Server Error" page for failures that have
 * nothing to do with the manual database.
 */
static void
resp_bad(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Generic badness happened.</P>\n", stdout);
	resp_end_html();
}
497 
/*
 * Write a "500 Internal Server Error" page for a manual database or
 * manual file that cannot be used.
 */
static void
resp_baddb(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Your database is broken.</P>\n", stdout);
	resp_end_html();
}
506 
507 static void
508 resp_search(struct res *r, size_t sz, void *arg)
509 {
510 	int		  i;
511 	const struct req *req;
512 
513 	req = (const struct req *)arg;
514 
515 	if (sz > 0)
516 		assert(req->q.manroot >= 0);
517 
518 	if (1 == sz) {
519 		/*
520 		 * If we have just one result, then jump there now
521 		 * without any delay.
522 		 */
523 		puts("Status: 303 See Other");
524 		printf("Location: http://%s%s/show/%d/%u/%u.html?",
525 				host, progname, req->q.manroot,
526 				r[0].volume, r[0].rec);
527 		http_printquery(req);
528 		puts("\n"
529 		     "Content-Type: text/html; charset=utf-8\n");
530 		return;
531 	}
532 
533 	qsort(r, sz, sizeof(struct res), cmp);
534 
535 	resp_begin_html(200, NULL);
536 	resp_searchform(req);
537 
538 	puts("<DIV CLASS=\"results\">");
539 
540 	if (0 == sz) {
541 		printf("<P>\n"
542 		       "No %s results found.\n",
543 		       req->q.whatis ? "whatis" : "apropos");
544 		if (req->q.whatis) {
545 			printf("(Try "
546 			       "<A HREF=\"%s/search.html?op=apropos",
547 			       progname);
548 			html_printquery(req);
549 			puts("\">apropos</A>?)");
550 		}
551 		puts("</P>");
552 		puts("</DIV>");
553 		resp_end_html();
554 		return;
555 	}
556 
557 	puts("<TABLE>");
558 
559 	for (i = 0; i < (int)sz; i++) {
560 		printf("<TR>\n"
561 		       "<TD CLASS=\"title\">\n"
562 		       "<A HREF=\"%s/show/%d/%u/%u.html?",
563 				progname, req->q.manroot,
564 				r[i].volume, r[i].rec);
565 		html_printquery(req);
566 		printf("\">");
567 		html_print(r[i].title);
568 		putchar('(');
569 		html_print(r[i].cat);
570 		if (r[i].arch && '\0' != *r[i].arch) {
571 			putchar('/');
572 			html_print(r[i].arch);
573 		}
574 		printf(")</A>\n"
575 		       "</TD>\n"
576 		       "<TD CLASS=\"desc\">");
577 		html_print(r[i].desc);
578 		puts("</TD>\n"
579 		     "</TR>");
580 	}
581 
582 	puts("</TABLE>\n"
583 	     "</DIV>");
584 	resp_end_html();
585 }
586 
/*
 * Page handler for the index page.
 * The trailing path is not used.
 */
/* ARGSUSED */
static void
pg_index(const struct req *req, char *path)
{

	(void)path;
	resp_index(req);
}
594 
/*
 * Write a preformatted manual ("catpage") as an HTML page.
 * The file is read line by line and backspace overstrike sequences
 * are turned into markup: "_\bX" becomes italic X, a handful of
 * overstruck symbol pairs become "*" or "+", and any other "X\bY"
 * becomes bold Y.
 * If the file cannot be opened, the database error page is written
 * instead.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if (NULL == (f = fopen(file, "r"))) {
		resp_baddb();
		return;
	}

	resp_begin_html(200, NULL);
	resp_searchform(req);
	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while (NULL != (p = fgetln(f, &len))) {
		bold = italic = 0;
		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both halves of each pair are compared: the
			 * character in between is always the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == (int)len - 1 && '\n' != p[i])
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>\n"
	     "</BODY>\n"
	     "</HTML>");

	fclose(f);
}
730 
731 static void
732 format(const struct req *req, const char *file)
733 {
734 	struct mparse	*mp;
735 	int		 fd;
736 	struct mdoc	*mdoc;
737 	struct man	*man;
738 	void		*vp;
739 	enum mandoclevel rc;
740 	char		 opts[MAXPATHLEN + 128];
741 
742 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
743 		resp_baddb();
744 		return;
745 	}
746 
747 	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
748 	rc = mparse_readfd(mp, fd, file);
749 	close(fd);
750 
751 	if (rc >= MANDOCLEVEL_FATAL) {
752 		resp_baddb();
753 		return;
754 	}
755 
756 	snprintf(opts, sizeof(opts), "fragment,"
757 			"man=%s/search.html?sec=%%S&expr=%%N,"
758 			/*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
759 			progname);
760 
761 	mparse_result(mp, &mdoc, &man);
762 	if (NULL == man && NULL == mdoc) {
763 		resp_baddb();
764 		mparse_free(mp);
765 		return;
766 	}
767 
768 	resp_begin_html(200, NULL);
769 	resp_searchform(req);
770 
771 	vp = html_alloc(opts);
772 
773 	if (NULL != mdoc)
774 		html_mdoc(vp, mdoc);
775 	else
776 		html_man(vp, man);
777 
778 	puts("</BODY>\n"
779 	     "</HTML>");
780 
781 	html_free(vp);
782 	mparse_free(mp);
783 }
784 
785 static void
786 pg_show(const struct req *req, char *path)
787 {
788 	struct manpaths	 ps;
789 	size_t		 sz;
790 	char		*sub;
791 	char		 file[MAXPATHLEN];
792 	const char	*cp;
793 	int		 rc, catm;
794 	unsigned int	 vol, rec, mr;
795 	DB		*idx;
796 	DBT		 key, val;
797 
798 	idx = NULL;
799 
800 	/* Parse out mroot, volume, and record from the path. */
801 
802 	if (NULL == path || NULL == (sub = strchr(path, '/'))) {
803 		resp_error400();
804 		return;
805 	}
806 	*sub++ = '\0';
807 	if ( ! atou(path, &mr)) {
808 		resp_error400();
809 		return;
810 	}
811 	path = sub;
812 	if (NULL == (sub = strchr(path, '/'))) {
813 		resp_error400();
814 		return;
815 	}
816 	*sub++ = '\0';
817 	if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
818 		resp_error400();
819 		return;
820 	} else if (mr >= (unsigned int)req->psz) {
821 		resp_error400();
822 		return;
823 	}
824 
825 	/*
826 	 * Begin by chdir()ing into the manroot.
827 	 * This way we can pick up the database files, which are
828 	 * relative to the manpath root.
829 	 */
830 
831 	if (-1 == chdir(req->p[(int)mr].path)) {
832 		perror(req->p[(int)mr].path);
833 		resp_baddb();
834 		return;
835 	}
836 
837 	memset(&ps, 0, sizeof(struct manpaths));
838 	manpath_manconf(&ps, "etc/catman.conf");
839 
840 	if (vol >= (unsigned int)ps.sz) {
841 		resp_error400();
842 		goto out;
843 	}
844 
845 	sz = strlcpy(file, ps.paths[vol], MAXPATHLEN);
846 	assert(sz < MAXPATHLEN);
847 	strlcat(file, "/", MAXPATHLEN);
848 	strlcat(file, MANDOC_IDX, MAXPATHLEN);
849 
850 	/* Open the index recno(3) database. */
851 
852 	idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
853 	if (NULL == idx) {
854 		perror(file);
855 		resp_baddb();
856 		goto out;
857 	}
858 
859 	key.data = &rec;
860 	key.size = 4;
861 
862 	if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
863 		rc < 0 ? resp_baddb() : resp_error400();
864 		goto out;
865 	} else if (0 == val.size) {
866 		resp_baddb();
867 		goto out;
868 	}
869 
870 	cp = (char *)val.data;
871 	catm = 'c' == *cp++;
872 
873 	if (NULL == memchr(cp, '\0', val.size - 1))
874 		resp_baddb();
875 	else {
876  		file[(int)sz] = '\0';
877  		strlcat(file, "/", MAXPATHLEN);
878  		strlcat(file, cp, MAXPATHLEN);
879 		if (catm)
880 			catman(req, file);
881 		else
882 			format(req, file);
883 	}
884 out:
885 	if (idx)
886 		(*idx->close)(idx);
887 	manpath_free(&ps);
888 }
889 
890 static void
891 pg_search(const struct req *req, char *path)
892 {
893 	size_t		  tt;
894 	struct manpaths	  ps;
895 	int		  i, sz, rc;
896 	const char	 *ep, *start;
897 	char		**cp;
898 	struct opts	  opt;
899 	struct expr	 *expr;
900 
901 	if (req->q.manroot < 0 || 0 == req->psz) {
902 		resp_search(NULL, 0, (void *)req);
903 		return;
904 	}
905 
906 	memset(&opt, 0, sizeof(struct opts));
907 
908 	ep 	 = req->q.expr;
909 	opt.arch = req->q.arch;
910 	opt.cat  = req->q.sec;
911 	rc 	 = -1;
912 	sz 	 = 0;
913 	cp	 = NULL;
914 
915 	/*
916 	 * Begin by chdir()ing into the root of the manpath.
917 	 * This way we can pick up the database files, which are
918 	 * relative to the manpath root.
919 	 */
920 
921 	assert(req->q.manroot < (int)req->psz);
922 	if (-1 == (chdir(req->p[req->q.manroot].path))) {
923 		perror(req->p[req->q.manroot].path);
924 		resp_search(NULL, 0, (void *)req);
925 		return;
926 	}
927 
928 	memset(&ps, 0, sizeof(struct manpaths));
929 	manpath_manconf(&ps, "etc/catman.conf");
930 
931 	/*
932 	 * Poor man's tokenisation: just break apart by spaces.
933 	 * Yes, this is half-ass.  But it works for now.
934 	 */
935 
936 	while (ep && isspace((unsigned char)*ep))
937 		ep++;
938 
939 	while (ep && '\0' != *ep) {
940 		cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
941 		start = ep;
942 		while ('\0' != *ep && ! isspace((unsigned char)*ep))
943 			ep++;
944 		cp[sz] = mandoc_malloc((ep - start) + 1);
945 		memcpy(cp[sz], start, ep - start);
946 		cp[sz++][ep - start] = '\0';
947 		while (isspace((unsigned char)*ep))
948 			ep++;
949 	}
950 
951 	/*
952 	 * Pump down into apropos backend.
953 	 * The resp_search() function is called with the results.
954 	 */
955 
956 	expr = req->q.whatis ?
957 		termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
958 
959 	if (NULL != expr)
960 		rc = apropos_search
961 			(ps.sz, ps.paths, &opt,
962 			 expr, tt, (void *)req, resp_search);
963 
964 	/* ...unless errors occured. */
965 
966 	if (0 == rc)
967 		resp_baddb();
968 	else if (-1 == rc)
969 		resp_search(NULL, 0, (void *)req);
970 
971 	for (i = 0; i < sz; i++)
972 		free(cp[i]);
973 
974 	free(cp);
975 	exprfree(expr);
976 	manpath_free(&ps);
977 }
978 
979 int
980 main(void)
981 {
982 	int		 i;
983 	char		 buf[MAXPATHLEN];
984 	DIR		*cwd;
985 	struct req	 req;
986 	char		*p, *path, *subpath;
987 
988 	/* Scan our run-time environment. */
989 
990 	if (NULL == (cache = getenv("CACHE_DIR")))
991 		cache = "/cache/man.cgi";
992 
993 	if (NULL == (progname = getenv("SCRIPT_NAME")))
994 		progname = "";
995 
996 	if (NULL == (css = getenv("CSS_DIR")))
997 		css = "";
998 
999 	if (NULL == (host = getenv("HTTP_HOST")))
1000 		host = "localhost";
1001 
1002 	/*
1003 	 * First we change directory into the cache directory so that
1004 	 * subsequent scanning for manpath directories is rooted
1005 	 * relative to the same position.
1006 	 */
1007 
1008 	if (-1 == chdir(cache)) {
1009 		perror(cache);
1010 		resp_bad();
1011 		return(EXIT_FAILURE);
1012 	} else if (NULL == (cwd = opendir(cache))) {
1013 		perror(cache);
1014 		resp_bad();
1015 		return(EXIT_FAILURE);
1016 	}
1017 
1018 	memset(&req, 0, sizeof(struct req));
1019 
1020 	strlcpy(buf, ".", MAXPATHLEN);
1021 	pathgen(cwd, buf, &req);
1022 	closedir(cwd);
1023 
1024 	/* Next parse out the query string. */
1025 
1026 	if (NULL != (p = getenv("QUERY_STRING")))
1027 		http_parse(&req, p);
1028 
1029 	/*
1030 	 * Now juggle paths to extract information.
1031 	 * We want to extract our filetype (the file suffix), the
1032 	 * initial path component, then the trailing component(s).
1033 	 * Start with leading subpath component.
1034 	 */
1035 
1036 	subpath = path = NULL;
1037 	req.page = PAGE__MAX;
1038 
1039 	if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
1040 		req.page = PAGE_INDEX;
1041 
1042 	if (NULL != path && '/' == *path && '\0' == *++path)
1043 		req.page = PAGE_INDEX;
1044 
1045 	/* Strip file suffix. */
1046 
1047 	if (NULL != path && NULL != (p = strrchr(path, '.')))
1048 		if (NULL != p && NULL == strchr(p, '/'))
1049 			*p++ = '\0';
1050 
1051 	/* Resolve subpath component. */
1052 
1053 	if (NULL != path && NULL != (subpath = strchr(path, '/')))
1054 		*subpath++ = '\0';
1055 
1056 	/* Map path into one we recognise. */
1057 
1058 	if (NULL != path && '\0' != *path)
1059 		for (i = 0; i < (int)PAGE__MAX; i++)
1060 			if (0 == strcmp(pages[i], path)) {
1061 				req.page = (enum page)i;
1062 				break;
1063 			}
1064 
1065 	/* Route pages. */
1066 
1067 	switch (req.page) {
1068 	case (PAGE_INDEX):
1069 		pg_index(&req, subpath);
1070 		break;
1071 	case (PAGE_SEARCH):
1072 		pg_search(&req, subpath);
1073 		break;
1074 	case (PAGE_SHOW):
1075 		pg_show(&req, subpath);
1076 		break;
1077 	default:
1078 		resp_error404(path);
1079 		break;
1080 	}
1081 
1082 	for (i = 0; i < (int)req.psz; i++) {
1083 		free(req.p[i].path);
1084 		free(req.p[i].name);
1085 	}
1086 
1087 	free(req.p);
1088 	return(EXIT_SUCCESS);
1089 }
1090 
1091 static int
1092 cmp(const void *p1, const void *p2)
1093 {
1094 
1095 	return(strcasecmp(((const struct res *)p1)->title,
1096 				((const struct res *)p2)->title));
1097 }
1098 
1099 /*
1100  * Check to see if an "etc" path consists of a catman.conf file.  If it
1101  * does, that means that the path contains a tree created by catman(8)
1102  * and should be used for indexing.
1103  */
1104 static int
1105 pathstop(DIR *dir)
1106 {
1107 	struct dirent	*d;
1108 
1109 	while (NULL != (d = readdir(dir)))
1110 		if (DT_REG == d->d_type)
1111 			if (0 == strcmp(d->d_name, "catman.conf"))
1112 				return(1);
1113 
1114 	return(0);
1115 }
1116 
1117 /*
1118  * Scan for indexable paths.
1119  * This adds all paths with "etc/catman.conf" to the buffer.
1120  */
1121 static void
1122 pathgen(DIR *dir, char *path, struct req *req)
1123 {
1124 	struct dirent	*d;
1125 	char		*cp;
1126 	DIR		*cd;
1127 	int		 rc;
1128 	size_t		 sz, ssz;
1129 
1130 	sz = strlcat(path, "/", MAXPATHLEN);
1131 	if (sz >= MAXPATHLEN) {
1132 		fprintf(stderr, "%s: Path too long", path);
1133 		return;
1134 	}
1135 
1136 	/*
1137 	 * First, scan for the "etc" directory.
1138 	 * If it's found, then see if it should cause us to stop.  This
1139 	 * happens when a catman.conf is found in the directory.
1140 	 */
1141 
1142 	rc = 0;
1143 	while (0 == rc && NULL != (d = readdir(dir))) {
1144 		if (DT_DIR != d->d_type || strcmp(d->d_name, "etc"))
1145 			continue;
1146 
1147 		path[(int)sz] = '\0';
1148 		ssz = strlcat(path, d->d_name, MAXPATHLEN);
1149 
1150 		if (ssz >= MAXPATHLEN) {
1151 			fprintf(stderr, "%s: Path too long", path);
1152 			return;
1153 		} else if (NULL == (cd = opendir(path))) {
1154 			perror(path);
1155 			return;
1156 		}
1157 
1158 		rc = pathstop(cd);
1159 		closedir(cd);
1160 	}
1161 
1162 	if (rc > 0) {
1163 		/* This also strips the trailing slash. */
1164 		path[(int)--sz] = '\0';
1165 		req->p = mandoc_realloc
1166 			(req->p,
1167 			 (req->psz + 1) * sizeof(struct paths));
1168 		/*
1169 		 * Strip out the leading "./" unless we're just a ".",
1170 		 * in which case use an empty string as our name.
1171 		 */
1172 		req->p[(int)req->psz].path = mandoc_strdup(path);
1173 		req->p[(int)req->psz].name =
1174 			cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
1175 		req->psz++;
1176 		/*
1177 		 * The name is just the path with all the slashes taken
1178 		 * out of it.  Simple but effective.
1179 		 */
1180 		for ( ; '\0' != *cp; cp++)
1181 			if ('/' == *cp)
1182 				*cp = ' ';
1183 		return;
1184 	}
1185 
1186 	/*
1187 	 * If no etc/catman.conf was found, recursively enter child
1188 	 * directory and continue scanning.
1189 	 */
1190 
1191 	rewinddir(dir);
1192 	while (NULL != (d = readdir(dir))) {
1193 		if (DT_DIR != d->d_type || '.' == d->d_name[0])
1194 			continue;
1195 
1196 		path[(int)sz] = '\0';
1197 		ssz = strlcat(path, d->d_name, MAXPATHLEN);
1198 
1199 		if (ssz >= MAXPATHLEN) {
1200 			fprintf(stderr, "%s: Path too long", path);
1201 			return;
1202 		} else if (NULL == (cd = opendir(path))) {
1203 			perror(path);
1204 			return;
1205 		}
1206 
1207 		pathgen(cd, path, req);
1208 		closedir(cd);
1209 	}
1210 }
1211