xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 8e0c768258d4632c51876b4397034bc3152bf8db)
1 /*	$OpenBSD: cgi.c,v 1.106 2019/10/01 17:54:04 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "mandoc_parse.h"
38 #include "main.h"
39 #include "manconf.h"
40 #include "mansearch.h"
41 #include "cgi.h"
42 
/*
 * The search parameters of one request,
 * in the form handed to the search function.
 */
struct	query {
	/* Desired manual directory. */
	char		*manpath;
	/* Architecture. */
	char		*arch;
	/* Manual section. */
	char		*sec;
	/* Unparsed query expression. */
	char		*query;
	/* Match whole names, not substrings. */
	int		 equal;
};
53 
54 struct	req {
55 	struct query	  q;
56 	char		**p; /* array of available manpaths */
57 	size_t		  psz; /* number of available manpaths */
58 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
59 };
60 
/*
 * Selects whether the query input box of the search form
 * is printed with the autofocus attribute.
 */
enum	focus {
	FOCUS_NONE = 0,	/* no element gets autofocus */
	FOCUS_QUERY	/* the query input box gets autofocus */
};
65 
66 static	void		 html_print(const char *);
67 static	void		 html_putchar(char);
68 static	int		 http_decode(char *);
69 static	void		 http_encode(const char *p);
70 static	void		 parse_manpath_conf(struct req *);
71 static	void		 parse_path_info(struct req *req, const char *path);
72 static	void		 parse_query_string(struct req *, const char *);
73 static	void		 pg_error_badrequest(const char *);
74 static	void		 pg_error_internal(void);
75 static	void		 pg_index(const struct req *);
76 static	void		 pg_noresult(const struct req *, int, const char *,
77 				const char *);
78 static	void		 pg_redirect(const struct req *, const char *);
79 static	void		 pg_search(const struct req *);
80 static	void		 pg_searchres(const struct req *,
81 				struct manpage *, size_t);
82 static	void		 pg_show(struct req *, const char *);
83 static	void		 resp_begin_html(int, const char *, const char *);
84 static	void		 resp_begin_http(int, const char *);
85 static	void		 resp_catman(const struct req *, const char *);
86 static	void		 resp_copy(const char *);
87 static	void		 resp_end_html(void);
88 static	void		 resp_format(const struct req *, const char *);
89 static	void		 resp_searchform(const struct req *, enum focus);
90 static	void		 resp_show(const struct req *, const char *);
91 static	void		 set_query_attr(char **, char **);
92 static	int		 validate_arch(const char *);
93 static	int		 validate_filename(const char *);
94 static	int		 validate_manpath(const struct req *, const char *);
95 static	int		 validate_urifrag(const char *);
96 
/*
 * Name of this CGI program as it appears in generated URIs.
 * It is always printed after a leading slash and may be the
 * empty string, in which case no separating slash follows it.
 */
static	const char	 *scriptname = SCRIPT_NAME;
98 
/*
 * Preference among the sections 1 to 9 when a single page has to be
 * chosen from several matches: indexed by the section digit minus
 * one, and the smaller value wins.
 */
static	const int sec_prios[] = {
    1, 4, 5, 8, 6, 3, 7, 2, 9
};

/*
 * Values of the section selector in the search form;
 * "0" stands for all sections.
 */
static	const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};

/* Labels of the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
117 
/*
 * Architecture names accepted in requests
 * and offered by the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
    "amd64",     "alpha",     "armv7",     "arm64",
    "hppa",      "i386",      "landisk",   "loongson",
    "luna88k",   "macppc",    "mips64",    "octeon",
    "sgi",       "socppc",    "sparc64",   "amiga",
    "arc",       "armish",    "arm32",     "atari",
    "aviion",    "beagle",    "cats",      "hppa64",
    "hp300",     "ia64",      "mac68k",    "mvme68k",
    "mvme88k",   "mvmeppc",   "palm",      "pc532",
    "pegasos",   "pmax",      "powerpc",   "solbourne",
    "sparc",     "sun3",      "vax",       "wgrisc",
    "x68k",      "zaurus"
};

/* Number of known architecture names. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
133 
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML (", &, >, <) by the
 * corresponding character entities.
 * Everything else, including non-ASCII bytes, is passed through
 * unchanged: be warned!
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
160 
/*
 * Print a string with HTML escaping by feeding it
 * to html_putchar() one character at a time.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
174 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The old value of the attribute is freed.  An empty string is not
 * stored but freed, leaving the attribute NULL.  In both cases,
 * *val is NULL afterwards, so the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	if (*newval == '\0') {
		free(newval);
		newval = NULL;
	}
	*attr = newval;
	*val = NULL;
}
191 
192 /*
193  * Parse the QUERY_STRING for key-value pairs
194  * and store the values into the query structure.
195  */
196 static void
197 parse_query_string(struct req *req, const char *qs)
198 {
199 	char		*key, *val;
200 	size_t		 keysz, valsz;
201 
202 	req->isquery	= 1;
203 	req->q.manpath	= NULL;
204 	req->q.arch	= NULL;
205 	req->q.sec	= NULL;
206 	req->q.query	= NULL;
207 	req->q.equal	= 1;
208 
209 	key = val = NULL;
210 	while (*qs != '\0') {
211 
212 		/* Parse one key. */
213 
214 		keysz = strcspn(qs, "=;&");
215 		key = mandoc_strndup(qs, keysz);
216 		qs += keysz;
217 		if (*qs != '=')
218 			goto next;
219 
220 		/* Parse one value. */
221 
222 		valsz = strcspn(++qs, ";&");
223 		val = mandoc_strndup(qs, valsz);
224 		qs += valsz;
225 
226 		/* Decode and catch encoding errors. */
227 
228 		if ( ! (http_decode(key) && http_decode(val)))
229 			goto next;
230 
231 		/* Handle key-value pairs. */
232 
233 		if ( ! strcmp(key, "query"))
234 			set_query_attr(&req->q.query, &val);
235 
236 		else if ( ! strcmp(key, "apropos"))
237 			req->q.equal = !strcmp(val, "0");
238 
239 		else if ( ! strcmp(key, "manpath")) {
240 #ifdef COMPAT_OLDURI
241 			if ( ! strncmp(val, "OpenBSD ", 8)) {
242 				val[7] = '-';
243 				if ('C' == val[8])
244 					val[8] = 'c';
245 			}
246 #endif
247 			set_query_attr(&req->q.manpath, &val);
248 		}
249 
250 		else if ( ! (strcmp(key, "sec")
251 #ifdef COMPAT_OLDURI
252 		    && strcmp(key, "sektion")
253 #endif
254 		    )) {
255 			if ( ! strcmp(val, "0"))
256 				*val = '\0';
257 			set_query_attr(&req->q.sec, &val);
258 		}
259 
260 		else if ( ! strcmp(key, "arch")) {
261 			if ( ! strcmp(val, "default"))
262 				*val = '\0';
263 			set_query_attr(&req->q.arch, &val);
264 		}
265 
266 		/*
267 		 * The key must be freed in any case.
268 		 * The val may have been handed over to the query
269 		 * structure, in which case it is now NULL.
270 		 */
271 next:
272 		free(key);
273 		key = NULL;
274 		free(val);
275 		val = NULL;
276 
277 		if (*qs != '\0')
278 			qs++;
279 	}
280 }
281 
282 /*
283  * HTTP-decode a string.  The standard explanation is that this turns
284  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
285  * over the allocated string.
286  */
287 static int
288 http_decode(char *p)
289 {
290 	char             hex[3];
291 	char		*q;
292 	int              c;
293 
294 	hex[2] = '\0';
295 
296 	q = p;
297 	for ( ; '\0' != *p; p++, q++) {
298 		if ('%' == *p) {
299 			if ('\0' == (hex[0] = *(p + 1)))
300 				return 0;
301 			if ('\0' == (hex[1] = *(p + 2)))
302 				return 0;
303 			if (1 != sscanf(hex, "%x", &c))
304 				return 0;
305 			if ('\0' == c)
306 				return 0;
307 
308 			*q = (char)c;
309 			p += 2;
310 		} else
311 			*q = '+' == *p ? ' ' : *p;
312 	}
313 
314 	*q = '\0';
315 	return 1;
316 }
317 
/*
 * Print a string to standard output with percent-encoding:
 * alphanumeric characters and the four characters "-._~" are
 * printed as they are, every other byte as '%' followed by
 * two uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
329 
/*
 * Print the HTTP response headers followed by the empty line
 * that ends them, and flush standard output.
 * A Status header built from code and msg is only printed
 * if the code differs from 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 headers[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(headers, stdout);
	fflush(stdout);
}
344 
/*
 * Copy the content of the file with the given name verbatim to
 * standard output, bypassing stdio for the output.
 * A file that cannot be opened is silently ignored, which makes
 * the file optional.  Copying stops at the first read or write
 * error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Keep the output in order: flush what stdio has buffered. */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/*
		 * write(2) may transfer less than requested,
		 * so repeat it until the whole chunk is out.
		 */

		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw == -1) {
				if (errno != EINTR)
					goto done;
				nw = 0;
			}
		}
	}
done:
	close(fd);
}
359 
360 static void
361 resp_begin_html(int code, const char *msg, const char *file)
362 {
363 	char	*cp;
364 
365 	resp_begin_http(code, msg);
366 
367 	printf("<!DOCTYPE html>\n"
368 	       "<html>\n"
369 	       "<head>\n"
370 	       "  <meta charset=\"UTF-8\"/>\n"
371 	       "  <meta name=\"viewport\""
372 		      " content=\"width=device-width, initial-scale=1.0\">\n"
373 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
374 	       " type=\"text/css\" media=\"all\">\n"
375 	       "  <title>",
376 	       CSS_DIR);
377 	if (file != NULL) {
378 		if ((cp = strrchr(file, '/')) != NULL)
379 			file = cp + 1;
380 		if ((cp = strrchr(file, '.')) != NULL) {
381 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
382 		} else
383 			printf("%s - ", file);
384 	}
385 	printf("%s</title>\n"
386 	       "</head>\n"
387 	       "<body>\n",
388 	       CUSTOMIZE_TITLE);
389 
390 	resp_copy(MAN_DIR "/header.html");
391 }
392 
393 static void
394 resp_end_html(void)
395 {
396 
397 	resp_copy(MAN_DIR "/footer.html");
398 
399 	puts("</body>\n"
400 	     "</html>");
401 }
402 
403 static void
404 resp_searchform(const struct req *req, enum focus focus)
405 {
406 	int		 i;
407 
408 	printf("<form action=\"/%s\" method=\"get\">\n"
409 	       "  <fieldset>\n"
410 	       "    <legend>Manual Page Search Parameters</legend>\n",
411 	       scriptname);
412 
413 	/* Write query input box. */
414 
415 	printf("    <input type=\"search\" name=\"query\" value=\"");
416 	if (req->q.query != NULL)
417 		html_print(req->q.query);
418 	printf( "\" size=\"40\"");
419 	if (focus == FOCUS_QUERY)
420 		printf(" autofocus");
421 	puts(">");
422 
423 	/* Write submission buttons. */
424 
425 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
426 		"man</button>\n"
427 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
428 		"apropos</button>\n"
429 		"    <br/>\n");
430 
431 	/* Write section selector. */
432 
433 	puts("    <select name=\"sec\">");
434 	for (i = 0; i < sec_MAX; i++) {
435 		printf("      <option value=\"%s\"", sec_numbers[i]);
436 		if (NULL != req->q.sec &&
437 		    0 == strcmp(sec_numbers[i], req->q.sec))
438 			printf(" selected=\"selected\"");
439 		printf(">%s</option>\n", sec_names[i]);
440 	}
441 	puts("    </select>");
442 
443 	/* Write architecture selector. */
444 
445 	printf(	"    <select name=\"arch\">\n"
446 		"      <option value=\"default\"");
447 	if (NULL == req->q.arch)
448 		printf(" selected=\"selected\"");
449 	puts(">All Architectures</option>");
450 	for (i = 0; i < arch_MAX; i++) {
451 		printf("      <option");
452 		if (NULL != req->q.arch &&
453 		    0 == strcmp(arch_names[i], req->q.arch))
454 			printf(" selected=\"selected\"");
455 		printf(">%s</option>\n", arch_names[i]);
456 	}
457 	puts("    </select>");
458 
459 	/* Write manpath selector. */
460 
461 	if (req->psz > 1) {
462 		puts("    <select name=\"manpath\">");
463 		for (i = 0; i < (int)req->psz; i++) {
464 			printf("      <option");
465 			if (strcmp(req->q.manpath, req->p[i]) == 0)
466 				printf(" selected=\"selected\"");
467 			printf(">");
468 			html_print(req->p[i]);
469 			puts("</option>");
470 		}
471 		puts("    </select>");
472 	}
473 
474 	puts("  </fieldset>\n"
475 	     "</form>");
476 }
477 
/*
 * Check that a string consists exclusively of alphanumeric
 * characters and the characters '-', '.', '/', and '_'.
 * Returns 1 if so (also for the empty string) or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	for (; *frag != '\0'; frag++) {
		if (isalnum((unsigned char)*frag))
			continue;
		switch (*frag) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
491 
492 static int
493 validate_manpath(const struct req *req, const char* manpath)
494 {
495 	size_t	 i;
496 
497 	for (i = 0; i < req->psz; i++)
498 		if ( ! strcmp(manpath, req->p[i]))
499 			return 1;
500 
501 	return 0;
502 }
503 
504 static int
505 validate_arch(const char *arch)
506 {
507 	int	 i;
508 
509 	for (i = 0; i < arch_MAX; i++)
510 		if (strcmp(arch, arch_names[i]) == 0)
511 			return 1;
512 
513 	return 0;
514 }
515 
/*
 * Check that a file name relative to a manpath is acceptable:
 * after an optional leading "./", it must start with "man" or
 * "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Do not allow climbing up the directory tree. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
526 
527 static void
528 pg_index(const struct req *req)
529 {
530 
531 	resp_begin_html(200, NULL, NULL);
532 	resp_searchform(req, FOCUS_QUERY);
533 	printf("<p>\n"
534 	       "This web interface is documented in the\n"
535 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
536 	       "manual, and the\n"
537 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
538 	       "manual explains the query syntax.\n"
539 	       "</p>\n",
540 	       scriptname, *scriptname == '\0' ? "" : "/",
541 	       scriptname, *scriptname == '\0' ? "" : "/");
542 	resp_end_html();
543 }
544 
545 static void
546 pg_noresult(const struct req *req, int code, const char *http_msg,
547     const char *user_msg)
548 {
549 	resp_begin_html(code, http_msg, NULL);
550 	resp_searchform(req, FOCUS_QUERY);
551 	puts("<p>");
552 	puts(user_msg);
553 	puts("</p>");
554 	resp_end_html();
555 }
556 
557 static void
558 pg_error_badrequest(const char *msg)
559 {
560 
561 	resp_begin_html(400, "Bad Request", NULL);
562 	puts("<h1>Bad Request</h1>\n"
563 	     "<p>\n");
564 	puts(msg);
565 	printf("Try again from the\n"
566 	       "<a href=\"/%s\">main page</a>.\n"
567 	       "</p>", scriptname);
568 	resp_end_html();
569 }
570 
/*
 * Print a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
578 
579 static void
580 pg_redirect(const struct req *req, const char *name)
581 {
582 	printf("Status: 303 See Other\r\n"
583 	    "Location: /");
584 	if (*scriptname != '\0')
585 		printf("%s/", scriptname);
586 	if (strcmp(req->q.manpath, req->p[0]))
587 		printf("%s/", req->q.manpath);
588 	if (req->q.arch != NULL)
589 		printf("%s/", req->q.arch);
590 	http_encode(name);
591 	if (req->q.sec != NULL) {
592 		putchar('.');
593 		http_encode(req->q.sec);
594 	}
595 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
596 }
597 
598 static void
599 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
600 {
601 	char		*arch, *archend;
602 	const char	*sec;
603 	size_t		 i, iuse;
604 	int		 archprio, archpriouse;
605 	int		 prio, priouse;
606 
607 	for (i = 0; i < sz; i++) {
608 		if (validate_filename(r[i].file))
609 			continue;
610 		warnx("invalid filename %s in %s database",
611 		    r[i].file, req->q.manpath);
612 		pg_error_internal();
613 		return;
614 	}
615 
616 	if (req->isquery && sz == 1) {
617 		/*
618 		 * If we have just one result, then jump there now
619 		 * without any delay.
620 		 */
621 		printf("Status: 303 See Other\r\n"
622 		    "Location: /");
623 		if (*scriptname != '\0')
624 			printf("%s/", scriptname);
625 		if (strcmp(req->q.manpath, req->p[0]))
626 			printf("%s/", req->q.manpath);
627 		printf("%s\r\n"
628 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
629 		    r[0].file);
630 		return;
631 	}
632 
633 	/*
634 	 * In man(1) mode, show one of the pages
635 	 * even if more than one is found.
636 	 */
637 
638 	iuse = 0;
639 	if (req->q.equal || sz == 1) {
640 		priouse = 20;
641 		archpriouse = 3;
642 		for (i = 0; i < sz; i++) {
643 			sec = r[i].file;
644 			sec += strcspn(sec, "123456789");
645 			if (sec[0] == '\0')
646 				continue;
647 			prio = sec_prios[sec[0] - '1'];
648 			if (sec[1] != '/')
649 				prio += 10;
650 			if (req->q.arch == NULL) {
651 				archprio =
652 				    ((arch = strchr(sec + 1, '/'))
653 					== NULL) ? 3 :
654 				    ((archend = strchr(arch + 1, '/'))
655 					== NULL) ? 0 :
656 				    strncmp(arch, "amd64/",
657 					archend - arch) ? 2 : 1;
658 				if (archprio < archpriouse) {
659 					archpriouse = archprio;
660 					priouse = prio;
661 					iuse = i;
662 					continue;
663 				}
664 				if (archprio > archpriouse)
665 					continue;
666 			}
667 			if (prio >= priouse)
668 				continue;
669 			priouse = prio;
670 			iuse = i;
671 		}
672 		resp_begin_html(200, NULL, r[iuse].file);
673 	} else
674 		resp_begin_html(200, NULL, NULL);
675 
676 	resp_searchform(req,
677 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
678 
679 	if (sz > 1) {
680 		puts("<table class=\"results\">");
681 		for (i = 0; i < sz; i++) {
682 			printf("  <tr>\n"
683 			       "    <td>"
684 			       "<a class=\"Xr\" href=\"/");
685 			if (*scriptname != '\0')
686 				printf("%s/", scriptname);
687 			if (strcmp(req->q.manpath, req->p[0]))
688 				printf("%s/", req->q.manpath);
689 			printf("%s\">", r[i].file);
690 			html_print(r[i].names);
691 			printf("</a></td>\n"
692 			       "    <td><span class=\"Nd\">");
693 			html_print(r[i].output);
694 			puts("</span></td>\n"
695 			     "  </tr>");
696 		}
697 		puts("</table>");
698 	}
699 
700 	if (req->q.equal || sz == 1) {
701 		puts("<hr>");
702 		resp_show(req, r[iuse].file);
703 	}
704 
705 	resp_end_html();
706 }
707 
/*
 * Print a preformatted manual page as an HTML pre element.
 * The file is read line by line, and backspace overstrike
 * sequences are translated: an underscore, a backspace, and a
 * character print the character in italics; a few special
 * combinations print '*' or '+'; and any other character,
 * backspace, character sequence prints the second character
 * in bold.  All text is printed with HTML escaping.
 * If the file cannot be opened, an error paragraph is printed.
 * The request structure is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace
			 * and p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * The partner character is always p[i + 2];
			 * comparing p[i + 1], the backspace,
			 * could never match.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
844 
845 static void
846 resp_format(const struct req *req, const char *file)
847 {
848 	struct manoutput conf;
849 	struct mparse	*mp;
850 	struct roff_meta *meta;
851 	void		*vp;
852 	int		 fd;
853 	int		 usepath;
854 
855 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
856 		puts("<p>You specified an invalid manual file.</p>");
857 		return;
858 	}
859 
860 	mchars_alloc();
861 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
862 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
863 	mparse_readfd(mp, fd, file);
864 	close(fd);
865 	meta = mparse_result(mp);
866 
867 	memset(&conf, 0, sizeof(conf));
868 	conf.fragment = 1;
869 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
870 	usepath = strcmp(req->q.manpath, req->p[0]);
871 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
872 	    scriptname, *scriptname == '\0' ? "" : "/",
873 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
874 
875 	vp = html_alloc(&conf);
876 	if (meta->macroset == MACROSET_MDOC)
877 		html_mdoc(vp, meta);
878 	else
879 		html_man(vp, meta);
880 
881 	html_free(vp);
882 	mparse_free(mp);
883 	mchars_free();
884 	free(conf.man);
885 	free(conf.style);
886 }
887 
/*
 * Print one manual page.  After skipping an optional leading
 * "./", a file name starting with 'c' is treated as a
 * preformatted page; everything else is parsed and formatted.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
900 
901 static void
902 pg_show(struct req *req, const char *fullpath)
903 {
904 	char		*manpath;
905 	const char	*file;
906 
907 	if ((file = strchr(fullpath, '/')) == NULL) {
908 		pg_error_badrequest(
909 		    "You did not specify a page to show.");
910 		return;
911 	}
912 	manpath = mandoc_strndup(fullpath, file - fullpath);
913 	file++;
914 
915 	if ( ! validate_manpath(req, manpath)) {
916 		pg_error_badrequest(
917 		    "You specified an invalid manpath.");
918 		free(manpath);
919 		return;
920 	}
921 
922 	/*
923 	 * Begin by chdir()ing into the manpath.
924 	 * This way we can pick up the database files, which are
925 	 * relative to the manpath root.
926 	 */
927 
928 	if (chdir(manpath) == -1) {
929 		warn("chdir %s", manpath);
930 		pg_error_internal();
931 		free(manpath);
932 		return;
933 	}
934 	free(manpath);
935 
936 	if ( ! validate_filename(file)) {
937 		pg_error_badrequest(
938 		    "You specified an invalid manual file.");
939 		return;
940 	}
941 
942 	resp_begin_html(200, NULL, file);
943 	resp_searchform(req, FOCUS_NONE);
944 	resp_show(req, file);
945 	resp_end_html();
946 }
947 
948 static void
949 pg_search(const struct req *req)
950 {
951 	struct mansearch	  search;
952 	struct manpaths		  paths;
953 	struct manpage		 *res;
954 	char			**argv;
955 	char			 *query, *rp, *wp;
956 	size_t			  ressz;
957 	int			  argc;
958 
959 	/*
960 	 * Begin by chdir()ing into the root of the manpath.
961 	 * This way we can pick up the database files, which are
962 	 * relative to the manpath root.
963 	 */
964 
965 	if (chdir(req->q.manpath) == -1) {
966 		warn("chdir %s", req->q.manpath);
967 		pg_error_internal();
968 		return;
969 	}
970 
971 	search.arch = req->q.arch;
972 	search.sec = req->q.sec;
973 	search.outkey = "Nd";
974 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
975 	search.firstmatch = 1;
976 
977 	paths.sz = 1;
978 	paths.paths = mandoc_malloc(sizeof(char *));
979 	paths.paths[0] = mandoc_strdup(".");
980 
981 	/*
982 	 * Break apart at spaces with backslash-escaping.
983 	 */
984 
985 	argc = 0;
986 	argv = NULL;
987 	rp = query = mandoc_strdup(req->q.query);
988 	for (;;) {
989 		while (isspace((unsigned char)*rp))
990 			rp++;
991 		if (*rp == '\0')
992 			break;
993 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
994 		argv[argc++] = wp = rp;
995 		for (;;) {
996 			if (isspace((unsigned char)*rp)) {
997 				*wp = '\0';
998 				rp++;
999 				break;
1000 			}
1001 			if (rp[0] == '\\' && rp[1] != '\0')
1002 				rp++;
1003 			if (wp != rp)
1004 				*wp = *rp;
1005 			if (*rp == '\0')
1006 				break;
1007 			wp++;
1008 			rp++;
1009 		}
1010 	}
1011 
1012 	res = NULL;
1013 	ressz = 0;
1014 	if (req->isquery && req->q.equal && argc == 1)
1015 		pg_redirect(req, argv[0]);
1016 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1017 		pg_noresult(req, 400, "Bad Request",
1018 		    "You entered an invalid query.");
1019 	else if (ressz == 0)
1020 		pg_noresult(req, 404, "Not Found", "No results found.");
1021 	else
1022 		pg_searchres(req, res, ressz);
1023 
1024 	free(query);
1025 	mansearch_free(res, ressz);
1026 	free(paths.paths[0]);
1027 	free(paths.paths);
1028 }
1029 
1030 int
1031 main(void)
1032 {
1033 	struct req	 req;
1034 	struct itimerval itimer;
1035 	const char	*path;
1036 	const char	*querystring;
1037 	int		 i;
1038 
1039 	/*
1040 	 * The "rpath" pledge could be revoked after mparse_readfd()
1041 	 * if the file desciptor to "/footer.html" would be opened
1042 	 * up front, but it's probably not worth the complication
1043 	 * of the code it would cause: it would require scattering
1044 	 * pledge() calls in multiple low-level resp_*() functions.
1045 	 */
1046 
1047 	if (pledge("stdio rpath", NULL) == -1) {
1048 		warn("pledge");
1049 		pg_error_internal();
1050 		return EXIT_FAILURE;
1051 	}
1052 
1053 	/* Poor man's ReDoS mitigation. */
1054 
1055 	itimer.it_value.tv_sec = 2;
1056 	itimer.it_value.tv_usec = 0;
1057 	itimer.it_interval.tv_sec = 2;
1058 	itimer.it_interval.tv_usec = 0;
1059 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1060 		warn("setitimer");
1061 		pg_error_internal();
1062 		return EXIT_FAILURE;
1063 	}
1064 
1065 	/*
1066 	 * First we change directory into the MAN_DIR so that
1067 	 * subsequent scanning for manpath directories is rooted
1068 	 * relative to the same position.
1069 	 */
1070 
1071 	if (chdir(MAN_DIR) == -1) {
1072 		warn("MAN_DIR: %s", MAN_DIR);
1073 		pg_error_internal();
1074 		return EXIT_FAILURE;
1075 	}
1076 
1077 	memset(&req, 0, sizeof(struct req));
1078 	req.q.equal = 1;
1079 	parse_manpath_conf(&req);
1080 
1081 	/* Parse the path info and the query string. */
1082 
1083 	if ((path = getenv("PATH_INFO")) == NULL)
1084 		path = "";
1085 	else if (*path == '/')
1086 		path++;
1087 
1088 	if (*path != '\0') {
1089 		parse_path_info(&req, path);
1090 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1091 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1092 			path = "";
1093 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1094 		parse_query_string(&req, querystring);
1095 
1096 	/* Validate parsed data and add defaults. */
1097 
1098 	if (req.q.manpath == NULL)
1099 		req.q.manpath = mandoc_strdup(req.p[0]);
1100 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1101 		pg_error_badrequest(
1102 		    "You specified an invalid manpath.");
1103 		return EXIT_FAILURE;
1104 	}
1105 
1106 	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1107 		pg_error_badrequest(
1108 		    "You specified an invalid architecture.");
1109 		return EXIT_FAILURE;
1110 	}
1111 
1112 	/* Dispatch to the three different pages. */
1113 
1114 	if ('\0' != *path)
1115 		pg_show(&req, path);
1116 	else if (NULL != req.q.query)
1117 		pg_search(&req);
1118 	else
1119 		pg_index(&req);
1120 
1121 	free(req.q.manpath);
1122 	free(req.q.arch);
1123 	free(req.q.sec);
1124 	free(req.q.query);
1125 	for (i = 0; i < (int)req.psz; i++)
1126 		free(req.p[i]);
1127 	free(req.p);
1128 	return EXIT_SUCCESS;
1129 }
1130 
1131 /*
1132  * Translate PATH_INFO to a query.
1133  */
1134 static void
1135 parse_path_info(struct req *req, const char *path)
1136 {
1137 	const char	*name, *sec, *end;
1138 
1139 	req->isquery = 0;
1140 	req->q.equal = 1;
1141 	req->q.manpath = NULL;
1142 	req->q.arch = NULL;
1143 
1144 	/* Mandatory manual page name. */
1145 	if ((name = strrchr(path, '/')) == NULL)
1146 		name = path;
1147 	else
1148 		name++;
1149 
1150 	/* Optional trailing section. */
1151 	sec = strrchr(name, '.');
1152 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1153 		req->q.query = mandoc_strndup(name, sec - name - 1);
1154 		req->q.sec = mandoc_strdup(sec);
1155 	} else {
1156 		req->q.query = mandoc_strdup(name);
1157 		req->q.sec = NULL;
1158 	}
1159 
1160 	/* Handle the case of name[.section] only. */
1161 	if (name == path)
1162 		return;
1163 
1164 	/* Optional manpath. */
1165 	end = strchr(path, '/');
1166 	req->q.manpath = mandoc_strndup(path, end - path);
1167 	if (validate_manpath(req, req->q.manpath)) {
1168 		path = end + 1;
1169 		if (name == path)
1170 			return;
1171 	} else {
1172 		free(req->q.manpath);
1173 		req->q.manpath = NULL;
1174 	}
1175 
1176 	/* Optional section. */
1177 	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1178 		path += 3;
1179 		end = strchr(path, '/');
1180 		free(req->q.sec);
1181 		req->q.sec = mandoc_strndup(path, end - path);
1182 		path = end + 1;
1183 		if (name == path)
1184 			return;
1185 	}
1186 
1187 	/* Optional architecture. */
1188 	end = strchr(path, '/');
1189 	if (end + 1 != name) {
1190 		pg_error_badrequest(
1191 		    "You specified too many directory components.");
1192 		exit(EXIT_FAILURE);
1193 	}
1194 	req->q.arch = mandoc_strndup(path, end - path);
1195 	if (validate_arch(req->q.arch) == 0) {
1196 		pg_error_badrequest(
1197 		    "You specified an invalid directory component.");
1198 		exit(EXIT_FAILURE);
1199 	}
1200 }
1201 
1202 /*
1203  * Scan for indexable paths.
1204  */
1205 static void
1206 parse_manpath_conf(struct req *req)
1207 {
1208 	FILE	*fp;
1209 	char	*dp;
1210 	size_t	 dpsz;
1211 	ssize_t	 len;
1212 
1213 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1214 		warn("%s/manpath.conf", MAN_DIR);
1215 		pg_error_internal();
1216 		exit(EXIT_FAILURE);
1217 	}
1218 
1219 	dp = NULL;
1220 	dpsz = 0;
1221 
1222 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1223 		if (dp[len - 1] == '\n')
1224 			dp[--len] = '\0';
1225 		req->p = mandoc_realloc(req->p,
1226 		    (req->psz + 1) * sizeof(char *));
1227 		if ( ! validate_urifrag(dp)) {
1228 			warnx("%s/manpath.conf contains "
1229 			    "unsafe path \"%s\"", MAN_DIR, dp);
1230 			pg_error_internal();
1231 			exit(EXIT_FAILURE);
1232 		}
1233 		if (strchr(dp, '/') != NULL) {
1234 			warnx("%s/manpath.conf contains "
1235 			    "path with slash \"%s\"", MAN_DIR, dp);
1236 			pg_error_internal();
1237 			exit(EXIT_FAILURE);
1238 		}
1239 		req->p[req->psz++] = dp;
1240 		dp = NULL;
1241 		dpsz = 0;
1242 	}
1243 	free(dp);
1244 
1245 	if (req->p == NULL) {
1246 		warnx("%s/manpath.conf is empty", MAN_DIR);
1247 		pg_error_internal();
1248 		exit(EXIT_FAILURE);
1249 	}
1250 }
1251