xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 5a38ef86d0b61900239c7913d24a05e7b88a58f0)
1 /* $OpenBSD: cgi.c,v 1.115 2021/10/24 21:24:16 deraadt Exp $ */
2 /*
3  * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de>
4  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the man.cgi(8) program.
19  */
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "mandoc_parse.h"
40 #include "main.h"
41 #include "manconf.h"
42 #include "mansearch.h"
43 #include "cgi.h"
44 
/*
 * Search parameters of one request, as handed to the search function.
 * The string members are released by main() and are NULL when the
 * request did not supply the respective value.
 */
struct	query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
55 
56 struct	req {
57 	struct query	  q;
58 	char		**p; /* array of available manpaths */
59 	size_t		  psz; /* number of available manpaths */
60 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
61 };
62 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,	/* no autofocus attribute */
	FOCUS_QUERY	/* autofocus on the query input box */
};
67 
68 static	void		 html_print(const char *);
69 static	void		 html_putchar(char);
70 static	int		 http_decode(char *);
71 static	void		 http_encode(const char *);
72 static	void		 parse_manpath_conf(struct req *);
73 static	void		 parse_path_info(struct req *, const char *);
74 static	void		 parse_query_string(struct req *, const char *);
75 static	void		 pg_error_badrequest(const char *);
76 static	void		 pg_error_internal(void);
77 static	void		 pg_index(const struct req *);
78 static	void		 pg_noresult(const struct req *, int, const char *,
79 				const char *);
80 static	void		 pg_redirect(const struct req *, const char *);
81 static	void		 pg_search(const struct req *);
82 static	void		 pg_searchres(const struct req *,
83 				struct manpage *, size_t);
84 static	void		 pg_show(struct req *, const char *);
85 static	void		 resp_begin_html(int, const char *, const char *);
86 static	void		 resp_begin_http(int, const char *);
87 static	void		 resp_catman(const struct req *, const char *);
88 static	void		 resp_copy(const char *);
89 static	void		 resp_end_html(void);
90 static	void		 resp_format(const struct req *, const char *);
91 static	void		 resp_searchform(const struct req *, enum focus);
92 static	void		 resp_show(const struct req *, const char *);
93 static	void		 set_query_attr(char **, char **);
94 static	int		 validate_arch(const char *);
95 static	int		 validate_filename(const char *);
96 static	int		 validate_manpath(const struct req *, const char *);
97 static	int		 validate_urifrag(const char *);
98 
99 static	const char	 *scriptname = SCRIPT_NAME;
100 
/*
 * Preference rank of manual sections 1 to 9, indexed by section
 * digit minus one.  pg_searchres() prefers the smaller rank.
 */
static	const int sec_prios[] = {
	1,	/* section 1 */
	4,	/* section 2 */
	5,	/* section 3 */
	8,	/* section 4 */
	6,	/* section 5 */
	3,	/* section 6 */
	7,	/* section 7 */
	2,	/* section 8 */
	9	/* section 9 */
};

/*
 * Values and labels of the section selector in the search form.
 * Both arrays are indexed in parallel and have sec_MAX entries.
 */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
119 
/*
 * Architecture names accepted by validate_arch() and offered, in this
 * order, by the architecture selector of the search form.
 * The list is kept in two groups, but the code treats it as flat.
 */
static	const char *const arch_names[] = {
	/* first group */
	"amd64", "alpha", "armv7", "arm64", "hppa",
	"i386", "landisk", "loongson", "luna88k", "macppc",
	"mips64", "octeon", "powerpc64", "riscv64", "sparc64",

	/* second group */
	"amiga", "arc", "armish", "arm32", "atari",
	"aviion", "beagle", "cats", "hppa64", "hp300",
	"ia64", "mac68k", "mvme68k", "mvme88k", "mvmeppc",
	"palm", "pc532", "pegasos", "pmax", "powerpc",
	"sgi", "socppc", "solbourne", "sparc", "sun3",
	"vax", "wgrisc", "x68k", "zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
137 
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML by their entities.
 * Everything else, including non-ASCII bytes, is passed through
 * unchanged: be warned!
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		putchar((unsigned char)c);
		return;
	}
	fputs(entity, stdout);
}
164 
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
178 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored; it is freed and *attr becomes NULL instead.
 * In either case, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	if (*newval == '\0') {
		free(newval);
		newval = NULL;
	}
	*attr = newval;
	*val = NULL;
}
195 
/*
 * Parse the QUERY_STRING for key-value pairs
 * and store the values into the query structure.
 *
 * Pairs are separated by ';' or '&' and split at the first '='.
 * Key and value are HTTP-decoded in place; a pair that lacks the '='
 * or fails to decode is skipped.  The keys "query", "apropos",
 * "manpath", "sec" and "arch" are recognized (plus "sektion" when
 * COMPAT_OLDURI is defined); all other keys are ignored.
 * If a key occurs repeatedly, the last occurrence wins.
 *
 * The query members of req are overwritten without being freed,
 * and req->isquery is set.
 */
static void
parse_query_string(struct req *req, const char *qs)
{
	char		*key, *val;
	size_t		 keysz, valsz;

	req->isquery	= 1;
	req->q.manpath	= NULL;
	req->q.arch	= NULL;
	req->q.sec	= NULL;
	req->q.query	= NULL;
	req->q.equal	= 1;

	key = val = NULL;
	while (*qs != '\0') {

		/* Parse one key. */

		keysz = strcspn(qs, "=;&");
		key = mandoc_strndup(qs, keysz);
		qs += keysz;
		/* A key without a value is dropped; val is still NULL here. */
		if (*qs != '=')
			goto next;

		/* Parse one value. */

		valsz = strcspn(++qs, ";&");
		val = mandoc_strndup(qs, valsz);
		qs += valsz;

		/* Decode and catch encoding errors. */

		if ( ! (http_decode(key) && http_decode(val)))
			goto next;

		/* Handle key-value pairs. */

		if ( ! strcmp(key, "query"))
			set_query_attr(&req->q.query, &val);

		/* Only the value "0" selects whole-name matching. */
		else if ( ! strcmp(key, "apropos"))
			req->q.equal = !strcmp(val, "0");

		else if ( ! strcmp(key, "manpath")) {
#ifdef COMPAT_OLDURI
			/* Rewrite "OpenBSD Cxxx" to "OpenBSD-cxxx". */
			if ( ! strncmp(val, "OpenBSD ", 8)) {
				val[7] = '-';
				if ('C' == val[8])
					val[8] = 'c';
			}
#endif
			set_query_attr(&req->q.manpath, &val);
		}

		else if ( ! (strcmp(key, "sec")
#ifdef COMPAT_OLDURI
		    && strcmp(key, "sektion")
#endif
		    )) {
			/* Section "0" means all sections: store NULL. */
			if ( ! strcmp(val, "0"))
				*val = '\0';
			set_query_attr(&req->q.sec, &val);
		}

		else if ( ! strcmp(key, "arch")) {
			/* "default" means all architectures: store NULL. */
			if ( ! strcmp(val, "default"))
				*val = '\0';
			set_query_attr(&req->q.arch, &val);
		}

		/*
		 * The key must be freed in any case.
		 * The val may have been handed over to the query
		 * structure, in which case it is now NULL.
		 */
next:
		free(key);
		key = NULL;
		free(val);
		val = NULL;

		/* Step over the ';' or '&' that ended this pair. */
		if (*qs != '\0')
			qs++;
	}
}
285 
286 /*
287  * HTTP-decode a string.  The standard explanation is that this turns
288  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
289  * over the allocated string.
290  */
291 static int
292 http_decode(char *p)
293 {
294 	char             hex[3];
295 	char		*q;
296 	int              c;
297 
298 	hex[2] = '\0';
299 
300 	q = p;
301 	for ( ; '\0' != *p; p++, q++) {
302 		if ('%' == *p) {
303 			if ('\0' == (hex[0] = *(p + 1)))
304 				return 0;
305 			if ('\0' == (hex[1] = *(p + 2)))
306 				return 0;
307 			if (1 != sscanf(hex, "%x", &c))
308 				return 0;
309 			if ('\0' == c)
310 				return 0;
311 
312 			*q = (char)c;
313 			p += 2;
314 		} else
315 			*q = '+' == *p ? ' ' : *p;
316 	}
317 
318 	*q = '\0';
319 	return 1;
320 }
321 
/*
 * Write a string to standard output with percent-encoding.
 * ASCII letters and digits and the characters "-._~" are written
 * as they are; every other byte becomes %XX with uppercase
 * hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
333 
/*
 * Write the HTTP response headers and the blank line ending them,
 * then flush standard output.
 * The Status line is only sent for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
350 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 *
 * The data bypasses stdio and goes directly to the file descriptor,
 * so pending stdio output is flushed first to keep the order intact.
 * write(2) may accept fewer bytes than requested, hence each buffer
 * is written in a loop until it is complete.  Copying stops at the
 * first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			/* Give up rather than spin on a dead descriptor. */
			if (nw <= 0)
				goto done;
		}
	}
done:
	close(fd);
}
365 
366 static void
367 resp_begin_html(int code, const char *msg, const char *file)
368 {
369 	const char	*name, *sec, *cp;
370 	int		 namesz, secsz;
371 
372 	resp_begin_http(code, msg);
373 
374 	printf("<!DOCTYPE html>\n"
375 	       "<html>\n"
376 	       "<head>\n"
377 	       "  <meta charset=\"UTF-8\"/>\n"
378 	       "  <meta name=\"viewport\""
379 		      " content=\"width=device-width, initial-scale=1.0\">\n"
380 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
381 	       " type=\"text/css\" media=\"all\">\n"
382 	       "  <title>",
383 	       CSS_DIR);
384 	if (file != NULL) {
385 		cp = strrchr(file, '/');
386 		name = cp == NULL ? file : cp + 1;
387 		cp = strrchr(name, '.');
388 		namesz = cp == NULL ? strlen(name) : cp - name;
389 		sec = NULL;
390 		if (cp != NULL && cp[1] != '0') {
391 			sec = cp + 1;
392 			secsz = strlen(sec);
393 		} else if (name - file > 1) {
394 			for (cp = name - 2; cp >= file; cp--) {
395 				if (*cp < '1' || *cp > '9')
396 					continue;
397 				sec = cp;
398 				secsz = name - cp - 1;
399 				break;
400 			}
401 		}
402 		printf("%.*s", namesz, name);
403 		if (sec != NULL)
404 			printf("(%.*s)", secsz, sec);
405 		fputs(" - ", stdout);
406 	}
407 	printf("%s</title>\n"
408 	       "</head>\n"
409 	       "<body>\n",
410 	       CUSTOMIZE_TITLE);
411 
412 	resp_copy(MAN_DIR "/header.html");
413 }
414 
415 static void
416 resp_end_html(void)
417 {
418 
419 	resp_copy(MAN_DIR "/footer.html");
420 
421 	puts("</body>\n"
422 	     "</html>");
423 }
424 
425 static void
426 resp_searchform(const struct req *req, enum focus focus)
427 {
428 	int		 i;
429 
430 	printf("<form action=\"/%s\" method=\"get\" "
431 	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
432 	       "  <fieldset>\n"
433 	       "    <legend>Manual Page Search Parameters</legend>\n",
434 	       scriptname);
435 
436 	/* Write query input box. */
437 
438 	printf("    <input type=\"search\" name=\"query\" value=\"");
439 	if (req->q.query != NULL)
440 		html_print(req->q.query);
441 	printf( "\" size=\"40\"");
442 	if (focus == FOCUS_QUERY)
443 		printf(" autofocus");
444 	puts(">");
445 
446 	/* Write submission buttons. */
447 
448 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
449 		"man</button>\n"
450 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
451 		"apropos</button>\n"
452 		"    <br/>\n");
453 
454 	/* Write section selector. */
455 
456 	puts("    <select name=\"sec\">");
457 	for (i = 0; i < sec_MAX; i++) {
458 		printf("      <option value=\"%s\"", sec_numbers[i]);
459 		if (NULL != req->q.sec &&
460 		    0 == strcmp(sec_numbers[i], req->q.sec))
461 			printf(" selected=\"selected\"");
462 		printf(">%s</option>\n", sec_names[i]);
463 	}
464 	puts("    </select>");
465 
466 	/* Write architecture selector. */
467 
468 	printf(	"    <select name=\"arch\">\n"
469 		"      <option value=\"default\"");
470 	if (NULL == req->q.arch)
471 		printf(" selected=\"selected\"");
472 	puts(">All Architectures</option>");
473 	for (i = 0; i < arch_MAX; i++) {
474 		printf("      <option");
475 		if (NULL != req->q.arch &&
476 		    0 == strcmp(arch_names[i], req->q.arch))
477 			printf(" selected=\"selected\"");
478 		printf(">%s</option>\n", arch_names[i]);
479 	}
480 	puts("    </select>");
481 
482 	/* Write manpath selector. */
483 
484 	if (req->psz > 1) {
485 		puts("    <select name=\"manpath\">");
486 		for (i = 0; i < (int)req->psz; i++) {
487 			printf("      <option");
488 			if (strcmp(req->q.manpath, req->p[i]) == 0)
489 				printf(" selected=\"selected\"");
490 			printf(">");
491 			html_print(req->p[i]);
492 			puts("</option>");
493 		}
494 		puts("    </select>");
495 	}
496 
497 	puts("  </fieldset>\n"
498 	     "</form>");
499 }
500 
/*
 * Check that a string consists only of ASCII letters and digits
 * and the characters '-', '.', '/', and '_'.
 * Returns 1 if so (including for the empty string), or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
514 
515 static int
516 validate_manpath(const struct req *req, const char* manpath)
517 {
518 	size_t	 i;
519 
520 	for (i = 0; i < req->psz; i++)
521 		if ( ! strcmp(manpath, req->p[i]))
522 			return 1;
523 
524 	return 0;
525 }
526 
527 static int
528 validate_arch(const char *arch)
529 {
530 	int	 i;
531 
532 	for (i = 0; i < arch_MAX; i++)
533 		if (strcmp(arch, arch_names[i]) == 0)
534 			return 1;
535 
536 	return 0;
537 }
538 
/*
 * Check that a file name is acceptable as a manual page path
 * relative to a manpath.  One leading "./" is ignored.
 * The name must not contain "../" or "/.." and has to start
 * with "man" or "cat".
 * Returns 1 if the name is acceptable, or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse attempts to climb out of the manpath. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	/* Only the manN/ and catN/ hierarchies are served. */
	if (strncmp(file, "man", 3) != 0 && strncmp(file, "cat", 3) != 0)
		return 0;

	return 1;
}
549 
550 static void
551 pg_index(const struct req *req)
552 {
553 
554 	resp_begin_html(200, NULL, NULL);
555 	resp_searchform(req, FOCUS_QUERY);
556 	printf("<p>\n"
557 	       "This web interface is documented in the\n"
558 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
559 	       "manual, and the\n"
560 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
561 	       "manual explains the query syntax.\n"
562 	       "</p>\n",
563 	       scriptname, *scriptname == '\0' ? "" : "/",
564 	       scriptname, *scriptname == '\0' ? "" : "/");
565 	resp_end_html();
566 }
567 
568 static void
569 pg_noresult(const struct req *req, int code, const char *http_msg,
570     const char *user_msg)
571 {
572 	resp_begin_html(code, http_msg, NULL);
573 	resp_searchform(req, FOCUS_QUERY);
574 	puts("<p>");
575 	puts(user_msg);
576 	puts("</p>");
577 	resp_end_html();
578 }
579 
580 static void
581 pg_error_badrequest(const char *msg)
582 {
583 
584 	resp_begin_html(400, "Bad Request", NULL);
585 	puts("<h1>Bad Request</h1>\n"
586 	     "<p>\n");
587 	puts(msg);
588 	printf("Try again from the\n"
589 	       "<a href=\"/%s\">main page</a>.\n"
590 	       "</p>", scriptname);
591 	resp_end_html();
592 }
593 
/*
 * Write a "500 Internal Server Error" page.
 * No details are disclosed to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
601 
602 static void
603 pg_redirect(const struct req *req, const char *name)
604 {
605 	printf("Status: 303 See Other\r\n"
606 	    "Location: /");
607 	if (*scriptname != '\0')
608 		printf("%s/", scriptname);
609 	if (strcmp(req->q.manpath, req->p[0]))
610 		printf("%s/", req->q.manpath);
611 	if (req->q.arch != NULL)
612 		printf("%s/", req->q.arch);
613 	http_encode(name);
614 	if (req->q.sec != NULL) {
615 		putchar('.');
616 		http_encode(req->q.sec);
617 	}
618 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
619 }
620 
621 static void
622 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
623 {
624 	char		*arch, *archend;
625 	const char	*sec;
626 	size_t		 i, iuse;
627 	int		 archprio, archpriouse;
628 	int		 prio, priouse;
629 
630 	for (i = 0; i < sz; i++) {
631 		if (validate_filename(r[i].file))
632 			continue;
633 		warnx("invalid filename %s in %s database",
634 		    r[i].file, req->q.manpath);
635 		pg_error_internal();
636 		return;
637 	}
638 
639 	if (req->isquery && sz == 1) {
640 		/*
641 		 * If we have just one result, then jump there now
642 		 * without any delay.
643 		 */
644 		printf("Status: 303 See Other\r\n"
645 		    "Location: /");
646 		if (*scriptname != '\0')
647 			printf("%s/", scriptname);
648 		if (strcmp(req->q.manpath, req->p[0]))
649 			printf("%s/", req->q.manpath);
650 		printf("%s\r\n"
651 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
652 		    r[0].file);
653 		return;
654 	}
655 
656 	/*
657 	 * In man(1) mode, show one of the pages
658 	 * even if more than one is found.
659 	 */
660 
661 	iuse = 0;
662 	if (req->q.equal || sz == 1) {
663 		priouse = 20;
664 		archpriouse = 3;
665 		for (i = 0; i < sz; i++) {
666 			sec = r[i].file;
667 			sec += strcspn(sec, "123456789");
668 			if (sec[0] == '\0')
669 				continue;
670 			prio = sec_prios[sec[0] - '1'];
671 			if (sec[1] != '/')
672 				prio += 10;
673 			if (req->q.arch == NULL) {
674 				archprio =
675 				    ((arch = strchr(sec + 1, '/'))
676 					== NULL) ? 3 :
677 				    ((archend = strchr(arch + 1, '/'))
678 					== NULL) ? 0 :
679 				    strncmp(arch, "amd64/",
680 					archend - arch) ? 2 : 1;
681 				if (archprio < archpriouse) {
682 					archpriouse = archprio;
683 					priouse = prio;
684 					iuse = i;
685 					continue;
686 				}
687 				if (archprio > archpriouse)
688 					continue;
689 			}
690 			if (prio >= priouse)
691 				continue;
692 			priouse = prio;
693 			iuse = i;
694 		}
695 		resp_begin_html(200, NULL, r[iuse].file);
696 	} else
697 		resp_begin_html(200, NULL, NULL);
698 
699 	resp_searchform(req,
700 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
701 
702 	if (sz > 1) {
703 		puts("<table class=\"results\">");
704 		for (i = 0; i < sz; i++) {
705 			printf("  <tr>\n"
706 			       "    <td>"
707 			       "<a class=\"Xr\" href=\"/");
708 			if (*scriptname != '\0')
709 				printf("%s/", scriptname);
710 			if (strcmp(req->q.manpath, req->p[0]))
711 				printf("%s/", req->q.manpath);
712 			printf("%s\">", r[i].file);
713 			html_print(r[i].names);
714 			printf("</a></td>\n"
715 			       "    <td><span class=\"Nd\">");
716 			html_print(r[i].output);
717 			puts("</span></td>\n"
718 			     "  </tr>");
719 		}
720 		puts("</table>");
721 	}
722 
723 	if (req->q.equal || sz == 1) {
724 		puts("<hr>");
725 		resp_show(req, r[iuse].file);
726 	}
727 
728 	resp_end_html();
729 }
730 
/*
 * Write a preformatted manual page (cat page) as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * of the form "x\by" are translated: an underscore overstruck with
 * a character yields that character in italics, a few two-character
 * combinations from the original man2html.c yield '*' or '+',
 * and any other overstrike yields the second character in bold.
 * All text is HTML-escaped.  If the file cannot be opened,
 * an error paragraph is written instead.
 *
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair have to be
			 * compared: p[i + 1] is always the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
867 
868 static void
869 resp_format(const struct req *req, const char *file)
870 {
871 	struct manoutput conf;
872 	struct mparse	*mp;
873 	struct roff_meta *meta;
874 	void		*vp;
875 	int		 fd;
876 	int		 usepath;
877 
878 	if (-1 == (fd = open(file, O_RDONLY))) {
879 		puts("<p>You specified an invalid manual file.</p>");
880 		return;
881 	}
882 
883 	mchars_alloc();
884 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
885 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
886 	mparse_readfd(mp, fd, file);
887 	close(fd);
888 	meta = mparse_result(mp);
889 
890 	memset(&conf, 0, sizeof(conf));
891 	conf.fragment = 1;
892 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
893 	usepath = strcmp(req->q.manpath, req->p[0]);
894 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
895 	    scriptname, *scriptname == '\0' ? "" : "/",
896 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
897 
898 	vp = html_alloc(&conf);
899 	if (meta->macroset == MACROSET_MDOC)
900 		html_mdoc(vp, meta);
901 	else
902 		html_man(vp, meta);
903 
904 	html_free(vp);
905 	mparse_free(mp);
906 	mchars_free();
907 	free(conf.man);
908 	free(conf.style);
909 }
910 
/*
 * Write the manual page contained in a file.
 * One leading "./" is skipped.  Names then starting with 'c'
 * are treated as preformatted pages, all others as manual
 * page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel == 'c')
		resp_catman(req, rel);
	else
		resp_format(req, rel);
}
923 
924 static void
925 pg_show(struct req *req, const char *fullpath)
926 {
927 	char		*manpath;
928 	const char	*file;
929 
930 	if ((file = strchr(fullpath, '/')) == NULL) {
931 		pg_error_badrequest(
932 		    "You did not specify a page to show.");
933 		return;
934 	}
935 	manpath = mandoc_strndup(fullpath, file - fullpath);
936 	file++;
937 
938 	if ( ! validate_manpath(req, manpath)) {
939 		pg_error_badrequest(
940 		    "You specified an invalid manpath.");
941 		free(manpath);
942 		return;
943 	}
944 
945 	/*
946 	 * Begin by chdir()ing into the manpath.
947 	 * This way we can pick up the database files, which are
948 	 * relative to the manpath root.
949 	 */
950 
951 	if (chdir(manpath) == -1) {
952 		warn("chdir %s", manpath);
953 		pg_error_internal();
954 		free(manpath);
955 		return;
956 	}
957 	free(manpath);
958 
959 	if ( ! validate_filename(file)) {
960 		pg_error_badrequest(
961 		    "You specified an invalid manual file.");
962 		return;
963 	}
964 
965 	resp_begin_html(200, NULL, file);
966 	resp_searchform(req, FOCUS_NONE);
967 	resp_show(req, file);
968 	resp_end_html();
969 }
970 
971 static void
972 pg_search(const struct req *req)
973 {
974 	struct mansearch	  search;
975 	struct manpaths		  paths;
976 	struct manpage		 *res;
977 	char			**argv;
978 	char			 *query, *rp, *wp;
979 	size_t			  ressz;
980 	int			  argc;
981 
982 	/*
983 	 * Begin by chdir()ing into the root of the manpath.
984 	 * This way we can pick up the database files, which are
985 	 * relative to the manpath root.
986 	 */
987 
988 	if (chdir(req->q.manpath) == -1) {
989 		warn("chdir %s", req->q.manpath);
990 		pg_error_internal();
991 		return;
992 	}
993 
994 	search.arch = req->q.arch;
995 	search.sec = req->q.sec;
996 	search.outkey = "Nd";
997 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
998 	search.firstmatch = 1;
999 
1000 	paths.sz = 1;
1001 	paths.paths = mandoc_malloc(sizeof(char *));
1002 	paths.paths[0] = mandoc_strdup(".");
1003 
1004 	/*
1005 	 * Break apart at spaces with backslash-escaping.
1006 	 */
1007 
1008 	argc = 0;
1009 	argv = NULL;
1010 	rp = query = mandoc_strdup(req->q.query);
1011 	for (;;) {
1012 		while (isspace((unsigned char)*rp))
1013 			rp++;
1014 		if (*rp == '\0')
1015 			break;
1016 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1017 		argv[argc++] = wp = rp;
1018 		for (;;) {
1019 			if (isspace((unsigned char)*rp)) {
1020 				*wp = '\0';
1021 				rp++;
1022 				break;
1023 			}
1024 			if (rp[0] == '\\' && rp[1] != '\0')
1025 				rp++;
1026 			if (wp != rp)
1027 				*wp = *rp;
1028 			if (*rp == '\0')
1029 				break;
1030 			wp++;
1031 			rp++;
1032 		}
1033 	}
1034 
1035 	res = NULL;
1036 	ressz = 0;
1037 	if (req->isquery && req->q.equal && argc == 1)
1038 		pg_redirect(req, argv[0]);
1039 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1040 		pg_noresult(req, 400, "Bad Request",
1041 		    "You entered an invalid query.");
1042 	else if (ressz == 0)
1043 		pg_noresult(req, 404, "Not Found", "No results found.");
1044 	else
1045 		pg_searchres(req, res, ressz);
1046 
1047 	free(query);
1048 	mansearch_free(res, ressz);
1049 	free(paths.paths[0]);
1050 	free(paths.paths);
1051 }
1052 
/*
 * Entry point of the CGI program.
 *
 * Restricts the process with pledge(2), arms a CPU time limit,
 * changes into MAN_DIR, reads the list of manpaths, parses the
 * request from PATH_INFO or else from QUERY_STRING, validates it,
 * and writes one of three pages: a manual page, a search result,
 * or the start page.
 *
 * Returns EXIT_SUCCESS after dispatching to a page, or EXIT_FAILURE
 * if setup fails or the request names an invalid manpath or
 * architecture.  Some of the functions called exit on their own.
 */
int
main(void)
{
	struct req	 req;
	struct itimerval itimer;
	const char	*path;
	const char	*querystring;
	int		 i;

	/*
	 * The "rpath" pledge could be revoked after mparse_readfd()
	 * if the file descriptor to "/footer.html" would be opened
	 * up front, but it's probably not worth the complication
	 * of the code it would cause: it would require scattering
	 * pledge() calls in multiple low-level resp_*() functions.
	 */

	if (pledge("stdio rpath", NULL) == -1) {
		warn("pledge");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/* Poor man's ReDoS mitigation. */

	/* The virtual timer fires after 2 seconds of CPU time. */
	itimer.it_value.tv_sec = 2;
	itimer.it_value.tv_usec = 0;
	itimer.it_interval.tv_sec = 2;
	itimer.it_interval.tv_usec = 0;
	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
		warn("setitimer");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/*
	 * First we change directory into the MAN_DIR so that
	 * subsequent scanning for manpath directories is rooted
	 * relative to the same position.
	 */

	if (chdir(MAN_DIR) == -1) {
		warn("MAN_DIR: %s", MAN_DIR);
		pg_error_internal();
		return EXIT_FAILURE;
	}

	memset(&req, 0, sizeof(struct req));
	req.q.equal = 1;
	parse_manpath_conf(&req);

	/* Parse the path info and the query string. */

	/* One leading slash of PATH_INFO is skipped. */
	if ((path = getenv("PATH_INFO")) == NULL)
		path = "";
	else if (*path == '/')
		path++;

	/*
	 * A non-empty PATH_INFO takes precedence over QUERY_STRING.
	 * Unless it names a manpath, a section, a page name, and an
	 * existing file, the path is emptied, so the request is handled
	 * as a search for the parsed name instead.
	 */
	if (*path != '\0') {
		parse_path_info(&req, path);
		if (req.q.manpath == NULL || req.q.sec == NULL ||
		    *req.q.query == '\0' || access(path, F_OK) == -1)
			path = "";
	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
		parse_query_string(&req, querystring);

	/* Validate parsed data and add defaults. */

	/* The first configured manpath is the default. */
	if (req.q.manpath == NULL)
		req.q.manpath = mandoc_strdup(req.p[0]);
	else if ( ! validate_manpath(&req, req.q.manpath)) {
		pg_error_badrequest(
		    "You specified an invalid manpath.");
		return EXIT_FAILURE;
	}

	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
		pg_error_badrequest(
		    "You specified an invalid architecture.");
		return EXIT_FAILURE;
	}

	/* Dispatch to the three different pages. */

	if ('\0' != *path)
		pg_show(&req, path);
	else if (NULL != req.q.query)
		pg_search(&req);
	else
		pg_index(&req);

	free(req.q.manpath);
	free(req.q.arch);
	free(req.q.sec);
	free(req.q.query);
	for (i = 0; i < (int)req.psz; i++)
		free(req.p[i]);
	free(req.p);
	return EXIT_SUCCESS;
}
1153 
/*
 * Translate PATH_INFO to a query.
 *
 * The accepted form is [manpath/][(man|cat)sec/][arch/]name[.sec]
 * where path has already been stripped of its leading slash.
 * The last path component provides the page name and, if the text
 * after its last dot starts with a digit, the section.  A leading
 * component is only taken as the manpath if it is a configured one.
 * A section found in a directory name overrides the one from the
 * file name suffix.
 *
 * The query members of req are overwritten without being freed.
 * If directory components remain after the optional architecture,
 * or if the architecture is unknown, an error page is written
 * and the program exits.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	const char	*name, *sec, *end;

	req->isquery = 0;
	req->q.equal = 1;
	req->q.manpath = NULL;
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((name = strrchr(path, '/')) == NULL)
		name = path;
	else
		name++;

	/* Optional trailing section. */
	sec = strrchr(name, '.');
	if (sec != NULL && isdigit((unsigned char)*++sec)) {
		/* sec now points after the dot, which is not copied. */
		req->q.query = mandoc_strndup(name, sec - name - 1);
		req->q.sec = mandoc_strdup(sec);
	} else {
		req->q.query = mandoc_strdup(name);
		req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (name == path)
		return;

	/*
	 * From here on, name is preceded by a slash, so each
	 * strchr() call below finds one at or before name - 1.
	 */

	/* Optional manpath. */
	end = strchr(path, '/');
	req->q.manpath = mandoc_strndup(path, end - path);
	if (validate_manpath(req, req->q.manpath)) {
		path = end + 1;
		if (name == path)
			return;
	} else {
		/* Not a manpath: reparse the component below. */
		free(req->q.manpath);
		req->q.manpath = NULL;
	}

	/* Optional section. */
	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
		path += 3;
		end = strchr(path, '/');
		free(req->q.sec);
		req->q.sec = mandoc_strndup(path, end - path);
		path = end + 1;
		if (name == path)
			return;
	}

	/* Optional architecture. */
	end = strchr(path, '/');
	/* Exactly one component may remain before the name. */
	if (end + 1 != name) {
		pg_error_badrequest(
		    "You specified too many directory components.");
		exit(EXIT_FAILURE);
	}
	req->q.arch = mandoc_strndup(path, end - path);
	if (validate_arch(req->q.arch) == 0) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}
}
1224 
1225 /*
1226  * Scan for indexable paths.
1227  */
1228 static void
1229 parse_manpath_conf(struct req *req)
1230 {
1231 	FILE	*fp;
1232 	char	*dp;
1233 	size_t	 dpsz;
1234 	ssize_t	 len;
1235 
1236 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1237 		warn("%s/manpath.conf", MAN_DIR);
1238 		pg_error_internal();
1239 		exit(EXIT_FAILURE);
1240 	}
1241 
1242 	dp = NULL;
1243 	dpsz = 0;
1244 
1245 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1246 		if (dp[len - 1] == '\n')
1247 			dp[--len] = '\0';
1248 		req->p = mandoc_realloc(req->p,
1249 		    (req->psz + 1) * sizeof(char *));
1250 		if ( ! validate_urifrag(dp)) {
1251 			warnx("%s/manpath.conf contains "
1252 			    "unsafe path \"%s\"", MAN_DIR, dp);
1253 			pg_error_internal();
1254 			exit(EXIT_FAILURE);
1255 		}
1256 		if (strchr(dp, '/') != NULL) {
1257 			warnx("%s/manpath.conf contains "
1258 			    "path with slash \"%s\"", MAN_DIR, dp);
1259 			pg_error_internal();
1260 			exit(EXIT_FAILURE);
1261 		}
1262 		req->p[req->psz++] = dp;
1263 		dp = NULL;
1264 		dpsz = 0;
1265 	}
1266 	free(dp);
1267 
1268 	if (req->p == NULL) {
1269 		warnx("%s/manpath.conf is empty", MAN_DIR);
1270 		pg_error_internal();
1271 		exit(EXIT_FAILURE);
1272 	}
1273 }
1274