xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision c020cf82e0cc147236f01a8dca7052034cf9d30d)
1 /* $OpenBSD: cgi.c,v 1.110 2020/04/03 11:34:19 schwarze Exp $ */
2 /*
3  * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
4  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the man.cgi(8) program.
19  */
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "mandoc_parse.h"
40 #include "main.h"
41 #include "manconf.h"
42 #include "mansearch.h"
43 #include "cgi.h"
44 
/*
 * The search parameters of one request,
 * in the form passed to the search function.
 */
struct	query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
55 
56 struct	req {
57 	struct query	  q;
58 	char		**p; /* array of available manpaths */
59 	size_t		  psz; /* number of available manpaths */
60 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
61 };
62 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,		/* no autofocus */
	FOCUS_QUERY		/* autofocus on the query box */
};
67 
68 static	void		 html_print(const char *);
69 static	void		 html_putchar(char);
70 static	int		 http_decode(char *);
71 static	void		 http_encode(const char *);
72 static	void		 parse_manpath_conf(struct req *);
73 static	void		 parse_path_info(struct req *, const char *);
74 static	void		 parse_query_string(struct req *, const char *);
75 static	void		 pg_error_badrequest(const char *);
76 static	void		 pg_error_internal(void);
77 static	void		 pg_index(const struct req *);
78 static	void		 pg_noresult(const struct req *, int, const char *,
79 				const char *);
80 static	void		 pg_redirect(const struct req *, const char *);
81 static	void		 pg_search(const struct req *);
82 static	void		 pg_searchres(const struct req *,
83 				struct manpage *, size_t);
84 static	void		 pg_show(struct req *, const char *);
85 static	void		 resp_begin_html(int, const char *, const char *);
86 static	void		 resp_begin_http(int, const char *);
87 static	void		 resp_catman(const struct req *, const char *);
88 static	void		 resp_copy(const char *);
89 static	void		 resp_end_html(void);
90 static	void		 resp_format(const struct req *, const char *);
91 static	void		 resp_searchform(const struct req *, enum focus);
92 static	void		 resp_show(const struct req *, const char *);
93 static	void		 set_query_attr(char **, char **);
94 static	int		 validate_arch(const char *);
95 static	int		 validate_filename(const char *);
96 static	int		 validate_manpath(const struct req *, const char *);
97 static	int		 validate_urifrag(const char *);
98 
99 static	const char	 *scriptname = SCRIPT_NAME;
100 
/*
 * Preference of the sections "1" to "9", indexed by the section
 * digit minus one; pg_searchres() prefers the smallest value.
 */
static	const int sec_prios[] = {
    1, 4, 5, 8, 6, 3, 7, 2, 9
};

/* Values of the entries in the section selector of the search form. */
static	const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};

/* Labels of the entries in the section selector, in the same order. */
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
119 
/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
    "amd64",     "alpha",     "armv7",     "arm64",     "hppa",
    "i386",      "landisk",   "loongson",  "luna88k",   "macppc",
    "mips64",    "octeon",    "sgi",       "socppc",    "sparc64",
    "amiga",     "arc",       "armish",    "arm32",     "atari",
    "aviion",    "beagle",    "cats",      "hppa64",    "hp300",
    "ia64",      "mac68k",    "mvme68k",   "mvme88k",   "mvmeppc",
    "palm",      "pc532",     "pegasos",   "pmax",      "powerpc",
    "solbourne", "sparc",     "sun3",      "vax",       "wgrisc",
    "x68k",      "zaurus"
};

/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
135 
/*
 * Write one character to standard output, replacing the four
 * characters ", &, > and < by their HTML entities.
 * Everything else, including non-ASCII bytes, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		entity = NULL;
		break;
	}

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
162 
/*
 * Write a string to standard output through html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
176 
/*
 * Replace the query attribute *attr by the allocated string *val.
 * The old attribute is freed.  An empty *val is freed as well and
 * stored as NULL; otherwise the query structure takes over the
 * string.  In both cases *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	if (*newval == '\0') {
		free(newval);
		newval = NULL;
	}
	*attr = newval;
	*val = NULL;
}
193 
194 /*
195  * Parse the QUERY_STRING for key-value pairs
196  * and store the values into the query structure.
197  */
198 static void
199 parse_query_string(struct req *req, const char *qs)
200 {
201 	char		*key, *val;
202 	size_t		 keysz, valsz;
203 
204 	req->isquery	= 1;
205 	req->q.manpath	= NULL;
206 	req->q.arch	= NULL;
207 	req->q.sec	= NULL;
208 	req->q.query	= NULL;
209 	req->q.equal	= 1;
210 
211 	key = val = NULL;
212 	while (*qs != '\0') {
213 
214 		/* Parse one key. */
215 
216 		keysz = strcspn(qs, "=;&");
217 		key = mandoc_strndup(qs, keysz);
218 		qs += keysz;
219 		if (*qs != '=')
220 			goto next;
221 
222 		/* Parse one value. */
223 
224 		valsz = strcspn(++qs, ";&");
225 		val = mandoc_strndup(qs, valsz);
226 		qs += valsz;
227 
228 		/* Decode and catch encoding errors. */
229 
230 		if ( ! (http_decode(key) && http_decode(val)))
231 			goto next;
232 
233 		/* Handle key-value pairs. */
234 
235 		if ( ! strcmp(key, "query"))
236 			set_query_attr(&req->q.query, &val);
237 
238 		else if ( ! strcmp(key, "apropos"))
239 			req->q.equal = !strcmp(val, "0");
240 
241 		else if ( ! strcmp(key, "manpath")) {
242 #ifdef COMPAT_OLDURI
243 			if ( ! strncmp(val, "OpenBSD ", 8)) {
244 				val[7] = '-';
245 				if ('C' == val[8])
246 					val[8] = 'c';
247 			}
248 #endif
249 			set_query_attr(&req->q.manpath, &val);
250 		}
251 
252 		else if ( ! (strcmp(key, "sec")
253 #ifdef COMPAT_OLDURI
254 		    && strcmp(key, "sektion")
255 #endif
256 		    )) {
257 			if ( ! strcmp(val, "0"))
258 				*val = '\0';
259 			set_query_attr(&req->q.sec, &val);
260 		}
261 
262 		else if ( ! strcmp(key, "arch")) {
263 			if ( ! strcmp(val, "default"))
264 				*val = '\0';
265 			set_query_attr(&req->q.arch, &val);
266 		}
267 
268 		/*
269 		 * The key must be freed in any case.
270 		 * The val may have been handed over to the query
271 		 * structure, in which case it is now NULL.
272 		 */
273 next:
274 		free(key);
275 		key = NULL;
276 		free(val);
277 		val = NULL;
278 
279 		if (*qs != '\0')
280 			qs++;
281 	}
282 }
283 
284 /*
285  * HTTP-decode a string.  The standard explanation is that this turns
286  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
287  * over the allocated string.
288  */
289 static int
290 http_decode(char *p)
291 {
292 	char             hex[3];
293 	char		*q;
294 	int              c;
295 
296 	hex[2] = '\0';
297 
298 	q = p;
299 	for ( ; '\0' != *p; p++, q++) {
300 		if ('%' == *p) {
301 			if ('\0' == (hex[0] = *(p + 1)))
302 				return 0;
303 			if ('\0' == (hex[1] = *(p + 2)))
304 				return 0;
305 			if (1 != sscanf(hex, "%x", &c))
306 				return 0;
307 			if ('\0' == c)
308 				return 0;
309 
310 			*q = (char)c;
311 			p += 2;
312 		} else
313 			*q = '+' == *p ? ' ' : *p;
314 	}
315 
316 	*q = '\0';
317 	return 1;
318 }
319 
/*
 * Write a string to standard output, percent-encoding every byte
 * that is neither alphanumeric nor one of '-', '.', '_', '~'.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
331 
/*
 * Write the HTTP header of an HTML response and flush it.
 * A "Status:" line is only written if the code is not 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
348 
349 static void
350 resp_copy(const char *filename)
351 {
352 	char	 buf[4096];
353 	ssize_t	 sz;
354 	int	 fd;
355 
356 	if ((fd = open(filename, O_RDONLY)) != -1) {
357 		fflush(stdout);
358 		while ((sz = read(fd, buf, sizeof(buf))) > 0)
359 			write(STDOUT_FILENO, buf, sz);
360 		close(fd);
361 	}
362 }
363 
364 static void
365 resp_begin_html(int code, const char *msg, const char *file)
366 {
367 	char	*cp;
368 
369 	resp_begin_http(code, msg);
370 
371 	printf("<!DOCTYPE html>\n"
372 	       "<html>\n"
373 	       "<head>\n"
374 	       "  <meta charset=\"UTF-8\"/>\n"
375 	       "  <meta name=\"viewport\""
376 		      " content=\"width=device-width, initial-scale=1.0\">\n"
377 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
378 	       " type=\"text/css\" media=\"all\">\n"
379 	       "  <title>",
380 	       CSS_DIR);
381 	if (file != NULL) {
382 		if ((cp = strrchr(file, '/')) != NULL)
383 			file = cp + 1;
384 		if ((cp = strrchr(file, '.')) != NULL) {
385 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
386 		} else
387 			printf("%s - ", file);
388 	}
389 	printf("%s</title>\n"
390 	       "</head>\n"
391 	       "<body>\n",
392 	       CUSTOMIZE_TITLE);
393 
394 	resp_copy(MAN_DIR "/header.html");
395 }
396 
397 static void
398 resp_end_html(void)
399 {
400 
401 	resp_copy(MAN_DIR "/footer.html");
402 
403 	puts("</body>\n"
404 	     "</html>");
405 }
406 
407 static void
408 resp_searchform(const struct req *req, enum focus focus)
409 {
410 	int		 i;
411 
412 	printf("<form action=\"/%s\" method=\"get\" "
413 	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
414 	       "  <fieldset>\n"
415 	       "    <legend>Manual Page Search Parameters</legend>\n",
416 	       scriptname);
417 
418 	/* Write query input box. */
419 
420 	printf("    <input type=\"search\" name=\"query\" value=\"");
421 	if (req->q.query != NULL)
422 		html_print(req->q.query);
423 	printf( "\" size=\"40\"");
424 	if (focus == FOCUS_QUERY)
425 		printf(" autofocus");
426 	puts(">");
427 
428 	/* Write submission buttons. */
429 
430 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
431 		"man</button>\n"
432 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
433 		"apropos</button>\n"
434 		"    <br/>\n");
435 
436 	/* Write section selector. */
437 
438 	puts("    <select name=\"sec\">");
439 	for (i = 0; i < sec_MAX; i++) {
440 		printf("      <option value=\"%s\"", sec_numbers[i]);
441 		if (NULL != req->q.sec &&
442 		    0 == strcmp(sec_numbers[i], req->q.sec))
443 			printf(" selected=\"selected\"");
444 		printf(">%s</option>\n", sec_names[i]);
445 	}
446 	puts("    </select>");
447 
448 	/* Write architecture selector. */
449 
450 	printf(	"    <select name=\"arch\">\n"
451 		"      <option value=\"default\"");
452 	if (NULL == req->q.arch)
453 		printf(" selected=\"selected\"");
454 	puts(">All Architectures</option>");
455 	for (i = 0; i < arch_MAX; i++) {
456 		printf("      <option");
457 		if (NULL != req->q.arch &&
458 		    0 == strcmp(arch_names[i], req->q.arch))
459 			printf(" selected=\"selected\"");
460 		printf(">%s</option>\n", arch_names[i]);
461 	}
462 	puts("    </select>");
463 
464 	/* Write manpath selector. */
465 
466 	if (req->psz > 1) {
467 		puts("    <select name=\"manpath\">");
468 		for (i = 0; i < (int)req->psz; i++) {
469 			printf("      <option");
470 			if (strcmp(req->q.manpath, req->p[i]) == 0)
471 				printf(" selected=\"selected\"");
472 			printf(">");
473 			html_print(req->p[i]);
474 			puts("</option>");
475 		}
476 		puts("    </select>");
477 	}
478 
479 	puts("  </fieldset>\n"
480 	     "</form>");
481 }
482 
/*
 * Check that a string consists of alphanumeric characters
 * and the characters '-', '.', '/', '_' only.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
496 
497 static int
498 validate_manpath(const struct req *req, const char* manpath)
499 {
500 	size_t	 i;
501 
502 	for (i = 0; i < req->psz; i++)
503 		if ( ! strcmp(manpath, req->p[i]))
504 			return 1;
505 
506 	return 0;
507 }
508 
509 static int
510 validate_arch(const char *arch)
511 {
512 	int	 i;
513 
514 	for (i = 0; i < arch_MAX; i++)
515 		if (strcmp(arch, arch_names[i]) == 0)
516 			return 1;
517 
518 	return 0;
519 }
520 
/*
 * Check a file name relative to a manpath.  After skipping
 * an optional leading "./", the name must start with "man" or "cat"
 * and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;
	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
531 
532 static void
533 pg_index(const struct req *req)
534 {
535 
536 	resp_begin_html(200, NULL, NULL);
537 	resp_searchform(req, FOCUS_QUERY);
538 	printf("<p>\n"
539 	       "This web interface is documented in the\n"
540 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
541 	       "manual, and the\n"
542 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
543 	       "manual explains the query syntax.\n"
544 	       "</p>\n",
545 	       scriptname, *scriptname == '\0' ? "" : "/",
546 	       scriptname, *scriptname == '\0' ? "" : "/");
547 	resp_end_html();
548 }
549 
550 static void
551 pg_noresult(const struct req *req, int code, const char *http_msg,
552     const char *user_msg)
553 {
554 	resp_begin_html(code, http_msg, NULL);
555 	resp_searchform(req, FOCUS_QUERY);
556 	puts("<p>");
557 	puts(user_msg);
558 	puts("</p>");
559 	resp_end_html();
560 }
561 
562 static void
563 pg_error_badrequest(const char *msg)
564 {
565 
566 	resp_begin_html(400, "Bad Request", NULL);
567 	puts("<h1>Bad Request</h1>\n"
568 	     "<p>\n");
569 	puts(msg);
570 	printf("Try again from the\n"
571 	       "<a href=\"/%s\">main page</a>.\n"
572 	       "</p>", scriptname);
573 	resp_end_html();
574 }
575 
/*
 * Write a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	static const char	 msg[] = "Internal Server Error";

	resp_begin_html(500, msg, NULL);
	printf("<p>%s</p>\n", msg);
	resp_end_html();
}
583 
584 static void
585 pg_redirect(const struct req *req, const char *name)
586 {
587 	printf("Status: 303 See Other\r\n"
588 	    "Location: /");
589 	if (*scriptname != '\0')
590 		printf("%s/", scriptname);
591 	if (strcmp(req->q.manpath, req->p[0]))
592 		printf("%s/", req->q.manpath);
593 	if (req->q.arch != NULL)
594 		printf("%s/", req->q.arch);
595 	http_encode(name);
596 	if (req->q.sec != NULL) {
597 		putchar('.');
598 		http_encode(req->q.sec);
599 	}
600 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
601 }
602 
603 static void
604 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
605 {
606 	char		*arch, *archend;
607 	const char	*sec;
608 	size_t		 i, iuse;
609 	int		 archprio, archpriouse;
610 	int		 prio, priouse;
611 
612 	for (i = 0; i < sz; i++) {
613 		if (validate_filename(r[i].file))
614 			continue;
615 		warnx("invalid filename %s in %s database",
616 		    r[i].file, req->q.manpath);
617 		pg_error_internal();
618 		return;
619 	}
620 
621 	if (req->isquery && sz == 1) {
622 		/*
623 		 * If we have just one result, then jump there now
624 		 * without any delay.
625 		 */
626 		printf("Status: 303 See Other\r\n"
627 		    "Location: /");
628 		if (*scriptname != '\0')
629 			printf("%s/", scriptname);
630 		if (strcmp(req->q.manpath, req->p[0]))
631 			printf("%s/", req->q.manpath);
632 		printf("%s\r\n"
633 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
634 		    r[0].file);
635 		return;
636 	}
637 
638 	/*
639 	 * In man(1) mode, show one of the pages
640 	 * even if more than one is found.
641 	 */
642 
643 	iuse = 0;
644 	if (req->q.equal || sz == 1) {
645 		priouse = 20;
646 		archpriouse = 3;
647 		for (i = 0; i < sz; i++) {
648 			sec = r[i].file;
649 			sec += strcspn(sec, "123456789");
650 			if (sec[0] == '\0')
651 				continue;
652 			prio = sec_prios[sec[0] - '1'];
653 			if (sec[1] != '/')
654 				prio += 10;
655 			if (req->q.arch == NULL) {
656 				archprio =
657 				    ((arch = strchr(sec + 1, '/'))
658 					== NULL) ? 3 :
659 				    ((archend = strchr(arch + 1, '/'))
660 					== NULL) ? 0 :
661 				    strncmp(arch, "amd64/",
662 					archend - arch) ? 2 : 1;
663 				if (archprio < archpriouse) {
664 					archpriouse = archprio;
665 					priouse = prio;
666 					iuse = i;
667 					continue;
668 				}
669 				if (archprio > archpriouse)
670 					continue;
671 			}
672 			if (prio >= priouse)
673 				continue;
674 			priouse = prio;
675 			iuse = i;
676 		}
677 		resp_begin_html(200, NULL, r[iuse].file);
678 	} else
679 		resp_begin_html(200, NULL, NULL);
680 
681 	resp_searchform(req,
682 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
683 
684 	if (sz > 1) {
685 		puts("<table class=\"results\">");
686 		for (i = 0; i < sz; i++) {
687 			printf("  <tr>\n"
688 			       "    <td>"
689 			       "<a class=\"Xr\" href=\"/");
690 			if (*scriptname != '\0')
691 				printf("%s/", scriptname);
692 			if (strcmp(req->q.manpath, req->p[0]))
693 				printf("%s/", req->q.manpath);
694 			printf("%s\">", r[i].file);
695 			html_print(r[i].names);
696 			printf("</a></td>\n"
697 			       "    <td><span class=\"Nd\">");
698 			html_print(r[i].output);
699 			puts("</span></td>\n"
700 			     "  </tr>");
701 		}
702 		puts("</table>");
703 	}
704 
705 	if (req->q.equal || sz == 1) {
706 		puts("<hr>");
707 		resp_show(req, r[iuse].file);
708 	}
709 
710 	resp_end_html();
711 }
712 
/*
 * Write a preformatted manual page as HTML inside <pre>.
 *
 * Backspace sequences in the input are translated:
 * "_\bX" becomes italic X, overstrikes of two of the characters
 * '+', 'o', '|', '=', '*' listed below become '*', overstrikes
 * combining '|', '-', '+' become '+', and any other "X\bY"
 * becomes bold Y.  All other characters are HTML-escaped.
 * If the file cannot be opened, an error paragraph is written.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both groups compare the characters before
			 * and after the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
849 
850 static void
851 resp_format(const struct req *req, const char *file)
852 {
853 	struct manoutput conf;
854 	struct mparse	*mp;
855 	struct roff_meta *meta;
856 	void		*vp;
857 	int		 fd;
858 	int		 usepath;
859 
860 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
861 		puts("<p>You specified an invalid manual file.</p>");
862 		return;
863 	}
864 
865 	mchars_alloc();
866 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
867 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
868 	mparse_readfd(mp, fd, file);
869 	close(fd);
870 	meta = mparse_result(mp);
871 
872 	memset(&conf, 0, sizeof(conf));
873 	conf.fragment = 1;
874 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
875 	usepath = strcmp(req->q.manpath, req->p[0]);
876 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
877 	    scriptname, *scriptname == '\0' ? "" : "/",
878 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
879 
880 	vp = html_alloc(&conf);
881 	if (meta->macroset == MACROSET_MDOC)
882 		html_mdoc(vp, meta);
883 	else
884 		html_man(vp, meta);
885 
886 	html_free(vp);
887 	mparse_free(mp);
888 	mchars_free();
889 	free(conf.man);
890 	free(conf.style);
891 }
892 
/*
 * Write the body of one manual page.  After skipping an optional
 * leading "./", file names starting with 'c' go to resp_catman()
 * and all others to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (strncmp(file, "./", 2) == 0)
		file += 2;

	if (*file != 'c')
		resp_format(req, file);
	else
		resp_catman(req, file);
}
905 
906 static void
907 pg_show(struct req *req, const char *fullpath)
908 {
909 	char		*manpath;
910 	const char	*file;
911 
912 	if ((file = strchr(fullpath, '/')) == NULL) {
913 		pg_error_badrequest(
914 		    "You did not specify a page to show.");
915 		return;
916 	}
917 	manpath = mandoc_strndup(fullpath, file - fullpath);
918 	file++;
919 
920 	if ( ! validate_manpath(req, manpath)) {
921 		pg_error_badrequest(
922 		    "You specified an invalid manpath.");
923 		free(manpath);
924 		return;
925 	}
926 
927 	/*
928 	 * Begin by chdir()ing into the manpath.
929 	 * This way we can pick up the database files, which are
930 	 * relative to the manpath root.
931 	 */
932 
933 	if (chdir(manpath) == -1) {
934 		warn("chdir %s", manpath);
935 		pg_error_internal();
936 		free(manpath);
937 		return;
938 	}
939 	free(manpath);
940 
941 	if ( ! validate_filename(file)) {
942 		pg_error_badrequest(
943 		    "You specified an invalid manual file.");
944 		return;
945 	}
946 
947 	resp_begin_html(200, NULL, file);
948 	resp_searchform(req, FOCUS_NONE);
949 	resp_show(req, file);
950 	resp_end_html();
951 }
952 
953 static void
954 pg_search(const struct req *req)
955 {
956 	struct mansearch	  search;
957 	struct manpaths		  paths;
958 	struct manpage		 *res;
959 	char			**argv;
960 	char			 *query, *rp, *wp;
961 	size_t			  ressz;
962 	int			  argc;
963 
964 	/*
965 	 * Begin by chdir()ing into the root of the manpath.
966 	 * This way we can pick up the database files, which are
967 	 * relative to the manpath root.
968 	 */
969 
970 	if (chdir(req->q.manpath) == -1) {
971 		warn("chdir %s", req->q.manpath);
972 		pg_error_internal();
973 		return;
974 	}
975 
976 	search.arch = req->q.arch;
977 	search.sec = req->q.sec;
978 	search.outkey = "Nd";
979 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
980 	search.firstmatch = 1;
981 
982 	paths.sz = 1;
983 	paths.paths = mandoc_malloc(sizeof(char *));
984 	paths.paths[0] = mandoc_strdup(".");
985 
986 	/*
987 	 * Break apart at spaces with backslash-escaping.
988 	 */
989 
990 	argc = 0;
991 	argv = NULL;
992 	rp = query = mandoc_strdup(req->q.query);
993 	for (;;) {
994 		while (isspace((unsigned char)*rp))
995 			rp++;
996 		if (*rp == '\0')
997 			break;
998 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
999 		argv[argc++] = wp = rp;
1000 		for (;;) {
1001 			if (isspace((unsigned char)*rp)) {
1002 				*wp = '\0';
1003 				rp++;
1004 				break;
1005 			}
1006 			if (rp[0] == '\\' && rp[1] != '\0')
1007 				rp++;
1008 			if (wp != rp)
1009 				*wp = *rp;
1010 			if (*rp == '\0')
1011 				break;
1012 			wp++;
1013 			rp++;
1014 		}
1015 	}
1016 
1017 	res = NULL;
1018 	ressz = 0;
1019 	if (req->isquery && req->q.equal && argc == 1)
1020 		pg_redirect(req, argv[0]);
1021 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1022 		pg_noresult(req, 400, "Bad Request",
1023 		    "You entered an invalid query.");
1024 	else if (ressz == 0)
1025 		pg_noresult(req, 404, "Not Found", "No results found.");
1026 	else
1027 		pg_searchres(req, res, ressz);
1028 
1029 	free(query);
1030 	mansearch_free(res, ressz);
1031 	free(paths.paths[0]);
1032 	free(paths.paths);
1033 }
1034 
1035 int
1036 main(void)
1037 {
1038 	struct req	 req;
1039 	struct itimerval itimer;
1040 	const char	*path;
1041 	const char	*querystring;
1042 	int		 i;
1043 
1044 	/*
1045 	 * The "rpath" pledge could be revoked after mparse_readfd()
1046 	 * if the file desciptor to "/footer.html" would be opened
1047 	 * up front, but it's probably not worth the complication
1048 	 * of the code it would cause: it would require scattering
1049 	 * pledge() calls in multiple low-level resp_*() functions.
1050 	 */
1051 
1052 	if (pledge("stdio rpath", NULL) == -1) {
1053 		warn("pledge");
1054 		pg_error_internal();
1055 		return EXIT_FAILURE;
1056 	}
1057 
1058 	/* Poor man's ReDoS mitigation. */
1059 
1060 	itimer.it_value.tv_sec = 2;
1061 	itimer.it_value.tv_usec = 0;
1062 	itimer.it_interval.tv_sec = 2;
1063 	itimer.it_interval.tv_usec = 0;
1064 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1065 		warn("setitimer");
1066 		pg_error_internal();
1067 		return EXIT_FAILURE;
1068 	}
1069 
1070 	/*
1071 	 * First we change directory into the MAN_DIR so that
1072 	 * subsequent scanning for manpath directories is rooted
1073 	 * relative to the same position.
1074 	 */
1075 
1076 	if (chdir(MAN_DIR) == -1) {
1077 		warn("MAN_DIR: %s", MAN_DIR);
1078 		pg_error_internal();
1079 		return EXIT_FAILURE;
1080 	}
1081 
1082 	memset(&req, 0, sizeof(struct req));
1083 	req.q.equal = 1;
1084 	parse_manpath_conf(&req);
1085 
1086 	/* Parse the path info and the query string. */
1087 
1088 	if ((path = getenv("PATH_INFO")) == NULL)
1089 		path = "";
1090 	else if (*path == '/')
1091 		path++;
1092 
1093 	if (*path != '\0') {
1094 		parse_path_info(&req, path);
1095 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1096 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1097 			path = "";
1098 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1099 		parse_query_string(&req, querystring);
1100 
1101 	/* Validate parsed data and add defaults. */
1102 
1103 	if (req.q.manpath == NULL)
1104 		req.q.manpath = mandoc_strdup(req.p[0]);
1105 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1106 		pg_error_badrequest(
1107 		    "You specified an invalid manpath.");
1108 		return EXIT_FAILURE;
1109 	}
1110 
1111 	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1112 		pg_error_badrequest(
1113 		    "You specified an invalid architecture.");
1114 		return EXIT_FAILURE;
1115 	}
1116 
1117 	/* Dispatch to the three different pages. */
1118 
1119 	if ('\0' != *path)
1120 		pg_show(&req, path);
1121 	else if (NULL != req.q.query)
1122 		pg_search(&req);
1123 	else
1124 		pg_index(&req);
1125 
1126 	free(req.q.manpath);
1127 	free(req.q.arch);
1128 	free(req.q.sec);
1129 	free(req.q.query);
1130 	for (i = 0; i < (int)req.psz; i++)
1131 		free(req.p[i]);
1132 	free(req.p);
1133 	return EXIT_SUCCESS;
1134 }
1135 
1136 /*
1137  * Translate PATH_INFO to a query.
1138  */
1139 static void
1140 parse_path_info(struct req *req, const char *path)
1141 {
1142 	const char	*name, *sec, *end;
1143 
1144 	req->isquery = 0;
1145 	req->q.equal = 1;
1146 	req->q.manpath = NULL;
1147 	req->q.arch = NULL;
1148 
1149 	/* Mandatory manual page name. */
1150 	if ((name = strrchr(path, '/')) == NULL)
1151 		name = path;
1152 	else
1153 		name++;
1154 
1155 	/* Optional trailing section. */
1156 	sec = strrchr(name, '.');
1157 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1158 		req->q.query = mandoc_strndup(name, sec - name - 1);
1159 		req->q.sec = mandoc_strdup(sec);
1160 	} else {
1161 		req->q.query = mandoc_strdup(name);
1162 		req->q.sec = NULL;
1163 	}
1164 
1165 	/* Handle the case of name[.section] only. */
1166 	if (name == path)
1167 		return;
1168 
1169 	/* Optional manpath. */
1170 	end = strchr(path, '/');
1171 	req->q.manpath = mandoc_strndup(path, end - path);
1172 	if (validate_manpath(req, req->q.manpath)) {
1173 		path = end + 1;
1174 		if (name == path)
1175 			return;
1176 	} else {
1177 		free(req->q.manpath);
1178 		req->q.manpath = NULL;
1179 	}
1180 
1181 	/* Optional section. */
1182 	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1183 		path += 3;
1184 		end = strchr(path, '/');
1185 		free(req->q.sec);
1186 		req->q.sec = mandoc_strndup(path, end - path);
1187 		path = end + 1;
1188 		if (name == path)
1189 			return;
1190 	}
1191 
1192 	/* Optional architecture. */
1193 	end = strchr(path, '/');
1194 	if (end + 1 != name) {
1195 		pg_error_badrequest(
1196 		    "You specified too many directory components.");
1197 		exit(EXIT_FAILURE);
1198 	}
1199 	req->q.arch = mandoc_strndup(path, end - path);
1200 	if (validate_arch(req->q.arch) == 0) {
1201 		pg_error_badrequest(
1202 		    "You specified an invalid directory component.");
1203 		exit(EXIT_FAILURE);
1204 	}
1205 }
1206 
1207 /*
1208  * Scan for indexable paths.
1209  */
1210 static void
1211 parse_manpath_conf(struct req *req)
1212 {
1213 	FILE	*fp;
1214 	char	*dp;
1215 	size_t	 dpsz;
1216 	ssize_t	 len;
1217 
1218 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1219 		warn("%s/manpath.conf", MAN_DIR);
1220 		pg_error_internal();
1221 		exit(EXIT_FAILURE);
1222 	}
1223 
1224 	dp = NULL;
1225 	dpsz = 0;
1226 
1227 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1228 		if (dp[len - 1] == '\n')
1229 			dp[--len] = '\0';
1230 		req->p = mandoc_realloc(req->p,
1231 		    (req->psz + 1) * sizeof(char *));
1232 		if ( ! validate_urifrag(dp)) {
1233 			warnx("%s/manpath.conf contains "
1234 			    "unsafe path \"%s\"", MAN_DIR, dp);
1235 			pg_error_internal();
1236 			exit(EXIT_FAILURE);
1237 		}
1238 		if (strchr(dp, '/') != NULL) {
1239 			warnx("%s/manpath.conf contains "
1240 			    "path with slash \"%s\"", MAN_DIR, dp);
1241 			pg_error_internal();
1242 			exit(EXIT_FAILURE);
1243 		}
1244 		req->p[req->psz++] = dp;
1245 		dp = NULL;
1246 		dpsz = 0;
1247 	}
1248 	free(dp);
1249 
1250 	if (req->p == NULL) {
1251 		warnx("%s/manpath.conf is empty", MAN_DIR);
1252 		pg_error_internal();
1253 		exit(EXIT_FAILURE);
1254 	}
1255 }
1256