xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 24bb5fcea3ed904bc467217bdaadb5dfc618d5bf)
1 /* $OpenBSD: cgi.c,v 1.113 2021/05/01 16:11:17 visa Exp $ */
2 /*
3  * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
4  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the man.cgi(8) program.
19  */
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "mandoc_parse.h"
40 #include "main.h"
41 #include "manconf.h"
42 #include "mansearch.h"
43 #include "cgi.h"
44 
/*
 * A query as passed to the search function.
 * The string members are either NULL or point to allocated memory;
 * main() frees all four of them before exiting.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
55 
/*
 * The state of one request: the parsed query and the list of
 * manpaths that parse_manpath_conf() read from manpath.conf.
 * The first entry of that list, p[0], serves as the default manpath.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
62 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute (FOCUS_QUERY) or not (FOCUS_NONE).
 */
enum	focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
67 
/*
 * Forward declarations of the file-local functions.
 * The html_*() and http_*() functions handle escaping and encoding,
 * parse_*() fill in the request structure, pg_*() produce a complete
 * response, resp_*() write parts of a response to standard output,
 * and validate_*() return non-zero if their argument is acceptable.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *, const char *);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, int, const char *,
				const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
98 
/*
 * Name of the CGI script, taken from the build-time SCRIPT_NAME.
 * The response functions print it after a leading slash when they
 * generate links, the form action, and redirect locations.
 */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Preference of the sections 1 to 9 when pg_searchres() has to
 * choose one page among several results: indexed by the section
 * digit minus one; the page with the smallest value is shown.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Values (sec_numbers) and labels (sec_names) of the section
 * selector written by resp_searchform(); both arrays are walked
 * in parallel, so they need the same number of entries.
 * parse_query_string() treats the value "0" as "no section".
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
119 
/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector written by resp_searchform().
 * NOTE(review): the blank line presumably separates current from
 * discontinued architectures - confirm before relying on it.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",       "arm64",
    "hppa",        "i386",        "landisk",     "loongson",
    "luna88k",     "macppc",      "mips64",      "octeon",
    "powerpc64",   "riscv64",     "sparc64",

    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "sgi",         "socppc",
    "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in arch_names. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
137 
/*
 * Write one character to standard output.
 * The double quote, the ampersand, and the two angle brackets are
 * replaced by the corresponding HTML character entities.
 * All other bytes, including non-ASCII ones, are written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
164 
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
178 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In both cases, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	*val = NULL;

	if (*newval != '\0') {
		*attr = newval;
		return;
	}
	free(newval);
	*attr = NULL;
}
195 
196 /*
197  * Parse the QUERY_STRING for key-value pairs
198  * and store the values into the query structure.
199  */
200 static void
201 parse_query_string(struct req *req, const char *qs)
202 {
203 	char		*key, *val;
204 	size_t		 keysz, valsz;
205 
206 	req->isquery	= 1;
207 	req->q.manpath	= NULL;
208 	req->q.arch	= NULL;
209 	req->q.sec	= NULL;
210 	req->q.query	= NULL;
211 	req->q.equal	= 1;
212 
213 	key = val = NULL;
214 	while (*qs != '\0') {
215 
216 		/* Parse one key. */
217 
218 		keysz = strcspn(qs, "=;&");
219 		key = mandoc_strndup(qs, keysz);
220 		qs += keysz;
221 		if (*qs != '=')
222 			goto next;
223 
224 		/* Parse one value. */
225 
226 		valsz = strcspn(++qs, ";&");
227 		val = mandoc_strndup(qs, valsz);
228 		qs += valsz;
229 
230 		/* Decode and catch encoding errors. */
231 
232 		if ( ! (http_decode(key) && http_decode(val)))
233 			goto next;
234 
235 		/* Handle key-value pairs. */
236 
237 		if ( ! strcmp(key, "query"))
238 			set_query_attr(&req->q.query, &val);
239 
240 		else if ( ! strcmp(key, "apropos"))
241 			req->q.equal = !strcmp(val, "0");
242 
243 		else if ( ! strcmp(key, "manpath")) {
244 #ifdef COMPAT_OLDURI
245 			if ( ! strncmp(val, "OpenBSD ", 8)) {
246 				val[7] = '-';
247 				if ('C' == val[8])
248 					val[8] = 'c';
249 			}
250 #endif
251 			set_query_attr(&req->q.manpath, &val);
252 		}
253 
254 		else if ( ! (strcmp(key, "sec")
255 #ifdef COMPAT_OLDURI
256 		    && strcmp(key, "sektion")
257 #endif
258 		    )) {
259 			if ( ! strcmp(val, "0"))
260 				*val = '\0';
261 			set_query_attr(&req->q.sec, &val);
262 		}
263 
264 		else if ( ! strcmp(key, "arch")) {
265 			if ( ! strcmp(val, "default"))
266 				*val = '\0';
267 			set_query_attr(&req->q.arch, &val);
268 		}
269 
270 		/*
271 		 * The key must be freed in any case.
272 		 * The val may have been handed over to the query
273 		 * structure, in which case it is now NULL.
274 		 */
275 next:
276 		free(key);
277 		key = NULL;
278 		free(val);
279 		val = NULL;
280 
281 		if (*qs != '\0')
282 			qs++;
283 	}
284 }
285 
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way.  This is done in-place
 * over the allocated string; the result is never longer than the input.
 *
 * Returns 1 on success and 0 on an encoding error, i.e. if a percent
 * sign is not followed by exactly two hexadecimal digits or if the
 * escape sequence encodes a NUL byte.  After a failure, the content
 * of the string is unspecified, but it is still NUL-terminated.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	const char	*rp;
	char		*wp;
	int		 c;

	hex[2] = '\0';

	for (rp = wp = p; *rp != '\0'; rp++, wp++) {
		if (*rp == '+') {
			*wp = ' ';
			continue;
		}
		if (*rp != '%') {
			*wp = *rp;
			continue;
		}

		/*
		 * Insist on two real hexadecimal digits.  A scanf-style
		 * conversion would also accept a single digit, a sign,
		 * or leading white space.  Since isxdigit() is false for
		 * the terminating NUL byte, this also catches truncated
		 * escape sequences without reading beyond the string.
		 */

		if ( ! isxdigit((unsigned char)rp[1]) ||
		    ! isxdigit((unsigned char)rp[2]))
			return 0;
		hex[0] = rp[1];
		hex[1] = rp[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL byte would truncate the value. */

		if (c == 0)
			return 0;

		*wp = (char)c;
		rp += 2;
	}

	*wp = '\0';
	return 1;
}
321 
/*
 * Write a string to standard output with percent-encoding:
 * alphanumeric characters and the four characters "-._~" are
 * written as they are, every other byte as a percent sign
 * followed by two uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
333 
/*
 * Write the HTTP header block of an HTML response and flush it.
 * A "Status:" line with the given code and message is only
 * written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 headers[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(headers, stdout);
	fflush(stdout);
}
350 
/*
 * Copy the content of a file verbatim to standard output.
 * Nothing is written if the file cannot be opened; this is not
 * treated as an error.  Copying stops silently at the first read
 * or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 rsz, wsz, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data bypasses stdio, so anything that is still
	 * buffered has to go out first to preserve the order.
	 */

	fflush(stdout);

	while ((rsz = read(fd, buf, sizeof(buf))) > 0) {

		/*
		 * write(2) may transfer less than requested, for
		 * example to a pipe; keep going until the whole
		 * chunk is out rather than dropping the rest.
		 */

		off = 0;
		while (off < rsz) {
			wsz = write(STDOUT_FILENO, buf + off,
			    (size_t)(rsz - off));
			if (wsz == -1) {
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			off += wsz;
		}
	}
	close(fd);
}
365 
366 static void
367 resp_begin_html(int code, const char *msg, const char *file)
368 {
369 	char	*cp;
370 
371 	resp_begin_http(code, msg);
372 
373 	printf("<!DOCTYPE html>\n"
374 	       "<html>\n"
375 	       "<head>\n"
376 	       "  <meta charset=\"UTF-8\"/>\n"
377 	       "  <meta name=\"viewport\""
378 		      " content=\"width=device-width, initial-scale=1.0\">\n"
379 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
380 	       " type=\"text/css\" media=\"all\">\n"
381 	       "  <title>",
382 	       CSS_DIR);
383 	if (file != NULL) {
384 		if ((cp = strrchr(file, '/')) != NULL)
385 			file = cp + 1;
386 		if ((cp = strrchr(file, '.')) != NULL) {
387 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
388 		} else
389 			printf("%s - ", file);
390 	}
391 	printf("%s</title>\n"
392 	       "</head>\n"
393 	       "<body>\n",
394 	       CUSTOMIZE_TITLE);
395 
396 	resp_copy(MAN_DIR "/header.html");
397 }
398 
399 static void
400 resp_end_html(void)
401 {
402 
403 	resp_copy(MAN_DIR "/footer.html");
404 
405 	puts("</body>\n"
406 	     "</html>");
407 }
408 
409 static void
410 resp_searchform(const struct req *req, enum focus focus)
411 {
412 	int		 i;
413 
414 	printf("<form action=\"/%s\" method=\"get\" "
415 	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
416 	       "  <fieldset>\n"
417 	       "    <legend>Manual Page Search Parameters</legend>\n",
418 	       scriptname);
419 
420 	/* Write query input box. */
421 
422 	printf("    <input type=\"search\" name=\"query\" value=\"");
423 	if (req->q.query != NULL)
424 		html_print(req->q.query);
425 	printf( "\" size=\"40\"");
426 	if (focus == FOCUS_QUERY)
427 		printf(" autofocus");
428 	puts(">");
429 
430 	/* Write submission buttons. */
431 
432 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
433 		"man</button>\n"
434 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
435 		"apropos</button>\n"
436 		"    <br/>\n");
437 
438 	/* Write section selector. */
439 
440 	puts("    <select name=\"sec\">");
441 	for (i = 0; i < sec_MAX; i++) {
442 		printf("      <option value=\"%s\"", sec_numbers[i]);
443 		if (NULL != req->q.sec &&
444 		    0 == strcmp(sec_numbers[i], req->q.sec))
445 			printf(" selected=\"selected\"");
446 		printf(">%s</option>\n", sec_names[i]);
447 	}
448 	puts("    </select>");
449 
450 	/* Write architecture selector. */
451 
452 	printf(	"    <select name=\"arch\">\n"
453 		"      <option value=\"default\"");
454 	if (NULL == req->q.arch)
455 		printf(" selected=\"selected\"");
456 	puts(">All Architectures</option>");
457 	for (i = 0; i < arch_MAX; i++) {
458 		printf("      <option");
459 		if (NULL != req->q.arch &&
460 		    0 == strcmp(arch_names[i], req->q.arch))
461 			printf(" selected=\"selected\"");
462 		printf(">%s</option>\n", arch_names[i]);
463 	}
464 	puts("    </select>");
465 
466 	/* Write manpath selector. */
467 
468 	if (req->psz > 1) {
469 		puts("    <select name=\"manpath\">");
470 		for (i = 0; i < (int)req->psz; i++) {
471 			printf("      <option");
472 			if (strcmp(req->q.manpath, req->p[i]) == 0)
473 				printf(" selected=\"selected\"");
474 			printf(">");
475 			html_print(req->p[i]);
476 			puts("</option>");
477 		}
478 		puts("    </select>");
479 	}
480 
481 	puts("  </fieldset>\n"
482 	     "</form>");
483 }
484 
/*
 * Check that a string consists of nothing but alphanumeric
 * characters, hyphens, dots, slashes, and underscores.
 * Returns 1 if so (also for the empty string) and 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
498 
499 static int
500 validate_manpath(const struct req *req, const char* manpath)
501 {
502 	size_t	 i;
503 
504 	for (i = 0; i < req->psz; i++)
505 		if ( ! strcmp(manpath, req->p[i]))
506 			return 1;
507 
508 	return 0;
509 }
510 
511 static int
512 validate_arch(const char *arch)
513 {
514 	int	 i;
515 
516 	for (i = 0; i < arch_MAX; i++)
517 		if (strcmp(arch, arch_names[i]) == 0)
518 			return 1;
519 
520 	return 0;
521 }
522 
/*
 * Check that a file name is safe to open relative to a manpath:
 * after an optional leading "./", it must start with "man" or
 * "cat" and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable and 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* No climbing out of the manpath. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	/* Only the formatted and unformatted page trees. */
	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
533 
534 static void
535 pg_index(const struct req *req)
536 {
537 
538 	resp_begin_html(200, NULL, NULL);
539 	resp_searchform(req, FOCUS_QUERY);
540 	printf("<p>\n"
541 	       "This web interface is documented in the\n"
542 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
543 	       "manual, and the\n"
544 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
545 	       "manual explains the query syntax.\n"
546 	       "</p>\n",
547 	       scriptname, *scriptname == '\0' ? "" : "/",
548 	       scriptname, *scriptname == '\0' ? "" : "/");
549 	resp_end_html();
550 }
551 
552 static void
553 pg_noresult(const struct req *req, int code, const char *http_msg,
554     const char *user_msg)
555 {
556 	resp_begin_html(code, http_msg, NULL);
557 	resp_searchform(req, FOCUS_QUERY);
558 	puts("<p>");
559 	puts(user_msg);
560 	puts("</p>");
561 	resp_end_html();
562 }
563 
564 static void
565 pg_error_badrequest(const char *msg)
566 {
567 
568 	resp_begin_html(400, "Bad Request", NULL);
569 	puts("<h1>Bad Request</h1>\n"
570 	     "<p>\n");
571 	puts(msg);
572 	printf("Try again from the\n"
573 	       "<a href=\"/%s\">main page</a>.\n"
574 	       "</p>", scriptname);
575 	resp_end_html();
576 }
577 
/*
 * Produce a complete "500 Internal Server Error" page.
 * No details are shown to the user.
 */
static void
pg_error_internal(void)
{
	static const char	 what[] = "Internal Server Error";

	resp_begin_html(500, what, NULL);
	printf("<p>%s</p>\n", what);
	resp_end_html();
}
585 
586 static void
587 pg_redirect(const struct req *req, const char *name)
588 {
589 	printf("Status: 303 See Other\r\n"
590 	    "Location: /");
591 	if (*scriptname != '\0')
592 		printf("%s/", scriptname);
593 	if (strcmp(req->q.manpath, req->p[0]))
594 		printf("%s/", req->q.manpath);
595 	if (req->q.arch != NULL)
596 		printf("%s/", req->q.arch);
597 	http_encode(name);
598 	if (req->q.sec != NULL) {
599 		putchar('.');
600 		http_encode(req->q.sec);
601 	}
602 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
603 }
604 
605 static void
606 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
607 {
608 	char		*arch, *archend;
609 	const char	*sec;
610 	size_t		 i, iuse;
611 	int		 archprio, archpriouse;
612 	int		 prio, priouse;
613 
614 	for (i = 0; i < sz; i++) {
615 		if (validate_filename(r[i].file))
616 			continue;
617 		warnx("invalid filename %s in %s database",
618 		    r[i].file, req->q.manpath);
619 		pg_error_internal();
620 		return;
621 	}
622 
623 	if (req->isquery && sz == 1) {
624 		/*
625 		 * If we have just one result, then jump there now
626 		 * without any delay.
627 		 */
628 		printf("Status: 303 See Other\r\n"
629 		    "Location: /");
630 		if (*scriptname != '\0')
631 			printf("%s/", scriptname);
632 		if (strcmp(req->q.manpath, req->p[0]))
633 			printf("%s/", req->q.manpath);
634 		printf("%s\r\n"
635 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
636 		    r[0].file);
637 		return;
638 	}
639 
640 	/*
641 	 * In man(1) mode, show one of the pages
642 	 * even if more than one is found.
643 	 */
644 
645 	iuse = 0;
646 	if (req->q.equal || sz == 1) {
647 		priouse = 20;
648 		archpriouse = 3;
649 		for (i = 0; i < sz; i++) {
650 			sec = r[i].file;
651 			sec += strcspn(sec, "123456789");
652 			if (sec[0] == '\0')
653 				continue;
654 			prio = sec_prios[sec[0] - '1'];
655 			if (sec[1] != '/')
656 				prio += 10;
657 			if (req->q.arch == NULL) {
658 				archprio =
659 				    ((arch = strchr(sec + 1, '/'))
660 					== NULL) ? 3 :
661 				    ((archend = strchr(arch + 1, '/'))
662 					== NULL) ? 0 :
663 				    strncmp(arch, "amd64/",
664 					archend - arch) ? 2 : 1;
665 				if (archprio < archpriouse) {
666 					archpriouse = archprio;
667 					priouse = prio;
668 					iuse = i;
669 					continue;
670 				}
671 				if (archprio > archpriouse)
672 					continue;
673 			}
674 			if (prio >= priouse)
675 				continue;
676 			priouse = prio;
677 			iuse = i;
678 		}
679 		resp_begin_html(200, NULL, r[iuse].file);
680 	} else
681 		resp_begin_html(200, NULL, NULL);
682 
683 	resp_searchform(req,
684 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
685 
686 	if (sz > 1) {
687 		puts("<table class=\"results\">");
688 		for (i = 0; i < sz; i++) {
689 			printf("  <tr>\n"
690 			       "    <td>"
691 			       "<a class=\"Xr\" href=\"/");
692 			if (*scriptname != '\0')
693 				printf("%s/", scriptname);
694 			if (strcmp(req->q.manpath, req->p[0]))
695 				printf("%s/", req->q.manpath);
696 			printf("%s\">", r[i].file);
697 			html_print(r[i].names);
698 			printf("</a></td>\n"
699 			       "    <td><span class=\"Nd\">");
700 			html_print(r[i].output);
701 			puts("</span></td>\n"
702 			     "  </tr>");
703 		}
704 		puts("</table>");
705 	}
706 
707 	if (req->q.equal || sz == 1) {
708 		puts("<hr>");
709 		resp_show(req, r[iuse].file);
710 	}
711 
712 	resp_end_html();
713 }
714 
/*
 * Write a preformatted manual page ("cat page") as HTML.
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: an underscore, a backspace, and a character
 * become that character in italics; any other character, a
 * backspace, and a character become the latter character in bold,
 * except for a few combinations that traditionally draw bullets
 * and box corners, which become "*" and "+", respectively.
 * All text is HTML-escaped.  If the file cannot be opened,
 * an error paragraph is written instead.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * The second character of each pair is the one
			 * after the backspace, that is, p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
851 
852 static void
853 resp_format(const struct req *req, const char *file)
854 {
855 	struct manoutput conf;
856 	struct mparse	*mp;
857 	struct roff_meta *meta;
858 	void		*vp;
859 	int		 fd;
860 	int		 usepath;
861 
862 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
863 		puts("<p>You specified an invalid manual file.</p>");
864 		return;
865 	}
866 
867 	mchars_alloc();
868 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
869 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
870 	mparse_readfd(mp, fd, file);
871 	close(fd);
872 	meta = mparse_result(mp);
873 
874 	memset(&conf, 0, sizeof(conf));
875 	conf.fragment = 1;
876 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
877 	usepath = strcmp(req->q.manpath, req->p[0]);
878 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
879 	    scriptname, *scriptname == '\0' ? "" : "/",
880 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
881 
882 	vp = html_alloc(&conf);
883 	if (meta->macroset == MACROSET_MDOC)
884 		html_mdoc(vp, meta);
885 	else
886 		html_man(vp, meta);
887 
888 	html_free(vp);
889 	mparse_free(mp);
890 	mchars_free();
891 	free(conf.man);
892 	free(conf.style);
893 }
894 
/*
 * Write the content of one manual page file.
 * After skipping an optional leading "./", file names starting
 * with the letter 'c' are treated as preformatted pages and all
 * others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
907 
908 static void
909 pg_show(struct req *req, const char *fullpath)
910 {
911 	char		*manpath;
912 	const char	*file;
913 
914 	if ((file = strchr(fullpath, '/')) == NULL) {
915 		pg_error_badrequest(
916 		    "You did not specify a page to show.");
917 		return;
918 	}
919 	manpath = mandoc_strndup(fullpath, file - fullpath);
920 	file++;
921 
922 	if ( ! validate_manpath(req, manpath)) {
923 		pg_error_badrequest(
924 		    "You specified an invalid manpath.");
925 		free(manpath);
926 		return;
927 	}
928 
929 	/*
930 	 * Begin by chdir()ing into the manpath.
931 	 * This way we can pick up the database files, which are
932 	 * relative to the manpath root.
933 	 */
934 
935 	if (chdir(manpath) == -1) {
936 		warn("chdir %s", manpath);
937 		pg_error_internal();
938 		free(manpath);
939 		return;
940 	}
941 	free(manpath);
942 
943 	if ( ! validate_filename(file)) {
944 		pg_error_badrequest(
945 		    "You specified an invalid manual file.");
946 		return;
947 	}
948 
949 	resp_begin_html(200, NULL, file);
950 	resp_searchform(req, FOCUS_NONE);
951 	resp_show(req, file);
952 	resp_end_html();
953 }
954 
955 static void
956 pg_search(const struct req *req)
957 {
958 	struct mansearch	  search;
959 	struct manpaths		  paths;
960 	struct manpage		 *res;
961 	char			**argv;
962 	char			 *query, *rp, *wp;
963 	size_t			  ressz;
964 	int			  argc;
965 
966 	/*
967 	 * Begin by chdir()ing into the root of the manpath.
968 	 * This way we can pick up the database files, which are
969 	 * relative to the manpath root.
970 	 */
971 
972 	if (chdir(req->q.manpath) == -1) {
973 		warn("chdir %s", req->q.manpath);
974 		pg_error_internal();
975 		return;
976 	}
977 
978 	search.arch = req->q.arch;
979 	search.sec = req->q.sec;
980 	search.outkey = "Nd";
981 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
982 	search.firstmatch = 1;
983 
984 	paths.sz = 1;
985 	paths.paths = mandoc_malloc(sizeof(char *));
986 	paths.paths[0] = mandoc_strdup(".");
987 
988 	/*
989 	 * Break apart at spaces with backslash-escaping.
990 	 */
991 
992 	argc = 0;
993 	argv = NULL;
994 	rp = query = mandoc_strdup(req->q.query);
995 	for (;;) {
996 		while (isspace((unsigned char)*rp))
997 			rp++;
998 		if (*rp == '\0')
999 			break;
1000 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1001 		argv[argc++] = wp = rp;
1002 		for (;;) {
1003 			if (isspace((unsigned char)*rp)) {
1004 				*wp = '\0';
1005 				rp++;
1006 				break;
1007 			}
1008 			if (rp[0] == '\\' && rp[1] != '\0')
1009 				rp++;
1010 			if (wp != rp)
1011 				*wp = *rp;
1012 			if (*rp == '\0')
1013 				break;
1014 			wp++;
1015 			rp++;
1016 		}
1017 	}
1018 
1019 	res = NULL;
1020 	ressz = 0;
1021 	if (req->isquery && req->q.equal && argc == 1)
1022 		pg_redirect(req, argv[0]);
1023 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1024 		pg_noresult(req, 400, "Bad Request",
1025 		    "You entered an invalid query.");
1026 	else if (ressz == 0)
1027 		pg_noresult(req, 404, "Not Found", "No results found.");
1028 	else
1029 		pg_searchres(req, res, ressz);
1030 
1031 	free(query);
1032 	mansearch_free(res, ressz);
1033 	free(paths.paths[0]);
1034 	free(paths.paths);
1035 }
1036 
1037 int
1038 main(void)
1039 {
1040 	struct req	 req;
1041 	struct itimerval itimer;
1042 	const char	*path;
1043 	const char	*querystring;
1044 	int		 i;
1045 
1046 	/*
1047 	 * The "rpath" pledge could be revoked after mparse_readfd()
1048 	 * if the file desciptor to "/footer.html" would be opened
1049 	 * up front, but it's probably not worth the complication
1050 	 * of the code it would cause: it would require scattering
1051 	 * pledge() calls in multiple low-level resp_*() functions.
1052 	 */
1053 
1054 	if (pledge("stdio rpath", NULL) == -1) {
1055 		warn("pledge");
1056 		pg_error_internal();
1057 		return EXIT_FAILURE;
1058 	}
1059 
1060 	/* Poor man's ReDoS mitigation. */
1061 
1062 	itimer.it_value.tv_sec = 2;
1063 	itimer.it_value.tv_usec = 0;
1064 	itimer.it_interval.tv_sec = 2;
1065 	itimer.it_interval.tv_usec = 0;
1066 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1067 		warn("setitimer");
1068 		pg_error_internal();
1069 		return EXIT_FAILURE;
1070 	}
1071 
1072 	/*
1073 	 * First we change directory into the MAN_DIR so that
1074 	 * subsequent scanning for manpath directories is rooted
1075 	 * relative to the same position.
1076 	 */
1077 
1078 	if (chdir(MAN_DIR) == -1) {
1079 		warn("MAN_DIR: %s", MAN_DIR);
1080 		pg_error_internal();
1081 		return EXIT_FAILURE;
1082 	}
1083 
1084 	memset(&req, 0, sizeof(struct req));
1085 	req.q.equal = 1;
1086 	parse_manpath_conf(&req);
1087 
1088 	/* Parse the path info and the query string. */
1089 
1090 	if ((path = getenv("PATH_INFO")) == NULL)
1091 		path = "";
1092 	else if (*path == '/')
1093 		path++;
1094 
1095 	if (*path != '\0') {
1096 		parse_path_info(&req, path);
1097 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1098 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1099 			path = "";
1100 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1101 		parse_query_string(&req, querystring);
1102 
1103 	/* Validate parsed data and add defaults. */
1104 
1105 	if (req.q.manpath == NULL)
1106 		req.q.manpath = mandoc_strdup(req.p[0]);
1107 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1108 		pg_error_badrequest(
1109 		    "You specified an invalid manpath.");
1110 		return EXIT_FAILURE;
1111 	}
1112 
1113 	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1114 		pg_error_badrequest(
1115 		    "You specified an invalid architecture.");
1116 		return EXIT_FAILURE;
1117 	}
1118 
1119 	/* Dispatch to the three different pages. */
1120 
1121 	if ('\0' != *path)
1122 		pg_show(&req, path);
1123 	else if (NULL != req.q.query)
1124 		pg_search(&req);
1125 	else
1126 		pg_index(&req);
1127 
1128 	free(req.q.manpath);
1129 	free(req.q.arch);
1130 	free(req.q.sec);
1131 	free(req.q.query);
1132 	for (i = 0; i < (int)req.psz; i++)
1133 		free(req.p[i]);
1134 	free(req.p);
1135 	return EXIT_SUCCESS;
1136 }
1137 
1138 /*
1139  * Translate PATH_INFO to a query.
1140  */
1141 static void
1142 parse_path_info(struct req *req, const char *path)
1143 {
1144 	const char	*name, *sec, *end;
1145 
1146 	req->isquery = 0;
1147 	req->q.equal = 1;
1148 	req->q.manpath = NULL;
1149 	req->q.arch = NULL;
1150 
1151 	/* Mandatory manual page name. */
1152 	if ((name = strrchr(path, '/')) == NULL)
1153 		name = path;
1154 	else
1155 		name++;
1156 
1157 	/* Optional trailing section. */
1158 	sec = strrchr(name, '.');
1159 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1160 		req->q.query = mandoc_strndup(name, sec - name - 1);
1161 		req->q.sec = mandoc_strdup(sec);
1162 	} else {
1163 		req->q.query = mandoc_strdup(name);
1164 		req->q.sec = NULL;
1165 	}
1166 
1167 	/* Handle the case of name[.section] only. */
1168 	if (name == path)
1169 		return;
1170 
1171 	/* Optional manpath. */
1172 	end = strchr(path, '/');
1173 	req->q.manpath = mandoc_strndup(path, end - path);
1174 	if (validate_manpath(req, req->q.manpath)) {
1175 		path = end + 1;
1176 		if (name == path)
1177 			return;
1178 	} else {
1179 		free(req->q.manpath);
1180 		req->q.manpath = NULL;
1181 	}
1182 
1183 	/* Optional section. */
1184 	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1185 		path += 3;
1186 		end = strchr(path, '/');
1187 		free(req->q.sec);
1188 		req->q.sec = mandoc_strndup(path, end - path);
1189 		path = end + 1;
1190 		if (name == path)
1191 			return;
1192 	}
1193 
1194 	/* Optional architecture. */
1195 	end = strchr(path, '/');
1196 	if (end + 1 != name) {
1197 		pg_error_badrequest(
1198 		    "You specified too many directory components.");
1199 		exit(EXIT_FAILURE);
1200 	}
1201 	req->q.arch = mandoc_strndup(path, end - path);
1202 	if (validate_arch(req->q.arch) == 0) {
1203 		pg_error_badrequest(
1204 		    "You specified an invalid directory component.");
1205 		exit(EXIT_FAILURE);
1206 	}
1207 }
1208 
1209 /*
1210  * Scan for indexable paths.
1211  */
1212 static void
1213 parse_manpath_conf(struct req *req)
1214 {
1215 	FILE	*fp;
1216 	char	*dp;
1217 	size_t	 dpsz;
1218 	ssize_t	 len;
1219 
1220 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1221 		warn("%s/manpath.conf", MAN_DIR);
1222 		pg_error_internal();
1223 		exit(EXIT_FAILURE);
1224 	}
1225 
1226 	dp = NULL;
1227 	dpsz = 0;
1228 
1229 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1230 		if (dp[len - 1] == '\n')
1231 			dp[--len] = '\0';
1232 		req->p = mandoc_realloc(req->p,
1233 		    (req->psz + 1) * sizeof(char *));
1234 		if ( ! validate_urifrag(dp)) {
1235 			warnx("%s/manpath.conf contains "
1236 			    "unsafe path \"%s\"", MAN_DIR, dp);
1237 			pg_error_internal();
1238 			exit(EXIT_FAILURE);
1239 		}
1240 		if (strchr(dp, '/') != NULL) {
1241 			warnx("%s/manpath.conf contains "
1242 			    "path with slash \"%s\"", MAN_DIR, dp);
1243 			pg_error_internal();
1244 			exit(EXIT_FAILURE);
1245 		}
1246 		req->p[req->psz++] = dp;
1247 		dp = NULL;
1248 		dpsz = 0;
1249 	}
1250 	free(dp);
1251 
1252 	if (req->p == NULL) {
1253 		warnx("%s/manpath.conf is empty", MAN_DIR);
1254 		pg_error_internal();
1255 		exit(EXIT_FAILURE);
1256 	}
1257 }
1258