xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: cgi.c,v 1.109 2020/01/10 15:20:49 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "mandoc_parse.h"
38 #include "main.h"
39 #include "manconf.h"
40 #include "mansearch.h"
41 #include "cgi.h"
42 
/*
 * A query as passed to the search function.
 * The string members are either NULL or allocated strings;
 * main() frees them before exiting.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
53 
/*
 * State of one request: the parsed query plus the list of manpaths
 * that parse_manpath_conf() reads from manpath.conf.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
60 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0, /* no autofocus attribute */
	FOCUS_QUERY /* autofocus the query input box */
};
65 
/*
 * Forward declarations of the file-local functions defined below:
 * html_*() and http_*() handle escaping and decoding, parse_*() fill
 * in the request, pg_*() emit complete pages, resp_*() emit parts of
 * a response, and validate_*() check user-supplied strings.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *p);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, int, const char *,
				const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
96 
/*
 * Name of this CGI program as used in generated links; it is printed
 * after a leading '/' and may be the empty string.
 */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Priorities of the sections 1 to 9, indexed by the section digit
 * minus '1'.  pg_searchres() prefers the page with the smallest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Option values and the matching labels of the section selector
 * printed by resp_searchform(); both arrays share the same index.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
117 
/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",	"arm64",
    "hppa",        "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc64",
    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in arch_names. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
133 
/*
 * Write one character to standard output, replacing the four
 * characters ", &, > and < by HTML entity references.
 * Every other byte, non-ASCII included, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else {
		putchar((unsigned char)c);
		return;
	}
	fputs(entity, stdout);
}
160 
/*
 * Print a string with HTML escaping, one character at a time
 * by way of html_putchar().  A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
174 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is freed
 * as well and stored as NULL.  In both cases, *val is NULL on return
 * because the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	*val = NULL;

	if (*newval == '\0') {
		free(newval);
		newval = NULL;
	}
	*attr = newval;
}
191 
192 /*
193  * Parse the QUERY_STRING for key-value pairs
194  * and store the values into the query structure.
195  */
196 static void
197 parse_query_string(struct req *req, const char *qs)
198 {
199 	char		*key, *val;
200 	size_t		 keysz, valsz;
201 
202 	req->isquery	= 1;
203 	req->q.manpath	= NULL;
204 	req->q.arch	= NULL;
205 	req->q.sec	= NULL;
206 	req->q.query	= NULL;
207 	req->q.equal	= 1;
208 
209 	key = val = NULL;
210 	while (*qs != '\0') {
211 
212 		/* Parse one key. */
213 
214 		keysz = strcspn(qs, "=;&");
215 		key = mandoc_strndup(qs, keysz);
216 		qs += keysz;
217 		if (*qs != '=')
218 			goto next;
219 
220 		/* Parse one value. */
221 
222 		valsz = strcspn(++qs, ";&");
223 		val = mandoc_strndup(qs, valsz);
224 		qs += valsz;
225 
226 		/* Decode and catch encoding errors. */
227 
228 		if ( ! (http_decode(key) && http_decode(val)))
229 			goto next;
230 
231 		/* Handle key-value pairs. */
232 
233 		if ( ! strcmp(key, "query"))
234 			set_query_attr(&req->q.query, &val);
235 
236 		else if ( ! strcmp(key, "apropos"))
237 			req->q.equal = !strcmp(val, "0");
238 
239 		else if ( ! strcmp(key, "manpath")) {
240 #ifdef COMPAT_OLDURI
241 			if ( ! strncmp(val, "OpenBSD ", 8)) {
242 				val[7] = '-';
243 				if ('C' == val[8])
244 					val[8] = 'c';
245 			}
246 #endif
247 			set_query_attr(&req->q.manpath, &val);
248 		}
249 
250 		else if ( ! (strcmp(key, "sec")
251 #ifdef COMPAT_OLDURI
252 		    && strcmp(key, "sektion")
253 #endif
254 		    )) {
255 			if ( ! strcmp(val, "0"))
256 				*val = '\0';
257 			set_query_attr(&req->q.sec, &val);
258 		}
259 
260 		else if ( ! strcmp(key, "arch")) {
261 			if ( ! strcmp(val, "default"))
262 				*val = '\0';
263 			set_query_attr(&req->q.arch, &val);
264 		}
265 
266 		/*
267 		 * The key must be freed in any case.
268 		 * The val may have been handed over to the query
269 		 * structure, in which case it is now NULL.
270 		 */
271 next:
272 		free(key);
273 		key = NULL;
274 		free(val);
275 		val = NULL;
276 
277 		if (*qs != '\0')
278 			qs++;
279 	}
280 }
281 
282 /*
283  * HTTP-decode a string.  The standard explanation is that this turns
284  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
285  * over the allocated string.
286  */
287 static int
288 http_decode(char *p)
289 {
290 	char             hex[3];
291 	char		*q;
292 	int              c;
293 
294 	hex[2] = '\0';
295 
296 	q = p;
297 	for ( ; '\0' != *p; p++, q++) {
298 		if ('%' == *p) {
299 			if ('\0' == (hex[0] = *(p + 1)))
300 				return 0;
301 			if ('\0' == (hex[1] = *(p + 2)))
302 				return 0;
303 			if (1 != sscanf(hex, "%x", &c))
304 				return 0;
305 			if ('\0' == c)
306 				return 0;
307 
308 			*q = (char)c;
309 			p += 2;
310 		} else
311 			*q = '+' == *p ? ' ' : *p;
312 	}
313 
314 	*q = '\0';
315 	return 1;
316 }
317 
/*
 * Print a string to standard output with percent-encoding.
 * Alphanumeric characters and "-._~" are printed as they are,
 * every other byte as '%' followed by two uppercase hex digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
329 
/*
 * Print the HTTP header of an HTML response and flush it.
 * A Status: line is only printed if code differs from 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);

	/* An empty line ends the header. */
	fputs("\r\n", stdout);

	fflush(stdout);
}
346 
/*
 * Copy the content of a file to standard output.
 * Used for the optional header and footer snippets, so a file
 * that cannot be opened is silently skipped.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 rsz, wsz, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Data buffered by stdio has to go out before raw writes. */
	fflush(stdout);

	while ((rsz = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may be short, so loop until all is out. */
		off = 0;
		while (off < rsz) {
			wsz = write(STDOUT_FILENO, buf + off, rsz - off);
			if (wsz == -1) {
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			off += wsz;
		}
	}
	close(fd);
}
361 
362 static void
363 resp_begin_html(int code, const char *msg, const char *file)
364 {
365 	char	*cp;
366 
367 	resp_begin_http(code, msg);
368 
369 	printf("<!DOCTYPE html>\n"
370 	       "<html>\n"
371 	       "<head>\n"
372 	       "  <meta charset=\"UTF-8\"/>\n"
373 	       "  <meta name=\"viewport\""
374 		      " content=\"width=device-width, initial-scale=1.0\">\n"
375 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
376 	       " type=\"text/css\" media=\"all\">\n"
377 	       "  <title>",
378 	       CSS_DIR);
379 	if (file != NULL) {
380 		if ((cp = strrchr(file, '/')) != NULL)
381 			file = cp + 1;
382 		if ((cp = strrchr(file, '.')) != NULL) {
383 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
384 		} else
385 			printf("%s - ", file);
386 	}
387 	printf("%s</title>\n"
388 	       "</head>\n"
389 	       "<body>\n",
390 	       CUSTOMIZE_TITLE);
391 
392 	resp_copy(MAN_DIR "/header.html");
393 }
394 
395 static void
396 resp_end_html(void)
397 {
398 
399 	resp_copy(MAN_DIR "/footer.html");
400 
401 	puts("</body>\n"
402 	     "</html>");
403 }
404 
405 static void
406 resp_searchform(const struct req *req, enum focus focus)
407 {
408 	int		 i;
409 
410 	printf("<form action=\"/%s\" method=\"get\" "
411 	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
412 	       "  <fieldset>\n"
413 	       "    <legend>Manual Page Search Parameters</legend>\n",
414 	       scriptname);
415 
416 	/* Write query input box. */
417 
418 	printf("    <input type=\"search\" name=\"query\" value=\"");
419 	if (req->q.query != NULL)
420 		html_print(req->q.query);
421 	printf( "\" size=\"40\"");
422 	if (focus == FOCUS_QUERY)
423 		printf(" autofocus");
424 	puts(">");
425 
426 	/* Write submission buttons. */
427 
428 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
429 		"man</button>\n"
430 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
431 		"apropos</button>\n"
432 		"    <br/>\n");
433 
434 	/* Write section selector. */
435 
436 	puts("    <select name=\"sec\">");
437 	for (i = 0; i < sec_MAX; i++) {
438 		printf("      <option value=\"%s\"", sec_numbers[i]);
439 		if (NULL != req->q.sec &&
440 		    0 == strcmp(sec_numbers[i], req->q.sec))
441 			printf(" selected=\"selected\"");
442 		printf(">%s</option>\n", sec_names[i]);
443 	}
444 	puts("    </select>");
445 
446 	/* Write architecture selector. */
447 
448 	printf(	"    <select name=\"arch\">\n"
449 		"      <option value=\"default\"");
450 	if (NULL == req->q.arch)
451 		printf(" selected=\"selected\"");
452 	puts(">All Architectures</option>");
453 	for (i = 0; i < arch_MAX; i++) {
454 		printf("      <option");
455 		if (NULL != req->q.arch &&
456 		    0 == strcmp(arch_names[i], req->q.arch))
457 			printf(" selected=\"selected\"");
458 		printf(">%s</option>\n", arch_names[i]);
459 	}
460 	puts("    </select>");
461 
462 	/* Write manpath selector. */
463 
464 	if (req->psz > 1) {
465 		puts("    <select name=\"manpath\">");
466 		for (i = 0; i < (int)req->psz; i++) {
467 			printf("      <option");
468 			if (strcmp(req->q.manpath, req->p[i]) == 0)
469 				printf(" selected=\"selected\"");
470 			printf(">");
471 			html_print(req->p[i]);
472 			puts("</option>");
473 		}
474 		puts("    </select>");
475 	}
476 
477 	puts("  </fieldset>\n"
478 	     "</form>");
479 }
480 
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
494 
495 static int
496 validate_manpath(const struct req *req, const char* manpath)
497 {
498 	size_t	 i;
499 
500 	for (i = 0; i < req->psz; i++)
501 		if ( ! strcmp(manpath, req->p[i]))
502 			return 1;
503 
504 	return 0;
505 }
506 
507 static int
508 validate_arch(const char *arch)
509 {
510 	int	 i;
511 
512 	for (i = 0; i < arch_MAX; i++)
513 		if (strcmp(arch, arch_names[i]) == 0)
514 			return 1;
515 
516 	return 0;
517 }
518 
/*
 * Check a manual page file name relative to a manpath.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 for an acceptable name, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
529 
530 static void
531 pg_index(const struct req *req)
532 {
533 
534 	resp_begin_html(200, NULL, NULL);
535 	resp_searchform(req, FOCUS_QUERY);
536 	printf("<p>\n"
537 	       "This web interface is documented in the\n"
538 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
539 	       "manual, and the\n"
540 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
541 	       "manual explains the query syntax.\n"
542 	       "</p>\n",
543 	       scriptname, *scriptname == '\0' ? "" : "/",
544 	       scriptname, *scriptname == '\0' ? "" : "/");
545 	resp_end_html();
546 }
547 
548 static void
549 pg_noresult(const struct req *req, int code, const char *http_msg,
550     const char *user_msg)
551 {
552 	resp_begin_html(code, http_msg, NULL);
553 	resp_searchform(req, FOCUS_QUERY);
554 	puts("<p>");
555 	puts(user_msg);
556 	puts("</p>");
557 	resp_end_html();
558 }
559 
560 static void
561 pg_error_badrequest(const char *msg)
562 {
563 
564 	resp_begin_html(400, "Bad Request", NULL);
565 	puts("<h1>Bad Request</h1>\n"
566 	     "<p>\n");
567 	puts(msg);
568 	printf("Try again from the\n"
569 	       "<a href=\"/%s\">main page</a>.\n"
570 	       "</p>", scriptname);
571 	resp_end_html();
572 }
573 
/*
 * Print a "500 Internal Server Error" page.
 * No details are given to the user.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>", stdout);
	putchar('\n');
	resp_end_html();
}
581 
582 static void
583 pg_redirect(const struct req *req, const char *name)
584 {
585 	printf("Status: 303 See Other\r\n"
586 	    "Location: /");
587 	if (*scriptname != '\0')
588 		printf("%s/", scriptname);
589 	if (strcmp(req->q.manpath, req->p[0]))
590 		printf("%s/", req->q.manpath);
591 	if (req->q.arch != NULL)
592 		printf("%s/", req->q.arch);
593 	http_encode(name);
594 	if (req->q.sec != NULL) {
595 		putchar('.');
596 		http_encode(req->q.sec);
597 	}
598 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
599 }
600 
601 static void
602 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
603 {
604 	char		*arch, *archend;
605 	const char	*sec;
606 	size_t		 i, iuse;
607 	int		 archprio, archpriouse;
608 	int		 prio, priouse;
609 
610 	for (i = 0; i < sz; i++) {
611 		if (validate_filename(r[i].file))
612 			continue;
613 		warnx("invalid filename %s in %s database",
614 		    r[i].file, req->q.manpath);
615 		pg_error_internal();
616 		return;
617 	}
618 
619 	if (req->isquery && sz == 1) {
620 		/*
621 		 * If we have just one result, then jump there now
622 		 * without any delay.
623 		 */
624 		printf("Status: 303 See Other\r\n"
625 		    "Location: /");
626 		if (*scriptname != '\0')
627 			printf("%s/", scriptname);
628 		if (strcmp(req->q.manpath, req->p[0]))
629 			printf("%s/", req->q.manpath);
630 		printf("%s\r\n"
631 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
632 		    r[0].file);
633 		return;
634 	}
635 
636 	/*
637 	 * In man(1) mode, show one of the pages
638 	 * even if more than one is found.
639 	 */
640 
641 	iuse = 0;
642 	if (req->q.equal || sz == 1) {
643 		priouse = 20;
644 		archpriouse = 3;
645 		for (i = 0; i < sz; i++) {
646 			sec = r[i].file;
647 			sec += strcspn(sec, "123456789");
648 			if (sec[0] == '\0')
649 				continue;
650 			prio = sec_prios[sec[0] - '1'];
651 			if (sec[1] != '/')
652 				prio += 10;
653 			if (req->q.arch == NULL) {
654 				archprio =
655 				    ((arch = strchr(sec + 1, '/'))
656 					== NULL) ? 3 :
657 				    ((archend = strchr(arch + 1, '/'))
658 					== NULL) ? 0 :
659 				    strncmp(arch, "amd64/",
660 					archend - arch) ? 2 : 1;
661 				if (archprio < archpriouse) {
662 					archpriouse = archprio;
663 					priouse = prio;
664 					iuse = i;
665 					continue;
666 				}
667 				if (archprio > archpriouse)
668 					continue;
669 			}
670 			if (prio >= priouse)
671 				continue;
672 			priouse = prio;
673 			iuse = i;
674 		}
675 		resp_begin_html(200, NULL, r[iuse].file);
676 	} else
677 		resp_begin_html(200, NULL, NULL);
678 
679 	resp_searchform(req,
680 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
681 
682 	if (sz > 1) {
683 		puts("<table class=\"results\">");
684 		for (i = 0; i < sz; i++) {
685 			printf("  <tr>\n"
686 			       "    <td>"
687 			       "<a class=\"Xr\" href=\"/");
688 			if (*scriptname != '\0')
689 				printf("%s/", scriptname);
690 			if (strcmp(req->q.manpath, req->p[0]))
691 				printf("%s/", req->q.manpath);
692 			printf("%s\">", r[i].file);
693 			html_print(r[i].names);
694 			printf("</a></td>\n"
695 			       "    <td><span class=\"Nd\">");
696 			html_print(r[i].output);
697 			puts("</span></td>\n"
698 			     "  </tr>");
699 		}
700 		puts("</table>");
701 	}
702 
703 	if (req->q.equal || sz == 1) {
704 		puts("<hr>");
705 		resp_show(req, r[iuse].file);
706 	}
707 
708 	resp_end_html();
709 }
710 
/*
 * Print a preformatted manual page ("cat page") as HTML.
 * Backspace overstriking is translated: "_\bX" prints X in italic,
 * some two-character overstrike combinations print '*' or '+',
 * and any other "X\bY" prints Y in bold.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace,
			 * so p[i] is overstruck by p[i + 2].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of the pair have to be
			 * inspected, which are p[i] and p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
847 
848 static void
849 resp_format(const struct req *req, const char *file)
850 {
851 	struct manoutput conf;
852 	struct mparse	*mp;
853 	struct roff_meta *meta;
854 	void		*vp;
855 	int		 fd;
856 	int		 usepath;
857 
858 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
859 		puts("<p>You specified an invalid manual file.</p>");
860 		return;
861 	}
862 
863 	mchars_alloc();
864 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
865 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
866 	mparse_readfd(mp, fd, file);
867 	close(fd);
868 	meta = mparse_result(mp);
869 
870 	memset(&conf, 0, sizeof(conf));
871 	conf.fragment = 1;
872 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
873 	usepath = strcmp(req->q.manpath, req->p[0]);
874 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
875 	    scriptname, *scriptname == '\0' ? "" : "/",
876 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
877 
878 	vp = html_alloc(&conf);
879 	if (meta->macroset == MACROSET_MDOC)
880 		html_mdoc(vp, meta);
881 	else
882 		html_man(vp, meta);
883 
884 	html_free(vp);
885 	mparse_free(mp);
886 	mchars_free();
887 	free(conf.man);
888 	free(conf.style);
889 }
890 
/*
 * Print one manual page.  After skipping an optional leading "./",
 * a file name starting with 'c' is treated as a preformatted page,
 * anything else as a source file to be parsed and formatted.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
903 
904 static void
905 pg_show(struct req *req, const char *fullpath)
906 {
907 	char		*manpath;
908 	const char	*file;
909 
910 	if ((file = strchr(fullpath, '/')) == NULL) {
911 		pg_error_badrequest(
912 		    "You did not specify a page to show.");
913 		return;
914 	}
915 	manpath = mandoc_strndup(fullpath, file - fullpath);
916 	file++;
917 
918 	if ( ! validate_manpath(req, manpath)) {
919 		pg_error_badrequest(
920 		    "You specified an invalid manpath.");
921 		free(manpath);
922 		return;
923 	}
924 
925 	/*
926 	 * Begin by chdir()ing into the manpath.
927 	 * This way we can pick up the database files, which are
928 	 * relative to the manpath root.
929 	 */
930 
931 	if (chdir(manpath) == -1) {
932 		warn("chdir %s", manpath);
933 		pg_error_internal();
934 		free(manpath);
935 		return;
936 	}
937 	free(manpath);
938 
939 	if ( ! validate_filename(file)) {
940 		pg_error_badrequest(
941 		    "You specified an invalid manual file.");
942 		return;
943 	}
944 
945 	resp_begin_html(200, NULL, file);
946 	resp_searchform(req, FOCUS_NONE);
947 	resp_show(req, file);
948 	resp_end_html();
949 }
950 
951 static void
952 pg_search(const struct req *req)
953 {
954 	struct mansearch	  search;
955 	struct manpaths		  paths;
956 	struct manpage		 *res;
957 	char			**argv;
958 	char			 *query, *rp, *wp;
959 	size_t			  ressz;
960 	int			  argc;
961 
962 	/*
963 	 * Begin by chdir()ing into the root of the manpath.
964 	 * This way we can pick up the database files, which are
965 	 * relative to the manpath root.
966 	 */
967 
968 	if (chdir(req->q.manpath) == -1) {
969 		warn("chdir %s", req->q.manpath);
970 		pg_error_internal();
971 		return;
972 	}
973 
974 	search.arch = req->q.arch;
975 	search.sec = req->q.sec;
976 	search.outkey = "Nd";
977 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
978 	search.firstmatch = 1;
979 
980 	paths.sz = 1;
981 	paths.paths = mandoc_malloc(sizeof(char *));
982 	paths.paths[0] = mandoc_strdup(".");
983 
984 	/*
985 	 * Break apart at spaces with backslash-escaping.
986 	 */
987 
988 	argc = 0;
989 	argv = NULL;
990 	rp = query = mandoc_strdup(req->q.query);
991 	for (;;) {
992 		while (isspace((unsigned char)*rp))
993 			rp++;
994 		if (*rp == '\0')
995 			break;
996 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
997 		argv[argc++] = wp = rp;
998 		for (;;) {
999 			if (isspace((unsigned char)*rp)) {
1000 				*wp = '\0';
1001 				rp++;
1002 				break;
1003 			}
1004 			if (rp[0] == '\\' && rp[1] != '\0')
1005 				rp++;
1006 			if (wp != rp)
1007 				*wp = *rp;
1008 			if (*rp == '\0')
1009 				break;
1010 			wp++;
1011 			rp++;
1012 		}
1013 	}
1014 
1015 	res = NULL;
1016 	ressz = 0;
1017 	if (req->isquery && req->q.equal && argc == 1)
1018 		pg_redirect(req, argv[0]);
1019 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1020 		pg_noresult(req, 400, "Bad Request",
1021 		    "You entered an invalid query.");
1022 	else if (ressz == 0)
1023 		pg_noresult(req, 404, "Not Found", "No results found.");
1024 	else
1025 		pg_searchres(req, res, ressz);
1026 
1027 	free(query);
1028 	mansearch_free(res, ressz);
1029 	free(paths.paths[0]);
1030 	free(paths.paths);
1031 }
1032 
/*
 * Entry point of the CGI program.
 * Restricts the process, changes into MAN_DIR, reads the list of
 * manpaths, parses PATH_INFO or else QUERY_STRING into a request,
 * validates it, and prints one of three pages: a manual page,
 * search results, or the index page.
 * Returns EXIT_FAILURE if setup fails or the request is invalid.
 */
int
main(void)
{
	struct req	 req;
	struct itimerval itimer;
	const char	*path;
	const char	*querystring;
	int		 i;

	/*
	 * The "rpath" pledge could be revoked after mparse_readfd()
	 * if the file descriptor to "/footer.html" would be opened
	 * up front, but it's probably not worth the complication
	 * of the code it would cause: it would require scattering
	 * pledge() calls in multiple low-level resp_*() functions.
	 */

	if (pledge("stdio rpath", NULL) == -1) {
		warn("pledge");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/* Poor man's ReDoS mitigation. */

	itimer.it_value.tv_sec = 2;
	itimer.it_value.tv_usec = 0;
	itimer.it_interval.tv_sec = 2;
	itimer.it_interval.tv_usec = 0;
	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
		warn("setitimer");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/*
	 * First we change directory into the MAN_DIR so that
	 * subsequent scanning for manpath directories is rooted
	 * relative to the same position.
	 */

	if (chdir(MAN_DIR) == -1) {
		warn("MAN_DIR: %s", MAN_DIR);
		pg_error_internal();
		return EXIT_FAILURE;
	}

	memset(&req, 0, sizeof(struct req));
	req.q.equal = 1;
	parse_manpath_conf(&req);

	/* Parse the path info and the query string. */

	if ((path = getenv("PATH_INFO")) == NULL)
		path = "";
	else if (*path == '/')
		path++;

	/*
	 * A non-empty PATH_INFO takes precedence over QUERY_STRING.
	 * The path is only shown directly if it names a manpath,
	 * a section, and an existing file; otherwise, it is cleared
	 * and the query parsed from it is searched for below.
	 */

	if (*path != '\0') {
		parse_path_info(&req, path);
		if (req.q.manpath == NULL || req.q.sec == NULL ||
		    *req.q.query == '\0' || access(path, F_OK) == -1)
			path = "";
	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
		parse_query_string(&req, querystring);

	/* Validate parsed data and add defaults. */

	/* The first configured manpath is the default. */
	if (req.q.manpath == NULL)
		req.q.manpath = mandoc_strdup(req.p[0]);
	else if ( ! validate_manpath(&req, req.q.manpath)) {
		pg_error_badrequest(
		    "You specified an invalid manpath.");
		return EXIT_FAILURE;
	}

	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
		pg_error_badrequest(
		    "You specified an invalid architecture.");
		return EXIT_FAILURE;
	}

	/* Dispatch to the three different pages. */

	if ('\0' != *path)
		pg_show(&req, path);
	else if (NULL != req.q.query)
		pg_search(&req);
	else
		pg_index(&req);

	/* Release the request data. */

	free(req.q.manpath);
	free(req.q.arch);
	free(req.q.sec);
	free(req.q.query);
	for (i = 0; i < (int)req.psz; i++)
		free(req.p[i]);
	free(req.p);
	return EXIT_SUCCESS;
}
1133 
1134 /*
1135  * Translate PATH_INFO to a query.
1136  */
1137 static void
1138 parse_path_info(struct req *req, const char *path)
1139 {
1140 	const char	*name, *sec, *end;
1141 
1142 	req->isquery = 0;
1143 	req->q.equal = 1;
1144 	req->q.manpath = NULL;
1145 	req->q.arch = NULL;
1146 
1147 	/* Mandatory manual page name. */
1148 	if ((name = strrchr(path, '/')) == NULL)
1149 		name = path;
1150 	else
1151 		name++;
1152 
1153 	/* Optional trailing section. */
1154 	sec = strrchr(name, '.');
1155 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1156 		req->q.query = mandoc_strndup(name, sec - name - 1);
1157 		req->q.sec = mandoc_strdup(sec);
1158 	} else {
1159 		req->q.query = mandoc_strdup(name);
1160 		req->q.sec = NULL;
1161 	}
1162 
1163 	/* Handle the case of name[.section] only. */
1164 	if (name == path)
1165 		return;
1166 
1167 	/* Optional manpath. */
1168 	end = strchr(path, '/');
1169 	req->q.manpath = mandoc_strndup(path, end - path);
1170 	if (validate_manpath(req, req->q.manpath)) {
1171 		path = end + 1;
1172 		if (name == path)
1173 			return;
1174 	} else {
1175 		free(req->q.manpath);
1176 		req->q.manpath = NULL;
1177 	}
1178 
1179 	/* Optional section. */
1180 	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1181 		path += 3;
1182 		end = strchr(path, '/');
1183 		free(req->q.sec);
1184 		req->q.sec = mandoc_strndup(path, end - path);
1185 		path = end + 1;
1186 		if (name == path)
1187 			return;
1188 	}
1189 
1190 	/* Optional architecture. */
1191 	end = strchr(path, '/');
1192 	if (end + 1 != name) {
1193 		pg_error_badrequest(
1194 		    "You specified too many directory components.");
1195 		exit(EXIT_FAILURE);
1196 	}
1197 	req->q.arch = mandoc_strndup(path, end - path);
1198 	if (validate_arch(req->q.arch) == 0) {
1199 		pg_error_badrequest(
1200 		    "You specified an invalid directory component.");
1201 		exit(EXIT_FAILURE);
1202 	}
1203 }
1204 
1205 /*
1206  * Scan for indexable paths.
1207  */
1208 static void
1209 parse_manpath_conf(struct req *req)
1210 {
1211 	FILE	*fp;
1212 	char	*dp;
1213 	size_t	 dpsz;
1214 	ssize_t	 len;
1215 
1216 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1217 		warn("%s/manpath.conf", MAN_DIR);
1218 		pg_error_internal();
1219 		exit(EXIT_FAILURE);
1220 	}
1221 
1222 	dp = NULL;
1223 	dpsz = 0;
1224 
1225 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1226 		if (dp[len - 1] == '\n')
1227 			dp[--len] = '\0';
1228 		req->p = mandoc_realloc(req->p,
1229 		    (req->psz + 1) * sizeof(char *));
1230 		if ( ! validate_urifrag(dp)) {
1231 			warnx("%s/manpath.conf contains "
1232 			    "unsafe path \"%s\"", MAN_DIR, dp);
1233 			pg_error_internal();
1234 			exit(EXIT_FAILURE);
1235 		}
1236 		if (strchr(dp, '/') != NULL) {
1237 			warnx("%s/manpath.conf contains "
1238 			    "path with slash \"%s\"", MAN_DIR, dp);
1239 			pg_error_internal();
1240 			exit(EXIT_FAILURE);
1241 		}
1242 		req->p[req->psz++] = dp;
1243 		dp = NULL;
1244 		dpsz = 0;
1245 	}
1246 	free(dp);
1247 
1248 	if (req->p == NULL) {
1249 		warnx("%s/manpath.conf is empty", MAN_DIR);
1250 		pg_error_internal();
1251 		exit(EXIT_FAILURE);
1252 	}
1253 }
1254