xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 5030b68b5dc9572c8575b9b6c2bee71b90256b70)
1 /*	$OpenBSD: cgi.c,v 1.104 2019/03/06 12:32:10 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "mandoc_parse.h"
38 #include "main.h"
39 #include "manconf.h"
40 #include "mansearch.h"
41 #include "cgi.h"
42 
/*
 * A query as passed to the search function.
 * The string members are heap-allocated and owned by this structure
 * (main() frees them); each one is NULL when it was not specified.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture, NULL for all architectures */
	char		*sec; /* manual section, NULL for all sections */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
53 
/*
 * The state of one CGI request: the parsed query plus the list
 * of manpaths read from manpath.conf by parse_manpath_conf().
 */
struct	req {
	struct query	  q; /* parsed query parameters */
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
60 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0, /* do not write "autofocus" */
	FOCUS_QUERY /* write "autofocus" on the query input box */
};
65 
66 static	void		 html_print(const char *);
67 static	void		 html_putchar(char);
68 static	int		 http_decode(char *);
69 static	void		 http_encode(const char *p);
70 static	void		 parse_manpath_conf(struct req *);
71 static	void		 parse_path_info(struct req *req, const char *path);
72 static	void		 parse_query_string(struct req *, const char *);
73 static	void		 pg_error_badrequest(const char *);
74 static	void		 pg_error_internal(void);
75 static	void		 pg_index(const struct req *);
76 static	void		 pg_noresult(const struct req *, const char *);
77 static	void		 pg_redirect(const struct req *, const char *);
78 static	void		 pg_search(const struct req *);
79 static	void		 pg_searchres(const struct req *,
80 				struct manpage *, size_t);
81 static	void		 pg_show(struct req *, const char *);
82 static	void		 resp_begin_html(int, const char *, const char *);
83 static	void		 resp_begin_http(int, const char *);
84 static	void		 resp_catman(const struct req *, const char *);
85 static	void		 resp_copy(const char *);
86 static	void		 resp_end_html(void);
87 static	void		 resp_format(const struct req *, const char *);
88 static	void		 resp_searchform(const struct req *, enum focus);
89 static	void		 resp_show(const struct req *, const char *);
90 static	void		 set_query_attr(char **, char **);
91 static	int		 validate_arch(const char *);
92 static	int		 validate_filename(const char *);
93 static	int		 validate_manpath(const struct req *, const char *);
94 static	int		 validate_urifrag(const char *);
95 
/* Name of the CGI program as it appears in generated URIs. */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Priorities of the sections 1 to 9, indexed by the section digit
 * minus one; pg_searchres() prefers the page with the smallest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/* Values of the section selector; sec_numbers[i] goes with sec_names[i]. */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
/* Labels displayed in the section selector of the search form. */
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
116 
/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",	"arm64",
    "hppa",        "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc64",
    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
132 
133 /*
134  * Print a character, escaping HTML along the way.
135  * This will pass non-ASCII straight to output: be warned!
136  */
137 static void
138 html_putchar(char c)
139 {
140 
141 	switch (c) {
142 	case '"':
143 		printf("&quot;");
144 		break;
145 	case '&':
146 		printf("&amp;");
147 		break;
148 	case '>':
149 		printf("&gt;");
150 		break;
151 	case '<':
152 		printf("&lt;");
153 		break;
154 	default:
155 		putchar((unsigned char)c);
156 		break;
157 	}
158 }
159 
160 /*
161  * Call through to html_putchar().
162  * Accepts NULL strings.
163  */
164 static void
165 html_print(const char *p)
166 {
167 
168 	if (NULL == p)
169 		return;
170 	while ('\0' != *p)
171 		html_putchar(*p++);
172 }
173 
174 /*
175  * Transfer the responsibility for the allocated string *val
176  * to the query structure.
177  */
178 static void
179 set_query_attr(char **attr, char **val)
180 {
181 
182 	free(*attr);
183 	if (**val == '\0') {
184 		*attr = NULL;
185 		free(*val);
186 	} else
187 		*attr = *val;
188 	*val = NULL;
189 }
190 
191 /*
192  * Parse the QUERY_STRING for key-value pairs
193  * and store the values into the query structure.
194  */
195 static void
196 parse_query_string(struct req *req, const char *qs)
197 {
198 	char		*key, *val;
199 	size_t		 keysz, valsz;
200 
201 	req->isquery	= 1;
202 	req->q.manpath	= NULL;
203 	req->q.arch	= NULL;
204 	req->q.sec	= NULL;
205 	req->q.query	= NULL;
206 	req->q.equal	= 1;
207 
208 	key = val = NULL;
209 	while (*qs != '\0') {
210 
211 		/* Parse one key. */
212 
213 		keysz = strcspn(qs, "=;&");
214 		key = mandoc_strndup(qs, keysz);
215 		qs += keysz;
216 		if (*qs != '=')
217 			goto next;
218 
219 		/* Parse one value. */
220 
221 		valsz = strcspn(++qs, ";&");
222 		val = mandoc_strndup(qs, valsz);
223 		qs += valsz;
224 
225 		/* Decode and catch encoding errors. */
226 
227 		if ( ! (http_decode(key) && http_decode(val)))
228 			goto next;
229 
230 		/* Handle key-value pairs. */
231 
232 		if ( ! strcmp(key, "query"))
233 			set_query_attr(&req->q.query, &val);
234 
235 		else if ( ! strcmp(key, "apropos"))
236 			req->q.equal = !strcmp(val, "0");
237 
238 		else if ( ! strcmp(key, "manpath")) {
239 #ifdef COMPAT_OLDURI
240 			if ( ! strncmp(val, "OpenBSD ", 8)) {
241 				val[7] = '-';
242 				if ('C' == val[8])
243 					val[8] = 'c';
244 			}
245 #endif
246 			set_query_attr(&req->q.manpath, &val);
247 		}
248 
249 		else if ( ! (strcmp(key, "sec")
250 #ifdef COMPAT_OLDURI
251 		    && strcmp(key, "sektion")
252 #endif
253 		    )) {
254 			if ( ! strcmp(val, "0"))
255 				*val = '\0';
256 			set_query_attr(&req->q.sec, &val);
257 		}
258 
259 		else if ( ! strcmp(key, "arch")) {
260 			if ( ! strcmp(val, "default"))
261 				*val = '\0';
262 			set_query_attr(&req->q.arch, &val);
263 		}
264 
265 		/*
266 		 * The key must be freed in any case.
267 		 * The val may have been handed over to the query
268 		 * structure, in which case it is now NULL.
269 		 */
270 next:
271 		free(key);
272 		key = NULL;
273 		free(val);
274 		val = NULL;
275 
276 		if (*qs != '\0')
277 			qs++;
278 	}
279 }
280 
281 /*
282  * HTTP-decode a string.  The standard explanation is that this turns
283  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
284  * over the allocated string.
285  */
286 static int
287 http_decode(char *p)
288 {
289 	char             hex[3];
290 	char		*q;
291 	int              c;
292 
293 	hex[2] = '\0';
294 
295 	q = p;
296 	for ( ; '\0' != *p; p++, q++) {
297 		if ('%' == *p) {
298 			if ('\0' == (hex[0] = *(p + 1)))
299 				return 0;
300 			if ('\0' == (hex[1] = *(p + 2)))
301 				return 0;
302 			if (1 != sscanf(hex, "%x", &c))
303 				return 0;
304 			if ('\0' == c)
305 				return 0;
306 
307 			*q = (char)c;
308 			p += 2;
309 		} else
310 			*q = '+' == *p ? ' ' : *p;
311 	}
312 
313 	*q = '\0';
314 	return 1;
315 }
316 
/*
 * Write a string to standard output with percent-encoding:
 * alphanumeric characters and "-._~" are written verbatim,
 * every other byte as "%" followed by two uppercase hex digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
328 
/*
 * Write the HTTP response header to standard output and flush it.
 * A "Status:" line is only written for codes other than 200;
 * msg is the reason phrase for that line.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
343 
/*
 * Copy the content of a file to standard output, bypassing stdio.
 * Pending stdio output is flushed first to keep the output ordered.
 * A file that cannot be opened is silently skipped.
 * Short writes are continued until each chunk is written completely;
 * copying stops early if reading or writing fails.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		off = 0;
		while (off < nr) {
			/* write(2) may accept less than requested. */
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw == -1) {
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			off += nw;
		}
	}
	close(fd);
}
358 
359 static void
360 resp_begin_html(int code, const char *msg, const char *file)
361 {
362 	char	*cp;
363 
364 	resp_begin_http(code, msg);
365 
366 	printf("<!DOCTYPE html>\n"
367 	       "<html>\n"
368 	       "<head>\n"
369 	       "  <meta charset=\"UTF-8\"/>\n"
370 	       "  <meta name=\"viewport\""
371 		      " content=\"width=device-width, initial-scale=1.0\">\n"
372 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
373 	       " type=\"text/css\" media=\"all\">\n"
374 	       "  <title>",
375 	       CSS_DIR);
376 	if (file != NULL) {
377 		if ((cp = strrchr(file, '/')) != NULL)
378 			file = cp + 1;
379 		if ((cp = strrchr(file, '.')) != NULL) {
380 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
381 		} else
382 			printf("%s - ", file);
383 	}
384 	printf("%s</title>\n"
385 	       "</head>\n"
386 	       "<body>\n",
387 	       CUSTOMIZE_TITLE);
388 
389 	resp_copy(MAN_DIR "/header.html");
390 }
391 
392 static void
393 resp_end_html(void)
394 {
395 
396 	resp_copy(MAN_DIR "/footer.html");
397 
398 	puts("</body>\n"
399 	     "</html>");
400 }
401 
/*
 * Write the search form: the query input box, the "man" and "apropos"
 * buttons, the section and architecture selectors, and, if more than
 * one manpath is available, the manpath selector.
 * The fields are preset from the query in req; focus decides whether
 * the query input box gets the "autofocus" attribute.
 * req->q.manpath is passed to strcmp() unchecked when there is more
 * than one manpath, so it must not be NULL in that case.
 */
static void
resp_searchform(const struct req *req, enum focus focus)
{
	int		 i;

	printf("<form action=\"/%s\" method=\"get\">\n"
	       "  <fieldset>\n"
	       "    <legend>Manual Page Search Parameters</legend>\n",
	       scriptname);

	/* Write query input box. */

	printf("    <input type=\"search\" name=\"query\" value=\"");
	if (req->q.query != NULL)
		html_print(req->q.query);
	printf( "\" size=\"40\"");
	if (focus == FOCUS_QUERY)
		printf(" autofocus");
	puts(">");

	/* Write submission buttons. */

	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
		"man</button>\n"
		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
		"apropos</button>\n"
		"    <br/>\n");

	/* Write section selector. */

	puts("    <select name=\"sec\">");
	for (i = 0; i < sec_MAX; i++) {
		printf("      <option value=\"%s\"", sec_numbers[i]);
		if (NULL != req->q.sec &&
		    0 == strcmp(sec_numbers[i], req->q.sec))
			printf(" selected=\"selected\"");
		printf(">%s</option>\n", sec_names[i]);
	}
	puts("    </select>");

	/* Write architecture selector. */

	/* Without an architecture in the query, preselect "default". */
	printf(	"    <select name=\"arch\">\n"
		"      <option value=\"default\"");
	if (NULL == req->q.arch)
		printf(" selected=\"selected\"");
	puts(">All Architectures</option>");
	for (i = 0; i < arch_MAX; i++) {
		printf("      <option");
		if (NULL != req->q.arch &&
		    0 == strcmp(arch_names[i], req->q.arch))
			printf(" selected=\"selected\"");
		printf(">%s</option>\n", arch_names[i]);
	}
	puts("    </select>");

	/* Write manpath selector. */

	/* With a single manpath, there is nothing to choose from. */
	if (req->psz > 1) {
		puts("    <select name=\"manpath\">");
		for (i = 0; i < (int)req->psz; i++) {
			printf("      <option");
			if (strcmp(req->q.manpath, req->p[i]) == 0)
				printf(" selected=\"selected\"");
			printf(">");
			html_print(req->p[i]);
			puts("</option>");
		}
		puts("    </select>");
	}

	puts("  </fieldset>\n"
	     "</form>");
}
476 
/*
 * Check that a string consists exclusively of alphanumeric
 * characters, '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
490 
491 static int
492 validate_manpath(const struct req *req, const char* manpath)
493 {
494 	size_t	 i;
495 
496 	for (i = 0; i < req->psz; i++)
497 		if ( ! strcmp(manpath, req->p[i]))
498 			return 1;
499 
500 	return 0;
501 }
502 
503 static int
504 validate_arch(const char *arch)
505 {
506 	int	 i;
507 
508 	for (i = 0; i < arch_MAX; i++)
509 		if (strcmp(arch, arch_names[i]) == 0)
510 			return 1;
511 
512 	return 0;
513 }
514 
/*
 * Check a file name relative to a manpath root.
 * After skipping an optional leading "./", the name must not
 * contain "../" or "/.." and must start with "man" or "cat".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (strncmp(file, "./", 2) == 0)
		file += 2;

	/* Reject attempts to step out of the manpath. */
	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	return strncmp(file, "cat", 3) == 0;
}
525 
526 static void
527 pg_index(const struct req *req)
528 {
529 
530 	resp_begin_html(200, NULL, NULL);
531 	resp_searchform(req, FOCUS_QUERY);
532 	printf("<p>\n"
533 	       "This web interface is documented in the\n"
534 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
535 	       "manual, and the\n"
536 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
537 	       "manual explains the query syntax.\n"
538 	       "</p>\n",
539 	       scriptname, *scriptname == '\0' ? "" : "/",
540 	       scriptname, *scriptname == '\0' ? "" : "/");
541 	resp_end_html();
542 }
543 
544 static void
545 pg_noresult(const struct req *req, const char *msg)
546 {
547 	resp_begin_html(200, NULL, NULL);
548 	resp_searchform(req, FOCUS_QUERY);
549 	puts("<p>");
550 	puts(msg);
551 	puts("</p>");
552 	resp_end_html();
553 }
554 
555 static void
556 pg_error_badrequest(const char *msg)
557 {
558 
559 	resp_begin_html(400, "Bad Request", NULL);
560 	puts("<h1>Bad Request</h1>\n"
561 	     "<p>\n");
562 	puts(msg);
563 	printf("Try again from the\n"
564 	       "<a href=\"/%s\">main page</a>.\n"
565 	       "</p>", scriptname);
566 	resp_end_html();
567 }
568 
/*
 * Write a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
576 
577 static void
578 pg_redirect(const struct req *req, const char *name)
579 {
580 	printf("Status: 303 See Other\r\n"
581 	    "Location: /");
582 	if (*scriptname != '\0')
583 		printf("%s/", scriptname);
584 	if (strcmp(req->q.manpath, req->p[0]))
585 		printf("%s/", req->q.manpath);
586 	if (req->q.arch != NULL)
587 		printf("%s/", req->q.arch);
588 	http_encode(name);
589 	if (req->q.sec != NULL) {
590 		putchar('.');
591 		http_encode(req->q.sec);
592 	}
593 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
594 }
595 
596 static void
597 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
598 {
599 	char		*arch, *archend;
600 	const char	*sec;
601 	size_t		 i, iuse;
602 	int		 archprio, archpriouse;
603 	int		 prio, priouse;
604 
605 	for (i = 0; i < sz; i++) {
606 		if (validate_filename(r[i].file))
607 			continue;
608 		warnx("invalid filename %s in %s database",
609 		    r[i].file, req->q.manpath);
610 		pg_error_internal();
611 		return;
612 	}
613 
614 	if (req->isquery && sz == 1) {
615 		/*
616 		 * If we have just one result, then jump there now
617 		 * without any delay.
618 		 */
619 		printf("Status: 303 See Other\r\n"
620 		    "Location: /");
621 		if (*scriptname != '\0')
622 			printf("%s/", scriptname);
623 		if (strcmp(req->q.manpath, req->p[0]))
624 			printf("%s/", req->q.manpath);
625 		printf("%s\r\n"
626 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
627 		    r[0].file);
628 		return;
629 	}
630 
631 	/*
632 	 * In man(1) mode, show one of the pages
633 	 * even if more than one is found.
634 	 */
635 
636 	iuse = 0;
637 	if (req->q.equal || sz == 1) {
638 		priouse = 20;
639 		archpriouse = 3;
640 		for (i = 0; i < sz; i++) {
641 			sec = r[i].file;
642 			sec += strcspn(sec, "123456789");
643 			if (sec[0] == '\0')
644 				continue;
645 			prio = sec_prios[sec[0] - '1'];
646 			if (sec[1] != '/')
647 				prio += 10;
648 			if (req->q.arch == NULL) {
649 				archprio =
650 				    ((arch = strchr(sec + 1, '/'))
651 					== NULL) ? 3 :
652 				    ((archend = strchr(arch + 1, '/'))
653 					== NULL) ? 0 :
654 				    strncmp(arch, "amd64/",
655 					archend - arch) ? 2 : 1;
656 				if (archprio < archpriouse) {
657 					archpriouse = archprio;
658 					priouse = prio;
659 					iuse = i;
660 					continue;
661 				}
662 				if (archprio > archpriouse)
663 					continue;
664 			}
665 			if (prio >= priouse)
666 				continue;
667 			priouse = prio;
668 			iuse = i;
669 		}
670 		resp_begin_html(200, NULL, r[iuse].file);
671 	} else
672 		resp_begin_html(200, NULL, NULL);
673 
674 	resp_searchform(req,
675 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
676 
677 	if (sz > 1) {
678 		puts("<table class=\"results\">");
679 		for (i = 0; i < sz; i++) {
680 			printf("  <tr>\n"
681 			       "    <td>"
682 			       "<a class=\"Xr\" href=\"/");
683 			if (*scriptname != '\0')
684 				printf("%s/", scriptname);
685 			if (strcmp(req->q.manpath, req->p[0]))
686 				printf("%s/", req->q.manpath);
687 			printf("%s\">", r[i].file);
688 			html_print(r[i].names);
689 			printf("</a></td>\n"
690 			       "    <td><span class=\"Nd\">");
691 			html_print(r[i].output);
692 			puts("</span></td>\n"
693 			     "  </tr>");
694 		}
695 		puts("</table>");
696 	}
697 
698 	if (req->q.equal || sz == 1) {
699 		puts("<hr>");
700 		resp_show(req, r[iuse].file);
701 	}
702 
703 	resp_end_html();
704 }
705 
/*
 * Write a preformatted manual page ("cat page") as HTML.
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, a few special pairs
 * become '*' or '+', and any other "X\bY" becomes bold Y.
 * All other characters are written with HTML escaping.
 * If the file cannot be opened, an error paragraph is written.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed over p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				/*
				 * The overstruck character is p[i + 2];
				 * p[i + 1] is always the backspace here.
				 */
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
842 
/*
 * Parse a manual page source file and write it as HTML,
 * using the mdoc formatter if the parse result reports
 * MACROSET_MDOC and the man formatter otherwise.
 * If the file cannot be opened, an error paragraph is written.
 */
static void
resp_format(const struct req *req, const char *file)
{
	struct manoutput conf;
	struct mparse	*mp;
	struct roff_meta *meta;
	void		*vp;
	int		 fd;
	int		 usepath;

	if (-1 == (fd = open(file, O_RDONLY, 0))) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	/* Parse the file; the descriptor is not needed afterwards. */
	mchars_alloc();
	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
	mparse_readfd(mp, fd, file);
	close(fd);
	meta = mparse_result(mp);

	/*
	 * Set up the formatter configuration.
	 * conf.man becomes "/[scriptname/][manpath/]%N.%S", where
	 * the manpath is left out if it is the first available one.
	 * NOTE(review): %N and %S are presumably replaced with the
	 * name and section of the link target by the HTML formatter;
	 * confirm against its documentation.
	 */
	memset(&conf, 0, sizeof(conf));
	conf.fragment = 1;
	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
	conf.toc = 1;
	usepath = strcmp(req->q.manpath, req->p[0]);
	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
	    scriptname, *scriptname == '\0' ? "" : "/",
	    usepath ? req->q.manpath : "", usepath ? "/" : "");

	vp = html_alloc(&conf);
	if (meta->macroset == MACROSET_MDOC)
		html_mdoc(vp, meta);
	else
		html_man(vp, meta);

	/* Release the formatter, the parser and the strings built above. */
	html_free(vp);
	mparse_free(mp);
	mchars_free();
	free(conf.man);
	free(conf.style);
}
886 
/*
 * Write the content of one manual page file.
 * After skipping an optional leading "./", a name starting with 'c'
 * is handled as a preformatted page by resp_catman() and any other
 * name as a manual source file by resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
899 
900 static void
901 pg_show(struct req *req, const char *fullpath)
902 {
903 	char		*manpath;
904 	const char	*file;
905 
906 	if ((file = strchr(fullpath, '/')) == NULL) {
907 		pg_error_badrequest(
908 		    "You did not specify a page to show.");
909 		return;
910 	}
911 	manpath = mandoc_strndup(fullpath, file - fullpath);
912 	file++;
913 
914 	if ( ! validate_manpath(req, manpath)) {
915 		pg_error_badrequest(
916 		    "You specified an invalid manpath.");
917 		free(manpath);
918 		return;
919 	}
920 
921 	/*
922 	 * Begin by chdir()ing into the manpath.
923 	 * This way we can pick up the database files, which are
924 	 * relative to the manpath root.
925 	 */
926 
927 	if (chdir(manpath) == -1) {
928 		warn("chdir %s", manpath);
929 		pg_error_internal();
930 		free(manpath);
931 		return;
932 	}
933 	free(manpath);
934 
935 	if ( ! validate_filename(file)) {
936 		pg_error_badrequest(
937 		    "You specified an invalid manual file.");
938 		return;
939 	}
940 
941 	resp_begin_html(200, NULL, file);
942 	resp_searchform(req, FOCUS_NONE);
943 	resp_show(req, file);
944 	resp_end_html();
945 }
946 
947 static void
948 pg_search(const struct req *req)
949 {
950 	struct mansearch	  search;
951 	struct manpaths		  paths;
952 	struct manpage		 *res;
953 	char			**argv;
954 	char			 *query, *rp, *wp;
955 	size_t			  ressz;
956 	int			  argc;
957 
958 	/*
959 	 * Begin by chdir()ing into the root of the manpath.
960 	 * This way we can pick up the database files, which are
961 	 * relative to the manpath root.
962 	 */
963 
964 	if (chdir(req->q.manpath) == -1) {
965 		warn("chdir %s", req->q.manpath);
966 		pg_error_internal();
967 		return;
968 	}
969 
970 	search.arch = req->q.arch;
971 	search.sec = req->q.sec;
972 	search.outkey = "Nd";
973 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
974 	search.firstmatch = 1;
975 
976 	paths.sz = 1;
977 	paths.paths = mandoc_malloc(sizeof(char *));
978 	paths.paths[0] = mandoc_strdup(".");
979 
980 	/*
981 	 * Break apart at spaces with backslash-escaping.
982 	 */
983 
984 	argc = 0;
985 	argv = NULL;
986 	rp = query = mandoc_strdup(req->q.query);
987 	for (;;) {
988 		while (isspace((unsigned char)*rp))
989 			rp++;
990 		if (*rp == '\0')
991 			break;
992 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
993 		argv[argc++] = wp = rp;
994 		for (;;) {
995 			if (isspace((unsigned char)*rp)) {
996 				*wp = '\0';
997 				rp++;
998 				break;
999 			}
1000 			if (rp[0] == '\\' && rp[1] != '\0')
1001 				rp++;
1002 			if (wp != rp)
1003 				*wp = *rp;
1004 			if (*rp == '\0')
1005 				break;
1006 			wp++;
1007 			rp++;
1008 		}
1009 	}
1010 
1011 	res = NULL;
1012 	ressz = 0;
1013 	if (req->isquery && req->q.equal && argc == 1)
1014 		pg_redirect(req, argv[0]);
1015 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1016 		pg_noresult(req, "You entered an invalid query.");
1017 	else if (ressz == 0)
1018 		pg_noresult(req, "No results found.");
1019 	else
1020 		pg_searchres(req, res, ressz);
1021 
1022 	free(query);
1023 	mansearch_free(res, ressz);
1024 	free(paths.paths[0]);
1025 	free(paths.paths);
1026 }
1027 
/*
 * Entry point of the CGI program.
 * Restricts the process with pledge(2), arms a CPU time timer,
 * changes into MAN_DIR, reads the list of manpaths, parses PATH_INFO
 * or else QUERY_STRING, validates the result, and dispatches to the
 * page showing one manual, the search page, or the index page.
 * Returns EXIT_FAILURE after writing an error page if setup or
 * validation fails, EXIT_SUCCESS otherwise.
 */
int
main(void)
{
	struct req	 req;
	struct itimerval itimer;
	const char	*path;
	const char	*querystring;
	int		 i;

	/*
	 * The "rpath" pledge could be revoked after mparse_readfd()
	 * if the file descriptor to "/footer.html" would be opened
	 * up front, but it's probably not worth the complication
	 * of the code it would cause: it would require scattering
	 * pledge() calls in multiple low-level resp_*() functions.
	 */

	if (pledge("stdio rpath", NULL) == -1) {
		warn("pledge");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/* Poor man's ReDoS mitigation. */

	itimer.it_value.tv_sec = 2;
	itimer.it_value.tv_usec = 0;
	itimer.it_interval.tv_sec = 2;
	itimer.it_interval.tv_usec = 0;
	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
		warn("setitimer");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/*
	 * First we change directory into the MAN_DIR so that
	 * subsequent scanning for manpath directories is rooted
	 * relative to the same position.
	 */

	if (chdir(MAN_DIR) == -1) {
		warn("MAN_DIR: %s", MAN_DIR);
		pg_error_internal();
		return EXIT_FAILURE;
	}

	memset(&req, 0, sizeof(struct req));
	req.q.equal = 1;
	parse_manpath_conf(&req);

	/* Parse the path info and the query string. */

	if ((path = getenv("PATH_INFO")) == NULL)
		path = "";
	else if (*path == '/')
		path++;

	/*
	 * A non-empty PATH_INFO takes precedence over QUERY_STRING.
	 * The path is only shown directly if it contains a manpath,
	 * a section and a name and if the file exists; otherwise,
	 * it is cleared, such that the parsed query is searched for.
	 */

	if (*path != '\0') {
		parse_path_info(&req, path);
		if (req.q.manpath == NULL || req.q.sec == NULL ||
		    *req.q.query == '\0' || access(path, F_OK) == -1)
			path = "";
	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
		parse_query_string(&req, querystring);

	/* Validate parsed data and add defaults. */

	/* The default manpath is the first one from manpath.conf. */
	if (req.q.manpath == NULL)
		req.q.manpath = mandoc_strdup(req.p[0]);
	else if ( ! validate_manpath(&req, req.q.manpath)) {
		pg_error_badrequest(
		    "You specified an invalid manpath.");
		return EXIT_FAILURE;
	}

	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
		pg_error_badrequest(
		    "You specified an invalid architecture.");
		return EXIT_FAILURE;
	}

	/* Dispatch to the three different pages. */

	if ('\0' != *path)
		pg_show(&req, path);
	else if (NULL != req.q.query)
		pg_search(&req);
	else
		pg_index(&req);

	/* Release the query strings and the list of manpaths. */
	free(req.q.manpath);
	free(req.q.arch);
	free(req.q.sec);
	free(req.q.query);
	for (i = 0; i < (int)req.psz; i++)
		free(req.p[i]);
	free(req.p);
	return EXIT_SUCCESS;
}
1128 
/*
 * Translate PATH_INFO to a query.
 * The path has the form [manpath/][{man|cat}sec/][arch/]name[.sec],
 * without a leading slash.  The components found are stored into
 * req->q as newly allocated strings; the previous pointers in
 * req->q are overwritten without being freed.
 * If an unexpected directory component remains or the architecture
 * is invalid, a bad request page is written and the program exits.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	const char	*name, *sec, *end;

	req->isquery = 0;
	req->q.equal = 1;
	req->q.manpath = NULL;
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((name = strrchr(path, '/')) == NULL)
		name = path;
	else
		name++;

	/* Optional trailing section. */
	/* A suffix only counts as a section if it starts with a digit. */
	sec = strrchr(name, '.');
	if (sec != NULL && isdigit((unsigned char)*++sec)) {
		req->q.query = mandoc_strndup(name, sec - name - 1);
		req->q.sec = mandoc_strdup(sec);
	} else {
		req->q.query = mandoc_strdup(name);
		req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (name == path)
		return;

	/*
	 * From here on, path always contains at least one slash
	 * before name, so the strchr() calls below cannot fail.
	 */

	/* Optional manpath. */
	end = strchr(path, '/');
	req->q.manpath = mandoc_strndup(path, end - path);
	if (validate_manpath(req, req->q.manpath)) {
		path = end + 1;
		if (name == path)
			return;
	} else {
		/* Not a manpath: retry the component as a section. */
		free(req->q.manpath);
		req->q.manpath = NULL;
	}

	/* Optional section. */
	/* A section directory overrides the section from the suffix. */
	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
		path += 3;
		end = strchr(path, '/');
		free(req->q.sec);
		req->q.sec = mandoc_strndup(path, end - path);
		path = end + 1;
		if (name == path)
			return;
	}

	/* Optional architecture. */
	/* It must be the last directory component before the name. */
	end = strchr(path, '/');
	if (end + 1 != name) {
		pg_error_badrequest(
		    "You specified too many directory components.");
		exit(EXIT_FAILURE);
	}
	req->q.arch = mandoc_strndup(path, end - path);
	if (validate_arch(req->q.arch) == 0) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}
}
1199 
1200 /*
1201  * Scan for indexable paths.
1202  */
1203 static void
1204 parse_manpath_conf(struct req *req)
1205 {
1206 	FILE	*fp;
1207 	char	*dp;
1208 	size_t	 dpsz;
1209 	ssize_t	 len;
1210 
1211 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1212 		warn("%s/manpath.conf", MAN_DIR);
1213 		pg_error_internal();
1214 		exit(EXIT_FAILURE);
1215 	}
1216 
1217 	dp = NULL;
1218 	dpsz = 0;
1219 
1220 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1221 		if (dp[len - 1] == '\n')
1222 			dp[--len] = '\0';
1223 		req->p = mandoc_realloc(req->p,
1224 		    (req->psz + 1) * sizeof(char *));
1225 		if ( ! validate_urifrag(dp)) {
1226 			warnx("%s/manpath.conf contains "
1227 			    "unsafe path \"%s\"", MAN_DIR, dp);
1228 			pg_error_internal();
1229 			exit(EXIT_FAILURE);
1230 		}
1231 		if (strchr(dp, '/') != NULL) {
1232 			warnx("%s/manpath.conf contains "
1233 			    "path with slash \"%s\"", MAN_DIR, dp);
1234 			pg_error_internal();
1235 			exit(EXIT_FAILURE);
1236 		}
1237 		req->p[req->psz++] = dp;
1238 		dp = NULL;
1239 		dpsz = 0;
1240 	}
1241 	free(dp);
1242 
1243 	if (req->p == NULL) {
1244 		warnx("%s/manpath.conf is empty", MAN_DIR);
1245 		pg_error_internal();
1246 		exit(EXIT_FAILURE);
1247 	}
1248 }
1249