xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1 /*	$OpenBSD: cgi.c,v 1.94 2017/06/24 14:38:27 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "main.h"
38 #include "manconf.h"
39 #include "mansearch.h"
40 #include "cgi.h"
41 
/*
 * The search parameters of one request,
 * in the form handed to the search function.
 */
struct	query {
	/* Desired manual directory. */
	char		*manpath;
	/* Architecture. */
	char		*arch;
	/* Manual section. */
	char		*sec;
	/* Unparsed query expression. */
	char		*query;
	/* Match whole names, not substrings. */
	int		 equal;
};
52 
53 struct	req {
54 	struct query	  q;
55 	char		**p; /* array of available manpaths */
56 	size_t		  psz; /* number of available manpaths */
57 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
58 };
59 
/*
 * Selects whether the query input box of the search form
 * is written with the autofocus attribute.
 */
enum	focus {
	FOCUS_NONE = 0,		/* do not request autofocus */
	FOCUS_QUERY = 1		/* autofocus the query input box */
};
64 
65 static	void		 html_print(const char *);
66 static	void		 html_putchar(char);
67 static	int		 http_decode(char *);
68 static	void		 parse_manpath_conf(struct req *);
69 static	void		 parse_path_info(struct req *req, const char *path);
70 static	void		 parse_query_string(struct req *, const char *);
71 static	void		 pg_error_badrequest(const char *);
72 static	void		 pg_error_internal(void);
73 static	void		 pg_index(const struct req *);
74 static	void		 pg_noresult(const struct req *, const char *);
75 static	void		 pg_redirect(const struct req *, const char *);
76 static	void		 pg_search(const struct req *);
77 static	void		 pg_searchres(const struct req *,
78 				struct manpage *, size_t);
79 static	void		 pg_show(struct req *, const char *);
80 static	void		 resp_begin_html(int, const char *, const char *);
81 static	void		 resp_begin_http(int, const char *);
82 static	void		 resp_catman(const struct req *, const char *);
83 static	void		 resp_copy(const char *);
84 static	void		 resp_end_html(void);
85 static	void		 resp_format(const struct req *, const char *);
86 static	void		 resp_searchform(const struct req *, enum focus);
87 static	void		 resp_show(const struct req *, const char *);
88 static	void		 set_query_attr(char **, char **);
89 static	int		 validate_filename(const char *);
90 static	int		 validate_manpath(const struct req *, const char *);
91 static	int		 validate_urifrag(const char *);
92 
93 static	const char	 *scriptname = SCRIPT_NAME;
94 
/*
 * Display priority of the sections 1 to 9, indexed by section
 * digit minus one; lower values are preferred.
 */
static	const int sec_prios[] = {
    1, 4, 5, 8, 6, 3, 7, 2, 9
};

/*
 * Section selector of the search form: the values submitted
 * and the labels shown.  Both arrays are indexed in parallel.
 */
static	const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
113 
/* Architecture names offered by the selector of the search form. */
static	const char *const arch_names[] = {
    "amd64", "alpha", "armv7", "arm64",
    "hppa", "i386", "landisk",
    "loongson", "luna88k", "macppc", "mips64",
    "octeon", "sgi", "socppc", "sparc64",
    "amiga", "arc", "armish", "arm32",
    "atari", "aviion", "beagle", "cats",
    "hppa64", "hp300",
    "ia64", "mac68k", "mvme68k", "mvme88k",
    "mvmeppc", "palm", "pc532", "pegasos",
    "pmax", "powerpc", "solbourne", "sparc",
    "sun3", "vax", "wgrisc", "x68k",
    "zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
129 
/*
 * Write one character to standard output, replacing the four
 * characters ", &, > and < by their HTML entities.
 * Everything else, including non-ASCII bytes, is passed through
 * unchanged: be warned!
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
156 
/*
 * Write a string to standard output by passing each of its
 * characters to html_putchar().
 * A NULL string is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
170 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Any previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL.
 * In both cases, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	newval = *val;
	*val = NULL;

	free(*attr);
	if (*newval != '\0') {
		*attr = newval;
		return;
	}
	free(newval);
	*attr = NULL;
}
187 
188 /*
189  * Parse the QUERY_STRING for key-value pairs
190  * and store the values into the query structure.
191  */
192 static void
193 parse_query_string(struct req *req, const char *qs)
194 {
195 	char		*key, *val;
196 	size_t		 keysz, valsz;
197 
198 	req->isquery	= 1;
199 	req->q.manpath	= NULL;
200 	req->q.arch	= NULL;
201 	req->q.sec	= NULL;
202 	req->q.query	= NULL;
203 	req->q.equal	= 1;
204 
205 	key = val = NULL;
206 	while (*qs != '\0') {
207 
208 		/* Parse one key. */
209 
210 		keysz = strcspn(qs, "=;&");
211 		key = mandoc_strndup(qs, keysz);
212 		qs += keysz;
213 		if (*qs != '=')
214 			goto next;
215 
216 		/* Parse one value. */
217 
218 		valsz = strcspn(++qs, ";&");
219 		val = mandoc_strndup(qs, valsz);
220 		qs += valsz;
221 
222 		/* Decode and catch encoding errors. */
223 
224 		if ( ! (http_decode(key) && http_decode(val)))
225 			goto next;
226 
227 		/* Handle key-value pairs. */
228 
229 		if ( ! strcmp(key, "query"))
230 			set_query_attr(&req->q.query, &val);
231 
232 		else if ( ! strcmp(key, "apropos"))
233 			req->q.equal = !strcmp(val, "0");
234 
235 		else if ( ! strcmp(key, "manpath")) {
236 #ifdef COMPAT_OLDURI
237 			if ( ! strncmp(val, "OpenBSD ", 8)) {
238 				val[7] = '-';
239 				if ('C' == val[8])
240 					val[8] = 'c';
241 			}
242 #endif
243 			set_query_attr(&req->q.manpath, &val);
244 		}
245 
246 		else if ( ! (strcmp(key, "sec")
247 #ifdef COMPAT_OLDURI
248 		    && strcmp(key, "sektion")
249 #endif
250 		    )) {
251 			if ( ! strcmp(val, "0"))
252 				*val = '\0';
253 			set_query_attr(&req->q.sec, &val);
254 		}
255 
256 		else if ( ! strcmp(key, "arch")) {
257 			if ( ! strcmp(val, "default"))
258 				*val = '\0';
259 			set_query_attr(&req->q.arch, &val);
260 		}
261 
262 		/*
263 		 * The key must be freed in any case.
264 		 * The val may have been handed over to the query
265 		 * structure, in which case it is now NULL.
266 		 */
267 next:
268 		free(key);
269 		key = NULL;
270 		free(val);
271 		val = NULL;
272 
273 		if (*qs != '\0')
274 			qs++;
275 	}
276 }
277 
278 /*
279  * HTTP-decode a string.  The standard explanation is that this turns
280  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
281  * over the allocated string.
282  */
283 static int
284 http_decode(char *p)
285 {
286 	char             hex[3];
287 	char		*q;
288 	int              c;
289 
290 	hex[2] = '\0';
291 
292 	q = p;
293 	for ( ; '\0' != *p; p++, q++) {
294 		if ('%' == *p) {
295 			if ('\0' == (hex[0] = *(p + 1)))
296 				return 0;
297 			if ('\0' == (hex[1] = *(p + 2)))
298 				return 0;
299 			if (1 != sscanf(hex, "%x", &c))
300 				return 0;
301 			if ('\0' == c)
302 				return 0;
303 
304 			*q = (char)c;
305 			p += 2;
306 		} else
307 			*q = '+' == *p ? ' ' : *p;
308 	}
309 
310 	*q = '\0';
311 	return 1;
312 }
313 
/*
 * Write the HTTP response header and flush it.
 * A Status line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
328 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Keep the order of buffered and unbuffered output intact. */
	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/*
		 * write(2) may transfer less than requested,
		 * so loop until the whole chunk is out.
		 */
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw == -1) {
				close(fd);
				return;
			}
		}
	}
	close(fd);
}
343 
344 static void
345 resp_begin_html(int code, const char *msg, const char *file)
346 {
347 	char	*cp;
348 
349 	resp_begin_http(code, msg);
350 
351 	printf("<!DOCTYPE html>\n"
352 	       "<html>\n"
353 	       "<head>\n"
354 	       "  <meta charset=\"UTF-8\"/>\n"
355 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
356 	       " type=\"text/css\" media=\"all\">\n"
357 	       "  <title>",
358 	       CSS_DIR);
359 	if (file != NULL) {
360 		if ((cp = strrchr(file, '/')) != NULL)
361 			file = cp + 1;
362 		if ((cp = strrchr(file, '.')) != NULL) {
363 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
364 		} else
365 			printf("%s - ", file);
366 	}
367 	printf("%s</title>\n"
368 	       "</head>\n"
369 	       "<body>\n",
370 	       CUSTOMIZE_TITLE);
371 
372 	resp_copy(MAN_DIR "/header.html");
373 }
374 
375 static void
376 resp_end_html(void)
377 {
378 
379 	resp_copy(MAN_DIR "/footer.html");
380 
381 	puts("</body>\n"
382 	     "</html>");
383 }
384 
385 static void
386 resp_searchform(const struct req *req, enum focus focus)
387 {
388 	int		 i;
389 
390 	printf("<form action=\"/%s\" method=\"get\">\n"
391 	       "  <fieldset>\n"
392 	       "    <legend>Manual Page Search Parameters</legend>\n",
393 	       scriptname);
394 
395 	/* Write query input box. */
396 
397 	printf("    <input type=\"text\" name=\"query\" value=\"");
398 	if (req->q.query != NULL)
399 		html_print(req->q.query);
400 	printf( "\" size=\"40\"");
401 	if (focus == FOCUS_QUERY)
402 		printf(" autofocus");
403 	puts(">");
404 
405 	/* Write submission buttons. */
406 
407 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
408 		"man</button>\n"
409 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
410 		"apropos</button>\n"
411 		"    <br/>\n");
412 
413 	/* Write section selector. */
414 
415 	puts("    <select name=\"sec\">");
416 	for (i = 0; i < sec_MAX; i++) {
417 		printf("      <option value=\"%s\"", sec_numbers[i]);
418 		if (NULL != req->q.sec &&
419 		    0 == strcmp(sec_numbers[i], req->q.sec))
420 			printf(" selected=\"selected\"");
421 		printf(">%s</option>\n", sec_names[i]);
422 	}
423 	puts("    </select>");
424 
425 	/* Write architecture selector. */
426 
427 	printf(	"    <select name=\"arch\">\n"
428 		"      <option value=\"default\"");
429 	if (NULL == req->q.arch)
430 		printf(" selected=\"selected\"");
431 	puts(">All Architectures</option>");
432 	for (i = 0; i < arch_MAX; i++) {
433 		printf("      <option value=\"%s\"", arch_names[i]);
434 		if (NULL != req->q.arch &&
435 		    0 == strcmp(arch_names[i], req->q.arch))
436 			printf(" selected=\"selected\"");
437 		printf(">%s</option>\n", arch_names[i]);
438 	}
439 	puts("    </select>");
440 
441 	/* Write manpath selector. */
442 
443 	if (req->psz > 1) {
444 		puts("    <select name=\"manpath\">");
445 		for (i = 0; i < (int)req->psz; i++) {
446 			printf("      <option ");
447 			if (strcmp(req->q.manpath, req->p[i]) == 0)
448 				printf("selected=\"selected\" ");
449 			printf("value=\"");
450 			html_print(req->p[i]);
451 			printf("\">");
452 			html_print(req->p[i]);
453 			puts("</option>");
454 		}
455 		puts("    </select>");
456 	}
457 
458 	puts("  </fieldset>\n"
459 	     "</form>");
460 }
461 
/*
 * Check that a string consists exclusively of alphanumeric
 * characters and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		/* *cp is not NUL here, so the terminator cannot match. */
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
475 
476 static int
477 validate_manpath(const struct req *req, const char* manpath)
478 {
479 	size_t	 i;
480 
481 	for (i = 0; i < req->psz; i++)
482 		if ( ! strcmp(manpath, req->p[i]))
483 			return 1;
484 
485 	return 0;
486 }
487 
/*
 * Check a file name relative to the manpath root.
 * After skipping an optional leading "./", the name must not
 * contain "../" or "/.." and must begin with "man" or "cat".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	return strncmp(file, "cat", 3) == 0;
}
498 
499 static void
500 pg_index(const struct req *req)
501 {
502 
503 	resp_begin_html(200, NULL, NULL);
504 	resp_searchform(req, FOCUS_QUERY);
505 	printf("<p>\n"
506 	       "This web interface is documented in the\n"
507 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
508 	       "manual, and the\n"
509 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
510 	       "manual explains the query syntax.\n"
511 	       "</p>\n",
512 	       scriptname, *scriptname == '\0' ? "" : "/",
513 	       scriptname, *scriptname == '\0' ? "" : "/");
514 	resp_end_html();
515 }
516 
517 static void
518 pg_noresult(const struct req *req, const char *msg)
519 {
520 	resp_begin_html(200, NULL, NULL);
521 	resp_searchform(req, FOCUS_QUERY);
522 	puts("<p>");
523 	puts(msg);
524 	puts("</p>");
525 	resp_end_html();
526 }
527 
528 static void
529 pg_error_badrequest(const char *msg)
530 {
531 
532 	resp_begin_html(400, "Bad Request", NULL);
533 	puts("<h1>Bad Request</h1>\n"
534 	     "<p>\n");
535 	puts(msg);
536 	printf("Try again from the\n"
537 	       "<a href=\"/%s\">main page</a>.\n"
538 	       "</p>", scriptname);
539 	resp_end_html();
540 }
541 
/*
 * Write a complete "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
549 
550 static void
551 pg_redirect(const struct req *req, const char *name)
552 {
553 	printf("Status: 303 See Other\r\n"
554 	    "Location: /");
555 	if (*scriptname != '\0')
556 		printf("%s/", scriptname);
557 	if (strcmp(req->q.manpath, req->p[0]))
558 		printf("%s/", req->q.manpath);
559 	if (req->q.arch != NULL)
560 		printf("%s/", req->q.arch);
561 	printf("%s", name);
562 	if (req->q.sec != NULL)
563 		printf(".%s", req->q.sec);
564 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
565 }
566 
567 static void
568 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
569 {
570 	char		*arch, *archend;
571 	const char	*sec;
572 	size_t		 i, iuse;
573 	int		 archprio, archpriouse;
574 	int		 prio, priouse;
575 
576 	for (i = 0; i < sz; i++) {
577 		if (validate_filename(r[i].file))
578 			continue;
579 		warnx("invalid filename %s in %s database",
580 		    r[i].file, req->q.manpath);
581 		pg_error_internal();
582 		return;
583 	}
584 
585 	if (req->isquery && sz == 1) {
586 		/*
587 		 * If we have just one result, then jump there now
588 		 * without any delay.
589 		 */
590 		printf("Status: 303 See Other\r\n"
591 		    "Location: /");
592 		if (*scriptname != '\0')
593 			printf("%s/", scriptname);
594 		if (strcmp(req->q.manpath, req->p[0]))
595 			printf("%s/", req->q.manpath);
596 		printf("%s\r\n"
597 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
598 		    r[0].file);
599 		return;
600 	}
601 
602 	/*
603 	 * In man(1) mode, show one of the pages
604 	 * even if more than one is found.
605 	 */
606 
607 	iuse = 0;
608 	if (req->q.equal || sz == 1) {
609 		priouse = 20;
610 		archpriouse = 3;
611 		for (i = 0; i < sz; i++) {
612 			sec = r[i].file;
613 			sec += strcspn(sec, "123456789");
614 			if (sec[0] == '\0')
615 				continue;
616 			prio = sec_prios[sec[0] - '1'];
617 			if (sec[1] != '/')
618 				prio += 10;
619 			if (req->q.arch == NULL) {
620 				archprio =
621 				    ((arch = strchr(sec + 1, '/'))
622 					== NULL) ? 3 :
623 				    ((archend = strchr(arch + 1, '/'))
624 					== NULL) ? 0 :
625 				    strncmp(arch, "amd64/",
626 					archend - arch) ? 2 : 1;
627 				if (archprio < archpriouse) {
628 					archpriouse = archprio;
629 					priouse = prio;
630 					iuse = i;
631 					continue;
632 				}
633 				if (archprio > archpriouse)
634 					continue;
635 			}
636 			if (prio >= priouse)
637 				continue;
638 			priouse = prio;
639 			iuse = i;
640 		}
641 		resp_begin_html(200, NULL, r[iuse].file);
642 	} else
643 		resp_begin_html(200, NULL, NULL);
644 
645 	resp_searchform(req,
646 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
647 
648 	if (sz > 1) {
649 		puts("<table class=\"results\">");
650 		for (i = 0; i < sz; i++) {
651 			printf("  <tr>\n"
652 			       "    <td>"
653 			       "<a class=\"Xr\" href=\"/");
654 			if (*scriptname != '\0')
655 				printf("%s/", scriptname);
656 			if (strcmp(req->q.manpath, req->p[0]))
657 				printf("%s/", req->q.manpath);
658 			printf("%s\">", r[i].file);
659 			html_print(r[i].names);
660 			printf("</a></td>\n"
661 			       "    <td><span class=\"Nd\">");
662 			html_print(r[i].output);
663 			puts("</span></td>\n"
664 			     "  </tr>");
665 		}
666 		puts("</table>");
667 	}
668 
669 	if (req->q.equal || sz == 1) {
670 		puts("<hr>");
671 		resp_show(req, r[iuse].file);
672 	}
673 
674 	resp_end_html();
675 }
676 
/*
 * Write a preformatted manual page (catpage) as HTML.
 * The file is read line by line, and backspace overstrike
 * sequences are translated: "_\bX" becomes italic X, a few
 * known two-character overstrikes become '*' or '+', and any
 * other "X\bY" becomes bold Y.  All text is HTML-escaped.
 * If the file cannot be opened, a short message is written
 * instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed over p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both groups compare the overstruck pair
			 * p[i] and p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
813 
814 static void
815 resp_format(const struct req *req, const char *file)
816 {
817 	struct manoutput conf;
818 	struct mparse	*mp;
819 	struct roff_man	*man;
820 	void		*vp;
821 	int		 fd;
822 	int		 usepath;
823 
824 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
825 		puts("<p>You specified an invalid manual file.</p>");
826 		return;
827 	}
828 
829 	mchars_alloc();
830 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
831 	    MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath);
832 	mparse_readfd(mp, fd, file);
833 	close(fd);
834 
835 	memset(&conf, 0, sizeof(conf));
836 	conf.fragment = 1;
837 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
838 	usepath = strcmp(req->q.manpath, req->p[0]);
839 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
840 	    scriptname, *scriptname == '\0' ? "" : "/",
841 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
842 
843 	mparse_result(mp, &man, NULL);
844 	if (man == NULL) {
845 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
846 		pg_error_internal();
847 		mparse_free(mp);
848 		mchars_free();
849 		return;
850 	}
851 
852 	vp = html_alloc(&conf);
853 
854 	if (man->macroset == MACROSET_MDOC) {
855 		mdoc_validate(man);
856 		html_mdoc(vp, man);
857 	} else {
858 		man_validate(man);
859 		html_man(vp, man);
860 	}
861 
862 	html_free(vp);
863 	mparse_free(mp);
864 	mchars_free();
865 	free(conf.man);
866 	free(conf.style);
867 }
868 
/*
 * Write the content of one manual file.  After skipping an
 * optional leading "./", names starting with 'c' are treated
 * as preformatted pages, all others as manual sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name != 'c') {
		resp_format(req, name);
		return;
	}
	resp_catman(req, name);
}
881 
882 static void
883 pg_show(struct req *req, const char *fullpath)
884 {
885 	char		*manpath;
886 	const char	*file;
887 
888 	if ((file = strchr(fullpath, '/')) == NULL) {
889 		pg_error_badrequest(
890 		    "You did not specify a page to show.");
891 		return;
892 	}
893 	manpath = mandoc_strndup(fullpath, file - fullpath);
894 	file++;
895 
896 	if ( ! validate_manpath(req, manpath)) {
897 		pg_error_badrequest(
898 		    "You specified an invalid manpath.");
899 		free(manpath);
900 		return;
901 	}
902 
903 	/*
904 	 * Begin by chdir()ing into the manpath.
905 	 * This way we can pick up the database files, which are
906 	 * relative to the manpath root.
907 	 */
908 
909 	if (chdir(manpath) == -1) {
910 		warn("chdir %s", manpath);
911 		pg_error_internal();
912 		free(manpath);
913 		return;
914 	}
915 	free(manpath);
916 
917 	if ( ! validate_filename(file)) {
918 		pg_error_badrequest(
919 		    "You specified an invalid manual file.");
920 		return;
921 	}
922 
923 	resp_begin_html(200, NULL, file);
924 	resp_searchform(req, FOCUS_NONE);
925 	resp_show(req, file);
926 	resp_end_html();
927 }
928 
929 static void
930 pg_search(const struct req *req)
931 {
932 	struct mansearch	  search;
933 	struct manpaths		  paths;
934 	struct manpage		 *res;
935 	char			**argv;
936 	char			 *query, *rp, *wp;
937 	size_t			  ressz;
938 	int			  argc;
939 
940 	/*
941 	 * Begin by chdir()ing into the root of the manpath.
942 	 * This way we can pick up the database files, which are
943 	 * relative to the manpath root.
944 	 */
945 
946 	if (chdir(req->q.manpath) == -1) {
947 		warn("chdir %s", req->q.manpath);
948 		pg_error_internal();
949 		return;
950 	}
951 
952 	search.arch = req->q.arch;
953 	search.sec = req->q.sec;
954 	search.outkey = "Nd";
955 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
956 	search.firstmatch = 1;
957 
958 	paths.sz = 1;
959 	paths.paths = mandoc_malloc(sizeof(char *));
960 	paths.paths[0] = mandoc_strdup(".");
961 
962 	/*
963 	 * Break apart at spaces with backslash-escaping.
964 	 */
965 
966 	argc = 0;
967 	argv = NULL;
968 	rp = query = mandoc_strdup(req->q.query);
969 	for (;;) {
970 		while (isspace((unsigned char)*rp))
971 			rp++;
972 		if (*rp == '\0')
973 			break;
974 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
975 		argv[argc++] = wp = rp;
976 		for (;;) {
977 			if (isspace((unsigned char)*rp)) {
978 				*wp = '\0';
979 				rp++;
980 				break;
981 			}
982 			if (rp[0] == '\\' && rp[1] != '\0')
983 				rp++;
984 			if (wp != rp)
985 				*wp = *rp;
986 			if (*rp == '\0')
987 				break;
988 			wp++;
989 			rp++;
990 		}
991 	}
992 
993 	res = NULL;
994 	ressz = 0;
995 	if (req->isquery && req->q.equal && argc == 1)
996 		pg_redirect(req, argv[0]);
997 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
998 		pg_noresult(req, "You entered an invalid query.");
999 	else if (ressz == 0)
1000 		pg_noresult(req, "No results found.");
1001 	else
1002 		pg_searchres(req, res, ressz);
1003 
1004 	free(query);
1005 	mansearch_free(res, ressz);
1006 	free(paths.paths[0]);
1007 	free(paths.paths);
1008 }
1009 
1010 int
1011 main(void)
1012 {
1013 	struct req	 req;
1014 	struct itimerval itimer;
1015 	const char	*path;
1016 	const char	*querystring;
1017 	int		 i;
1018 
1019 	/*
1020 	 * The "rpath" pledge could be revoked after mparse_readfd()
1021 	 * if the file desciptor to "/footer.html" would be opened
1022 	 * up front, but it's probably not worth the complication
1023 	 * of the code it would cause: it would require scattering
1024 	 * pledge() calls in multiple low-level resp_*() functions.
1025 	 */
1026 
1027 	if (pledge("stdio rpath", NULL) == -1) {
1028 		warn("pledge");
1029 		pg_error_internal();
1030 		return EXIT_FAILURE;
1031 	}
1032 
1033 	/* Poor man's ReDoS mitigation. */
1034 
1035 	itimer.it_value.tv_sec = 2;
1036 	itimer.it_value.tv_usec = 0;
1037 	itimer.it_interval.tv_sec = 2;
1038 	itimer.it_interval.tv_usec = 0;
1039 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1040 		warn("setitimer");
1041 		pg_error_internal();
1042 		return EXIT_FAILURE;
1043 	}
1044 
1045 	/*
1046 	 * First we change directory into the MAN_DIR so that
1047 	 * subsequent scanning for manpath directories is rooted
1048 	 * relative to the same position.
1049 	 */
1050 
1051 	if (chdir(MAN_DIR) == -1) {
1052 		warn("MAN_DIR: %s", MAN_DIR);
1053 		pg_error_internal();
1054 		return EXIT_FAILURE;
1055 	}
1056 
1057 	memset(&req, 0, sizeof(struct req));
1058 	req.q.equal = 1;
1059 	parse_manpath_conf(&req);
1060 
1061 	/* Parse the path info and the query string. */
1062 
1063 	if ((path = getenv("PATH_INFO")) == NULL)
1064 		path = "";
1065 	else if (*path == '/')
1066 		path++;
1067 
1068 	if (*path != '\0') {
1069 		parse_path_info(&req, path);
1070 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1071 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1072 			path = "";
1073 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1074 		parse_query_string(&req, querystring);
1075 
1076 	/* Validate parsed data and add defaults. */
1077 
1078 	if (req.q.manpath == NULL)
1079 		req.q.manpath = mandoc_strdup(req.p[0]);
1080 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1081 		pg_error_badrequest(
1082 		    "You specified an invalid manpath.");
1083 		return EXIT_FAILURE;
1084 	}
1085 
1086 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1087 		pg_error_badrequest(
1088 		    "You specified an invalid architecture.");
1089 		return EXIT_FAILURE;
1090 	}
1091 
1092 	/* Dispatch to the three different pages. */
1093 
1094 	if ('\0' != *path)
1095 		pg_show(&req, path);
1096 	else if (NULL != req.q.query)
1097 		pg_search(&req);
1098 	else
1099 		pg_index(&req);
1100 
1101 	free(req.q.manpath);
1102 	free(req.q.arch);
1103 	free(req.q.sec);
1104 	free(req.q.query);
1105 	for (i = 0; i < (int)req.psz; i++)
1106 		free(req.p[i]);
1107 	free(req.p);
1108 	return EXIT_SUCCESS;
1109 }
1110 
1111 /*
1112  * If PATH_INFO is not a file name, translate it to a query.
1113  */
1114 static void
1115 parse_path_info(struct req *req, const char *path)
1116 {
1117 	char	*dir[4];
1118 	int	 i;
1119 
1120 	req->isquery = 0;
1121 	req->q.equal = 1;
1122 	req->q.manpath = mandoc_strdup(path);
1123 	req->q.arch = NULL;
1124 
1125 	/* Mandatory manual page name. */
1126 	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1127 		req->q.query = req->q.manpath;
1128 		req->q.manpath = NULL;
1129 	} else
1130 		*req->q.query++ = '\0';
1131 
1132 	/* Optional trailing section. */
1133 	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1134 		if(isdigit((unsigned char)req->q.sec[1])) {
1135 			*req->q.sec++ = '\0';
1136 			req->q.sec = mandoc_strdup(req->q.sec);
1137 		} else
1138 			req->q.sec = NULL;
1139 	}
1140 
1141 	/* Handle the case of name[.section] only. */
1142 	if (req->q.manpath == NULL)
1143 		return;
1144 	req->q.query = mandoc_strdup(req->q.query);
1145 
1146 	/* Split directory components. */
1147 	dir[i = 0] = req->q.manpath;
1148 	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1149 		if (++i == 3) {
1150 			pg_error_badrequest(
1151 			    "You specified too many directory components.");
1152 			exit(EXIT_FAILURE);
1153 		}
1154 		*dir[i]++ = '\0';
1155 	}
1156 
1157 	/* Optional manpath. */
1158 	if ((i = validate_manpath(req, req->q.manpath)) == 0)
1159 		req->q.manpath = NULL;
1160 	else if (dir[1] == NULL)
1161 		return;
1162 
1163 	/* Optional section. */
1164 	if (strncmp(dir[i], "man", 3) == 0) {
1165 		free(req->q.sec);
1166 		req->q.sec = mandoc_strdup(dir[i++] + 3);
1167 	}
1168 	if (dir[i] == NULL) {
1169 		if (req->q.manpath == NULL)
1170 			free(dir[0]);
1171 		return;
1172 	}
1173 	if (dir[i + 1] != NULL) {
1174 		pg_error_badrequest(
1175 		    "You specified an invalid directory component.");
1176 		exit(EXIT_FAILURE);
1177 	}
1178 
1179 	/* Optional architecture. */
1180 	if (i) {
1181 		req->q.arch = mandoc_strdup(dir[i]);
1182 		if (req->q.manpath == NULL)
1183 			free(dir[0]);
1184 	} else
1185 		req->q.arch = dir[0];
1186 }
1187 
1188 /*
1189  * Scan for indexable paths.
1190  */
1191 static void
1192 parse_manpath_conf(struct req *req)
1193 {
1194 	FILE	*fp;
1195 	char	*dp;
1196 	size_t	 dpsz;
1197 	ssize_t	 len;
1198 
1199 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1200 		warn("%s/manpath.conf", MAN_DIR);
1201 		pg_error_internal();
1202 		exit(EXIT_FAILURE);
1203 	}
1204 
1205 	dp = NULL;
1206 	dpsz = 0;
1207 
1208 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1209 		if (dp[len - 1] == '\n')
1210 			dp[--len] = '\0';
1211 		req->p = mandoc_realloc(req->p,
1212 		    (req->psz + 1) * sizeof(char *));
1213 		if ( ! validate_urifrag(dp)) {
1214 			warnx("%s/manpath.conf contains "
1215 			    "unsafe path \"%s\"", MAN_DIR, dp);
1216 			pg_error_internal();
1217 			exit(EXIT_FAILURE);
1218 		}
1219 		if (strchr(dp, '/') != NULL) {
1220 			warnx("%s/manpath.conf contains "
1221 			    "path with slash \"%s\"", MAN_DIR, dp);
1222 			pg_error_internal();
1223 			exit(EXIT_FAILURE);
1224 		}
1225 		req->p[req->psz++] = dp;
1226 		dp = NULL;
1227 		dpsz = 0;
1228 	}
1229 	free(dp);
1230 
1231 	if (req->p == NULL) {
1232 		warnx("%s/manpath.conf is empty", MAN_DIR);
1233 		pg_error_internal();
1234 		exit(EXIT_FAILURE);
1235 	}
1236 }
1237