xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 2777ee89d0e541ec819d05abee114837837abbec)
1 /*	$OpenBSD: cgi.c,v 1.70 2016/04/29 10:45:06 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "main.h"
38 #include "manconf.h"
39 #include "mansearch.h"
40 #include "cgi.h"
41 
/*
 * The search parameters of one request, in the form in which
 * they are handed to the search function.
 * The string members are either NULL or heap-allocated.
 */
struct	query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
52 
53 struct	req {
54 	struct query	  q;
55 	char		**p; /* array of available manpaths */
56 	size_t		  psz; /* number of available manpaths */
57 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
58 };
59 
/*
 * Which element of the search form, if any, gets the
 * "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,	/* no element is focused */
	FOCUS_QUERY	/* the query input box is focused */
};
64 
/* Forward declarations of the file-local functions. */
static	void		 html_print(const char *p);
static	void		 html_putchar(char c);
static	int		 http_decode(char *p);
static	void		 parse_manpath_conf(struct req *req);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *req,
				const char *qs);
static	void		 pg_error_badrequest(const char *msg);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *req);
static	void		 pg_noresult(const struct req *req,
				const char *msg);
static	void		 pg_search(const struct req *req);
static	void		 pg_searchres(const struct req *req,
				struct manpage *r, size_t sz);
static	void		 pg_show(struct req *req, const char *fullpath);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_catman(const struct req *req,
				const char *file);
static	void		 resp_copy(const char *filename);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *req,
				const char *file);
static	void		 resp_searchform(const struct req *req,
				enum focus focus);
static	void		 resp_show(const struct req *req,
				const char *file);
static	void		 set_query_attr(char **attr, char **val);
static	int		 validate_filename(const char *file);
static	int		 validate_manpath(const struct req *req,
				const char *manpath);
static	int		 validate_urifrag(const char *frag);
91 
92 static	const char	 *scriptname = SCRIPT_NAME;
93 
/*
 * Priority of the sections 1 to 9, indexed by section digit
 * minus one.  When several pages match in man(1) mode, the page
 * with the smallest priority value is the one that is shown.
 */
static	const int sec_prios[] = { 1, 4, 5, 8, 6, 3, 7, 2, 9 };

/*
 * Values and labels of the section selector in the search form.
 * Both arrays are indexed in parallel and must have equal length.
 */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};

/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
112 
/*
 * Architecture names offered by the architecture selector
 * of the search form, in the order in which they are listed.
 */
static	const char *const arch_names[] = {
	"amd64", "alpha", "armish", "armv7", "hppa", "hppa64",
	"i386", "landisk", "loongson", "luna88k", "macppc", "mips64",
	"octeon", "sgi", "socppc", "sparc", "sparc64", "zaurus",
	"amiga", "arc", "arm32", "atari", "aviion", "beagle",
	"cats", "hp300", "ia64", "mac68k", "mvme68k", "mvme88k",
	"mvmeppc", "palm", "pc532", "pegasos", "pmax", "powerpc",
	"solbourne", "sun3", "vax", "wgrisc", "x68k"
};

/* Number of entries in the architecture selector. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
127 
/*
 * Print a character, escaping HTML along the way.
 * The characters '"', '&', '>' and '<' are replaced by the
 * named character references &quot;, &amp;, &gt; and &lt;;
 * every other byte is written unchanged.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/*
		 * The entity is spelled "quot"; "&quote;" is not a
		 * known reference and would show up literally.
		 */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
154 
/*
 * Write a string to standard output, passing each of its
 * characters through html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
168 
/*
 * Hand the allocated string *val over to the query attribute
 * *attr.  The previous value of *attr is freed.  An empty
 * string is not stored: it is freed and *attr becomes NULL.
 * In both cases, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*oldval;

	oldval = *attr;
	free(oldval);

	if (**val != '\0') {
		/* Ownership moves to the query structure. */
		*attr = *val;
		*val = NULL;
		return;
	}

	/* An empty value means "not set". */
	free(*val);
	*attr = NULL;
	*val = NULL;
}
185 
186 /*
187  * Parse the QUERY_STRING for key-value pairs
188  * and store the values into the query structure.
189  */
190 static void
191 parse_query_string(struct req *req, const char *qs)
192 {
193 	char		*key, *val;
194 	size_t		 keysz, valsz;
195 
196 	req->isquery	= 1;
197 	req->q.manpath	= NULL;
198 	req->q.arch	= NULL;
199 	req->q.sec	= NULL;
200 	req->q.query	= NULL;
201 	req->q.equal	= 1;
202 
203 	key = val = NULL;
204 	while (*qs != '\0') {
205 
206 		/* Parse one key. */
207 
208 		keysz = strcspn(qs, "=;&");
209 		key = mandoc_strndup(qs, keysz);
210 		qs += keysz;
211 		if (*qs != '=')
212 			goto next;
213 
214 		/* Parse one value. */
215 
216 		valsz = strcspn(++qs, ";&");
217 		val = mandoc_strndup(qs, valsz);
218 		qs += valsz;
219 
220 		/* Decode and catch encoding errors. */
221 
222 		if ( ! (http_decode(key) && http_decode(val)))
223 			goto next;
224 
225 		/* Handle key-value pairs. */
226 
227 		if ( ! strcmp(key, "query"))
228 			set_query_attr(&req->q.query, &val);
229 
230 		else if ( ! strcmp(key, "apropos"))
231 			req->q.equal = !strcmp(val, "0");
232 
233 		else if ( ! strcmp(key, "manpath")) {
234 #ifdef COMPAT_OLDURI
235 			if ( ! strncmp(val, "OpenBSD ", 8)) {
236 				val[7] = '-';
237 				if ('C' == val[8])
238 					val[8] = 'c';
239 			}
240 #endif
241 			set_query_attr(&req->q.manpath, &val);
242 		}
243 
244 		else if ( ! (strcmp(key, "sec")
245 #ifdef COMPAT_OLDURI
246 		    && strcmp(key, "sektion")
247 #endif
248 		    )) {
249 			if ( ! strcmp(val, "0"))
250 				*val = '\0';
251 			set_query_attr(&req->q.sec, &val);
252 		}
253 
254 		else if ( ! strcmp(key, "arch")) {
255 			if ( ! strcmp(val, "default"))
256 				*val = '\0';
257 			set_query_attr(&req->q.arch, &val);
258 		}
259 
260 		/*
261 		 * The key must be freed in any case.
262 		 * The val may have been handed over to the query
263 		 * structure, in which case it is now NULL.
264 		 */
265 next:
266 		free(key);
267 		key = NULL;
268 		free(val);
269 		val = NULL;
270 
271 		if (*qs != '\0')
272 			qs++;
273 	}
274 }
275 
276 /*
277  * HTTP-decode a string.  The standard explanation is that this turns
278  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
279  * over the allocated string.
280  */
281 static int
282 http_decode(char *p)
283 {
284 	char             hex[3];
285 	char		*q;
286 	int              c;
287 
288 	hex[2] = '\0';
289 
290 	q = p;
291 	for ( ; '\0' != *p; p++, q++) {
292 		if ('%' == *p) {
293 			if ('\0' == (hex[0] = *(p + 1)))
294 				return 0;
295 			if ('\0' == (hex[1] = *(p + 2)))
296 				return 0;
297 			if (1 != sscanf(hex, "%x", &c))
298 				return 0;
299 			if ('\0' == c)
300 				return 0;
301 
302 			*q = (char)c;
303 			p += 2;
304 		} else
305 			*q = '+' == *p ? ' ' : *p;
306 	}
307 
308 	*q = '\0';
309 	return 1;
310 }
311 
/*
 * Write the HTTP response headers to standard output and flush.
 * A Status line is only written for codes other than 200;
 * msg is the reason phrase going with the code.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
326 
/*
 * Copy the content of the file to standard output.
 * Nothing is written if the file cannot be opened, and copying
 * stops silently at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The copy bypasses stdio, so push out whatever
	 * is still buffered to keep the output in order.
	 */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/* write(2) may accept less than requested. */

		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw <= 0)
				goto out;
		}
	}
out:
	/* Do not leak the descriptor. */

	close(fd);
}
340 
341 static void
342 resp_begin_html(int code, const char *msg)
343 {
344 
345 	resp_begin_http(code, msg);
346 
347 	printf("<!DOCTYPE html>\n"
348 	       "<html>\n"
349 	       "<head>\n"
350 	       "<meta charset=\"UTF-8\"/>\n"
351 	       "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
352 	       " type=\"text/css\" media=\"all\">\n"
353 	       "<title>%s</title>\n"
354 	       "</head>\n"
355 	       "<body>\n"
356 	       "<!-- Begin page content. //-->\n",
357 	       CSS_DIR, CUSTOMIZE_TITLE);
358 
359 	resp_copy(MAN_DIR "/header.html");
360 }
361 
362 static void
363 resp_end_html(void)
364 {
365 
366 	resp_copy(MAN_DIR "/footer.html");
367 
368 	puts("</body>\n"
369 	     "</html>");
370 }
371 
372 static void
373 resp_searchform(const struct req *req, enum focus focus)
374 {
375 	int		 i;
376 
377 	puts("<!-- Begin search form. //-->");
378 	printf("<div id=\"mancgi\">\n"
379 	       "<form action=\"/%s\" method=\"get\">\n"
380 	       "<fieldset>\n"
381 	       "<legend>Manual Page Search Parameters</legend>\n",
382 	       scriptname);
383 
384 	/* Write query input box. */
385 
386 	printf(	"<table><tr><td>\n"
387 		"<input type=\"text\" name=\"query\" value=\"");
388 	if (req->q.query != NULL)
389 		html_print(req->q.query);
390 	printf( "\" size=\"40\"");
391 	if (focus == FOCUS_QUERY)
392 		printf(" autofocus");
393 	puts(">");
394 
395 	/* Write submission and reset buttons. */
396 
397 	printf(	"<input type=\"submit\" value=\"Submit\">\n"
398 		"<input type=\"reset\" value=\"Reset\">\n");
399 
400 	/* Write show radio button */
401 
402 	printf(	"</td><td>\n"
403 		"<input type=\"radio\" ");
404 	if (req->q.equal)
405 		printf("checked=\"checked\" ");
406 	printf(	"name=\"apropos\" id=\"show\" value=\"0\">\n"
407 		"<label for=\"show\">Show named manual page</label>\n");
408 
409 	/* Write section selector. */
410 
411 	puts(	"</td></tr><tr><td>\n"
412 		"<select name=\"sec\">");
413 	for (i = 0; i < sec_MAX; i++) {
414 		printf("<option value=\"%s\"", sec_numbers[i]);
415 		if (NULL != req->q.sec &&
416 		    0 == strcmp(sec_numbers[i], req->q.sec))
417 			printf(" selected=\"selected\"");
418 		printf(">%s</option>\n", sec_names[i]);
419 	}
420 	puts("</select>");
421 
422 	/* Write architecture selector. */
423 
424 	printf(	"<select name=\"arch\">\n"
425 		"<option value=\"default\"");
426 	if (NULL == req->q.arch)
427 		printf(" selected=\"selected\"");
428 	puts(">All Architectures</option>");
429 	for (i = 0; i < arch_MAX; i++) {
430 		printf("<option value=\"%s\"", arch_names[i]);
431 		if (NULL != req->q.arch &&
432 		    0 == strcmp(arch_names[i], req->q.arch))
433 			printf(" selected=\"selected\"");
434 		printf(">%s</option>\n", arch_names[i]);
435 	}
436 	puts("</select>");
437 
438 	/* Write manpath selector. */
439 
440 	if (req->psz > 1) {
441 		puts("<select name=\"manpath\">");
442 		for (i = 0; i < (int)req->psz; i++) {
443 			printf("<option ");
444 			if (strcmp(req->q.manpath, req->p[i]) == 0)
445 				printf("selected=\"selected\" ");
446 			printf("value=\"");
447 			html_print(req->p[i]);
448 			printf("\">");
449 			html_print(req->p[i]);
450 			puts("</option>");
451 		}
452 		puts("</select>");
453 	}
454 
455 	/* Write search radio button */
456 
457 	printf(	"</td><td>\n"
458 		"<input type=\"radio\" ");
459 	if (0 == req->q.equal)
460 		printf("checked=\"checked\" ");
461 	printf(	"name=\"apropos\" id=\"search\" value=\"1\">\n"
462 		"<label for=\"search\">Search with apropos query</label>\n");
463 
464 	puts("</td></tr></table>\n"
465 	     "</fieldset>\n"
466 	     "</form>\n"
467 	     "</div>");
468 	puts("<!-- End search form. //-->");
469 }
470 
/*
 * Check that a string consists exclusively of ASCII letters
 * and digits (as classified by isalnum(3)), dashes, dots,
 * slashes, and underscores.
 * Returns 1 if so - in particular for the empty string -
 * and 0 if any other byte is found.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			break;
		}
		if (isalnum(*cp) == 0)
			return 0;
	}
	return 1;
}
484 
485 static int
486 validate_manpath(const struct req *req, const char* manpath)
487 {
488 	size_t	 i;
489 
490 	if ( ! strcmp(manpath, "mandoc"))
491 		return 1;
492 
493 	for (i = 0; i < req->psz; i++)
494 		if ( ! strcmp(manpath, req->p[i]))
495 			return 1;
496 
497 	return 0;
498 }
499 
500 static int
501 validate_filename(const char *file)
502 {
503 
504 	if ('.' == file[0] && '/' == file[1])
505 		file += 2;
506 
507 	return ! (strstr(file, "../") || strstr(file, "/..") ||
508 	    (strncmp(file, "man", 3) && strncmp(file, "cat", 3)));
509 }
510 
511 static void
512 pg_index(const struct req *req)
513 {
514 
515 	resp_begin_html(200, NULL);
516 	resp_searchform(req, FOCUS_QUERY);
517 	printf("<p>\n"
518 	       "This web interface is documented in the\n"
519 	       "<a href=\"/%s%smandoc/man8/man.cgi.8\">man.cgi</a>\n"
520 	       "manual, and the\n"
521 	       "<a href=\"/%s%smandoc/man1/apropos.1\">apropos</a>\n"
522 	       "manual explains the query syntax.\n"
523 	       "</p>\n",
524 	       scriptname, *scriptname == '\0' ? "" : "/",
525 	       scriptname, *scriptname == '\0' ? "" : "/");
526 	resp_end_html();
527 }
528 
529 static void
530 pg_noresult(const struct req *req, const char *msg)
531 {
532 	resp_begin_html(200, NULL);
533 	resp_searchform(req, FOCUS_QUERY);
534 	puts("<p>");
535 	puts(msg);
536 	puts("</p>");
537 	resp_end_html();
538 }
539 
540 static void
541 pg_error_badrequest(const char *msg)
542 {
543 
544 	resp_begin_html(400, "Bad Request");
545 	puts("<h1>Bad Request</h1>\n"
546 	     "<p>\n");
547 	puts(msg);
548 	printf("Try again from the\n"
549 	       "<a href=\"/%s\">main page</a>.\n"
550 	       "</p>", scriptname);
551 	resp_end_html();
552 }
553 
/*
 * Serve a "500 Internal Server Error" page.
 * No details are given to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
561 
562 static void
563 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
564 {
565 	char		*arch, *archend;
566 	const char	*sec;
567 	size_t		 i, iuse;
568 	int		 archprio, archpriouse;
569 	int		 prio, priouse;
570 
571 	for (i = 0; i < sz; i++) {
572 		if (validate_filename(r[i].file))
573 			continue;
574 		warnx("invalid filename %s in %s database",
575 		    r[i].file, req->q.manpath);
576 		pg_error_internal();
577 		return;
578 	}
579 
580 	if (req->isquery && sz == 1) {
581 		/*
582 		 * If we have just one result, then jump there now
583 		 * without any delay.
584 		 */
585 		printf("Status: 303 See Other\r\n");
586 		printf("Location: http://%s/%s%s%s/%s",
587 		    HTTP_HOST, scriptname,
588 		    *scriptname == '\0' ? "" : "/",
589 		    req->q.manpath, r[0].file);
590 		printf("\r\n"
591 		     "Content-Type: text/html; charset=utf-8\r\n"
592 		     "\r\n");
593 		return;
594 	}
595 
596 	resp_begin_html(200, NULL);
597 	resp_searchform(req,
598 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
599 
600 	if (sz > 1) {
601 		puts("<div class=\"results\">");
602 		puts("<table>");
603 
604 		for (i = 0; i < sz; i++) {
605 			printf("<tr>\n"
606 			       "<td class=\"title\">\n"
607 			       "<a href=\"/%s%s%s/%s",
608 			    scriptname, *scriptname == '\0' ? "" : "/",
609 			    req->q.manpath, r[i].file);
610 			printf("\">");
611 			html_print(r[i].names);
612 			printf("</a>\n"
613 			       "</td>\n"
614 			       "<td class=\"desc\">");
615 			html_print(r[i].output);
616 			puts("</td>\n"
617 			     "</tr>");
618 		}
619 
620 		puts("</table>\n"
621 		     "</div>");
622 	}
623 
624 	/*
625 	 * In man(1) mode, show one of the pages
626 	 * even if more than one is found.
627 	 */
628 
629 	if (req->q.equal || sz == 1) {
630 		puts("<hr>");
631 		iuse = 0;
632 		priouse = 20;
633 		archpriouse = 3;
634 		for (i = 0; i < sz; i++) {
635 			sec = r[i].file;
636 			sec += strcspn(sec, "123456789");
637 			if (sec[0] == '\0')
638 				continue;
639 			prio = sec_prios[sec[0] - '1'];
640 			if (sec[1] != '/')
641 				prio += 10;
642 			if (req->q.arch == NULL) {
643 				archprio =
644 				    ((arch = strchr(sec + 1, '/'))
645 					== NULL) ? 3 :
646 				    ((archend = strchr(arch + 1, '/'))
647 					== NULL) ? 0 :
648 				    strncmp(arch, "amd64/",
649 					archend - arch) ? 2 : 1;
650 				if (archprio < archpriouse) {
651 					archpriouse = archprio;
652 					priouse = prio;
653 					iuse = i;
654 					continue;
655 				}
656 				if (archprio > archpriouse)
657 					continue;
658 			}
659 			if (prio >= priouse)
660 				continue;
661 			priouse = prio;
662 			iuse = i;
663 		}
664 		resp_show(req, r[iuse].file);
665 	}
666 
667 	resp_end_html();
668 }
669 
/*
 * Print a preformatted manual page as HTML.
 * The file is read line by line.  Sequences of a character, a
 * backspace, and another character are translated as follows:
 * an underscore overstruck by a character prints that character
 * in italics, a few special pairs print '*' or '+', and anything
 * else prints the second character in bold.
 * All output goes through html_putchar() or consists of fixed tags.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace
			 * and p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair have to be compared,
			 * so the second one is always p[i + 2]; looking
			 * at p[i + 1] would only ever see the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
806 
807 static void
808 resp_format(const struct req *req, const char *file)
809 {
810 	struct manoutput conf;
811 	struct mparse	*mp;
812 	struct roff_man	*man;
813 	void		*vp;
814 	int		 fd;
815 	int		 usepath;
816 
817 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
818 		puts("<p>You specified an invalid manual file.</p>");
819 		return;
820 	}
821 
822 	mchars_alloc();
823 	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
824 	mparse_readfd(mp, fd, file);
825 	close(fd);
826 
827 	memset(&conf, 0, sizeof(conf));
828 	conf.fragment = 1;
829 	usepath = strcmp(req->q.manpath, req->p[0]);
830 	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
831 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
832 
833 	mparse_result(mp, &man, NULL);
834 	if (man == NULL) {
835 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
836 		pg_error_internal();
837 		mparse_free(mp);
838 		mchars_free();
839 		return;
840 	}
841 
842 	vp = html_alloc(&conf);
843 
844 	if (man->macroset == MACROSET_MDOC) {
845 		mdoc_validate(man);
846 		html_mdoc(vp, man);
847 	} else {
848 		man_validate(man);
849 		html_man(vp, man);
850 	}
851 
852 	html_free(vp);
853 	mparse_free(mp);
854 	mchars_free();
855 	free(conf.man);
856 }
857 
/*
 * Print the content of one manual page file.
 * After skipping an optional leading "./", a file name starting
 * with 'c' is handled as a preformatted page by resp_catman(),
 * anything else as a source file by resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
870 
871 static void
872 pg_show(struct req *req, const char *fullpath)
873 {
874 	char		*manpath;
875 	const char	*file;
876 
877 	if ((file = strchr(fullpath, '/')) == NULL) {
878 		pg_error_badrequest(
879 		    "You did not specify a page to show.");
880 		return;
881 	}
882 	manpath = mandoc_strndup(fullpath, file - fullpath);
883 	file++;
884 
885 	if ( ! validate_manpath(req, manpath)) {
886 		pg_error_badrequest(
887 		    "You specified an invalid manpath.");
888 		free(manpath);
889 		return;
890 	}
891 
892 	/*
893 	 * Begin by chdir()ing into the manpath.
894 	 * This way we can pick up the database files, which are
895 	 * relative to the manpath root.
896 	 */
897 
898 	if (chdir(manpath) == -1) {
899 		warn("chdir %s", manpath);
900 		pg_error_internal();
901 		free(manpath);
902 		return;
903 	}
904 
905 	if (strcmp(manpath, "mandoc")) {
906 		free(req->q.manpath);
907 		req->q.manpath = manpath;
908 	} else
909 		free(manpath);
910 
911 	if ( ! validate_filename(file)) {
912 		pg_error_badrequest(
913 		    "You specified an invalid manual file.");
914 		return;
915 	}
916 
917 	resp_begin_html(200, NULL);
918 	resp_searchform(req, FOCUS_NONE);
919 	resp_show(req, file);
920 	resp_end_html();
921 }
922 
923 static void
924 pg_search(const struct req *req)
925 {
926 	struct mansearch	  search;
927 	struct manpaths		  paths;
928 	struct manpage		 *res;
929 	char			**argv;
930 	char			 *query, *rp, *wp;
931 	size_t			  ressz;
932 	int			  argc;
933 
934 	/*
935 	 * Begin by chdir()ing into the root of the manpath.
936 	 * This way we can pick up the database files, which are
937 	 * relative to the manpath root.
938 	 */
939 
940 	if (chdir(req->q.manpath) == -1) {
941 		warn("chdir %s", req->q.manpath);
942 		pg_error_internal();
943 		return;
944 	}
945 
946 	search.arch = req->q.arch;
947 	search.sec = req->q.sec;
948 	search.outkey = "Nd";
949 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
950 	search.firstmatch = 1;
951 
952 	paths.sz = 1;
953 	paths.paths = mandoc_malloc(sizeof(char *));
954 	paths.paths[0] = mandoc_strdup(".");
955 
956 	/*
957 	 * Break apart at spaces with backslash-escaping.
958 	 */
959 
960 	argc = 0;
961 	argv = NULL;
962 	rp = query = mandoc_strdup(req->q.query);
963 	for (;;) {
964 		while (isspace((unsigned char)*rp))
965 			rp++;
966 		if (*rp == '\0')
967 			break;
968 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
969 		argv[argc++] = wp = rp;
970 		for (;;) {
971 			if (isspace((unsigned char)*rp)) {
972 				*wp = '\0';
973 				rp++;
974 				break;
975 			}
976 			if (rp[0] == '\\' && rp[1] != '\0')
977 				rp++;
978 			if (wp != rp)
979 				*wp = *rp;
980 			if (*rp == '\0')
981 				break;
982 			wp++;
983 			rp++;
984 		}
985 	}
986 
987 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
988 		pg_noresult(req, "You entered an invalid query.");
989 	else if (0 == ressz)
990 		pg_noresult(req, "No results found.");
991 	else
992 		pg_searchres(req, res, ressz);
993 
994 	free(query);
995 	mansearch_free(res, ressz);
996 	free(paths.paths[0]);
997 	free(paths.paths);
998 }
999 
1000 int
1001 main(void)
1002 {
1003 	struct req	 req;
1004 	struct itimerval itimer;
1005 	const char	*path;
1006 	const char	*querystring;
1007 	int		 i;
1008 
1009 	/* Poor man's ReDoS mitigation. */
1010 
1011 	itimer.it_value.tv_sec = 2;
1012 	itimer.it_value.tv_usec = 0;
1013 	itimer.it_interval.tv_sec = 2;
1014 	itimer.it_interval.tv_usec = 0;
1015 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1016 		warn("setitimer");
1017 		pg_error_internal();
1018 		return EXIT_FAILURE;
1019 	}
1020 
1021 	/*
1022 	 * First we change directory into the MAN_DIR so that
1023 	 * subsequent scanning for manpath directories is rooted
1024 	 * relative to the same position.
1025 	 */
1026 
1027 	if (chdir(MAN_DIR) == -1) {
1028 		warn("MAN_DIR: %s", MAN_DIR);
1029 		pg_error_internal();
1030 		return EXIT_FAILURE;
1031 	}
1032 
1033 	memset(&req, 0, sizeof(struct req));
1034 	req.q.equal = 1;
1035 	parse_manpath_conf(&req);
1036 
1037 	/* Parse the path info and the query string. */
1038 
1039 	if ((path = getenv("PATH_INFO")) == NULL)
1040 		path = "";
1041 	else if (*path == '/')
1042 		path++;
1043 
1044 	if (*path != '\0') {
1045 		parse_path_info(&req, path);
1046 		if (access(path, F_OK) == -1)
1047 			path = "";
1048 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1049 		parse_query_string(&req, querystring);
1050 
1051 	/* Validate parsed data and add defaults. */
1052 
1053 	if (req.q.manpath == NULL)
1054 		req.q.manpath = mandoc_strdup(req.p[0]);
1055 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1056 		pg_error_badrequest(
1057 		    "You specified an invalid manpath.");
1058 		return EXIT_FAILURE;
1059 	}
1060 
1061 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1062 		pg_error_badrequest(
1063 		    "You specified an invalid architecture.");
1064 		return EXIT_FAILURE;
1065 	}
1066 
1067 	/* Dispatch to the three different pages. */
1068 
1069 	if ('\0' != *path)
1070 		pg_show(&req, path);
1071 	else if (NULL != req.q.query)
1072 		pg_search(&req);
1073 	else
1074 		pg_index(&req);
1075 
1076 	free(req.q.manpath);
1077 	free(req.q.arch);
1078 	free(req.q.sec);
1079 	free(req.q.query);
1080 	for (i = 0; i < (int)req.psz; i++)
1081 		free(req.p[i]);
1082 	free(req.p);
1083 	return EXIT_SUCCESS;
1084 }
1085 
1086 /*
1087  * If PATH_INFO is not a file name, translate it to a query.
1088  */
1089 static void
1090 parse_path_info(struct req *req, const char *path)
1091 {
1092 	char	*dir;
1093 
1094 	req->isquery = 0;
1095 	req->q.equal = 1;
1096 	req->q.manpath = mandoc_strdup(path);
1097 
1098 	/* Mandatory manual page name. */
1099 	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1100 		req->q.query = req->q.manpath;
1101 		req->q.manpath = NULL;
1102 	} else
1103 		*req->q.query++ = '\0';
1104 
1105 	/* Optional trailing section. */
1106 	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1107 		if(isdigit((unsigned char)req->q.sec[1])) {
1108 			*req->q.sec++ = '\0';
1109 			req->q.sec = mandoc_strdup(req->q.sec);
1110 		} else
1111 			req->q.sec = NULL;
1112 	}
1113 
1114 	/* Handle the case of name[.section] only. */
1115 	if (req->q.manpath == NULL) {
1116 		req->q.arch = NULL;
1117 		return;
1118 	}
1119 	req->q.query = mandoc_strdup(req->q.query);
1120 
1121 	/* Optional architecture. */
1122 	dir = strrchr(req->q.manpath, '/');
1123 	if (dir != NULL && strncmp(dir + 1, "man", 3) != 0) {
1124 		*dir++ = '\0';
1125 		req->q.arch = mandoc_strdup(dir);
1126 		dir = strrchr(req->q.manpath, '/');
1127 	} else
1128 		req->q.arch = NULL;
1129 
1130 	/* Optional directory name. */
1131 	if (dir != NULL && strncmp(dir + 1, "man", 3) == 0) {
1132 		*dir++ = '\0';
1133 		free(req->q.sec);
1134 		req->q.sec = mandoc_strdup(dir + 3);
1135 	}
1136 }
1137 
1138 /*
1139  * Scan for indexable paths.
1140  */
1141 static void
1142 parse_manpath_conf(struct req *req)
1143 {
1144 	FILE	*fp;
1145 	char	*dp;
1146 	size_t	 dpsz;
1147 	ssize_t	 len;
1148 
1149 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1150 		warn("%s/manpath.conf", MAN_DIR);
1151 		pg_error_internal();
1152 		exit(EXIT_FAILURE);
1153 	}
1154 
1155 	dp = NULL;
1156 	dpsz = 0;
1157 
1158 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1159 		if (dp[len - 1] == '\n')
1160 			dp[--len] = '\0';
1161 		req->p = mandoc_realloc(req->p,
1162 		    (req->psz + 1) * sizeof(char *));
1163 		if ( ! validate_urifrag(dp)) {
1164 			warnx("%s/manpath.conf contains "
1165 			    "unsafe path \"%s\"", MAN_DIR, dp);
1166 			pg_error_internal();
1167 			exit(EXIT_FAILURE);
1168 		}
1169 		if (strchr(dp, '/') != NULL) {
1170 			warnx("%s/manpath.conf contains "
1171 			    "path with slash \"%s\"", MAN_DIR, dp);
1172 			pg_error_internal();
1173 			exit(EXIT_FAILURE);
1174 		}
1175 		req->p[req->psz++] = dp;
1176 		dp = NULL;
1177 		dpsz = 0;
1178 	}
1179 	free(dp);
1180 
1181 	if (req->p == NULL) {
1182 		warnx("%s/manpath.conf is empty", MAN_DIR);
1183 		pg_error_internal();
1184 		exit(EXIT_FAILURE);
1185 	}
1186 }
1187