xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 03adc85b7600a1f8f04886b8321c1c1c0c4933d4)
1 /*	$OpenBSD: cgi.c,v 1.83 2017/01/21 01:20:29 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "main.h"
38 #include "manconf.h"
39 #include "mansearch.h"
40 #include "cgi.h"
41 
/*
 * A query as passed to the search function.
 * The string members are either NULL (not specified) or point to
 * allocated memory; main() frees all four before exiting.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
52 
/*
 * State of the request being served: the parsed query plus the
 * list of manpaths read from manpath.conf by parse_manpath_conf().
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
59 
/*
 * Selects whether resp_searchform() marks the query input box
 * with the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,	/* no element gets the autofocus attribute */
	FOCUS_QUERY	/* the query text input gets autofocus */
};
64 
/*
 * Forward declarations of the file-local functions,
 * kept in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
91 
/* Path of this script below the server root, used when building links. */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Display preference of manual sections, indexed by the section
 * digit minus '1' (see pg_searchres()); a lower value is preferred.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Parallel tables for the section selector of the search form:
 * sec_numbers[] holds the option values, sec_names[] the labels
 * shown to the user.  Entry 0 stands for "all sections".
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[]. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
112 
/*
 * Architecture names offered by the architecture selector of the
 * search form, in the order in which they are listed there.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",
    "hppa",        "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc64",
    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
128 
/*
 * Write a single character to standard output.
 * The characters '"', '&', '>' and '<' are replaced by the
 * corresponding HTML entities; every other byte, including
 * non-ASCII ones, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		printf("%s", entity);
}
155 
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time via html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
169 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Whatever *attr held before is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In both cases *val is NULL afterwards, so the caller no longer
 * owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	if (*incoming != '\0')
		*attr = incoming;
	else {
		free(incoming);
		*attr = NULL;
	}
	*val = NULL;
}
186 
/*
 * Parse the QUERY_STRING for key-value pairs
 * and store the values into the query structure.
 *
 * Pairs are separated by ';' or '&'.  A key that is not followed
 * by '=' is ignored, and so is a pair whose key or value fails
 * http_decode().  The keys "query", "apropos", "manpath", "sec"
 * and "arch" are recognized (plus "sektion" if COMPAT_OLDURI is
 * defined); pairs with any other key are dropped.
 *
 * Marks the request as a query and resets all query fields before
 * parsing; the previous field values are overwritten, not freed.
 */
static void
parse_query_string(struct req *req, const char *qs)
{
	char		*key, *val;
	size_t		 keysz, valsz;

	req->isquery	= 1;
	req->q.manpath	= NULL;
	req->q.arch	= NULL;
	req->q.sec	= NULL;
	req->q.query	= NULL;
	req->q.equal	= 1;

	key = val = NULL;
	while (*qs != '\0') {

		/* Parse one key. */

		keysz = strcspn(qs, "=;&");
		key = mandoc_strndup(qs, keysz);
		qs += keysz;
		if (*qs != '=')
			goto next;

		/* Parse one value. */

		valsz = strcspn(++qs, ";&");
		val = mandoc_strndup(qs, valsz);
		qs += valsz;

		/* Decode and catch encoding errors. */

		if ( ! (http_decode(key) && http_decode(val)))
			goto next;

		/* Handle key-value pairs. */

		if ( ! strcmp(key, "query"))
			set_query_attr(&req->q.query, &val);

		/* Only the exact value "0" selects whole-name matching. */
		else if ( ! strcmp(key, "apropos"))
			req->q.equal = !strcmp(val, "0");

		else if ( ! strcmp(key, "manpath")) {
#ifdef COMPAT_OLDURI
			/*
			 * Rewrite "OpenBSD X..." to "OpenBSD-X...",
			 * lowercasing a 'C' right after the blank.
			 */
			if ( ! strncmp(val, "OpenBSD ", 8)) {
				val[7] = '-';
				if ('C' == val[8])
					val[8] = 'c';
			}
#endif
			set_query_attr(&req->q.manpath, &val);
		}

		else if ( ! (strcmp(key, "sec")
#ifdef COMPAT_OLDURI
		    && strcmp(key, "sektion")
#endif
		    )) {
			/*
			 * "0" is the form value for all sections;
			 * emptying it makes set_query_attr() store NULL.
			 */
			if ( ! strcmp(val, "0"))
				*val = '\0';
			set_query_attr(&req->q.sec, &val);
		}

		else if ( ! strcmp(key, "arch")) {
			/*
			 * "default" is the form value for all
			 * architectures; store it as NULL as well.
			 */
			if ( ! strcmp(val, "default"))
				*val = '\0';
			set_query_attr(&req->q.arch, &val);
		}

		/*
		 * The key must be freed in any case.
		 * The val may have been handed over to the query
		 * structure, in which case it is now NULL.
		 */
next:
		free(key);
		key = NULL;
		free(val);
		val = NULL;

		/* Step over the ';' or '&' that ended this pair. */
		if (*qs != '\0')
			qs++;
	}
}
276 
/*
 * HTTP-decode a string in place.  For example, "%6e+foo" becomes
 * "n foo": each "%XX" escape is replaced by the byte with the
 * hexadecimal value XX, and each '+' by a blank.
 *
 * Returns 1 on success.  Returns 0 if an escape is truncated, if
 * either of its two characters is not a hexadecimal digit, or if
 * it would decode to a NUL byte; the string may have been partially
 * rewritten in that case and should be discarded by the caller.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	long		 c;

	hex[2] = '\0';

	q = p;
	for ( ; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Require exactly two hex digits.  The terminating
		 * NUL is not a hex digit, so the short-circuit also
		 * keeps us from reading past the end of the string.
		 */
		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;

		hex[0] = p[1];
		hex[1] = p[2];
		c = strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the decoded string. */
		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
312 
/*
 * Write the HTTP response header to standard output and flush it.
 * A "Status:" line with the given code and message is only written
 * when the code is not 200; msg is not used otherwise.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	     "Cache-Control: no-cache\r\n"
	     "Pragma: no-cache\r\n"
	     "\r\n", stdout);

	fflush(stdout);
}
327 
/*
 * Copy the content of the given file verbatim to standard output.
 * Silently does nothing if the file cannot be opened, so optional
 * files such as a custom header or footer may be absent.
 *
 * Buffered stdio output is flushed first because the copy bypasses
 * stdio and writes to the file descriptor directly.  Short writes
 * are continued and writes interrupted by a signal are retried;
 * any other write error ends the copy early.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw == -1) {
				if (errno == EINTR)
					continue;
				/* Output is gone; nothing left to do. */
				close(fd);
				return;
			}
			off += nw;
		}
	}
	close(fd);
}
342 
343 static void
344 resp_begin_html(int code, const char *msg)
345 {
346 
347 	resp_begin_http(code, msg);
348 
349 	printf("<!DOCTYPE html>\n"
350 	       "<html>\n"
351 	       "<head>\n"
352 	       "  <meta charset=\"UTF-8\"/>\n"
353 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
354 	       " type=\"text/css\" media=\"all\">\n"
355 	       "  <title>%s</title>\n"
356 	       "</head>\n"
357 	       "<body>\n",
358 	       CSS_DIR, CUSTOMIZE_TITLE);
359 
360 	resp_copy(MAN_DIR "/header.html");
361 }
362 
363 static void
364 resp_end_html(void)
365 {
366 
367 	resp_copy(MAN_DIR "/footer.html");
368 
369 	puts("</body>\n"
370 	     "</html>");
371 }
372 
373 static void
374 resp_searchform(const struct req *req, enum focus focus)
375 {
376 	int		 i;
377 
378 	printf("<form action=\"/%s\" method=\"get\">\n"
379 	       "  <fieldset>\n"
380 	       "    <legend>Manual Page Search Parameters</legend>\n",
381 	       scriptname);
382 
383 	/* Write query input box. */
384 
385 	printf("    <input type=\"text\" name=\"query\" value=\"");
386 	if (req->q.query != NULL)
387 		html_print(req->q.query);
388 	printf( "\" size=\"40\"");
389 	if (focus == FOCUS_QUERY)
390 		printf(" autofocus");
391 	puts(">");
392 
393 	/* Write submission buttons. */
394 
395 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
396 		"man</button>\n"
397 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
398 		"apropos</button>\n"
399 		"    <br/>\n");
400 
401 	/* Write section selector. */
402 
403 	puts("    <select name=\"sec\">");
404 	for (i = 0; i < sec_MAX; i++) {
405 		printf("      <option value=\"%s\"", sec_numbers[i]);
406 		if (NULL != req->q.sec &&
407 		    0 == strcmp(sec_numbers[i], req->q.sec))
408 			printf(" selected=\"selected\"");
409 		printf(">%s</option>\n", sec_names[i]);
410 	}
411 	puts("    </select>");
412 
413 	/* Write architecture selector. */
414 
415 	printf(	"    <select name=\"arch\">\n"
416 		"      <option value=\"default\"");
417 	if (NULL == req->q.arch)
418 		printf(" selected=\"selected\"");
419 	puts(">All Architectures</option>");
420 	for (i = 0; i < arch_MAX; i++) {
421 		printf("      <option value=\"%s\"", arch_names[i]);
422 		if (NULL != req->q.arch &&
423 		    0 == strcmp(arch_names[i], req->q.arch))
424 			printf(" selected=\"selected\"");
425 		printf(">%s</option>\n", arch_names[i]);
426 	}
427 	puts("    </select>");
428 
429 	/* Write manpath selector. */
430 
431 	if (req->psz > 1) {
432 		puts("    <select name=\"manpath\">");
433 		for (i = 0; i < (int)req->psz; i++) {
434 			printf("      <option ");
435 			if (strcmp(req->q.manpath, req->p[i]) == 0)
436 				printf("selected=\"selected\" ");
437 			printf("value=\"");
438 			html_print(req->p[i]);
439 			printf("\">");
440 			html_print(req->p[i]);
441 			puts("</option>");
442 		}
443 		puts("    </select>");
444 	}
445 
446 	puts("  </fieldset>\n"
447 	     "</form>");
448 }
449 
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
463 
464 static int
465 validate_manpath(const struct req *req, const char* manpath)
466 {
467 	size_t	 i;
468 
469 	for (i = 0; i < req->psz; i++)
470 		if ( ! strcmp(manpath, req->p[i]))
471 			return 1;
472 
473 	return 0;
474 }
475 
/*
 * Check that a file name is acceptable as a manual page path
 * relative to a manpath.  After skipping an optional leading "./",
 * the name must start with "man" or "cat" and must contain
 * neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
486 
487 static void
488 pg_index(const struct req *req)
489 {
490 
491 	resp_begin_html(200, NULL);
492 	resp_searchform(req, FOCUS_QUERY);
493 	printf("<p>\n"
494 	       "This web interface is documented in the\n"
495 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
496 	       "manual, and the\n"
497 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
498 	       "manual explains the query syntax.\n"
499 	       "</p>\n",
500 	       scriptname, *scriptname == '\0' ? "" : "/",
501 	       scriptname, *scriptname == '\0' ? "" : "/");
502 	resp_end_html();
503 }
504 
505 static void
506 pg_noresult(const struct req *req, const char *msg)
507 {
508 	resp_begin_html(200, NULL);
509 	resp_searchform(req, FOCUS_QUERY);
510 	puts("<p>");
511 	puts(msg);
512 	puts("</p>");
513 	resp_end_html();
514 }
515 
516 static void
517 pg_error_badrequest(const char *msg)
518 {
519 
520 	resp_begin_html(400, "Bad Request");
521 	puts("<h1>Bad Request</h1>\n"
522 	     "<p>\n");
523 	puts(msg);
524 	printf("Try again from the\n"
525 	       "<a href=\"/%s\">main page</a>.\n"
526 	       "</p>", scriptname);
527 	resp_end_html();
528 }
529 
/*
 * Write a complete "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	printf("%s\n", "<p>Internal Server Error</p>");
	resp_end_html();
}
537 
538 static void
539 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
540 {
541 	char		*arch, *archend;
542 	const char	*sec;
543 	size_t		 i, iuse;
544 	int		 archprio, archpriouse;
545 	int		 prio, priouse;
546 
547 	for (i = 0; i < sz; i++) {
548 		if (validate_filename(r[i].file))
549 			continue;
550 		warnx("invalid filename %s in %s database",
551 		    r[i].file, req->q.manpath);
552 		pg_error_internal();
553 		return;
554 	}
555 
556 	if (req->isquery && sz == 1) {
557 		/*
558 		 * If we have just one result, then jump there now
559 		 * without any delay.
560 		 */
561 		printf("Status: 303 See Other\r\n");
562 		printf("Location: http://%s/%s%s%s/%s",
563 		    HTTP_HOST, scriptname,
564 		    *scriptname == '\0' ? "" : "/",
565 		    req->q.manpath, r[0].file);
566 		printf("\r\n"
567 		     "Content-Type: text/html; charset=utf-8\r\n"
568 		     "\r\n");
569 		return;
570 	}
571 
572 	resp_begin_html(200, NULL);
573 	resp_searchform(req,
574 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
575 
576 	if (sz > 1) {
577 		puts("<table class=\"results\">");
578 		for (i = 0; i < sz; i++) {
579 			printf("  <tr>\n"
580 			       "    <td>"
581 			       "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
582 			    scriptname, *scriptname == '\0' ? "" : "/",
583 			    req->q.manpath, r[i].file);
584 			html_print(r[i].names);
585 			printf("</a></td>\n"
586 			       "    <td><span class=\"Nd\">");
587 			html_print(r[i].output);
588 			puts("</span></td>\n"
589 			     "  </tr>");
590 		}
591 		puts("</table>");
592 	}
593 
594 	/*
595 	 * In man(1) mode, show one of the pages
596 	 * even if more than one is found.
597 	 */
598 
599 	if (req->q.equal || sz == 1) {
600 		puts("<hr>");
601 		iuse = 0;
602 		priouse = 20;
603 		archpriouse = 3;
604 		for (i = 0; i < sz; i++) {
605 			sec = r[i].file;
606 			sec += strcspn(sec, "123456789");
607 			if (sec[0] == '\0')
608 				continue;
609 			prio = sec_prios[sec[0] - '1'];
610 			if (sec[1] != '/')
611 				prio += 10;
612 			if (req->q.arch == NULL) {
613 				archprio =
614 				    ((arch = strchr(sec + 1, '/'))
615 					== NULL) ? 3 :
616 				    ((archend = strchr(arch + 1, '/'))
617 					== NULL) ? 0 :
618 				    strncmp(arch, "amd64/",
619 					archend - arch) ? 2 : 1;
620 				if (archprio < archpriouse) {
621 					archpriouse = archprio;
622 					priouse = prio;
623 					iuse = i;
624 					continue;
625 				}
626 				if (archprio > archpriouse)
627 					continue;
628 			}
629 			if (prio >= priouse)
630 				continue;
631 			priouse = prio;
632 			iuse = i;
633 		}
634 		resp_show(req, r[iuse].file);
635 	}
636 
637 	resp_end_html();
638 }
639 
/*
 * Render a preformatted manual page ("cat page") as HTML.
 *
 * req:  the current request (not used by this function)
 * file: name of the file to read, relative to the current directory
 *
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, a few known two-glyph
 * combinations become "*" or "+", and any other "X\bY" becomes
 * bold Y.  All text is HTML-escaped.  If the file cannot be
 * opened, a short error paragraph is written instead.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character overstriking p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both glyphs of each pair are compared, so
			 * the second one is always taken from
			 * p[i + 2], never from the backspace between.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
776 
777 static void
778 resp_format(const struct req *req, const char *file)
779 {
780 	struct manoutput conf;
781 	struct mparse	*mp;
782 	struct roff_man	*man;
783 	void		*vp;
784 	int		 fd;
785 	int		 usepath;
786 
787 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
788 		puts("<p>You specified an invalid manual file.</p>");
789 		return;
790 	}
791 
792 	mchars_alloc();
793 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
794 	    MANDOCLEVEL_BADARG, NULL, req->q.manpath);
795 	mparse_readfd(mp, fd, file);
796 	close(fd);
797 
798 	memset(&conf, 0, sizeof(conf));
799 	conf.fragment = 1;
800 	usepath = strcmp(req->q.manpath, req->p[0]);
801 	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
802 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
803 
804 	mparse_result(mp, &man, NULL);
805 	if (man == NULL) {
806 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
807 		pg_error_internal();
808 		mparse_free(mp);
809 		mchars_free();
810 		return;
811 	}
812 
813 	vp = html_alloc(&conf);
814 
815 	if (man->macroset == MACROSET_MDOC) {
816 		mdoc_validate(man);
817 		html_mdoc(vp, man);
818 	} else {
819 		man_validate(man);
820 		html_man(vp, man);
821 	}
822 
823 	html_free(vp);
824 	mparse_free(mp);
825 	mchars_free();
826 	free(conf.man);
827 }
828 
/*
 * Write the content of one manual page.  After skipping an
 * optional leading "./", names starting with 'c' are treated as
 * preformatted pages, all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
841 
842 static void
843 pg_show(struct req *req, const char *fullpath)
844 {
845 	char		*manpath;
846 	const char	*file;
847 
848 	if ((file = strchr(fullpath, '/')) == NULL) {
849 		pg_error_badrequest(
850 		    "You did not specify a page to show.");
851 		return;
852 	}
853 	manpath = mandoc_strndup(fullpath, file - fullpath);
854 	file++;
855 
856 	if ( ! validate_manpath(req, manpath)) {
857 		pg_error_badrequest(
858 		    "You specified an invalid manpath.");
859 		free(manpath);
860 		return;
861 	}
862 
863 	/*
864 	 * Begin by chdir()ing into the manpath.
865 	 * This way we can pick up the database files, which are
866 	 * relative to the manpath root.
867 	 */
868 
869 	if (chdir(manpath) == -1) {
870 		warn("chdir %s", manpath);
871 		pg_error_internal();
872 		free(manpath);
873 		return;
874 	}
875 	free(manpath);
876 
877 	if ( ! validate_filename(file)) {
878 		pg_error_badrequest(
879 		    "You specified an invalid manual file.");
880 		return;
881 	}
882 
883 	resp_begin_html(200, NULL);
884 	resp_searchform(req, FOCUS_NONE);
885 	resp_show(req, file);
886 	resp_end_html();
887 }
888 
889 static void
890 pg_search(const struct req *req)
891 {
892 	struct mansearch	  search;
893 	struct manpaths		  paths;
894 	struct manpage		 *res;
895 	char			**argv;
896 	char			 *query, *rp, *wp;
897 	size_t			  ressz;
898 	int			  argc;
899 
900 	/*
901 	 * Begin by chdir()ing into the root of the manpath.
902 	 * This way we can pick up the database files, which are
903 	 * relative to the manpath root.
904 	 */
905 
906 	if (chdir(req->q.manpath) == -1) {
907 		warn("chdir %s", req->q.manpath);
908 		pg_error_internal();
909 		return;
910 	}
911 
912 	search.arch = req->q.arch;
913 	search.sec = req->q.sec;
914 	search.outkey = "Nd";
915 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
916 	search.firstmatch = 1;
917 
918 	paths.sz = 1;
919 	paths.paths = mandoc_malloc(sizeof(char *));
920 	paths.paths[0] = mandoc_strdup(".");
921 
922 	/*
923 	 * Break apart at spaces with backslash-escaping.
924 	 */
925 
926 	argc = 0;
927 	argv = NULL;
928 	rp = query = mandoc_strdup(req->q.query);
929 	for (;;) {
930 		while (isspace((unsigned char)*rp))
931 			rp++;
932 		if (*rp == '\0')
933 			break;
934 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
935 		argv[argc++] = wp = rp;
936 		for (;;) {
937 			if (isspace((unsigned char)*rp)) {
938 				*wp = '\0';
939 				rp++;
940 				break;
941 			}
942 			if (rp[0] == '\\' && rp[1] != '\0')
943 				rp++;
944 			if (wp != rp)
945 				*wp = *rp;
946 			if (*rp == '\0')
947 				break;
948 			wp++;
949 			rp++;
950 		}
951 	}
952 
953 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
954 		pg_noresult(req, "You entered an invalid query.");
955 	else if (0 == ressz)
956 		pg_noresult(req, "No results found.");
957 	else
958 		pg_searchres(req, res, ressz);
959 
960 	free(query);
961 	mansearch_free(res, ressz);
962 	free(paths.paths[0]);
963 	free(paths.paths);
964 }
965 
966 int
967 main(void)
968 {
969 	struct req	 req;
970 	struct itimerval itimer;
971 	const char	*path;
972 	const char	*querystring;
973 	int		 i;
974 
975 	/* Poor man's ReDoS mitigation. */
976 
977 	itimer.it_value.tv_sec = 2;
978 	itimer.it_value.tv_usec = 0;
979 	itimer.it_interval.tv_sec = 2;
980 	itimer.it_interval.tv_usec = 0;
981 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
982 		warn("setitimer");
983 		pg_error_internal();
984 		return EXIT_FAILURE;
985 	}
986 
987 	/*
988 	 * First we change directory into the MAN_DIR so that
989 	 * subsequent scanning for manpath directories is rooted
990 	 * relative to the same position.
991 	 */
992 
993 	if (chdir(MAN_DIR) == -1) {
994 		warn("MAN_DIR: %s", MAN_DIR);
995 		pg_error_internal();
996 		return EXIT_FAILURE;
997 	}
998 
999 	memset(&req, 0, sizeof(struct req));
1000 	req.q.equal = 1;
1001 	parse_manpath_conf(&req);
1002 
1003 	/* Parse the path info and the query string. */
1004 
1005 	if ((path = getenv("PATH_INFO")) == NULL)
1006 		path = "";
1007 	else if (*path == '/')
1008 		path++;
1009 
1010 	if (*path != '\0') {
1011 		parse_path_info(&req, path);
1012 		if (req.q.manpath == NULL || access(path, F_OK) == -1)
1013 			path = "";
1014 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1015 		parse_query_string(&req, querystring);
1016 
1017 	/* Validate parsed data and add defaults. */
1018 
1019 	if (req.q.manpath == NULL)
1020 		req.q.manpath = mandoc_strdup(req.p[0]);
1021 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1022 		pg_error_badrequest(
1023 		    "You specified an invalid manpath.");
1024 		return EXIT_FAILURE;
1025 	}
1026 
1027 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1028 		pg_error_badrequest(
1029 		    "You specified an invalid architecture.");
1030 		return EXIT_FAILURE;
1031 	}
1032 
1033 	/* Dispatch to the three different pages. */
1034 
1035 	if ('\0' != *path)
1036 		pg_show(&req, path);
1037 	else if (NULL != req.q.query)
1038 		pg_search(&req);
1039 	else
1040 		pg_index(&req);
1041 
1042 	free(req.q.manpath);
1043 	free(req.q.arch);
1044 	free(req.q.sec);
1045 	free(req.q.query);
1046 	for (i = 0; i < (int)req.psz; i++)
1047 		free(req.p[i]);
1048 	free(req.p);
1049 	return EXIT_SUCCESS;
1050 }
1051 
/*
 * If PATH_INFO is not a file name, translate it to a query.
 *
 * The last path component is the page name, optionally followed
 * by a ".section" suffix starting with a digit.  Up to three
 * directory components may precede it; they are interpreted as an
 * optional manpath, an optional "manN" section directory, and an
 * optional architecture.  The results are stored in req->q.
 *
 * Marks the request as not being a query-string request.
 * Writes a "bad request" page and exits if there are too many
 * directory components or one that does not fit this scheme.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	char	*dir[4];
	int	 i;

	req->isquery = 0;
	req->q.equal = 1;
	/* Work on a private copy that is cut apart in place below. */
	req->q.manpath = mandoc_strdup(path);
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
		/* No directory part: the copy itself is the name. */
		req->q.query = req->q.manpath;
		req->q.manpath = NULL;
	} else
		*req->q.query++ = '\0';

	/* Optional trailing section. */
	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
		if(isdigit((unsigned char)req->q.sec[1])) {
			/* Cut the suffix off the name and copy it. */
			*req->q.sec++ = '\0';
			req->q.sec = mandoc_strdup(req->q.sec);
		} else
			req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (req->q.manpath == NULL)
		return;
	/*
	 * Up to here, the name pointed into the copy of the path;
	 * give it an allocation of its own.
	 */
	req->q.query = mandoc_strdup(req->q.query);

	/* Split directory components. */
	dir[i = 0] = req->q.manpath;
	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
		if (++i == 3) {
			pg_error_badrequest(
			    "You specified too many directory components.");
			exit(EXIT_FAILURE);
		}
		*dir[i]++ = '\0';
	}

	/*
	 * Optional manpath.
	 * Afterwards, i is the index of the first component that is
	 * not the manpath: 1 if dir[0] is a valid manpath, else 0.
	 * In the latter case, dir[0] remains the only pointer to
	 * the copy of the path.
	 */
	if ((i = validate_manpath(req, req->q.manpath)) == 0)
		req->q.manpath = NULL;
	else if (dir[1] == NULL)
		return;

	/* Optional section. */
	if (strncmp(dir[i], "man", 3) == 0) {
		/* A section directory overrides a section suffix. */
		free(req->q.sec);
		req->q.sec = mandoc_strdup(dir[i++] + 3);
	}
	if (dir[i] == NULL) {
		/* Nothing refers to the copy any longer. */
		if (req->q.manpath == NULL)
			free(dir[0]);
		return;
	}
	if (dir[i + 1] != NULL) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}

	/* Optional architecture. */
	if (i) {
		req->q.arch = mandoc_strdup(dir[i]);
		if (req->q.manpath == NULL)
			free(dir[0]);
	} else
		/* The copy of the path itself becomes the architecture. */
		req->q.arch = dir[0];
}
1128 
1129 /*
1130  * Scan for indexable paths.
1131  */
1132 static void
1133 parse_manpath_conf(struct req *req)
1134 {
1135 	FILE	*fp;
1136 	char	*dp;
1137 	size_t	 dpsz;
1138 	ssize_t	 len;
1139 
1140 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1141 		warn("%s/manpath.conf", MAN_DIR);
1142 		pg_error_internal();
1143 		exit(EXIT_FAILURE);
1144 	}
1145 
1146 	dp = NULL;
1147 	dpsz = 0;
1148 
1149 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1150 		if (dp[len - 1] == '\n')
1151 			dp[--len] = '\0';
1152 		req->p = mandoc_realloc(req->p,
1153 		    (req->psz + 1) * sizeof(char *));
1154 		if ( ! validate_urifrag(dp)) {
1155 			warnx("%s/manpath.conf contains "
1156 			    "unsafe path \"%s\"", MAN_DIR, dp);
1157 			pg_error_internal();
1158 			exit(EXIT_FAILURE);
1159 		}
1160 		if (strchr(dp, '/') != NULL) {
1161 			warnx("%s/manpath.conf contains "
1162 			    "path with slash \"%s\"", MAN_DIR, dp);
1163 			pg_error_internal();
1164 			exit(EXIT_FAILURE);
1165 		}
1166 		req->p[req->psz++] = dp;
1167 		dp = NULL;
1168 		dpsz = 0;
1169 	}
1170 	free(dp);
1171 
1172 	if (req->p == NULL) {
1173 		warnx("%s/manpath.conf is empty", MAN_DIR);
1174 		pg_error_internal();
1175 		exit(EXIT_FAILURE);
1176 	}
1177 }
1178