xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision d59bb9942320b767f2a19aaa7690c8c6e30b724c)
1 /*	$OpenBSD: cgi.c,v 1.86 2017/02/22 16:16:35 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "main.h"
38 #include "manconf.h"
39 #include "mansearch.h"
40 #include "cgi.h"
41 
/*
 * A query as passed to the search function.
 * main() releases the four string members with free() before
 * exiting, so each of them has to be NULL or an allocated string
 * by that point.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
52 
/*
 * State of one CGI request: the parsed query plus the list of
 * manpaths that parse_manpath_conf() reads from manpath.conf.
 */
struct	req {
	struct query	  q; /* parsed request parameters */
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
59 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0, /* no form element is marked */
	FOCUS_QUERY /* the query input box is marked autofocus */
};
64 
/*
 * Forward declarations of the file-local functions,
 * kept in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);

/*
 * Name of this script as it appears in generated links;
 * it is always printed right after a leading slash.
 */
static	const char	 *scriptname = SCRIPT_NAME;
93 
/*
 * Section tables.
 * sec_prios[] is indexed in pg_searchres() by the section digit
 * minus one; smaller values are preferred there.
 * sec_numbers[] and sec_names[] are parallel arrays providing the
 * option values and the labels of the section selector printed by
 * resp_searchform(); sec_MAX is their common length.
 */
static	const int sec_prios[] = {
    1, 4, 5, 8, 6, 3, 7, 2, 9
};
static	const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
112 
/*
 * Architecture names offered by the architecture selector of the
 * search form, in the order in which they are listed there.
 * arch_MAX is the number of entries.
 */
static	const char *const arch_names[] = {
    "amd64",
    "alpha",
    "armv7",
    "arm64",
    "hppa",
    "i386",
    "landisk",
    "loongson",
    "luna88k",
    "macppc",
    "mips64",
    "octeon",
    "sgi",
    "socppc",
    "sparc64",
    "amiga",
    "arc",
    "armish",
    "arm32",
    "atari",
    "aviion",
    "beagle",
    "cats",
    "hppa64",
    "hp300",
    "ia64",
    "mac68k",
    "mvme68k",
    "mvme88k",
    "mvmeppc",
    "palm",
    "pc532",
    "pegasos",
    "pmax",
    "powerpc",
    "solbourne",
    "sparc",
    "sun3",
    "vax",
    "wgrisc",
    "x68k",
    "zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
128 
/*
 * Write one character to standard output, replacing the four
 * characters '"', '&', '>' and '<' with their HTML entities.
 * Every other byte, non-ASCII included, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else {
		putchar((unsigned char)c);
		return;
	}
	fputs(entity, stdout);
}
155 
/*
 * Write a string to standard output by passing each of its
 * characters to html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
169 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In both cases, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);
	incoming = *val;
	if (*incoming != '\0')
		*attr = incoming;
	else {
		free(incoming);
		*attr = NULL;
	}
	*val = NULL;
}
186 
/*
 * Parse the QUERY_STRING for key-value pairs
 * and store the values into the query structure.
 *
 * Pairs are separated by ';' or '&' and have the form key=value.
 * A key without '=' and a pair that fails HTTP decoding are skipped.
 * Recognized keys are "query", "apropos", "manpath", "sec" and "arch"
 * (plus "sektion" when COMPAT_OLDURI is defined); all other keys are
 * ignored.  The string members of req->q are reset to NULL without
 * being freed, and req->isquery is set.
 */
static void
parse_query_string(struct req *req, const char *qs)
{
	char		*key, *val;
	size_t		 keysz, valsz;

	req->isquery	= 1;
	req->q.manpath	= NULL;
	req->q.arch	= NULL;
	req->q.sec	= NULL;
	req->q.query	= NULL;
	req->q.equal	= 1;

	key = val = NULL;
	while (*qs != '\0') {

		/* Parse one key. */

		keysz = strcspn(qs, "=;&");
		key = mandoc_strndup(qs, keysz);
		qs += keysz;
		if (*qs != '=')
			goto next;

		/* Parse one value. */

		valsz = strcspn(++qs, ";&");
		val = mandoc_strndup(qs, valsz);
		qs += valsz;

		/* Decode and catch encoding errors. */

		if ( ! (http_decode(key) && http_decode(val)))
			goto next;

		/* Handle key-value pairs. */

		if ( ! strcmp(key, "query"))
			set_query_attr(&req->q.query, &val);

		/* Only the exact value "0" selects whole-name matching. */
		else if ( ! strcmp(key, "apropos"))
			req->q.equal = !strcmp(val, "0");

		else if ( ! strcmp(key, "manpath")) {
#ifdef COMPAT_OLDURI
			/*
			 * Rewrite a value starting with "OpenBSD " by
			 * replacing the blank with '-' and a following
			 * 'C' with 'c'.
			 */
			if ( ! strncmp(val, "OpenBSD ", 8)) {
				val[7] = '-';
				if ('C' == val[8])
					val[8] = 'c';
			}
#endif
			set_query_attr(&req->q.manpath, &val);
		}

		else if ( ! (strcmp(key, "sec")
#ifdef COMPAT_OLDURI
		    && strcmp(key, "sektion")
#endif
		    )) {
			/* "0" means all sections: store no section. */
			if ( ! strcmp(val, "0"))
				*val = '\0';
			set_query_attr(&req->q.sec, &val);
		}

		else if ( ! strcmp(key, "arch")) {
			/* "default" means all architectures. */
			if ( ! strcmp(val, "default"))
				*val = '\0';
			set_query_attr(&req->q.arch, &val);
		}

		/*
		 * The key must be freed in any case.
		 * The val may have been handed over to the query
		 * structure, in which case it is now NULL.
		 */
next:
		free(key);
		key = NULL;
		free(val);
		val = NULL;

		/* Step over the ';' or '&' that ended this pair. */
		if (*qs != '\0')
			qs++;
	}
}
276 
277 /*
278  * HTTP-decode a string.  The standard explanation is that this turns
279  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
280  * over the allocated string.
281  */
282 static int
283 http_decode(char *p)
284 {
285 	char             hex[3];
286 	char		*q;
287 	int              c;
288 
289 	hex[2] = '\0';
290 
291 	q = p;
292 	for ( ; '\0' != *p; p++, q++) {
293 		if ('%' == *p) {
294 			if ('\0' == (hex[0] = *(p + 1)))
295 				return 0;
296 			if ('\0' == (hex[1] = *(p + 2)))
297 				return 0;
298 			if (1 != sscanf(hex, "%x", &c))
299 				return 0;
300 			if ('\0' == c)
301 				return 0;
302 
303 			*q = (char)c;
304 			p += 2;
305 		} else
306 			*q = '+' == *p ? ' ' : *p;
307 	}
308 
309 	*q = '\0';
310 	return 1;
311 }
312 
/*
 * Write the HTTP response header to standard output and flush it.
 * A "Status:" line carrying code and msg is only written when the
 * code differs from 200, so msg may be NULL for code 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
327 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Buffered stdio output is flushed first because the copy
 * bypasses stdio and uses write(2) directly.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/*
		 * write(2) may transfer less than requested,
		 * so continue until the whole chunk is out.
		 */
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw != -1)
				continue;
			if (errno == EINTR) {
				nw = 0;		/* retry the same range */
				continue;
			}
			/* Output is gone; copying more is pointless. */
			close(fd);
			return;
		}
	}
	close(fd);
}
342 
343 static void
344 resp_begin_html(int code, const char *msg)
345 {
346 
347 	resp_begin_http(code, msg);
348 
349 	printf("<!DOCTYPE html>\n"
350 	       "<html>\n"
351 	       "<head>\n"
352 	       "  <meta charset=\"UTF-8\"/>\n"
353 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
354 	       " type=\"text/css\" media=\"all\">\n"
355 	       "  <title>%s</title>\n"
356 	       "</head>\n"
357 	       "<body>\n",
358 	       CSS_DIR, CUSTOMIZE_TITLE);
359 
360 	resp_copy(MAN_DIR "/header.html");
361 }
362 
363 static void
364 resp_end_html(void)
365 {
366 
367 	resp_copy(MAN_DIR "/footer.html");
368 
369 	puts("</body>\n"
370 	     "</html>");
371 }
372 
373 static void
374 resp_searchform(const struct req *req, enum focus focus)
375 {
376 	int		 i;
377 
378 	printf("<form action=\"/%s\" method=\"get\">\n"
379 	       "  <fieldset>\n"
380 	       "    <legend>Manual Page Search Parameters</legend>\n",
381 	       scriptname);
382 
383 	/* Write query input box. */
384 
385 	printf("    <input type=\"text\" name=\"query\" value=\"");
386 	if (req->q.query != NULL)
387 		html_print(req->q.query);
388 	printf( "\" size=\"40\"");
389 	if (focus == FOCUS_QUERY)
390 		printf(" autofocus");
391 	puts(">");
392 
393 	/* Write submission buttons. */
394 
395 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
396 		"man</button>\n"
397 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
398 		"apropos</button>\n"
399 		"    <br/>\n");
400 
401 	/* Write section selector. */
402 
403 	puts("    <select name=\"sec\">");
404 	for (i = 0; i < sec_MAX; i++) {
405 		printf("      <option value=\"%s\"", sec_numbers[i]);
406 		if (NULL != req->q.sec &&
407 		    0 == strcmp(sec_numbers[i], req->q.sec))
408 			printf(" selected=\"selected\"");
409 		printf(">%s</option>\n", sec_names[i]);
410 	}
411 	puts("    </select>");
412 
413 	/* Write architecture selector. */
414 
415 	printf(	"    <select name=\"arch\">\n"
416 		"      <option value=\"default\"");
417 	if (NULL == req->q.arch)
418 		printf(" selected=\"selected\"");
419 	puts(">All Architectures</option>");
420 	for (i = 0; i < arch_MAX; i++) {
421 		printf("      <option value=\"%s\"", arch_names[i]);
422 		if (NULL != req->q.arch &&
423 		    0 == strcmp(arch_names[i], req->q.arch))
424 			printf(" selected=\"selected\"");
425 		printf(">%s</option>\n", arch_names[i]);
426 	}
427 	puts("    </select>");
428 
429 	/* Write manpath selector. */
430 
431 	if (req->psz > 1) {
432 		puts("    <select name=\"manpath\">");
433 		for (i = 0; i < (int)req->psz; i++) {
434 			printf("      <option ");
435 			if (strcmp(req->q.manpath, req->p[i]) == 0)
436 				printf("selected=\"selected\" ");
437 			printf("value=\"");
438 			html_print(req->p[i]);
439 			printf("\">");
440 			html_print(req->p[i]);
441 			puts("</option>");
442 		}
443 		puts("    </select>");
444 	}
445 
446 	puts("  </fieldset>\n"
447 	     "</form>");
448 }
449 
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (also for the empty string) and 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			break;
		}
		if ( ! isalnum((unsigned char)*cp))
			return 0;
	}
	return 1;
}
463 
464 static int
465 validate_manpath(const struct req *req, const char* manpath)
466 {
467 	size_t	 i;
468 
469 	for (i = 0; i < req->psz; i++)
470 		if ( ! strcmp(manpath, req->p[i]))
471 			return 1;
472 
473 	return 0;
474 }
475 
/*
 * Check a file name relative to a manpath directory.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable and 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
486 
487 static void
488 pg_index(const struct req *req)
489 {
490 
491 	resp_begin_html(200, NULL);
492 	resp_searchform(req, FOCUS_QUERY);
493 	printf("<p>\n"
494 	       "This web interface is documented in the\n"
495 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
496 	       "manual, and the\n"
497 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
498 	       "manual explains the query syntax.\n"
499 	       "</p>\n",
500 	       scriptname, *scriptname == '\0' ? "" : "/",
501 	       scriptname, *scriptname == '\0' ? "" : "/");
502 	resp_end_html();
503 }
504 
505 static void
506 pg_noresult(const struct req *req, const char *msg)
507 {
508 	resp_begin_html(200, NULL);
509 	resp_searchform(req, FOCUS_QUERY);
510 	puts("<p>");
511 	puts(msg);
512 	puts("</p>");
513 	resp_end_html();
514 }
515 
516 static void
517 pg_error_badrequest(const char *msg)
518 {
519 
520 	resp_begin_html(400, "Bad Request");
521 	puts("<h1>Bad Request</h1>\n"
522 	     "<p>\n");
523 	puts(msg);
524 	printf("Try again from the\n"
525 	       "<a href=\"/%s\">main page</a>.\n"
526 	       "</p>", scriptname);
527 	resp_end_html();
528 }
529 
/*
 * Print a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
537 
538 static void
539 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
540 {
541 	char		*arch, *archend;
542 	const char	*sec;
543 	size_t		 i, iuse;
544 	int		 archprio, archpriouse;
545 	int		 prio, priouse;
546 
547 	for (i = 0; i < sz; i++) {
548 		if (validate_filename(r[i].file))
549 			continue;
550 		warnx("invalid filename %s in %s database",
551 		    r[i].file, req->q.manpath);
552 		pg_error_internal();
553 		return;
554 	}
555 
556 	if (req->isquery && sz == 1) {
557 		/*
558 		 * If we have just one result, then jump there now
559 		 * without any delay.
560 		 */
561 		printf("Status: 303 See Other\r\n");
562 		printf("Location: http://%s/%s%s%s/%s",
563 		    HTTP_HOST, scriptname,
564 		    *scriptname == '\0' ? "" : "/",
565 		    req->q.manpath, r[0].file);
566 		printf("\r\n"
567 		     "Content-Type: text/html; charset=utf-8\r\n"
568 		     "\r\n");
569 		return;
570 	}
571 
572 	resp_begin_html(200, NULL);
573 	resp_searchform(req,
574 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
575 
576 	if (sz > 1) {
577 		puts("<table class=\"results\">");
578 		for (i = 0; i < sz; i++) {
579 			printf("  <tr>\n"
580 			       "    <td>"
581 			       "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
582 			    scriptname, *scriptname == '\0' ? "" : "/",
583 			    req->q.manpath, r[i].file);
584 			html_print(r[i].names);
585 			printf("</a></td>\n"
586 			       "    <td><span class=\"Nd\">");
587 			html_print(r[i].output);
588 			puts("</span></td>\n"
589 			     "  </tr>");
590 		}
591 		puts("</table>");
592 	}
593 
594 	/*
595 	 * In man(1) mode, show one of the pages
596 	 * even if more than one is found.
597 	 */
598 
599 	if (req->q.equal || sz == 1) {
600 		puts("<hr>");
601 		iuse = 0;
602 		priouse = 20;
603 		archpriouse = 3;
604 		for (i = 0; i < sz; i++) {
605 			sec = r[i].file;
606 			sec += strcspn(sec, "123456789");
607 			if (sec[0] == '\0')
608 				continue;
609 			prio = sec_prios[sec[0] - '1'];
610 			if (sec[1] != '/')
611 				prio += 10;
612 			if (req->q.arch == NULL) {
613 				archprio =
614 				    ((arch = strchr(sec + 1, '/'))
615 					== NULL) ? 3 :
616 				    ((archend = strchr(arch + 1, '/'))
617 					== NULL) ? 0 :
618 				    strncmp(arch, "amd64/",
619 					archend - arch) ? 2 : 1;
620 				if (archprio < archpriouse) {
621 					archpriouse = archprio;
622 					priouse = prio;
623 					iuse = i;
624 					continue;
625 				}
626 				if (archprio > archpriouse)
627 					continue;
628 			}
629 			if (prio >= priouse)
630 				continue;
631 			priouse = prio;
632 			iuse = i;
633 		}
634 		resp_show(req, r[iuse].file);
635 	}
636 
637 	resp_end_html();
638 }
639 
/*
 * Print a preformatted manual page as HTML inside a
 * <div class="catman"><pre> element.
 *
 * The file is read line by line.  Backspace overstrike sequences of
 * the form "x\by" are translated: "_\by" prints y in italics, a few
 * special pairs are replaced by "*" or "+", and every other pair
 * prints y in bold.  All other characters are printed HTML-escaped.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair are compared, so
			 * the second operand always has to be p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
776 
777 static void
778 resp_format(const struct req *req, const char *file)
779 {
780 	struct manoutput conf;
781 	struct mparse	*mp;
782 	struct roff_man	*man;
783 	void		*vp;
784 	int		 fd;
785 	int		 usepath;
786 
787 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
788 		puts("<p>You specified an invalid manual file.</p>");
789 		return;
790 	}
791 
792 	mchars_alloc();
793 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
794 	    MANDOCLEVEL_BADARG, NULL, req->q.manpath);
795 	mparse_readfd(mp, fd, file);
796 	close(fd);
797 
798 	memset(&conf, 0, sizeof(conf));
799 	conf.fragment = 1;
800 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
801 	usepath = strcmp(req->q.manpath, req->p[0]);
802 	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
803 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
804 
805 	mparse_result(mp, &man, NULL);
806 	if (man == NULL) {
807 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
808 		pg_error_internal();
809 		mparse_free(mp);
810 		mchars_free();
811 		return;
812 	}
813 
814 	vp = html_alloc(&conf);
815 
816 	if (man->macroset == MACROSET_MDOC) {
817 		mdoc_validate(man);
818 		html_mdoc(vp, man);
819 	} else {
820 		man_validate(man);
821 		html_man(vp, man);
822 	}
823 
824 	html_free(vp);
825 	mparse_free(mp);
826 	mchars_free();
827 	free(conf.man);
828 	free(conf.style);
829 }
830 
/*
 * Print one manual page.  After skipping an optional leading "./",
 * a file name starting with 'c' is treated as a preformatted page
 * and passed to resp_catman(); anything else goes to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
843 
844 static void
845 pg_show(struct req *req, const char *fullpath)
846 {
847 	char		*manpath;
848 	const char	*file;
849 
850 	if ((file = strchr(fullpath, '/')) == NULL) {
851 		pg_error_badrequest(
852 		    "You did not specify a page to show.");
853 		return;
854 	}
855 	manpath = mandoc_strndup(fullpath, file - fullpath);
856 	file++;
857 
858 	if ( ! validate_manpath(req, manpath)) {
859 		pg_error_badrequest(
860 		    "You specified an invalid manpath.");
861 		free(manpath);
862 		return;
863 	}
864 
865 	/*
866 	 * Begin by chdir()ing into the manpath.
867 	 * This way we can pick up the database files, which are
868 	 * relative to the manpath root.
869 	 */
870 
871 	if (chdir(manpath) == -1) {
872 		warn("chdir %s", manpath);
873 		pg_error_internal();
874 		free(manpath);
875 		return;
876 	}
877 	free(manpath);
878 
879 	if ( ! validate_filename(file)) {
880 		pg_error_badrequest(
881 		    "You specified an invalid manual file.");
882 		return;
883 	}
884 
885 	resp_begin_html(200, NULL);
886 	resp_searchform(req, FOCUS_NONE);
887 	resp_show(req, file);
888 	resp_end_html();
889 }
890 
891 static void
892 pg_search(const struct req *req)
893 {
894 	struct mansearch	  search;
895 	struct manpaths		  paths;
896 	struct manpage		 *res;
897 	char			**argv;
898 	char			 *query, *rp, *wp;
899 	size_t			  ressz;
900 	int			  argc;
901 
902 	/*
903 	 * Begin by chdir()ing into the root of the manpath.
904 	 * This way we can pick up the database files, which are
905 	 * relative to the manpath root.
906 	 */
907 
908 	if (chdir(req->q.manpath) == -1) {
909 		warn("chdir %s", req->q.manpath);
910 		pg_error_internal();
911 		return;
912 	}
913 
914 	search.arch = req->q.arch;
915 	search.sec = req->q.sec;
916 	search.outkey = "Nd";
917 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
918 	search.firstmatch = 1;
919 
920 	paths.sz = 1;
921 	paths.paths = mandoc_malloc(sizeof(char *));
922 	paths.paths[0] = mandoc_strdup(".");
923 
924 	/*
925 	 * Break apart at spaces with backslash-escaping.
926 	 */
927 
928 	argc = 0;
929 	argv = NULL;
930 	rp = query = mandoc_strdup(req->q.query);
931 	for (;;) {
932 		while (isspace((unsigned char)*rp))
933 			rp++;
934 		if (*rp == '\0')
935 			break;
936 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
937 		argv[argc++] = wp = rp;
938 		for (;;) {
939 			if (isspace((unsigned char)*rp)) {
940 				*wp = '\0';
941 				rp++;
942 				break;
943 			}
944 			if (rp[0] == '\\' && rp[1] != '\0')
945 				rp++;
946 			if (wp != rp)
947 				*wp = *rp;
948 			if (*rp == '\0')
949 				break;
950 			wp++;
951 			rp++;
952 		}
953 	}
954 
955 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
956 		pg_noresult(req, "You entered an invalid query.");
957 	else if (0 == ressz)
958 		pg_noresult(req, "No results found.");
959 	else
960 		pg_searchres(req, res, ressz);
961 
962 	free(query);
963 	mansearch_free(res, ressz);
964 	free(paths.paths[0]);
965 	free(paths.paths);
966 }
967 
/*
 * Entry point of the CGI program.
 *
 * Restricts the process with pledge(2), arms a virtual interval
 * timer, enters MAN_DIR, reads the list of manpaths, parses
 * PATH_INFO or else QUERY_STRING, validates the result and then
 * prints exactly one of three pages: a manual page, search results,
 * or the index page.
 * Returns EXIT_FAILURE after printing an error page,
 * EXIT_SUCCESS otherwise.
 */
int
main(void)
{
	struct req	 req;
	struct itimerval itimer;
	const char	*path;
	const char	*querystring;
	int		 i;

	/*
	 * The "rpath" pledge could be revoked after mparse_readfd()
	 * if the file descriptor to "/footer.html" would be opened
	 * up front, but it's probably not worth the complication
	 * of the code it would cause: it would require scattering
	 * pledge() calls in multiple low-level resp_*() functions.
	 */

	if (pledge("stdio rpath", NULL) == -1) {
		warn("pledge");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/* Poor man's ReDoS mitigation. */

	itimer.it_value.tv_sec = 2;
	itimer.it_value.tv_usec = 0;
	itimer.it_interval.tv_sec = 2;
	itimer.it_interval.tv_usec = 0;
	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
		warn("setitimer");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/*
	 * First we change directory into the MAN_DIR so that
	 * subsequent scanning for manpath directories is rooted
	 * relative to the same position.
	 */

	if (chdir(MAN_DIR) == -1) {
		warn("MAN_DIR: %s", MAN_DIR);
		pg_error_internal();
		return EXIT_FAILURE;
	}

	memset(&req, 0, sizeof(struct req));
	req.q.equal = 1;
	parse_manpath_conf(&req);

	/* Parse the path info and the query string. */

	if ((path = getenv("PATH_INFO")) == NULL)
		path = "";
	else if (*path == '/')
		path++;

	/*
	 * A non-empty PATH_INFO takes precedence over QUERY_STRING.
	 * It is only shown as a page if it contains a valid manpath
	 * and names an existing file; otherwise, path is cleared and
	 * the query derived from it by parse_path_info() is used.
	 */

	if (*path != '\0') {
		parse_path_info(&req, path);
		if (req.q.manpath == NULL || access(path, F_OK) == -1)
			path = "";
	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
		parse_query_string(&req, querystring);

	/* Validate parsed data and add defaults. */

	if (req.q.manpath == NULL)
		req.q.manpath = mandoc_strdup(req.p[0]);
	else if ( ! validate_manpath(&req, req.q.manpath)) {
		pg_error_badrequest(
		    "You specified an invalid manpath.");
		return EXIT_FAILURE;
	}

	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
		pg_error_badrequest(
		    "You specified an invalid architecture.");
		return EXIT_FAILURE;
	}

	/* Dispatch to the three different pages. */

	if ('\0' != *path)
		pg_show(&req, path);
	else if (NULL != req.q.query)
		pg_search(&req);
	else
		pg_index(&req);

	/* Release the query strings and the list of manpaths. */

	free(req.q.manpath);
	free(req.q.arch);
	free(req.q.sec);
	free(req.q.query);
	for (i = 0; i < (int)req.psz; i++)
		free(req.p[i]);
	free(req.p);
	return EXIT_SUCCESS;
}
1067 
/*
 * If PATH_INFO is not a file name, translate it to a query.
 *
 * The path is split into up to three directory components and
 * a final name, which may carry a ".section" suffix starting with
 * a digit.  The directory components are tried, in this order, as a
 * manpath, a "man<section>" directory and an architecture.
 * More directory components than that result in a bad request
 * page and termination of the program.
 * This function sets req->isquery to 0 and req->q.equal to 1.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	char	*dir[4];
	int	 i;

	req->isquery = 0;
	req->q.equal = 1;
	req->q.manpath = mandoc_strdup(path);
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	/*
	 * Without any slash, the whole copy becomes the query and
	 * there is no manpath; otherwise, query temporarily points
	 * into the copy held by manpath.
	 */
	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
		req->q.query = req->q.manpath;
		req->q.manpath = NULL;
	} else
		*req->q.query++ = '\0';

	/* Optional trailing section. */
	/* The suffix only counts as a section if it starts with a digit. */
	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
		if(isdigit((unsigned char)req->q.sec[1])) {
			*req->q.sec++ = '\0';
			req->q.sec = mandoc_strdup(req->q.sec);
		} else
			req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (req->q.manpath == NULL)
		return;
	/* Give the query its own allocation, separate from the copy. */
	req->q.query = mandoc_strdup(req->q.query);

	/* Split directory components. */
	/* dir[0] is the start of the allocated copy of the path. */
	dir[i = 0] = req->q.manpath;
	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
		if (++i == 3) {
			pg_error_badrequest(
			    "You specified too many directory components.");
			exit(EXIT_FAILURE);
		}
		*dir[i]++ = '\0';
	}

	/* Optional manpath. */
	/*
	 * validate_manpath() returns 0 or 1, so i becomes the index
	 * of the first component that is not the manpath.
	 */
	if ((i = validate_manpath(req, req->q.manpath)) == 0)
		req->q.manpath = NULL;
	else if (dir[1] == NULL)
		return;

	/* Optional section. */
	/* A section from a directory replaces one from the suffix. */
	if (strncmp(dir[i], "man", 3) == 0) {
		free(req->q.sec);
		req->q.sec = mandoc_strdup(dir[i++] + 3);
	}
	if (dir[i] == NULL) {
		/* The copy is only kept if it serves as the manpath. */
		if (req->q.manpath == NULL)
			free(dir[0]);
		return;
	}
	if (dir[i + 1] != NULL) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}

	/* Optional architecture. */
	/*
	 * With i == 0, the architecture is the first component,
	 * so the copy itself can be handed over to req->q.arch.
	 */
	if (i) {
		req->q.arch = mandoc_strdup(dir[i]);
		if (req->q.manpath == NULL)
			free(dir[0]);
	} else
		req->q.arch = dir[0];
}
1144 
1145 /*
1146  * Scan for indexable paths.
1147  */
1148 static void
1149 parse_manpath_conf(struct req *req)
1150 {
1151 	FILE	*fp;
1152 	char	*dp;
1153 	size_t	 dpsz;
1154 	ssize_t	 len;
1155 
1156 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1157 		warn("%s/manpath.conf", MAN_DIR);
1158 		pg_error_internal();
1159 		exit(EXIT_FAILURE);
1160 	}
1161 
1162 	dp = NULL;
1163 	dpsz = 0;
1164 
1165 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1166 		if (dp[len - 1] == '\n')
1167 			dp[--len] = '\0';
1168 		req->p = mandoc_realloc(req->p,
1169 		    (req->psz + 1) * sizeof(char *));
1170 		if ( ! validate_urifrag(dp)) {
1171 			warnx("%s/manpath.conf contains "
1172 			    "unsafe path \"%s\"", MAN_DIR, dp);
1173 			pg_error_internal();
1174 			exit(EXIT_FAILURE);
1175 		}
1176 		if (strchr(dp, '/') != NULL) {
1177 			warnx("%s/manpath.conf contains "
1178 			    "path with slash \"%s\"", MAN_DIR, dp);
1179 			pg_error_internal();
1180 			exit(EXIT_FAILURE);
1181 		}
1182 		req->p[req->psz++] = dp;
1183 		dp = NULL;
1184 		dpsz = 0;
1185 	}
1186 	free(dp);
1187 
1188 	if (req->p == NULL) {
1189 		warnx("%s/manpath.conf is empty", MAN_DIR);
1190 		pg_error_internal();
1191 		exit(EXIT_FAILURE);
1192 	}
1193 }
1194