xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision fb8aa7497fded39583f40e800732f9c046411717)
1 /*	$OpenBSD: cgi.c,v 1.71 2016/05/28 13:40:44 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "main.h"
38 #include "manconf.h"
39 #include "mansearch.h"
40 #include "cgi.h"
41 
/*
 * A query as passed to the search function.
 * The string members are either NULL (nothing requested) or strings
 * that main() releases with free() before exiting.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
52 
/*
 * State of one CGI request: the parsed query together with the
 * list of manpaths read from manpath.conf.
 */
struct	req {
	struct query	  q; /* parsed query parameters */
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
59 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0, /* no form element is focused */
	FOCUS_QUERY /* the query input box is focused */
};
64 
/*
 * Forward declarations of the file-local helpers, in alphabetical order.
 * The pg_*() functions each emit one complete page, the resp_*()
 * functions emit parts of a response, and the parse_*() and
 * validate_*() functions handle request input.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
91 
/* URI path of the CGI script, without the leading slash. */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Display preference of the manual sections 1 to 9, indexed by
 * section digit minus one; pg_searchres() prefers smaller values.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Form values of the section selector; entry i belongs to
 * the label sec_names[i].
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
/* Labels shown in the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[]. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
112 
/*
 * Architecture names offered in the architecture selector of the
 * search form, in the order in which they are listed there.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armish",      "armv7",
    "hppa",        "hppa64",      "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc",
    "sparc64",     "zaurus",
    "amiga",       "arc",         "arm32",       "atari",
    "aviion",      "beagle",      "cats",        "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sun3",
    "vax",         "wgrisc",      "x68k"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
127 
/*
 * Print a character, escaping HTML along the way.
 * The double quote, ampersand, greater-than and less-than characters
 * are replaced by the corresponding character references, so the
 * output is safe both in text and in double-quoted attribute values.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The entity is "&quot;"; "&quote;" is not defined in HTML. */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
154 
/*
 * Write a string to standard output, passing every character
 * through html_putchar() so that HTML-special characters are escaped.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
168 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL.  In both cases *val is
 * set to NULL because the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}

	free(incoming);
	*attr = NULL;
}
185 
186 /*
187  * Parse the QUERY_STRING for key-value pairs
188  * and store the values into the query structure.
189  */
190 static void
191 parse_query_string(struct req *req, const char *qs)
192 {
193 	char		*key, *val;
194 	size_t		 keysz, valsz;
195 
196 	req->isquery	= 1;
197 	req->q.manpath	= NULL;
198 	req->q.arch	= NULL;
199 	req->q.sec	= NULL;
200 	req->q.query	= NULL;
201 	req->q.equal	= 1;
202 
203 	key = val = NULL;
204 	while (*qs != '\0') {
205 
206 		/* Parse one key. */
207 
208 		keysz = strcspn(qs, "=;&");
209 		key = mandoc_strndup(qs, keysz);
210 		qs += keysz;
211 		if (*qs != '=')
212 			goto next;
213 
214 		/* Parse one value. */
215 
216 		valsz = strcspn(++qs, ";&");
217 		val = mandoc_strndup(qs, valsz);
218 		qs += valsz;
219 
220 		/* Decode and catch encoding errors. */
221 
222 		if ( ! (http_decode(key) && http_decode(val)))
223 			goto next;
224 
225 		/* Handle key-value pairs. */
226 
227 		if ( ! strcmp(key, "query"))
228 			set_query_attr(&req->q.query, &val);
229 
230 		else if ( ! strcmp(key, "apropos"))
231 			req->q.equal = !strcmp(val, "0");
232 
233 		else if ( ! strcmp(key, "manpath")) {
234 #ifdef COMPAT_OLDURI
235 			if ( ! strncmp(val, "OpenBSD ", 8)) {
236 				val[7] = '-';
237 				if ('C' == val[8])
238 					val[8] = 'c';
239 			}
240 #endif
241 			set_query_attr(&req->q.manpath, &val);
242 		}
243 
244 		else if ( ! (strcmp(key, "sec")
245 #ifdef COMPAT_OLDURI
246 		    && strcmp(key, "sektion")
247 #endif
248 		    )) {
249 			if ( ! strcmp(val, "0"))
250 				*val = '\0';
251 			set_query_attr(&req->q.sec, &val);
252 		}
253 
254 		else if ( ! strcmp(key, "arch")) {
255 			if ( ! strcmp(val, "default"))
256 				*val = '\0';
257 			set_query_attr(&req->q.arch, &val);
258 		}
259 
260 		/*
261 		 * The key must be freed in any case.
262 		 * The val may have been handed over to the query
263 		 * structure, in which case it is now NULL.
264 		 */
265 next:
266 		free(key);
267 		key = NULL;
268 		free(val);
269 		val = NULL;
270 
271 		if (*qs != '\0')
272 			qs++;
273 	}
274 }
275 
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way: a plus sign becomes
 * a blank and "%XX" becomes the byte with the hexadecimal value XX.
 * This is done in-place over the allocated string.
 *
 * Returns 1 on success.  Returns 0 if a percent sign is not followed
 * by exactly two hexadecimal digits or if the escape encodes a NUL
 * byte; in that case, the string is left partially decoded.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	int		 c;

	hex[2] = '\0';

	q = p;
	for ( ; '\0' != *p; p++, q++) {
		if ('%' != *p) {
			*q = '+' == *p ? ' ' : *p;
			continue;
		}

		/*
		 * Require two genuine hex digits.  The terminating NUL
		 * is not a hex digit, so the second test never reads
		 * beyond the end of the string.  Checking explicitly
		 * rejects signs, blanks and "0x" prefixes that a
		 * scanf-style conversion would accept.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;
		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the string. */

		if (0 == c)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
311 
/*
 * Write the HTTP response header to standard output and flush it.
 * A "Status:" line is only written for codes other than 200;
 * msg is the reason phrase and is not used for code 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 fields[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(fields, stdout);
	fflush(stdout);
}
326 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped, so optional
 * files such as header.html and footer.html may be absent.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 sz, off, wsz;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Keep buffered stdio output ahead of the raw writes below. */
	fflush(stdout);

	while ((sz = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may accept fewer bytes than requested. */
		for (off = 0; off < sz; off += wsz) {
			wsz = write(STDOUT_FILENO, buf + off, sz - off);
			if (wsz == -1) {
				close(fd);
				return;
			}
		}
	}
	close(fd);
}
340 
341 static void
342 resp_begin_html(int code, const char *msg)
343 {
344 
345 	resp_begin_http(code, msg);
346 
347 	printf("<!DOCTYPE html>\n"
348 	       "<html>\n"
349 	       "<head>\n"
350 	       "<meta charset=\"UTF-8\"/>\n"
351 	       "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
352 	       " type=\"text/css\" media=\"all\">\n"
353 	       "<title>%s</title>\n"
354 	       "</head>\n"
355 	       "<body>\n"
356 	       "<!-- Begin page content. //-->\n",
357 	       CSS_DIR, CUSTOMIZE_TITLE);
358 
359 	resp_copy(MAN_DIR "/header.html");
360 }
361 
362 static void
363 resp_end_html(void)
364 {
365 
366 	resp_copy(MAN_DIR "/footer.html");
367 
368 	puts("</body>\n"
369 	     "</html>");
370 }
371 
372 static void
373 resp_searchform(const struct req *req, enum focus focus)
374 {
375 	int		 i;
376 
377 	puts("<!-- Begin search form. //-->");
378 	printf("<div id=\"mancgi\">\n"
379 	       "<form action=\"/%s\" method=\"get\">\n"
380 	       "<fieldset>\n"
381 	       "<legend>Manual Page Search Parameters</legend>\n",
382 	       scriptname);
383 
384 	/* Write query input box. */
385 
386 	printf("<input type=\"text\" name=\"query\" value=\"");
387 	if (req->q.query != NULL)
388 		html_print(req->q.query);
389 	printf( "\" size=\"40\"");
390 	if (focus == FOCUS_QUERY)
391 		printf(" autofocus");
392 	puts(">");
393 
394 	/* Write submission buttons. */
395 
396 	printf(	"<button type=\"submit\" name=\"apropos\" value=\"0\">"
397 		"man</button>\n"
398 		"<button type=\"submit\" name=\"apropos\" value=\"1\">"
399 		"apropos</button>\n<br/>\n");
400 
401 	/* Write section selector. */
402 
403 	puts("<select name=\"sec\">");
404 	for (i = 0; i < sec_MAX; i++) {
405 		printf("<option value=\"%s\"", sec_numbers[i]);
406 		if (NULL != req->q.sec &&
407 		    0 == strcmp(sec_numbers[i], req->q.sec))
408 			printf(" selected=\"selected\"");
409 		printf(">%s</option>\n", sec_names[i]);
410 	}
411 	puts("</select>");
412 
413 	/* Write architecture selector. */
414 
415 	printf(	"<select name=\"arch\">\n"
416 		"<option value=\"default\"");
417 	if (NULL == req->q.arch)
418 		printf(" selected=\"selected\"");
419 	puts(">All Architectures</option>");
420 	for (i = 0; i < arch_MAX; i++) {
421 		printf("<option value=\"%s\"", arch_names[i]);
422 		if (NULL != req->q.arch &&
423 		    0 == strcmp(arch_names[i], req->q.arch))
424 			printf(" selected=\"selected\"");
425 		printf(">%s</option>\n", arch_names[i]);
426 	}
427 	puts("</select>");
428 
429 	/* Write manpath selector. */
430 
431 	if (req->psz > 1) {
432 		puts("<select name=\"manpath\">");
433 		for (i = 0; i < (int)req->psz; i++) {
434 			printf("<option ");
435 			if (strcmp(req->q.manpath, req->p[i]) == 0)
436 				printf("selected=\"selected\" ");
437 			printf("value=\"");
438 			html_print(req->p[i]);
439 			printf("\">");
440 			html_print(req->p[i]);
441 			puts("</option>");
442 		}
443 		puts("</select>");
444 	}
445 
446 	puts("</fieldset>\n"
447 	     "</form>\n"
448 	     "</div>");
449 	puts("<!-- End search form. //-->");
450 }
451 
/*
 * Check that a string only contains characters considered safe in
 * a URI fragment here: alphanumerics, '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
465 
466 static int
467 validate_manpath(const struct req *req, const char* manpath)
468 {
469 	size_t	 i;
470 
471 	if ( ! strcmp(manpath, "mandoc"))
472 		return 1;
473 
474 	for (i = 0; i < req->psz; i++)
475 		if ( ! strcmp(manpath, req->p[i]))
476 			return 1;
477 
478 	return 0;
479 }
480 
/*
 * Check a file name relative to a manpath.  After skipping an
 * optional leading "./", the name must not contain "../" or "/.."
 * and must begin with "man" or "cat".
 * Returns 1 for an acceptable name, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	/* Refuse anything that could climb out of the manpath. */

	if (strstr(name, "../") != NULL)
		return 0;
	if (strstr(name, "/..") != NULL)
		return 0;

	if (strncmp(name, "man", 3) == 0)
		return 1;
	return strncmp(name, "cat", 3) == 0;
}
491 
492 static void
493 pg_index(const struct req *req)
494 {
495 
496 	resp_begin_html(200, NULL);
497 	resp_searchform(req, FOCUS_QUERY);
498 	printf("<p>\n"
499 	       "This web interface is documented in the\n"
500 	       "<a href=\"/%s%smandoc/man8/man.cgi.8\">man.cgi</a>\n"
501 	       "manual, and the\n"
502 	       "<a href=\"/%s%smandoc/man1/apropos.1\">apropos</a>\n"
503 	       "manual explains the query syntax.\n"
504 	       "</p>\n",
505 	       scriptname, *scriptname == '\0' ? "" : "/",
506 	       scriptname, *scriptname == '\0' ? "" : "/");
507 	resp_end_html();
508 }
509 
510 static void
511 pg_noresult(const struct req *req, const char *msg)
512 {
513 	resp_begin_html(200, NULL);
514 	resp_searchform(req, FOCUS_QUERY);
515 	puts("<p>");
516 	puts(msg);
517 	puts("</p>");
518 	resp_end_html();
519 }
520 
521 static void
522 pg_error_badrequest(const char *msg)
523 {
524 
525 	resp_begin_html(400, "Bad Request");
526 	puts("<h1>Bad Request</h1>\n"
527 	     "<p>\n");
528 	puts(msg);
529 	printf("Try again from the\n"
530 	       "<a href=\"/%s\">main page</a>.\n"
531 	       "</p>", scriptname);
532 	resp_end_html();
533 }
534 
/*
 * Emit a "500 Internal Server Error" page.  No details are given
 * to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
542 
543 static void
544 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
545 {
546 	char		*arch, *archend;
547 	const char	*sec;
548 	size_t		 i, iuse;
549 	int		 archprio, archpriouse;
550 	int		 prio, priouse;
551 
552 	for (i = 0; i < sz; i++) {
553 		if (validate_filename(r[i].file))
554 			continue;
555 		warnx("invalid filename %s in %s database",
556 		    r[i].file, req->q.manpath);
557 		pg_error_internal();
558 		return;
559 	}
560 
561 	if (req->isquery && sz == 1) {
562 		/*
563 		 * If we have just one result, then jump there now
564 		 * without any delay.
565 		 */
566 		printf("Status: 303 See Other\r\n");
567 		printf("Location: http://%s/%s%s%s/%s",
568 		    HTTP_HOST, scriptname,
569 		    *scriptname == '\0' ? "" : "/",
570 		    req->q.manpath, r[0].file);
571 		printf("\r\n"
572 		     "Content-Type: text/html; charset=utf-8\r\n"
573 		     "\r\n");
574 		return;
575 	}
576 
577 	resp_begin_html(200, NULL);
578 	resp_searchform(req,
579 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
580 
581 	if (sz > 1) {
582 		puts("<div class=\"results\">");
583 		puts("<table>");
584 
585 		for (i = 0; i < sz; i++) {
586 			printf("<tr>\n"
587 			       "<td class=\"title\">\n"
588 			       "<a href=\"/%s%s%s/%s",
589 			    scriptname, *scriptname == '\0' ? "" : "/",
590 			    req->q.manpath, r[i].file);
591 			printf("\">");
592 			html_print(r[i].names);
593 			printf("</a>\n"
594 			       "</td>\n"
595 			       "<td class=\"desc\">");
596 			html_print(r[i].output);
597 			puts("</td>\n"
598 			     "</tr>");
599 		}
600 
601 		puts("</table>\n"
602 		     "</div>");
603 	}
604 
605 	/*
606 	 * In man(1) mode, show one of the pages
607 	 * even if more than one is found.
608 	 */
609 
610 	if (req->q.equal || sz == 1) {
611 		puts("<hr>");
612 		iuse = 0;
613 		priouse = 20;
614 		archpriouse = 3;
615 		for (i = 0; i < sz; i++) {
616 			sec = r[i].file;
617 			sec += strcspn(sec, "123456789");
618 			if (sec[0] == '\0')
619 				continue;
620 			prio = sec_prios[sec[0] - '1'];
621 			if (sec[1] != '/')
622 				prio += 10;
623 			if (req->q.arch == NULL) {
624 				archprio =
625 				    ((arch = strchr(sec + 1, '/'))
626 					== NULL) ? 3 :
627 				    ((archend = strchr(arch + 1, '/'))
628 					== NULL) ? 0 :
629 				    strncmp(arch, "amd64/",
630 					archend - arch) ? 2 : 1;
631 				if (archprio < archpriouse) {
632 					archpriouse = archprio;
633 					priouse = prio;
634 					iuse = i;
635 					continue;
636 				}
637 				if (archprio > archpriouse)
638 					continue;
639 			}
640 			if (prio >= priouse)
641 				continue;
642 			priouse = prio;
643 			iuse = i;
644 		}
645 		resp_show(req, r[iuse].file);
646 	}
647 
648 	resp_end_html();
649 }
650 
/*
 * Render a preformatted manual page ("cat page") as HTML inside
 * a <pre> element.  Backspace overstrike sequences are translated:
 * "_\bX" becomes italic X, certain overstruck pairs of symbols
 * become '*' or '+', and any other "X\bY" becomes bold Y.
 * All text is escaped with html_putchar().
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed over p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of the pair have to be
			 * compared, that is, p[i] and p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
787 
788 static void
789 resp_format(const struct req *req, const char *file)
790 {
791 	struct manoutput conf;
792 	struct mparse	*mp;
793 	struct roff_man	*man;
794 	void		*vp;
795 	int		 fd;
796 	int		 usepath;
797 
798 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
799 		puts("<p>You specified an invalid manual file.</p>");
800 		return;
801 	}
802 
803 	mchars_alloc();
804 	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
805 	mparse_readfd(mp, fd, file);
806 	close(fd);
807 
808 	memset(&conf, 0, sizeof(conf));
809 	conf.fragment = 1;
810 	usepath = strcmp(req->q.manpath, req->p[0]);
811 	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
812 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
813 
814 	mparse_result(mp, &man, NULL);
815 	if (man == NULL) {
816 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
817 		pg_error_internal();
818 		mparse_free(mp);
819 		mchars_free();
820 		return;
821 	}
822 
823 	vp = html_alloc(&conf);
824 
825 	if (man->macroset == MACROSET_MDOC) {
826 		mdoc_validate(man);
827 		html_mdoc(vp, man);
828 	} else {
829 		man_validate(man);
830 		html_man(vp, man);
831 	}
832 
833 	html_free(vp);
834 	mparse_free(mp);
835 	mchars_free();
836 	free(conf.man);
837 }
838 
/*
 * Write one manual page.  After skipping an optional leading "./",
 * a file name starting with 'c' is treated as a preformatted page
 * and anything else as manual page source.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name != 'c') {
		resp_format(req, name);
		return;
	}
	resp_catman(req, name);
}
851 
852 static void
853 pg_show(struct req *req, const char *fullpath)
854 {
855 	char		*manpath;
856 	const char	*file;
857 
858 	if ((file = strchr(fullpath, '/')) == NULL) {
859 		pg_error_badrequest(
860 		    "You did not specify a page to show.");
861 		return;
862 	}
863 	manpath = mandoc_strndup(fullpath, file - fullpath);
864 	file++;
865 
866 	if ( ! validate_manpath(req, manpath)) {
867 		pg_error_badrequest(
868 		    "You specified an invalid manpath.");
869 		free(manpath);
870 		return;
871 	}
872 
873 	/*
874 	 * Begin by chdir()ing into the manpath.
875 	 * This way we can pick up the database files, which are
876 	 * relative to the manpath root.
877 	 */
878 
879 	if (chdir(manpath) == -1) {
880 		warn("chdir %s", manpath);
881 		pg_error_internal();
882 		free(manpath);
883 		return;
884 	}
885 
886 	if (strcmp(manpath, "mandoc")) {
887 		free(req->q.manpath);
888 		req->q.manpath = manpath;
889 	} else
890 		free(manpath);
891 
892 	if ( ! validate_filename(file)) {
893 		pg_error_badrequest(
894 		    "You specified an invalid manual file.");
895 		return;
896 	}
897 
898 	resp_begin_html(200, NULL);
899 	resp_searchform(req, FOCUS_NONE);
900 	resp_show(req, file);
901 	resp_end_html();
902 }
903 
904 static void
905 pg_search(const struct req *req)
906 {
907 	struct mansearch	  search;
908 	struct manpaths		  paths;
909 	struct manpage		 *res;
910 	char			**argv;
911 	char			 *query, *rp, *wp;
912 	size_t			  ressz;
913 	int			  argc;
914 
915 	/*
916 	 * Begin by chdir()ing into the root of the manpath.
917 	 * This way we can pick up the database files, which are
918 	 * relative to the manpath root.
919 	 */
920 
921 	if (chdir(req->q.manpath) == -1) {
922 		warn("chdir %s", req->q.manpath);
923 		pg_error_internal();
924 		return;
925 	}
926 
927 	search.arch = req->q.arch;
928 	search.sec = req->q.sec;
929 	search.outkey = "Nd";
930 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
931 	search.firstmatch = 1;
932 
933 	paths.sz = 1;
934 	paths.paths = mandoc_malloc(sizeof(char *));
935 	paths.paths[0] = mandoc_strdup(".");
936 
937 	/*
938 	 * Break apart at spaces with backslash-escaping.
939 	 */
940 
941 	argc = 0;
942 	argv = NULL;
943 	rp = query = mandoc_strdup(req->q.query);
944 	for (;;) {
945 		while (isspace((unsigned char)*rp))
946 			rp++;
947 		if (*rp == '\0')
948 			break;
949 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
950 		argv[argc++] = wp = rp;
951 		for (;;) {
952 			if (isspace((unsigned char)*rp)) {
953 				*wp = '\0';
954 				rp++;
955 				break;
956 			}
957 			if (rp[0] == '\\' && rp[1] != '\0')
958 				rp++;
959 			if (wp != rp)
960 				*wp = *rp;
961 			if (*rp == '\0')
962 				break;
963 			wp++;
964 			rp++;
965 		}
966 	}
967 
968 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
969 		pg_noresult(req, "You entered an invalid query.");
970 	else if (0 == ressz)
971 		pg_noresult(req, "No results found.");
972 	else
973 		pg_searchres(req, res, ressz);
974 
975 	free(query);
976 	mansearch_free(res, ressz);
977 	free(paths.paths[0]);
978 	free(paths.paths);
979 }
980 
/*
 * CGI entry point.  Reads the list of manpaths, parses either
 * PATH_INFO or QUERY_STRING into a request, validates the manpath
 * and the architecture, and dispatches to one of three pages:
 * showing a file, running a search, or the index page.
 * Returns EXIT_FAILURE after emitting an error page, else EXIT_SUCCESS.
 */
int
main(void)
{
	struct req	 req;
	struct itimerval itimer;
	const char	*path;
	const char	*querystring;
	int		 i;

	/* Poor man's ReDoS mitigation. */

	itimer.it_value.tv_sec = 2;
	itimer.it_value.tv_usec = 0;
	itimer.it_interval.tv_sec = 2;
	itimer.it_interval.tv_usec = 0;
	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
		warn("setitimer");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/*
	 * First we change directory into the MAN_DIR so that
	 * subsequent scanning for manpath directories is rooted
	 * relative to the same position.
	 */

	if (chdir(MAN_DIR) == -1) {
		warn("MAN_DIR: %s", MAN_DIR);
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/* Start from an all-NULL request in man(1) mode. */

	memset(&req, 0, sizeof(struct req));
	req.q.equal = 1;
	parse_manpath_conf(&req);

	/* Parse the path info and the query string. */

	if ((path = getenv("PATH_INFO")) == NULL)
		path = "";
	else if (*path == '/')
		path++;

	/*
	 * A non-empty PATH_INFO takes precedence over QUERY_STRING.
	 * If it does not name an existing file, clear it, such that
	 * the query derived from it is used for a search below.
	 */

	if (*path != '\0') {
		parse_path_info(&req, path);
		if (access(path, F_OK) == -1)
			path = "";
	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
		parse_query_string(&req, querystring);

	/* Validate parsed data and add defaults. */

	if (req.q.manpath == NULL)
		req.q.manpath = mandoc_strdup(req.p[0]);
	else if ( ! validate_manpath(&req, req.q.manpath)) {
		pg_error_badrequest(
		    "You specified an invalid manpath.");
		return EXIT_FAILURE;
	}

	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
		pg_error_badrequest(
		    "You specified an invalid architecture.");
		return EXIT_FAILURE;
	}

	/* Dispatch to the three different pages. */

	if ('\0' != *path)
		pg_show(&req, path);
	else if (NULL != req.q.query)
		pg_search(&req);
	else
		pg_index(&req);

	/* Release the request data and the list of manpaths. */

	free(req.q.manpath);
	free(req.q.arch);
	free(req.q.sec);
	free(req.q.query);
	for (i = 0; i < (int)req.psz; i++)
		free(req.p[i]);
	free(req.p);
	return EXIT_SUCCESS;
}
1066 
1067 /*
1068  * If PATH_INFO is not a file name, translate it to a query.
1069  */
1070 static void
1071 parse_path_info(struct req *req, const char *path)
1072 {
1073 	char	*dir;
1074 
1075 	req->isquery = 0;
1076 	req->q.equal = 1;
1077 	req->q.manpath = mandoc_strdup(path);
1078 
1079 	/* Mandatory manual page name. */
1080 	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1081 		req->q.query = req->q.manpath;
1082 		req->q.manpath = NULL;
1083 	} else
1084 		*req->q.query++ = '\0';
1085 
1086 	/* Optional trailing section. */
1087 	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1088 		if(isdigit((unsigned char)req->q.sec[1])) {
1089 			*req->q.sec++ = '\0';
1090 			req->q.sec = mandoc_strdup(req->q.sec);
1091 		} else
1092 			req->q.sec = NULL;
1093 	}
1094 
1095 	/* Handle the case of name[.section] only. */
1096 	if (req->q.manpath == NULL) {
1097 		req->q.arch = NULL;
1098 		return;
1099 	}
1100 	req->q.query = mandoc_strdup(req->q.query);
1101 
1102 	/* Optional architecture. */
1103 	dir = strrchr(req->q.manpath, '/');
1104 	if (dir != NULL && strncmp(dir + 1, "man", 3) != 0) {
1105 		*dir++ = '\0';
1106 		req->q.arch = mandoc_strdup(dir);
1107 		dir = strrchr(req->q.manpath, '/');
1108 	} else
1109 		req->q.arch = NULL;
1110 
1111 	/* Optional directory name. */
1112 	if (dir != NULL && strncmp(dir + 1, "man", 3) == 0) {
1113 		*dir++ = '\0';
1114 		free(req->q.sec);
1115 		req->q.sec = mandoc_strdup(dir + 3);
1116 	}
1117 }
1118 
1119 /*
1120  * Scan for indexable paths.
1121  */
1122 static void
1123 parse_manpath_conf(struct req *req)
1124 {
1125 	FILE	*fp;
1126 	char	*dp;
1127 	size_t	 dpsz;
1128 	ssize_t	 len;
1129 
1130 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1131 		warn("%s/manpath.conf", MAN_DIR);
1132 		pg_error_internal();
1133 		exit(EXIT_FAILURE);
1134 	}
1135 
1136 	dp = NULL;
1137 	dpsz = 0;
1138 
1139 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1140 		if (dp[len - 1] == '\n')
1141 			dp[--len] = '\0';
1142 		req->p = mandoc_realloc(req->p,
1143 		    (req->psz + 1) * sizeof(char *));
1144 		if ( ! validate_urifrag(dp)) {
1145 			warnx("%s/manpath.conf contains "
1146 			    "unsafe path \"%s\"", MAN_DIR, dp);
1147 			pg_error_internal();
1148 			exit(EXIT_FAILURE);
1149 		}
1150 		if (strchr(dp, '/') != NULL) {
1151 			warnx("%s/manpath.conf contains "
1152 			    "path with slash \"%s\"", MAN_DIR, dp);
1153 			pg_error_internal();
1154 			exit(EXIT_FAILURE);
1155 		}
1156 		req->p[req->psz++] = dp;
1157 		dp = NULL;
1158 		dpsz = 0;
1159 	}
1160 	free(dp);
1161 
1162 	if (req->p == NULL) {
1163 		warnx("%s/manpath.conf is empty", MAN_DIR);
1164 		pg_error_internal();
1165 		exit(EXIT_FAILURE);
1166 	}
1167 }
1168