xref: /netbsd-src/external/bsd/mdocml/dist/cgi.c (revision 796c32c94f6e154afc9de0f63da35c91bb739b45)
1 /*	Id: cgi.c,v 1.147 2017/02/08 13:34:27 schwarze Exp  */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45 
/*
 * Search parameters extracted from one request,
 * in the form passed to the search function.
 */
struct	query {
	/* Desired manual directory. */
	char		*manpath;
	/* Architecture. */
	char		*arch;
	/* Manual section. */
	char		*sec;
	/* Unparsed query expression. */
	char		*query;
	/* Match whole names, not substrings. */
	int		 equal;
};
56 
57 struct	req {
58 	struct query	  q;
59 	char		**p; /* array of available manpaths */
60 	size_t		  psz; /* number of available manpaths */
61 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
62 };
63 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,		/* do not focus any element */
	FOCUS_QUERY = 1		/* focus the query input box */
};
68 
69 static	void		 html_print(const char *);
70 static	void		 html_putchar(char);
71 static	int		 http_decode(char *);
72 static	void		 parse_manpath_conf(struct req *);
73 static	void		 parse_path_info(struct req *req, const char *path);
74 static	void		 parse_query_string(struct req *, const char *);
75 static	void		 pg_error_badrequest(const char *);
76 static	void		 pg_error_internal(void);
77 static	void		 pg_index(const struct req *);
78 static	void		 pg_noresult(const struct req *, const char *);
79 static	void		 pg_search(const struct req *);
80 static	void		 pg_searchres(const struct req *,
81 				struct manpage *, size_t);
82 static	void		 pg_show(struct req *, const char *);
83 static	void		 resp_begin_html(int, const char *);
84 static	void		 resp_begin_http(int, const char *);
85 static	void		 resp_catman(const struct req *, const char *);
86 static	void		 resp_copy(const char *);
87 static	void		 resp_end_html(void);
88 static	void		 resp_format(const struct req *, const char *);
89 static	void		 resp_searchform(const struct req *, enum focus);
90 static	void		 resp_show(const struct req *, const char *);
91 static	void		 set_query_attr(char **, char **);
92 static	int		 validate_filename(const char *);
93 static	int		 validate_manpath(const struct req *, const char *);
94 static	int		 validate_urifrag(const char *);
95 
96 static	const char	 *scriptname = SCRIPT_NAME;
97 
/*
 * Priorities of the manual sections 1 to 9, indexed by the
 * section digit minus one; pg_searchres() prefers lower values.
 */
static	const int sec_prios[] = { 1, 4, 5, 8, 6, 3, 7, 2, 9 };

/* Option values of the section selector, parallel to sec_names. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Option labels of the section selector, parallel to sec_numbers. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);

/* Architectures offered by the selector, in display order. */
static	const char *const arch_names[] = {
	"amd64",
	"alpha",
	"armv7",
	"arm64",
	"hppa",
	"i386",
	"landisk",
	"loongson",
	"luna88k",
	"macppc",
	"mips64",
	"octeon",
	"sgi",
	"socppc",
	"sparc64",
	"amiga",
	"arc",
	"armish",
	"arm32",
	"atari",
	"aviion",
	"beagle",
	"cats",
	"hppa64",
	"hp300",
	"ia64",
	"mac68k",
	"mvme68k",
	"mvme88k",
	"mvmeppc",
	"palm",
	"pc532",
	"pegasos",
	"pmax",
	"powerpc",
	"solbourne",
	"sparc",
	"sun3",
	"vax",
	"wgrisc",
	"x68k",
	"zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
132 
/*
 * Write one character to standard output, replacing the four
 * characters  "  &  >  <  by their HTML entities.
 * Everything else, non-ASCII bytes included, is passed through
 * unchanged: be warned!
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
159 
/*
 * Write a string to standard output through html_putchar().
 * A NULL pointer prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
173 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL.
 * In either case, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	*val = NULL;

	if (*newval != '\0') {
		*attr = newval;
		return;
	}
	free(newval);
	*attr = NULL;
}
190 
191 /*
192  * Parse the QUERY_STRING for key-value pairs
193  * and store the values into the query structure.
194  */
195 static void
196 parse_query_string(struct req *req, const char *qs)
197 {
198 	char		*key, *val;
199 	size_t		 keysz, valsz;
200 
201 	req->isquery	= 1;
202 	req->q.manpath	= NULL;
203 	req->q.arch	= NULL;
204 	req->q.sec	= NULL;
205 	req->q.query	= NULL;
206 	req->q.equal	= 1;
207 
208 	key = val = NULL;
209 	while (*qs != '\0') {
210 
211 		/* Parse one key. */
212 
213 		keysz = strcspn(qs, "=;&");
214 		key = mandoc_strndup(qs, keysz);
215 		qs += keysz;
216 		if (*qs != '=')
217 			goto next;
218 
219 		/* Parse one value. */
220 
221 		valsz = strcspn(++qs, ";&");
222 		val = mandoc_strndup(qs, valsz);
223 		qs += valsz;
224 
225 		/* Decode and catch encoding errors. */
226 
227 		if ( ! (http_decode(key) && http_decode(val)))
228 			goto next;
229 
230 		/* Handle key-value pairs. */
231 
232 		if ( ! strcmp(key, "query"))
233 			set_query_attr(&req->q.query, &val);
234 
235 		else if ( ! strcmp(key, "apropos"))
236 			req->q.equal = !strcmp(val, "0");
237 
238 		else if ( ! strcmp(key, "manpath")) {
239 #ifdef COMPAT_OLDURI
240 			if ( ! strncmp(val, "OpenBSD ", 8)) {
241 				val[7] = '-';
242 				if ('C' == val[8])
243 					val[8] = 'c';
244 			}
245 #endif
246 			set_query_attr(&req->q.manpath, &val);
247 		}
248 
249 		else if ( ! (strcmp(key, "sec")
250 #ifdef COMPAT_OLDURI
251 		    && strcmp(key, "sektion")
252 #endif
253 		    )) {
254 			if ( ! strcmp(val, "0"))
255 				*val = '\0';
256 			set_query_attr(&req->q.sec, &val);
257 		}
258 
259 		else if ( ! strcmp(key, "arch")) {
260 			if ( ! strcmp(val, "default"))
261 				*val = '\0';
262 			set_query_attr(&req->q.arch, &val);
263 		}
264 
265 		/*
266 		 * The key must be freed in any case.
267 		 * The val may have been handed over to the query
268 		 * structure, in which case it is now NULL.
269 		 */
270 next:
271 		free(key);
272 		key = NULL;
273 		free(val);
274 		val = NULL;
275 
276 		if (*qs != '\0')
277 			qs++;
278 	}
279 }
280 
281 /*
282  * HTTP-decode a string.  The standard explanation is that this turns
283  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
284  * over the allocated string.
285  */
286 static int
287 http_decode(char *p)
288 {
289 	char             hex[3];
290 	char		*q;
291 	int              c;
292 
293 	hex[2] = '\0';
294 
295 	q = p;
296 	for ( ; '\0' != *p; p++, q++) {
297 		if ('%' == *p) {
298 			if ('\0' == (hex[0] = *(p + 1)))
299 				return 0;
300 			if ('\0' == (hex[1] = *(p + 2)))
301 				return 0;
302 			if (1 != sscanf(hex, "%x", &c))
303 				return 0;
304 			if ('\0' == c)
305 				return 0;
306 
307 			*q = (char)c;
308 			p += 2;
309 		} else
310 			*q = '+' == *p ? ' ' : *p;
311 	}
312 
313 	*q = '\0';
314 	return 1;
315 }
316 
/*
 * Write the CGI response header and flush it.
 * A Status line is only written for codes other than 200;
 * msg is its reason phrase and is not used for code 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
331 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 *
 * The stdio buffer is flushed first because the data is written
 * directly to the descriptor, bypassing stdio.  Short writes are
 * continued and interrupted writes are retried; any other write
 * error ends the copy.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw == -1) {
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			off += nw;
		}
	}
	close(fd);
}
346 
347 static void
348 resp_begin_html(int code, const char *msg)
349 {
350 
351 	resp_begin_http(code, msg);
352 
353 	printf("<!DOCTYPE html>\n"
354 	       "<html>\n"
355 	       "<head>\n"
356 	       "  <meta charset=\"UTF-8\"/>\n"
357 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
358 	       " type=\"text/css\" media=\"all\">\n"
359 	       "  <title>%s</title>\n"
360 	       "</head>\n"
361 	       "<body>\n",
362 	       CSS_DIR, CUSTOMIZE_TITLE);
363 
364 	resp_copy(MAN_DIR "/header.html");
365 }
366 
367 static void
368 resp_end_html(void)
369 {
370 
371 	resp_copy(MAN_DIR "/footer.html");
372 
373 	puts("</body>\n"
374 	     "</html>");
375 }
376 
377 static void
378 resp_searchform(const struct req *req, enum focus focus)
379 {
380 	int		 i;
381 
382 	printf("<form action=\"/%s\" method=\"get\">\n"
383 	       "  <fieldset>\n"
384 	       "    <legend>Manual Page Search Parameters</legend>\n",
385 	       scriptname);
386 
387 	/* Write query input box. */
388 
389 	printf("    <input type=\"text\" name=\"query\" value=\"");
390 	if (req->q.query != NULL)
391 		html_print(req->q.query);
392 	printf( "\" size=\"40\"");
393 	if (focus == FOCUS_QUERY)
394 		printf(" autofocus");
395 	puts(">");
396 
397 	/* Write submission buttons. */
398 
399 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
400 		"man</button>\n"
401 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
402 		"apropos</button>\n"
403 		"    <br/>\n");
404 
405 	/* Write section selector. */
406 
407 	puts("    <select name=\"sec\">");
408 	for (i = 0; i < sec_MAX; i++) {
409 		printf("      <option value=\"%s\"", sec_numbers[i]);
410 		if (NULL != req->q.sec &&
411 		    0 == strcmp(sec_numbers[i], req->q.sec))
412 			printf(" selected=\"selected\"");
413 		printf(">%s</option>\n", sec_names[i]);
414 	}
415 	puts("    </select>");
416 
417 	/* Write architecture selector. */
418 
419 	printf(	"    <select name=\"arch\">\n"
420 		"      <option value=\"default\"");
421 	if (NULL == req->q.arch)
422 		printf(" selected=\"selected\"");
423 	puts(">All Architectures</option>");
424 	for (i = 0; i < arch_MAX; i++) {
425 		printf("      <option value=\"%s\"", arch_names[i]);
426 		if (NULL != req->q.arch &&
427 		    0 == strcmp(arch_names[i], req->q.arch))
428 			printf(" selected=\"selected\"");
429 		printf(">%s</option>\n", arch_names[i]);
430 	}
431 	puts("    </select>");
432 
433 	/* Write manpath selector. */
434 
435 	if (req->psz > 1) {
436 		puts("    <select name=\"manpath\">");
437 		for (i = 0; i < (int)req->psz; i++) {
438 			printf("      <option ");
439 			if (strcmp(req->q.manpath, req->p[i]) == 0)
440 				printf("selected=\"selected\" ");
441 			printf("value=\"");
442 			html_print(req->p[i]);
443 			printf("\">");
444 			html_print(req->p[i]);
445 			puts("</option>");
446 		}
447 		puts("    </select>");
448 	}
449 
450 	puts("  </fieldset>\n"
451 	     "</form>");
452 }
453 
/*
 * Check that a string only consists of alphanumeric characters
 * and of the characters  -  .  /  _  .
 * Returns 1 if so (the empty string included), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
467 
468 static int
469 validate_manpath(const struct req *req, const char* manpath)
470 {
471 	size_t	 i;
472 
473 	for (i = 0; i < req->psz; i++)
474 		if ( ! strcmp(manpath, req->p[i]))
475 			return 1;
476 
477 	return 0;
478 }
479 
/*
 * Check a file name relative to a manpath.
 * After skipping an optional leading "./", the name must begin
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that tries to climb up the tree. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
490 
491 static void
492 pg_index(const struct req *req)
493 {
494 
495 	resp_begin_html(200, NULL);
496 	resp_searchform(req, FOCUS_QUERY);
497 	printf("<p>\n"
498 	       "This web interface is documented in the\n"
499 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
500 	       "manual, and the\n"
501 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
502 	       "manual explains the query syntax.\n"
503 	       "</p>\n",
504 	       scriptname, *scriptname == '\0' ? "" : "/",
505 	       scriptname, *scriptname == '\0' ? "" : "/");
506 	resp_end_html();
507 }
508 
509 static void
510 pg_noresult(const struct req *req, const char *msg)
511 {
512 	resp_begin_html(200, NULL);
513 	resp_searchform(req, FOCUS_QUERY);
514 	puts("<p>");
515 	puts(msg);
516 	puts("</p>");
517 	resp_end_html();
518 }
519 
520 static void
521 pg_error_badrequest(const char *msg)
522 {
523 
524 	resp_begin_html(400, "Bad Request");
525 	puts("<h1>Bad Request</h1>\n"
526 	     "<p>\n");
527 	puts(msg);
528 	printf("Try again from the\n"
529 	       "<a href=\"/%s\">main page</a>.\n"
530 	       "</p>", scriptname);
531 	resp_end_html();
532 }
533 
/*
 * Write a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
541 
542 static void
543 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
544 {
545 	char		*arch, *archend;
546 	const char	*sec;
547 	size_t		 i, iuse;
548 	int		 archprio, archpriouse;
549 	int		 prio, priouse;
550 
551 	for (i = 0; i < sz; i++) {
552 		if (validate_filename(r[i].file))
553 			continue;
554 		warnx("invalid filename %s in %s database",
555 		    r[i].file, req->q.manpath);
556 		pg_error_internal();
557 		return;
558 	}
559 
560 	if (req->isquery && sz == 1) {
561 		/*
562 		 * If we have just one result, then jump there now
563 		 * without any delay.
564 		 */
565 		printf("Status: 303 See Other\r\n");
566 		printf("Location: http://%s/%s%s%s/%s",
567 		    HTTP_HOST, scriptname,
568 		    *scriptname == '\0' ? "" : "/",
569 		    req->q.manpath, r[0].file);
570 		printf("\r\n"
571 		     "Content-Type: text/html; charset=utf-8\r\n"
572 		     "\r\n");
573 		return;
574 	}
575 
576 	resp_begin_html(200, NULL);
577 	resp_searchform(req,
578 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
579 
580 	if (sz > 1) {
581 		puts("<table class=\"results\">");
582 		for (i = 0; i < sz; i++) {
583 			printf("  <tr>\n"
584 			       "    <td>"
585 			       "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
586 			    scriptname, *scriptname == '\0' ? "" : "/",
587 			    req->q.manpath, r[i].file);
588 			html_print(r[i].names);
589 			printf("</a></td>\n"
590 			       "    <td><span class=\"Nd\">");
591 			html_print(r[i].output);
592 			puts("</span></td>\n"
593 			     "  </tr>");
594 		}
595 		puts("</table>");
596 	}
597 
598 	/*
599 	 * In man(1) mode, show one of the pages
600 	 * even if more than one is found.
601 	 */
602 
603 	if (req->q.equal || sz == 1) {
604 		puts("<hr>");
605 		iuse = 0;
606 		priouse = 20;
607 		archpriouse = 3;
608 		for (i = 0; i < sz; i++) {
609 			sec = r[i].file;
610 			sec += strcspn(sec, "123456789");
611 			if (sec[0] == '\0')
612 				continue;
613 			prio = sec_prios[sec[0] - '1'];
614 			if (sec[1] != '/')
615 				prio += 10;
616 			if (req->q.arch == NULL) {
617 				archprio =
618 				    ((arch = strchr(sec + 1, '/'))
619 					== NULL) ? 3 :
620 				    ((archend = strchr(arch + 1, '/'))
621 					== NULL) ? 0 :
622 				    strncmp(arch, "amd64/",
623 					archend - arch) ? 2 : 1;
624 				if (archprio < archpriouse) {
625 					archpriouse = archprio;
626 					priouse = prio;
627 					iuse = i;
628 					continue;
629 				}
630 				if (archprio > archpriouse)
631 					continue;
632 			}
633 			if (prio >= priouse)
634 				continue;
635 			priouse = prio;
636 			iuse = i;
637 		}
638 		resp_show(req, r[iuse].file);
639 	}
640 
641 	resp_end_html();
642 }
643 
/*
 * Write a preformatted manual page (cat page) as HTML inside a
 * <pre> element.  Backspace overstrike sequences are translated:
 * an underscore overstruck with a character becomes italic, some
 * two-character combinations become '*' or '+', and any other
 * overstrike becomes bold.  All text is HTML-escaped.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair have to be
			 * compared at p[i] and p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
780 
781 static void
782 resp_format(const struct req *req, const char *file)
783 {
784 	struct manoutput conf;
785 	struct mparse	*mp;
786 	struct roff_man	*man;
787 	void		*vp;
788 	int		 fd;
789 	int		 usepath;
790 
791 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
792 		puts("<p>You specified an invalid manual file.</p>");
793 		return;
794 	}
795 
796 	mchars_alloc();
797 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
798 	    MANDOCLEVEL_BADARG, NULL, req->q.manpath);
799 	mparse_readfd(mp, fd, file);
800 	close(fd);
801 
802 	memset(&conf, 0, sizeof(conf));
803 	conf.fragment = 1;
804 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
805 	usepath = strcmp(req->q.manpath, req->p[0]);
806 	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
807 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
808 
809 	mparse_result(mp, &man, NULL);
810 	if (man == NULL) {
811 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
812 		pg_error_internal();
813 		mparse_free(mp);
814 		mchars_free();
815 		return;
816 	}
817 
818 	vp = html_alloc(&conf);
819 
820 	if (man->macroset == MACROSET_MDOC) {
821 		mdoc_validate(man);
822 		html_mdoc(vp, man);
823 	} else {
824 		man_validate(man);
825 		html_man(vp, man);
826 	}
827 
828 	html_free(vp);
829 	mparse_free(mp);
830 	mchars_free();
831 	free(conf.man);
832 	free(conf.style);
833 }
834 
/*
 * Write one manual page.  After skipping an optional leading
 * "./", names starting with 'c' are treated as preformatted
 * cat pages; everything else is formatted from source.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name != 'c') {
		resp_format(req, name);
		return;
	}
	resp_catman(req, name);
}
847 
848 static void
849 pg_show(struct req *req, const char *fullpath)
850 {
851 	char		*manpath;
852 	const char	*file;
853 
854 	if ((file = strchr(fullpath, '/')) == NULL) {
855 		pg_error_badrequest(
856 		    "You did not specify a page to show.");
857 		return;
858 	}
859 	manpath = mandoc_strndup(fullpath, file - fullpath);
860 	file++;
861 
862 	if ( ! validate_manpath(req, manpath)) {
863 		pg_error_badrequest(
864 		    "You specified an invalid manpath.");
865 		free(manpath);
866 		return;
867 	}
868 
869 	/*
870 	 * Begin by chdir()ing into the manpath.
871 	 * This way we can pick up the database files, which are
872 	 * relative to the manpath root.
873 	 */
874 
875 	if (chdir(manpath) == -1) {
876 		warn("chdir %s", manpath);
877 		pg_error_internal();
878 		free(manpath);
879 		return;
880 	}
881 	free(manpath);
882 
883 	if ( ! validate_filename(file)) {
884 		pg_error_badrequest(
885 		    "You specified an invalid manual file.");
886 		return;
887 	}
888 
889 	resp_begin_html(200, NULL);
890 	resp_searchform(req, FOCUS_NONE);
891 	resp_show(req, file);
892 	resp_end_html();
893 }
894 
895 static void
896 pg_search(const struct req *req)
897 {
898 	struct mansearch	  search;
899 	struct manpaths		  paths;
900 	struct manpage		 *res;
901 	char			**argv;
902 	char			 *query, *rp, *wp;
903 	size_t			  ressz;
904 	int			  argc;
905 
906 	/*
907 	 * Begin by chdir()ing into the root of the manpath.
908 	 * This way we can pick up the database files, which are
909 	 * relative to the manpath root.
910 	 */
911 
912 	if (chdir(req->q.manpath) == -1) {
913 		warn("chdir %s", req->q.manpath);
914 		pg_error_internal();
915 		return;
916 	}
917 
918 	search.arch = req->q.arch;
919 	search.sec = req->q.sec;
920 	search.outkey = "Nd";
921 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
922 	search.firstmatch = 1;
923 
924 	paths.sz = 1;
925 	paths.paths = mandoc_malloc(sizeof(char *));
926 	paths.paths[0] = mandoc_strdup(".");
927 
928 	/*
929 	 * Break apart at spaces with backslash-escaping.
930 	 */
931 
932 	argc = 0;
933 	argv = NULL;
934 	rp = query = mandoc_strdup(req->q.query);
935 	for (;;) {
936 		while (isspace((unsigned char)*rp))
937 			rp++;
938 		if (*rp == '\0')
939 			break;
940 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
941 		argv[argc++] = wp = rp;
942 		for (;;) {
943 			if (isspace((unsigned char)*rp)) {
944 				*wp = '\0';
945 				rp++;
946 				break;
947 			}
948 			if (rp[0] == '\\' && rp[1] != '\0')
949 				rp++;
950 			if (wp != rp)
951 				*wp = *rp;
952 			if (*rp == '\0')
953 				break;
954 			wp++;
955 			rp++;
956 		}
957 	}
958 
959 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
960 		pg_noresult(req, "You entered an invalid query.");
961 	else if (0 == ressz)
962 		pg_noresult(req, "No results found.");
963 	else
964 		pg_searchres(req, res, ressz);
965 
966 	free(query);
967 	mansearch_free(res, ressz);
968 	free(paths.paths[0]);
969 	free(paths.paths);
970 }
971 
972 int
973 main(void)
974 {
975 	struct req	 req;
976 	struct itimerval itimer;
977 	const char	*path;
978 	const char	*querystring;
979 	int		 i;
980 
981 	/* Poor man's ReDoS mitigation. */
982 
983 	itimer.it_value.tv_sec = 2;
984 	itimer.it_value.tv_usec = 0;
985 	itimer.it_interval.tv_sec = 2;
986 	itimer.it_interval.tv_usec = 0;
987 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
988 		warn("setitimer");
989 		pg_error_internal();
990 		return EXIT_FAILURE;
991 	}
992 
993 	/*
994 	 * First we change directory into the MAN_DIR so that
995 	 * subsequent scanning for manpath directories is rooted
996 	 * relative to the same position.
997 	 */
998 
999 	if (chdir(MAN_DIR) == -1) {
1000 		warn("MAN_DIR: %s", MAN_DIR);
1001 		pg_error_internal();
1002 		return EXIT_FAILURE;
1003 	}
1004 
1005 	memset(&req, 0, sizeof(struct req));
1006 	req.q.equal = 1;
1007 	parse_manpath_conf(&req);
1008 
1009 	/* Parse the path info and the query string. */
1010 
1011 	if ((path = getenv("PATH_INFO")) == NULL)
1012 		path = "";
1013 	else if (*path == '/')
1014 		path++;
1015 
1016 	if (*path != '\0') {
1017 		parse_path_info(&req, path);
1018 		if (req.q.manpath == NULL || access(path, F_OK) == -1)
1019 			path = "";
1020 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1021 		parse_query_string(&req, querystring);
1022 
1023 	/* Validate parsed data and add defaults. */
1024 
1025 	if (req.q.manpath == NULL)
1026 		req.q.manpath = mandoc_strdup(req.p[0]);
1027 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1028 		pg_error_badrequest(
1029 		    "You specified an invalid manpath.");
1030 		return EXIT_FAILURE;
1031 	}
1032 
1033 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1034 		pg_error_badrequest(
1035 		    "You specified an invalid architecture.");
1036 		return EXIT_FAILURE;
1037 	}
1038 
1039 	/* Dispatch to the three different pages. */
1040 
1041 	if ('\0' != *path)
1042 		pg_show(&req, path);
1043 	else if (NULL != req.q.query)
1044 		pg_search(&req);
1045 	else
1046 		pg_index(&req);
1047 
1048 	free(req.q.manpath);
1049 	free(req.q.arch);
1050 	free(req.q.sec);
1051 	free(req.q.query);
1052 	for (i = 0; i < (int)req.psz; i++)
1053 		free(req.p[i]);
1054 	free(req.p);
1055 	return EXIT_SUCCESS;
1056 }
1057 
1058 /*
1059  * If PATH_INFO is not a file name, translate it to a query.
1060  */
1061 static void
1062 parse_path_info(struct req *req, const char *path)
1063 {
1064 	char	*dir[4];
1065 	int	 i;
1066 
1067 	req->isquery = 0;
1068 	req->q.equal = 1;
1069 	req->q.manpath = mandoc_strdup(path);
1070 	req->q.arch = NULL;
1071 
1072 	/* Mandatory manual page name. */
1073 	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1074 		req->q.query = req->q.manpath;
1075 		req->q.manpath = NULL;
1076 	} else
1077 		*req->q.query++ = '\0';
1078 
1079 	/* Optional trailing section. */
1080 	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1081 		if(isdigit((unsigned char)req->q.sec[1])) {
1082 			*req->q.sec++ = '\0';
1083 			req->q.sec = mandoc_strdup(req->q.sec);
1084 		} else
1085 			req->q.sec = NULL;
1086 	}
1087 
1088 	/* Handle the case of name[.section] only. */
1089 	if (req->q.manpath == NULL)
1090 		return;
1091 	req->q.query = mandoc_strdup(req->q.query);
1092 
1093 	/* Split directory components. */
1094 	dir[i = 0] = req->q.manpath;
1095 	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1096 		if (++i == 3) {
1097 			pg_error_badrequest(
1098 			    "You specified too many directory components.");
1099 			exit(EXIT_FAILURE);
1100 		}
1101 		*dir[i]++ = '\0';
1102 	}
1103 
1104 	/* Optional manpath. */
1105 	if ((i = validate_manpath(req, req->q.manpath)) == 0)
1106 		req->q.manpath = NULL;
1107 	else if (dir[1] == NULL)
1108 		return;
1109 
1110 	/* Optional section. */
1111 	if (strncmp(dir[i], "man", 3) == 0) {
1112 		free(req->q.sec);
1113 		req->q.sec = mandoc_strdup(dir[i++] + 3);
1114 	}
1115 	if (dir[i] == NULL) {
1116 		if (req->q.manpath == NULL)
1117 			free(dir[0]);
1118 		return;
1119 	}
1120 	if (dir[i + 1] != NULL) {
1121 		pg_error_badrequest(
1122 		    "You specified an invalid directory component.");
1123 		exit(EXIT_FAILURE);
1124 	}
1125 
1126 	/* Optional architecture. */
1127 	if (i) {
1128 		req->q.arch = mandoc_strdup(dir[i]);
1129 		if (req->q.manpath == NULL)
1130 			free(dir[0]);
1131 	} else
1132 		req->q.arch = dir[0];
1133 }
1134 
1135 /*
1136  * Scan for indexable paths.
1137  */
1138 static void
1139 parse_manpath_conf(struct req *req)
1140 {
1141 	FILE	*fp;
1142 	char	*dp;
1143 	size_t	 dpsz;
1144 	ssize_t	 len;
1145 
1146 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1147 		warn("%s/manpath.conf", MAN_DIR);
1148 		pg_error_internal();
1149 		exit(EXIT_FAILURE);
1150 	}
1151 
1152 	dp = NULL;
1153 	dpsz = 0;
1154 
1155 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1156 		if (dp[len - 1] == '\n')
1157 			dp[--len] = '\0';
1158 		req->p = mandoc_realloc(req->p,
1159 		    (req->psz + 1) * sizeof(char *));
1160 		if ( ! validate_urifrag(dp)) {
1161 			warnx("%s/manpath.conf contains "
1162 			    "unsafe path \"%s\"", MAN_DIR, dp);
1163 			pg_error_internal();
1164 			exit(EXIT_FAILURE);
1165 		}
1166 		if (strchr(dp, '/') != NULL) {
1167 			warnx("%s/manpath.conf contains "
1168 			    "path with slash \"%s\"", MAN_DIR, dp);
1169 			pg_error_internal();
1170 			exit(EXIT_FAILURE);
1171 		}
1172 		req->p[req->psz++] = dp;
1173 		dp = NULL;
1174 		dpsz = 0;
1175 	}
1176 	free(dp);
1177 
1178 	if (req->p == NULL) {
1179 		warnx("%s/manpath.conf is empty", MAN_DIR);
1180 		pg_error_internal();
1181 		exit(EXIT_FAILURE);
1182 	}
1183 }
1184