xref: /netbsd-src/external/bsd/mdocml/dist/cgi.c (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)
1 /*	Id: cgi.c,v 1.135 2016/07/11 22:48:37 schwarze Exp  */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "main.h"
40 #include "manconf.h"
41 #include "mansearch.h"
42 #include "cgi.h"
43 
/*
 * A query as passed to the search function.
 * Each string member is either NULL (not specified) or points to
 * heap memory; main() frees all four of them before it returns.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
54 
/*
 * State of one CGI request: the parsed query plus the list of
 * manpaths that parse_manpath_conf() reads from manpath.conf.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
61 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute (FOCUS_QUERY) or not (FOCUS_NONE).
 */
enum	focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
66 
/*
 * Forward declarations of the file-local functions,
 * so that the definitions below can appear in any order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
93 
/*
 * Name of this script as it appears in generated links and form
 * actions ("/<scriptname>..."); fixed at build time by SCRIPT_NAME.
 */
static	const char	 *scriptname = SCRIPT_NAME;
95 
/*
 * Ranking used by pg_searchres() when several sections match:
 * entry N-1 holds the rank of section N, and the lowest rank wins.
 */
static	const int sec_prios[] = {
    1, 4, 5, 8, 6, 3, 7, 2, 9
};

/*
 * Values submitted by the section selector of the search form.
 * "0" stands for all sections.
 */
static	const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};

/*
 * Labels shown by the section selector,
 * in the same order as sec_numbers[].
 */
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
114 
/*
 * Architectures offered by the architecture selector of the
 * search form, in the order in which they are listed.
 */
static	const char *const arch_names[] = {
    "amd64", "alpha", "armish", "armv7", "hppa", "hppa64",
    "i386", "landisk", "loongson", "luna88k", "macppc", "mips64",
    "octeon", "sgi", "socppc", "sparc", "sparc64", "zaurus",
    "amiga", "arc", "arm32", "atari", "aviion", "beagle",
    "cats", "hp300", "ia64", "mac68k", "mvme68k", "mvme88k",
    "mvmeppc", "palm", "pc532", "pegasos", "pmax", "powerpc",
    "solbourne", "sun3", "vax", "wgrisc", "x68k"
};

/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
129 
/*
 * Print a character, escaping HTML along the way.
 * The double quote, ampersand, less-than and greater-than characters
 * are replaced by the entities &quot;, &amp;, &lt; and &gt;.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The entity is spelled "quot"; "&quote;" is not defined. */
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
156 
/*
 * Print a NUL-terminated string with HTML escaping by feeding it
 * to html_putchar() one character at a time.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
170 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In either case, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}

	/* Empty values count as "not specified". */
	free(incoming);
	*attr = NULL;
}
187 
188 /*
189  * Parse the QUERY_STRING for key-value pairs
190  * and store the values into the query structure.
191  */
192 static void
193 parse_query_string(struct req *req, const char *qs)
194 {
195 	char		*key, *val;
196 	size_t		 keysz, valsz;
197 
198 	req->isquery	= 1;
199 	req->q.manpath	= NULL;
200 	req->q.arch	= NULL;
201 	req->q.sec	= NULL;
202 	req->q.query	= NULL;
203 	req->q.equal	= 1;
204 
205 	key = val = NULL;
206 	while (*qs != '\0') {
207 
208 		/* Parse one key. */
209 
210 		keysz = strcspn(qs, "=;&");
211 		key = mandoc_strndup(qs, keysz);
212 		qs += keysz;
213 		if (*qs != '=')
214 			goto next;
215 
216 		/* Parse one value. */
217 
218 		valsz = strcspn(++qs, ";&");
219 		val = mandoc_strndup(qs, valsz);
220 		qs += valsz;
221 
222 		/* Decode and catch encoding errors. */
223 
224 		if ( ! (http_decode(key) && http_decode(val)))
225 			goto next;
226 
227 		/* Handle key-value pairs. */
228 
229 		if ( ! strcmp(key, "query"))
230 			set_query_attr(&req->q.query, &val);
231 
232 		else if ( ! strcmp(key, "apropos"))
233 			req->q.equal = !strcmp(val, "0");
234 
235 		else if ( ! strcmp(key, "manpath")) {
236 #ifdef COMPAT_OLDURI
237 			if ( ! strncmp(val, "OpenBSD ", 8)) {
238 				val[7] = '-';
239 				if ('C' == val[8])
240 					val[8] = 'c';
241 			}
242 #endif
243 			set_query_attr(&req->q.manpath, &val);
244 		}
245 
246 		else if ( ! (strcmp(key, "sec")
247 #ifdef COMPAT_OLDURI
248 		    && strcmp(key, "sektion")
249 #endif
250 		    )) {
251 			if ( ! strcmp(val, "0"))
252 				*val = '\0';
253 			set_query_attr(&req->q.sec, &val);
254 		}
255 
256 		else if ( ! strcmp(key, "arch")) {
257 			if ( ! strcmp(val, "default"))
258 				*val = '\0';
259 			set_query_attr(&req->q.arch, &val);
260 		}
261 
262 		/*
263 		 * The key must be freed in any case.
264 		 * The val may have been handed over to the query
265 		 * structure, in which case it is now NULL.
266 		 */
267 next:
268 		free(key);
269 		key = NULL;
270 		free(val);
271 		val = NULL;
272 
273 		if (*qs != '\0')
274 			qs++;
275 	}
276 }
277 
278 /*
279  * HTTP-decode a string.  The standard explanation is that this turns
280  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
281  * over the allocated string.
282  */
283 static int
284 http_decode(char *p)
285 {
286 	char             hex[3];
287 	char		*q;
288 	int              c;
289 
290 	hex[2] = '\0';
291 
292 	q = p;
293 	for ( ; '\0' != *p; p++, q++) {
294 		if ('%' == *p) {
295 			if ('\0' == (hex[0] = *(p + 1)))
296 				return 0;
297 			if ('\0' == (hex[1] = *(p + 2)))
298 				return 0;
299 			if (1 != sscanf(hex, "%x", &c))
300 				return 0;
301 			if ('\0' == c)
302 				return 0;
303 
304 			*q = (char)c;
305 			p += 2;
306 		} else
307 			*q = '+' == *p ? ' ' : *p;
308 	}
309 
310 	*q = '\0';
311 	return 1;
312 }
313 
/*
 * Print the HTTP response header and flush standard output.
 * A "Status:" line is only printed for codes other than 200;
 * msg is the reason phrase for it and is not used for code 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
328 
329 static void
330 resp_copy(const char *filename)
331 {
332 	char	 buf[4096];
333 	ssize_t	 sz;
334 	int	 fd;
335 
336 	if ((fd = open(filename, O_RDONLY)) != -1) {
337 		fflush(stdout);
338 		while ((sz = read(fd, buf, sizeof(buf))) > 0)
339 			write(STDOUT_FILENO, buf, sz);
340 	}
341 }
342 
343 static void
344 resp_begin_html(int code, const char *msg)
345 {
346 
347 	resp_begin_http(code, msg);
348 
349 	printf("<!DOCTYPE html>\n"
350 	       "<html>\n"
351 	       "<head>\n"
352 	       "<meta charset=\"UTF-8\"/>\n"
353 	       "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
354 	       " type=\"text/css\" media=\"all\">\n"
355 	       "<title>%s</title>\n"
356 	       "</head>\n"
357 	       "<body>\n"
358 	       "<!-- Begin page content. //-->\n",
359 	       CSS_DIR, CUSTOMIZE_TITLE);
360 
361 	resp_copy(MAN_DIR "/header.html");
362 }
363 
364 static void
365 resp_end_html(void)
366 {
367 
368 	resp_copy(MAN_DIR "/footer.html");
369 
370 	puts("</body>\n"
371 	     "</html>");
372 }
373 
374 static void
375 resp_searchform(const struct req *req, enum focus focus)
376 {
377 	int		 i;
378 
379 	puts("<!-- Begin search form. //-->");
380 	printf("<div id=\"mancgi\">\n"
381 	       "<form action=\"/%s\" method=\"get\">\n"
382 	       "<fieldset>\n"
383 	       "<legend>Manual Page Search Parameters</legend>\n",
384 	       scriptname);
385 
386 	/* Write query input box. */
387 
388 	printf("<input type=\"text\" name=\"query\" value=\"");
389 	if (req->q.query != NULL)
390 		html_print(req->q.query);
391 	printf( "\" size=\"40\"");
392 	if (focus == FOCUS_QUERY)
393 		printf(" autofocus");
394 	puts(">");
395 
396 	/* Write submission buttons. */
397 
398 	printf(	"<button type=\"submit\" name=\"apropos\" value=\"0\">"
399 		"man</button>\n"
400 		"<button type=\"submit\" name=\"apropos\" value=\"1\">"
401 		"apropos</button>\n<br/>\n");
402 
403 	/* Write section selector. */
404 
405 	puts("<select name=\"sec\">");
406 	for (i = 0; i < sec_MAX; i++) {
407 		printf("<option value=\"%s\"", sec_numbers[i]);
408 		if (NULL != req->q.sec &&
409 		    0 == strcmp(sec_numbers[i], req->q.sec))
410 			printf(" selected=\"selected\"");
411 		printf(">%s</option>\n", sec_names[i]);
412 	}
413 	puts("</select>");
414 
415 	/* Write architecture selector. */
416 
417 	printf(	"<select name=\"arch\">\n"
418 		"<option value=\"default\"");
419 	if (NULL == req->q.arch)
420 		printf(" selected=\"selected\"");
421 	puts(">All Architectures</option>");
422 	for (i = 0; i < arch_MAX; i++) {
423 		printf("<option value=\"%s\"", arch_names[i]);
424 		if (NULL != req->q.arch &&
425 		    0 == strcmp(arch_names[i], req->q.arch))
426 			printf(" selected=\"selected\"");
427 		printf(">%s</option>\n", arch_names[i]);
428 	}
429 	puts("</select>");
430 
431 	/* Write manpath selector. */
432 
433 	if (req->psz > 1) {
434 		puts("<select name=\"manpath\">");
435 		for (i = 0; i < (int)req->psz; i++) {
436 			printf("<option ");
437 			if (strcmp(req->q.manpath, req->p[i]) == 0)
438 				printf("selected=\"selected\" ");
439 			printf("value=\"");
440 			html_print(req->p[i]);
441 			printf("\">");
442 			html_print(req->p[i]);
443 			puts("</option>");
444 		}
445 		puts("</select>");
446 	}
447 
448 	puts("</fieldset>\n"
449 	     "</form>\n"
450 	     "</div>");
451 	puts("<!-- End search form. //-->");
452 }
453 
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
467 
468 static int
469 validate_manpath(const struct req *req, const char* manpath)
470 {
471 	size_t	 i;
472 
473 	for (i = 0; i < req->psz; i++)
474 		if ( ! strcmp(manpath, req->p[i]))
475 			return 1;
476 
477 	return 0;
478 }
479 
/*
 * Check a manual file name relative to a manpath.
 * After an optional leading "./", the name must start with "man"
 * or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that could step out of the manpath. */
	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
490 
491 static void
492 pg_index(const struct req *req)
493 {
494 
495 	resp_begin_html(200, NULL);
496 	resp_searchform(req, FOCUS_QUERY);
497 	printf("<p>\n"
498 	       "This web interface is documented in the\n"
499 	       "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
500 	       "manual, and the\n"
501 	       "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n"
502 	       "manual explains the query syntax.\n"
503 	       "</p>\n",
504 	       scriptname, *scriptname == '\0' ? "" : "/",
505 	       scriptname, *scriptname == '\0' ? "" : "/");
506 	resp_end_html();
507 }
508 
509 static void
510 pg_noresult(const struct req *req, const char *msg)
511 {
512 	resp_begin_html(200, NULL);
513 	resp_searchform(req, FOCUS_QUERY);
514 	puts("<p>");
515 	puts(msg);
516 	puts("</p>");
517 	resp_end_html();
518 }
519 
520 static void
521 pg_error_badrequest(const char *msg)
522 {
523 
524 	resp_begin_html(400, "Bad Request");
525 	puts("<h1>Bad Request</h1>\n"
526 	     "<p>\n");
527 	puts(msg);
528 	printf("Try again from the\n"
529 	       "<a href=\"/%s\">main page</a>.\n"
530 	       "</p>", scriptname);
531 	resp_end_html();
532 }
533 
/*
 * Print a "500 Internal Server Error" page.
 * No details are sent to the client.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
541 
542 static void
543 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
544 {
545 	char		*arch, *archend;
546 	const char	*sec;
547 	size_t		 i, iuse;
548 	int		 archprio, archpriouse;
549 	int		 prio, priouse;
550 
551 	for (i = 0; i < sz; i++) {
552 		if (validate_filename(r[i].file))
553 			continue;
554 		warnx("invalid filename %s in %s database",
555 		    r[i].file, req->q.manpath);
556 		pg_error_internal();
557 		return;
558 	}
559 
560 	if (req->isquery && sz == 1) {
561 		/*
562 		 * If we have just one result, then jump there now
563 		 * without any delay.
564 		 */
565 		printf("Status: 303 See Other\r\n");
566 		printf("Location: http://%s/%s%s%s/%s",
567 		    HTTP_HOST, scriptname,
568 		    *scriptname == '\0' ? "" : "/",
569 		    req->q.manpath, r[0].file);
570 		printf("\r\n"
571 		     "Content-Type: text/html; charset=utf-8\r\n"
572 		     "\r\n");
573 		return;
574 	}
575 
576 	resp_begin_html(200, NULL);
577 	resp_searchform(req,
578 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
579 
580 	if (sz > 1) {
581 		puts("<div class=\"results\">");
582 		puts("<table>");
583 
584 		for (i = 0; i < sz; i++) {
585 			printf("<tr>\n"
586 			       "<td class=\"title\">\n"
587 			       "<a href=\"/%s%s%s/%s",
588 			    scriptname, *scriptname == '\0' ? "" : "/",
589 			    req->q.manpath, r[i].file);
590 			printf("\">");
591 			html_print(r[i].names);
592 			printf("</a>\n"
593 			       "</td>\n"
594 			       "<td class=\"desc\">");
595 			html_print(r[i].output);
596 			puts("</td>\n"
597 			     "</tr>");
598 		}
599 
600 		puts("</table>\n"
601 		     "</div>");
602 	}
603 
604 	/*
605 	 * In man(1) mode, show one of the pages
606 	 * even if more than one is found.
607 	 */
608 
609 	if (req->q.equal || sz == 1) {
610 		puts("<hr>");
611 		iuse = 0;
612 		priouse = 20;
613 		archpriouse = 3;
614 		for (i = 0; i < sz; i++) {
615 			sec = r[i].file;
616 			sec += strcspn(sec, "123456789");
617 			if (sec[0] == '\0')
618 				continue;
619 			prio = sec_prios[sec[0] - '1'];
620 			if (sec[1] != '/')
621 				prio += 10;
622 			if (req->q.arch == NULL) {
623 				archprio =
624 				    ((arch = strchr(sec + 1, '/'))
625 					== NULL) ? 3 :
626 				    ((archend = strchr(arch + 1, '/'))
627 					== NULL) ? 0 :
628 				    strncmp(arch, "amd64/",
629 					archend - arch) ? 2 : 1;
630 				if (archprio < archpriouse) {
631 					archpriouse = archprio;
632 					priouse = prio;
633 					iuse = i;
634 					continue;
635 				}
636 				if (archprio > archpriouse)
637 					continue;
638 			}
639 			if (prio >= priouse)
640 				continue;
641 			priouse = prio;
642 			iuse = i;
643 		}
644 		resp_show(req, r[iuse].file);
645 	}
646 
647 	resp_end_html();
648 }
649 
/*
 * Print a preformatted manual page (cat page) as HTML.
 * Backspace overstrike sequences are translated: an underscore
 * overstruck by a character becomes italic, a few special character
 * pairs become '*' or '+', and any other pair becomes bold.
 * All other characters are printed with HTML escaping.
 * If the file cannot be opened, an error paragraph is printed.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;
	char		 under, over;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace:
			 * under is the character printed first and
			 * over is the one printed on top of it.
			 */

			under = p[i];
			over = p[i + 2];

			/* Italic mode. */

			if ('_' == under) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of the pair have to be
			 * inspected; the one in between is known
			 * to be the backspace.
			 */

			if (('+' == under && 'o' == over) ||
			    ('o' == under && '+' == over) ||
			    ('|' == under && '=' == over) ||
			    ('=' == under && '|' == over) ||
			    ('*' == under && '=' == over) ||
			    ('=' == under && '*' == over) ||
			    ('*' == under && '|' == over) ||
			    ('|' == under && '*' == over)) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == under && '-' == over) ||
			    ('-' == under && '|' == over) ||
			    ('+' == under && '-' == over) ||
			    ('-' == under && '+' == over) ||
			    ('+' == under && '|' == over) ||
			    ('|' == under && '+' == over)) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
786 
787 static void
788 resp_format(const struct req *req, const char *file)
789 {
790 	struct manoutput conf;
791 	struct mparse	*mp;
792 	struct roff_man	*man;
793 	void		*vp;
794 	int		 fd;
795 	int		 usepath;
796 
797 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
798 		puts("<p>You specified an invalid manual file.</p>");
799 		return;
800 	}
801 
802 	mchars_alloc();
803 	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
804 	mparse_readfd(mp, fd, file);
805 	close(fd);
806 
807 	memset(&conf, 0, sizeof(conf));
808 	conf.fragment = 1;
809 	usepath = strcmp(req->q.manpath, req->p[0]);
810 	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
811 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
812 
813 	mparse_result(mp, &man, NULL);
814 	if (man == NULL) {
815 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
816 		pg_error_internal();
817 		mparse_free(mp);
818 		mchars_free();
819 		return;
820 	}
821 
822 	vp = html_alloc(&conf);
823 
824 	if (man->macroset == MACROSET_MDOC) {
825 		mdoc_validate(man);
826 		html_mdoc(vp, man);
827 	} else {
828 		man_validate(man);
829 		html_man(vp, man);
830 	}
831 
832 	html_free(vp);
833 	mparse_free(mp);
834 	mchars_free();
835 	free(conf.man);
836 }
837 
/*
 * Print one manual page.  After skipping an optional leading "./",
 * file names starting with 'c' are handed to resp_catman() as
 * preformatted pages, all others to resp_format() as manual sources.
 */
static void
resp_show(const struct req *req, const char *file)
{

	if (strncmp(file, "./", 2) == 0)
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
850 
851 static void
852 pg_show(struct req *req, const char *fullpath)
853 {
854 	char		*manpath;
855 	const char	*file;
856 
857 	if ((file = strchr(fullpath, '/')) == NULL) {
858 		pg_error_badrequest(
859 		    "You did not specify a page to show.");
860 		return;
861 	}
862 	manpath = mandoc_strndup(fullpath, file - fullpath);
863 	file++;
864 
865 	if ( ! validate_manpath(req, manpath)) {
866 		pg_error_badrequest(
867 		    "You specified an invalid manpath.");
868 		free(manpath);
869 		return;
870 	}
871 
872 	/*
873 	 * Begin by chdir()ing into the manpath.
874 	 * This way we can pick up the database files, which are
875 	 * relative to the manpath root.
876 	 */
877 
878 	if (chdir(manpath) == -1) {
879 		warn("chdir %s", manpath);
880 		pg_error_internal();
881 		free(manpath);
882 		return;
883 	}
884 	free(manpath);
885 
886 	if ( ! validate_filename(file)) {
887 		pg_error_badrequest(
888 		    "You specified an invalid manual file.");
889 		return;
890 	}
891 
892 	resp_begin_html(200, NULL);
893 	resp_searchform(req, FOCUS_NONE);
894 	resp_show(req, file);
895 	resp_end_html();
896 }
897 
898 static void
899 pg_search(const struct req *req)
900 {
901 	struct mansearch	  search;
902 	struct manpaths		  paths;
903 	struct manpage		 *res;
904 	char			**argv;
905 	char			 *query, *rp, *wp;
906 	size_t			  ressz;
907 	int			  argc;
908 
909 	/*
910 	 * Begin by chdir()ing into the root of the manpath.
911 	 * This way we can pick up the database files, which are
912 	 * relative to the manpath root.
913 	 */
914 
915 	if (chdir(req->q.manpath) == -1) {
916 		warn("chdir %s", req->q.manpath);
917 		pg_error_internal();
918 		return;
919 	}
920 
921 	search.arch = req->q.arch;
922 	search.sec = req->q.sec;
923 	search.outkey = "Nd";
924 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
925 	search.firstmatch = 1;
926 
927 	paths.sz = 1;
928 	paths.paths = mandoc_malloc(sizeof(char *));
929 	paths.paths[0] = mandoc_strdup(".");
930 
931 	/*
932 	 * Break apart at spaces with backslash-escaping.
933 	 */
934 
935 	argc = 0;
936 	argv = NULL;
937 	rp = query = mandoc_strdup(req->q.query);
938 	for (;;) {
939 		while (isspace((unsigned char)*rp))
940 			rp++;
941 		if (*rp == '\0')
942 			break;
943 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
944 		argv[argc++] = wp = rp;
945 		for (;;) {
946 			if (isspace((unsigned char)*rp)) {
947 				*wp = '\0';
948 				rp++;
949 				break;
950 			}
951 			if (rp[0] == '\\' && rp[1] != '\0')
952 				rp++;
953 			if (wp != rp)
954 				*wp = *rp;
955 			if (*rp == '\0')
956 				break;
957 			wp++;
958 			rp++;
959 		}
960 	}
961 
962 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
963 		pg_noresult(req, "You entered an invalid query.");
964 	else if (0 == ressz)
965 		pg_noresult(req, "No results found.");
966 	else
967 		pg_searchres(req, res, ressz);
968 
969 	free(query);
970 	mansearch_free(res, ressz);
971 	free(paths.paths[0]);
972 	free(paths.paths);
973 }
974 
975 int
976 main(void)
977 {
978 	struct req	 req;
979 	struct itimerval itimer;
980 	const char	*path;
981 	const char	*querystring;
982 	int		 i;
983 
984 	/* Poor man's ReDoS mitigation. */
985 
986 	itimer.it_value.tv_sec = 2;
987 	itimer.it_value.tv_usec = 0;
988 	itimer.it_interval.tv_sec = 2;
989 	itimer.it_interval.tv_usec = 0;
990 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
991 		warn("setitimer");
992 		pg_error_internal();
993 		return EXIT_FAILURE;
994 	}
995 
996 	/*
997 	 * First we change directory into the MAN_DIR so that
998 	 * subsequent scanning for manpath directories is rooted
999 	 * relative to the same position.
1000 	 */
1001 
1002 	if (chdir(MAN_DIR) == -1) {
1003 		warn("MAN_DIR: %s", MAN_DIR);
1004 		pg_error_internal();
1005 		return EXIT_FAILURE;
1006 	}
1007 
1008 	memset(&req, 0, sizeof(struct req));
1009 	req.q.equal = 1;
1010 	parse_manpath_conf(&req);
1011 
1012 	/* Parse the path info and the query string. */
1013 
1014 	if ((path = getenv("PATH_INFO")) == NULL)
1015 		path = "";
1016 	else if (*path == '/')
1017 		path++;
1018 
1019 	if (*path != '\0') {
1020 		parse_path_info(&req, path);
1021 		if (req.q.manpath == NULL || access(path, F_OK) == -1)
1022 			path = "";
1023 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1024 		parse_query_string(&req, querystring);
1025 
1026 	/* Validate parsed data and add defaults. */
1027 
1028 	if (req.q.manpath == NULL)
1029 		req.q.manpath = mandoc_strdup(req.p[0]);
1030 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1031 		pg_error_badrequest(
1032 		    "You specified an invalid manpath.");
1033 		return EXIT_FAILURE;
1034 	}
1035 
1036 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1037 		pg_error_badrequest(
1038 		    "You specified an invalid architecture.");
1039 		return EXIT_FAILURE;
1040 	}
1041 
1042 	/* Dispatch to the three different pages. */
1043 
1044 	if ('\0' != *path)
1045 		pg_show(&req, path);
1046 	else if (NULL != req.q.query)
1047 		pg_search(&req);
1048 	else
1049 		pg_index(&req);
1050 
1051 	free(req.q.manpath);
1052 	free(req.q.arch);
1053 	free(req.q.sec);
1054 	free(req.q.query);
1055 	for (i = 0; i < (int)req.psz; i++)
1056 		free(req.p[i]);
1057 	free(req.p);
1058 	return EXIT_SUCCESS;
1059 }
1060 
/*
 * If PATH_INFO is not a file name, translate it to a query.
 *
 * The path is taken apart as
 * [manpath/][man<section>/][arch/]name[.section]
 * and the pieces are stored into req->q.  At most three directory
 * components are accepted; otherwise, a bad request page is printed
 * and the program exits.
 *
 * All splitting is done inside one private copy of the path.
 * Depending on the outcome, that copy ends up owned by q.manpath,
 * by q.query (name only), or by q.arch (a single directory that is
 * an architecture), or it is freed here.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	char	*dir[4];
	int	 i;

	req->isquery = 0;
	req->q.equal = 1;
	req->q.manpath = mandoc_strdup(path);
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
		/* No slash: the whole copy is the name. */
		req->q.query = req->q.manpath;
		req->q.manpath = NULL;
	} else
		*req->q.query++ = '\0';

	/* Optional trailing section. */
	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
		if(isdigit((unsigned char)req->q.sec[1])) {
			/* Cut the suffix off the name and keep a copy. */
			*req->q.sec++ = '\0';
			req->q.sec = mandoc_strdup(req->q.sec);
		} else
			req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (req->q.manpath == NULL)
		return;

	/* The name points into the copy; give it its own allocation. */
	req->q.query = mandoc_strdup(req->q.query);

	/* Split directory components. */
	dir[i = 0] = req->q.manpath;
	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
		if (++i == 3) {
			pg_error_badrequest(
			    "You specified too many directory components.");
			exit(EXIT_FAILURE);
		}
		*dir[i]++ = '\0';
	}

	/*
	 * Optional manpath.
	 * Afterwards, i is the index of the first component that has
	 * not been consumed yet: 1 if dir[0] is a valid manpath,
	 * 0 if it is not.
	 */
	if ((i = validate_manpath(req, req->q.manpath)) == 0)
		req->q.manpath = NULL;
	else if (dir[1] == NULL)
		return;

	/* Optional section. */
	if (strncmp(dir[i], "man", 3) == 0) {
		free(req->q.sec);
		req->q.sec = mandoc_strdup(dir[i++] + 3);
	}
	if (dir[i] == NULL) {
		/* Nothing is left; drop the copy unless q.manpath owns it. */
		if (req->q.manpath == NULL)
			free(dir[0]);
		return;
	}
	if (dir[i + 1] != NULL) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}

	/* Optional architecture. */
	if (i) {
		req->q.arch = mandoc_strdup(dir[i]);
		if (req->q.manpath == NULL)
			free(dir[0]);
	} else
		/* The only component is the architecture: hand it over. */
		req->q.arch = dir[0];
}
1137 
1138 /*
1139  * Scan for indexable paths.
1140  */
1141 static void
1142 parse_manpath_conf(struct req *req)
1143 {
1144 	FILE	*fp;
1145 	char	*dp;
1146 	size_t	 dpsz;
1147 	ssize_t	 len;
1148 
1149 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1150 		warn("%s/manpath.conf", MAN_DIR);
1151 		pg_error_internal();
1152 		exit(EXIT_FAILURE);
1153 	}
1154 
1155 	dp = NULL;
1156 	dpsz = 0;
1157 
1158 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1159 		if (dp[len - 1] == '\n')
1160 			dp[--len] = '\0';
1161 		req->p = mandoc_realloc(req->p,
1162 		    (req->psz + 1) * sizeof(char *));
1163 		if ( ! validate_urifrag(dp)) {
1164 			warnx("%s/manpath.conf contains "
1165 			    "unsafe path \"%s\"", MAN_DIR, dp);
1166 			pg_error_internal();
1167 			exit(EXIT_FAILURE);
1168 		}
1169 		if (strchr(dp, '/') != NULL) {
1170 			warnx("%s/manpath.conf contains "
1171 			    "path with slash \"%s\"", MAN_DIR, dp);
1172 			pg_error_internal();
1173 			exit(EXIT_FAILURE);
1174 		}
1175 		req->p[req->psz++] = dp;
1176 		dp = NULL;
1177 		dpsz = 0;
1178 	}
1179 	free(dp);
1180 
1181 	if (req->p == NULL) {
1182 		warnx("%s/manpath.conf is empty", MAN_DIR);
1183 		pg_error_internal();
1184 		exit(EXIT_FAILURE);
1185 	}
1186 }
1187