xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: cgi.c,v 1.74 2016/07/11 22:46:57 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "main.h"
38 #include "manconf.h"
39 #include "mansearch.h"
40 #include "cgi.h"
41 
/*
 * A query as passed to the search function.
 * The string members are filled in by parse_query_string() or
 * parse_path_info(); a member that was not requested is NULL.
 * main() releases all four strings with free() before exiting.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
52 
/*
 * State of one CGI request: the parsed query plus the list of
 * manpaths read from manpath.conf by parse_manpath_conf().
 * The first manpath, p[0], serves as the default in main().
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
59 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute (FOCUS_QUERY) or not (FOCUS_NONE).
 */
enum	focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
64 
/*
 * Forward declarations of the file-local functions.
 * The html_*, http_*, parse_* and validate_* helpers handle input
 * and escaping; the pg_* functions each emit one complete page;
 * the resp_* functions emit parts of a page.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
91 
/* Name of this CGI script as it appears in generated URIs. */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Display priority of the sections 1 to 9, indexed by the section
 * digit minus '1'.  pg_searchres() prefers the page with the
 * smallest value when it has to pick one page out of several.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Values and labels of the section selector in the search form.
 * Both arrays are indexed in parallel by resp_searchform(),
 * so they must keep the same number of entries.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[]. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
112 
/*
 * Architecture names offered by the architecture selector
 * in the search form, in the order in which they are listed.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armish",      "armv7",
    "hppa",        "hppa64",      "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc",
    "sparc64",     "zaurus",
    "amiga",       "arc",         "arm32",       "atari",
    "aviion",      "beagle",      "cats",        "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sun3",
    "vax",         "wgrisc",      "x68k"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
127 
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML text and attribute values
 * (double quote, ampersand, less-than, greater-than) with the
 * corresponding named character references.
 * Every other byte, including non-ASCII, is passed through
 * unchanged: be warned!
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		/* The reference is named "quot"; "&quote;" is undefined. */
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		putchar((unsigned char)c);
		return;
	}
	fputs(entity, stdout);
}
154 
/*
 * Print a NUL-terminated string one character at a time
 * through html_putchar(), so HTML special characters get escaped.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
168 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases *val is set to NULL, so the caller must not
 * free or use it afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	if (*newval == '\0') {
		free(newval);
		newval = NULL;
	}

	*attr = newval;
	*val = NULL;
}
185 
186 /*
187  * Parse the QUERY_STRING for key-value pairs
188  * and store the values into the query structure.
189  */
190 static void
191 parse_query_string(struct req *req, const char *qs)
192 {
193 	char		*key, *val;
194 	size_t		 keysz, valsz;
195 
196 	req->isquery	= 1;
197 	req->q.manpath	= NULL;
198 	req->q.arch	= NULL;
199 	req->q.sec	= NULL;
200 	req->q.query	= NULL;
201 	req->q.equal	= 1;
202 
203 	key = val = NULL;
204 	while (*qs != '\0') {
205 
206 		/* Parse one key. */
207 
208 		keysz = strcspn(qs, "=;&");
209 		key = mandoc_strndup(qs, keysz);
210 		qs += keysz;
211 		if (*qs != '=')
212 			goto next;
213 
214 		/* Parse one value. */
215 
216 		valsz = strcspn(++qs, ";&");
217 		val = mandoc_strndup(qs, valsz);
218 		qs += valsz;
219 
220 		/* Decode and catch encoding errors. */
221 
222 		if ( ! (http_decode(key) && http_decode(val)))
223 			goto next;
224 
225 		/* Handle key-value pairs. */
226 
227 		if ( ! strcmp(key, "query"))
228 			set_query_attr(&req->q.query, &val);
229 
230 		else if ( ! strcmp(key, "apropos"))
231 			req->q.equal = !strcmp(val, "0");
232 
233 		else if ( ! strcmp(key, "manpath")) {
234 #ifdef COMPAT_OLDURI
235 			if ( ! strncmp(val, "OpenBSD ", 8)) {
236 				val[7] = '-';
237 				if ('C' == val[8])
238 					val[8] = 'c';
239 			}
240 #endif
241 			set_query_attr(&req->q.manpath, &val);
242 		}
243 
244 		else if ( ! (strcmp(key, "sec")
245 #ifdef COMPAT_OLDURI
246 		    && strcmp(key, "sektion")
247 #endif
248 		    )) {
249 			if ( ! strcmp(val, "0"))
250 				*val = '\0';
251 			set_query_attr(&req->q.sec, &val);
252 		}
253 
254 		else if ( ! strcmp(key, "arch")) {
255 			if ( ! strcmp(val, "default"))
256 				*val = '\0';
257 			set_query_attr(&req->q.arch, &val);
258 		}
259 
260 		/*
261 		 * The key must be freed in any case.
262 		 * The val may have been handed over to the query
263 		 * structure, in which case it is now NULL.
264 		 */
265 next:
266 		free(key);
267 		key = NULL;
268 		free(val);
269 		val = NULL;
270 
271 		if (*qs != '\0')
272 			qs++;
273 	}
274 }
275 
276 /*
277  * HTTP-decode a string.  The standard explanation is that this turns
278  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
279  * over the allocated string.
280  */
281 static int
282 http_decode(char *p)
283 {
284 	char             hex[3];
285 	char		*q;
286 	int              c;
287 
288 	hex[2] = '\0';
289 
290 	q = p;
291 	for ( ; '\0' != *p; p++, q++) {
292 		if ('%' == *p) {
293 			if ('\0' == (hex[0] = *(p + 1)))
294 				return 0;
295 			if ('\0' == (hex[1] = *(p + 2)))
296 				return 0;
297 			if (1 != sscanf(hex, "%x", &c))
298 				return 0;
299 			if ('\0' == c)
300 				return 0;
301 
302 			*q = (char)c;
303 			p += 2;
304 		} else
305 			*q = '+' == *p ? ' ' : *p;
306 	}
307 
308 	*q = '\0';
309 	return 1;
310 }
311 
/*
 * Print the HTTP response header block and flush it.
 * A "Status:" line with the given code and message is only
 * printed for codes other than 200; msg is not used for 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);

	/* An empty line ends the header block. */
	fputs("\r\n", stdout);

	fflush(stdout);
}
326 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * The stdio buffer is flushed first because the copy bypasses
 * stdio and writes to the file descriptor directly.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 rsz, wsz, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((rsz = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may accept less than requested. */
		for (off = 0; off < rsz; off += wsz) {
			wsz = write(STDOUT_FILENO, buf + off, rsz - off);
			if (wsz <= 0)
				goto out;
		}
	}
out:
	/* Do not leak the descriptor. */
	close(fd);
}
340 
341 static void
342 resp_begin_html(int code, const char *msg)
343 {
344 
345 	resp_begin_http(code, msg);
346 
347 	printf("<!DOCTYPE html>\n"
348 	       "<html>\n"
349 	       "<head>\n"
350 	       "<meta charset=\"UTF-8\"/>\n"
351 	       "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
352 	       " type=\"text/css\" media=\"all\">\n"
353 	       "<title>%s</title>\n"
354 	       "</head>\n"
355 	       "<body>\n"
356 	       "<!-- Begin page content. //-->\n",
357 	       CSS_DIR, CUSTOMIZE_TITLE);
358 
359 	resp_copy(MAN_DIR "/header.html");
360 }
361 
362 static void
363 resp_end_html(void)
364 {
365 
366 	resp_copy(MAN_DIR "/footer.html");
367 
368 	puts("</body>\n"
369 	     "</html>");
370 }
371 
372 static void
373 resp_searchform(const struct req *req, enum focus focus)
374 {
375 	int		 i;
376 
377 	puts("<!-- Begin search form. //-->");
378 	printf("<div id=\"mancgi\">\n"
379 	       "<form action=\"/%s\" method=\"get\">\n"
380 	       "<fieldset>\n"
381 	       "<legend>Manual Page Search Parameters</legend>\n",
382 	       scriptname);
383 
384 	/* Write query input box. */
385 
386 	printf("<input type=\"text\" name=\"query\" value=\"");
387 	if (req->q.query != NULL)
388 		html_print(req->q.query);
389 	printf( "\" size=\"40\"");
390 	if (focus == FOCUS_QUERY)
391 		printf(" autofocus");
392 	puts(">");
393 
394 	/* Write submission buttons. */
395 
396 	printf(	"<button type=\"submit\" name=\"apropos\" value=\"0\">"
397 		"man</button>\n"
398 		"<button type=\"submit\" name=\"apropos\" value=\"1\">"
399 		"apropos</button>\n<br/>\n");
400 
401 	/* Write section selector. */
402 
403 	puts("<select name=\"sec\">");
404 	for (i = 0; i < sec_MAX; i++) {
405 		printf("<option value=\"%s\"", sec_numbers[i]);
406 		if (NULL != req->q.sec &&
407 		    0 == strcmp(sec_numbers[i], req->q.sec))
408 			printf(" selected=\"selected\"");
409 		printf(">%s</option>\n", sec_names[i]);
410 	}
411 	puts("</select>");
412 
413 	/* Write architecture selector. */
414 
415 	printf(	"<select name=\"arch\">\n"
416 		"<option value=\"default\"");
417 	if (NULL == req->q.arch)
418 		printf(" selected=\"selected\"");
419 	puts(">All Architectures</option>");
420 	for (i = 0; i < arch_MAX; i++) {
421 		printf("<option value=\"%s\"", arch_names[i]);
422 		if (NULL != req->q.arch &&
423 		    0 == strcmp(arch_names[i], req->q.arch))
424 			printf(" selected=\"selected\"");
425 		printf(">%s</option>\n", arch_names[i]);
426 	}
427 	puts("</select>");
428 
429 	/* Write manpath selector. */
430 
431 	if (req->psz > 1) {
432 		puts("<select name=\"manpath\">");
433 		for (i = 0; i < (int)req->psz; i++) {
434 			printf("<option ");
435 			if (strcmp(req->q.manpath, req->p[i]) == 0)
436 				printf("selected=\"selected\" ");
437 			printf("value=\"");
438 			html_print(req->p[i]);
439 			printf("\">");
440 			html_print(req->p[i]);
441 			puts("</option>");
442 		}
443 		puts("</select>");
444 	}
445 
446 	puts("</fieldset>\n"
447 	     "</form>\n"
448 	     "</div>");
449 	puts("<!-- End search form. //-->");
450 }
451 
/*
 * Check that a string is safe to use as a URI fragment:
 * it may only contain ASCII letters and digits (as classified
 * by isalnum(3)), dashes, dots, slashes, and underscores.
 * Returns 1 if it is safe, 0 otherwise; the empty string is safe.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
465 
466 static int
467 validate_manpath(const struct req *req, const char* manpath)
468 {
469 	size_t	 i;
470 
471 	for (i = 0; i < req->psz; i++)
472 		if ( ! strcmp(manpath, req->p[i]))
473 			return 1;
474 
475 	return 0;
476 }
477 
/*
 * Check that a file name relative to a manpath is acceptable:
 * after an optional leading "./", it must start with "man" or
 * "cat" and must not contain "../" or "/..".
 * Returns 1 if it is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	/* Refuse to climb out of the manpath. */

	if (strstr(name, "../") != NULL)
		return 0;
	if (strstr(name, "/..") != NULL)
		return 0;

	/* Only manual source and preformatted directories. */

	if (strncmp(name, "man", 3) == 0)
		return 1;
	if (strncmp(name, "cat", 3) == 0)
		return 1;
	return 0;
}
488 
489 static void
490 pg_index(const struct req *req)
491 {
492 
493 	resp_begin_html(200, NULL);
494 	resp_searchform(req, FOCUS_QUERY);
495 	printf("<p>\n"
496 	       "This web interface is documented in the\n"
497 	       "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
498 	       "manual, and the\n"
499 	       "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n"
500 	       "manual explains the query syntax.\n"
501 	       "</p>\n",
502 	       scriptname, *scriptname == '\0' ? "" : "/",
503 	       scriptname, *scriptname == '\0' ? "" : "/");
504 	resp_end_html();
505 }
506 
507 static void
508 pg_noresult(const struct req *req, const char *msg)
509 {
510 	resp_begin_html(200, NULL);
511 	resp_searchform(req, FOCUS_QUERY);
512 	puts("<p>");
513 	puts(msg);
514 	puts("</p>");
515 	resp_end_html();
516 }
517 
518 static void
519 pg_error_badrequest(const char *msg)
520 {
521 
522 	resp_begin_html(400, "Bad Request");
523 	puts("<h1>Bad Request</h1>\n"
524 	     "<p>\n");
525 	puts(msg);
526 	printf("Try again from the\n"
527 	       "<a href=\"/%s\">main page</a>.\n"
528 	       "</p>", scriptname);
529 	resp_end_html();
530 }
531 
/*
 * Print a complete "500 Internal Server Error" page.
 * Details are not shown to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
539 
540 static void
541 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
542 {
543 	char		*arch, *archend;
544 	const char	*sec;
545 	size_t		 i, iuse;
546 	int		 archprio, archpriouse;
547 	int		 prio, priouse;
548 
549 	for (i = 0; i < sz; i++) {
550 		if (validate_filename(r[i].file))
551 			continue;
552 		warnx("invalid filename %s in %s database",
553 		    r[i].file, req->q.manpath);
554 		pg_error_internal();
555 		return;
556 	}
557 
558 	if (req->isquery && sz == 1) {
559 		/*
560 		 * If we have just one result, then jump there now
561 		 * without any delay.
562 		 */
563 		printf("Status: 303 See Other\r\n");
564 		printf("Location: http://%s/%s%s%s/%s",
565 		    HTTP_HOST, scriptname,
566 		    *scriptname == '\0' ? "" : "/",
567 		    req->q.manpath, r[0].file);
568 		printf("\r\n"
569 		     "Content-Type: text/html; charset=utf-8\r\n"
570 		     "\r\n");
571 		return;
572 	}
573 
574 	resp_begin_html(200, NULL);
575 	resp_searchform(req,
576 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
577 
578 	if (sz > 1) {
579 		puts("<div class=\"results\">");
580 		puts("<table>");
581 
582 		for (i = 0; i < sz; i++) {
583 			printf("<tr>\n"
584 			       "<td class=\"title\">\n"
585 			       "<a href=\"/%s%s%s/%s",
586 			    scriptname, *scriptname == '\0' ? "" : "/",
587 			    req->q.manpath, r[i].file);
588 			printf("\">");
589 			html_print(r[i].names);
590 			printf("</a>\n"
591 			       "</td>\n"
592 			       "<td class=\"desc\">");
593 			html_print(r[i].output);
594 			puts("</td>\n"
595 			     "</tr>");
596 		}
597 
598 		puts("</table>\n"
599 		     "</div>");
600 	}
601 
602 	/*
603 	 * In man(1) mode, show one of the pages
604 	 * even if more than one is found.
605 	 */
606 
607 	if (req->q.equal || sz == 1) {
608 		puts("<hr>");
609 		iuse = 0;
610 		priouse = 20;
611 		archpriouse = 3;
612 		for (i = 0; i < sz; i++) {
613 			sec = r[i].file;
614 			sec += strcspn(sec, "123456789");
615 			if (sec[0] == '\0')
616 				continue;
617 			prio = sec_prios[sec[0] - '1'];
618 			if (sec[1] != '/')
619 				prio += 10;
620 			if (req->q.arch == NULL) {
621 				archprio =
622 				    ((arch = strchr(sec + 1, '/'))
623 					== NULL) ? 3 :
624 				    ((archend = strchr(arch + 1, '/'))
625 					== NULL) ? 0 :
626 				    strncmp(arch, "amd64/",
627 					archend - arch) ? 2 : 1;
628 				if (archprio < archpriouse) {
629 					archpriouse = archprio;
630 					priouse = prio;
631 					iuse = i;
632 					continue;
633 				}
634 				if (archprio > archpriouse)
635 					continue;
636 			}
637 			if (prio >= priouse)
638 				continue;
639 			priouse = prio;
640 			iuse = i;
641 		}
642 		resp_show(req, r[iuse].file);
643 	}
644 
645 	resp_end_html();
646 }
647 
/*
 * Translate a pair of characters that were printed on top of
 * each other with a backspace, in either order, to the single
 * character used to represent them in HTML.
 * Returns '*' for bullet-like combinations, '+' for line drawing
 * combinations, and '\0' if the pair is not a known combination.
 */
static char
catman_overstrike(char a, char b)
{
	if ((a == '+' && b == 'o') || (a == 'o' && b == '+') ||
	    (a == '|' && b == '=') || (a == '=' && b == '|') ||
	    (a == '*' && b == '=') || (a == '=' && b == '*') ||
	    (a == '*' && b == '|') || (a == '|' && b == '*'))
		return '*';

	if ((a == '|' && b == '-') || (a == '-' && b == '|') ||
	    (a == '+' && b == '-') || (a == '-' && b == '+') ||
	    (a == '+' && b == '|') || (a == '|' && b == '+'))
		return '+';

	return '\0';
}

/*
 * Print a preformatted manual page as HTML.
 * The file is read line by line, and backspace sequences are
 * translated: "_\bX" becomes italic X, known overstrike
 * combinations become a single character, and any other
 * "X\bY" sequence becomes bold Y.
 * If the file cannot be opened, an error paragraph is printed.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;
	char		 c;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * A backspace at this position means that
			 * the catpage is in a bogus state.
			 * Ignore it and keep going.
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * At this point, p[i + 1] is a backspace,
			 * so the character printed on top of p[i]
			 * is p[i + 2].
			 */

			c = catman_overstrike(p[i], p[i + 2]);
			if (c != '\0') {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar(c);
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
784 
785 static void
786 resp_format(const struct req *req, const char *file)
787 {
788 	struct manoutput conf;
789 	struct mparse	*mp;
790 	struct roff_man	*man;
791 	void		*vp;
792 	int		 fd;
793 	int		 usepath;
794 
795 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
796 		puts("<p>You specified an invalid manual file.</p>");
797 		return;
798 	}
799 
800 	mchars_alloc();
801 	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
802 	mparse_readfd(mp, fd, file);
803 	close(fd);
804 
805 	memset(&conf, 0, sizeof(conf));
806 	conf.fragment = 1;
807 	usepath = strcmp(req->q.manpath, req->p[0]);
808 	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
809 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
810 
811 	mparse_result(mp, &man, NULL);
812 	if (man == NULL) {
813 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
814 		pg_error_internal();
815 		mparse_free(mp);
816 		mchars_free();
817 		return;
818 	}
819 
820 	vp = html_alloc(&conf);
821 
822 	if (man->macroset == MACROSET_MDOC) {
823 		mdoc_validate(man);
824 		html_mdoc(vp, man);
825 	} else {
826 		man_validate(man);
827 		html_man(vp, man);
828 	}
829 
830 	html_free(vp);
831 	mparse_free(mp);
832 	mchars_free();
833 	free(conf.man);
834 }
835 
/*
 * Print the content of one manual page file.
 * After skipping an optional leading "./", a file name starting
 * with the letter "c" is handled as a preformatted page,
 * any other name as manual source code.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name != 'c') {
		resp_format(req, name);
		return;
	}
	resp_catman(req, name);
}
848 
849 static void
850 pg_show(struct req *req, const char *fullpath)
851 {
852 	char		*manpath;
853 	const char	*file;
854 
855 	if ((file = strchr(fullpath, '/')) == NULL) {
856 		pg_error_badrequest(
857 		    "You did not specify a page to show.");
858 		return;
859 	}
860 	manpath = mandoc_strndup(fullpath, file - fullpath);
861 	file++;
862 
863 	if ( ! validate_manpath(req, manpath)) {
864 		pg_error_badrequest(
865 		    "You specified an invalid manpath.");
866 		free(manpath);
867 		return;
868 	}
869 
870 	/*
871 	 * Begin by chdir()ing into the manpath.
872 	 * This way we can pick up the database files, which are
873 	 * relative to the manpath root.
874 	 */
875 
876 	if (chdir(manpath) == -1) {
877 		warn("chdir %s", manpath);
878 		pg_error_internal();
879 		free(manpath);
880 		return;
881 	}
882 	free(manpath);
883 
884 	if ( ! validate_filename(file)) {
885 		pg_error_badrequest(
886 		    "You specified an invalid manual file.");
887 		return;
888 	}
889 
890 	resp_begin_html(200, NULL);
891 	resp_searchform(req, FOCUS_NONE);
892 	resp_show(req, file);
893 	resp_end_html();
894 }
895 
896 static void
897 pg_search(const struct req *req)
898 {
899 	struct mansearch	  search;
900 	struct manpaths		  paths;
901 	struct manpage		 *res;
902 	char			**argv;
903 	char			 *query, *rp, *wp;
904 	size_t			  ressz;
905 	int			  argc;
906 
907 	/*
908 	 * Begin by chdir()ing into the root of the manpath.
909 	 * This way we can pick up the database files, which are
910 	 * relative to the manpath root.
911 	 */
912 
913 	if (chdir(req->q.manpath) == -1) {
914 		warn("chdir %s", req->q.manpath);
915 		pg_error_internal();
916 		return;
917 	}
918 
919 	search.arch = req->q.arch;
920 	search.sec = req->q.sec;
921 	search.outkey = "Nd";
922 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
923 	search.firstmatch = 1;
924 
925 	paths.sz = 1;
926 	paths.paths = mandoc_malloc(sizeof(char *));
927 	paths.paths[0] = mandoc_strdup(".");
928 
929 	/*
930 	 * Break apart at spaces with backslash-escaping.
931 	 */
932 
933 	argc = 0;
934 	argv = NULL;
935 	rp = query = mandoc_strdup(req->q.query);
936 	for (;;) {
937 		while (isspace((unsigned char)*rp))
938 			rp++;
939 		if (*rp == '\0')
940 			break;
941 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
942 		argv[argc++] = wp = rp;
943 		for (;;) {
944 			if (isspace((unsigned char)*rp)) {
945 				*wp = '\0';
946 				rp++;
947 				break;
948 			}
949 			if (rp[0] == '\\' && rp[1] != '\0')
950 				rp++;
951 			if (wp != rp)
952 				*wp = *rp;
953 			if (*rp == '\0')
954 				break;
955 			wp++;
956 			rp++;
957 		}
958 	}
959 
960 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
961 		pg_noresult(req, "You entered an invalid query.");
962 	else if (0 == ressz)
963 		pg_noresult(req, "No results found.");
964 	else
965 		pg_searchres(req, res, ressz);
966 
967 	free(query);
968 	mansearch_free(res, ressz);
969 	free(paths.paths[0]);
970 	free(paths.paths);
971 }
972 
973 int
974 main(void)
975 {
976 	struct req	 req;
977 	struct itimerval itimer;
978 	const char	*path;
979 	const char	*querystring;
980 	int		 i;
981 
982 	/* Poor man's ReDoS mitigation. */
983 
984 	itimer.it_value.tv_sec = 2;
985 	itimer.it_value.tv_usec = 0;
986 	itimer.it_interval.tv_sec = 2;
987 	itimer.it_interval.tv_usec = 0;
988 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
989 		warn("setitimer");
990 		pg_error_internal();
991 		return EXIT_FAILURE;
992 	}
993 
994 	/*
995 	 * First we change directory into the MAN_DIR so that
996 	 * subsequent scanning for manpath directories is rooted
997 	 * relative to the same position.
998 	 */
999 
1000 	if (chdir(MAN_DIR) == -1) {
1001 		warn("MAN_DIR: %s", MAN_DIR);
1002 		pg_error_internal();
1003 		return EXIT_FAILURE;
1004 	}
1005 
1006 	memset(&req, 0, sizeof(struct req));
1007 	req.q.equal = 1;
1008 	parse_manpath_conf(&req);
1009 
1010 	/* Parse the path info and the query string. */
1011 
1012 	if ((path = getenv("PATH_INFO")) == NULL)
1013 		path = "";
1014 	else if (*path == '/')
1015 		path++;
1016 
1017 	if (*path != '\0') {
1018 		parse_path_info(&req, path);
1019 		if (req.q.manpath == NULL || access(path, F_OK) == -1)
1020 			path = "";
1021 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1022 		parse_query_string(&req, querystring);
1023 
1024 	/* Validate parsed data and add defaults. */
1025 
1026 	if (req.q.manpath == NULL)
1027 		req.q.manpath = mandoc_strdup(req.p[0]);
1028 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1029 		pg_error_badrequest(
1030 		    "You specified an invalid manpath.");
1031 		return EXIT_FAILURE;
1032 	}
1033 
1034 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1035 		pg_error_badrequest(
1036 		    "You specified an invalid architecture.");
1037 		return EXIT_FAILURE;
1038 	}
1039 
1040 	/* Dispatch to the three different pages. */
1041 
1042 	if ('\0' != *path)
1043 		pg_show(&req, path);
1044 	else if (NULL != req.q.query)
1045 		pg_search(&req);
1046 	else
1047 		pg_index(&req);
1048 
1049 	free(req.q.manpath);
1050 	free(req.q.arch);
1051 	free(req.q.sec);
1052 	free(req.q.query);
1053 	for (i = 0; i < (int)req.psz; i++)
1054 		free(req.p[i]);
1055 	free(req.p);
1056 	return EXIT_SUCCESS;
1057 }
1058 
/*
 * If PATH_INFO is not a file name, translate it to a query.
 *
 * The path is read as [manpath/][manN/][arch/]name[.section]
 * and the parts found are stored in req->q as allocated strings.
 * On return, req->q.manpath is only set if the first directory
 * component is a configured manpath; main() relies on that to
 * decide whether the path may name a file.
 * Paths with too many or with unexpected directory components
 * result in a bad request page and exit the program.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	char	*dir[4];
	int	 i;

	req->isquery = 0;
	req->q.equal = 1;
	/* Work on a private copy that is cut into pieces below. */
	req->q.manpath = mandoc_strdup(path);
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
		/* No directory at all: the copy becomes the name. */
		req->q.query = req->q.manpath;
		req->q.manpath = NULL;
	} else
		/* For now, the name still points into the copy. */
		*req->q.query++ = '\0';

	/* Optional trailing section. */
	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
		/* Only a suffix starting with a digit is a section. */
		if(isdigit((unsigned char)req->q.sec[1])) {
			*req->q.sec++ = '\0';
			req->q.sec = mandoc_strdup(req->q.sec);
		} else
			req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (req->q.manpath == NULL)
		return;
	/* Give the name its own allocation, separate from the copy. */
	req->q.query = mandoc_strdup(req->q.query);

	/*
	 * Split directory components.
	 * The slashes are overwritten with NUL bytes, and the
	 * entry after the last component is set to NULL.
	 */
	dir[i = 0] = req->q.manpath;
	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
		if (++i == 3) {
			pg_error_badrequest(
			    "You specified too many directory components.");
			exit(EXIT_FAILURE);
		}
		*dir[i]++ = '\0';
	}

	/*
	 * Optional manpath.
	 * From here on, i is the index of the next component to
	 * look at: 1 if dir[0] is a valid manpath, or else 0.
	 * In the latter case, the copy is only reachable as dir[0].
	 */
	if ((i = validate_manpath(req, req->q.manpath)) == 0)
		req->q.manpath = NULL;
	else if (dir[1] == NULL)
		return;

	/* Optional section. */
	if (strncmp(dir[i], "man", 3) == 0) {
		/* A section directory overrides a trailing section. */
		free(req->q.sec);
		req->q.sec = mandoc_strdup(dir[i++] + 3);
	}
	if (dir[i] == NULL) {
		/* Without a manpath, nothing refers to the copy. */
		if (req->q.manpath == NULL)
			free(dir[0]);
		return;
	}
	if (dir[i + 1] != NULL) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}

	/* Optional architecture. */
	if (i) {
		req->q.arch = mandoc_strdup(dir[i]);
		if (req->q.manpath == NULL)
			free(dir[0]);
	} else
		/* The copy starts with the architecture: hand it over. */
		req->q.arch = dir[0];
}
1135 
1136 /*
1137  * Scan for indexable paths.
1138  */
1139 static void
1140 parse_manpath_conf(struct req *req)
1141 {
1142 	FILE	*fp;
1143 	char	*dp;
1144 	size_t	 dpsz;
1145 	ssize_t	 len;
1146 
1147 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1148 		warn("%s/manpath.conf", MAN_DIR);
1149 		pg_error_internal();
1150 		exit(EXIT_FAILURE);
1151 	}
1152 
1153 	dp = NULL;
1154 	dpsz = 0;
1155 
1156 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1157 		if (dp[len - 1] == '\n')
1158 			dp[--len] = '\0';
1159 		req->p = mandoc_realloc(req->p,
1160 		    (req->psz + 1) * sizeof(char *));
1161 		if ( ! validate_urifrag(dp)) {
1162 			warnx("%s/manpath.conf contains "
1163 			    "unsafe path \"%s\"", MAN_DIR, dp);
1164 			pg_error_internal();
1165 			exit(EXIT_FAILURE);
1166 		}
1167 		if (strchr(dp, '/') != NULL) {
1168 			warnx("%s/manpath.conf contains "
1169 			    "path with slash \"%s\"", MAN_DIR, dp);
1170 			pg_error_internal();
1171 			exit(EXIT_FAILURE);
1172 		}
1173 		req->p[req->psz++] = dp;
1174 		dp = NULL;
1175 		dpsz = 0;
1176 	}
1177 	free(dp);
1178 
1179 	if (req->p == NULL) {
1180 		warnx("%s/manpath.conf is empty", MAN_DIR);
1181 		pg_error_internal();
1182 		exit(EXIT_FAILURE);
1183 	}
1184 }
1185