xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision f6b75673f6c960a9743bfd16c1e52dd100265c68)
1 /*	$OpenBSD: cgi.c,v 1.99 2018/10/19 21:10:00 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "main.h"
38 #include "manconf.h"
39 #include "mansearch.h"
40 #include "cgi.h"
41 
/*
 * A query as passed to the search function.
 * The string members are heap-allocated or NULL when unset;
 * main() releases them with free() before exiting.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
52 
/*
 * State of one CGI request: the parsed query plus the list of
 * manpaths that parse_manpath_conf() reads from manpath.conf.
 */
struct	req {
	struct query	  q; /* parsed query parameters */
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
59 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0, /* no input element is autofocused */
	FOCUS_QUERY /* the query input box is autofocused */
};
64 
/*
 * Forward declarations of all file-local functions,
 * kept in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *p);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
/*
 * Name of this CGI program as it appears in generated URIs,
 * always printed after a leading slash; may be the empty string.
 */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Preference ranks for sections 1 through 9, indexed by the section
 * digit minus '1'.  pg_searchres() prefers the lowest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Section selector of the search form: sec_numbers[] holds the option
 * values and sec_names[] the labels.  Both arrays are indexed in
 * parallel up to sec_MAX and must have the same number of entries.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);

/*
 * Architecture names accepted by validate_arch()
 * and offered by the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",	"arm64",
    "hppa",        "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc64",
    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
131 
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML with their entities.
 * Bytes outside ASCII are written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else {
		putchar((unsigned char)c);
		return;
	}
	fputs(entity, stdout);
}
158 
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time via html_putchar().
 * A NULL pointer prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
172 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Any previous value of *attr is freed.  An empty string is freed
 * rather than stored, leaving *attr NULL.  In both cases *val is
 * set to NULL, so the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);
	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
189 
190 /*
191  * Parse the QUERY_STRING for key-value pairs
192  * and store the values into the query structure.
193  */
194 static void
195 parse_query_string(struct req *req, const char *qs)
196 {
197 	char		*key, *val;
198 	size_t		 keysz, valsz;
199 
200 	req->isquery	= 1;
201 	req->q.manpath	= NULL;
202 	req->q.arch	= NULL;
203 	req->q.sec	= NULL;
204 	req->q.query	= NULL;
205 	req->q.equal	= 1;
206 
207 	key = val = NULL;
208 	while (*qs != '\0') {
209 
210 		/* Parse one key. */
211 
212 		keysz = strcspn(qs, "=;&");
213 		key = mandoc_strndup(qs, keysz);
214 		qs += keysz;
215 		if (*qs != '=')
216 			goto next;
217 
218 		/* Parse one value. */
219 
220 		valsz = strcspn(++qs, ";&");
221 		val = mandoc_strndup(qs, valsz);
222 		qs += valsz;
223 
224 		/* Decode and catch encoding errors. */
225 
226 		if ( ! (http_decode(key) && http_decode(val)))
227 			goto next;
228 
229 		/* Handle key-value pairs. */
230 
231 		if ( ! strcmp(key, "query"))
232 			set_query_attr(&req->q.query, &val);
233 
234 		else if ( ! strcmp(key, "apropos"))
235 			req->q.equal = !strcmp(val, "0");
236 
237 		else if ( ! strcmp(key, "manpath")) {
238 #ifdef COMPAT_OLDURI
239 			if ( ! strncmp(val, "OpenBSD ", 8)) {
240 				val[7] = '-';
241 				if ('C' == val[8])
242 					val[8] = 'c';
243 			}
244 #endif
245 			set_query_attr(&req->q.manpath, &val);
246 		}
247 
248 		else if ( ! (strcmp(key, "sec")
249 #ifdef COMPAT_OLDURI
250 		    && strcmp(key, "sektion")
251 #endif
252 		    )) {
253 			if ( ! strcmp(val, "0"))
254 				*val = '\0';
255 			set_query_attr(&req->q.sec, &val);
256 		}
257 
258 		else if ( ! strcmp(key, "arch")) {
259 			if ( ! strcmp(val, "default"))
260 				*val = '\0';
261 			set_query_attr(&req->q.arch, &val);
262 		}
263 
264 		/*
265 		 * The key must be freed in any case.
266 		 * The val may have been handed over to the query
267 		 * structure, in which case it is now NULL.
268 		 */
269 next:
270 		free(key);
271 		key = NULL;
272 		free(val);
273 		val = NULL;
274 
275 		if (*qs != '\0')
276 			qs++;
277 	}
278 }
279 
280 /*
281  * HTTP-decode a string.  The standard explanation is that this turns
282  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
283  * over the allocated string.
284  */
285 static int
286 http_decode(char *p)
287 {
288 	char             hex[3];
289 	char		*q;
290 	int              c;
291 
292 	hex[2] = '\0';
293 
294 	q = p;
295 	for ( ; '\0' != *p; p++, q++) {
296 		if ('%' == *p) {
297 			if ('\0' == (hex[0] = *(p + 1)))
298 				return 0;
299 			if ('\0' == (hex[1] = *(p + 2)))
300 				return 0;
301 			if (1 != sscanf(hex, "%x", &c))
302 				return 0;
303 			if ('\0' == c)
304 				return 0;
305 
306 			*q = (char)c;
307 			p += 2;
308 		} else
309 			*q = '+' == *p ? ' ' : *p;
310 	}
311 
312 	*q = '\0';
313 	return 1;
314 }
315 
/*
 * Write a string to standard output, percent-encoding every byte
 * that is neither alphanumeric nor one of "-._~".
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%02.2X", uc);
	}
}
327 
/*
 * Write the CGI response headers and flush standard output.
 * A "Status:" line carrying code and msg is written only when
 * code is not 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	     "Cache-Control: no-cache\r\n"
	     "Pragma: no-cache\r\n"
	     "\r\n", stdout);

	fflush(stdout);
}
342 
/*
 * Copy the content of the file to standard output, bypassing stdio.
 * Pending stdio output is flushed first to preserve ordering.
 * A file that cannot be opened is silently skipped, such that
 * optional header and footer files need not exist.
 * Copying stops at the first read or write failure.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/*
		 * write(2) may accept fewer bytes than requested,
		 * so keep going until the whole chunk is out.
		 */
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw <= 0)
				goto done;
		}
	}
done:
	close(fd);
}
357 
358 static void
359 resp_begin_html(int code, const char *msg, const char *file)
360 {
361 	char	*cp;
362 
363 	resp_begin_http(code, msg);
364 
365 	printf("<!DOCTYPE html>\n"
366 	       "<html>\n"
367 	       "<head>\n"
368 	       "  <meta charset=\"UTF-8\"/>\n"
369 	       "  <meta name=\"viewport\""
370 		      " content=\"width=device-width, initial-scale=1.0\">\n"
371 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
372 	       " type=\"text/css\" media=\"all\">\n"
373 	       "  <title>",
374 	       CSS_DIR);
375 	if (file != NULL) {
376 		if ((cp = strrchr(file, '/')) != NULL)
377 			file = cp + 1;
378 		if ((cp = strrchr(file, '.')) != NULL) {
379 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
380 		} else
381 			printf("%s - ", file);
382 	}
383 	printf("%s</title>\n"
384 	       "</head>\n"
385 	       "<body>\n",
386 	       CUSTOMIZE_TITLE);
387 
388 	resp_copy(MAN_DIR "/header.html");
389 }
390 
391 static void
392 resp_end_html(void)
393 {
394 
395 	resp_copy(MAN_DIR "/footer.html");
396 
397 	puts("</body>\n"
398 	     "</html>");
399 }
400 
401 static void
402 resp_searchform(const struct req *req, enum focus focus)
403 {
404 	int		 i;
405 
406 	printf("<form action=\"/%s\" method=\"get\">\n"
407 	       "  <fieldset>\n"
408 	       "    <legend>Manual Page Search Parameters</legend>\n",
409 	       scriptname);
410 
411 	/* Write query input box. */
412 
413 	printf("    <input type=\"search\" name=\"query\" value=\"");
414 	if (req->q.query != NULL)
415 		html_print(req->q.query);
416 	printf( "\" size=\"40\"");
417 	if (focus == FOCUS_QUERY)
418 		printf(" autofocus");
419 	puts(">");
420 
421 	/* Write submission buttons. */
422 
423 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
424 		"man</button>\n"
425 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
426 		"apropos</button>\n"
427 		"    <br/>\n");
428 
429 	/* Write section selector. */
430 
431 	puts("    <select name=\"sec\">");
432 	for (i = 0; i < sec_MAX; i++) {
433 		printf("      <option value=\"%s\"", sec_numbers[i]);
434 		if (NULL != req->q.sec &&
435 		    0 == strcmp(sec_numbers[i], req->q.sec))
436 			printf(" selected=\"selected\"");
437 		printf(">%s</option>\n", sec_names[i]);
438 	}
439 	puts("    </select>");
440 
441 	/* Write architecture selector. */
442 
443 	printf(	"    <select name=\"arch\">\n"
444 		"      <option value=\"default\"");
445 	if (NULL == req->q.arch)
446 		printf(" selected=\"selected\"");
447 	puts(">All Architectures</option>");
448 	for (i = 0; i < arch_MAX; i++) {
449 		printf("      <option");
450 		if (NULL != req->q.arch &&
451 		    0 == strcmp(arch_names[i], req->q.arch))
452 			printf(" selected=\"selected\"");
453 		printf(">%s</option>\n", arch_names[i]);
454 	}
455 	puts("    </select>");
456 
457 	/* Write manpath selector. */
458 
459 	if (req->psz > 1) {
460 		puts("    <select name=\"manpath\">");
461 		for (i = 0; i < (int)req->psz; i++) {
462 			printf("      <option");
463 			if (strcmp(req->q.manpath, req->p[i]) == 0)
464 				printf(" selected=\"selected\"");
465 			printf(">");
466 			html_print(req->p[i]);
467 			puts("</option>");
468 		}
469 		puts("    </select>");
470 	}
471 
472 	puts("  </fieldset>\n"
473 	     "</form>");
474 }
475 
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/', and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
489 
490 static int
491 validate_manpath(const struct req *req, const char* manpath)
492 {
493 	size_t	 i;
494 
495 	for (i = 0; i < req->psz; i++)
496 		if ( ! strcmp(manpath, req->p[i]))
497 			return 1;
498 
499 	return 0;
500 }
501 
502 static int
503 validate_arch(const char *arch)
504 {
505 	int	 i;
506 
507 	for (i = 0; i < arch_MAX; i++)
508 		if (strcmp(arch, arch_names[i]) == 0)
509 			return 1;
510 
511 	return 0;
512 }
513 
/*
 * Check that a file name relative to a manpath is safe to open.
 * An optional leading "./" is ignored.  The name must not contain
 * "../" or "/.." and must start with "man" or "cat".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that could climb out of the manpath. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	/* Only the man* and cat* subtrees are served. */
	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
524 
525 static void
526 pg_index(const struct req *req)
527 {
528 
529 	resp_begin_html(200, NULL, NULL);
530 	resp_searchform(req, FOCUS_QUERY);
531 	printf("<p>\n"
532 	       "This web interface is documented in the\n"
533 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
534 	       "manual, and the\n"
535 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
536 	       "manual explains the query syntax.\n"
537 	       "</p>\n",
538 	       scriptname, *scriptname == '\0' ? "" : "/",
539 	       scriptname, *scriptname == '\0' ? "" : "/");
540 	resp_end_html();
541 }
542 
543 static void
544 pg_noresult(const struct req *req, const char *msg)
545 {
546 	resp_begin_html(200, NULL, NULL);
547 	resp_searchform(req, FOCUS_QUERY);
548 	puts("<p>");
549 	puts(msg);
550 	puts("</p>");
551 	resp_end_html();
552 }
553 
554 static void
555 pg_error_badrequest(const char *msg)
556 {
557 
558 	resp_begin_html(400, "Bad Request", NULL);
559 	puts("<h1>Bad Request</h1>\n"
560 	     "<p>\n");
561 	puts(msg);
562 	printf("Try again from the\n"
563 	       "<a href=\"/%s\">main page</a>.\n"
564 	       "</p>", scriptname);
565 	resp_end_html();
566 }
567 
/*
 * Write a "500 Internal Server Error" page.
 * No details are given to the client.
 */
static void
pg_error_internal(void)
{
	static const char	 text[] = "Internal Server Error";

	resp_begin_html(500, text, NULL);
	puts("<p>Internal Server Error</p>");
	resp_end_html();
}
575 
576 static void
577 pg_redirect(const struct req *req, const char *name)
578 {
579 	printf("Status: 303 See Other\r\n"
580 	    "Location: /");
581 	if (*scriptname != '\0')
582 		printf("%s/", scriptname);
583 	if (strcmp(req->q.manpath, req->p[0]))
584 		printf("%s/", req->q.manpath);
585 	if (req->q.arch != NULL)
586 		printf("%s/", req->q.arch);
587 	http_encode(name);
588 	if (req->q.sec != NULL) {
589 		putchar('.');
590 		http_encode(req->q.sec);
591 	}
592 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
593 }
594 
595 static void
596 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
597 {
598 	char		*arch, *archend;
599 	const char	*sec;
600 	size_t		 i, iuse;
601 	int		 archprio, archpriouse;
602 	int		 prio, priouse;
603 
604 	for (i = 0; i < sz; i++) {
605 		if (validate_filename(r[i].file))
606 			continue;
607 		warnx("invalid filename %s in %s database",
608 		    r[i].file, req->q.manpath);
609 		pg_error_internal();
610 		return;
611 	}
612 
613 	if (req->isquery && sz == 1) {
614 		/*
615 		 * If we have just one result, then jump there now
616 		 * without any delay.
617 		 */
618 		printf("Status: 303 See Other\r\n"
619 		    "Location: /");
620 		if (*scriptname != '\0')
621 			printf("%s/", scriptname);
622 		if (strcmp(req->q.manpath, req->p[0]))
623 			printf("%s/", req->q.manpath);
624 		printf("%s\r\n"
625 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
626 		    r[0].file);
627 		return;
628 	}
629 
630 	/*
631 	 * In man(1) mode, show one of the pages
632 	 * even if more than one is found.
633 	 */
634 
635 	iuse = 0;
636 	if (req->q.equal || sz == 1) {
637 		priouse = 20;
638 		archpriouse = 3;
639 		for (i = 0; i < sz; i++) {
640 			sec = r[i].file;
641 			sec += strcspn(sec, "123456789");
642 			if (sec[0] == '\0')
643 				continue;
644 			prio = sec_prios[sec[0] - '1'];
645 			if (sec[1] != '/')
646 				prio += 10;
647 			if (req->q.arch == NULL) {
648 				archprio =
649 				    ((arch = strchr(sec + 1, '/'))
650 					== NULL) ? 3 :
651 				    ((archend = strchr(arch + 1, '/'))
652 					== NULL) ? 0 :
653 				    strncmp(arch, "amd64/",
654 					archend - arch) ? 2 : 1;
655 				if (archprio < archpriouse) {
656 					archpriouse = archprio;
657 					priouse = prio;
658 					iuse = i;
659 					continue;
660 				}
661 				if (archprio > archpriouse)
662 					continue;
663 			}
664 			if (prio >= priouse)
665 				continue;
666 			priouse = prio;
667 			iuse = i;
668 		}
669 		resp_begin_html(200, NULL, r[iuse].file);
670 	} else
671 		resp_begin_html(200, NULL, NULL);
672 
673 	resp_searchform(req,
674 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
675 
676 	if (sz > 1) {
677 		puts("<table class=\"results\">");
678 		for (i = 0; i < sz; i++) {
679 			printf("  <tr>\n"
680 			       "    <td>"
681 			       "<a class=\"Xr\" href=\"/");
682 			if (*scriptname != '\0')
683 				printf("%s/", scriptname);
684 			if (strcmp(req->q.manpath, req->p[0]))
685 				printf("%s/", req->q.manpath);
686 			printf("%s\">", r[i].file);
687 			html_print(r[i].names);
688 			printf("</a></td>\n"
689 			       "    <td><span class=\"Nd\">");
690 			html_print(r[i].output);
691 			puts("</span></td>\n"
692 			     "  </tr>");
693 		}
694 		puts("</table>");
695 	}
696 
697 	if (req->q.equal || sz == 1) {
698 		puts("<hr>");
699 		resp_show(req, r[iuse].file);
700 	}
701 
702 	resp_end_html();
703 }
704 
/*
 * Write a preformatted manual page ("cat page") as HTML.
 *
 * req:	unused.
 * file:	name of the file to read, relative to the current directory.
 *
 * The file is read line by line.  Three-byte backspace sequences
 * "x\by" are translated as follows:
 *  - if x is an underscore, y is written in italics;
 *  - the pairs of '+' with 'o' and any two of '|', '=', '*'
 *    are written as '*';
 *  - the pairs of any two of '|', '-', '+' are written as '+';
 *  - otherwise, y is written in bold.
 * All other characters are written with HTML escaping.
 * If the file cannot be opened, an error paragraph is written.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	ssize_t		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
841 
842 static void
843 resp_format(const struct req *req, const char *file)
844 {
845 	struct manoutput conf;
846 	struct mparse	*mp;
847 	struct roff_man	*man;
848 	void		*vp;
849 	int		 fd;
850 	int		 usepath;
851 
852 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
853 		puts("<p>You specified an invalid manual file.</p>");
854 		return;
855 	}
856 
857 	mchars_alloc();
858 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
859 	    MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath);
860 	mparse_readfd(mp, fd, file);
861 	close(fd);
862 
863 	memset(&conf, 0, sizeof(conf));
864 	conf.fragment = 1;
865 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
866 	conf.toc = 1;
867 	usepath = strcmp(req->q.manpath, req->p[0]);
868 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
869 	    scriptname, *scriptname == '\0' ? "" : "/",
870 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
871 
872 	mparse_result(mp, &man, NULL);
873 	if (man == NULL) {
874 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
875 		pg_error_internal();
876 		mparse_free(mp);
877 		mchars_free();
878 		return;
879 	}
880 
881 	vp = html_alloc(&conf);
882 
883 	if (man->macroset == MACROSET_MDOC) {
884 		mdoc_validate(man);
885 		html_mdoc(vp, man);
886 	} else {
887 		man_validate(man);
888 		html_man(vp, man);
889 	}
890 
891 	html_free(vp);
892 	mparse_free(mp);
893 	mchars_free();
894 	free(conf.man);
895 	free(conf.style);
896 }
897 
/*
 * Write the content of one manual page file.
 * An optional leading "./" is skipped.  Names starting with 'c'
 * are handed to resp_catman(), all others to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
910 
911 static void
912 pg_show(struct req *req, const char *fullpath)
913 {
914 	char		*manpath;
915 	const char	*file;
916 
917 	if ((file = strchr(fullpath, '/')) == NULL) {
918 		pg_error_badrequest(
919 		    "You did not specify a page to show.");
920 		return;
921 	}
922 	manpath = mandoc_strndup(fullpath, file - fullpath);
923 	file++;
924 
925 	if ( ! validate_manpath(req, manpath)) {
926 		pg_error_badrequest(
927 		    "You specified an invalid manpath.");
928 		free(manpath);
929 		return;
930 	}
931 
932 	/*
933 	 * Begin by chdir()ing into the manpath.
934 	 * This way we can pick up the database files, which are
935 	 * relative to the manpath root.
936 	 */
937 
938 	if (chdir(manpath) == -1) {
939 		warn("chdir %s", manpath);
940 		pg_error_internal();
941 		free(manpath);
942 		return;
943 	}
944 	free(manpath);
945 
946 	if ( ! validate_filename(file)) {
947 		pg_error_badrequest(
948 		    "You specified an invalid manual file.");
949 		return;
950 	}
951 
952 	resp_begin_html(200, NULL, file);
953 	resp_searchform(req, FOCUS_NONE);
954 	resp_show(req, file);
955 	resp_end_html();
956 }
957 
958 static void
959 pg_search(const struct req *req)
960 {
961 	struct mansearch	  search;
962 	struct manpaths		  paths;
963 	struct manpage		 *res;
964 	char			**argv;
965 	char			 *query, *rp, *wp;
966 	size_t			  ressz;
967 	int			  argc;
968 
969 	/*
970 	 * Begin by chdir()ing into the root of the manpath.
971 	 * This way we can pick up the database files, which are
972 	 * relative to the manpath root.
973 	 */
974 
975 	if (chdir(req->q.manpath) == -1) {
976 		warn("chdir %s", req->q.manpath);
977 		pg_error_internal();
978 		return;
979 	}
980 
981 	search.arch = req->q.arch;
982 	search.sec = req->q.sec;
983 	search.outkey = "Nd";
984 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
985 	search.firstmatch = 1;
986 
987 	paths.sz = 1;
988 	paths.paths = mandoc_malloc(sizeof(char *));
989 	paths.paths[0] = mandoc_strdup(".");
990 
991 	/*
992 	 * Break apart at spaces with backslash-escaping.
993 	 */
994 
995 	argc = 0;
996 	argv = NULL;
997 	rp = query = mandoc_strdup(req->q.query);
998 	for (;;) {
999 		while (isspace((unsigned char)*rp))
1000 			rp++;
1001 		if (*rp == '\0')
1002 			break;
1003 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1004 		argv[argc++] = wp = rp;
1005 		for (;;) {
1006 			if (isspace((unsigned char)*rp)) {
1007 				*wp = '\0';
1008 				rp++;
1009 				break;
1010 			}
1011 			if (rp[0] == '\\' && rp[1] != '\0')
1012 				rp++;
1013 			if (wp != rp)
1014 				*wp = *rp;
1015 			if (*rp == '\0')
1016 				break;
1017 			wp++;
1018 			rp++;
1019 		}
1020 	}
1021 
1022 	res = NULL;
1023 	ressz = 0;
1024 	if (req->isquery && req->q.equal && argc == 1)
1025 		pg_redirect(req, argv[0]);
1026 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1027 		pg_noresult(req, "You entered an invalid query.");
1028 	else if (ressz == 0)
1029 		pg_noresult(req, "No results found.");
1030 	else
1031 		pg_searchres(req, res, ressz);
1032 
1033 	free(query);
1034 	mansearch_free(res, ressz);
1035 	free(paths.paths[0]);
1036 	free(paths.paths);
1037 }
1038 
1039 int
1040 main(void)
1041 {
1042 	struct req	 req;
1043 	struct itimerval itimer;
1044 	const char	*path;
1045 	const char	*querystring;
1046 	int		 i;
1047 
1048 	/*
1049 	 * The "rpath" pledge could be revoked after mparse_readfd()
1050 	 * if the file desciptor to "/footer.html" would be opened
1051 	 * up front, but it's probably not worth the complication
1052 	 * of the code it would cause: it would require scattering
1053 	 * pledge() calls in multiple low-level resp_*() functions.
1054 	 */
1055 
1056 	if (pledge("stdio rpath", NULL) == -1) {
1057 		warn("pledge");
1058 		pg_error_internal();
1059 		return EXIT_FAILURE;
1060 	}
1061 
1062 	/* Poor man's ReDoS mitigation. */
1063 
1064 	itimer.it_value.tv_sec = 2;
1065 	itimer.it_value.tv_usec = 0;
1066 	itimer.it_interval.tv_sec = 2;
1067 	itimer.it_interval.tv_usec = 0;
1068 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1069 		warn("setitimer");
1070 		pg_error_internal();
1071 		return EXIT_FAILURE;
1072 	}
1073 
1074 	/*
1075 	 * First we change directory into the MAN_DIR so that
1076 	 * subsequent scanning for manpath directories is rooted
1077 	 * relative to the same position.
1078 	 */
1079 
1080 	if (chdir(MAN_DIR) == -1) {
1081 		warn("MAN_DIR: %s", MAN_DIR);
1082 		pg_error_internal();
1083 		return EXIT_FAILURE;
1084 	}
1085 
1086 	memset(&req, 0, sizeof(struct req));
1087 	req.q.equal = 1;
1088 	parse_manpath_conf(&req);
1089 
1090 	/* Parse the path info and the query string. */
1091 
1092 	if ((path = getenv("PATH_INFO")) == NULL)
1093 		path = "";
1094 	else if (*path == '/')
1095 		path++;
1096 
1097 	if (*path != '\0') {
1098 		parse_path_info(&req, path);
1099 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1100 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1101 			path = "";
1102 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1103 		parse_query_string(&req, querystring);
1104 
1105 	/* Validate parsed data and add defaults. */
1106 
1107 	if (req.q.manpath == NULL)
1108 		req.q.manpath = mandoc_strdup(req.p[0]);
1109 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1110 		pg_error_badrequest(
1111 		    "You specified an invalid manpath.");
1112 		return EXIT_FAILURE;
1113 	}
1114 
1115 	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1116 		pg_error_badrequest(
1117 		    "You specified an invalid architecture.");
1118 		return EXIT_FAILURE;
1119 	}
1120 
1121 	/* Dispatch to the three different pages. */
1122 
1123 	if ('\0' != *path)
1124 		pg_show(&req, path);
1125 	else if (NULL != req.q.query)
1126 		pg_search(&req);
1127 	else
1128 		pg_index(&req);
1129 
1130 	free(req.q.manpath);
1131 	free(req.q.arch);
1132 	free(req.q.sec);
1133 	free(req.q.query);
1134 	for (i = 0; i < (int)req.psz; i++)
1135 		free(req.p[i]);
1136 	free(req.p);
1137 	return EXIT_SUCCESS;
1138 }
1139 
1140 /*
1141  * Translate PATH_INFO to a query.
1142  */
1143 static void
1144 parse_path_info(struct req *req, const char *path)
1145 {
1146 	const char	*name, *sec, *end;
1147 
1148 	req->isquery = 0;
1149 	req->q.equal = 1;
1150 	req->q.manpath = NULL;
1151 	req->q.arch = NULL;
1152 
1153 	/* Mandatory manual page name. */
1154 	if ((name = strrchr(path, '/')) == NULL)
1155 		name = path;
1156 	else
1157 		name++;
1158 
1159 	/* Optional trailing section. */
1160 	sec = strrchr(name, '.');
1161 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1162 		req->q.query = mandoc_strndup(name, sec - name - 1);
1163 		req->q.sec = mandoc_strdup(sec);
1164 	} else {
1165 		req->q.query = mandoc_strdup(name);
1166 		req->q.sec = NULL;
1167 	}
1168 
1169 	/* Handle the case of name[.section] only. */
1170 	if (name == path)
1171 		return;
1172 
1173 	/* Optional manpath. */
1174 	end = strchr(path, '/');
1175 	req->q.manpath = mandoc_strndup(path, end - path);
1176 	if (validate_manpath(req, req->q.manpath)) {
1177 		path = end + 1;
1178 		if (name == path)
1179 			return;
1180 	} else {
1181 		free(req->q.manpath);
1182 		req->q.manpath = NULL;
1183 	}
1184 
1185 	/* Optional section. */
1186 	if (strncmp(path, "man", 3) == 0) {
1187 		path += 3;
1188 		end = strchr(path, '/');
1189 		free(req->q.sec);
1190 		req->q.sec = mandoc_strndup(path, end - path);
1191 		path = end + 1;
1192 		if (name == path)
1193 			return;
1194 	}
1195 
1196 	/* Optional architecture. */
1197 	end = strchr(path, '/');
1198 	if (end + 1 != name) {
1199 		pg_error_badrequest(
1200 		    "You specified too many directory components.");
1201 		exit(EXIT_FAILURE);
1202 	}
1203 	req->q.arch = mandoc_strndup(path, end - path);
1204 	if (validate_arch(req->q.arch) == 0) {
1205 		pg_error_badrequest(
1206 		    "You specified an invalid directory component.");
1207 		exit(EXIT_FAILURE);
1208 	}
1209 }
1210 
1211 /*
1212  * Scan for indexable paths.
1213  */
1214 static void
1215 parse_manpath_conf(struct req *req)
1216 {
1217 	FILE	*fp;
1218 	char	*dp;
1219 	size_t	 dpsz;
1220 	ssize_t	 len;
1221 
1222 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1223 		warn("%s/manpath.conf", MAN_DIR);
1224 		pg_error_internal();
1225 		exit(EXIT_FAILURE);
1226 	}
1227 
1228 	dp = NULL;
1229 	dpsz = 0;
1230 
1231 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1232 		if (dp[len - 1] == '\n')
1233 			dp[--len] = '\0';
1234 		req->p = mandoc_realloc(req->p,
1235 		    (req->psz + 1) * sizeof(char *));
1236 		if ( ! validate_urifrag(dp)) {
1237 			warnx("%s/manpath.conf contains "
1238 			    "unsafe path \"%s\"", MAN_DIR, dp);
1239 			pg_error_internal();
1240 			exit(EXIT_FAILURE);
1241 		}
1242 		if (strchr(dp, '/') != NULL) {
1243 			warnx("%s/manpath.conf contains "
1244 			    "path with slash \"%s\"", MAN_DIR, dp);
1245 			pg_error_internal();
1246 			exit(EXIT_FAILURE);
1247 		}
1248 		req->p[req->psz++] = dp;
1249 		dp = NULL;
1250 		dpsz = 0;
1251 	}
1252 	free(dp);
1253 
1254 	if (req->p == NULL) {
1255 		warnx("%s/manpath.conf is empty", MAN_DIR);
1256 		pg_error_internal();
1257 		exit(EXIT_FAILURE);
1258 	}
1259 }
1260