xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1 /* $OpenBSD: cgi.c,v 1.111 2020/06/29 19:16:59 schwarze Exp $ */
2 /*
3  * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
4  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the man.cgi(8) program.
19  */
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "mandoc_parse.h"
40 #include "main.h"
41 #include "manconf.h"
42 #include "mansearch.h"
43 #include "cgi.h"
44 
/*
 * A query as passed to the search function.
 * The string members are either NULL (not specified) or point to
 * heap memory owned by this structure; main() frees them on exit.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
55 
/*
 * State of one CGI request: the parsed query plus the list of
 * manpaths read from manpath.conf by parse_manpath_conf().
 * The first entry, p[0], is used as the default manpath.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
62 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
67 
/*
 * Forward declarations of all file-local functions,
 * kept in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *, const char *);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, int, const char *,
				const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
98 
/*
 * Name of this CGI script as it appears in generated URIs, always
 * printed after a leading slash.  May be the empty string.
 * SCRIPT_NAME is not defined in this file (presumably in cgi.h).
 */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Preference of manual sections when several pages match in man(1)
 * mode: indexed by (section digit - '1'), lower values win.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Parallel arrays describing the entries of the section selector:
 * sec_numbers[] holds the form values, sec_names[] the labels.
 * Both must have the same number of elements, sec_MAX.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
119 
/*
 * Architecture names accepted by validate_arch() and offered in the
 * architecture selector of the search form, in display order.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",       "arm64",
    "hppa",        "i386",        "landisk",     "loongson",
    "luna88k",     "macppc",      "mips64",      "octeon",
    "powerpc64",   "sgi",         "socppc",      "sparc64",

    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
136 
/*
 * Write one character to standard output.
 * The four characters ", &, > and < are replaced by their HTML
 * entity references; every other byte, including non-ASCII,
 * is passed through unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
163 
/*
 * Print a NUL-terminated string with HTML escaping by feeding it
 * to html_putchar() one byte at a time.
 * A NULL argument prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
177 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Any previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL on return because the caller
 * no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	if (*incoming != '\0')
		*attr = incoming;
	else {
		free(incoming);
		*attr = NULL;
	}
	*val = NULL;
}
194 
195 /*
196  * Parse the QUERY_STRING for key-value pairs
197  * and store the values into the query structure.
198  */
199 static void
200 parse_query_string(struct req *req, const char *qs)
201 {
202 	char		*key, *val;
203 	size_t		 keysz, valsz;
204 
205 	req->isquery	= 1;
206 	req->q.manpath	= NULL;
207 	req->q.arch	= NULL;
208 	req->q.sec	= NULL;
209 	req->q.query	= NULL;
210 	req->q.equal	= 1;
211 
212 	key = val = NULL;
213 	while (*qs != '\0') {
214 
215 		/* Parse one key. */
216 
217 		keysz = strcspn(qs, "=;&");
218 		key = mandoc_strndup(qs, keysz);
219 		qs += keysz;
220 		if (*qs != '=')
221 			goto next;
222 
223 		/* Parse one value. */
224 
225 		valsz = strcspn(++qs, ";&");
226 		val = mandoc_strndup(qs, valsz);
227 		qs += valsz;
228 
229 		/* Decode and catch encoding errors. */
230 
231 		if ( ! (http_decode(key) && http_decode(val)))
232 			goto next;
233 
234 		/* Handle key-value pairs. */
235 
236 		if ( ! strcmp(key, "query"))
237 			set_query_attr(&req->q.query, &val);
238 
239 		else if ( ! strcmp(key, "apropos"))
240 			req->q.equal = !strcmp(val, "0");
241 
242 		else if ( ! strcmp(key, "manpath")) {
243 #ifdef COMPAT_OLDURI
244 			if ( ! strncmp(val, "OpenBSD ", 8)) {
245 				val[7] = '-';
246 				if ('C' == val[8])
247 					val[8] = 'c';
248 			}
249 #endif
250 			set_query_attr(&req->q.manpath, &val);
251 		}
252 
253 		else if ( ! (strcmp(key, "sec")
254 #ifdef COMPAT_OLDURI
255 		    && strcmp(key, "sektion")
256 #endif
257 		    )) {
258 			if ( ! strcmp(val, "0"))
259 				*val = '\0';
260 			set_query_attr(&req->q.sec, &val);
261 		}
262 
263 		else if ( ! strcmp(key, "arch")) {
264 			if ( ! strcmp(val, "default"))
265 				*val = '\0';
266 			set_query_attr(&req->q.arch, &val);
267 		}
268 
269 		/*
270 		 * The key must be freed in any case.
271 		 * The val may have been handed over to the query
272 		 * structure, in which case it is now NULL.
273 		 */
274 next:
275 		free(key);
276 		key = NULL;
277 		free(val);
278 		val = NULL;
279 
280 		if (*qs != '\0')
281 			qs++;
282 	}
283 }
284 
/*
 * Return the numeric value of one hexadecimal digit,
 * or -1 if the character is not a hexadecimal digit.
 */
static int
http_hexval(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%6e+foo" into "n foo" in the regular way.  This is done in-place
 * over the allocated string.
 *
 * Every '%' must be followed by exactly two hexadecimal digits;
 * anything else, including a truncated escape or an encoded NUL
 * byte ("%00"), is an encoding error.
 *
 * Returns 1 on success and 0 on an encoding error.  After an error,
 * the content of the string is unspecified.
 */
static int
http_decode(char *p)
{
	char		*q;
	int		 hi, lo, c;

	q = p;
	for ( ; '\0' != *p; p++, q++) {
		if ('%' != *p) {
			*q = '+' == *p ? ' ' : *p;
			continue;
		}

		/*
		 * Short-circuit evaluation makes sure that p[2]
		 * is not inspected when p[1] is the terminating NUL.
		 */

		if ((hi = http_hexval(p[1])) == -1 ||
		    (lo = http_hexval(p[2])) == -1)
			return 0;

		/* Do not allow embedding a NUL byte. */

		if ((c = hi * 16 + lo) == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
320 
/*
 * Print a string to standard output in percent-encoded form.
 * Alphanumeric characters and the four characters "-._~" are
 * printed as they are; every other byte becomes %XX with two
 * uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) != 0 || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
332 
/*
 * Print the HTTP response header block and flush standard output.
 * A "Status:" line is only printed for codes other than 200;
 * msg is the reason phrase for that line.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 headers[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(headers, stdout);
	fflush(stdout);
}
349 
/*
 * Copy the content of a file verbatim to standard output.
 * Nothing is printed if the file cannot be opened.
 *
 * The stdio buffer is flushed first because the data is written
 * with write(2) directly, bypassing stdio.  Since write(2) may
 * transfer fewer bytes than requested, each chunk is written in
 * a loop until it is complete.  Copying stops silently at the
 * first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 rsz, wsz, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((rsz = read(fd, buf, sizeof(buf))) > 0) {
		off = 0;
		while (off < rsz) {
			wsz = write(STDOUT_FILENO, buf + off, rsz - off);
			if (wsz == -1) {
				/* Retry if merely interrupted. */
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			off += wsz;
		}
	}
	close(fd);
}
364 
365 static void
366 resp_begin_html(int code, const char *msg, const char *file)
367 {
368 	char	*cp;
369 
370 	resp_begin_http(code, msg);
371 
372 	printf("<!DOCTYPE html>\n"
373 	       "<html>\n"
374 	       "<head>\n"
375 	       "  <meta charset=\"UTF-8\"/>\n"
376 	       "  <meta name=\"viewport\""
377 		      " content=\"width=device-width, initial-scale=1.0\">\n"
378 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
379 	       " type=\"text/css\" media=\"all\">\n"
380 	       "  <title>",
381 	       CSS_DIR);
382 	if (file != NULL) {
383 		if ((cp = strrchr(file, '/')) != NULL)
384 			file = cp + 1;
385 		if ((cp = strrchr(file, '.')) != NULL) {
386 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
387 		} else
388 			printf("%s - ", file);
389 	}
390 	printf("%s</title>\n"
391 	       "</head>\n"
392 	       "<body>\n",
393 	       CUSTOMIZE_TITLE);
394 
395 	resp_copy(MAN_DIR "/header.html");
396 }
397 
398 static void
399 resp_end_html(void)
400 {
401 
402 	resp_copy(MAN_DIR "/footer.html");
403 
404 	puts("</body>\n"
405 	     "</html>");
406 }
407 
408 static void
409 resp_searchform(const struct req *req, enum focus focus)
410 {
411 	int		 i;
412 
413 	printf("<form action=\"/%s\" method=\"get\" "
414 	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
415 	       "  <fieldset>\n"
416 	       "    <legend>Manual Page Search Parameters</legend>\n",
417 	       scriptname);
418 
419 	/* Write query input box. */
420 
421 	printf("    <input type=\"search\" name=\"query\" value=\"");
422 	if (req->q.query != NULL)
423 		html_print(req->q.query);
424 	printf( "\" size=\"40\"");
425 	if (focus == FOCUS_QUERY)
426 		printf(" autofocus");
427 	puts(">");
428 
429 	/* Write submission buttons. */
430 
431 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
432 		"man</button>\n"
433 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
434 		"apropos</button>\n"
435 		"    <br/>\n");
436 
437 	/* Write section selector. */
438 
439 	puts("    <select name=\"sec\">");
440 	for (i = 0; i < sec_MAX; i++) {
441 		printf("      <option value=\"%s\"", sec_numbers[i]);
442 		if (NULL != req->q.sec &&
443 		    0 == strcmp(sec_numbers[i], req->q.sec))
444 			printf(" selected=\"selected\"");
445 		printf(">%s</option>\n", sec_names[i]);
446 	}
447 	puts("    </select>");
448 
449 	/* Write architecture selector. */
450 
451 	printf(	"    <select name=\"arch\">\n"
452 		"      <option value=\"default\"");
453 	if (NULL == req->q.arch)
454 		printf(" selected=\"selected\"");
455 	puts(">All Architectures</option>");
456 	for (i = 0; i < arch_MAX; i++) {
457 		printf("      <option");
458 		if (NULL != req->q.arch &&
459 		    0 == strcmp(arch_names[i], req->q.arch))
460 			printf(" selected=\"selected\"");
461 		printf(">%s</option>\n", arch_names[i]);
462 	}
463 	puts("    </select>");
464 
465 	/* Write manpath selector. */
466 
467 	if (req->psz > 1) {
468 		puts("    <select name=\"manpath\">");
469 		for (i = 0; i < (int)req->psz; i++) {
470 			printf("      <option");
471 			if (strcmp(req->q.manpath, req->p[i]) == 0)
472 				printf(" selected=\"selected\"");
473 			printf(">");
474 			html_print(req->p[i]);
475 			puts("</option>");
476 		}
477 		puts("    </select>");
478 	}
479 
480 	puts("  </fieldset>\n"
481 	     "</form>");
482 }
483 
/*
 * Check that a string is safe to use as part of a URI:
 * it may only contain alphanumeric characters and the
 * characters '-', '.', '/' and '_'.
 * Returns 1 if safe (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			break;
		}
		if (isalnum(*cp) == 0)
			return 0;
	}
	return 1;
}
497 
498 static int
499 validate_manpath(const struct req *req, const char* manpath)
500 {
501 	size_t	 i;
502 
503 	for (i = 0; i < req->psz; i++)
504 		if ( ! strcmp(manpath, req->p[i]))
505 			return 1;
506 
507 	return 0;
508 }
509 
510 static int
511 validate_arch(const char *arch)
512 {
513 	int	 i;
514 
515 	for (i = 0; i < arch_MAX; i++)
516 		if (strcmp(arch, arch_names[i]) == 0)
517 			return 1;
518 
519 	return 0;
520 }
521 
/*
 * Check that a file name relative to a manpath is acceptable:
 * after skipping an optional leading "./", it must start with
 * "man" or "cat" and must not contain "../" or "/..".
 * Returns 1 if acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	/* Refuse anything that might climb out of the manpath. */
	if (strstr(rel, "../") != NULL || strstr(rel, "/..") != NULL)
		return 0;

	if (strncmp(rel, "man", 3) == 0)
		return 1;
	return strncmp(rel, "cat", 3) == 0;
}
532 
533 static void
534 pg_index(const struct req *req)
535 {
536 
537 	resp_begin_html(200, NULL, NULL);
538 	resp_searchform(req, FOCUS_QUERY);
539 	printf("<p>\n"
540 	       "This web interface is documented in the\n"
541 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
542 	       "manual, and the\n"
543 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
544 	       "manual explains the query syntax.\n"
545 	       "</p>\n",
546 	       scriptname, *scriptname == '\0' ? "" : "/",
547 	       scriptname, *scriptname == '\0' ? "" : "/");
548 	resp_end_html();
549 }
550 
551 static void
552 pg_noresult(const struct req *req, int code, const char *http_msg,
553     const char *user_msg)
554 {
555 	resp_begin_html(code, http_msg, NULL);
556 	resp_searchform(req, FOCUS_QUERY);
557 	puts("<p>");
558 	puts(user_msg);
559 	puts("</p>");
560 	resp_end_html();
561 }
562 
563 static void
564 pg_error_badrequest(const char *msg)
565 {
566 
567 	resp_begin_html(400, "Bad Request", NULL);
568 	puts("<h1>Bad Request</h1>\n"
569 	     "<p>\n");
570 	puts(msg);
571 	printf("Try again from the\n"
572 	       "<a href=\"/%s\">main page</a>.\n"
573 	       "</p>", scriptname);
574 	resp_end_html();
575 }
576 
/*
 * Print a complete "500 Internal Server Error" page.
 * No details are shown to the user.
 */
static void
pg_error_internal(void)
{
	static const char	 reason[] = "Internal Server Error";

	resp_begin_html(500, reason, NULL);
	puts("<p>Internal Server Error</p>");
	resp_end_html();
}
584 
585 static void
586 pg_redirect(const struct req *req, const char *name)
587 {
588 	printf("Status: 303 See Other\r\n"
589 	    "Location: /");
590 	if (*scriptname != '\0')
591 		printf("%s/", scriptname);
592 	if (strcmp(req->q.manpath, req->p[0]))
593 		printf("%s/", req->q.manpath);
594 	if (req->q.arch != NULL)
595 		printf("%s/", req->q.arch);
596 	http_encode(name);
597 	if (req->q.sec != NULL) {
598 		putchar('.');
599 		http_encode(req->q.sec);
600 	}
601 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
602 }
603 
604 static void
605 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
606 {
607 	char		*arch, *archend;
608 	const char	*sec;
609 	size_t		 i, iuse;
610 	int		 archprio, archpriouse;
611 	int		 prio, priouse;
612 
613 	for (i = 0; i < sz; i++) {
614 		if (validate_filename(r[i].file))
615 			continue;
616 		warnx("invalid filename %s in %s database",
617 		    r[i].file, req->q.manpath);
618 		pg_error_internal();
619 		return;
620 	}
621 
622 	if (req->isquery && sz == 1) {
623 		/*
624 		 * If we have just one result, then jump there now
625 		 * without any delay.
626 		 */
627 		printf("Status: 303 See Other\r\n"
628 		    "Location: /");
629 		if (*scriptname != '\0')
630 			printf("%s/", scriptname);
631 		if (strcmp(req->q.manpath, req->p[0]))
632 			printf("%s/", req->q.manpath);
633 		printf("%s\r\n"
634 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
635 		    r[0].file);
636 		return;
637 	}
638 
639 	/*
640 	 * In man(1) mode, show one of the pages
641 	 * even if more than one is found.
642 	 */
643 
644 	iuse = 0;
645 	if (req->q.equal || sz == 1) {
646 		priouse = 20;
647 		archpriouse = 3;
648 		for (i = 0; i < sz; i++) {
649 			sec = r[i].file;
650 			sec += strcspn(sec, "123456789");
651 			if (sec[0] == '\0')
652 				continue;
653 			prio = sec_prios[sec[0] - '1'];
654 			if (sec[1] != '/')
655 				prio += 10;
656 			if (req->q.arch == NULL) {
657 				archprio =
658 				    ((arch = strchr(sec + 1, '/'))
659 					== NULL) ? 3 :
660 				    ((archend = strchr(arch + 1, '/'))
661 					== NULL) ? 0 :
662 				    strncmp(arch, "amd64/",
663 					archend - arch) ? 2 : 1;
664 				if (archprio < archpriouse) {
665 					archpriouse = archprio;
666 					priouse = prio;
667 					iuse = i;
668 					continue;
669 				}
670 				if (archprio > archpriouse)
671 					continue;
672 			}
673 			if (prio >= priouse)
674 				continue;
675 			priouse = prio;
676 			iuse = i;
677 		}
678 		resp_begin_html(200, NULL, r[iuse].file);
679 	} else
680 		resp_begin_html(200, NULL, NULL);
681 
682 	resp_searchform(req,
683 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
684 
685 	if (sz > 1) {
686 		puts("<table class=\"results\">");
687 		for (i = 0; i < sz; i++) {
688 			printf("  <tr>\n"
689 			       "    <td>"
690 			       "<a class=\"Xr\" href=\"/");
691 			if (*scriptname != '\0')
692 				printf("%s/", scriptname);
693 			if (strcmp(req->q.manpath, req->p[0]))
694 				printf("%s/", req->q.manpath);
695 			printf("%s\">", r[i].file);
696 			html_print(r[i].names);
697 			printf("</a></td>\n"
698 			       "    <td><span class=\"Nd\">");
699 			html_print(r[i].output);
700 			puts("</span></td>\n"
701 			     "  </tr>");
702 		}
703 		puts("</table>");
704 	}
705 
706 	if (req->q.equal || sz == 1) {
707 		puts("<hr>");
708 		resp_show(req, r[iuse].file);
709 	}
710 
711 	resp_end_html();
712 }
713 
/*
 * Print a preformatted manual page (cat page) as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, "X\bX" and any other
 * overstruck pair not listed below becomes bold, and a few pairs
 * that nroff uses to draw bullets and box corners are replaced
 * by a plain '*' or '+'.  All text is HTML-escaped.
 *
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair have to be
			 * compared as p[i] and p[i + 2], because
			 * p[i + 1] is the backspace in between.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
850 
851 static void
852 resp_format(const struct req *req, const char *file)
853 {
854 	struct manoutput conf;
855 	struct mparse	*mp;
856 	struct roff_meta *meta;
857 	void		*vp;
858 	int		 fd;
859 	int		 usepath;
860 
861 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
862 		puts("<p>You specified an invalid manual file.</p>");
863 		return;
864 	}
865 
866 	mchars_alloc();
867 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
868 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
869 	mparse_readfd(mp, fd, file);
870 	close(fd);
871 	meta = mparse_result(mp);
872 
873 	memset(&conf, 0, sizeof(conf));
874 	conf.fragment = 1;
875 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
876 	usepath = strcmp(req->q.manpath, req->p[0]);
877 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
878 	    scriptname, *scriptname == '\0' ? "" : "/",
879 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
880 
881 	vp = html_alloc(&conf);
882 	if (meta->macroset == MACROSET_MDOC)
883 		html_mdoc(vp, meta);
884 	else
885 		html_man(vp, meta);
886 
887 	html_free(vp);
888 	mparse_free(mp);
889 	mchars_free();
890 	free(conf.man);
891 	free(conf.style);
892 }
893 
/*
 * Print one manual page.  After skipping an optional leading
 * "./", file names starting with 'c' are treated as preformatted
 * cat pages, all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c')
		resp_format(req, rel);
	else
		resp_catman(req, rel);
}
906 
907 static void
908 pg_show(struct req *req, const char *fullpath)
909 {
910 	char		*manpath;
911 	const char	*file;
912 
913 	if ((file = strchr(fullpath, '/')) == NULL) {
914 		pg_error_badrequest(
915 		    "You did not specify a page to show.");
916 		return;
917 	}
918 	manpath = mandoc_strndup(fullpath, file - fullpath);
919 	file++;
920 
921 	if ( ! validate_manpath(req, manpath)) {
922 		pg_error_badrequest(
923 		    "You specified an invalid manpath.");
924 		free(manpath);
925 		return;
926 	}
927 
928 	/*
929 	 * Begin by chdir()ing into the manpath.
930 	 * This way we can pick up the database files, which are
931 	 * relative to the manpath root.
932 	 */
933 
934 	if (chdir(manpath) == -1) {
935 		warn("chdir %s", manpath);
936 		pg_error_internal();
937 		free(manpath);
938 		return;
939 	}
940 	free(manpath);
941 
942 	if ( ! validate_filename(file)) {
943 		pg_error_badrequest(
944 		    "You specified an invalid manual file.");
945 		return;
946 	}
947 
948 	resp_begin_html(200, NULL, file);
949 	resp_searchform(req, FOCUS_NONE);
950 	resp_show(req, file);
951 	resp_end_html();
952 }
953 
954 static void
955 pg_search(const struct req *req)
956 {
957 	struct mansearch	  search;
958 	struct manpaths		  paths;
959 	struct manpage		 *res;
960 	char			**argv;
961 	char			 *query, *rp, *wp;
962 	size_t			  ressz;
963 	int			  argc;
964 
965 	/*
966 	 * Begin by chdir()ing into the root of the manpath.
967 	 * This way we can pick up the database files, which are
968 	 * relative to the manpath root.
969 	 */
970 
971 	if (chdir(req->q.manpath) == -1) {
972 		warn("chdir %s", req->q.manpath);
973 		pg_error_internal();
974 		return;
975 	}
976 
977 	search.arch = req->q.arch;
978 	search.sec = req->q.sec;
979 	search.outkey = "Nd";
980 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
981 	search.firstmatch = 1;
982 
983 	paths.sz = 1;
984 	paths.paths = mandoc_malloc(sizeof(char *));
985 	paths.paths[0] = mandoc_strdup(".");
986 
987 	/*
988 	 * Break apart at spaces with backslash-escaping.
989 	 */
990 
991 	argc = 0;
992 	argv = NULL;
993 	rp = query = mandoc_strdup(req->q.query);
994 	for (;;) {
995 		while (isspace((unsigned char)*rp))
996 			rp++;
997 		if (*rp == '\0')
998 			break;
999 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1000 		argv[argc++] = wp = rp;
1001 		for (;;) {
1002 			if (isspace((unsigned char)*rp)) {
1003 				*wp = '\0';
1004 				rp++;
1005 				break;
1006 			}
1007 			if (rp[0] == '\\' && rp[1] != '\0')
1008 				rp++;
1009 			if (wp != rp)
1010 				*wp = *rp;
1011 			if (*rp == '\0')
1012 				break;
1013 			wp++;
1014 			rp++;
1015 		}
1016 	}
1017 
1018 	res = NULL;
1019 	ressz = 0;
1020 	if (req->isquery && req->q.equal && argc == 1)
1021 		pg_redirect(req, argv[0]);
1022 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1023 		pg_noresult(req, 400, "Bad Request",
1024 		    "You entered an invalid query.");
1025 	else if (ressz == 0)
1026 		pg_noresult(req, 404, "Not Found", "No results found.");
1027 	else
1028 		pg_searchres(req, res, ressz);
1029 
1030 	free(query);
1031 	mansearch_free(res, ressz);
1032 	free(paths.paths[0]);
1033 	free(paths.paths);
1034 }
1035 
/*
 * Entry point of the CGI program.
 *
 * Restricts the process with pledge(2), arms a CPU time limit,
 * changes into MAN_DIR, reads the list of manpaths, parses
 * PATH_INFO or else QUERY_STRING from the environment, validates
 * the result, and prints one of three pages: a manual page, a
 * search result, or the index page.
 *
 * Returns EXIT_SUCCESS after printing a page and EXIT_FAILURE
 * after printing an error page during setup or validation.
 */
int
main(void)
{
	struct req	 req;
	struct itimerval itimer;
	const char	*path;
	const char	*querystring;
	int		 i;

	/*
	 * The "rpath" pledge could be revoked after mparse_readfd()
	 * if the file descriptor to "/footer.html" would be opened
	 * up front, but it's probably not worth the complication
	 * of the code it would cause: it would require scattering
	 * pledge() calls in multiple low-level resp_*() functions.
	 */

	if (pledge("stdio rpath", NULL) == -1) {
		warn("pledge");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/* Poor man's ReDoS mitigation. */

	itimer.it_value.tv_sec = 2;
	itimer.it_value.tv_usec = 0;
	itimer.it_interval.tv_sec = 2;
	itimer.it_interval.tv_usec = 0;
	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
		warn("setitimer");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/*
	 * First we change directory into the MAN_DIR so that
	 * subsequent scanning for manpath directories is rooted
	 * relative to the same position.
	 */

	if (chdir(MAN_DIR) == -1) {
		warn("MAN_DIR: %s", MAN_DIR);
		pg_error_internal();
		return EXIT_FAILURE;
	}

	memset(&req, 0, sizeof(struct req));
	req.q.equal = 1;
	parse_manpath_conf(&req);

	/* Parse the path info and the query string. */

	if ((path = getenv("PATH_INFO")) == NULL)
		path = "";
	else if (*path == '/')
		path++;

	/*
	 * A non-empty PATH_INFO takes precedence over QUERY_STRING.
	 * The path is only kept for showing a page directly if it
	 * contains a manpath and a section and names an existing
	 * file relative to MAN_DIR; otherwise, it is cleared and
	 * the name parsed from it is used as a search query below.
	 */

	if (*path != '\0') {
		parse_path_info(&req, path);
		if (req.q.manpath == NULL || req.q.sec == NULL ||
		    *req.q.query == '\0' || access(path, F_OK) == -1)
			path = "";
	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
		parse_query_string(&req, querystring);

	/* Validate parsed data and add defaults. */

	if (req.q.manpath == NULL)
		req.q.manpath = mandoc_strdup(req.p[0]);
	else if ( ! validate_manpath(&req, req.q.manpath)) {
		pg_error_badrequest(
		    "You specified an invalid manpath.");
		return EXIT_FAILURE;
	}

	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
		pg_error_badrequest(
		    "You specified an invalid architecture.");
		return EXIT_FAILURE;
	}

	/* Dispatch to the three different pages. */

	if ('\0' != *path)
		pg_show(&req, path);
	else if (NULL != req.q.query)
		pg_search(&req);
	else
		pg_index(&req);

	/* Release the query strings and the list of manpaths. */

	free(req.q.manpath);
	free(req.q.arch);
	free(req.q.sec);
	free(req.q.query);
	for (i = 0; i < (int)req.psz; i++)
		free(req.p[i]);
	free(req.p);
	return EXIT_SUCCESS;
}
1136 
/*
 * Translate PATH_INFO to a query.
 *
 * The path has the form [manpath/][man|cat<sec>/][arch/]name[.sec]
 * and is expected without a leading slash.  The name always ends
 * up in req->q.query; manpath, section and architecture are set
 * when present and left NULL otherwise.
 *
 * If directory components remain that cannot be interpreted, or
 * if the architecture is not a known one, a "Bad Request" page
 * is printed and the program exits.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	const char	*name, *sec, *end;

	req->isquery = 0;
	req->q.equal = 1;
	req->q.manpath = NULL;
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((name = strrchr(path, '/')) == NULL)
		name = path;
	else
		name++;

	/* Optional trailing section. */
	/* Only a suffix starting with a digit counts as a section. */
	sec = strrchr(name, '.');
	if (sec != NULL && isdigit((unsigned char)*++sec)) {
		req->q.query = mandoc_strndup(name, sec - name - 1);
		req->q.sec = mandoc_strdup(sec);
	} else {
		req->q.query = mandoc_strdup(name);
		req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (name == path)
		return;

	/*
	 * From here on, path points to the first directory component
	 * not yet consumed.  Because name follows the last slash and
	 * path is still in front of name, each strchr() below finds
	 * a slash.
	 */

	/* Optional manpath. */
	end = strchr(path, '/');
	req->q.manpath = mandoc_strndup(path, end - path);
	if (validate_manpath(req, req->q.manpath)) {
		path = end + 1;
		if (name == path)
			return;
	} else {
		/* Not a manpath; reconsider the component below. */
		free(req->q.manpath);
		req->q.manpath = NULL;
	}

	/* Optional section. */
	/* A section directory overrides the file name suffix. */
	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
		path += 3;
		end = strchr(path, '/');
		free(req->q.sec);
		req->q.sec = mandoc_strndup(path, end - path);
		path = end + 1;
		if (name == path)
			return;
	}

	/* Optional architecture. */
	/* It must be the only directory component left. */
	end = strchr(path, '/');
	if (end + 1 != name) {
		pg_error_badrequest(
		    "You specified too many directory components.");
		exit(EXIT_FAILURE);
	}
	req->q.arch = mandoc_strndup(path, end - path);
	if (validate_arch(req->q.arch) == 0) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}
}
1207 
1208 /*
1209  * Scan for indexable paths.
1210  */
1211 static void
1212 parse_manpath_conf(struct req *req)
1213 {
1214 	FILE	*fp;
1215 	char	*dp;
1216 	size_t	 dpsz;
1217 	ssize_t	 len;
1218 
1219 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1220 		warn("%s/manpath.conf", MAN_DIR);
1221 		pg_error_internal();
1222 		exit(EXIT_FAILURE);
1223 	}
1224 
1225 	dp = NULL;
1226 	dpsz = 0;
1227 
1228 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1229 		if (dp[len - 1] == '\n')
1230 			dp[--len] = '\0';
1231 		req->p = mandoc_realloc(req->p,
1232 		    (req->psz + 1) * sizeof(char *));
1233 		if ( ! validate_urifrag(dp)) {
1234 			warnx("%s/manpath.conf contains "
1235 			    "unsafe path \"%s\"", MAN_DIR, dp);
1236 			pg_error_internal();
1237 			exit(EXIT_FAILURE);
1238 		}
1239 		if (strchr(dp, '/') != NULL) {
1240 			warnx("%s/manpath.conf contains "
1241 			    "path with slash \"%s\"", MAN_DIR, dp);
1242 			pg_error_internal();
1243 			exit(EXIT_FAILURE);
1244 		}
1245 		req->p[req->psz++] = dp;
1246 		dp = NULL;
1247 		dpsz = 0;
1248 	}
1249 	free(dp);
1250 
1251 	if (req->p == NULL) {
1252 		warnx("%s/manpath.conf is empty", MAN_DIR);
1253 		pg_error_internal();
1254 		exit(EXIT_FAILURE);
1255 	}
1256 }
1257