xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 5b859c19fe53bbea08f5c342e0a4470e99f883e1)
1 /*	$OpenBSD: cgi.c,v 1.41 2014/11/26 00:57:32 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <limits.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "main.h"
34 #include "manpath.h"
35 #include "mansearch.h"
36 #include "cgi.h"
37 
/*
 * A query as passed to the search function.
 * The string members are either NULL (parameter absent or empty,
 * see set_query_attr()) or point to allocated strings that are
 * released at the end of main().
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
48 
/*
 * State of one CGI request: the parsed query parameters plus
 * the list of manpaths that pathgen() read from manpath.conf.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
};
54 
/*
 * Forward declarations of all file-local functions, so that the
 * definitions below can appear in any order.
 */
static	void		 catman(const struct req *, const char *);
static	void		 format(const struct req *, const char *);
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int 		 http_decode(char *);
static	void		 http_parse(struct req *, const char *);
static	void		 http_print(const char *);
static	void 		 http_putchar(char);
static	void		 http_printquery(const struct req *, const char *);
static	void		 pathgen(struct req *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_end_html(void);
static	void		 resp_searchform(const struct req *);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
82 
static	const char	 *scriptname; /* CGI script name */

/*
 * Display priority of the sections 1 to 9, indexed by the section
 * digit minus '1'; pg_searchres() prefers the lowest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Values and labels of the section selector in the search form.
 * Both arrays are walked in parallel up to sec_MAX, so they must
 * have the same number of entries in the same order.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);

/* Architecture names offered by the search form. */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armish",      "armv7",
    "aviion",      "hppa",        "hppa64",      "i386",
    "ia64",        "landisk",     "loongson",    "luna88k",
    "macppc",      "mips64",      "octeon",      "sgi",
    "socppc",      "solbourne",   "sparc",       "sparc64",
    "vax",         "zaurus",
    "amiga",       "arc",         "arm32",       "atari",
    "beagle",      "cats",        "hp300",       "mac68k",
    "mvme68k",     "mvme88k",     "mvmeppc",     "palm",
    "pc532",       "pegasos",     "pmax",        "powerpc",
    "sun3",        "wgrisc",      "x68k"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
118 
/*
 * Print a character, escaping HTML along the way.
 * The double quote, the ampersand, and both angle brackets are
 * replaced by named character references; everything else is
 * written unchanged.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The reference is spelled "quot"; "&quote;" is undefined. */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
145 
146 static void
147 http_printquery(const struct req *req, const char *sep)
148 {
149 
150 	if (NULL != req->q.query) {
151 		printf("query=");
152 		http_print(req->q.query);
153 	}
154 	if (0 == req->q.equal)
155 		printf("%sapropos=1", sep);
156 	if (NULL != req->q.sec) {
157 		printf("%ssec=", sep);
158 		http_print(req->q.sec);
159 	}
160 	if (NULL != req->q.arch) {
161 		printf("%sarch=", sep);
162 		http_print(req->q.arch);
163 	}
164 	if (strcmp(req->q.manpath, req->p[0])) {
165 		printf("%smanpath=", sep);
166 		http_print(req->q.manpath);
167 	}
168 }
169 
/*
 * URL-encode a string to standard output, one character at a
 * time, by way of http_putchar().
 * Accepts NULL strings and prints nothing for them.
 */
static void
http_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		http_putchar(*cp);
}
179 
/*
 * HTML-escape a string to standard output, one character at a
 * time, by way of html_putchar().
 * Accepts NULL strings and prints nothing for them.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
193 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Any previous value of the attribute is freed first.
 * An empty string is not stored: it is freed and the attribute
 * becomes NULL.  In both cases *val is NULL on return, so the
 * caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if ('\0' != *incoming) {
		*attr = incoming;
		return;
	}

	free(incoming);
	*attr = NULL;
}
210 
211 /*
212  * Parse the QUERY_STRING for key-value pairs
213  * and store the values into the query structure.
214  */
215 static void
216 http_parse(struct req *req, const char *qs)
217 {
218 	char		*key, *val;
219 	size_t		 keysz, valsz;
220 
221 	req->q.manpath	= NULL;
222 	req->q.arch	= NULL;
223 	req->q.sec	= NULL;
224 	req->q.query	= NULL;
225 	req->q.equal	= 1;
226 
227 	key = val = NULL;
228 	while (*qs != '\0') {
229 
230 		/* Parse one key. */
231 
232 		keysz = strcspn(qs, "=;&");
233 		key = mandoc_strndup(qs, keysz);
234 		qs += keysz;
235 		if (*qs != '=')
236 			goto next;
237 
238 		/* Parse one value. */
239 
240 		valsz = strcspn(++qs, ";&");
241 		val = mandoc_strndup(qs, valsz);
242 		qs += valsz;
243 
244 		/* Decode and catch encoding errors. */
245 
246 		if ( ! (http_decode(key) && http_decode(val)))
247 			goto next;
248 
249 		/* Handle key-value pairs. */
250 
251 		if ( ! strcmp(key, "query"))
252 			set_query_attr(&req->q.query, &val);
253 
254 		else if ( ! strcmp(key, "apropos"))
255 			req->q.equal = !strcmp(val, "0");
256 
257 		else if ( ! strcmp(key, "manpath")) {
258 #ifdef COMPAT_OLDURI
259 			if ( ! strncmp(val, "OpenBSD ", 8)) {
260 				val[7] = '-';
261 				if ('C' == val[8])
262 					val[8] = 'c';
263 			}
264 #endif
265 			set_query_attr(&req->q.manpath, &val);
266 		}
267 
268 		else if ( ! (strcmp(key, "sec")
269 #ifdef COMPAT_OLDURI
270 		    && strcmp(key, "sektion")
271 #endif
272 		    )) {
273 			if ( ! strcmp(val, "0"))
274 				*val = '\0';
275 			set_query_attr(&req->q.sec, &val);
276 		}
277 
278 		else if ( ! strcmp(key, "arch")) {
279 			if ( ! strcmp(val, "default"))
280 				*val = '\0';
281 			set_query_attr(&req->q.arch, &val);
282 		}
283 
284 		/*
285 		 * The key must be freed in any case.
286 		 * The val may have been handed over to the query
287 		 * structure, in which case it is now NULL.
288 		 */
289 next:
290 		free(key);
291 		key = NULL;
292 		free(val);
293 		val = NULL;
294 
295 		if (*qs != '\0')
296 			qs++;
297 	}
298 }
299 
/*
 * Print one character in URL-encoded form: alphanumerics are
 * passed through, a blank becomes '+', and every other byte is
 * written as '%' followed by exactly two hexadecimal digits.
 */
static void
http_putchar(char c)
{
	const unsigned char	 uc = (unsigned char)c;

	if (isalnum(uc)) {
		putchar(uc);
		return;
	} else if (' ' == c) {
		putchar('+');
		return;
	}

	/*
	 * Format the unsigned byte value: where plain char is signed,
	 * a byte above 0x7f would otherwise be sign-extended and
	 * printed as eight hex digits instead of two.
	 */
	printf("%%%.2x", (unsigned int)uc);
}
313 
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way.  This is done in-place
 * over the allocated string.
 * Returns 1 on success and 0 on an encoding error, that is, if a
 * '%' is not followed by exactly two hexadecimal digits or if the
 * escape encodes a NUL byte.  On error, the string is left in a
 * partially decoded state and must not be used.
 */
static int
http_decode(char *p)
{
	char             hex[3];
	char		*q;
	long             c;

	hex[2] = '\0';

	q = p;
	for ( ; '\0' != *p; p++, q++) {
		if ('%' == *p) {
			/*
			 * Insist on two real hex digits rather than
			 * letting a scanf-style conversion accept
			 * blanks, signs, or a single digit.
			 * p[2] is only inspected if p[1] is a digit,
			 * so the terminator is never overrun.
			 */
			if ( ! (isxdigit((unsigned char)p[1]) &&
			    isxdigit((unsigned char)p[2])))
				return(0);
			hex[0] = p[1];
			hex[1] = p[2];
			c = strtol(hex, NULL, 16);

			/* An embedded NUL would truncate the string. */
			if (0 == c)
				return(0);

			*q = (char)c;
			p += 2;
		} else
			*q = '+' == *p ? ' ' : *p;
	}

	*q = '\0';
	return(1);
}
349 
/*
 * Write the HTTP response headers and the empty line that ends
 * them, then flush standard output.
 * A "Status:" line with code and msg is only written when code
 * is not 200, so msg may be NULL for a 200 response.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 headers[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(headers, stdout);
	fflush(stdout);
}
364 
365 static void
366 resp_begin_html(int code, const char *msg)
367 {
368 
369 	resp_begin_http(code, msg);
370 
371 	printf("<!DOCTYPE html>\n"
372 	       "<HTML>\n"
373 	       "<HEAD>\n"
374 	       "<META CHARSET=\"UTF-8\" />\n"
375 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
376 	       " TYPE=\"text/css\" media=\"all\">\n"
377 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
378 	       " TYPE=\"text/css\" media=\"all\">\n"
379 	       "<TITLE>%s</TITLE>\n"
380 	       "</HEAD>\n"
381 	       "<BODY>\n"
382 	       "<!-- Begin page content. //-->\n",
383 	       CSS_DIR, CSS_DIR, CUSTOMIZE_TITLE);
384 }
385 
/*
 * Close the BODY and HTML elements opened by resp_begin_html().
 */
static void
resp_end_html(void)
{

	fputs("</BODY>\n", stdout);
	fputs("</HTML>\n", stdout);
}
393 
394 static void
395 resp_searchform(const struct req *req)
396 {
397 	int		 i;
398 
399 	puts(CUSTOMIZE_BEGIN);
400 	puts("<!-- Begin search form. //-->");
401 	printf("<DIV ID=\"mancgi\">\n"
402 	       "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
403 	       "<FIELDSET>\n"
404 	       "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
405 	       scriptname);
406 
407 	/* Write query input box. */
408 
409 	printf(	"<TABLE><TR><TD>\n"
410 		"<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
411 	if (NULL != req->q.query)
412 		html_print(req->q.query);
413 	puts("\" SIZE=\"40\">");
414 
415 	/* Write submission and reset buttons. */
416 
417 	printf(	"<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
418 		"<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
419 
420 	/* Write show radio button */
421 
422 	printf(	"</TD><TD>\n"
423 		"<INPUT TYPE=\"radio\" ");
424 	if (req->q.equal)
425 		printf("CHECKED=\"checked\" ");
426 	printf(	"NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
427 		"<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
428 
429 	/* Write section selector. */
430 
431 	puts(	"</TD></TR><TR><TD>\n"
432 		"<SELECT NAME=\"sec\">");
433 	for (i = 0; i < sec_MAX; i++) {
434 		printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
435 		if (NULL != req->q.sec &&
436 		    0 == strcmp(sec_numbers[i], req->q.sec))
437 			printf(" SELECTED=\"selected\"");
438 		printf(">%s</OPTION>\n", sec_names[i]);
439 	}
440 	puts("</SELECT>");
441 
442 	/* Write architecture selector. */
443 
444 	printf(	"<SELECT NAME=\"arch\">\n"
445 		"<OPTION VALUE=\"default\"");
446 	if (NULL == req->q.arch)
447 		printf(" SELECTED=\"selected\"");
448 	puts(">All Architectures</OPTION>");
449 	for (i = 0; i < arch_MAX; i++) {
450 		printf("<OPTION VALUE=\"%s\"", arch_names[i]);
451 		if (NULL != req->q.arch &&
452 		    0 == strcmp(arch_names[i], req->q.arch))
453 			printf(" SELECTED=\"selected\"");
454 		printf(">%s</OPTION>\n", arch_names[i]);
455 	}
456 	puts("</SELECT>");
457 
458 	/* Write manpath selector. */
459 
460 	if (req->psz > 1) {
461 		puts("<SELECT NAME=\"manpath\">");
462 		for (i = 0; i < (int)req->psz; i++) {
463 			printf("<OPTION ");
464 			if (strcmp(req->q.manpath, req->p[i]) == 0)
465 				printf("SELECTED=\"selected\" ");
466 			printf("VALUE=\"");
467 			html_print(req->p[i]);
468 			printf("\">");
469 			html_print(req->p[i]);
470 			puts("</OPTION>");
471 		}
472 		puts("</SELECT>");
473 	}
474 
475 	/* Write search radio button */
476 
477 	printf(	"</TD><TD>\n"
478 		"<INPUT TYPE=\"radio\" ");
479 	if (0 == req->q.equal)
480 		printf("CHECKED=\"checked\" ");
481 	printf(	"NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
482 		"<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
483 
484 	puts("</TD></TR></TABLE>\n"
485 	     "</FIELDSET>\n"
486 	     "</FORM>\n"
487 	     "</DIV>");
488 	puts("<!-- End search form. //-->");
489 }
490 
/*
 * Check that a string is safe to embed in a URI without escaping.
 * Returns 1 if it consists only of alphanumeric characters and
 * the characters '-', '.', '/', and '_' (the empty string
 * qualifies), or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		/* *cp is never NUL here, so the terminator cannot match. */
		if (NULL == strchr("-./_", *cp))
			return(0);
	}
	return(1);
}
504 
505 static int
506 validate_manpath(const struct req *req, const char* manpath)
507 {
508 	size_t	 i;
509 
510 	if ( ! strcmp(manpath, "mandoc"))
511 		return(1);
512 
513 	for (i = 0; i < req->psz; i++)
514 		if ( ! strcmp(manpath, req->p[i]))
515 			return(1);
516 
517 	return(0);
518 }
519 
/*
 * Check that a file name is acceptable for display.
 * After skipping one leading "./", the name must start with "man"
 * or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, or 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (0 == strncmp(file, "./", 2))
		file += 2;

	/* Never allow climbing out of the manpath. */
	if (NULL != strstr(file, "../") || NULL != strstr(file, "/.."))
		return(0);

	if (0 == strncmp(file, "man", 3) || 0 == strncmp(file, "cat", 3))
		return(1);

	return(0);
}
530 
531 static void
532 pg_index(const struct req *req)
533 {
534 
535 	resp_begin_html(200, NULL);
536 	resp_searchform(req);
537 	printf("<P>\n"
538 	       "This web interface is documented in the\n"
539 	       "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
540 	       "manual, and the\n"
541 	       "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
542 	       "manual explains the query syntax.\n"
543 	       "</P>\n",
544 	       scriptname, scriptname);
545 	resp_end_html();
546 }
547 
/*
 * A page showing the search form followed by a one-paragraph
 * message, used when a search cannot show any result.
 * The message is written verbatim, without HTML escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<P>\n%s\n</P>\n", msg);
	resp_end_html();
}
558 
559 static void
560 pg_error_badrequest(const char *msg)
561 {
562 
563 	resp_begin_html(400, "Bad Request");
564 	puts("<H1>Bad Request</H1>\n"
565 	     "<P>\n");
566 	puts(msg);
567 	printf("Try again from the\n"
568 	       "<A HREF=\"%s\">main page</A>.\n"
569 	       "</P>", scriptname);
570 	resp_end_html();
571 }
572 
/*
 * A minimal "500 Internal Server Error" page.
 * Callers log the details to standard error; nothing about the
 * cause is shown to the client.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Internal Server Error</P>\n", stdout);
	resp_end_html();
}
580 
581 static void
582 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
583 {
584 	char		*arch, *archend;
585 	size_t		 i, iuse, isec;
586 	int		 archprio, archpriouse;
587 	int		 prio, priouse;
588 	char		 sec;
589 
590 	for (i = 0; i < sz; i++) {
591 		if (validate_filename(r[i].file))
592 			continue;
593 		fprintf(stderr, "invalid filename %s in %s database\n",
594 		    r[i].file, req->q.manpath);
595 		pg_error_internal();
596 		return;
597 	}
598 
599 	if (1 == sz) {
600 		/*
601 		 * If we have just one result, then jump there now
602 		 * without any delay.
603 		 */
604 		printf("Status: 303 See Other\r\n");
605 		printf("Location: http://%s%s/%s/%s?",
606 		    HTTP_HOST, scriptname, req->q.manpath, r[0].file);
607 		http_printquery(req, "&");
608 		printf("\r\n"
609 		     "Content-Type: text/html; charset=utf-8\r\n"
610 		     "\r\n");
611 		return;
612 	}
613 
614 	resp_begin_html(200, NULL);
615 	resp_searchform(req);
616 	puts("<DIV CLASS=\"results\">");
617 	puts("<TABLE>");
618 
619 	for (i = 0; i < sz; i++) {
620 		printf("<TR>\n"
621 		       "<TD CLASS=\"title\">\n"
622 		       "<A HREF=\"%s/%s/%s?",
623 		    scriptname, req->q.manpath, r[i].file);
624 		http_printquery(req, "&amp;");
625 		printf("\">");
626 		html_print(r[i].names);
627 		printf("</A>\n"
628 		       "</TD>\n"
629 		       "<TD CLASS=\"desc\">");
630 		html_print(r[i].output);
631 		puts("</TD>\n"
632 		     "</TR>");
633 	}
634 
635 	puts("</TABLE>\n"
636 	     "</DIV>");
637 
638 	/*
639 	 * In man(1) mode, show one of the pages
640 	 * even if more than one is found.
641 	 */
642 
643 	if (req->q.equal) {
644 		puts("<HR>");
645 		iuse = 0;
646 		priouse = 10;
647 		archpriouse = 3;
648 		for (i = 0; i < sz; i++) {
649 			isec = strcspn(r[i].file, "123456789");
650 			sec = r[i].file[isec];
651 			if ('\0' == sec)
652 				continue;
653 			prio = sec_prios[sec - '1'];
654 			if (NULL == req->q.arch) {
655 				archprio =
656 				    (NULL == (arch = strchr(
657 					r[i].file + isec, '/'))) ? 3 :
658 				    (NULL == (archend = strchr(
659 					arch + 1, '/'))) ? 0 :
660 				    strncmp(arch, "amd64/",
661 					archend - arch) ? 2 : 1;
662 				if (archprio < archpriouse) {
663 					archpriouse = archprio;
664 					priouse = prio;
665 					iuse = i;
666 					continue;
667 				}
668 				if (archprio > archpriouse)
669 					continue;
670 			}
671 			if (prio >= priouse)
672 				continue;
673 			priouse = prio;
674 			iuse = i;
675 		}
676 		resp_show(req, r[iuse].file);
677 	}
678 
679 	resp_end_html();
680 }
681 
/*
 * Show a preformatted ("cat") manual page inside a PRE element.
 * Backspace overstrike sequences are translated to HTML:
 * an underscore overstruck by a character becomes italic,
 * certain pairs of punctuation characters collapse into '*' or
 * '+', and any other overstrike becomes bold.
 * If the file cannot be opened, a one-line message is printed.
 * The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if (NULL == (f = fopen(file, "r"))) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while (NULL != (p = fgetln(f, &len))) {
		bold = italic = 0;

		/* The final character of each line is handled below. */

		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair are compared
			 * around the backspace, i.e. p[i] and p[i + 2];
			 * testing p[i + 1] could never match because
			 * it is known to be a backspace here.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		/*
		 * If an overstrike consumed the final character,
		 * i is already past it and nothing is printed here.
		 */

		if (i == (int)len - 1 && '\n' != p[i])
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
813 
814 static void
815 format(const struct req *req, const char *file)
816 {
817 	struct mparse	*mp;
818 	struct mchars	*mchars;
819 	struct mdoc	*mdoc;
820 	struct man	*man;
821 	void		*vp;
822 	char		*opts;
823 	enum mandoclevel rc;
824 	int		 fd;
825 	int		 usepath;
826 
827 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
828 		puts("<P>You specified an invalid manual file.</P>");
829 		return;
830 	}
831 
832 	mchars = mchars_alloc();
833 	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_FATAL, NULL,
834 	    mchars, req->q.manpath);
835 	rc = mparse_readfd(mp, fd, file);
836 	close(fd);
837 
838 	if (rc >= MANDOCLEVEL_FATAL) {
839 		fprintf(stderr, "fatal mandoc error: %s/%s\n",
840 		    req->q.manpath, file);
841 		pg_error_internal();
842 		return;
843 	}
844 
845 	usepath = strcmp(req->q.manpath, req->p[0]);
846 	mandoc_asprintf(&opts,
847 	    "fragment,man=%s?query=%%N&sec=%%S%s%s%s%s",
848 	    scriptname,
849 	    req->q.arch	? "&arch="       : "",
850 	    req->q.arch	? req->q.arch    : "",
851 	    usepath	? "&manpath="    : "",
852 	    usepath	? req->q.manpath : "");
853 
854 	mparse_result(mp, &mdoc, &man, NULL);
855 	if (NULL == man && NULL == mdoc) {
856 		fprintf(stderr, "fatal mandoc error: %s/%s\n",
857 		    req->q.manpath, file);
858 		pg_error_internal();
859 		mparse_free(mp);
860 		mchars_free(mchars);
861 		return;
862 	}
863 
864 	vp = html_alloc(mchars, opts);
865 
866 	if (NULL != mdoc)
867 		html_mdoc(vp, mdoc);
868 	else
869 		html_man(vp, man);
870 
871 	html_free(vp);
872 	mparse_free(mp);
873 	mchars_free(mchars);
874 	free(opts);
875 }
876 
/*
 * Show one manual page.  After skipping one leading "./", file
 * names starting with 'c' are treated as preformatted pages
 * and passed to catman(); all others go to format().
 */
static void
resp_show(const struct req *req, const char *file)
{

	if (0 == strncmp(file, "./", 2))
		file += 2;

	if ('c' == *file) {
		catman(req, file);
		return;
	}
	format(req, file);
}
889 
890 static void
891 pg_show(struct req *req, const char *fullpath)
892 {
893 	char		*manpath;
894 	const char	*file;
895 
896 	if ((file = strchr(fullpath, '/')) == NULL) {
897 		pg_error_badrequest(
898 		    "You did not specify a page to show.");
899 		return;
900 	}
901 	manpath = mandoc_strndup(fullpath, file - fullpath);
902 	file++;
903 
904 	if ( ! validate_manpath(req, manpath)) {
905 		pg_error_badrequest(
906 		    "You specified an invalid manpath.");
907 		free(manpath);
908 		return;
909 	}
910 
911 	/*
912 	 * Begin by chdir()ing into the manpath.
913 	 * This way we can pick up the database files, which are
914 	 * relative to the manpath root.
915 	 */
916 
917 	if (chdir(manpath) == -1) {
918 		fprintf(stderr, "chdir %s: %s\n",
919 		    manpath, strerror(errno));
920 		pg_error_internal();
921 		free(manpath);
922 		return;
923 	}
924 
925 	if (strcmp(manpath, "mandoc")) {
926 		free(req->q.manpath);
927 		req->q.manpath = manpath;
928 	} else
929 		free(manpath);
930 
931 	if ( ! validate_filename(file)) {
932 		pg_error_badrequest(
933 		    "You specified an invalid manual file.");
934 		return;
935 	}
936 
937 	resp_begin_html(200, NULL);
938 	resp_searchform(req);
939 	resp_show(req, file);
940 	resp_end_html();
941 }
942 
943 static void
944 pg_search(const struct req *req)
945 {
946 	struct mansearch	  search;
947 	struct manpaths		  paths;
948 	struct manpage		 *res;
949 	char			**argv;
950 	char			 *query, *rp, *wp;
951 	size_t			  ressz;
952 	int			  argc;
953 
954 	/*
955 	 * Begin by chdir()ing into the root of the manpath.
956 	 * This way we can pick up the database files, which are
957 	 * relative to the manpath root.
958 	 */
959 
960 	if (-1 == (chdir(req->q.manpath))) {
961 		fprintf(stderr, "chdir %s: %s\n",
962 		    req->q.manpath, strerror(errno));
963 		pg_error_internal();
964 		return;
965 	}
966 
967 	search.arch = req->q.arch;
968 	search.sec = req->q.sec;
969 	search.outkey = "Nd";
970 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
971 	search.firstmatch = 1;
972 
973 	paths.sz = 1;
974 	paths.paths = mandoc_malloc(sizeof(char *));
975 	paths.paths[0] = mandoc_strdup(".");
976 
977 	/*
978 	 * Break apart at spaces with backslash-escaping.
979 	 */
980 
981 	argc = 0;
982 	argv = NULL;
983 	rp = query = mandoc_strdup(req->q.query);
984 	for (;;) {
985 		while (isspace((unsigned char)*rp))
986 			rp++;
987 		if (*rp == '\0')
988 			break;
989 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
990 		argv[argc++] = wp = rp;
991 		for (;;) {
992 			if (isspace((unsigned char)*rp)) {
993 				*wp = '\0';
994 				rp++;
995 				break;
996 			}
997 			if (rp[0] == '\\' && rp[1] != '\0')
998 				rp++;
999 			if (wp != rp)
1000 				*wp = *rp;
1001 			if (*rp == '\0')
1002 				break;
1003 			wp++;
1004 			rp++;
1005 		}
1006 	}
1007 
1008 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
1009 		pg_noresult(req, "You entered an invalid query.");
1010 	else if (0 == ressz)
1011 		pg_noresult(req, "No results found.");
1012 	else
1013 		pg_searchres(req, res, ressz);
1014 
1015 	free(query);
1016 	mansearch_free(res, ressz);
1017 	free(paths.paths[0]);
1018 	free(paths.paths);
1019 }
1020 
1021 int
1022 main(void)
1023 {
1024 	struct req	 req;
1025 	struct itimerval itimer;
1026 	const char	*path;
1027 	const char	*querystring;
1028 	int		 i;
1029 
1030 	/* Poor man's ReDoS mitigation. */
1031 
1032 	itimer.it_value.tv_sec = 2;
1033 	itimer.it_value.tv_usec = 0;
1034 	itimer.it_interval.tv_sec = 2;
1035 	itimer.it_interval.tv_usec = 0;
1036 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1037 		fprintf(stderr, "setitimer: %s\n", strerror(errno));
1038 		pg_error_internal();
1039 		return(EXIT_FAILURE);
1040 	}
1041 
1042 	/* Scan our run-time environment. */
1043 
1044 	if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1045 		scriptname = "";
1046 
1047 	if ( ! validate_urifrag(scriptname)) {
1048 		fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1049 		    scriptname);
1050 		pg_error_internal();
1051 		return(EXIT_FAILURE);
1052 	}
1053 
1054 	/*
1055 	 * First we change directory into the MAN_DIR so that
1056 	 * subsequent scanning for manpath directories is rooted
1057 	 * relative to the same position.
1058 	 */
1059 
1060 	if (-1 == chdir(MAN_DIR)) {
1061 		fprintf(stderr, "MAN_DIR: %s: %s\n",
1062 		    MAN_DIR, strerror(errno));
1063 		pg_error_internal();
1064 		return(EXIT_FAILURE);
1065 	}
1066 
1067 	memset(&req, 0, sizeof(struct req));
1068 	pathgen(&req);
1069 
1070 	/* Next parse out the query string. */
1071 
1072 	if (NULL != (querystring = getenv("QUERY_STRING")))
1073 		http_parse(&req, querystring);
1074 
1075 	if (req.q.manpath == NULL)
1076 		req.q.manpath = mandoc_strdup(req.p[0]);
1077 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1078 		pg_error_badrequest(
1079 		    "You specified an invalid manpath.");
1080 		return(EXIT_FAILURE);
1081 	}
1082 
1083 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1084 		pg_error_badrequest(
1085 		    "You specified an invalid architecture.");
1086 		return(EXIT_FAILURE);
1087 	}
1088 
1089 	/* Dispatch to the three different pages. */
1090 
1091 	path = getenv("PATH_INFO");
1092 	if (NULL == path)
1093 		path = "";
1094 	else if ('/' == *path)
1095 		path++;
1096 
1097 	if ('\0' != *path)
1098 		pg_show(&req, path);
1099 	else if (NULL != req.q.query)
1100 		pg_search(&req);
1101 	else
1102 		pg_index(&req);
1103 
1104 	free(req.q.manpath);
1105 	free(req.q.arch);
1106 	free(req.q.sec);
1107 	free(req.q.query);
1108 	for (i = 0; i < (int)req.psz; i++)
1109 		free(req.p[i]);
1110 	free(req.p);
1111 	return(EXIT_SUCCESS);
1112 }
1113 
1114 /*
1115  * Scan for indexable paths.
1116  */
1117 static void
1118 pathgen(struct req *req)
1119 {
1120 	FILE	*fp;
1121 	char	*dp;
1122 	size_t	 dpsz;
1123 
1124 	if (NULL == (fp = fopen("manpath.conf", "r"))) {
1125 		fprintf(stderr, "%s/manpath.conf: %s\n",
1126 			MAN_DIR, strerror(errno));
1127 		pg_error_internal();
1128 		exit(EXIT_FAILURE);
1129 	}
1130 
1131 	while (NULL != (dp = fgetln(fp, &dpsz))) {
1132 		if ('\n' == dp[dpsz - 1])
1133 			dpsz--;
1134 		req->p = mandoc_realloc(req->p,
1135 		    (req->psz + 1) * sizeof(char *));
1136 		dp = mandoc_strndup(dp, dpsz);
1137 		if ( ! validate_urifrag(dp)) {
1138 			fprintf(stderr, "%s/manpath.conf contains "
1139 			    "unsafe path \"%s\"\n", MAN_DIR, dp);
1140 			pg_error_internal();
1141 			exit(EXIT_FAILURE);
1142 		}
1143 		if (NULL != strchr(dp, '/')) {
1144 			fprintf(stderr, "%s/manpath.conf contains "
1145 			    "path with slash \"%s\"\n", MAN_DIR, dp);
1146 			pg_error_internal();
1147 			exit(EXIT_FAILURE);
1148 		}
1149 		req->p[req->psz++] = dp;
1150 	}
1151 
1152 	if ( req->p == NULL ) {
1153 		fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1154 		pg_error_internal();
1155 		exit(EXIT_FAILURE);
1156 	}
1157 }
1158