xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision e5157e49389faebcb42b7237d55fbf096d9c2523)
1 /*	$OpenBSD: cgi.c,v 1.40 2014/11/11 19:03:10 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/time.h>
21 
22 #include <ctype.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc.h"
33 #include "mandoc_aux.h"
34 #include "main.h"
35 #include "manpath.h"
36 #include "mansearch.h"
37 #include "cgi.h"
38 
/*
 * The search parameters of one request.
 * The string members are either NULL or point to allocated memory.
 */
struct	query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
49 
50 struct	req {
51 	struct query	  q;
52 	char		**p; /* array of available manpaths */
53 	size_t		  psz; /* number of available manpaths */
54 };
55 
/* Forward declarations of the functions local to this file. */
static	void		 catman(const struct req *req, const char *file);
static	void		 format(const struct req *req, const char *file);
static	void		 html_print(const char *p);
static	void		 html_putchar(char c);
static	int 		 http_decode(char *p);
static	void		 http_parse(struct req *req, const char *qs);
static	void		 http_print(const char *p);
static	void 		 http_putchar(char c);
static	void		 http_printquery(const struct req *req,
				const char *sep);
static	void		 pathgen(struct req *req);
static	void		 pg_error_badrequest(const char *msg);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *req);
static	void		 pg_noresult(const struct req *req,
				const char *msg);
static	void		 pg_search(const struct req *req);
static	void		 pg_searchres(const struct req *req,
				struct manpage *r, size_t sz);
static	void		 pg_show(struct req *req, const char *fullpath);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_end_html(void);
static	void		 resp_searchform(const struct req *req);
static	void		 resp_show(const struct req *req, const char *file);
static	void		 set_query_attr(char **attr, char **val);
static	int		 validate_filename(const char *file);
static	int		 validate_manpath(const struct req *req,
				const char *manpath);
static	int		 validate_urifrag(const char *frag);
83 
/* CGI script name */
static	const char	 *scriptname;

/*
 * Priority of manual sections 1 to 9, indexed by section digit
 * minus one.  Smaller values are preferred when choosing which
 * one of several matching pages to show.
 */
static	const int sec_prios[] = {
    1, 4, 5,
    8, 6, 3,
    7, 2, 9
};

/* Values offered by the section selector of the search form ... */
static	const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};

/* ... and the labels displayed for them, in the same order. */
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);

/* Architectures offered by the architecture selector. */
static	const char *const arch_names[] = {
    "amd64",     "alpha",     "armish",
    "armv7",     "aviion",    "hppa",
    "hppa64",    "i386",      "ia64",
    "landisk",   "loongson",  "luna88k",
    "macppc",    "mips64",    "octeon",
    "sgi",       "socppc",    "solbourne",
    "sparc",     "sparc64",   "vax",
    "zaurus",    "amiga",     "arc",
    "arm32",     "atari",     "beagle",
    "cats",      "hp300",     "mac68k",
    "mvme68k",   "mvme88k",   "mvmeppc",
    "palm",      "pc532",     "pegasos",
    "pmax",      "powerpc",   "sun3",
    "wgrisc",    "x68k"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
119 
/*
 * Print a character, escaping HTML along the way.
 * The double quote, ampersand, and angle brackets are replaced
 * by named character references; anything else is written as is.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The entity is "&quot;"; HTML defines no "&quote;". */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
146 
147 static void
148 http_printquery(const struct req *req, const char *sep)
149 {
150 
151 	if (NULL != req->q.query) {
152 		printf("query=");
153 		http_print(req->q.query);
154 	}
155 	if (0 == req->q.equal)
156 		printf("%sapropos=1", sep);
157 	if (NULL != req->q.sec) {
158 		printf("%ssec=", sep);
159 		http_print(req->q.sec);
160 	}
161 	if (NULL != req->q.arch) {
162 		printf("%sarch=", sep);
163 		http_print(req->q.arch);
164 	}
165 	if (NULL != req->q.manpath &&
166 	    strcmp(req->q.manpath, req->p[0])) {
167 		printf("%smanpath=", sep);
168 		http_print(req->q.manpath);
169 	}
170 }
171 
/*
 * Call through to http_putchar() for each character of the string.
 * Accepts NULL strings.
 */
static void
http_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		http_putchar(*cp);
}
181 
/*
 * Print a string with HTML escaping by handing it to
 * html_putchar() one character at a time.
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
195 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Whatever *attr held before is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL.
 * In either case, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	*val = NULL;

	if (newval[0] != '\0') {
		*attr = newval;
		return;
	}
	free(newval);
	*attr = NULL;
}
212 
213 /*
214  * Parse the QUERY_STRING for key-value pairs
215  * and store the values into the query structure.
216  */
217 static void
218 http_parse(struct req *req, const char *qs)
219 {
220 	char		*key, *val;
221 	size_t		 keysz, valsz;
222 
223 	req->q.manpath	= NULL;
224 	req->q.arch	= NULL;
225 	req->q.sec	= NULL;
226 	req->q.query	= NULL;
227 	req->q.equal	= 1;
228 
229 	key = val = NULL;
230 	while (*qs != '\0') {
231 
232 		/* Parse one key. */
233 
234 		keysz = strcspn(qs, "=;&");
235 		key = mandoc_strndup(qs, keysz);
236 		qs += keysz;
237 		if (*qs != '=')
238 			goto next;
239 
240 		/* Parse one value. */
241 
242 		valsz = strcspn(++qs, ";&");
243 		val = mandoc_strndup(qs, valsz);
244 		qs += valsz;
245 
246 		/* Decode and catch encoding errors. */
247 
248 		if ( ! (http_decode(key) && http_decode(val)))
249 			goto next;
250 
251 		/* Handle key-value pairs. */
252 
253 		if ( ! strcmp(key, "query"))
254 			set_query_attr(&req->q.query, &val);
255 
256 		else if ( ! strcmp(key, "apropos"))
257 			req->q.equal = !strcmp(val, "0");
258 
259 		else if ( ! strcmp(key, "manpath")) {
260 #ifdef COMPAT_OLDURI
261 			if ( ! strncmp(val, "OpenBSD ", 8)) {
262 				val[7] = '-';
263 				if ('C' == val[8])
264 					val[8] = 'c';
265 			}
266 #endif
267 			set_query_attr(&req->q.manpath, &val);
268 		}
269 
270 		else if ( ! (strcmp(key, "sec")
271 #ifdef COMPAT_OLDURI
272 		    && strcmp(key, "sektion")
273 #endif
274 		    )) {
275 			if ( ! strcmp(val, "0"))
276 				*val = '\0';
277 			set_query_attr(&req->q.sec, &val);
278 		}
279 
280 		else if ( ! strcmp(key, "arch")) {
281 			if ( ! strcmp(val, "default"))
282 				*val = '\0';
283 			set_query_attr(&req->q.arch, &val);
284 		}
285 
286 		/*
287 		 * The key must be freed in any case.
288 		 * The val may have been handed over to the query
289 		 * structure, in which case it is now NULL.
290 		 */
291 next:
292 		free(key);
293 		key = NULL;
294 		free(val);
295 		val = NULL;
296 
297 		if (*qs != '\0')
298 			qs++;
299 	}
300 
301 	/* Fall back to the default manpath. */
302 
303 	if (req->q.manpath == NULL)
304 		req->q.manpath = mandoc_strdup(req->p[0]);
305 }
306 
/*
 * Print one character in URI-encoded form.
 * Alphanumeric characters are printed as they are, a blank
 * is printed as '+', and every other byte is printed as '%'
 * followed by exactly two lower-case hexadecimal digits.
 */
static void
http_putchar(char c)
{
	unsigned char	 uc;

	/*
	 * Work on the unsigned value of the byte.  Where plain char
	 * is signed, a byte >= 0x80 passed straight to printf would
	 * be sign-extended and come out as eight hex digits.
	 */
	uc = (unsigned char)c;

	if (isalnum(uc)) {
		putchar(uc);
		return;
	} else if (' ' == c) {
		putchar('+');
		return;
	}
	printf("%%%.2x", (unsigned int)uc);
}
320 
321 /*
322  * HTTP-decode a string.  The standard explanation is that this turns
323  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
324  * over the allocated string.
325  */
326 static int
327 http_decode(char *p)
328 {
329 	char             hex[3];
330 	char		*q;
331 	int              c;
332 
333 	hex[2] = '\0';
334 
335 	q = p;
336 	for ( ; '\0' != *p; p++, q++) {
337 		if ('%' == *p) {
338 			if ('\0' == (hex[0] = *(p + 1)))
339 				return(0);
340 			if ('\0' == (hex[1] = *(p + 2)))
341 				return(0);
342 			if (1 != sscanf(hex, "%x", &c))
343 				return(0);
344 			if ('\0' == c)
345 				return(0);
346 
347 			*q = (char)c;
348 			p += 2;
349 		} else
350 			*q = '+' == *p ? ' ' : *p;
351 	}
352 
353 	*q = '\0';
354 	return(1);
355 }
356 
/*
 * Write the HTTP response header and flush it.
 * A Status line is only written for codes other than 200;
 * msg is not used when code is 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
371 
372 static void
373 resp_begin_html(int code, const char *msg)
374 {
375 
376 	resp_begin_http(code, msg);
377 
378 	printf("<!DOCTYPE html>\n"
379 	       "<HTML>\n"
380 	       "<HEAD>\n"
381 	       "<META CHARSET=\"UTF-8\" />\n"
382 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
383 	       " TYPE=\"text/css\" media=\"all\">\n"
384 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
385 	       " TYPE=\"text/css\" media=\"all\">\n"
386 	       "<TITLE>%s</TITLE>\n"
387 	       "</HEAD>\n"
388 	       "<BODY>\n"
389 	       "<!-- Begin page content. //-->\n",
390 	       CSS_DIR, CSS_DIR, CUSTOMIZE_TITLE);
391 }
392 
/*
 * Close the BODY and HTML elements opened by resp_begin_html().
 */
static void
resp_end_html(void)
{

	fputs("</BODY>\n", stdout);
	fputs("</HTML>\n", stdout);
}
400 
401 static void
402 resp_searchform(const struct req *req)
403 {
404 	int		 i;
405 
406 	puts(CUSTOMIZE_BEGIN);
407 	puts("<!-- Begin search form. //-->");
408 	printf("<DIV ID=\"mancgi\">\n"
409 	       "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
410 	       "<FIELDSET>\n"
411 	       "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
412 	       scriptname);
413 
414 	/* Write query input box. */
415 
416 	printf(	"<TABLE><TR><TD>\n"
417 		"<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
418 	if (NULL != req->q.query)
419 		html_print(req->q.query);
420 	puts("\" SIZE=\"40\">");
421 
422 	/* Write submission and reset buttons. */
423 
424 	printf(	"<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
425 		"<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
426 
427 	/* Write show radio button */
428 
429 	printf(	"</TD><TD>\n"
430 		"<INPUT TYPE=\"radio\" ");
431 	if (req->q.equal)
432 		printf("CHECKED=\"checked\" ");
433 	printf(	"NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
434 		"<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
435 
436 	/* Write section selector. */
437 
438 	puts(	"</TD></TR><TR><TD>\n"
439 		"<SELECT NAME=\"sec\">");
440 	for (i = 0; i < sec_MAX; i++) {
441 		printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
442 		if (NULL != req->q.sec &&
443 		    0 == strcmp(sec_numbers[i], req->q.sec))
444 			printf(" SELECTED=\"selected\"");
445 		printf(">%s</OPTION>\n", sec_names[i]);
446 	}
447 	puts("</SELECT>");
448 
449 	/* Write architecture selector. */
450 
451 	printf(	"<SELECT NAME=\"arch\">\n"
452 		"<OPTION VALUE=\"default\"");
453 	if (NULL == req->q.arch)
454 		printf(" SELECTED=\"selected\"");
455 	puts(">All Architectures</OPTION>");
456 	for (i = 0; i < arch_MAX; i++) {
457 		printf("<OPTION VALUE=\"%s\"", arch_names[i]);
458 		if (NULL != req->q.arch &&
459 		    0 == strcmp(arch_names[i], req->q.arch))
460 			printf(" SELECTED=\"selected\"");
461 		printf(">%s</OPTION>\n", arch_names[i]);
462 	}
463 	puts("</SELECT>");
464 
465 	/* Write manpath selector. */
466 
467 	if (req->psz > 1) {
468 		puts("<SELECT NAME=\"manpath\">");
469 		for (i = 0; i < (int)req->psz; i++) {
470 			printf("<OPTION ");
471 			if (NULL == req->q.manpath ? 0 == i :
472 			    0 == strcmp(req->q.manpath, req->p[i]))
473 				printf("SELECTED=\"selected\" ");
474 			printf("VALUE=\"");
475 			html_print(req->p[i]);
476 			printf("\">");
477 			html_print(req->p[i]);
478 			puts("</OPTION>");
479 		}
480 		puts("</SELECT>");
481 	}
482 
483 	/* Write search radio button */
484 
485 	printf(	"</TD><TD>\n"
486 		"<INPUT TYPE=\"radio\" ");
487 	if (0 == req->q.equal)
488 		printf("CHECKED=\"checked\" ");
489 	printf(	"NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
490 		"<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
491 
492 	puts("</TD></TR></TABLE>\n"
493 	     "</FIELDSET>\n"
494 	     "</FORM>\n"
495 	     "</DIV>");
496 	puts("<!-- End search form. //-->");
497 }
498 
/*
 * Check that a string consists of nothing but alphanumeric
 * characters, dashes, dots, slashes and underscores.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return(0);
		}
	}
	return(1);
}
512 
513 static int
514 validate_manpath(const struct req *req, const char* manpath)
515 {
516 	size_t	 i;
517 
518 	if ( ! strcmp(manpath, "mandoc"))
519 		return(1);
520 
521 	for (i = 0; i < req->psz; i++)
522 		if ( ! strcmp(manpath, req->p[i]))
523 			return(1);
524 
525 	return(0);
526 }
527 
/*
 * Check a file name relative to the manpath root.
 * After skipping an optional leading "./", it must start with
 * "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return(0);
	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return(1);
	return(0);
}
538 
539 static void
540 pg_index(const struct req *req)
541 {
542 
543 	resp_begin_html(200, NULL);
544 	resp_searchform(req);
545 	printf("<P>\n"
546 	       "This web interface is documented in the\n"
547 	       "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
548 	       "manual, and the\n"
549 	       "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
550 	       "manual explains the query syntax.\n"
551 	       "</P>\n",
552 	       scriptname, scriptname);
553 	resp_end_html();
554 }
555 
/*
 * Write a page consisting of the search form and a message
 * explaining why there are no results to show.
 * The message is written as is, without HTML escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{
	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<P>\n%s\n</P>\n", msg);
	resp_end_html();
}
566 
567 static void
568 pg_error_badrequest(const char *msg)
569 {
570 
571 	resp_begin_html(400, "Bad Request");
572 	puts("<H1>Bad Request</H1>\n"
573 	     "<P>\n");
574 	puts(msg);
575 	printf("Try again from the\n"
576 	       "<A HREF=\"%s\">main page</A>.\n"
577 	       "</P>", scriptname);
578 	resp_end_html();
579 }
580 
/*
 * Write a complete "500 Internal Server Error" response.
 * Details are deliberately not shown; callers log them to stderr.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Internal Server Error</P>\n", stdout);
	resp_end_html();
}
588 
589 static void
590 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
591 {
592 	char		*arch, *archend;
593 	size_t		 i, iuse, isec;
594 	int		 archprio, archpriouse;
595 	int		 prio, priouse;
596 	char		 sec;
597 
598 	for (i = 0; i < sz; i++) {
599 		if (validate_filename(r[i].file))
600 			continue;
601 		fprintf(stderr, "invalid filename %s in %s database\n",
602 		    r[i].file, req->q.manpath);
603 		pg_error_internal();
604 		return;
605 	}
606 
607 	if (1 == sz) {
608 		/*
609 		 * If we have just one result, then jump there now
610 		 * without any delay.
611 		 */
612 		printf("Status: 303 See Other\r\n");
613 		printf("Location: http://%s%s/%s/%s?",
614 		    HTTP_HOST, scriptname, req->q.manpath, r[0].file);
615 		http_printquery(req, "&");
616 		printf("\r\n"
617 		     "Content-Type: text/html; charset=utf-8\r\n"
618 		     "\r\n");
619 		return;
620 	}
621 
622 	resp_begin_html(200, NULL);
623 	resp_searchform(req);
624 	puts("<DIV CLASS=\"results\">");
625 	puts("<TABLE>");
626 
627 	for (i = 0; i < sz; i++) {
628 		printf("<TR>\n"
629 		       "<TD CLASS=\"title\">\n"
630 		       "<A HREF=\"%s/%s/%s?",
631 		    scriptname, req->q.manpath, r[i].file);
632 		http_printquery(req, "&amp;");
633 		printf("\">");
634 		html_print(r[i].names);
635 		printf("</A>\n"
636 		       "</TD>\n"
637 		       "<TD CLASS=\"desc\">");
638 		html_print(r[i].output);
639 		puts("</TD>\n"
640 		     "</TR>");
641 	}
642 
643 	puts("</TABLE>\n"
644 	     "</DIV>");
645 
646 	/*
647 	 * In man(1) mode, show one of the pages
648 	 * even if more than one is found.
649 	 */
650 
651 	if (req->q.equal) {
652 		puts("<HR>");
653 		iuse = 0;
654 		priouse = 10;
655 		archpriouse = 3;
656 		for (i = 0; i < sz; i++) {
657 			isec = strcspn(r[i].file, "123456789");
658 			sec = r[i].file[isec];
659 			if ('\0' == sec)
660 				continue;
661 			prio = sec_prios[sec - '1'];
662 			if (NULL == req->q.arch) {
663 				archprio =
664 				    (NULL == (arch = strchr(
665 					r[i].file + isec, '/'))) ? 3 :
666 				    (NULL == (archend = strchr(
667 					arch + 1, '/'))) ? 0 :
668 				    strncmp(arch, "amd64/",
669 					archend - arch) ? 2 : 1;
670 				if (archprio < archpriouse) {
671 					archpriouse = archprio;
672 					priouse = prio;
673 					iuse = i;
674 					continue;
675 				}
676 				if (archprio > archpriouse)
677 					continue;
678 			}
679 			if (prio >= priouse)
680 				continue;
681 			priouse = prio;
682 			iuse = i;
683 		}
684 		resp_show(req, r[iuse].file);
685 	}
686 
687 	resp_end_html();
688 }
689 
/*
 * Write a preformatted manual page (a "cat page") as HTML.
 *
 * The file is copied line by line into a PRE element.
 * Backspace overstrike sequences are translated: "_\bX" starts
 * or continues italics, certain pairs of overstruck characters
 * are replaced by '*' or '+', and any other "X\bY" starts or
 * continues bold.  The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if (NULL == (f = fopen(file, "r"))) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while (NULL != (p = fgetln(f, &len))) {
		bold = italic = 0;
		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace
			 * and p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * The partner character is always p[i + 2];
			 * testing p[i + 1] could never match because
			 * that is known to be the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == (int)len - 1 && '\n' != p[i])
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
821 
822 static void
823 format(const struct req *req, const char *file)
824 {
825 	struct mparse	*mp;
826 	struct mchars	*mchars;
827 	struct mdoc	*mdoc;
828 	struct man	*man;
829 	void		*vp;
830 	char		*opts;
831 	enum mandoclevel rc;
832 	int		 fd;
833 	int		 usepath;
834 
835 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
836 		puts("<P>You specified an invalid manual file.</P>");
837 		return;
838 	}
839 
840 	mchars = mchars_alloc();
841 	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_FATAL, NULL,
842 	    mchars, req->q.manpath);
843 	rc = mparse_readfd(mp, fd, file);
844 	close(fd);
845 
846 	if (rc >= MANDOCLEVEL_FATAL) {
847 		fprintf(stderr, "fatal mandoc error: %s/%s\n",
848 		    req->q.manpath, file);
849 		pg_error_internal();
850 		return;
851 	}
852 
853 	usepath = strcmp(req->q.manpath, req->p[0]);
854 	mandoc_asprintf(&opts,
855 	    "fragment,man=%s?query=%%N&sec=%%S%s%s%s%s",
856 	    scriptname,
857 	    req->q.arch	? "&arch="       : "",
858 	    req->q.arch	? req->q.arch    : "",
859 	    usepath	? "&manpath="    : "",
860 	    usepath	? req->q.manpath : "");
861 
862 	mparse_result(mp, &mdoc, &man, NULL);
863 	if (NULL == man && NULL == mdoc) {
864 		fprintf(stderr, "fatal mandoc error: %s/%s\n",
865 		    req->q.manpath, file);
866 		pg_error_internal();
867 		mparse_free(mp);
868 		mchars_free(mchars);
869 		return;
870 	}
871 
872 	vp = html_alloc(mchars, opts);
873 
874 	if (NULL != mdoc)
875 		html_mdoc(vp, mdoc);
876 	else
877 		html_man(vp, man);
878 
879 	html_free(vp);
880 	mparse_free(mp);
881 	mchars_free(mchars);
882 	free(opts);
883 }
884 
/*
 * Write one manual page into the response.
 * After skipping an optional leading "./", file names starting
 * with 'c' are handled as preformatted pages by catman(),
 * all others as manual sources by format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name != 'c') {
		format(req, name);
		return;
	}
	catman(req, name);
}
897 
898 static void
899 pg_show(struct req *req, const char *fullpath)
900 {
901 	char		*manpath;
902 	const char	*file;
903 
904 	if ((file = strchr(fullpath, '/')) == NULL) {
905 		pg_error_badrequest(
906 		    "You did not specify a page to show.");
907 		return;
908 	}
909 	manpath = mandoc_strndup(fullpath, file - fullpath);
910 	file++;
911 
912 	if ( ! validate_manpath(req, manpath)) {
913 		pg_error_badrequest(
914 		    "You specified an invalid manpath.");
915 		free(manpath);
916 		return;
917 	}
918 
919 	/*
920 	 * Begin by chdir()ing into the manpath.
921 	 * This way we can pick up the database files, which are
922 	 * relative to the manpath root.
923 	 */
924 
925 	if (chdir(manpath) == -1) {
926 		fprintf(stderr, "chdir %s: %s\n",
927 		    manpath, strerror(errno));
928 		pg_error_internal();
929 		free(manpath);
930 		return;
931 	}
932 
933 	if (strcmp(manpath, "mandoc")) {
934 		free(req->q.manpath);
935 		req->q.manpath = manpath;
936 	} else
937 		free(manpath);
938 
939 	if ( ! validate_filename(file)) {
940 		pg_error_badrequest(
941 		    "You specified an invalid manual file.");
942 		return;
943 	}
944 
945 	resp_begin_html(200, NULL);
946 	resp_searchform(req);
947 	resp_show(req, file);
948 	resp_end_html();
949 }
950 
951 static void
952 pg_search(const struct req *req)
953 {
954 	struct mansearch	  search;
955 	struct manpaths		  paths;
956 	struct manpage		 *res;
957 	char			**argv;
958 	char			 *query, *rp, *wp;
959 	size_t			  ressz;
960 	int			  argc;
961 
962 	/*
963 	 * Begin by chdir()ing into the root of the manpath.
964 	 * This way we can pick up the database files, which are
965 	 * relative to the manpath root.
966 	 */
967 
968 	if (-1 == (chdir(req->q.manpath))) {
969 		fprintf(stderr, "chdir %s: %s\n",
970 		    req->q.manpath, strerror(errno));
971 		pg_error_internal();
972 		return;
973 	}
974 
975 	search.arch = req->q.arch;
976 	search.sec = req->q.sec;
977 	search.outkey = "Nd";
978 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
979 	search.firstmatch = 1;
980 
981 	paths.sz = 1;
982 	paths.paths = mandoc_malloc(sizeof(char *));
983 	paths.paths[0] = mandoc_strdup(".");
984 
985 	/*
986 	 * Break apart at spaces with backslash-escaping.
987 	 */
988 
989 	argc = 0;
990 	argv = NULL;
991 	rp = query = mandoc_strdup(req->q.query);
992 	for (;;) {
993 		while (isspace((unsigned char)*rp))
994 			rp++;
995 		if (*rp == '\0')
996 			break;
997 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
998 		argv[argc++] = wp = rp;
999 		for (;;) {
1000 			if (isspace((unsigned char)*rp)) {
1001 				*wp = '\0';
1002 				rp++;
1003 				break;
1004 			}
1005 			if (rp[0] == '\\' && rp[1] != '\0')
1006 				rp++;
1007 			if (wp != rp)
1008 				*wp = *rp;
1009 			if (*rp == '\0')
1010 				break;
1011 			wp++;
1012 			rp++;
1013 		}
1014 	}
1015 
1016 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
1017 		pg_noresult(req, "You entered an invalid query.");
1018 	else if (0 == ressz)
1019 		pg_noresult(req, "No results found.");
1020 	else
1021 		pg_searchres(req, res, ressz);
1022 
1023 	free(query);
1024 	mansearch_free(res, ressz);
1025 	free(paths.paths[0]);
1026 	free(paths.paths);
1027 }
1028 
1029 int
1030 main(void)
1031 {
1032 	struct req	 req;
1033 	struct itimerval itimer;
1034 	const char	*path;
1035 	const char	*querystring;
1036 	int		 i;
1037 
1038 	/* Poor man's ReDoS mitigation. */
1039 
1040 	itimer.it_value.tv_sec = 2;
1041 	itimer.it_value.tv_usec = 0;
1042 	itimer.it_interval.tv_sec = 2;
1043 	itimer.it_interval.tv_usec = 0;
1044 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1045 		fprintf(stderr, "setitimer: %s\n", strerror(errno));
1046 		pg_error_internal();
1047 		return(EXIT_FAILURE);
1048 	}
1049 
1050 	/* Scan our run-time environment. */
1051 
1052 	if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1053 		scriptname = "";
1054 
1055 	if ( ! validate_urifrag(scriptname)) {
1056 		fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1057 		    scriptname);
1058 		pg_error_internal();
1059 		return(EXIT_FAILURE);
1060 	}
1061 
1062 	/*
1063 	 * First we change directory into the MAN_DIR so that
1064 	 * subsequent scanning for manpath directories is rooted
1065 	 * relative to the same position.
1066 	 */
1067 
1068 	if (-1 == chdir(MAN_DIR)) {
1069 		fprintf(stderr, "MAN_DIR: %s: %s\n",
1070 		    MAN_DIR, strerror(errno));
1071 		pg_error_internal();
1072 		return(EXIT_FAILURE);
1073 	}
1074 
1075 	memset(&req, 0, sizeof(struct req));
1076 	pathgen(&req);
1077 
1078 	/* Next parse out the query string. */
1079 
1080 	if (NULL != (querystring = getenv("QUERY_STRING")))
1081 		http_parse(&req, querystring);
1082 
1083 	if ( ! (NULL == req.q.manpath ||
1084 	    validate_manpath(&req, req.q.manpath))) {
1085 		pg_error_badrequest(
1086 		    "You specified an invalid manpath.");
1087 		return(EXIT_FAILURE);
1088 	}
1089 
1090 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1091 		pg_error_badrequest(
1092 		    "You specified an invalid architecture.");
1093 		return(EXIT_FAILURE);
1094 	}
1095 
1096 	/* Dispatch to the three different pages. */
1097 
1098 	path = getenv("PATH_INFO");
1099 	if (NULL == path)
1100 		path = "";
1101 	else if ('/' == *path)
1102 		path++;
1103 
1104 	if ('\0' != *path)
1105 		pg_show(&req, path);
1106 	else if (NULL != req.q.query)
1107 		pg_search(&req);
1108 	else
1109 		pg_index(&req);
1110 
1111 	free(req.q.manpath);
1112 	free(req.q.arch);
1113 	free(req.q.sec);
1114 	free(req.q.query);
1115 	for (i = 0; i < (int)req.psz; i++)
1116 		free(req.p[i]);
1117 	free(req.p);
1118 	return(EXIT_SUCCESS);
1119 }
1120 
1121 /*
1122  * Scan for indexable paths.
1123  */
1124 static void
1125 pathgen(struct req *req)
1126 {
1127 	FILE	*fp;
1128 	char	*dp;
1129 	size_t	 dpsz;
1130 
1131 	if (NULL == (fp = fopen("manpath.conf", "r"))) {
1132 		fprintf(stderr, "%s/manpath.conf: %s\n",
1133 			MAN_DIR, strerror(errno));
1134 		pg_error_internal();
1135 		exit(EXIT_FAILURE);
1136 	}
1137 
1138 	while (NULL != (dp = fgetln(fp, &dpsz))) {
1139 		if ('\n' == dp[dpsz - 1])
1140 			dpsz--;
1141 		req->p = mandoc_realloc(req->p,
1142 		    (req->psz + 1) * sizeof(char *));
1143 		dp = mandoc_strndup(dp, dpsz);
1144 		if ( ! validate_urifrag(dp)) {
1145 			fprintf(stderr, "%s/manpath.conf contains "
1146 			    "unsafe path \"%s\"\n", MAN_DIR, dp);
1147 			pg_error_internal();
1148 			exit(EXIT_FAILURE);
1149 		}
1150 		if (NULL != strchr(dp, '/')) {
1151 			fprintf(stderr, "%s/manpath.conf contains "
1152 			    "path with slash \"%s\"\n", MAN_DIR, dp);
1153 			pg_error_internal();
1154 			exit(EXIT_FAILURE);
1155 		}
1156 		req->p[req->psz++] = dp;
1157 	}
1158 
1159 	if ( req->p == NULL ) {
1160 		fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1161 		pg_error_internal();
1162 		exit(EXIT_FAILURE);
1163 	}
1164 }
1165