xref: /netbsd-src/external/bsd/mdocml/dist/cgi.c (revision fdd524d4ccd2bb0c6f67401e938dabf773eb0372)
1 /*	$Id: cgi.c,v 1.1.1.4 2015/12/17 21:58:48 christos Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <limits.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32 
33 #include "mandoc.h"
34 #include "mandoc_aux.h"
35 #include "main.h"
36 #include "manpath.h"
37 #include "mansearch.h"
38 #include "cgi.h"
39 
/*
 * A query as passed to the search function.
 * http_parse() resets every string member to NULL and "equal" to 1
 * before filling in whatever the QUERY_STRING supplies; an empty
 * value is stored as NULL (see set_query_attr()).
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
50 
/*
 * State of one CGI request: the parsed query together with the
 * list of manpaths that pathgen() reads from manpath.conf.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
};
56 
/* Forward declarations for the file-local functions defined below. */
static	void		 catman(const struct req *, const char *);
static	void		 format(const struct req *, const char *);
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_parse(struct req *, const char *);
static	void		 http_print(const char *);
static	void		 http_putchar(char);
static	void		 http_printquery(const struct req *, const char *);
static	void		 pathgen(struct req *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_end_html(void);
static	void		 resp_searchform(const struct req *);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
84 
/*
 * Set once in main() from the SCRIPT_NAME environment variable
 * (the empty string if unset) and checked with validate_urifrag()
 * before it is used in any output.
 */
static	const char	 *scriptname; /* CGI script name */
86 
/*
 * Preference rank of the manual sections 1 to 9, indexed by
 * (section digit - '1'); pg_searchres() prefers the lowest rank.
 */
static	const int sec_prios[] = {
	1,	/* section 1 */
	4,	/* section 2 */
	5,	/* section 3 */
	8,	/* section 4 */
	6,	/* section 5 */
	3,	/* section 6 */
	7,	/* section 7 */
	2,	/* section 8 */
	9	/* section 9 */
};

/*
 * Values and labels of the section selector in the search form.
 * Both tables are indexed in parallel and must have equal length.
 */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
105 
/*
 * Architecture names offered by the architecture selector of the
 * search form, in the order in which they are listed there.
 */
static	const char *const arch_names[] = {
	"amd64", "alpha", "armish", "armv7", "aviion",
	"hppa", "hppa64", "i386", "ia64", "landisk",
	"loongson", "luna88k", "macppc", "mips64", "octeon",
	"sgi", "socppc", "solbourne", "sparc", "sparc64",
	"vax", "zaurus",
	"amiga", "arc", "arm32", "atari", "beagle",
	"cats", "hp300", "mac68k", "mvme68k", "mvme88k",
	"mvmeppc", "palm", "pc532", "pegasos", "pmax",
	"powerpc", "sun3", "wgrisc", "x68k"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
120 
/*
 * Print one character to standard output, escaping it for HTML.
 * The characters '"', '&', '>' and '<' are replaced by the
 * corresponding character entity references; every other byte,
 * including non-ASCII, is passed straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The entity is named "quot"; "quote" is not defined. */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
147 
148 static void
149 http_printquery(const struct req *req, const char *sep)
150 {
151 
152 	if (NULL != req->q.query) {
153 		printf("query=");
154 		http_print(req->q.query);
155 	}
156 	if (0 == req->q.equal)
157 		printf("%sapropos=1", sep);
158 	if (NULL != req->q.sec) {
159 		printf("%ssec=", sep);
160 		http_print(req->q.sec);
161 	}
162 	if (NULL != req->q.arch) {
163 		printf("%sarch=", sep);
164 		http_print(req->q.arch);
165 	}
166 	if (strcmp(req->q.manpath, req->p[0])) {
167 		printf("%smanpath=", sep);
168 		http_print(req->q.manpath);
169 	}
170 }
171 
/*
 * Print a string with http_putchar(), one character at a time.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
http_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		http_putchar(*cp);
}
181 
/*
 * Print a string with html_putchar(), one character at a time.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
195 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases *val is NULL on return, so the caller no longer
 * owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);
	incoming = *val;
	if (incoming[0] != '\0')
		*attr = incoming;
	else {
		*attr = NULL;
		free(incoming);
	}
	*val = NULL;
}
212 
213 /*
214  * Parse the QUERY_STRING for key-value pairs
215  * and store the values into the query structure.
216  */
217 static void
218 http_parse(struct req *req, const char *qs)
219 {
220 	char		*key, *val;
221 	size_t		 keysz, valsz;
222 
223 	req->q.manpath	= NULL;
224 	req->q.arch	= NULL;
225 	req->q.sec	= NULL;
226 	req->q.query	= NULL;
227 	req->q.equal	= 1;
228 
229 	key = val = NULL;
230 	while (*qs != '\0') {
231 
232 		/* Parse one key. */
233 
234 		keysz = strcspn(qs, "=;&");
235 		key = mandoc_strndup(qs, keysz);
236 		qs += keysz;
237 		if (*qs != '=')
238 			goto next;
239 
240 		/* Parse one value. */
241 
242 		valsz = strcspn(++qs, ";&");
243 		val = mandoc_strndup(qs, valsz);
244 		qs += valsz;
245 
246 		/* Decode and catch encoding errors. */
247 
248 		if ( ! (http_decode(key) && http_decode(val)))
249 			goto next;
250 
251 		/* Handle key-value pairs. */
252 
253 		if ( ! strcmp(key, "query"))
254 			set_query_attr(&req->q.query, &val);
255 
256 		else if ( ! strcmp(key, "apropos"))
257 			req->q.equal = !strcmp(val, "0");
258 
259 		else if ( ! strcmp(key, "manpath")) {
260 #ifdef COMPAT_OLDURI
261 			if ( ! strncmp(val, "OpenBSD ", 8)) {
262 				val[7] = '-';
263 				if ('C' == val[8])
264 					val[8] = 'c';
265 			}
266 #endif
267 			set_query_attr(&req->q.manpath, &val);
268 		}
269 
270 		else if ( ! (strcmp(key, "sec")
271 #ifdef COMPAT_OLDURI
272 		    && strcmp(key, "sektion")
273 #endif
274 		    )) {
275 			if ( ! strcmp(val, "0"))
276 				*val = '\0';
277 			set_query_attr(&req->q.sec, &val);
278 		}
279 
280 		else if ( ! strcmp(key, "arch")) {
281 			if ( ! strcmp(val, "default"))
282 				*val = '\0';
283 			set_query_attr(&req->q.arch, &val);
284 		}
285 
286 		/*
287 		 * The key must be freed in any case.
288 		 * The val may have been handed over to the query
289 		 * structure, in which case it is now NULL.
290 		 */
291 next:
292 		free(key);
293 		key = NULL;
294 		free(val);
295 		val = NULL;
296 
297 		if (*qs != '\0')
298 			qs++;
299 	}
300 }
301 
/*
 * Print one character to standard output, encoded for use in the
 * query part of a URI: alphanumeric characters are printed as they
 * are, a space becomes '+', and every other byte is printed as '%'
 * followed by exactly two lower-case hexadecimal digits.
 */
static void
http_putchar(char c)
{
	const unsigned char	 uc = (unsigned char)c;

	if (isalnum(uc)) {
		putchar(uc);
		return;
	} else if (' ' == c) {
		putchar('+');
		return;
	}

	/*
	 * Format the unsigned byte value.  Where char is signed,
	 * passing c itself would sign-extend bytes >= 0x80 and
	 * print eight hexadecimal digits instead of two.
	 */
	printf("%%%.2x", (unsigned int)uc);
}
315 
316 /*
317  * HTTP-decode a string.  The standard explanation is that this turns
318  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
319  * over the allocated string.
320  */
321 static int
322 http_decode(char *p)
323 {
324 	char             hex[3];
325 	char		*q;
326 	int              c;
327 
328 	hex[2] = '\0';
329 
330 	q = p;
331 	for ( ; '\0' != *p; p++, q++) {
332 		if ('%' == *p) {
333 			if ('\0' == (hex[0] = *(p + 1)))
334 				return(0);
335 			if ('\0' == (hex[1] = *(p + 2)))
336 				return(0);
337 			if (1 != sscanf(hex, "%x", &c))
338 				return(0);
339 			if ('\0' == c)
340 				return(0);
341 
342 			*q = (char)c;
343 			p += 2;
344 		} else
345 			*q = '+' == *p ? ' ' : *p;
346 	}
347 
348 	*q = '\0';
349 	return(1);
350 }
351 
/*
 * Print the HTTP response headers and flush standard output.
 * A "Status:" line with code and msg is only printed when code
 * is not 200; msg is not used otherwise.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 headers[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(headers, stdout);
	fflush(stdout);
}
366 
367 static void
368 resp_begin_html(int code, const char *msg)
369 {
370 
371 	resp_begin_http(code, msg);
372 
373 	printf("<!DOCTYPE html>\n"
374 	       "<HTML>\n"
375 	       "<HEAD>\n"
376 	       "<META CHARSET=\"UTF-8\" />\n"
377 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
378 	       " TYPE=\"text/css\" media=\"all\">\n"
379 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
380 	       " TYPE=\"text/css\" media=\"all\">\n"
381 	       "<TITLE>%s</TITLE>\n"
382 	       "</HEAD>\n"
383 	       "<BODY>\n"
384 	       "<!-- Begin page content. //-->\n",
385 	       CSS_DIR, CSS_DIR, CUSTOMIZE_TITLE);
386 }
387 
/*
 * Print the closing tags of the HTML document.
 */
static void
resp_end_html(void)
{

	fputs("</BODY>\n", stdout);
	fputs("</HTML>\n", stdout);
}
395 
396 static void
397 resp_searchform(const struct req *req)
398 {
399 	int		 i;
400 
401 	puts(CUSTOMIZE_BEGIN);
402 	puts("<!-- Begin search form. //-->");
403 	printf("<DIV ID=\"mancgi\">\n"
404 	       "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
405 	       "<FIELDSET>\n"
406 	       "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
407 	       scriptname);
408 
409 	/* Write query input box. */
410 
411 	printf(	"<TABLE><TR><TD>\n"
412 		"<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
413 	if (NULL != req->q.query)
414 		html_print(req->q.query);
415 	puts("\" SIZE=\"40\">");
416 
417 	/* Write submission and reset buttons. */
418 
419 	printf(	"<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
420 		"<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
421 
422 	/* Write show radio button */
423 
424 	printf(	"</TD><TD>\n"
425 		"<INPUT TYPE=\"radio\" ");
426 	if (req->q.equal)
427 		printf("CHECKED=\"checked\" ");
428 	printf(	"NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
429 		"<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
430 
431 	/* Write section selector. */
432 
433 	puts(	"</TD></TR><TR><TD>\n"
434 		"<SELECT NAME=\"sec\">");
435 	for (i = 0; i < sec_MAX; i++) {
436 		printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
437 		if (NULL != req->q.sec &&
438 		    0 == strcmp(sec_numbers[i], req->q.sec))
439 			printf(" SELECTED=\"selected\"");
440 		printf(">%s</OPTION>\n", sec_names[i]);
441 	}
442 	puts("</SELECT>");
443 
444 	/* Write architecture selector. */
445 
446 	printf(	"<SELECT NAME=\"arch\">\n"
447 		"<OPTION VALUE=\"default\"");
448 	if (NULL == req->q.arch)
449 		printf(" SELECTED=\"selected\"");
450 	puts(">All Architectures</OPTION>");
451 	for (i = 0; i < arch_MAX; i++) {
452 		printf("<OPTION VALUE=\"%s\"", arch_names[i]);
453 		if (NULL != req->q.arch &&
454 		    0 == strcmp(arch_names[i], req->q.arch))
455 			printf(" SELECTED=\"selected\"");
456 		printf(">%s</OPTION>\n", arch_names[i]);
457 	}
458 	puts("</SELECT>");
459 
460 	/* Write manpath selector. */
461 
462 	if (req->psz > 1) {
463 		puts("<SELECT NAME=\"manpath\">");
464 		for (i = 0; i < (int)req->psz; i++) {
465 			printf("<OPTION ");
466 			if (strcmp(req->q.manpath, req->p[i]) == 0)
467 				printf("SELECTED=\"selected\" ");
468 			printf("VALUE=\"");
469 			html_print(req->p[i]);
470 			printf("\">");
471 			html_print(req->p[i]);
472 			puts("</OPTION>");
473 		}
474 		puts("</SELECT>");
475 	}
476 
477 	/* Write search radio button */
478 
479 	printf(	"</TD><TD>\n"
480 		"<INPUT TYPE=\"radio\" ");
481 	if (0 == req->q.equal)
482 		printf("CHECKED=\"checked\" ");
483 	printf(	"NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
484 		"<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
485 
486 	puts("</TD></TR></TABLE>\n"
487 	     "</FIELDSET>\n"
488 	     "</FORM>\n"
489 	     "</DIV>");
490 	puts("<!-- End search form. //-->");
491 }
492 
/*
 * Check that a string is safe to embed into a URI without any
 * escaping: it may only contain alphanumeric characters and the
 * characters '-', '.', '/' and '_'.
 * Returns 1 if the string is safe (the empty string is), else 0.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return(0);
		}
	}
	return(1);
}
506 
507 static int
508 validate_manpath(const struct req *req, const char* manpath)
509 {
510 	size_t	 i;
511 
512 	if ( ! strcmp(manpath, "mandoc"))
513 		return(1);
514 
515 	for (i = 0; i < req->psz; i++)
516 		if ( ! strcmp(manpath, req->p[i]))
517 			return(1);
518 
519 	return(0);
520 }
521 
/*
 * Check that a file name relative to a manpath looks like a manual
 * page: after an optional leading "./", it must start with "man"
 * or "cat" and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (strstr(rel, "../") != NULL || strstr(rel, "/..") != NULL)
		return(0);
	if (strncmp(rel, "man", 3) != 0 && strncmp(rel, "cat", 3) != 0)
		return(0);
	return(1);
}
532 
533 static void
534 pg_index(const struct req *req)
535 {
536 
537 	resp_begin_html(200, NULL);
538 	resp_searchform(req);
539 	printf("<P>\n"
540 	       "This web interface is documented in the\n"
541 	       "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
542 	       "manual, and the\n"
543 	       "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
544 	       "manual explains the query syntax.\n"
545 	       "</P>\n",
546 	       scriptname, scriptname);
547 	resp_end_html();
548 }
549 
/*
 * Print a page consisting of the search form and one paragraph
 * containing msg.  The message is printed as it is, without
 * HTML escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<P>\n%s\n</P>\n", msg);
	resp_end_html();
}
560 
561 static void
562 pg_error_badrequest(const char *msg)
563 {
564 
565 	resp_begin_html(400, "Bad Request");
566 	puts("<H1>Bad Request</H1>\n"
567 	     "<P>\n");
568 	puts(msg);
569 	printf("Try again from the\n"
570 	       "<A HREF=\"%s\">main page</A>.\n"
571 	       "</P>", scriptname);
572 	resp_end_html();
573 }
574 
/*
 * Print a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Internal Server Error</P>\n", stdout);
	resp_end_html();
}
582 
583 static void
584 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
585 {
586 	char		*arch, *archend;
587 	size_t		 i, iuse, isec;
588 	int		 archprio, archpriouse;
589 	int		 prio, priouse;
590 	char		 sec;
591 
592 	for (i = 0; i < sz; i++) {
593 		if (validate_filename(r[i].file))
594 			continue;
595 		fprintf(stderr, "invalid filename %s in %s database\n",
596 		    r[i].file, req->q.manpath);
597 		pg_error_internal();
598 		return;
599 	}
600 
601 	if (1 == sz) {
602 		/*
603 		 * If we have just one result, then jump there now
604 		 * without any delay.
605 		 */
606 		printf("Status: 303 See Other\r\n");
607 		printf("Location: http://%s%s/%s/%s?",
608 		    HTTP_HOST, scriptname, req->q.manpath, r[0].file);
609 		http_printquery(req, "&");
610 		printf("\r\n"
611 		     "Content-Type: text/html; charset=utf-8\r\n"
612 		     "\r\n");
613 		return;
614 	}
615 
616 	resp_begin_html(200, NULL);
617 	resp_searchform(req);
618 	puts("<DIV CLASS=\"results\">");
619 	puts("<TABLE>");
620 
621 	for (i = 0; i < sz; i++) {
622 		printf("<TR>\n"
623 		       "<TD CLASS=\"title\">\n"
624 		       "<A HREF=\"%s/%s/%s?",
625 		    scriptname, req->q.manpath, r[i].file);
626 		http_printquery(req, "&amp;");
627 		printf("\">");
628 		html_print(r[i].names);
629 		printf("</A>\n"
630 		       "</TD>\n"
631 		       "<TD CLASS=\"desc\">");
632 		html_print(r[i].output);
633 		puts("</TD>\n"
634 		     "</TR>");
635 	}
636 
637 	puts("</TABLE>\n"
638 	     "</DIV>");
639 
640 	/*
641 	 * In man(1) mode, show one of the pages
642 	 * even if more than one is found.
643 	 */
644 
645 	if (req->q.equal) {
646 		puts("<HR>");
647 		iuse = 0;
648 		priouse = 10;
649 		archpriouse = 3;
650 		for (i = 0; i < sz; i++) {
651 			isec = strcspn(r[i].file, "123456789");
652 			sec = r[i].file[isec];
653 			if ('\0' == sec)
654 				continue;
655 			prio = sec_prios[sec - '1'];
656 			if (NULL == req->q.arch) {
657 				archprio =
658 				    (NULL == (arch = strchr(
659 					r[i].file + isec, '/'))) ? 3 :
660 				    (NULL == (archend = strchr(
661 					arch + 1, '/'))) ? 0 :
662 				    strncmp(arch, "amd64/",
663 					archend - arch) ? 2 : 1;
664 				if (archprio < archpriouse) {
665 					archpriouse = archprio;
666 					priouse = prio;
667 					iuse = i;
668 					continue;
669 				}
670 				if (archprio > archpriouse)
671 					continue;
672 			}
673 			if (prio >= priouse)
674 				continue;
675 			priouse = prio;
676 			iuse = i;
677 		}
678 		resp_show(req, r[iuse].file);
679 	}
680 
681 	resp_end_html();
682 }
683 
/*
 * Print a preformatted manual page (cat page) as HTML inside a
 * PRE element.  Backspace overstrike sequences are translated:
 * "_\bX" becomes italic X, a few known character pairs become
 * '*' or '+', and any other "X\bY" becomes bold Y.
 * If the file cannot be opened, a paragraph with an error
 * message is printed instead.  The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if (NULL == (f = fopen(file, "r"))) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while (NULL != (p = fgetln(f, &len))) {
		bold = italic = 0;
		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			/*
			 * The second character of each pair has to be
			 * looked up at p[i + 2]; p[i + 1] is always
			 * the backspace here and can never match.
			 */
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == (int)len - 1 && '\n' != p[i])
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
815 
816 static void
817 format(const struct req *req, const char *file)
818 {
819 	struct mparse	*mp;
820 	struct mchars	*mchars;
821 	struct mdoc	*mdoc;
822 	struct man	*man;
823 	void		*vp;
824 	char		*opts;
825 	int		 fd;
826 	int		 usepath;
827 
828 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
829 		puts("<P>You specified an invalid manual file.</P>");
830 		return;
831 	}
832 
833 	mchars = mchars_alloc();
834 	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL,
835 	    mchars, req->q.manpath);
836 	mparse_readfd(mp, fd, file);
837 	close(fd);
838 
839 	usepath = strcmp(req->q.manpath, req->p[0]);
840 	mandoc_asprintf(&opts,
841 	    "fragment,man=%s?query=%%N&sec=%%S%s%s%s%s",
842 	    scriptname,
843 	    req->q.arch	? "&arch="       : "",
844 	    req->q.arch	? req->q.arch    : "",
845 	    usepath	? "&manpath="    : "",
846 	    usepath	? req->q.manpath : "");
847 
848 	mparse_result(mp, &mdoc, &man, NULL);
849 	if (NULL == man && NULL == mdoc) {
850 		fprintf(stderr, "fatal mandoc error: %s/%s\n",
851 		    req->q.manpath, file);
852 		pg_error_internal();
853 		mparse_free(mp);
854 		mchars_free(mchars);
855 		return;
856 	}
857 
858 	vp = html_alloc(mchars, opts);
859 
860 	if (NULL != mdoc)
861 		html_mdoc(vp, mdoc);
862 	else
863 		html_man(vp, man);
864 
865 	html_free(vp);
866 	mparse_free(mp);
867 	mchars_free(mchars);
868 	free(opts);
869 }
870 
/*
 * Print one manual page.  A leading "./" is stripped from the
 * file name.  Names starting with 'c' are treated as preformatted
 * pages and passed to catman(), all others to format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel == 'c') {
		catman(req, rel);
		return;
	}
	format(req, rel);
}
883 
884 static void
885 pg_show(struct req *req, const char *fullpath)
886 {
887 	char		*manpath;
888 	const char	*file;
889 
890 	if ((file = strchr(fullpath, '/')) == NULL) {
891 		pg_error_badrequest(
892 		    "You did not specify a page to show.");
893 		return;
894 	}
895 	manpath = mandoc_strndup(fullpath, file - fullpath);
896 	file++;
897 
898 	if ( ! validate_manpath(req, manpath)) {
899 		pg_error_badrequest(
900 		    "You specified an invalid manpath.");
901 		free(manpath);
902 		return;
903 	}
904 
905 	/*
906 	 * Begin by chdir()ing into the manpath.
907 	 * This way we can pick up the database files, which are
908 	 * relative to the manpath root.
909 	 */
910 
911 	if (chdir(manpath) == -1) {
912 		fprintf(stderr, "chdir %s: %s\n",
913 		    manpath, strerror(errno));
914 		pg_error_internal();
915 		free(manpath);
916 		return;
917 	}
918 
919 	if (strcmp(manpath, "mandoc")) {
920 		free(req->q.manpath);
921 		req->q.manpath = manpath;
922 	} else
923 		free(manpath);
924 
925 	if ( ! validate_filename(file)) {
926 		pg_error_badrequest(
927 		    "You specified an invalid manual file.");
928 		return;
929 	}
930 
931 	resp_begin_html(200, NULL);
932 	resp_searchform(req);
933 	resp_show(req, file);
934 	resp_end_html();
935 }
936 
937 static void
938 pg_search(const struct req *req)
939 {
940 	struct mansearch	  search;
941 	struct manpaths		  paths;
942 	struct manpage		 *res;
943 	char			**argv;
944 	char			 *query, *rp, *wp;
945 	size_t			  ressz;
946 	int			  argc;
947 
948 	/*
949 	 * Begin by chdir()ing into the root of the manpath.
950 	 * This way we can pick up the database files, which are
951 	 * relative to the manpath root.
952 	 */
953 
954 	if (-1 == (chdir(req->q.manpath))) {
955 		fprintf(stderr, "chdir %s: %s\n",
956 		    req->q.manpath, strerror(errno));
957 		pg_error_internal();
958 		return;
959 	}
960 
961 	search.arch = req->q.arch;
962 	search.sec = req->q.sec;
963 	search.outkey = "Nd";
964 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
965 	search.firstmatch = 1;
966 
967 	paths.sz = 1;
968 	paths.paths = mandoc_malloc(sizeof(char *));
969 	paths.paths[0] = mandoc_strdup(".");
970 
971 	/*
972 	 * Break apart at spaces with backslash-escaping.
973 	 */
974 
975 	argc = 0;
976 	argv = NULL;
977 	rp = query = mandoc_strdup(req->q.query);
978 	for (;;) {
979 		while (isspace((unsigned char)*rp))
980 			rp++;
981 		if (*rp == '\0')
982 			break;
983 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
984 		argv[argc++] = wp = rp;
985 		for (;;) {
986 			if (isspace((unsigned char)*rp)) {
987 				*wp = '\0';
988 				rp++;
989 				break;
990 			}
991 			if (rp[0] == '\\' && rp[1] != '\0')
992 				rp++;
993 			if (wp != rp)
994 				*wp = *rp;
995 			if (*rp == '\0')
996 				break;
997 			wp++;
998 			rp++;
999 		}
1000 	}
1001 
1002 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
1003 		pg_noresult(req, "You entered an invalid query.");
1004 	else if (0 == ressz)
1005 		pg_noresult(req, "No results found.");
1006 	else
1007 		pg_searchres(req, res, ressz);
1008 
1009 	free(query);
1010 	mansearch_free(res, ressz);
1011 	free(paths.paths[0]);
1012 	free(paths.paths);
1013 }
1014 
1015 int
1016 main(void)
1017 {
1018 	struct req	 req;
1019 	struct itimerval itimer;
1020 	const char	*path;
1021 	const char	*querystring;
1022 	int		 i;
1023 
1024 	/* Poor man's ReDoS mitigation. */
1025 
1026 	itimer.it_value.tv_sec = 2;
1027 	itimer.it_value.tv_usec = 0;
1028 	itimer.it_interval.tv_sec = 2;
1029 	itimer.it_interval.tv_usec = 0;
1030 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1031 		fprintf(stderr, "setitimer: %s\n", strerror(errno));
1032 		pg_error_internal();
1033 		return(EXIT_FAILURE);
1034 	}
1035 
1036 	/* Scan our run-time environment. */
1037 
1038 	if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1039 		scriptname = "";
1040 
1041 	if ( ! validate_urifrag(scriptname)) {
1042 		fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1043 		    scriptname);
1044 		pg_error_internal();
1045 		return(EXIT_FAILURE);
1046 	}
1047 
1048 	/*
1049 	 * First we change directory into the MAN_DIR so that
1050 	 * subsequent scanning for manpath directories is rooted
1051 	 * relative to the same position.
1052 	 */
1053 
1054 	if (-1 == chdir(MAN_DIR)) {
1055 		fprintf(stderr, "MAN_DIR: %s: %s\n",
1056 		    MAN_DIR, strerror(errno));
1057 		pg_error_internal();
1058 		return(EXIT_FAILURE);
1059 	}
1060 
1061 	memset(&req, 0, sizeof(struct req));
1062 	pathgen(&req);
1063 
1064 	/* Next parse out the query string. */
1065 
1066 	if (NULL != (querystring = getenv("QUERY_STRING")))
1067 		http_parse(&req, querystring);
1068 
1069 	if (req.q.manpath == NULL)
1070 		req.q.manpath = mandoc_strdup(req.p[0]);
1071 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1072 		pg_error_badrequest(
1073 		    "You specified an invalid manpath.");
1074 		return(EXIT_FAILURE);
1075 	}
1076 
1077 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1078 		pg_error_badrequest(
1079 		    "You specified an invalid architecture.");
1080 		return(EXIT_FAILURE);
1081 	}
1082 
1083 	/* Dispatch to the three different pages. */
1084 
1085 	path = getenv("PATH_INFO");
1086 	if (NULL == path)
1087 		path = "";
1088 	else if ('/' == *path)
1089 		path++;
1090 
1091 	if ('\0' != *path)
1092 		pg_show(&req, path);
1093 	else if (NULL != req.q.query)
1094 		pg_search(&req);
1095 	else
1096 		pg_index(&req);
1097 
1098 	free(req.q.manpath);
1099 	free(req.q.arch);
1100 	free(req.q.sec);
1101 	free(req.q.query);
1102 	for (i = 0; i < (int)req.psz; i++)
1103 		free(req.p[i]);
1104 	free(req.p);
1105 	return(EXIT_SUCCESS);
1106 }
1107 
1108 /*
1109  * Scan for indexable paths.
1110  */
1111 static void
1112 pathgen(struct req *req)
1113 {
1114 	FILE	*fp;
1115 	char	*dp;
1116 	size_t	 dpsz;
1117 
1118 	if (NULL == (fp = fopen("manpath.conf", "r"))) {
1119 		fprintf(stderr, "%s/manpath.conf: %s\n",
1120 			MAN_DIR, strerror(errno));
1121 		pg_error_internal();
1122 		exit(EXIT_FAILURE);
1123 	}
1124 
1125 	while (NULL != (dp = fgetln(fp, &dpsz))) {
1126 		if ('\n' == dp[dpsz - 1])
1127 			dpsz--;
1128 		req->p = mandoc_realloc(req->p,
1129 		    (req->psz + 1) * sizeof(char *));
1130 		dp = mandoc_strndup(dp, dpsz);
1131 		if ( ! validate_urifrag(dp)) {
1132 			fprintf(stderr, "%s/manpath.conf contains "
1133 			    "unsafe path \"%s\"\n", MAN_DIR, dp);
1134 			pg_error_internal();
1135 			exit(EXIT_FAILURE);
1136 		}
1137 		if (NULL != strchr(dp, '/')) {
1138 			fprintf(stderr, "%s/manpath.conf contains "
1139 			    "path with slash \"%s\"\n", MAN_DIR, dp);
1140 			pg_error_internal();
1141 			exit(EXIT_FAILURE);
1142 		}
1143 		req->p[req->psz++] = dp;
1144 	}
1145 
1146 	if ( req->p == NULL ) {
1147 		fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1148 		pg_error_internal();
1149 		exit(EXIT_FAILURE);
1150 	}
1151 }
1152