xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision cb39b41371628601fbe4c618205356d538b9d08a)
1 /*	$OpenBSD: cgi.c,v 1.47 2015/04/18 16:34:03 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <limits.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc.h"
33 #include "roff.h"
34 #include "main.h"
35 #include "manconf.h"
36 #include "mansearch.h"
37 #include "cgi.h"
38 
/*
 * A query as passed to the search function.
 * The string members are filled in by http_parse() by way of
 * set_query_attr(), which stores heap-allocated strings and leaves
 * a member NULL when the corresponding value is empty.
 * main() frees all four strings before exiting successfully.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
49 
/*
 * The state of one CGI request: the query parameters
 * plus the list of manpaths that pathgen() reads from manpath.conf.
 */
struct	req {
	struct query	  q; /* parameters of the current request */
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
};
55 
/*
 * Prototypes of the static functions defined in this file.
 */
static	void		 catman(const struct req *, const char *);
static	void		 format(const struct req *, const char *);
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_parse(struct req *, const char *);
static	void		 http_print(const char *);
static	void		 http_putchar(char);
static	void		 http_printquery(const struct req *, const char *);
static	void		 pathgen(struct req *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_end_html(void);
static	void		 resp_searchform(const struct req *);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
83 
/*
 * Set once in main() from the SCRIPT_NAME environment variable
 * (or to "" if that is unset) and checked with validate_urifrag()
 * before any page is printed.
 */
static	const char	 *scriptname; /* CGI script name */

/*
 * Section preferences used by pg_searchres() to pick the page to
 * show when several match: indexed by the section digit minus '1',
 * and the entry with the smallest value wins.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Parallel arrays for the section selector of the search form:
 * sec_numbers[i] is the form value and sec_names[i] its label.
 * Both are iterated up to sec_MAX, which is derived from sec_names
 * only, so the two arrays must be kept at the same length.
 * The value "0" is turned into "no section" by http_parse().
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
104 
/*
 * Architecture names offered by the architecture selector of the
 * search form; each name is used both as the form value and as the
 * visible label.  arch_MAX is the number of entries.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armish",      "armv7",
    "aviion",      "hppa",        "hppa64",      "i386",
    "ia64",        "landisk",     "loongson",    "luna88k",
    "macppc",      "mips64",      "octeon",      "sgi",
    "socppc",      "solbourne",   "sparc",       "sparc64",
    "vax",         "zaurus",
    "amiga",       "arc",         "arm32",       "atari",
    "beagle",      "cats",        "hp300",       "mac68k",
    "mvme68k",     "mvme88k",     "mvmeppc",     "palm",
    "pc532",       "pegasos",     "pmax",        "powerpc",
    "sun3",        "wgrisc",      "x68k"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
119 
/*
 * Print a single character to standard output, escaping HTML
 * along the way: the double quote, the ampersand and the two
 * angle brackets are replaced by character entity references.
 * This will pass everything else, including non-ASCII bytes,
 * straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The entity is called "quot"; "&quote;" does not exist. */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
146 
147 static void
148 http_printquery(const struct req *req, const char *sep)
149 {
150 
151 	if (NULL != req->q.query) {
152 		printf("query=");
153 		http_print(req->q.query);
154 	}
155 	if (0 == req->q.equal)
156 		printf("%sapropos=1", sep);
157 	if (NULL != req->q.sec) {
158 		printf("%ssec=", sep);
159 		http_print(req->q.sec);
160 	}
161 	if (NULL != req->q.arch) {
162 		printf("%sarch=", sep);
163 		http_print(req->q.arch);
164 	}
165 	if (strcmp(req->q.manpath, req->p[0])) {
166 		printf("%smanpath=", sep);
167 		http_print(req->q.manpath);
168 	}
169 }
170 
/*
 * Write a string to standard output, URI-encoding each
 * character with http_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
http_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			http_putchar(*cp);
}
180 
/*
 * Write a string to standard output, HTML-escaping each
 * character with html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
194 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Whatever *attr held before is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In both cases, *val is NULL on return, so the caller no longer
 * owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	if (*incoming != '\0') {
		*attr = incoming;
	} else {
		free(incoming);
		*attr = NULL;
	}

	*val = NULL;
}
211 
212 /*
213  * Parse the QUERY_STRING for key-value pairs
214  * and store the values into the query structure.
215  */
216 static void
217 http_parse(struct req *req, const char *qs)
218 {
219 	char		*key, *val;
220 	size_t		 keysz, valsz;
221 
222 	req->q.manpath	= NULL;
223 	req->q.arch	= NULL;
224 	req->q.sec	= NULL;
225 	req->q.query	= NULL;
226 	req->q.equal	= 1;
227 
228 	key = val = NULL;
229 	while (*qs != '\0') {
230 
231 		/* Parse one key. */
232 
233 		keysz = strcspn(qs, "=;&");
234 		key = mandoc_strndup(qs, keysz);
235 		qs += keysz;
236 		if (*qs != '=')
237 			goto next;
238 
239 		/* Parse one value. */
240 
241 		valsz = strcspn(++qs, ";&");
242 		val = mandoc_strndup(qs, valsz);
243 		qs += valsz;
244 
245 		/* Decode and catch encoding errors. */
246 
247 		if ( ! (http_decode(key) && http_decode(val)))
248 			goto next;
249 
250 		/* Handle key-value pairs. */
251 
252 		if ( ! strcmp(key, "query"))
253 			set_query_attr(&req->q.query, &val);
254 
255 		else if ( ! strcmp(key, "apropos"))
256 			req->q.equal = !strcmp(val, "0");
257 
258 		else if ( ! strcmp(key, "manpath")) {
259 #ifdef COMPAT_OLDURI
260 			if ( ! strncmp(val, "OpenBSD ", 8)) {
261 				val[7] = '-';
262 				if ('C' == val[8])
263 					val[8] = 'c';
264 			}
265 #endif
266 			set_query_attr(&req->q.manpath, &val);
267 		}
268 
269 		else if ( ! (strcmp(key, "sec")
270 #ifdef COMPAT_OLDURI
271 		    && strcmp(key, "sektion")
272 #endif
273 		    )) {
274 			if ( ! strcmp(val, "0"))
275 				*val = '\0';
276 			set_query_attr(&req->q.sec, &val);
277 		}
278 
279 		else if ( ! strcmp(key, "arch")) {
280 			if ( ! strcmp(val, "default"))
281 				*val = '\0';
282 			set_query_attr(&req->q.arch, &val);
283 		}
284 
285 		/*
286 		 * The key must be freed in any case.
287 		 * The val may have been handed over to the query
288 		 * structure, in which case it is now NULL.
289 		 */
290 next:
291 		free(key);
292 		key = NULL;
293 		free(val);
294 		val = NULL;
295 
296 		if (*qs != '\0')
297 			qs++;
298 	}
299 }
300 
/*
 * Print a single character to standard output in URI-encoded form:
 * alphanumeric characters are printed as they are, the blank
 * becomes '+', and every other byte is printed as '%' followed
 * by exactly two lower-case hexadecimal digits.
 */
static void
http_putchar(char c)
{
	unsigned char	 uc;

	uc = (unsigned char)c;

	if (isalnum(uc)) {
		putchar(uc);
		return;
	} else if (' ' == c) {
		putchar('+');
		return;
	}

	/*
	 * Format the unsigned value: where char is signed, passing
	 * c itself would sign-extend bytes above 0x7f and print
	 * eight hex digits instead of two.
	 */
	printf("%%%.2x", (unsigned int)uc);
}
314 
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way.  This is done in-place
 * over the allocated string, which can only get shorter.
 *
 * Returns 1 on success.  Returns 0 on an encoding error, that is
 * if a '%' is not followed by exactly two hexadecimal digits or
 * if it encodes a NUL byte; the string content is then unspecified.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	int		 c;

	hex[2] = '\0';

	q = p;
	for ( ; '\0' != *p; p++, q++) {
		if ('%' != *p) {
			*q = '+' == *p ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two real hex digits.  This also catches
		 * a truncated sequence because the terminating NUL
		 * is not a hex digit, and it rejects signs, blanks
		 * and "0x" prefixes a generic number parser would take.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return(0);
		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the string. */

		if (0 == c)
			return(0);

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return(1);
}
350 
/*
 * Print the CGI response header and flush standard output.
 * A "Status:" line built from code and msg is only sent for codes
 * other than 200; msg is not used for code 200.
 * The response is always declared as uncacheable UTF-8 HTML.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
365 
366 static void
367 resp_begin_html(int code, const char *msg)
368 {
369 
370 	resp_begin_http(code, msg);
371 
372 	printf("<!DOCTYPE html>\n"
373 	       "<HTML>\n"
374 	       "<HEAD>\n"
375 	       "<META CHARSET=\"UTF-8\" />\n"
376 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
377 	       " TYPE=\"text/css\" media=\"all\">\n"
378 	       "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
379 	       " TYPE=\"text/css\" media=\"all\">\n"
380 	       "<TITLE>%s</TITLE>\n"
381 	       "</HEAD>\n"
382 	       "<BODY>\n"
383 	       "<!-- Begin page content. //-->\n",
384 	       CSS_DIR, CSS_DIR, CUSTOMIZE_TITLE);
385 }
386 
/*
 * Close the document body and the document itself.
 */
static void
resp_end_html(void)
{

	fputs("</BODY>\n", stdout);
	fputs("</HTML>\n", stdout);
}
394 
395 static void
396 resp_searchform(const struct req *req)
397 {
398 	int		 i;
399 
400 	puts(CUSTOMIZE_BEGIN);
401 	puts("<!-- Begin search form. //-->");
402 	printf("<DIV ID=\"mancgi\">\n"
403 	       "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
404 	       "<FIELDSET>\n"
405 	       "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
406 	       scriptname);
407 
408 	/* Write query input box. */
409 
410 	printf(	"<TABLE><TR><TD>\n"
411 		"<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
412 	if (NULL != req->q.query)
413 		html_print(req->q.query);
414 	puts("\" SIZE=\"40\">");
415 
416 	/* Write submission and reset buttons. */
417 
418 	printf(	"<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
419 		"<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
420 
421 	/* Write show radio button */
422 
423 	printf(	"</TD><TD>\n"
424 		"<INPUT TYPE=\"radio\" ");
425 	if (req->q.equal)
426 		printf("CHECKED=\"checked\" ");
427 	printf(	"NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
428 		"<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
429 
430 	/* Write section selector. */
431 
432 	puts(	"</TD></TR><TR><TD>\n"
433 		"<SELECT NAME=\"sec\">");
434 	for (i = 0; i < sec_MAX; i++) {
435 		printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
436 		if (NULL != req->q.sec &&
437 		    0 == strcmp(sec_numbers[i], req->q.sec))
438 			printf(" SELECTED=\"selected\"");
439 		printf(">%s</OPTION>\n", sec_names[i]);
440 	}
441 	puts("</SELECT>");
442 
443 	/* Write architecture selector. */
444 
445 	printf(	"<SELECT NAME=\"arch\">\n"
446 		"<OPTION VALUE=\"default\"");
447 	if (NULL == req->q.arch)
448 		printf(" SELECTED=\"selected\"");
449 	puts(">All Architectures</OPTION>");
450 	for (i = 0; i < arch_MAX; i++) {
451 		printf("<OPTION VALUE=\"%s\"", arch_names[i]);
452 		if (NULL != req->q.arch &&
453 		    0 == strcmp(arch_names[i], req->q.arch))
454 			printf(" SELECTED=\"selected\"");
455 		printf(">%s</OPTION>\n", arch_names[i]);
456 	}
457 	puts("</SELECT>");
458 
459 	/* Write manpath selector. */
460 
461 	if (req->psz > 1) {
462 		puts("<SELECT NAME=\"manpath\">");
463 		for (i = 0; i < (int)req->psz; i++) {
464 			printf("<OPTION ");
465 			if (strcmp(req->q.manpath, req->p[i]) == 0)
466 				printf("SELECTED=\"selected\" ");
467 			printf("VALUE=\"");
468 			html_print(req->p[i]);
469 			printf("\">");
470 			html_print(req->p[i]);
471 			puts("</OPTION>");
472 		}
473 		puts("</SELECT>");
474 	}
475 
476 	/* Write search radio button */
477 
478 	printf(	"</TD><TD>\n"
479 		"<INPUT TYPE=\"radio\" ");
480 	if (0 == req->q.equal)
481 		printf("CHECKED=\"checked\" ");
482 	printf(	"NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
483 		"<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
484 
485 	puts("</TD></TR></TABLE>\n"
486 	     "</FIELDSET>\n"
487 	     "</FORM>\n"
488 	     "</DIV>");
489 	puts("<!-- End search form. //-->");
490 }
491 
/*
 * Check that a string is safe to embed into a URI without
 * encoding: it may only consist of alphanumeric characters
 * and of the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		/* *cp is not NUL here, so strchr() cannot match the end. */
		if (strchr("-./_", *cp) != NULL)
			continue;
		return(0);
	}
	return(1);
}
505 
506 static int
507 validate_manpath(const struct req *req, const char* manpath)
508 {
509 	size_t	 i;
510 
511 	if ( ! strcmp(manpath, "mandoc"))
512 		return(1);
513 
514 	for (i = 0; i < req->psz; i++)
515 		if ( ! strcmp(manpath, req->p[i]))
516 			return(1);
517 
518 	return(0);
519 }
520 
/*
 * Check that a file name relative to a manpath is acceptable.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (strncmp(file, "./", 2) == 0)
		file += 2;

	/* Never allow climbing out of the manpath. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return(0);

	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return(1);

	return(0);
}
531 
532 static void
533 pg_index(const struct req *req)
534 {
535 
536 	resp_begin_html(200, NULL);
537 	resp_searchform(req);
538 	printf("<P>\n"
539 	       "This web interface is documented in the\n"
540 	       "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
541 	       "manual, and the\n"
542 	       "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
543 	       "manual explains the query syntax.\n"
544 	       "</P>\n",
545 	       scriptname, scriptname);
546 	resp_end_html();
547 }
548 
/*
 * Print a page consisting of the search form and a paragraph
 * containing msg.  The message is printed without any escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<P>\n%s\n</P>\n", msg);
	resp_end_html();
}
559 
560 static void
561 pg_error_badrequest(const char *msg)
562 {
563 
564 	resp_begin_html(400, "Bad Request");
565 	puts("<H1>Bad Request</H1>\n"
566 	     "<P>\n");
567 	puts(msg);
568 	printf("Try again from the\n"
569 	       "<A HREF=\"%s\">main page</A>.\n"
570 	       "</P>", scriptname);
571 	resp_end_html();
572 }
573 
/*
 * Print a complete "500 Internal Server Error" page.
 * Details are deliberately not shown to the client; callers
 * report them on standard error instead.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Internal Server Error</P>\n", stdout);
	resp_end_html();
}
581 
582 static void
583 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
584 {
585 	char		*arch, *archend;
586 	size_t		 i, iuse, isec;
587 	int		 archprio, archpriouse;
588 	int		 prio, priouse;
589 	char		 sec;
590 
591 	for (i = 0; i < sz; i++) {
592 		if (validate_filename(r[i].file))
593 			continue;
594 		fprintf(stderr, "invalid filename %s in %s database\n",
595 		    r[i].file, req->q.manpath);
596 		pg_error_internal();
597 		return;
598 	}
599 
600 	if (1 == sz) {
601 		/*
602 		 * If we have just one result, then jump there now
603 		 * without any delay.
604 		 */
605 		printf("Status: 303 See Other\r\n");
606 		printf("Location: http://%s%s/%s/%s?",
607 		    HTTP_HOST, scriptname, req->q.manpath, r[0].file);
608 		http_printquery(req, "&");
609 		printf("\r\n"
610 		     "Content-Type: text/html; charset=utf-8\r\n"
611 		     "\r\n");
612 		return;
613 	}
614 
615 	resp_begin_html(200, NULL);
616 	resp_searchform(req);
617 	puts("<DIV CLASS=\"results\">");
618 	puts("<TABLE>");
619 
620 	for (i = 0; i < sz; i++) {
621 		printf("<TR>\n"
622 		       "<TD CLASS=\"title\">\n"
623 		       "<A HREF=\"%s/%s/%s?",
624 		    scriptname, req->q.manpath, r[i].file);
625 		http_printquery(req, "&amp;");
626 		printf("\">");
627 		html_print(r[i].names);
628 		printf("</A>\n"
629 		       "</TD>\n"
630 		       "<TD CLASS=\"desc\">");
631 		html_print(r[i].output);
632 		puts("</TD>\n"
633 		     "</TR>");
634 	}
635 
636 	puts("</TABLE>\n"
637 	     "</DIV>");
638 
639 	/*
640 	 * In man(1) mode, show one of the pages
641 	 * even if more than one is found.
642 	 */
643 
644 	if (req->q.equal) {
645 		puts("<HR>");
646 		iuse = 0;
647 		priouse = 10;
648 		archpriouse = 3;
649 		for (i = 0; i < sz; i++) {
650 			isec = strcspn(r[i].file, "123456789");
651 			sec = r[i].file[isec];
652 			if ('\0' == sec)
653 				continue;
654 			prio = sec_prios[sec - '1'];
655 			if (NULL == req->q.arch) {
656 				archprio =
657 				    (NULL == (arch = strchr(
658 					r[i].file + isec, '/'))) ? 3 :
659 				    (NULL == (archend = strchr(
660 					arch + 1, '/'))) ? 0 :
661 				    strncmp(arch, "amd64/",
662 					archend - arch) ? 2 : 1;
663 				if (archprio < archpriouse) {
664 					archpriouse = archprio;
665 					priouse = prio;
666 					iuse = i;
667 					continue;
668 				}
669 				if (archprio > archpriouse)
670 					continue;
671 			}
672 			if (prio >= priouse)
673 				continue;
674 			priouse = prio;
675 			iuse = i;
676 		}
677 		resp_show(req, r[iuse].file);
678 	}
679 
680 	resp_end_html();
681 }
682 
/*
 * Print a preformatted manual page (a "cat page") as HTML.
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: an underscore overstruck with a character makes
 * that character italic, a few special pairs are rendered as '*'
 * or '+', and any other overstruck pair makes the second character
 * bold.  All text is HTML-escaped by html_putchar().
 * If the file cannot be opened, an error paragraph is printed.
 * The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if (NULL == (f = fopen(file, "r"))) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while (NULL != (p = fgetln(f, &len))) {
		bold = italic = 0;
		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character struck over p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair are compared,
			 * so the second one is always p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == (int)len - 1 && '\n' != p[i])
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
814 
815 static void
816 format(const struct req *req, const char *file)
817 {
818 	struct manoutput conf;
819 	struct mparse	*mp;
820 	struct mchars	*mchars;
821 	struct roff_man	*man;
822 	void		*vp;
823 	int		 fd;
824 	int		 usepath;
825 
826 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
827 		puts("<P>You specified an invalid manual file.</P>");
828 		return;
829 	}
830 
831 	mchars = mchars_alloc();
832 	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL,
833 	    mchars, req->q.manpath);
834 	mparse_readfd(mp, fd, file);
835 	close(fd);
836 
837 	memset(&conf, 0, sizeof(conf));
838 	conf.fragment = 1;
839 	usepath = strcmp(req->q.manpath, req->p[0]);
840 	mandoc_asprintf(&conf.man, "%s?query=%%N&sec=%%S%s%s%s%s",
841 	    scriptname,
842 	    req->q.arch	? "&arch="       : "",
843 	    req->q.arch	? req->q.arch    : "",
844 	    usepath	? "&manpath="    : "",
845 	    usepath	? req->q.manpath : "");
846 
847 	mparse_result(mp, &man, NULL);
848 	if (man == NULL) {
849 		fprintf(stderr, "fatal mandoc error: %s/%s\n",
850 		    req->q.manpath, file);
851 		pg_error_internal();
852 		mparse_free(mp);
853 		mchars_free(mchars);
854 		return;
855 	}
856 
857 	vp = html_alloc(mchars, &conf);
858 
859 	if (man->macroset == MACROSET_MDOC)
860 		html_mdoc(vp, man);
861 	else
862 		html_man(vp, man);
863 
864 	html_free(vp);
865 	mparse_free(mp);
866 	mchars_free(mchars);
867 	free(conf.man);
868 }
869 
/*
 * Print one manual page as HTML.  A leading "./" is skipped.
 * Files whose name then starts with 'c' are handled as
 * preformatted pages by catman(), all others are parsed
 * as manual page sources by format().
 */
static void
resp_show(const struct req *req, const char *file)
{

	if (strncmp(file, "./", 2) == 0)
		file += 2;

	if (*file != 'c') {
		format(req, file);
		return;
	}
	catman(req, file);
}
882 
883 static void
884 pg_show(struct req *req, const char *fullpath)
885 {
886 	char		*manpath;
887 	const char	*file;
888 
889 	if ((file = strchr(fullpath, '/')) == NULL) {
890 		pg_error_badrequest(
891 		    "You did not specify a page to show.");
892 		return;
893 	}
894 	manpath = mandoc_strndup(fullpath, file - fullpath);
895 	file++;
896 
897 	if ( ! validate_manpath(req, manpath)) {
898 		pg_error_badrequest(
899 		    "You specified an invalid manpath.");
900 		free(manpath);
901 		return;
902 	}
903 
904 	/*
905 	 * Begin by chdir()ing into the manpath.
906 	 * This way we can pick up the database files, which are
907 	 * relative to the manpath root.
908 	 */
909 
910 	if (chdir(manpath) == -1) {
911 		fprintf(stderr, "chdir %s: %s\n",
912 		    manpath, strerror(errno));
913 		pg_error_internal();
914 		free(manpath);
915 		return;
916 	}
917 
918 	if (strcmp(manpath, "mandoc")) {
919 		free(req->q.manpath);
920 		req->q.manpath = manpath;
921 	} else
922 		free(manpath);
923 
924 	if ( ! validate_filename(file)) {
925 		pg_error_badrequest(
926 		    "You specified an invalid manual file.");
927 		return;
928 	}
929 
930 	resp_begin_html(200, NULL);
931 	resp_searchform(req);
932 	resp_show(req, file);
933 	resp_end_html();
934 }
935 
936 static void
937 pg_search(const struct req *req)
938 {
939 	struct mansearch	  search;
940 	struct manpaths		  paths;
941 	struct manpage		 *res;
942 	char			**argv;
943 	char			 *query, *rp, *wp;
944 	size_t			  ressz;
945 	int			  argc;
946 
947 	/*
948 	 * Begin by chdir()ing into the root of the manpath.
949 	 * This way we can pick up the database files, which are
950 	 * relative to the manpath root.
951 	 */
952 
953 	if (-1 == (chdir(req->q.manpath))) {
954 		fprintf(stderr, "chdir %s: %s\n",
955 		    req->q.manpath, strerror(errno));
956 		pg_error_internal();
957 		return;
958 	}
959 
960 	search.arch = req->q.arch;
961 	search.sec = req->q.sec;
962 	search.outkey = "Nd";
963 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
964 	search.firstmatch = 1;
965 
966 	paths.sz = 1;
967 	paths.paths = mandoc_malloc(sizeof(char *));
968 	paths.paths[0] = mandoc_strdup(".");
969 
970 	/*
971 	 * Break apart at spaces with backslash-escaping.
972 	 */
973 
974 	argc = 0;
975 	argv = NULL;
976 	rp = query = mandoc_strdup(req->q.query);
977 	for (;;) {
978 		while (isspace((unsigned char)*rp))
979 			rp++;
980 		if (*rp == '\0')
981 			break;
982 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
983 		argv[argc++] = wp = rp;
984 		for (;;) {
985 			if (isspace((unsigned char)*rp)) {
986 				*wp = '\0';
987 				rp++;
988 				break;
989 			}
990 			if (rp[0] == '\\' && rp[1] != '\0')
991 				rp++;
992 			if (wp != rp)
993 				*wp = *rp;
994 			if (*rp == '\0')
995 				break;
996 			wp++;
997 			rp++;
998 		}
999 	}
1000 
1001 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
1002 		pg_noresult(req, "You entered an invalid query.");
1003 	else if (0 == ressz)
1004 		pg_noresult(req, "No results found.");
1005 	else
1006 		pg_searchres(req, res, ressz);
1007 
1008 	free(query);
1009 	mansearch_free(res, ressz);
1010 	free(paths.paths[0]);
1011 	free(paths.paths);
1012 }
1013 
1014 int
1015 main(void)
1016 {
1017 	struct req	 req;
1018 	struct itimerval itimer;
1019 	const char	*path;
1020 	const char	*querystring;
1021 	int		 i;
1022 
1023 	/* Poor man's ReDoS mitigation. */
1024 
1025 	itimer.it_value.tv_sec = 2;
1026 	itimer.it_value.tv_usec = 0;
1027 	itimer.it_interval.tv_sec = 2;
1028 	itimer.it_interval.tv_usec = 0;
1029 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1030 		fprintf(stderr, "setitimer: %s\n", strerror(errno));
1031 		pg_error_internal();
1032 		return(EXIT_FAILURE);
1033 	}
1034 
1035 	/* Scan our run-time environment. */
1036 
1037 	if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1038 		scriptname = "";
1039 
1040 	if ( ! validate_urifrag(scriptname)) {
1041 		fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1042 		    scriptname);
1043 		pg_error_internal();
1044 		return(EXIT_FAILURE);
1045 	}
1046 
1047 	/*
1048 	 * First we change directory into the MAN_DIR so that
1049 	 * subsequent scanning for manpath directories is rooted
1050 	 * relative to the same position.
1051 	 */
1052 
1053 	if (-1 == chdir(MAN_DIR)) {
1054 		fprintf(stderr, "MAN_DIR: %s: %s\n",
1055 		    MAN_DIR, strerror(errno));
1056 		pg_error_internal();
1057 		return(EXIT_FAILURE);
1058 	}
1059 
1060 	memset(&req, 0, sizeof(struct req));
1061 	pathgen(&req);
1062 
1063 	/* Next parse out the query string. */
1064 
1065 	if (NULL != (querystring = getenv("QUERY_STRING")))
1066 		http_parse(&req, querystring);
1067 
1068 	if (req.q.manpath == NULL)
1069 		req.q.manpath = mandoc_strdup(req.p[0]);
1070 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1071 		pg_error_badrequest(
1072 		    "You specified an invalid manpath.");
1073 		return(EXIT_FAILURE);
1074 	}
1075 
1076 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1077 		pg_error_badrequest(
1078 		    "You specified an invalid architecture.");
1079 		return(EXIT_FAILURE);
1080 	}
1081 
1082 	/* Dispatch to the three different pages. */
1083 
1084 	path = getenv("PATH_INFO");
1085 	if (NULL == path)
1086 		path = "";
1087 	else if ('/' == *path)
1088 		path++;
1089 
1090 	if ('\0' != *path)
1091 		pg_show(&req, path);
1092 	else if (NULL != req.q.query)
1093 		pg_search(&req);
1094 	else
1095 		pg_index(&req);
1096 
1097 	free(req.q.manpath);
1098 	free(req.q.arch);
1099 	free(req.q.sec);
1100 	free(req.q.query);
1101 	for (i = 0; i < (int)req.psz; i++)
1102 		free(req.p[i]);
1103 	free(req.p);
1104 	return(EXIT_SUCCESS);
1105 }
1106 
1107 /*
1108  * Scan for indexable paths.
1109  */
1110 static void
1111 pathgen(struct req *req)
1112 {
1113 	FILE	*fp;
1114 	char	*dp;
1115 	size_t	 dpsz;
1116 
1117 	if (NULL == (fp = fopen("manpath.conf", "r"))) {
1118 		fprintf(stderr, "%s/manpath.conf: %s\n",
1119 			MAN_DIR, strerror(errno));
1120 		pg_error_internal();
1121 		exit(EXIT_FAILURE);
1122 	}
1123 
1124 	while (NULL != (dp = fgetln(fp, &dpsz))) {
1125 		if ('\n' == dp[dpsz - 1])
1126 			dpsz--;
1127 		req->p = mandoc_realloc(req->p,
1128 		    (req->psz + 1) * sizeof(char *));
1129 		dp = mandoc_strndup(dp, dpsz);
1130 		if ( ! validate_urifrag(dp)) {
1131 			fprintf(stderr, "%s/manpath.conf contains "
1132 			    "unsafe path \"%s\"\n", MAN_DIR, dp);
1133 			pg_error_internal();
1134 			exit(EXIT_FAILURE);
1135 		}
1136 		if (NULL != strchr(dp, '/')) {
1137 			fprintf(stderr, "%s/manpath.conf contains "
1138 			    "path with slash \"%s\"\n", MAN_DIR, dp);
1139 			pg_error_internal();
1140 			exit(EXIT_FAILURE);
1141 		}
1142 		req->p[req->psz++] = dp;
1143 	}
1144 
1145 	if ( req->p == NULL ) {
1146 		fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1147 		pg_error_internal();
1148 		exit(EXIT_FAILURE);
1149 	}
1150 }
1151