xref: /openbsd-src/usr.bin/mandoc/cgi.c (revision 89c96e90242a81edf0d9a0550930a11563268b9d)
1 /*	$OpenBSD: cgi.c,v 1.82 2017/01/19 13:55:49 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "main.h"
38 #include "manconf.h"
39 #include "mansearch.h"
40 #include "cgi.h"
41 
/*
 * A query as passed to the search function.
 * The string members are NULL when the request did not supply them;
 * parse_query_string() stores empty values as NULL, too.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
52 
/*
 * State of one CGI request: the parsed query plus the list of
 * manpaths read from manpath.conf by parse_manpath_conf().
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
59 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
64 
/*
 * Forward declarations of all file-local functions,
 * in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
91 
/* Script name used as the first path component of generated links. */
static	const char	 *scriptname = SCRIPT_NAME;
93 
/*
 * Display priority of sections 1 to 9, indexed by section digit
 * minus one; pg_searchres() prefers the page with the lowest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Values and labels of the section selector in the search form.
 * Both arrays are indexed in parallel; "0" stands for all sections.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
112 
/*
 * Architectures offered by the architecture selector
 * in the search form, in display order.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",
    "hppa",        "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc64",
    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in the architecture selector. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
128 
/*
 * Write one character to standard output.
 * The four characters ", &, > and < are replaced by their HTML
 * entities; every other byte, non-ASCII included, is written as is.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		entity = NULL;
		break;
	}

	if (entity != NULL)
		printf("%s", entity);
	else
		putchar((unsigned char)c);
}
155 
/*
 * Write a whole string with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
169 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Whatever *attr held before is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL.  In either case,
 * *val is NULL afterwards, so the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	*val = NULL;

	free(*attr);
	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
186 
/*
 * Parse the QUERY_STRING for key-value pairs
 * and store the values into the query structure.
 * Pairs are separated by ';' or '&'.  Keys without a value,
 * pairs that fail HTTP decoding, and unknown keys are ignored.
 * When a key occurs more than once, the last occurrence wins.
 */
static void
parse_query_string(struct req *req, const char *qs)
{
	char		*key, *val;
	size_t		 keysz, valsz;

	/* Start from defaults; the pointers are not freed here. */

	req->isquery	= 1;
	req->q.manpath	= NULL;
	req->q.arch	= NULL;
	req->q.sec	= NULL;
	req->q.query	= NULL;
	req->q.equal	= 1;

	key = val = NULL;
	while (*qs != '\0') {

		/* Parse one key. */

		keysz = strcspn(qs, "=;&");
		key = mandoc_strndup(qs, keysz);
		qs += keysz;
		if (*qs != '=')
			goto next;

		/* Parse one value. */

		valsz = strcspn(++qs, ";&");
		val = mandoc_strndup(qs, valsz);
		qs += valsz;

		/* Decode and catch encoding errors. */

		if ( ! (http_decode(key) && http_decode(val)))
			goto next;

		/* Handle key-value pairs. */

		if ( ! strcmp(key, "query"))
			set_query_attr(&req->q.query, &val);

		/* apropos=0 selects name matching, anything else apropos. */

		else if ( ! strcmp(key, "apropos"))
			req->q.equal = !strcmp(val, "0");

		else if ( ! strcmp(key, "manpath")) {
#ifdef COMPAT_OLDURI
			/* Rewrite "OpenBSD X" to "OpenBSD-X", "C" to "c". */
			if ( ! strncmp(val, "OpenBSD ", 8)) {
				val[7] = '-';
				if ('C' == val[8])
					val[8] = 'c';
			}
#endif
			set_query_attr(&req->q.manpath, &val);
		}

		else if ( ! (strcmp(key, "sec")
#ifdef COMPAT_OLDURI
		    && strcmp(key, "sektion")
#endif
		    )) {
			/* Section "0" means all sections: store NULL. */
			if ( ! strcmp(val, "0"))
				*val = '\0';
			set_query_attr(&req->q.sec, &val);
		}

		else if ( ! strcmp(key, "arch")) {
			/* "default" means all architectures: store NULL. */
			if ( ! strcmp(val, "default"))
				*val = '\0';
			set_query_attr(&req->q.arch, &val);
		}

		/*
		 * The key must be freed in any case.
		 * The val may have been handed over to the query
		 * structure, in which case it is now NULL.
		 */
next:
		free(key);
		key = NULL;
		free(val);
		val = NULL;

		/* Skip the pair separator, if any. */

		if (*qs != '\0')
			qs++;
	}
}
276 
277 /*
278  * HTTP-decode a string.  The standard explanation is that this turns
279  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
280  * over the allocated string.
281  */
282 static int
283 http_decode(char *p)
284 {
285 	char             hex[3];
286 	char		*q;
287 	int              c;
288 
289 	hex[2] = '\0';
290 
291 	q = p;
292 	for ( ; '\0' != *p; p++, q++) {
293 		if ('%' == *p) {
294 			if ('\0' == (hex[0] = *(p + 1)))
295 				return 0;
296 			if ('\0' == (hex[1] = *(p + 2)))
297 				return 0;
298 			if (1 != sscanf(hex, "%x", &c))
299 				return 0;
300 			if ('\0' == c)
301 				return 0;
302 
303 			*q = (char)c;
304 			p += 2;
305 		} else
306 			*q = '+' == *p ? ' ' : *p;
307 	}
308 
309 	*q = '\0';
310 	return 1;
311 }
312 
/*
 * Print the CGI response header and flush it.
 * A Status line is only written for codes other than 200;
 * msg is not used when the code is 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	printf("Content-Type: text/html; charset=utf-8\r\n");
	printf("Cache-Control: no-cache\r\n");
	printf("Pragma: no-cache\r\n");
	printf("\r\n");

	fflush(stdout);
}
327 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped, which allows
 * optional files.  Buffered stdio output is flushed first
 * because the copy bypasses stdio and writes to the descriptor.
 * Short writes are continued until the whole chunk is out;
 * copying stops at the first read error or unrecoverable write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw == -1) {
				/* Retry after a signal, give up otherwise. */
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			off += nw;
		}
	}
	close(fd);
}
342 
343 static void
344 resp_begin_html(int code, const char *msg)
345 {
346 
347 	resp_begin_http(code, msg);
348 
349 	printf("<!DOCTYPE html>\n"
350 	       "<html>\n"
351 	       "<head>\n"
352 	       "  <meta charset=\"UTF-8\"/>\n"
353 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
354 	       " type=\"text/css\" media=\"all\">\n"
355 	       "  <title>%s</title>\n"
356 	       "</head>\n"
357 	       "<body>\n",
358 	       CSS_DIR, CUSTOMIZE_TITLE);
359 
360 	resp_copy(MAN_DIR "/header.html");
361 }
362 
363 static void
364 resp_end_html(void)
365 {
366 
367 	resp_copy(MAN_DIR "/footer.html");
368 
369 	puts("</body>\n"
370 	     "</html>");
371 }
372 
373 static void
374 resp_searchform(const struct req *req, enum focus focus)
375 {
376 	int		 i;
377 
378 	printf("<form action=\"/%s\" method=\"get\">\n"
379 	       "  <fieldset>\n"
380 	       "    <legend>Manual Page Search Parameters</legend>\n",
381 	       scriptname);
382 
383 	/* Write query input box. */
384 
385 	printf("    <input type=\"text\" name=\"query\" value=\"");
386 	if (req->q.query != NULL)
387 		html_print(req->q.query);
388 	printf( "\" size=\"40\"");
389 	if (focus == FOCUS_QUERY)
390 		printf(" autofocus");
391 	puts(">");
392 
393 	/* Write submission buttons. */
394 
395 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
396 		"man</button>\n"
397 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
398 		"apropos</button>\n"
399 		"    <br/>\n");
400 
401 	/* Write section selector. */
402 
403 	puts("    <select name=\"sec\">");
404 	for (i = 0; i < sec_MAX; i++) {
405 		printf("      <option value=\"%s\"", sec_numbers[i]);
406 		if (NULL != req->q.sec &&
407 		    0 == strcmp(sec_numbers[i], req->q.sec))
408 			printf(" selected=\"selected\"");
409 		printf(">%s</option>\n", sec_names[i]);
410 	}
411 	puts("    </select>");
412 
413 	/* Write architecture selector. */
414 
415 	printf(	"    <select name=\"arch\">\n"
416 		"      <option value=\"default\"");
417 	if (NULL == req->q.arch)
418 		printf(" selected=\"selected\"");
419 	puts(">All Architectures</option>");
420 	for (i = 0; i < arch_MAX; i++) {
421 		printf("      <option value=\"%s\"", arch_names[i]);
422 		if (NULL != req->q.arch &&
423 		    0 == strcmp(arch_names[i], req->q.arch))
424 			printf(" selected=\"selected\"");
425 		printf(">%s</option>\n", arch_names[i]);
426 	}
427 	puts("    </select>");
428 
429 	/* Write manpath selector. */
430 
431 	if (req->psz > 1) {
432 		puts("    <select name=\"manpath\">");
433 		for (i = 0; i < (int)req->psz; i++) {
434 			printf("      <option ");
435 			if (strcmp(req->q.manpath, req->p[i]) == 0)
436 				printf("selected=\"selected\" ");
437 			printf("value=\"");
438 			html_print(req->p[i]);
439 			printf("\">");
440 			html_print(req->p[i]);
441 			puts("</option>");
442 		}
443 		puts("    </select>");
444 	}
445 
446 	puts("  </fieldset>\n"
447 	     "</form>");
448 }
449 
/*
 * Check that a string consists exclusively of alphanumeric
 * characters and the four characters '-', '.', '/', and '_'.
 * Return 1 if so (an empty string qualifies), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			break;
		}
		if ( ! isalnum((unsigned char)*cp))
			return 0;
	}
	return 1;
}
463 
464 static int
465 validate_manpath(const struct req *req, const char* manpath)
466 {
467 	size_t	 i;
468 
469 	for (i = 0; i < req->psz; i++)
470 		if ( ! strcmp(manpath, req->p[i]))
471 			return 1;
472 
473 	return 0;
474 }
475 
/*
 * Check a file name relative to a manpath.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Return 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse attempts to step out of the directory. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	/* Only the man* and cat* trees are served. */

	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
486 
487 static void
488 pg_index(const struct req *req)
489 {
490 
491 	resp_begin_html(200, NULL);
492 	resp_searchform(req, FOCUS_QUERY);
493 	printf("<p>\n"
494 	       "This web interface is documented in the\n"
495 	       "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
496 	       "manual, and the\n"
497 	       "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n"
498 	       "manual explains the query syntax.\n"
499 	       "</p>\n",
500 	       scriptname, *scriptname == '\0' ? "" : "/",
501 	       scriptname, *scriptname == '\0' ? "" : "/");
502 	resp_end_html();
503 }
504 
505 static void
506 pg_noresult(const struct req *req, const char *msg)
507 {
508 	resp_begin_html(200, NULL);
509 	resp_searchform(req, FOCUS_QUERY);
510 	puts("<p>");
511 	puts(msg);
512 	puts("</p>");
513 	resp_end_html();
514 }
515 
516 static void
517 pg_error_badrequest(const char *msg)
518 {
519 
520 	resp_begin_html(400, "Bad Request");
521 	puts("<h1>Bad Request</h1>\n"
522 	     "<p>\n");
523 	puts(msg);
524 	printf("Try again from the\n"
525 	       "<a href=\"/%s\">main page</a>.\n"
526 	       "</p>", scriptname);
527 	resp_end_html();
528 }
529 
/*
 * Send a complete "500 Internal Server Error" page.
 * Details are not shown to the user; callers log them
 * separately before calling this function.
 */
static void
pg_error_internal(void)
{
	static const char	 status[] = "Internal Server Error";

	resp_begin_html(500, status);
	puts("<p>Internal Server Error</p>");
	resp_end_html();
}
537 
538 static void
539 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
540 {
541 	char		*arch, *archend;
542 	const char	*sec;
543 	size_t		 i, iuse;
544 	int		 archprio, archpriouse;
545 	int		 prio, priouse;
546 
547 	for (i = 0; i < sz; i++) {
548 		if (validate_filename(r[i].file))
549 			continue;
550 		warnx("invalid filename %s in %s database",
551 		    r[i].file, req->q.manpath);
552 		pg_error_internal();
553 		return;
554 	}
555 
556 	if (req->isquery && sz == 1) {
557 		/*
558 		 * If we have just one result, then jump there now
559 		 * without any delay.
560 		 */
561 		printf("Status: 303 See Other\r\n");
562 		printf("Location: http://%s/%s%s%s/%s",
563 		    HTTP_HOST, scriptname,
564 		    *scriptname == '\0' ? "" : "/",
565 		    req->q.manpath, r[0].file);
566 		printf("\r\n"
567 		     "Content-Type: text/html; charset=utf-8\r\n"
568 		     "\r\n");
569 		return;
570 	}
571 
572 	resp_begin_html(200, NULL);
573 	resp_searchform(req,
574 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
575 
576 	if (sz > 1) {
577 		puts("<div class=\"results\">");
578 		puts("<table>");
579 
580 		for (i = 0; i < sz; i++) {
581 			printf("  <tr>\n"
582 			       "    <td class=\"title\">"
583 			       "<a href=\"/%s%s%s/%s",
584 			    scriptname, *scriptname == '\0' ? "" : "/",
585 			    req->q.manpath, r[i].file);
586 			printf("\">");
587 			html_print(r[i].names);
588 			printf("</a></td>\n"
589 			       "    <td class=\"desc\">");
590 			html_print(r[i].output);
591 			puts("</td>\n"
592 			     "  </tr>");
593 		}
594 
595 		puts("</table>\n"
596 		     "</div>");
597 	}
598 
599 	/*
600 	 * In man(1) mode, show one of the pages
601 	 * even if more than one is found.
602 	 */
603 
604 	if (req->q.equal || sz == 1) {
605 		puts("<hr>");
606 		iuse = 0;
607 		priouse = 20;
608 		archpriouse = 3;
609 		for (i = 0; i < sz; i++) {
610 			sec = r[i].file;
611 			sec += strcspn(sec, "123456789");
612 			if (sec[0] == '\0')
613 				continue;
614 			prio = sec_prios[sec[0] - '1'];
615 			if (sec[1] != '/')
616 				prio += 10;
617 			if (req->q.arch == NULL) {
618 				archprio =
619 				    ((arch = strchr(sec + 1, '/'))
620 					== NULL) ? 3 :
621 				    ((archend = strchr(arch + 1, '/'))
622 					== NULL) ? 0 :
623 				    strncmp(arch, "amd64/",
624 					archend - arch) ? 2 : 1;
625 				if (archprio < archpriouse) {
626 					archpriouse = archprio;
627 					priouse = prio;
628 					iuse = i;
629 					continue;
630 				}
631 				if (archprio > archpriouse)
632 					continue;
633 			}
634 			if (prio >= priouse)
635 				continue;
636 			priouse = prio;
637 			iuse = i;
638 		}
639 		resp_show(req, r[iuse].file);
640 	}
641 
642 	resp_end_html();
643 }
644 
/*
 * Render a preformatted (cat) manual page as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, certain pairs of
 * overstruck punctuation become '*' or '+', and any other "X\bY"
 * becomes bold Y.  All other characters are copied with HTML
 * escaping.  If the file cannot be opened, a short error paragraph
 * is printed instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair are compared:
			 * the one before and the one after the
			 * backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
781 
782 static void
783 resp_format(const struct req *req, const char *file)
784 {
785 	struct manoutput conf;
786 	struct mparse	*mp;
787 	struct roff_man	*man;
788 	void		*vp;
789 	int		 fd;
790 	int		 usepath;
791 
792 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
793 		puts("<p>You specified an invalid manual file.</p>");
794 		return;
795 	}
796 
797 	mchars_alloc();
798 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
799 	    MANDOCLEVEL_BADARG, NULL, req->q.manpath);
800 	mparse_readfd(mp, fd, file);
801 	close(fd);
802 
803 	memset(&conf, 0, sizeof(conf));
804 	conf.fragment = 1;
805 	usepath = strcmp(req->q.manpath, req->p[0]);
806 	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
807 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
808 
809 	mparse_result(mp, &man, NULL);
810 	if (man == NULL) {
811 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
812 		pg_error_internal();
813 		mparse_free(mp);
814 		mchars_free();
815 		return;
816 	}
817 
818 	vp = html_alloc(&conf);
819 
820 	if (man->macroset == MACROSET_MDOC) {
821 		mdoc_validate(man);
822 		html_mdoc(vp, man);
823 	} else {
824 		man_validate(man);
825 		html_man(vp, man);
826 	}
827 
828 	html_free(vp);
829 	mparse_free(mp);
830 	mchars_free();
831 	free(conf.man);
832 }
833 
/*
 * Render one manual page file, selecting the renderer by name:
 * after skipping an optional leading "./", names starting with
 * 'c' are treated as preformatted pages, all others as sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name != 'c') {
		resp_format(req, name);
		return;
	}
	resp_catman(req, name);
}
846 
847 static void
848 pg_show(struct req *req, const char *fullpath)
849 {
850 	char		*manpath;
851 	const char	*file;
852 
853 	if ((file = strchr(fullpath, '/')) == NULL) {
854 		pg_error_badrequest(
855 		    "You did not specify a page to show.");
856 		return;
857 	}
858 	manpath = mandoc_strndup(fullpath, file - fullpath);
859 	file++;
860 
861 	if ( ! validate_manpath(req, manpath)) {
862 		pg_error_badrequest(
863 		    "You specified an invalid manpath.");
864 		free(manpath);
865 		return;
866 	}
867 
868 	/*
869 	 * Begin by chdir()ing into the manpath.
870 	 * This way we can pick up the database files, which are
871 	 * relative to the manpath root.
872 	 */
873 
874 	if (chdir(manpath) == -1) {
875 		warn("chdir %s", manpath);
876 		pg_error_internal();
877 		free(manpath);
878 		return;
879 	}
880 	free(manpath);
881 
882 	if ( ! validate_filename(file)) {
883 		pg_error_badrequest(
884 		    "You specified an invalid manual file.");
885 		return;
886 	}
887 
888 	resp_begin_html(200, NULL);
889 	resp_searchform(req, FOCUS_NONE);
890 	resp_show(req, file);
891 	resp_end_html();
892 }
893 
894 static void
895 pg_search(const struct req *req)
896 {
897 	struct mansearch	  search;
898 	struct manpaths		  paths;
899 	struct manpage		 *res;
900 	char			**argv;
901 	char			 *query, *rp, *wp;
902 	size_t			  ressz;
903 	int			  argc;
904 
905 	/*
906 	 * Begin by chdir()ing into the root of the manpath.
907 	 * This way we can pick up the database files, which are
908 	 * relative to the manpath root.
909 	 */
910 
911 	if (chdir(req->q.manpath) == -1) {
912 		warn("chdir %s", req->q.manpath);
913 		pg_error_internal();
914 		return;
915 	}
916 
917 	search.arch = req->q.arch;
918 	search.sec = req->q.sec;
919 	search.outkey = "Nd";
920 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
921 	search.firstmatch = 1;
922 
923 	paths.sz = 1;
924 	paths.paths = mandoc_malloc(sizeof(char *));
925 	paths.paths[0] = mandoc_strdup(".");
926 
927 	/*
928 	 * Break apart at spaces with backslash-escaping.
929 	 */
930 
931 	argc = 0;
932 	argv = NULL;
933 	rp = query = mandoc_strdup(req->q.query);
934 	for (;;) {
935 		while (isspace((unsigned char)*rp))
936 			rp++;
937 		if (*rp == '\0')
938 			break;
939 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
940 		argv[argc++] = wp = rp;
941 		for (;;) {
942 			if (isspace((unsigned char)*rp)) {
943 				*wp = '\0';
944 				rp++;
945 				break;
946 			}
947 			if (rp[0] == '\\' && rp[1] != '\0')
948 				rp++;
949 			if (wp != rp)
950 				*wp = *rp;
951 			if (*rp == '\0')
952 				break;
953 			wp++;
954 			rp++;
955 		}
956 	}
957 
958 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
959 		pg_noresult(req, "You entered an invalid query.");
960 	else if (0 == ressz)
961 		pg_noresult(req, "No results found.");
962 	else
963 		pg_searchres(req, res, ressz);
964 
965 	free(query);
966 	mansearch_free(res, ressz);
967 	free(paths.paths[0]);
968 	free(paths.paths);
969 }
970 
/*
 * CGI entry point.
 * Read the configured manpaths, parse PATH_INFO or, if that is
 * empty, QUERY_STRING, validate the request, and dispatch to
 * pg_show(), pg_search(), or pg_index().
 * Return EXIT_SUCCESS after dispatching, or EXIT_FAILURE
 * if the setup fails or the request is invalid.
 */
int
main(void)
{
	struct req	 req;
	struct itimerval itimer;
	const char	*path;
	const char	*querystring;
	int		 i;

	/*
	 * Poor man's ReDoS mitigation:
	 * arm a virtual interval timer of two seconds.
	 */

	itimer.it_value.tv_sec = 2;
	itimer.it_value.tv_usec = 0;
	itimer.it_interval.tv_sec = 2;
	itimer.it_interval.tv_usec = 0;
	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
		warn("setitimer");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/*
	 * First we change directory into the MAN_DIR so that
	 * subsequent scanning for manpath directories is rooted
	 * relative to the same position.
	 */

	if (chdir(MAN_DIR) == -1) {
		warn("MAN_DIR: %s", MAN_DIR);
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/* Default to name matching and load the list of manpaths. */

	memset(&req, 0, sizeof(struct req));
	req.q.equal = 1;
	parse_manpath_conf(&req);

	/* Parse the path info and the query string. */

	if ((path = getenv("PATH_INFO")) == NULL)
		path = "";
	else if (*path == '/')
		path++;

	/*
	 * A non-empty PATH_INFO takes precedence over QUERY_STRING.
	 * It is only shown as a file if it contains a valid manpath
	 * and the file exists; otherwise, the query parsed from it
	 * is used for a search.
	 */

	if (*path != '\0') {
		parse_path_info(&req, path);
		if (req.q.manpath == NULL || access(path, F_OK) == -1)
			path = "";
	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
		parse_query_string(&req, querystring);

	/* Validate parsed data and add defaults. */

	if (req.q.manpath == NULL)
		req.q.manpath = mandoc_strdup(req.p[0]);
	else if ( ! validate_manpath(&req, req.q.manpath)) {
		pg_error_badrequest(
		    "You specified an invalid manpath.");
		return EXIT_FAILURE;
	}

	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
		pg_error_badrequest(
		    "You specified an invalid architecture.");
		return EXIT_FAILURE;
	}

	/* Dispatch to the three different pages. */

	if ('\0' != *path)
		pg_show(&req, path);
	else if (NULL != req.q.query)
		pg_search(&req);
	else
		pg_index(&req);

	/* Release the request data. */

	free(req.q.manpath);
	free(req.q.arch);
	free(req.q.sec);
	free(req.q.query);
	for (i = 0; i < (int)req.psz; i++)
		free(req.p[i]);
	free(req.p);
	return EXIT_SUCCESS;
}
1056 
/*
 * If PATH_INFO is not a file name, translate it to a query.
 * The accepted form is [manpath/][manN/][arch/]name[.section].
 * The path is split in place in a private copy, and the pieces
 * are stored into the query structure.  More than three directory
 * components, or a component that fits nowhere, cause a bad
 * request page and exit the program.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	char	*dir[4];
	int	 i;

	/* Work on a copy; at first, manpath owns the whole buffer. */

	req->isquery = 0;
	req->q.equal = 1;
	req->q.manpath = mandoc_strdup(path);
	req->q.arch = NULL;

	/*
	 * Mandatory manual page name.
	 * Without any slash, the buffer is handed over to query.
	 * Otherwise, query temporarily points into the buffer.
	 */
	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
		req->q.query = req->q.manpath;
		req->q.manpath = NULL;
	} else
		*req->q.query++ = '\0';

	/*
	 * Optional trailing section.
	 * It is only cut off and copied if the last dot
	 * is followed by a digit.
	 */
	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
		if(isdigit((unsigned char)req->q.sec[1])) {
			*req->q.sec++ = '\0';
			req->q.sec = mandoc_strdup(req->q.sec);
		} else
			req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (req->q.manpath == NULL)
		return;

	/* Give the name its own allocation, independent of the buffer. */
	req->q.query = mandoc_strdup(req->q.query);

	/*
	 * Split directory components.
	 * On leaving the loop, dir[i + 1] is NULL.
	 */
	dir[i = 0] = req->q.manpath;
	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
		if (++i == 3) {
			pg_error_badrequest(
			    "You specified too many directory components.");
			exit(EXIT_FAILURE);
		}
		*dir[i]++ = '\0';
	}

	/*
	 * Optional manpath.
	 * From here on, i is the index of the next unused component:
	 * 1 if the first component is a valid manpath, 0 otherwise.
	 * When it is not, dir[0] still points to the buffer.
	 */
	if ((i = validate_manpath(req, req->q.manpath)) == 0)
		req->q.manpath = NULL;
	else if (dir[1] == NULL)
		return;

	/* Optional section; it overrides a trailing section. */
	if (strncmp(dir[i], "man", 3) == 0) {
		free(req->q.sec);
		req->q.sec = mandoc_strdup(dir[i++] + 3);
	}
	if (dir[i] == NULL) {
		/* The buffer is no longer referenced: release it. */
		if (req->q.manpath == NULL)
			free(dir[0]);
		return;
	}
	if (dir[i + 1] != NULL) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}

	/*
	 * Optional architecture.
	 * With i == 0, the architecture is the first component,
	 * so the buffer itself is handed over to arch.
	 */
	if (i) {
		req->q.arch = mandoc_strdup(dir[i]);
		if (req->q.manpath == NULL)
			free(dir[0]);
	} else
		req->q.arch = dir[0];
}
1133 
1134 /*
1135  * Scan for indexable paths.
1136  */
1137 static void
1138 parse_manpath_conf(struct req *req)
1139 {
1140 	FILE	*fp;
1141 	char	*dp;
1142 	size_t	 dpsz;
1143 	ssize_t	 len;
1144 
1145 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1146 		warn("%s/manpath.conf", MAN_DIR);
1147 		pg_error_internal();
1148 		exit(EXIT_FAILURE);
1149 	}
1150 
1151 	dp = NULL;
1152 	dpsz = 0;
1153 
1154 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1155 		if (dp[len - 1] == '\n')
1156 			dp[--len] = '\0';
1157 		req->p = mandoc_realloc(req->p,
1158 		    (req->psz + 1) * sizeof(char *));
1159 		if ( ! validate_urifrag(dp)) {
1160 			warnx("%s/manpath.conf contains "
1161 			    "unsafe path \"%s\"", MAN_DIR, dp);
1162 			pg_error_internal();
1163 			exit(EXIT_FAILURE);
1164 		}
1165 		if (strchr(dp, '/') != NULL) {
1166 			warnx("%s/manpath.conf contains "
1167 			    "path with slash \"%s\"", MAN_DIR, dp);
1168 			pg_error_internal();
1169 			exit(EXIT_FAILURE);
1170 		}
1171 		req->p[req->psz++] = dp;
1172 		dp = NULL;
1173 		dpsz = 0;
1174 	}
1175 	free(dp);
1176 
1177 	if (req->p == NULL) {
1178 		warnx("%s/manpath.conf is empty", MAN_DIR);
1179 		pg_error_internal();
1180 		exit(EXIT_FAILURE);
1181 	}
1182 }
1183