xref: /openbsd-src/usr.bin/mandoc/read.c (revision d1df930ffab53da22f3324c32bed7ac5709915e6)
1 /*	$OpenBSD: read.c,v 1.172 2018/08/25 16:43:52 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 #include <sys/mman.h>
21 #include <sys/stat.h>
22 
23 #include <assert.h>
24 #include <ctype.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <zlib.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "libmandoc.h"
40 
/*
 * Maximum number of ROFF_REPARSE results accepted by mparse_buf_r()
 * before it gives up with MANDOCERR_ROFFLOOP.  The counter is reset
 * at the start of each line of file-level input.
 */
#define	REPARSE_LIMIT	1000
42 
/*
 * State of the top-level parser: the sub-parsers it drives,
 * the input currently being read, and the message-handling settings.
 */
struct	mparse {
	struct roff	 *roff; /* roff parser (!NULL) */
	struct roff_man	 *man; /* man parser */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* copy of top level input */
	struct buf	 *loop; /* open .while request line */
	const char	 *os_s; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandocerr	  mmin; /* ignore messages below this */
	int		  options; /* parser options */
	int		  gzip; /* current input file is gzipped */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
61 
/* Forward declarations of the file-local helper functions. */
static	void	  choose_parser(struct mparse *);
static	void	  free_buf_list(struct buf *);
static	void	  resize_buf(struct buf *, size_t);
static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
71 
/*
 * Indexed by enum mandoclevel.  mandoc_msg() starts at
 * MANDOCLEVEL_UNSUPP and steps down while the error code is
 * smaller than the entry, so each entry acts as the lowest
 * error code that is classified at that level.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
81 
/*
 * Message texts indexed by enum mandocerr, as returned by
 * mparse_strerror().  The order has to match the enum definition.
 * NOTE(review): the single NULL entry presumably belongs to
 * MANDOCERR_FILE, whose text is supplied by the caller -- confirm
 * against the enum in mandoc.h.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"base system convention",

	"Mdocdate found",
	"Mdocdate missing",
	"unknown architecture",
	"operating system explicitly specified",
	"RCS id missing",
	"referenced manual not found",

	"generic style suggestion",

	"legacy man(7) date format",
	"normalizing date format to",
	"lower case character in document title",
	"duplicate RCS id",
	"possible typo in section name",
	"unterminated quoted argument",
	"useless macro",
	"consider using OS macro",
	"errnos out of order",
	"duplicate errno",
	"trailing delimiter",
	"no blank before trailing delimiter",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"verbatim \"--\", maybe consider using \\(em",
	"function name without markup",
	"whitespace at end of input line",
	"bad comment style",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"date in the future, using it anyway",
	"missing Os macro, using \"\"",
	"late prologue macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without Nm before Nd",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing comma before name",
	"missing description line, using \"\"",
	"description line outside NAME section",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"cross reference to self",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"first macro on line",
	"line scope broken",
	"skipping blank line in line scope",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 6n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing argument, using next line",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing section argument",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"unknown library name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"argument contains two font escapes",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"new sentence, new line",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	NULL,
	"duplicate prologue macro",
	"skipping late title macro",
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"ignoring request outside macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"using macro argument outside macro",
	"argument number is not numeric",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"argument is not numeric, using 1",
	"argument is not a character",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"excessive shift",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"nested .while loops",
	"end of scope with open .while loop",
	"end of .while loop in inner scope",
	"cannot continue this .while loop",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
278 
/*
 * Names of the message levels, indexed by enum mandoclevel
 * and returned by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"STYLE",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
288 
289 
290 static void
291 resize_buf(struct buf *buf, size_t initial)
292 {
293 
294 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
295 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
296 }
297 
298 static void
299 free_buf_list(struct buf *buf)
300 {
301 	struct buf *tmp;
302 
303 	while (buf != NULL) {
304 		tmp = buf;
305 		buf = tmp->next;
306 		free(tmp->buf);
307 		free(tmp);
308 	}
309 }
310 
311 static void
312 choose_parser(struct mparse *curp)
313 {
314 	char		*cp, *ep;
315 	int		 format;
316 
317 	/*
318 	 * If neither command line arguments -mdoc or -man select
319 	 * a parser nor the roff parser found a .Dd or .TH macro
320 	 * yet, look ahead in the main input buffer.
321 	 */
322 
323 	if ((format = roff_getformat(curp->roff)) == 0) {
324 		cp = curp->primary->buf;
325 		ep = cp + curp->primary->sz;
326 		while (cp < ep) {
327 			if (*cp == '.' || *cp == '\'') {
328 				cp++;
329 				if (cp[0] == 'D' && cp[1] == 'd') {
330 					format = MPARSE_MDOC;
331 					break;
332 				}
333 				if (cp[0] == 'T' && cp[1] == 'H') {
334 					format = MPARSE_MAN;
335 					break;
336 				}
337 			}
338 			cp = memchr(cp, '\n', ep - cp);
339 			if (cp == NULL)
340 				break;
341 			cp++;
342 		}
343 	}
344 
345 	if (format == MPARSE_MDOC) {
346 		curp->man->macroset = MACROSET_MDOC;
347 		if (curp->man->mdocmac == NULL)
348 			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
349 	} else {
350 		curp->man->macroset = MACROSET_MAN;
351 		if (curp->man->manmac == NULL)
352 			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
353 	}
354 	curp->man->first->tok = TOKEN_NONE;
355 }
356 
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * curp:  parser state
 * blk:   the input to parse, passed by value
 * i:     byte offset in blk where parsing starts
 * start: non-zero for file-level input, where each line updates
 *        curp->line and resets the reparse counter;
 *        zero when called recursively on a single expanded line
 *
 * Returns ROFF_CONT unless parsing was cut short: ROFF_USERRET
 * when a nested level returns from a macro, ROFF_IGN after
 * exceeding REPARSE_LIMIT, or whatever the last recursive
 * call returned.
 */
static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	struct buf	 ln;
	struct buf	*firstln, *lastln, *thisln, *loop;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	int		 line_result, result;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	int		 inloop; /* Saw .while on this level. */
	unsigned char	 c;

	ln.sz = 256;
	ln.buf = mandoc_malloc(ln.sz);
	ln.next = NULL;
	firstln = loop = NULL;
	lnn = curp->line;
	pos = 0;
	inloop = 0;
	result = ROFF_CONT;

	while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) {
		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * While both encodings are still possible,
			 * look for an encoding cue in the first two lines.
			 */

			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Copy one input line from blk into ln. */

		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 12 bytes: "\\[u10ffff]\n\0"
			 */

			if (pos + 12 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				/* A lone carriage return is dropped. */
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			ln.buf[pos++] = blk.buf[i++];
		}
		ln.buf[pos] = '\0';

		/*
		 * Maintain a lookaside buffer of all lines
		 * parsed from this input source.
		 */

		thisln = mandoc_malloc(sizeof(*thisln));
		thisln->buf = mandoc_strdup(ln.buf);
		thisln->sz = strlen(ln.buf) + 1;
		thisln->next = NULL;
		if (firstln == NULL) {
			firstln = lastln = thisln;
			if (curp->secondary == NULL)
				curp->secondary = firstln;
		} else {
			lastln->next = thisln;
			lastln = thisln;
		}

		/* XXX Ugly hack to mark the end of the input. */

		if (i == blk.sz || blk.buf[i] == '\0') {
			ln.buf[pos++] = '\n';
			ln.buf[pos] = '\0';
		}

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;
rerun:
		line_result = roff_parseln(curp->roff, curp->line, &ln, &of);

		/* Process options. */

		if (line_result & ROFF_APPEND)
			assert(line_result == (ROFF_IGN | ROFF_APPEND));

		if (line_result & ROFF_USERCALL)
			assert((line_result & ROFF_MASK) == ROFF_REPARSE);

		if (line_result & ROFF_USERRET) {
			assert(line_result == (ROFF_IGN | ROFF_USERRET));
			if (start == 0) {
				/* Return from the current macro. */
				result = ROFF_USERRET;
				goto out;
			}
		}

		/*
		 * Keep track of .while loops: curp->loop points to the
		 * saved line that opened the loop, and the local loop
		 * pointer is non-NULL while saved lines are replayed.
		 */

		switch (line_result & ROFF_LOOPMASK) {
		case ROFF_IGN:
			break;
		case ROFF_WHILE:
			if (curp->loop != NULL) {
				if (loop == curp->loop)
					break;
				mandoc_msg(MANDOCERR_WHILE_NEST,
				    curp, curp->line, pos, NULL);
			}
			curp->loop = thisln;
			loop = NULL;
			inloop = 1;
			break;
		case ROFF_LOOPCONT:
		case ROFF_LOOPEXIT:
			if (curp->loop == NULL) {
				mandoc_msg(MANDOCERR_WHILE_FAIL,
				    curp, curp->line, pos, NULL);
				break;
			}
			if (inloop == 0) {
				mandoc_msg(MANDOCERR_WHILE_INTO,
				    curp, curp->line, pos, NULL);
				curp->loop = loop = NULL;
				break;
			}
			if (line_result & ROFF_LOOPCONT)
				loop = curp->loop;
			else {
				curp->loop = loop = NULL;
				inloop = 0;
			}
			break;
		default:
			abort();
		}

		/* Process the main instruction from the roff parser. */

		switch (line_result & ROFF_MASK) {
		case ROFF_IGN:
			break;
		case ROFF_CONT:
			if (curp->man->macroset == MACROSET_NONE)
				choose_parser(curp);
			if ((curp->man->macroset == MACROSET_MDOC ?
			     mdoc_parseln(curp->man, curp->line, ln.buf, of) :
			     man_parseln(curp->man, curp->line, ln.buf, of)
			    ) == 2)
				goto out;
			break;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_REPARSE:
			if (++curp->reparse_count > REPARSE_LIMIT) {
				/* Abort and return to the top level. */
				result = ROFF_IGN;
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
				goto out;
			}
			result = mparse_buf_r(curp, ln, of, 0);
			if (line_result & ROFF_USERCALL) {
				roff_userret(curp->roff);
				/* Continue normally. */
				if (result & ROFF_USERRET)
					result = ROFF_CONT;
			}
			if (start == 0 && result != ROFF_CONT)
				goto out;
			break;
		case ROFF_SO:
			/*
			 * Unless MPARSE_SO is set, a .so request at the
			 * very end of the input is not followed; only
			 * its target is recorded in curp->sodest.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				goto out;
			}
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: report it
				 * and parse a short replacement text.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			break;
		default:
			abort();
		}

		/* Start the next input line. */

		if (loop != NULL &&
		    (line_result & ROFF_LOOPMASK) == ROFF_IGN)
			loop = loop->next;

		/*
		 * While replaying a loop, take the next line from the
		 * lookaside list instead of reading from blk.
		 */

		if (loop != NULL) {
			if ((line_result & ROFF_APPEND) == 0)
				*ln.buf = '\0';
			if (ln.sz < loop->sz)
				resize_buf(&ln, loop->sz);
			(void)strlcat(ln.buf, loop->buf, ln.sz);
			of = 0;
			goto rerun;
		}

		pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0;
	}
out:
	/* A .while request seen on this level is still open. */
	if (inloop) {
		if (result != ROFF_USERRET)
			mandoc_msg(MANDOCERR_WHILE_OUTOF, curp,
			    curp->line, pos, NULL);
		curp->loop = NULL;
	}
	free(ln.buf);
	/* Keep the list if it was handed over to curp->secondary. */
	if (firstln != curp->secondary)
		free_buf_list(firstln);
	return result;
}
645 
/*
 * Read the complete content available from fd into fb.
 * Input is decompressed with zlib if curp->gzip is set.
 * On success, *with_mmap tells the caller how to release fb->buf:
 * non-zero if it was mapped with mmap(2), zero if it was allocated.
 * The file argument is not used by this function.
 * Returns 1 on success or 0 on failure; failures are reported
 * with MANDOCERR_FILE or MANDOCERR_TOOLARGE messages.
 */
static int
read_whole_file(struct mparse *curp, const char *file, int fd,
		struct buf *fb, int *with_mmap)
{
	struct stat	 st;
	gzFile		 gz;
	size_t		 off;
	ssize_t		 ssz;
	int		 gzerrnum, retval;

	if (fstat(fd, &st) == -1) {
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
		    "fstat: %s", strerror(errno));
		return 0;
	}

	/*
	 * If we're a regular file, try just reading in the whole entry
	 * via mmap().  This is faster than reading it into blocks, and
	 * since each file is only a few bytes to begin with, I'm not
	 * concerned that this is going to tank any machines.
	 */

	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
		if (st.st_size > 0x7fffffff) {
			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
			return 0;
		}
		*with_mmap = 1;
		fb->sz = (size_t)st.st_size;
		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
		if (fb->buf != MAP_FAILED)
			return 1;
		/* If mapping failed, fall back to read(2) below. */
	}

	if (curp->gzip) {
		/*
		 * Duplicating the file descriptor is required
		 * because we will have to call gzclose(3)
		 * to free memory used internally by zlib,
		 * but that will also close the file descriptor,
		 * which this function must not do.
		 */
		if ((fd = dup(fd)) == -1) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "dup: %s", strerror(errno));
			return 0;
		}
		if ((gz = gzdopen(fd, "rb")) == NULL) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "gzdopen: %s", strerror(errno));
			close(fd);
			return 0;
		}
	} else
		gz = NULL;

	/*
	 * If this isn't a regular file (like, say, stdin), then we must
	 * go the old way and just read things in bit by bit.
	 */

	*with_mmap = 0;
	off = 0;
	retval = 0;
	fb->sz = 0;
	fb->buf = NULL;
	for (;;) {
		/* The buffer is full: grow it, up to a limit of 2 GiB. */
		if (off == fb->sz) {
			if (fb->sz == (1U << 31)) {
				mandoc_msg(MANDOCERR_TOOLARGE, curp,
				    0, 0, NULL);
				break;
			}
			resize_buf(fb, 65536);
		}
		ssz = curp->gzip ?
		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
		    read(fd, fb->buf + (int)off, fb->sz - off);
		/* End of input: trim the size to what was read. */
		if (ssz == 0) {
			fb->sz = off;
			retval = 1;
			break;
		}
		if (ssz == -1) {
			if (curp->gzip)
				(void)gzerror(gz, &gzerrnum);
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
			    curp->gzip && gzerrnum != Z_ERRNO ?
			    zError(gzerrnum) : strerror(errno));
			break;
		}
		off += (size_t)ssz;
	}

	if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
		    gzerrnum == Z_ERRNO ? strerror(errno) :
		    zError(gzerrnum));
	/* On failure, do not hand a partially filled buffer back. */
	if (retval == 0) {
		free(fb->buf);
		fb->buf = NULL;
	}
	return retval;
}
751 
752 static void
753 mparse_end(struct mparse *curp)
754 {
755 	if (curp->man->macroset == MACROSET_NONE)
756 		curp->man->macroset = MACROSET_MAN;
757 	if (curp->man->macroset == MACROSET_MDOC)
758 		mdoc_endparse(curp->man);
759 	else
760 		man_endparse(curp->man);
761 	roff_endparse(curp->roff);
762 }
763 
764 static void
765 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
766 {
767 	struct buf	*svprimary;
768 	const char	*svfile;
769 	size_t		 offset;
770 	static int	 recursion_depth;
771 
772 	if (64 < recursion_depth) {
773 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
774 		return;
775 	}
776 
777 	/* Line number is per-file. */
778 	svfile = curp->file;
779 	curp->file = file;
780 	svprimary = curp->primary;
781 	curp->primary = &blk;
782 	curp->line = 1;
783 	recursion_depth++;
784 
785 	/* Skip an UTF-8 byte order mark. */
786 	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
787 	    (unsigned char)blk.buf[0] == 0xef &&
788 	    (unsigned char)blk.buf[1] == 0xbb &&
789 	    (unsigned char)blk.buf[2] == 0xbf) {
790 		offset = 3;
791 		curp->filenc &= ~MPARSE_LATIN1;
792 	} else
793 		offset = 0;
794 
795 	mparse_buf_r(curp, blk, offset, 1);
796 
797 	if (--recursion_depth == 0)
798 		mparse_end(curp);
799 
800 	curp->primary = svprimary;
801 	curp->file = svfile;
802 }
803 
804 /*
805  * Read the whole file into memory and call the parsers.
806  * Called recursively when an .so request is encountered.
807  */
808 enum mandoclevel
809 mparse_readfd(struct mparse *curp, int fd, const char *file)
810 {
811 	struct buf	 blk;
812 	int		 with_mmap;
813 	int		 save_filenc;
814 
815 	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
816 		save_filenc = curp->filenc;
817 		curp->filenc = curp->options &
818 		    (MPARSE_UTF8 | MPARSE_LATIN1);
819 		mparse_parse_buffer(curp, blk, file);
820 		curp->filenc = save_filenc;
821 		if (with_mmap)
822 			munmap(blk.buf, blk.sz);
823 		else
824 			free(blk.buf);
825 	}
826 	return curp->file_status;
827 }
828 
829 int
830 mparse_open(struct mparse *curp, const char *file)
831 {
832 	char		 *cp;
833 	int		  fd;
834 
835 	curp->file = file;
836 	cp = strrchr(file, '.');
837 	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
838 
839 	/* First try to use the filename as it is. */
840 
841 	if ((fd = open(file, O_RDONLY)) != -1)
842 		return fd;
843 
844 	/*
845 	 * If that doesn't work and the filename doesn't
846 	 * already  end in .gz, try appending .gz.
847 	 */
848 
849 	if ( ! curp->gzip) {
850 		mandoc_asprintf(&cp, "%s.gz", file);
851 		fd = open(cp, O_RDONLY);
852 		free(cp);
853 		if (fd != -1) {
854 			curp->gzip = 1;
855 			return fd;
856 		}
857 	}
858 
859 	/* Neither worked, give up. */
860 
861 	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
862 	return -1;
863 }
864 
865 struct mparse *
866 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
867     enum mandoc_os os_e, const char *os_s)
868 {
869 	struct mparse	*curp;
870 
871 	curp = mandoc_calloc(1, sizeof(struct mparse));
872 
873 	curp->options = options;
874 	curp->mmin = mmin;
875 	curp->mmsg = mmsg;
876 	curp->os_s = os_s;
877 
878 	curp->roff = roff_alloc(curp, options);
879 	curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
880 		curp->options & MPARSE_QUICK ? 1 : 0);
881 	if (curp->options & MPARSE_MDOC) {
882 		curp->man->macroset = MACROSET_MDOC;
883 		if (curp->man->mdocmac == NULL)
884 			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
885 	} else if (curp->options & MPARSE_MAN) {
886 		curp->man->macroset = MACROSET_MAN;
887 		if (curp->man->manmac == NULL)
888 			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
889 	}
890 	curp->man->first->tok = TOKEN_NONE;
891 	curp->man->meta.os_e = os_e;
892 	return curp;
893 }
894 
895 void
896 mparse_reset(struct mparse *curp)
897 {
898 	roff_reset(curp->roff);
899 	roff_man_reset(curp->man);
900 	free_buf_list(curp->secondary);
901 	curp->secondary = NULL;
902 
903 	free(curp->sodest);
904 	curp->sodest = NULL;
905 
906 	curp->file_status = MANDOCLEVEL_OK;
907 	curp->gzip = 0;
908 }
909 
910 void
911 mparse_free(struct mparse *curp)
912 {
913 	roffhash_free(curp->man->mdocmac);
914 	roffhash_free(curp->man->manmac);
915 	roff_man_free(curp->man);
916 	roff_free(curp->roff);
917 	free_buf_list(curp->secondary);
918 	free(curp->sodest);
919 	free(curp);
920 }
921 
922 void
923 mparse_result(struct mparse *curp, struct roff_man **man,
924 	char **sodest)
925 {
926 
927 	if (sodest && NULL != (*sodest = curp->sodest)) {
928 		*man = NULL;
929 		return;
930 	}
931 	if (man)
932 		*man = curp->man;
933 }
934 
935 void
936 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
937 {
938 	if (curp->file_status > *rc)
939 		*rc = curp->file_status;
940 }
941 
942 void
943 mandoc_vmsg(enum mandocerr t, struct mparse *m,
944 		int ln, int pos, const char *fmt, ...)
945 {
946 	char		 buf[256];
947 	va_list		 ap;
948 
949 	va_start(ap, fmt);
950 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
951 	va_end(ap);
952 
953 	mandoc_msg(t, m, ln, pos, buf);
954 }
955 
956 void
957 mandoc_msg(enum mandocerr er, struct mparse *m,
958 		int ln, int col, const char *msg)
959 {
960 	enum mandoclevel level;
961 
962 	if (er < m->mmin && er != MANDOCERR_FILE)
963 		return;
964 
965 	level = MANDOCLEVEL_UNSUPP;
966 	while (er < mandoclimits[level])
967 		level--;
968 
969 	if (m->mmsg)
970 		(*m->mmsg)(er, level, m->file, ln, col, msg);
971 
972 	if (m->file_status < level)
973 		m->file_status = level;
974 }
975 
976 const char *
977 mparse_strerror(enum mandocerr er)
978 {
979 
980 	return mandocerrs[er];
981 }
982 
983 const char *
984 mparse_strlevel(enum mandoclevel lvl)
985 {
986 	return mandoclevels[lvl];
987 }
988 
989 void
990 mparse_copy(const struct mparse *p)
991 {
992 	struct buf	*buf;
993 
994 	for (buf = p->secondary; buf != NULL; buf = buf->next)
995 		puts(buf->buf);
996 }
997