xref: /openbsd-src/usr.bin/mandoc/read.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: read.c,v 1.124 2016/07/19 16:22:34 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 #include <sys/mman.h>
21 #include <sys/stat.h>
22 
23 #include <assert.h>
24 #include <ctype.h>
25 #include <err.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 #include <zlib.h>
34 
35 #include "mandoc_aux.h"
36 #include "mandoc.h"
37 #include "roff.h"
38 #include "mdoc.h"
39 #include "man.h"
40 #include "libmandoc.h"
41 #include "roff_int.h"
42 
/*
 * Upper bound on how often one input line may be handed back for
 * re-parsing (ROFF_REPARSE) before mparse_buf_r() reports
 * MANDOCERR_ROFFLOOP instead of recursing again.
 */
#define	REPARSE_LIMIT	1000
44 
45 struct	mparse {
46 	struct roff_man	 *man; /* man parser */
47 	struct roff	 *roff; /* roff parser (!NULL) */
48 	char		 *sodest; /* filename pointed to by .so */
49 	const char	 *file; /* filename of current input file */
50 	struct buf	 *primary; /* buffer currently being parsed */
51 	struct buf	 *secondary; /* preprocessed copy of input */
52 	const char	 *defos; /* default operating system */
53 	mandocmsg	  mmsg; /* warning/error message handler */
54 	enum mandoclevel  file_status; /* status of current parse */
55 	enum mandoclevel  wlevel; /* ignore messages below this */
56 	int		  options; /* parser options */
57 	int		  gzip; /* current input file is gzipped */
58 	int		  filenc; /* encoding of the current file */
59 	int		  reparse_count; /* finite interp. stack */
60 	int		  line; /* line number in the file */
61 };
62 
/* Buffer handling and file input. */
static	void	  resize_buf(struct buf *buf, size_t initial);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);

/* Parsing proper. */
static	void	  choose_parser(struct mparse *curp);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
static	void	  mparse_end(struct mparse *curp);
71 
72 static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
73 	MANDOCERR_OK,
74 	MANDOCERR_WARNING,
75 	MANDOCERR_WARNING,
76 	MANDOCERR_ERROR,
77 	MANDOCERR_UNSUPP,
78 	MANDOCERR_MAX,
79 	MANDOCERR_MAX
80 };
81 
82 static	const char * const	mandocerrs[MANDOCERR_MAX] = {
83 	"ok",
84 
85 	"generic warning",
86 
87 	/* related to the prologue */
88 	"missing manual title, using UNTITLED",
89 	"missing manual title, using \"\"",
90 	"lower case character in document title",
91 	"missing manual section, using \"\"",
92 	"unknown manual section",
93 	"missing date, using today's date",
94 	"cannot parse date, using it verbatim",
95 	"missing Os macro, using \"\"",
96 	"duplicate prologue macro",
97 	"late prologue macro",
98 	"skipping late title macro",
99 	"prologue macros out of order",
100 
101 	/* related to document structure */
102 	".so is fragile, better use ln(1)",
103 	"no document body",
104 	"content before first section header",
105 	"first section is not \"NAME\"",
106 	"NAME section without name",
107 	"NAME section without description",
108 	"description not at the end of NAME",
109 	"bad NAME section content",
110 	"missing description line, using \"\"",
111 	"sections out of conventional order",
112 	"duplicate section title",
113 	"unexpected section",
114 	"unusual Xr order",
115 	"unusual Xr punctuation",
116 	"AUTHORS section without An macro",
117 
118 	/* related to macros and nesting */
119 	"obsolete macro",
120 	"macro neither callable nor escaped",
121 	"skipping paragraph macro",
122 	"moving paragraph macro out of list",
123 	"skipping no-space macro",
124 	"blocks badly nested",
125 	"nested displays are not portable",
126 	"moving content out of list",
127 	"fill mode already enabled, skipping",
128 	"fill mode already disabled, skipping",
129 	"line scope broken",
130 
131 	/* related to missing macro arguments */
132 	"skipping empty request",
133 	"conditional request controls empty scope",
134 	"skipping empty macro",
135 	"empty block",
136 	"empty argument, using 0n",
137 	"missing display type, using -ragged",
138 	"list type is not the first argument",
139 	"missing -width in -tag list, using 8n",
140 	"missing utility name, using \"\"",
141 	"missing function name, using \"\"",
142 	"empty head in list item",
143 	"empty list item",
144 	"missing font type, using \\fR",
145 	"unknown font type, using \\fR",
146 	"nothing follows prefix",
147 	"empty reference block",
148 	"missing -std argument, adding it",
149 	"missing option string, using \"\"",
150 	"missing resource identifier, using \"\"",
151 	"missing eqn box, using \"\"",
152 
153 	/* related to bad macro arguments */
154 	"unterminated quoted argument",
155 	"duplicate argument",
156 	"skipping duplicate argument",
157 	"skipping duplicate display type",
158 	"skipping duplicate list type",
159 	"skipping -width argument",
160 	"wrong number of cells",
161 	"unknown AT&T UNIX version",
162 	"comma in function argument",
163 	"parenthesis in function name",
164 	"invalid content in Rs block",
165 	"invalid Boolean argument",
166 	"unknown font, skipping request",
167 	"odd number of characters in request",
168 
169 	/* related to plain text */
170 	"blank line in fill mode, using .sp",
171 	"tab in filled text",
172 	"whitespace at end of input line",
173 	"bad comment style",
174 	"invalid escape sequence",
175 	"undefined string, using \"\"",
176 
177 	/* related to tables */
178 	"tbl line starts with span",
179 	"tbl column starts with span",
180 	"skipping vertical bar in tbl layout",
181 
182 	"generic error",
183 
184 	/* related to tables */
185 	"non-alphabetic character in tbl options",
186 	"skipping unknown tbl option",
187 	"missing tbl option argument",
188 	"wrong tbl option argument size",
189 	"empty tbl layout",
190 	"invalid character in tbl layout",
191 	"unmatched parenthesis in tbl layout",
192 	"tbl without any data cells",
193 	"ignoring data in spanned tbl cell",
194 	"ignoring extra tbl data cells",
195 	"data block open at end of tbl",
196 
197 	/* related to document structure and macros */
198 	NULL,
199 	"input stack limit exceeded, infinite loop?",
200 	"skipping bad character",
201 	"skipping unknown macro",
202 	"skipping insecure request",
203 	"skipping item outside list",
204 	"skipping column outside column list",
205 	"skipping end of block that is not open",
206 	"fewer RS blocks open, skipping",
207 	"inserting missing end of block",
208 	"appending missing end of block",
209 
210 	/* related to request and macro arguments */
211 	"escaped character not allowed in a name",
212 	"NOT IMPLEMENTED: Bd -file",
213 	"skipping display without arguments",
214 	"missing list type, using -item",
215 	"missing manual name, using \"\"",
216 	"uname(3) system call failed, using UNKNOWN",
217 	"unknown standard specifier",
218 	"skipping request without numeric argument",
219 	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
220 	".so request failed",
221 	"skipping all arguments",
222 	"skipping excess arguments",
223 	"divide by zero",
224 
225 	"unsupported feature",
226 	"input too large",
227 	"unsupported control character",
228 	"unsupported roff request",
229 	"eqn delim option in tbl",
230 	"unsupported tbl layout modifier",
231 	"ignoring macro in table",
232 };
233 
234 static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
235 	"SUCCESS",
236 	"RESERVED",
237 	"WARNING",
238 	"ERROR",
239 	"UNSUPP",
240 	"BADARG",
241 	"SYSERR"
242 };
243 
244 
245 static void
246 resize_buf(struct buf *buf, size_t initial)
247 {
248 
249 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
250 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
251 }
252 
253 static void
254 choose_parser(struct mparse *curp)
255 {
256 	char		*cp, *ep;
257 	int		 format;
258 
259 	/*
260 	 * If neither command line arguments -mdoc or -man select
261 	 * a parser nor the roff parser found a .Dd or .TH macro
262 	 * yet, look ahead in the main input buffer.
263 	 */
264 
265 	if ((format = roff_getformat(curp->roff)) == 0) {
266 		cp = curp->primary->buf;
267 		ep = cp + curp->primary->sz;
268 		while (cp < ep) {
269 			if (*cp == '.' || *cp == '\'') {
270 				cp++;
271 				if (cp[0] == 'D' && cp[1] == 'd') {
272 					format = MPARSE_MDOC;
273 					break;
274 				}
275 				if (cp[0] == 'T' && cp[1] == 'H') {
276 					format = MPARSE_MAN;
277 					break;
278 				}
279 			}
280 			cp = memchr(cp, '\n', ep - cp);
281 			if (cp == NULL)
282 				break;
283 			cp++;
284 		}
285 	}
286 
287 	if (format == MPARSE_MDOC) {
288 		mdoc_hash_init();
289 		curp->man->macroset = MACROSET_MDOC;
290 		curp->man->first->tok = TOKEN_NONE;
291 	} else {
292 		man_hash_init();
293 		curp->man->macroset = MACROSET_MAN;
294 		curp->man->first->tok = TOKEN_NONE;
295 	}
296 }
297 
298 /*
299  * Main parse routine for a buffer.
300  * It assumes encoding and line numbering are already set up.
301  * It can recurse directly (for invocations of user-defined
302  * macros, inline equations, and input line traps)
303  * and indirectly (for .so file inclusion).
304  */
305 static void
306 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
307 {
308 	const struct tbl_span	*span;
309 	struct buf	 ln;
310 	const char	*save_file;
311 	char		*cp;
312 	size_t		 pos; /* byte number in the ln buffer */
313 	enum rofferr	 rr;
314 	int		 of;
315 	int		 lnn; /* line number in the real file */
316 	int		 fd;
317 	unsigned char	 c;
318 
319 	memset(&ln, 0, sizeof(ln));
320 
321 	lnn = curp->line;
322 	pos = 0;
323 
324 	while (i < blk.sz) {
325 		if (0 == pos && '\0' == blk.buf[i])
326 			break;
327 
328 		if (start) {
329 			curp->line = lnn;
330 			curp->reparse_count = 0;
331 
332 			if (lnn < 3 &&
333 			    curp->filenc & MPARSE_UTF8 &&
334 			    curp->filenc & MPARSE_LATIN1)
335 				curp->filenc = preconv_cue(&blk, i);
336 		}
337 
338 		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
339 
340 			/*
341 			 * When finding an unescaped newline character,
342 			 * leave the character loop to process the line.
343 			 * Skip a preceding carriage return, if any.
344 			 */
345 
346 			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
347 			    '\n' == blk.buf[i + 1])
348 				++i;
349 			if ('\n' == blk.buf[i]) {
350 				++i;
351 				++lnn;
352 				break;
353 			}
354 
355 			/*
356 			 * Make sure we have space for the worst
357 			 * case of 11 bytes: "\\[u10ffff]\0"
358 			 */
359 
360 			if (pos + 11 > ln.sz)
361 				resize_buf(&ln, 256);
362 
363 			/*
364 			 * Encode 8-bit input.
365 			 */
366 
367 			c = blk.buf[i];
368 			if (c & 0x80) {
369 				if ( ! (curp->filenc && preconv_encode(
370 				    &blk, &i, &ln, &pos, &curp->filenc))) {
371 					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
372 					    curp->line, pos, "0x%x", c);
373 					ln.buf[pos++] = '?';
374 					i++;
375 				}
376 				continue;
377 			}
378 
379 			/*
380 			 * Exclude control characters.
381 			 */
382 
383 			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
384 				mandoc_vmsg(c == 0x00 || c == 0x04 ||
385 				    c > 0x0a ? MANDOCERR_CHAR_BAD :
386 				    MANDOCERR_CHAR_UNSUPP,
387 				    curp, curp->line, pos, "0x%x", c);
388 				i++;
389 				if (c != '\r')
390 					ln.buf[pos++] = '?';
391 				continue;
392 			}
393 
394 			/* Trailing backslash = a plain char. */
395 
396 			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
397 				ln.buf[pos++] = blk.buf[i++];
398 				continue;
399 			}
400 
401 			/*
402 			 * Found escape and at least one other character.
403 			 * When it's a newline character, skip it.
404 			 * When there is a carriage return in between,
405 			 * skip that one as well.
406 			 */
407 
408 			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
409 			    '\n' == blk.buf[i + 2])
410 				++i;
411 			if ('\n' == blk.buf[i + 1]) {
412 				i += 2;
413 				++lnn;
414 				continue;
415 			}
416 
417 			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
418 				i += 2;
419 				/* Comment, skip to end of line */
420 				for (; i < blk.sz; ++i) {
421 					if ('\n' == blk.buf[i]) {
422 						++i;
423 						++lnn;
424 						break;
425 					}
426 				}
427 
428 				/* Backout trailing whitespaces */
429 				for (; pos > 0; --pos) {
430 					if (ln.buf[pos - 1] != ' ')
431 						break;
432 					if (pos > 2 && ln.buf[pos - 2] == '\\')
433 						break;
434 				}
435 				break;
436 			}
437 
438 			/* Catch escaped bogus characters. */
439 
440 			c = (unsigned char) blk.buf[i+1];
441 
442 			if ( ! (isascii(c) &&
443 			    (isgraph(c) || isblank(c)))) {
444 				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
445 				    curp->line, pos, "0x%x", c);
446 				i += 2;
447 				ln.buf[pos++] = '?';
448 				continue;
449 			}
450 
451 			/* Some other escape sequence, copy & cont. */
452 
453 			ln.buf[pos++] = blk.buf[i++];
454 			ln.buf[pos++] = blk.buf[i++];
455 		}
456 
457 		if (pos >= ln.sz)
458 			resize_buf(&ln, 256);
459 
460 		ln.buf[pos] = '\0';
461 
462 		/*
463 		 * A significant amount of complexity is contained by
464 		 * the roff preprocessor.  It's line-oriented but can be
465 		 * expressed on one line, so we need at times to
466 		 * readjust our starting point and re-run it.  The roff
467 		 * preprocessor can also readjust the buffers with new
468 		 * data, so we pass them in wholesale.
469 		 */
470 
471 		of = 0;
472 
473 		/*
474 		 * Maintain a lookaside buffer of all parsed lines.  We
475 		 * only do this if mparse_keep() has been invoked (the
476 		 * buffer may be accessed with mparse_getkeep()).
477 		 */
478 
479 		if (curp->secondary) {
480 			curp->secondary->buf = mandoc_realloc(
481 			    curp->secondary->buf,
482 			    curp->secondary->sz + pos + 2);
483 			memcpy(curp->secondary->buf +
484 			    curp->secondary->sz,
485 			    ln.buf, pos);
486 			curp->secondary->sz += pos;
487 			curp->secondary->buf
488 				[curp->secondary->sz] = '\n';
489 			curp->secondary->sz++;
490 			curp->secondary->buf
491 				[curp->secondary->sz] = '\0';
492 		}
493 rerun:
494 		rr = roff_parseln(curp->roff, curp->line, &ln, &of);
495 
496 		switch (rr) {
497 		case ROFF_REPARSE:
498 			if (REPARSE_LIMIT >= ++curp->reparse_count)
499 				mparse_buf_r(curp, ln, of, 0);
500 			else
501 				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
502 				    curp->line, pos, NULL);
503 			pos = 0;
504 			continue;
505 		case ROFF_APPEND:
506 			pos = strlen(ln.buf);
507 			continue;
508 		case ROFF_RERUN:
509 			goto rerun;
510 		case ROFF_IGN:
511 			pos = 0;
512 			continue;
513 		case ROFF_SO:
514 			if ( ! (curp->options & MPARSE_SO) &&
515 			    (i >= blk.sz || blk.buf[i] == '\0')) {
516 				curp->sodest = mandoc_strdup(ln.buf + of);
517 				free(ln.buf);
518 				return;
519 			}
520 			/*
521 			 * We remove `so' clauses from our lookaside
522 			 * buffer because we're going to descend into
523 			 * the file recursively.
524 			 */
525 			if (curp->secondary)
526 				curp->secondary->sz -= pos + 1;
527 			save_file = curp->file;
528 			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
529 				mparse_readfd(curp, fd, ln.buf + of);
530 				close(fd);
531 				curp->file = save_file;
532 			} else {
533 				curp->file = save_file;
534 				mandoc_vmsg(MANDOCERR_SO_FAIL,
535 				    curp, curp->line, pos,
536 				    ".so %s", ln.buf + of);
537 				ln.sz = mandoc_asprintf(&cp,
538 				    ".sp\nSee the file %s.\n.sp",
539 				    ln.buf + of);
540 				free(ln.buf);
541 				ln.buf = cp;
542 				of = 0;
543 				mparse_buf_r(curp, ln, of, 0);
544 			}
545 			pos = 0;
546 			continue;
547 		default:
548 			break;
549 		}
550 
551 		if (curp->man->macroset == MACROSET_NONE)
552 			choose_parser(curp);
553 
554 		/*
555 		 * Lastly, push down into the parsers themselves.
556 		 * If libroff returns ROFF_TBL, then add it to the
557 		 * currently open parse.  Since we only get here if
558 		 * there does exist data (see tbl_data.c), we're
559 		 * guaranteed that something's been allocated.
560 		 * Do the same for ROFF_EQN.
561 		 */
562 
563 		if (rr == ROFF_TBL)
564 			while ((span = roff_span(curp->roff)) != NULL)
565 				roff_addtbl(curp->man, span);
566 		else if (rr == ROFF_EQN)
567 			roff_addeqn(curp->man, roff_eqn(curp->roff));
568 		else if ((curp->man->macroset == MACROSET_MDOC ?
569 		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
570 		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
571 				break;
572 
573 		/* Temporary buffers typically are not full. */
574 
575 		if (0 == start && '\0' == blk.buf[i])
576 			break;
577 
578 		/* Start the next input line. */
579 
580 		pos = 0;
581 	}
582 
583 	free(ln.buf);
584 }
585 
586 static int
587 read_whole_file(struct mparse *curp, const char *file, int fd,
588 		struct buf *fb, int *with_mmap)
589 {
590 	struct stat	 st;
591 	gzFile		 gz;
592 	size_t		 off;
593 	ssize_t		 ssz;
594 
595 	if (fstat(fd, &st) == -1)
596 		err((int)MANDOCLEVEL_SYSERR, "%s", file);
597 
598 	/*
599 	 * If we're a regular file, try just reading in the whole entry
600 	 * via mmap().  This is faster than reading it into blocks, and
601 	 * since each file is only a few bytes to begin with, I'm not
602 	 * concerned that this is going to tank any machines.
603 	 */
604 
605 	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
606 		if (st.st_size > 0x7fffffff) {
607 			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
608 			return 0;
609 		}
610 		*with_mmap = 1;
611 		fb->sz = (size_t)st.st_size;
612 		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
613 		if (fb->buf != MAP_FAILED)
614 			return 1;
615 	}
616 
617 	if (curp->gzip) {
618 		if ((gz = gzdopen(fd, "rb")) == NULL)
619 			err((int)MANDOCLEVEL_SYSERR, "%s", file);
620 	} else
621 		gz = NULL;
622 
623 	/*
624 	 * If this isn't a regular file (like, say, stdin), then we must
625 	 * go the old way and just read things in bit by bit.
626 	 */
627 
628 	*with_mmap = 0;
629 	off = 0;
630 	fb->sz = 0;
631 	fb->buf = NULL;
632 	for (;;) {
633 		if (off == fb->sz) {
634 			if (fb->sz == (1U << 31)) {
635 				mandoc_msg(MANDOCERR_TOOLARGE, curp,
636 				    0, 0, NULL);
637 				break;
638 			}
639 			resize_buf(fb, 65536);
640 		}
641 		ssz = curp->gzip ?
642 		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
643 		    read(fd, fb->buf + (int)off, fb->sz - off);
644 		if (ssz == 0) {
645 			fb->sz = off;
646 			return 1;
647 		}
648 		if (ssz == -1)
649 			err((int)MANDOCLEVEL_SYSERR, "%s", file);
650 		off += (size_t)ssz;
651 	}
652 
653 	free(fb->buf);
654 	fb->buf = NULL;
655 	return 0;
656 }
657 
658 static void
659 mparse_end(struct mparse *curp)
660 {
661 	if (curp->man->macroset == MACROSET_NONE)
662 		curp->man->macroset = MACROSET_MAN;
663 	if (curp->man->macroset == MACROSET_MDOC)
664 		mdoc_endparse(curp->man);
665 	else
666 		man_endparse(curp->man);
667 	roff_endparse(curp->roff);
668 }
669 
670 static void
671 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
672 {
673 	struct buf	*svprimary;
674 	const char	*svfile;
675 	size_t		 offset;
676 	static int	 recursion_depth;
677 
678 	if (64 < recursion_depth) {
679 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
680 		return;
681 	}
682 
683 	/* Line number is per-file. */
684 	svfile = curp->file;
685 	curp->file = file;
686 	svprimary = curp->primary;
687 	curp->primary = &blk;
688 	curp->line = 1;
689 	recursion_depth++;
690 
691 	/* Skip an UTF-8 byte order mark. */
692 	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
693 	    (unsigned char)blk.buf[0] == 0xef &&
694 	    (unsigned char)blk.buf[1] == 0xbb &&
695 	    (unsigned char)blk.buf[2] == 0xbf) {
696 		offset = 3;
697 		curp->filenc &= ~MPARSE_LATIN1;
698 	} else
699 		offset = 0;
700 
701 	mparse_buf_r(curp, blk, offset, 1);
702 
703 	if (--recursion_depth == 0)
704 		mparse_end(curp);
705 
706 	curp->primary = svprimary;
707 	curp->file = svfile;
708 }
709 
710 /*
711  * Read the whole file into memory and call the parsers.
712  * Called recursively when an .so request is encountered.
713  */
714 enum mandoclevel
715 mparse_readfd(struct mparse *curp, int fd, const char *file)
716 {
717 	struct buf	 blk;
718 	int		 with_mmap;
719 	int		 save_filenc;
720 
721 	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
722 		save_filenc = curp->filenc;
723 		curp->filenc = curp->options &
724 		    (MPARSE_UTF8 | MPARSE_LATIN1);
725 		mparse_parse_buffer(curp, blk, file);
726 		curp->filenc = save_filenc;
727 		if (with_mmap)
728 			munmap(blk.buf, blk.sz);
729 		else
730 			free(blk.buf);
731 	}
732 	return curp->file_status;
733 }
734 
735 int
736 mparse_open(struct mparse *curp, const char *file)
737 {
738 	char		 *cp;
739 	int		  fd;
740 
741 	curp->file = file;
742 	cp = strrchr(file, '.');
743 	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
744 
745 	/* First try to use the filename as it is. */
746 
747 	if ((fd = open(file, O_RDONLY)) != -1)
748 		return fd;
749 
750 	/*
751 	 * If that doesn't work and the filename doesn't
752 	 * already  end in .gz, try appending .gz.
753 	 */
754 
755 	if ( ! curp->gzip) {
756 		mandoc_asprintf(&cp, "%s.gz", file);
757 		fd = open(cp, O_RDONLY);
758 		free(cp);
759 		if (fd != -1) {
760 			curp->gzip = 1;
761 			return fd;
762 		}
763 	}
764 
765 	/* Neither worked, give up. */
766 
767 	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
768 	return -1;
769 }
770 
771 struct mparse *
772 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
773     const char *defos)
774 {
775 	struct mparse	*curp;
776 
777 	curp = mandoc_calloc(1, sizeof(struct mparse));
778 
779 	curp->options = options;
780 	curp->wlevel = wlevel;
781 	curp->mmsg = mmsg;
782 	curp->defos = defos;
783 
784 	curp->roff = roff_alloc(curp, options);
785 	curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
786 		curp->options & MPARSE_QUICK ? 1 : 0);
787 	if (curp->options & MPARSE_MDOC) {
788 		mdoc_hash_init();
789 		curp->man->macroset = MACROSET_MDOC;
790 	} else if (curp->options & MPARSE_MAN) {
791 		man_hash_init();
792 		curp->man->macroset = MACROSET_MAN;
793 	}
794 	curp->man->first->tok = TOKEN_NONE;
795 	return curp;
796 }
797 
798 void
799 mparse_reset(struct mparse *curp)
800 {
801 	roff_reset(curp->roff);
802 	roff_man_reset(curp->man);
803 	if (curp->secondary)
804 		curp->secondary->sz = 0;
805 
806 	curp->file_status = MANDOCLEVEL_OK;
807 
808 	free(curp->sodest);
809 	curp->sodest = NULL;
810 }
811 
812 void
813 mparse_free(struct mparse *curp)
814 {
815 
816 	roff_man_free(curp->man);
817 	if (curp->roff)
818 		roff_free(curp->roff);
819 	if (curp->secondary)
820 		free(curp->secondary->buf);
821 
822 	free(curp->secondary);
823 	free(curp->sodest);
824 	free(curp);
825 }
826 
827 void
828 mparse_result(struct mparse *curp, struct roff_man **man,
829 	char **sodest)
830 {
831 
832 	if (sodest && NULL != (*sodest = curp->sodest)) {
833 		*man = NULL;
834 		return;
835 	}
836 	if (man)
837 		*man = curp->man;
838 }
839 
840 void
841 mandoc_vmsg(enum mandocerr t, struct mparse *m,
842 		int ln, int pos, const char *fmt, ...)
843 {
844 	char		 buf[256];
845 	va_list		 ap;
846 
847 	va_start(ap, fmt);
848 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
849 	va_end(ap);
850 
851 	mandoc_msg(t, m, ln, pos, buf);
852 }
853 
854 void
855 mandoc_msg(enum mandocerr er, struct mparse *m,
856 		int ln, int col, const char *msg)
857 {
858 	enum mandoclevel level;
859 
860 	level = MANDOCLEVEL_UNSUPP;
861 	while (er < mandoclimits[level])
862 		level--;
863 
864 	if (level < m->wlevel && er != MANDOCERR_FILE)
865 		return;
866 
867 	if (m->mmsg)
868 		(*m->mmsg)(er, level, m->file, ln, col, msg);
869 
870 	if (m->file_status < level)
871 		m->file_status = level;
872 }
873 
874 const char *
875 mparse_strerror(enum mandocerr er)
876 {
877 
878 	return mandocerrs[er];
879 }
880 
881 const char *
882 mparse_strlevel(enum mandoclevel lvl)
883 {
884 	return mandoclevels[lvl];
885 }
886 
887 void
888 mparse_keep(struct mparse *p)
889 {
890 
891 	assert(NULL == p->secondary);
892 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
893 }
894 
895 const char *
896 mparse_getkeep(const struct mparse *p)
897 {
898 
899 	assert(p->secondary);
900 	return p->secondary->sz ? p->secondary->buf : NULL;
901 }
902