xref: /openbsd-src/usr.bin/mandoc/read.c (revision 6c6408334dbede3a2c0dcd9ff9c489157df0c856)
1 /*	$OpenBSD: read.c,v 1.166 2018/02/23 21:34:37 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 #include <sys/mman.h>
21 #include <sys/stat.h>
22 
23 #include <assert.h>
24 #include <ctype.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <zlib.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "libmandoc.h"
40 
/*
 * Maximum number of ROFF_REPARSE results accepted while processing
 * one top-level input line.  mparse_buf_r() resets the counter at the
 * start of each such line and reports MANDOCERR_ROFFLOOP once the
 * counter exceeds this value.
 */
#define	REPARSE_LIMIT	1000
42 
/*
 * State of one parser instance.
 * Allocated by mparse_alloc(), cleared for the next input file by
 * mparse_reset(), and released by mparse_free().
 */
struct	mparse {
	struct roff	 *roff; /* roff parser (!NULL) */
	struct roff_man	 *man; /* man parser */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *os_s; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandocerr	  mmin; /* ignore messages below this */
	int		  options; /* parser options */
	int		  gzip; /* current input file is gzipped */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
60 
/* Forward declarations of the file-local helper functions. */
static	void	  choose_parser(struct mparse *);
static	void	  resize_buf(struct buf *, size_t);
static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
69 
/*
 * Indexed by message level: the smallest error code that belongs to
 * that level.  mandoc_msg() starts at MANDOCLEVEL_UNSUPP and steps
 * downward until the error code is no longer below the entry,
 * which yields the level of the message.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
79 
/*
 * Message texts indexed by enum mandocerr; mparse_strerror() returns
 * entries of this table unchanged, including the one NULL entry.
 * NOTE(review): the entries presumably have to stay in the same order
 * as the enum mandocerr declaration in mandoc.h - confirm before
 * inserting or moving a message.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"base system convention",

	"Mdocdate found",
	"Mdocdate missing",
	"unknown architecture",
	"operating system explicitly specified",
	"RCS id missing",
	"referenced manual not found",

	"generic style suggestion",

	"legacy man(7) date format",
	"lower case character in document title",
	"duplicate RCS id",
	"possible typo in section name",
	"unterminated quoted argument",
	"useless macro",
	"consider using OS macro",
	"errnos out of order",
	"duplicate errno",
	"trailing delimiter",
	"no blank before trailing delimiter",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"function name without markup",
	"whitespace at end of input line",
	"bad comment style",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"date in the future, using it anyway",
	"missing Os macro, using \"\"",
	"late prologue macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without Nm before Nd",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing comma before name",
	"missing description line, using \"\"",
	"description line outside NAME section",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"cross reference to self",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"first macro on line",
	"line scope broken",
	"skipping blank line in line scope",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 6n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing argument, using next line",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing section argument",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"unknown library name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"new sentence, new line",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	NULL,
	"duplicate prologue macro",
	"skipping late title macro",
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"argument is not numeric, using 1",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
264 
/*
 * Names of the message levels, indexed by enum mandoclevel;
 * mparse_strlevel() returns entries of this table.
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"STYLE",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
274 
275 
276 static void
277 resize_buf(struct buf *buf, size_t initial)
278 {
279 
280 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
281 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
282 }
283 
284 static void
285 choose_parser(struct mparse *curp)
286 {
287 	char		*cp, *ep;
288 	int		 format;
289 
290 	/*
291 	 * If neither command line arguments -mdoc or -man select
292 	 * a parser nor the roff parser found a .Dd or .TH macro
293 	 * yet, look ahead in the main input buffer.
294 	 */
295 
296 	if ((format = roff_getformat(curp->roff)) == 0) {
297 		cp = curp->primary->buf;
298 		ep = cp + curp->primary->sz;
299 		while (cp < ep) {
300 			if (*cp == '.' || *cp == '\'') {
301 				cp++;
302 				if (cp[0] == 'D' && cp[1] == 'd') {
303 					format = MPARSE_MDOC;
304 					break;
305 				}
306 				if (cp[0] == 'T' && cp[1] == 'H') {
307 					format = MPARSE_MAN;
308 					break;
309 				}
310 			}
311 			cp = memchr(cp, '\n', ep - cp);
312 			if (cp == NULL)
313 				break;
314 			cp++;
315 		}
316 	}
317 
318 	if (format == MPARSE_MDOC) {
319 		curp->man->macroset = MACROSET_MDOC;
320 		if (curp->man->mdocmac == NULL)
321 			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
322 	} else {
323 		curp->man->macroset = MACROSET_MAN;
324 		if (curp->man->manmac == NULL)
325 			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
326 	}
327 	curp->man->first->tok = TOKEN_NONE;
328 }
329 
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * Arguments:
 *   curp   parser state
 *   blk    buffer to parse, passed by value
 *   i      byte offset in blk at which parsing begins
 *   start  non-zero for a file-level buffer: line numbers are counted,
 *          the reparse counter is reset for each line, and NUL bytes
 *          do not end the input; zero for a temporary buffer, where
 *          parsing stops at the first NUL byte
 *
 * Returns 0 only when a ROFF_REPARSE could not be completed (reparse
 * limit exceeded, or a nested call returned 0 while start is not 1);
 * returns 1 in all other cases, including the case where a trailing
 * .so request was merely recorded in curp->sodest.
 */
static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	struct buf	 ln;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	enum rofferr	 rr;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	unsigned char	 c;

	memset(&ln, 0, sizeof(ln));

	lnn = curp->line;
	pos = 0;

	while (i < blk.sz) {
		/* A NUL byte at the start of a line ends the input. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * On the first two lines, while both encodings
			 * are still candidates, let preconv_cue()
			 * determine the encoding.
			 */
			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Copy one input line from blk into ln. */
		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 11 bytes: "\\[u10ffff]\0"
			 */

			if (pos + 11 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				/*
				 * Without a usable encoding, or if
				 * preconv_encode() fails, report the
				 * byte and replace it with '?'.
				 */
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				/* A stray carriage return is dropped. */
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			ln.buf[pos++] = blk.buf[i++];
		}

		/* Ensure room for a final newline and the terminator. */
		if (pos + 1 >= ln.sz)
			resize_buf(&ln, 256);

		if (i == blk.sz || blk.buf[i] == '\0')
			ln.buf[pos++] = '\n';
		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/*
			 * Parse the line buffer again as a temporary
			 * buffer, starting at offset of, unless the
			 * reparse limit has been exceeded.
			 */
			if (++curp->reparse_count > REPARSE_LIMIT)
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
			    start == 1) {
				pos = 0;
				continue;
			}
			free(ln.buf);
			return 0;
		case ROFF_APPEND:
			/* Keep the line and append further input to it. */
			pos = strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_IGN:
			/* Discard the line. */
			pos = 0;
			continue;
		case ROFF_SO:
			/*
			 * Unless MPARSE_SO is set, an .so request at
			 * the end of the input is not followed;
			 * its target is only recorded in sodest.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return 1;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			/* mparse_open() overwrites curp->file. */
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: report it
				 * and parse a short replacement text
				 * naming the file instead.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/* Select mdoc(7) or man(7) if that is still undecided. */
		if (curp->man->macroset == MACROSET_NONE)
			choose_parser(curp);

		/* A return value of 2 from the line parser ends parsing. */
		if ((curp->man->macroset == MACROSET_MDOC ?
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
				break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
	return 1;
}
548 
549 static int
550 read_whole_file(struct mparse *curp, const char *file, int fd,
551 		struct buf *fb, int *with_mmap)
552 {
553 	struct stat	 st;
554 	gzFile		 gz;
555 	size_t		 off;
556 	ssize_t		 ssz;
557 	int		 gzerrnum, retval;
558 
559 	if (fstat(fd, &st) == -1) {
560 		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
561 		    "fstat: %s", strerror(errno));
562 		return 0;
563 	}
564 
565 	/*
566 	 * If we're a regular file, try just reading in the whole entry
567 	 * via mmap().  This is faster than reading it into blocks, and
568 	 * since each file is only a few bytes to begin with, I'm not
569 	 * concerned that this is going to tank any machines.
570 	 */
571 
572 	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
573 		if (st.st_size > 0x7fffffff) {
574 			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
575 			return 0;
576 		}
577 		*with_mmap = 1;
578 		fb->sz = (size_t)st.st_size;
579 		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
580 		if (fb->buf != MAP_FAILED)
581 			return 1;
582 	}
583 
584 	if (curp->gzip) {
585 		/*
586 		 * Duplicating the file descriptor is required
587 		 * because we will have to call gzclose(3)
588 		 * to free memory used internally by zlib,
589 		 * but that will also close the file descriptor,
590 		 * which this function must not do.
591 		 */
592 		if ((fd = dup(fd)) == -1) {
593 			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
594 			    "dup: %s", strerror(errno));
595 			return 0;
596 		}
597 		if ((gz = gzdopen(fd, "rb")) == NULL) {
598 			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
599 			    "gzdopen: %s", strerror(errno));
600 			close(fd);
601 			return 0;
602 		}
603 	} else
604 		gz = NULL;
605 
606 	/*
607 	 * If this isn't a regular file (like, say, stdin), then we must
608 	 * go the old way and just read things in bit by bit.
609 	 */
610 
611 	*with_mmap = 0;
612 	off = 0;
613 	retval = 0;
614 	fb->sz = 0;
615 	fb->buf = NULL;
616 	for (;;) {
617 		if (off == fb->sz) {
618 			if (fb->sz == (1U << 31)) {
619 				mandoc_msg(MANDOCERR_TOOLARGE, curp,
620 				    0, 0, NULL);
621 				break;
622 			}
623 			resize_buf(fb, 65536);
624 		}
625 		ssz = curp->gzip ?
626 		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
627 		    read(fd, fb->buf + (int)off, fb->sz - off);
628 		if (ssz == 0) {
629 			fb->sz = off;
630 			retval = 1;
631 			break;
632 		}
633 		if (ssz == -1) {
634 			if (curp->gzip)
635 				(void)gzerror(gz, &gzerrnum);
636 			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
637 			    curp->gzip && gzerrnum != Z_ERRNO ?
638 			    zError(gzerrnum) : strerror(errno));
639 			break;
640 		}
641 		off += (size_t)ssz;
642 	}
643 
644 	if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
645 		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
646 		    gzerrnum == Z_ERRNO ? strerror(errno) :
647 		    zError(gzerrnum));
648 	if (retval == 0) {
649 		free(fb->buf);
650 		fb->buf = NULL;
651 	}
652 	return retval;
653 }
654 
655 static void
656 mparse_end(struct mparse *curp)
657 {
658 	if (curp->man->macroset == MACROSET_NONE)
659 		curp->man->macroset = MACROSET_MAN;
660 	if (curp->man->macroset == MACROSET_MDOC)
661 		mdoc_endparse(curp->man);
662 	else
663 		man_endparse(curp->man);
664 	roff_endparse(curp->roff);
665 }
666 
667 static void
668 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
669 {
670 	struct buf	*svprimary;
671 	const char	*svfile;
672 	size_t		 offset;
673 	static int	 recursion_depth;
674 
675 	if (64 < recursion_depth) {
676 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
677 		return;
678 	}
679 
680 	/* Line number is per-file. */
681 	svfile = curp->file;
682 	curp->file = file;
683 	svprimary = curp->primary;
684 	curp->primary = &blk;
685 	curp->line = 1;
686 	recursion_depth++;
687 
688 	/* Skip an UTF-8 byte order mark. */
689 	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
690 	    (unsigned char)blk.buf[0] == 0xef &&
691 	    (unsigned char)blk.buf[1] == 0xbb &&
692 	    (unsigned char)blk.buf[2] == 0xbf) {
693 		offset = 3;
694 		curp->filenc &= ~MPARSE_LATIN1;
695 	} else
696 		offset = 0;
697 
698 	mparse_buf_r(curp, blk, offset, 1);
699 
700 	if (--recursion_depth == 0)
701 		mparse_end(curp);
702 
703 	curp->primary = svprimary;
704 	curp->file = svfile;
705 }
706 
707 /*
708  * Read the whole file into memory and call the parsers.
709  * Called recursively when an .so request is encountered.
710  */
711 enum mandoclevel
712 mparse_readfd(struct mparse *curp, int fd, const char *file)
713 {
714 	struct buf	 blk;
715 	int		 with_mmap;
716 	int		 save_filenc;
717 
718 	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
719 		save_filenc = curp->filenc;
720 		curp->filenc = curp->options &
721 		    (MPARSE_UTF8 | MPARSE_LATIN1);
722 		mparse_parse_buffer(curp, blk, file);
723 		curp->filenc = save_filenc;
724 		if (with_mmap)
725 			munmap(blk.buf, blk.sz);
726 		else
727 			free(blk.buf);
728 	}
729 	return curp->file_status;
730 }
731 
732 int
733 mparse_open(struct mparse *curp, const char *file)
734 {
735 	char		 *cp;
736 	int		  fd;
737 
738 	curp->file = file;
739 	cp = strrchr(file, '.');
740 	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
741 
742 	/* First try to use the filename as it is. */
743 
744 	if ((fd = open(file, O_RDONLY)) != -1)
745 		return fd;
746 
747 	/*
748 	 * If that doesn't work and the filename doesn't
749 	 * already  end in .gz, try appending .gz.
750 	 */
751 
752 	if ( ! curp->gzip) {
753 		mandoc_asprintf(&cp, "%s.gz", file);
754 		fd = open(cp, O_RDONLY);
755 		free(cp);
756 		if (fd != -1) {
757 			curp->gzip = 1;
758 			return fd;
759 		}
760 	}
761 
762 	/* Neither worked, give up. */
763 
764 	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
765 	return -1;
766 }
767 
768 struct mparse *
769 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
770     enum mandoc_os os_e, const char *os_s)
771 {
772 	struct mparse	*curp;
773 
774 	curp = mandoc_calloc(1, sizeof(struct mparse));
775 
776 	curp->options = options;
777 	curp->mmin = mmin;
778 	curp->mmsg = mmsg;
779 	curp->os_s = os_s;
780 
781 	curp->roff = roff_alloc(curp, options);
782 	curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
783 		curp->options & MPARSE_QUICK ? 1 : 0);
784 	if (curp->options & MPARSE_MDOC) {
785 		curp->man->macroset = MACROSET_MDOC;
786 		if (curp->man->mdocmac == NULL)
787 			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
788 	} else if (curp->options & MPARSE_MAN) {
789 		curp->man->macroset = MACROSET_MAN;
790 		if (curp->man->manmac == NULL)
791 			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
792 	}
793 	curp->man->first->tok = TOKEN_NONE;
794 	curp->man->meta.os_e = os_e;
795 	return curp;
796 }
797 
798 void
799 mparse_reset(struct mparse *curp)
800 {
801 	roff_reset(curp->roff);
802 	roff_man_reset(curp->man);
803 
804 	free(curp->sodest);
805 	curp->sodest = NULL;
806 
807 	if (curp->secondary)
808 		curp->secondary->sz = 0;
809 
810 	curp->file_status = MANDOCLEVEL_OK;
811 	curp->gzip = 0;
812 }
813 
814 void
815 mparse_free(struct mparse *curp)
816 {
817 
818 	roffhash_free(curp->man->mdocmac);
819 	roffhash_free(curp->man->manmac);
820 	roff_man_free(curp->man);
821 	roff_free(curp->roff);
822 	if (curp->secondary)
823 		free(curp->secondary->buf);
824 
825 	free(curp->secondary);
826 	free(curp->sodest);
827 	free(curp);
828 }
829 
830 void
831 mparse_result(struct mparse *curp, struct roff_man **man,
832 	char **sodest)
833 {
834 
835 	if (sodest && NULL != (*sodest = curp->sodest)) {
836 		*man = NULL;
837 		return;
838 	}
839 	if (man)
840 		*man = curp->man;
841 }
842 
843 void
844 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
845 {
846 	if (curp->file_status > *rc)
847 		*rc = curp->file_status;
848 }
849 
850 void
851 mandoc_vmsg(enum mandocerr t, struct mparse *m,
852 		int ln, int pos, const char *fmt, ...)
853 {
854 	char		 buf[256];
855 	va_list		 ap;
856 
857 	va_start(ap, fmt);
858 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
859 	va_end(ap);
860 
861 	mandoc_msg(t, m, ln, pos, buf);
862 }
863 
864 void
865 mandoc_msg(enum mandocerr er, struct mparse *m,
866 		int ln, int col, const char *msg)
867 {
868 	enum mandoclevel level;
869 
870 	if (er < m->mmin && er != MANDOCERR_FILE)
871 		return;
872 
873 	level = MANDOCLEVEL_UNSUPP;
874 	while (er < mandoclimits[level])
875 		level--;
876 
877 	if (m->mmsg)
878 		(*m->mmsg)(er, level, m->file, ln, col, msg);
879 
880 	if (m->file_status < level)
881 		m->file_status = level;
882 }
883 
884 const char *
885 mparse_strerror(enum mandocerr er)
886 {
887 
888 	return mandocerrs[er];
889 }
890 
891 const char *
892 mparse_strlevel(enum mandoclevel lvl)
893 {
894 	return mandoclevels[lvl];
895 }
896 
897 void
898 mparse_keep(struct mparse *p)
899 {
900 
901 	assert(NULL == p->secondary);
902 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
903 }
904 
905 const char *
906 mparse_getkeep(const struct mparse *p)
907 {
908 
909 	assert(p->secondary);
910 	return p->secondary->sz ? p->secondary->buf : NULL;
911 }
912