xref: /openbsd-src/usr.bin/mandoc/read.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$Id: read.c,v 1.47 2014/07/09 11:30:07 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/stat.h>
20 #include <sys/mman.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc.h"
33 #include "mandoc_aux.h"
34 #include "libmandoc.h"
35 #include "mdoc.h"
36 #include "man.h"
37 
/*
 * Maximum number of ROFF_REPARSE rounds mparse_buf_r() will recurse
 * into before it reports MANDOCERR_ROFFLOOP instead.  The counter it
 * is compared against (reparse_count) is reset for every line read
 * from a file buffer.
 */
#define	REPARSE_LIMIT	1000
39 
/*
 * A byte buffer together with its size.  Used in this file for the
 * raw file contents, for the logical line being assembled in
 * mparse_buf_r() (where sz is the allocated size), and for the
 * lookaside copy of parsed lines (where sz is the number of bytes
 * stored so far).
 */
struct	buf {
	char		 *buf; /* binary input buffer */
	size_t		  sz; /* size of binary buffer */
};
44 
/*
 * State of one parse handle, created by mparse_alloc() and released
 * by mparse_free().
 */
struct	mparse {
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  line; /* line number in the file */
	int		  options; /* parser options */
	struct man	 *pman; /* persistent man parser */
	struct mdoc	 *pmdoc; /* persistent mdoc parser */
	struct man	 *man; /* man parser */
	struct mdoc	 *mdoc; /* mdoc parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	char		 *sodest; /* filename pointed to by .so */
	int		  reparse_count; /* finite interp. stack */
	mandocmsg	  mmsg; /* warning/error message handler */
	const char	 *file; /* name of the file being parsed */
	struct buf	 *secondary; /* lookaside copy of parsed lines;
				      * NULL unless mparse_keep() ran */
	const char	 *defos; /* default operating system */
};
62 
/* Forward declarations of the file-local helpers defined below. */
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			int start);
static	void	  pset(const char *buf, int pos, struct mparse *curp);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
71 
/*
 * Indexed by enum mandoclevel.  mandoc_msg() starts at
 * MANDOCLEVEL_FATAL and walks downwards while the error code is
 * smaller than the entry for the current level, so each entry acts
 * as the lowest error code that is classified at that level.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_FATAL,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
81 
/*
 * Message text for each error code, indexed by enum mandocerr and
 * returned verbatim by mparse_strerror().  The entries must stay in
 * the same order as the enum constants (declared outside this file).
 * NOTE(review): the NULL entry under "system errors" presumably
 * belongs to MANDOCERR_SYSOPEN -- confirm against the enum.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing .TH macro, using \"unknown 1\"",
	"lower case character in document title",
	"unknown manual section",
	"unknown manual volume or arch",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"prologue macros out of order",
	"duplicate prologue macro",
	"incomplete prologue, terminated by",
	"skipping prologue macro in body",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"bad NAME section contents",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",

	/* related to macros and nesting */
	"obsolete macro",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	".Vt block has child macro",
	"fill mode already enabled, skipping .fi",
	"fill mode already disabled, skipping .nf",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty argument, using 0n",
	"argument count wrong",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"missing -std argument, adding it",

	/* related to bad macro arguments */
	"skipping argument",
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"unknown AT&T UNIX version",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	"generic error",

	/* related to equations */
	"unexpected equation scope closure",
	"equation scope open on exit",
	"overlapping equation scopes",
	"unexpected end of equation",
	"equation syntax error",

	/* related to tables */
	"bad table syntax",
	"bad table option",
	"bad table layout",
	"no table layout cells specified",
	"no table data cells specified",
	"ignore data in cell",
	"data block still open",
	"ignoring extra data cells",

	/* related to document structure and macros */
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"manual name not yet set",
	"argument count wrong",
	"unknown standard specifier",
	"uname(3) system call failed",
	"request requires a numeric argument",
	"missing list type, using -item",
	"skipping all arguments",
	"skipping excess arguments",

	"generic fatal error",

	"input too large",
	"not a manual",
	"column syntax is inconsistent",
	"NOT IMPLEMENTED: .Bd -file",
	"child violates parent syntax",
	"argument count wrong, violates syntax",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"no document prologue",
	"static buffer exhausted",

	/* system errors */
	NULL,
	"cannot stat file",
	"cannot read file",
};
213 
/*
 * Printable name of each severity level, indexed by enum mandoclevel
 * and returned verbatim by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"FATAL",
	"BADARG",
	"SYSERR"
};
223 
224 
225 static void
226 resize_buf(struct buf *buf, size_t initial)
227 {
228 
229 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
230 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
231 }
232 
233 static void
234 pset(const char *buf, int pos, struct mparse *curp)
235 {
236 	int		 i;
237 
238 	/*
239 	 * Try to intuit which kind of manual parser should be used.  If
240 	 * passed in by command-line (-man, -mdoc), then use that
241 	 * explicitly.  If passed as -mandoc, then try to guess from the
242 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
243 	 * default to -man, which is more lenient.
244 	 *
245 	 * Separate out pmdoc/pman from mdoc/man: the first persists
246 	 * through all parsers, while the latter is used per-parse.
247 	 */
248 
249 	if ('.' == buf[0] || '\'' == buf[0]) {
250 		for (i = 1; buf[i]; i++)
251 			if (' ' != buf[i] && '\t' != buf[i])
252 				break;
253 		if ('\0' == buf[i])
254 			return;
255 	}
256 
257 	if (MPARSE_MDOC & curp->options) {
258 		if (NULL == curp->pmdoc)
259 			curp->pmdoc = mdoc_alloc(
260 			    curp->roff, curp, curp->defos,
261 			    MPARSE_QUICK & curp->options ? 1 : 0);
262 		assert(curp->pmdoc);
263 		curp->mdoc = curp->pmdoc;
264 		return;
265 	} else if (MPARSE_MAN & curp->options) {
266 		if (NULL == curp->pman)
267 			curp->pman = man_alloc(curp->roff, curp,
268 			    MPARSE_QUICK & curp->options ? 1 : 0);
269 		assert(curp->pman);
270 		curp->man = curp->pman;
271 		return;
272 	}
273 
274 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
275 		if (NULL == curp->pmdoc)
276 			curp->pmdoc = mdoc_alloc(
277 			    curp->roff, curp, curp->defos,
278 			    MPARSE_QUICK & curp->options ? 1 : 0);
279 		assert(curp->pmdoc);
280 		curp->mdoc = curp->pmdoc;
281 		return;
282 	}
283 
284 	if (NULL == curp->pman)
285 		curp->pman = man_alloc(curp->roff, curp,
286 		    MPARSE_QUICK & curp->options ? 1 : 0);
287 	assert(curp->pman);
288 	curp->man = curp->pman;
289 }
290 
/*
 * Main parse routine for an opened file.  This is called for each
 * opened file and simply loops around the full input file, possibly
 * nesting (i.e., with `so').
 *
 * curp is the parse handle, blk the input to consume (passed by
 * value).  start is non-zero when blk holds file contents (see
 * mparse_parse_buffer()) and zero when the function re-enters itself
 * on a line that roff_parseln() asked to have reparsed; in the latter
 * case a NUL byte in blk ends the input.
 *
 * Each iteration of the outer loop assembles one logical line into
 * the local buffer "ln", hands it to roff_parseln(), and then acts on
 * the result code: reparse, append the next line, rerun, ignore,
 * descend into a `so' file, or pass the line on to the man/mdoc
 * parser.
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	enum rofferr	 rr;
	int		 i, of, rc;
	int		 pos; /* byte number in the ln buffer */
	int		 lnn; /* line number in the real file */
	unsigned char	 c;

	memset(&ln, 0, sizeof(struct buf));

	lnn = curp->line;
	pos = 0;

	for (i = 0; i < (int)blk.sz; ) {
		/* A NUL at the start of a line ends the input. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		/*
		 * Only for file input: publish the line number of the
		 * line about to be read and restart the reparse budget.
		 */
		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;
		}

		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for at least
			 * one backslash and one other character
			 * and the trailing NUL byte.
			 */

			if (pos + 2 >= (int)ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Warn about bogus characters.  If you're using
			 * non-ASCII encoding, you're screwing your
			 * readers.  Since I'd rather this not happen,
			 * I'll be helpful and replace these characters
			 * with "?", so we don't display gibberish.
			 * Note to manual writers: use special characters.
			 */

			c = (unsigned char) blk.buf[i];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_msg(MANDOCERR_BADCHAR, curp,
				    curp->line, pos, NULL);
				i++;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			/* \" and \# start a comment. */
			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < (int)blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/*
				 * Backout trailing whitespaces, but keep
				 * a space that follows a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Catch escaped bogus characters. */

			c = (unsigned char) blk.buf[i+1];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_msg(MANDOCERR_BADCHAR, curp,
				    curp->line, pos, NULL);
				i += 2;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Some other escape sequence, copy & cont. */

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/* Make room for the terminating NUL if necessary. */
		if (pos >= (int)ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 * The line is stored followed by a newline, and the
		 * buffer is kept NUL-terminated.
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line,
		    &ln.buf, &ln.sz, of, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/*
			 * Feed the rewritten line back through this
			 * function, unless the reparse budget is used up.
			 */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			pos = 0;
			continue;
		case ROFF_APPEND:
			/* Keep the line; the next input is added to it. */
			pos = (int)strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_IGN:
			pos = 0;
			continue;
		case ROFF_ERR:
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		case ROFF_SO:
			/*
			 * Without MPARSE_SO, a `so' request that is the
			 * last thing in the input is not followed; its
			 * target is recorded in sodest instead.
			 */
			if (0 == (MPARSE_SO & curp->options) &&
			    (i >= (int)blk.sz || '\0' == blk.buf[i])) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			mparse_readfd(curp, -1, ln.buf + of);
			if (MANDOCLEVEL_FATAL <= curp->file_status) {
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				break;
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/*
		 * If we encounter errors in the recursive parse, make
		 * sure we don't continue parsing.
		 */

		if (MANDOCLEVEL_FATAL <= curp->file_status)
			break;

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if ( ! (curp->man || curp->mdoc))
			pset(ln.buf + of, pos - of, curp);

		/*
		 * Lastly, push down into the parsers themselves.  One
		 * of these will have already been set in the pset()
		 * routine.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 */

		/* -1 means that no parser was invoked for this line. */
		rc = -1;

		if (ROFF_TBL == rr)
			while (NULL != (span = roff_span(curp->roff))) {
				rc = curp->man ?
				    man_addspan(curp->man, span) :
				    mdoc_addspan(curp->mdoc, span);
				if (0 == rc)
					break;
			}
		else if (ROFF_EQN == rr)
			rc = curp->mdoc ?
			    mdoc_addeqn(curp->mdoc,
				roff_eqn(curp->roff)) :
			    man_addeqn(curp->man,
				roff_eqn(curp->roff));
		else if (curp->man || curp->mdoc)
			rc = curp->man ?
			    man_parseln(curp->man,
				curp->line, ln.buf, of) :
			    mdoc_parseln(curp->mdoc,
				curp->line, ln.buf, of);

		/* Both 0 and 2 from a parser end the loop. */
		if (0 == rc) {
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		} else if (2 == rc)
			break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
587 
588 static int
589 read_whole_file(struct mparse *curp, const char *file, int fd,
590 		struct buf *fb, int *with_mmap)
591 {
592 	struct stat	 st;
593 	size_t		 off;
594 	ssize_t		 ssz;
595 
596 	if (-1 == fstat(fd, &st)) {
597 		curp->file_status = MANDOCLEVEL_SYSERR;
598 		if (curp->mmsg)
599 			(*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
600 			    file, 0, 0, strerror(errno));
601 		return(0);
602 	}
603 
604 	/*
605 	 * If we're a regular file, try just reading in the whole entry
606 	 * via mmap().  This is faster than reading it into blocks, and
607 	 * since each file is only a few bytes to begin with, I'm not
608 	 * concerned that this is going to tank any machines.
609 	 */
610 
611 	if (S_ISREG(st.st_mode)) {
612 		if (st.st_size >= (1U << 31)) {
613 			curp->file_status = MANDOCLEVEL_FATAL;
614 			if (curp->mmsg)
615 				(*curp->mmsg)(MANDOCERR_TOOLARGE,
616 				    curp->file_status, file, 0, 0, NULL);
617 			return(0);
618 		}
619 		*with_mmap = 1;
620 		fb->sz = (size_t)st.st_size;
621 		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
622 		if (fb->buf != MAP_FAILED)
623 			return(1);
624 	}
625 
626 	/*
627 	 * If this isn't a regular file (like, say, stdin), then we must
628 	 * go the old way and just read things in bit by bit.
629 	 */
630 
631 	*with_mmap = 0;
632 	off = 0;
633 	fb->sz = 0;
634 	fb->buf = NULL;
635 	for (;;) {
636 		if (off == fb->sz) {
637 			if (fb->sz == (1U << 31)) {
638 				curp->file_status = MANDOCLEVEL_FATAL;
639 				if (curp->mmsg)
640 					(*curp->mmsg)(MANDOCERR_TOOLARGE,
641 					    curp->file_status,
642 					    file, 0, 0, NULL);
643 				break;
644 			}
645 			resize_buf(fb, 65536);
646 		}
647 		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
648 		if (ssz == 0) {
649 			fb->sz = off;
650 			return(1);
651 		}
652 		if (ssz == -1) {
653 			curp->file_status = MANDOCLEVEL_SYSERR;
654 			if (curp->mmsg)
655 				(*curp->mmsg)(MANDOCERR_SYSREAD,
656 				    curp->file_status, file, 0, 0,
657 				    strerror(errno));
658 			break;
659 		}
660 		off += (size_t)ssz;
661 	}
662 
663 	free(fb->buf);
664 	fb->buf = NULL;
665 	return(0);
666 }
667 
668 static void
669 mparse_end(struct mparse *curp)
670 {
671 
672 	if (MANDOCLEVEL_FATAL <= curp->file_status)
673 		return;
674 
675 	if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
676 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
677 		return;
678 	}
679 
680 	if (curp->man && ! man_endparse(curp->man)) {
681 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
682 		return;
683 	}
684 
685 	if ( ! (curp->mdoc || curp->man || curp->sodest)) {
686 		mandoc_msg(MANDOCERR_NOTMANUAL, curp, 0, 0, NULL);
687 		curp->file_status = MANDOCLEVEL_FATAL;
688 		return;
689 	}
690 
691 	roff_endparse(curp->roff);
692 }
693 
694 static void
695 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
696 {
697 	const char	*svfile;
698 	static int	 recursion_depth;
699 
700 	if (64 < recursion_depth) {
701 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
702 		return;
703 	}
704 
705 	/* Line number is per-file. */
706 	svfile = curp->file;
707 	curp->file = file;
708 	curp->line = 1;
709 	recursion_depth++;
710 
711 	mparse_buf_r(curp, blk, 1);
712 
713 	if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
714 		mparse_end(curp);
715 
716 	curp->file = svfile;
717 }
718 
719 enum mandoclevel
720 mparse_readfd(struct mparse *curp, int fd, const char *file)
721 {
722 	struct buf	 blk;
723 	int		 with_mmap;
724 
725 	if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
726 		curp->file_status = MANDOCLEVEL_SYSERR;
727 		if (curp->mmsg)
728 			(*curp->mmsg)(MANDOCERR_SYSOPEN,
729 			    curp->file_status,
730 			    file, 0, 0, strerror(errno));
731 		goto out;
732 	}
733 
734 	/*
735 	 * Run for each opened file; may be called more than once for
736 	 * each full parse sequence if the opened file is nested (i.e.,
737 	 * from `so').  Simply sucks in the whole file and moves into
738 	 * the parse phase for the file.
739 	 */
740 
741 	if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
742 		goto out;
743 
744 	mparse_parse_buffer(curp, blk, file);
745 
746 	if (with_mmap)
747 		munmap(blk.buf, blk.sz);
748 	else
749 		free(blk.buf);
750 
751 	if (STDIN_FILENO != fd && -1 == close(fd))
752 		perror(file);
753 out:
754 	return(curp->file_status);
755 }
756 
757 struct mparse *
758 mparse_alloc(int options, enum mandoclevel wlevel,
759 		mandocmsg mmsg, const char *defos)
760 {
761 	struct mparse	*curp;
762 
763 	assert(wlevel <= MANDOCLEVEL_FATAL);
764 
765 	curp = mandoc_calloc(1, sizeof(struct mparse));
766 
767 	curp->options = options;
768 	curp->wlevel = wlevel;
769 	curp->mmsg = mmsg;
770 	curp->defos = defos;
771 
772 	curp->roff = roff_alloc(curp, options);
773 	return(curp);
774 }
775 
776 void
777 mparse_reset(struct mparse *curp)
778 {
779 
780 	roff_reset(curp->roff);
781 
782 	if (curp->mdoc)
783 		mdoc_reset(curp->mdoc);
784 	if (curp->man)
785 		man_reset(curp->man);
786 	if (curp->secondary)
787 		curp->secondary->sz = 0;
788 
789 	curp->file_status = MANDOCLEVEL_OK;
790 	curp->mdoc = NULL;
791 	curp->man = NULL;
792 
793 	free(curp->sodest);
794 	curp->sodest = NULL;
795 }
796 
797 void
798 mparse_free(struct mparse *curp)
799 {
800 
801 	if (curp->pmdoc)
802 		mdoc_free(curp->pmdoc);
803 	if (curp->pman)
804 		man_free(curp->pman);
805 	if (curp->roff)
806 		roff_free(curp->roff);
807 	if (curp->secondary)
808 		free(curp->secondary->buf);
809 
810 	free(curp->secondary);
811 	free(curp->sodest);
812 	free(curp);
813 }
814 
815 void
816 mparse_result(struct mparse *curp,
817 	struct mdoc **mdoc, struct man **man, char **sodest)
818 {
819 
820 	if (sodest && NULL != (*sodest = curp->sodest)) {
821 		*mdoc = NULL;
822 		*man = NULL;
823 		return;
824 	}
825 	if (mdoc)
826 		*mdoc = curp->mdoc;
827 	if (man)
828 		*man = curp->man;
829 }
830 
831 void
832 mandoc_vmsg(enum mandocerr t, struct mparse *m,
833 		int ln, int pos, const char *fmt, ...)
834 {
835 	char		 buf[256];
836 	va_list		 ap;
837 
838 	va_start(ap, fmt);
839 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
840 	va_end(ap);
841 
842 	mandoc_msg(t, m, ln, pos, buf);
843 }
844 
845 void
846 mandoc_msg(enum mandocerr er, struct mparse *m,
847 		int ln, int col, const char *msg)
848 {
849 	enum mandoclevel level;
850 
851 	level = MANDOCLEVEL_FATAL;
852 	while (er < mandoclimits[level])
853 		level--;
854 
855 	if (level < m->wlevel)
856 		return;
857 
858 	if (m->mmsg)
859 		(*m->mmsg)(er, level, m->file, ln, col, msg);
860 
861 	if (m->file_status < level)
862 		m->file_status = level;
863 }
864 
865 const char *
866 mparse_strerror(enum mandocerr er)
867 {
868 
869 	return(mandocerrs[er]);
870 }
871 
872 const char *
873 mparse_strlevel(enum mandoclevel lvl)
874 {
875 	return(mandoclevels[lvl]);
876 }
877 
878 void
879 mparse_keep(struct mparse *p)
880 {
881 
882 	assert(NULL == p->secondary);
883 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
884 }
885 
886 const char *
887 mparse_getkeep(const struct mparse *p)
888 {
889 
890 	assert(p->secondary);
891 	return(p->secondary->sz ? p->secondary->buf : NULL);
892 }
893