xref: /openbsd-src/usr.bin/mandoc/read.c (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
1 /*	$Id: read.c,v 1.24 2014/03/21 22:17:01 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/stat.h>
20 #include <sys/mman.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc.h"
33 #include "mandoc_aux.h"
34 #include "libmandoc.h"
35 #include "mdoc.h"
36 #include "man.h"
37 
38 #define	REPARSE_LIMIT	1000
39 
/*
 * A character buffer together with a byte count.
 * Depending on the user, the count is either the allocated size
 * (line buffers grown by resize_buf()) or the number of valid
 * data bytes (file contents, lookaside buffer).
 */
struct	buf {
	char		*buf;	/* start of the buffer, may be NULL */
	size_t		 sz;	/* number of bytes, see above */
};
44 
45 struct	mparse {
46 	enum mandoclevel  file_status; /* status of current parse */
47 	enum mandoclevel  wlevel; /* ignore messages below this */
48 	int		  line; /* line number in the file */
49 	int		  options; /* parser options */
50 	struct man	 *pman; /* persistent man parser */
51 	struct mdoc	 *pmdoc; /* persistent mdoc parser */
52 	struct man	 *man; /* man parser */
53 	struct mdoc	 *mdoc; /* mdoc parser */
54 	struct roff	 *roff; /* roff parser (!NULL) */
55 	char		 *sodest; /* filename pointed to by .so */
56 	int		  reparse_count; /* finite interp. stack */
57 	mandocmsg	  mmsg; /* warning/error message handler */
58 	const char	 *file;
59 	struct buf	 *secondary;
60 	char		 *defos; /* default operating system */
61 };
62 
/* File-local helpers, listed in the order of their definitions. */
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  pset(const char *buf, int pos, struct mparse *curp);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
71 
72 static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
73 	MANDOCERR_OK,
74 	MANDOCERR_WARNING,
75 	MANDOCERR_WARNING,
76 	MANDOCERR_ERROR,
77 	MANDOCERR_FATAL,
78 	MANDOCERR_MAX,
79 	MANDOCERR_MAX
80 };
81 
82 static	const char * const	mandocerrs[MANDOCERR_MAX] = {
83 	"ok",
84 
85 	"generic warning",
86 
87 	/* related to the prologue */
88 	"no title in document",
89 	"document title should be all caps",
90 	"unknown manual section",
91 	"unknown manual volume or arch",
92 	"date missing, using today's date",
93 	"cannot parse date, using it verbatim",
94 	"prologue macros out of order",
95 	"duplicate prologue macro",
96 	"macro not allowed in prologue",
97 	"macro not allowed in body",
98 
99 	/* related to document structure */
100 	".so is fragile, better use ln(1)",
101 	"NAME section must come first",
102 	"bad NAME section contents",
103 	"sections out of conventional order",
104 	"duplicate section name",
105 	"section header suited to sections 2, 3, and 9 only",
106 
107 	/* related to macros and nesting */
108 	"skipping obsolete macro",
109 	"skipping paragraph macro",
110 	"moving paragraph macro out of list",
111 	"skipping no-space macro",
112 	"blocks badly nested",
113 	"child violates parent syntax",
114 	"nested displays are not portable",
115 	"already in literal mode",
116 	"line scope broken",
117 
118 	/* related to missing macro arguments */
119 	"skipping empty macro",
120 	"argument count wrong",
121 	"missing display type",
122 	"list type must come first",
123 	"tag lists require a width argument",
124 	"missing font type",
125 	"skipping end of block that is not open",
126 
127 	/* related to bad macro arguments */
128 	"skipping argument",
129 	"duplicate argument",
130 	"duplicate display type",
131 	"duplicate list type",
132 	"unknown AT&T UNIX version",
133 	"bad Boolean value",
134 	"unknown font",
135 	"unknown standard specifier",
136 	"bad width argument",
137 
138 	/* related to plain text */
139 	"blank line in non-literal context",
140 	"tab in non-literal context",
141 	"end of line whitespace",
142 	"bad comment style",
143 	"bad escape sequence",
144 	"unterminated quoted string",
145 
146 	/* related to equations */
147 	"unexpected literal in equation",
148 
149 	"generic error",
150 
151 	/* related to equations */
152 	"unexpected equation scope closure",
153 	"equation scope open on exit",
154 	"overlapping equation scopes",
155 	"unexpected end of equation",
156 	"equation syntax error",
157 
158 	/* related to tables */
159 	"bad table syntax",
160 	"bad table option",
161 	"bad table layout",
162 	"no table layout cells specified",
163 	"no table data cells specified",
164 	"ignore data in cell",
165 	"data block still open",
166 	"ignoring extra data cells",
167 
168 	"input stack limit exceeded, infinite loop?",
169 	"skipping bad character",
170 	"escaped character not allowed in a name",
171 	"manual name not yet set",
172 	"skipping text before the first section header",
173 	"skipping unknown macro",
174 	"NOT IMPLEMENTED, please use groff: skipping request",
175 	"argument count wrong",
176 	"skipping column outside column list",
177 	"skipping end of block that is not open",
178 	"missing end of block",
179 	"scope open on exit",
180 	"uname(3) system call failed",
181 	"macro requires line argument(s)",
182 	"macro requires body argument(s)",
183 	"macro requires argument(s)",
184 	"request requires a numeric argument",
185 	"missing list type",
186 	"line argument(s) will be lost",
187 	"body argument(s) will be lost",
188 
189 	"generic fatal error",
190 
191 	"input too large",
192 	"not a manual",
193 	"column syntax is inconsistent",
194 	"NOT IMPLEMENTED: .Bd -file",
195 	"argument count wrong, violates syntax",
196 	"child violates parent syntax",
197 	"argument count wrong, violates syntax",
198 	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
199 	"no document body",
200 	"no document prologue",
201 	"static buffer exhausted",
202 
203 	/* system errors */
204 	"cannot open file",
205 	"cannot stat file",
206 	"cannot read file",
207 };
208 
209 static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
210 	"SUCCESS",
211 	"RESERVED",
212 	"WARNING",
213 	"ERROR",
214 	"FATAL",
215 	"BADARG",
216 	"SYSERR"
217 };
218 
219 static void
220 resize_buf(struct buf *buf, size_t initial)
221 {
222 
223 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
224 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
225 }
226 
227 static void
228 pset(const char *buf, int pos, struct mparse *curp)
229 {
230 	int		 i;
231 
232 	/*
233 	 * Try to intuit which kind of manual parser should be used.  If
234 	 * passed in by command-line (-man, -mdoc), then use that
235 	 * explicitly.  If passed as -mandoc, then try to guess from the
236 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
237 	 * default to -man, which is more lenient.
238 	 *
239 	 * Separate out pmdoc/pman from mdoc/man: the first persists
240 	 * through all parsers, while the latter is used per-parse.
241 	 */
242 
243 	if ('.' == buf[0] || '\'' == buf[0]) {
244 		for (i = 1; buf[i]; i++)
245 			if (' ' != buf[i] && '\t' != buf[i])
246 				break;
247 		if ('\0' == buf[i])
248 			return;
249 	}
250 
251 	if (MPARSE_MDOC & curp->options) {
252 		if (NULL == curp->pmdoc)
253 			curp->pmdoc = mdoc_alloc(
254 			    curp->roff, curp, curp->defos,
255 			    MPARSE_QUICK & curp->options ? 1 : 0);
256 		assert(curp->pmdoc);
257 		curp->mdoc = curp->pmdoc;
258 		return;
259 	} else if (MPARSE_MAN & curp->options) {
260 		if (NULL == curp->pman)
261 			curp->pman = man_alloc(curp->roff, curp,
262 			    MPARSE_QUICK & curp->options ? 1 : 0);
263 		assert(curp->pman);
264 		curp->man = curp->pman;
265 		return;
266 	}
267 
268 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
269 		if (NULL == curp->pmdoc)
270 			curp->pmdoc = mdoc_alloc(
271 			    curp->roff, curp, curp->defos,
272 			    MPARSE_QUICK & curp->options ? 1 : 0);
273 		assert(curp->pmdoc);
274 		curp->mdoc = curp->pmdoc;
275 		return;
276 	}
277 
278 	if (NULL == curp->pman)
279 		curp->pman = man_alloc(curp->roff, curp,
280 		    MPARSE_QUICK & curp->options ? 1 : 0);
281 	assert(curp->pman);
282 	curp->man = curp->pman;
283 }
284 
285 /*
286  * Main parse routine for an opened file.  This is called for each
287  * opened file and simply loops around the full input file, possibly
288  * nesting (i.e., with `so').
289  */
290 static void
291 mparse_buf_r(struct mparse *curp, struct buf blk, int start)
292 {
293 	const struct tbl_span	*span;
294 	struct buf	 ln;
295 	enum rofferr	 rr;
296 	int		 i, of, rc;
297 	int		 pos; /* byte number in the ln buffer */
298 	int		 lnn; /* line number in the real file */
299 	unsigned char	 c;
300 
301 	memset(&ln, 0, sizeof(struct buf));
302 
303 	lnn = curp->line;
304 	pos = 0;
305 
306 	for (i = 0; i < (int)blk.sz; ) {
307 		if (0 == pos && '\0' == blk.buf[i])
308 			break;
309 
310 		if (start) {
311 			curp->line = lnn;
312 			curp->reparse_count = 0;
313 		}
314 
315 		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
316 
317 			/*
318 			 * When finding an unescaped newline character,
319 			 * leave the character loop to process the line.
320 			 * Skip a preceding carriage return, if any.
321 			 */
322 
323 			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
324 			    '\n' == blk.buf[i + 1])
325 				++i;
326 			if ('\n' == blk.buf[i]) {
327 				++i;
328 				++lnn;
329 				break;
330 			}
331 
332 			/*
333 			 * Make sure we have space for at least
334 			 * one backslash and one other character
335 			 * and the trailing NUL byte.
336 			 */
337 
338 			if (pos + 2 >= (int)ln.sz)
339 				resize_buf(&ln, 256);
340 
341 			/*
342 			 * Warn about bogus characters.  If you're using
343 			 * non-ASCII encoding, you're screwing your
344 			 * readers.  Since I'd rather this not happen,
345 			 * I'll be helpful and replace these characters
346 			 * with "?", so we don't display gibberish.
347 			 * Note to manual writers: use special characters.
348 			 */
349 
350 			c = (unsigned char) blk.buf[i];
351 
352 			if ( ! (isascii(c) &&
353 					(isgraph(c) || isblank(c)))) {
354 				mandoc_msg(MANDOCERR_BADCHAR, curp,
355 						curp->line, pos, NULL);
356 				i++;
357 				ln.buf[pos++] = '?';
358 				continue;
359 			}
360 
361 			/* Trailing backslash = a plain char. */
362 
363 			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
364 				ln.buf[pos++] = blk.buf[i++];
365 				continue;
366 			}
367 
368 			/*
369 			 * Found escape and at least one other character.
370 			 * When it's a newline character, skip it.
371 			 * When there is a carriage return in between,
372 			 * skip that one as well.
373 			 */
374 
375 			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
376 			    '\n' == blk.buf[i + 2])
377 				++i;
378 			if ('\n' == blk.buf[i + 1]) {
379 				i += 2;
380 				++lnn;
381 				continue;
382 			}
383 
384 			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
385 				i += 2;
386 				/* Comment, skip to end of line */
387 				for (; i < (int)blk.sz; ++i) {
388 					if ('\n' == blk.buf[i]) {
389 						++i;
390 						++lnn;
391 						break;
392 					}
393 				}
394 
395 				/* Backout trailing whitespaces */
396 				for (; pos > 0; --pos) {
397 					if (ln.buf[pos - 1] != ' ')
398 						break;
399 					if (pos > 2 && ln.buf[pos - 2] == '\\')
400 						break;
401 				}
402 				break;
403 			}
404 
405 			/* Catch escaped bogus characters. */
406 
407 			c = (unsigned char) blk.buf[i+1];
408 
409 			if ( ! (isascii(c) &&
410 					(isgraph(c) || isblank(c)))) {
411 				mandoc_msg(MANDOCERR_BADCHAR, curp,
412 						curp->line, pos, NULL);
413 				i += 2;
414 				ln.buf[pos++] = '?';
415 				continue;
416 			}
417 
418 			/* Some other escape sequence, copy & cont. */
419 
420 			ln.buf[pos++] = blk.buf[i++];
421 			ln.buf[pos++] = blk.buf[i++];
422 		}
423 
424  		if (pos >= (int)ln.sz)
425 			resize_buf(&ln, 256);
426 
427 		ln.buf[pos] = '\0';
428 
429 		/*
430 		 * A significant amount of complexity is contained by
431 		 * the roff preprocessor.  It's line-oriented but can be
432 		 * expressed on one line, so we need at times to
433 		 * readjust our starting point and re-run it.  The roff
434 		 * preprocessor can also readjust the buffers with new
435 		 * data, so we pass them in wholesale.
436 		 */
437 
438 		of = 0;
439 
440 		/*
441 		 * Maintain a lookaside buffer of all parsed lines.  We
442 		 * only do this if mparse_keep() has been invoked (the
443 		 * buffer may be accessed with mparse_getkeep()).
444 		 */
445 
446 		if (curp->secondary) {
447 			curp->secondary->buf =
448 				mandoc_realloc
449 				(curp->secondary->buf,
450 				 curp->secondary->sz + pos + 2);
451 			memcpy(curp->secondary->buf +
452 					curp->secondary->sz,
453 					ln.buf, pos);
454 			curp->secondary->sz += pos;
455 			curp->secondary->buf
456 				[curp->secondary->sz] = '\n';
457 			curp->secondary->sz++;
458 			curp->secondary->buf
459 				[curp->secondary->sz] = '\0';
460 		}
461 rerun:
462 		rr = roff_parseln
463 			(curp->roff, curp->line,
464 			 &ln.buf, &ln.sz, of, &of);
465 
466 		switch (rr) {
467 		case (ROFF_REPARSE):
468 			if (REPARSE_LIMIT >= ++curp->reparse_count)
469 				mparse_buf_r(curp, ln, 0);
470 			else
471 				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
472 					curp->line, pos, NULL);
473 			pos = 0;
474 			continue;
475 		case (ROFF_APPEND):
476 			pos = (int)strlen(ln.buf);
477 			continue;
478 		case (ROFF_RERUN):
479 			goto rerun;
480 		case (ROFF_IGN):
481 			pos = 0;
482 			continue;
483 		case (ROFF_ERR):
484 			assert(MANDOCLEVEL_FATAL <= curp->file_status);
485 			break;
486 		case (ROFF_SO):
487 			if (0 == (MPARSE_SO & curp->options) &&
488 			    (i >= (int)blk.sz || '\0' == blk.buf[i])) {
489 				curp->sodest = mandoc_strdup(ln.buf + of);
490 				free(ln.buf);
491 				return;
492 			}
493 			/*
494 			 * We remove `so' clauses from our lookaside
495 			 * buffer because we're going to descend into
496 			 * the file recursively.
497 			 */
498 			if (curp->secondary)
499 				curp->secondary->sz -= pos + 1;
500 			mparse_readfd(curp, -1, ln.buf + of);
501 			if (MANDOCLEVEL_FATAL <= curp->file_status)
502 				break;
503 			pos = 0;
504 			continue;
505 		default:
506 			break;
507 		}
508 
509 		/*
510 		 * If we encounter errors in the recursive parse, make
511 		 * sure we don't continue parsing.
512 		 */
513 
514 		if (MANDOCLEVEL_FATAL <= curp->file_status)
515 			break;
516 
517 		/*
518 		 * If input parsers have not been allocated, do so now.
519 		 * We keep these instanced between parsers, but set them
520 		 * locally per parse routine since we can use different
521 		 * parsers with each one.
522 		 */
523 
524 		if ( ! (curp->man || curp->mdoc))
525 			pset(ln.buf + of, pos - of, curp);
526 
527 		/*
528 		 * Lastly, push down into the parsers themselves.  One
529 		 * of these will have already been set in the pset()
530 		 * routine.
531 		 * If libroff returns ROFF_TBL, then add it to the
532 		 * currently open parse.  Since we only get here if
533 		 * there does exist data (see tbl_data.c), we're
534 		 * guaranteed that something's been allocated.
535 		 * Do the same for ROFF_EQN.
536 		 */
537 
538 		rc = -1;
539 
540 		if (ROFF_TBL == rr)
541 			while (NULL != (span = roff_span(curp->roff))) {
542 				rc = curp->man ?
543 					man_addspan(curp->man, span) :
544 					mdoc_addspan(curp->mdoc, span);
545 				if (0 == rc)
546 					break;
547 			}
548 		else if (ROFF_EQN == rr)
549 			rc = curp->mdoc ?
550 				mdoc_addeqn(curp->mdoc,
551 					roff_eqn(curp->roff)) :
552 				man_addeqn(curp->man,
553 					roff_eqn(curp->roff));
554 		else if (curp->man || curp->mdoc)
555 			rc = curp->man ?
556 				man_parseln(curp->man,
557 					curp->line, ln.buf, of) :
558 				mdoc_parseln(curp->mdoc,
559 					curp->line, ln.buf, of);
560 
561 		if (0 == rc) {
562 			assert(MANDOCLEVEL_FATAL <= curp->file_status);
563 			break;
564 		} else if (2 == rc)
565 			break;
566 
567 		/* Temporary buffers typically are not full. */
568 
569 		if (0 == start && '\0' == blk.buf[i])
570 			break;
571 
572 		/* Start the next input line. */
573 
574 		pos = 0;
575 	}
576 
577 	free(ln.buf);
578 }
579 
580 static int
581 read_whole_file(struct mparse *curp, const char *file, int fd,
582 		struct buf *fb, int *with_mmap)
583 {
584 	struct stat	 st;
585 	size_t		 off;
586 	ssize_t		 ssz;
587 
588 	if (-1 == fstat(fd, &st)) {
589 		curp->file_status = MANDOCLEVEL_SYSERR;
590 		if (curp->mmsg)
591 			(*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
592 			    file, 0, 0, strerror(errno));
593 		return(0);
594 	}
595 
596 	/*
597 	 * If we're a regular file, try just reading in the whole entry
598 	 * via mmap().  This is faster than reading it into blocks, and
599 	 * since each file is only a few bytes to begin with, I'm not
600 	 * concerned that this is going to tank any machines.
601 	 */
602 
603 	if (S_ISREG(st.st_mode)) {
604 		if (st.st_size >= (1U << 31)) {
605 			curp->file_status = MANDOCLEVEL_FATAL;
606 			if (curp->mmsg)
607 				(*curp->mmsg)(MANDOCERR_TOOLARGE,
608 				    curp->file_status, file, 0, 0, NULL);
609 			return(0);
610 		}
611 		*with_mmap = 1;
612 		fb->sz = (size_t)st.st_size;
613 		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
614 		if (fb->buf != MAP_FAILED)
615 			return(1);
616 	}
617 
618 	/*
619 	 * If this isn't a regular file (like, say, stdin), then we must
620 	 * go the old way and just read things in bit by bit.
621 	 */
622 
623 	*with_mmap = 0;
624 	off = 0;
625 	fb->sz = 0;
626 	fb->buf = NULL;
627 	for (;;) {
628 		if (off == fb->sz) {
629 			if (fb->sz == (1U << 31)) {
630 				curp->file_status = MANDOCLEVEL_FATAL;
631 				if (curp->mmsg)
632 					(*curp->mmsg)(MANDOCERR_TOOLARGE,
633 					    curp->file_status,
634 					    file, 0, 0, NULL);
635 				break;
636 			}
637 			resize_buf(fb, 65536);
638 		}
639 		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
640 		if (ssz == 0) {
641 			fb->sz = off;
642 			return(1);
643 		}
644 		if (ssz == -1) {
645 			curp->file_status = MANDOCLEVEL_SYSERR;
646 			if (curp->mmsg)
647 				(*curp->mmsg)(MANDOCERR_SYSREAD,
648 				    curp->file_status, file, 0, 0,
649 				    strerror(errno));
650 			break;
651 		}
652 		off += (size_t)ssz;
653 	}
654 
655 	free(fb->buf);
656 	fb->buf = NULL;
657 	return(0);
658 }
659 
660 static void
661 mparse_end(struct mparse *curp)
662 {
663 
664 	if (MANDOCLEVEL_FATAL <= curp->file_status)
665 		return;
666 
667 	if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
668 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
669 		return;
670 	}
671 
672 	if (curp->man && ! man_endparse(curp->man)) {
673 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
674 		return;
675 	}
676 
677 	if ( ! (curp->mdoc || curp->man || curp->sodest)) {
678 		mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL);
679 		curp->file_status = MANDOCLEVEL_FATAL;
680 		return;
681 	}
682 
683 	roff_endparse(curp->roff);
684 }
685 
686 static void
687 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
688 {
689 	const char	*svfile;
690 	static int	 recursion_depth;
691 
692 	if (64 < recursion_depth) {
693 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
694 		return;
695 	}
696 
697 	/* Line number is per-file. */
698 	svfile = curp->file;
699 	curp->file = file;
700 	curp->line = 1;
701 	recursion_depth++;
702 
703 	mparse_buf_r(curp, blk, 1);
704 
705 	if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
706 		mparse_end(curp);
707 
708 	curp->file = svfile;
709 }
710 
711 enum mandoclevel
712 mparse_readfd(struct mparse *curp, int fd, const char *file)
713 {
714 	struct buf	 blk;
715 	int		 with_mmap;
716 
717 	if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
718 		curp->file_status = MANDOCLEVEL_SYSERR;
719 		if (curp->mmsg)
720 			(*curp->mmsg)(MANDOCERR_SYSOPEN,
721 			    curp->file_status,
722 			    file, 0, 0, strerror(errno));
723 		goto out;
724 	}
725 
726 	/*
727 	 * Run for each opened file; may be called more than once for
728 	 * each full parse sequence if the opened file is nested (i.e.,
729 	 * from `so').  Simply sucks in the whole file and moves into
730 	 * the parse phase for the file.
731 	 */
732 
733 	if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
734 		goto out;
735 
736 	mparse_parse_buffer(curp, blk, file);
737 
738 	if (with_mmap)
739 		munmap(blk.buf, blk.sz);
740 	else
741 		free(blk.buf);
742 
743 	if (STDIN_FILENO != fd && -1 == close(fd))
744 		perror(file);
745 out:
746 	return(curp->file_status);
747 }
748 
749 struct mparse *
750 mparse_alloc(int options, enum mandoclevel wlevel,
751 		mandocmsg mmsg, char *defos)
752 {
753 	struct mparse	*curp;
754 
755 	assert(wlevel <= MANDOCLEVEL_FATAL);
756 
757 	curp = mandoc_calloc(1, sizeof(struct mparse));
758 
759 	curp->options = options;
760 	curp->wlevel = wlevel;
761 	curp->mmsg = mmsg;
762 	curp->defos = defos;
763 
764 	curp->roff = roff_alloc(curp, options);
765 	return(curp);
766 }
767 
768 void
769 mparse_reset(struct mparse *curp)
770 {
771 
772 	roff_reset(curp->roff);
773 
774 	if (curp->mdoc)
775 		mdoc_reset(curp->mdoc);
776 	if (curp->man)
777 		man_reset(curp->man);
778 	if (curp->secondary)
779 		curp->secondary->sz = 0;
780 
781 	curp->file_status = MANDOCLEVEL_OK;
782 	curp->mdoc = NULL;
783 	curp->man = NULL;
784 
785 	free(curp->sodest);
786 	curp->sodest = NULL;
787 }
788 
789 void
790 mparse_free(struct mparse *curp)
791 {
792 
793 	if (curp->pmdoc)
794 		mdoc_free(curp->pmdoc);
795 	if (curp->pman)
796 		man_free(curp->pman);
797 	if (curp->roff)
798 		roff_free(curp->roff);
799 	if (curp->secondary)
800 		free(curp->secondary->buf);
801 
802 	free(curp->secondary);
803 	free(curp->sodest);
804 	free(curp);
805 }
806 
807 void
808 mparse_result(struct mparse *curp,
809 	struct mdoc **mdoc, struct man **man, char **sodest)
810 {
811 
812 	if (sodest && NULL != (*sodest = curp->sodest)) {
813 		*mdoc = NULL;
814 		*man = NULL;
815 		return;
816 	}
817 	if (mdoc)
818 		*mdoc = curp->mdoc;
819 	if (man)
820 		*man = curp->man;
821 }
822 
823 void
824 mandoc_vmsg(enum mandocerr t, struct mparse *m,
825 		int ln, int pos, const char *fmt, ...)
826 {
827 	char		 buf[256];
828 	va_list		 ap;
829 
830 	va_start(ap, fmt);
831 	vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
832 	va_end(ap);
833 
834 	mandoc_msg(t, m, ln, pos, buf);
835 }
836 
837 void
838 mandoc_msg(enum mandocerr er, struct mparse *m,
839 		int ln, int col, const char *msg)
840 {
841 	enum mandoclevel level;
842 
843 	level = MANDOCLEVEL_FATAL;
844 	while (er < mandoclimits[level])
845 		level--;
846 
847 	if (level < m->wlevel)
848 		return;
849 
850 	if (m->mmsg)
851 		(*m->mmsg)(er, level, m->file, ln, col, msg);
852 
853 	if (m->file_status < level)
854 		m->file_status = level;
855 }
856 
857 const char *
858 mparse_strerror(enum mandocerr er)
859 {
860 
861 	return(mandocerrs[er]);
862 }
863 
864 const char *
865 mparse_strlevel(enum mandoclevel lvl)
866 {
867 	return(mandoclevels[lvl]);
868 }
869 
870 void
871 mparse_keep(struct mparse *p)
872 {
873 
874 	assert(NULL == p->secondary);
875 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
876 }
877 
878 const char *
879 mparse_getkeep(const struct mparse *p)
880 {
881 
882 	assert(p->secondary);
883 	return(p->secondary->sz ? p->secondary->buf : NULL);
884 }
885