xref: /minix3/external/bsd/mdocml/dist/read.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc /*	Id: read.c,v 1.40 2014/01/02 16:29:55 schwarze Exp  */
292395e9cSLionel Sambuc /*
392395e9cSLionel Sambuc  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*0a6a1f1dSLionel Sambuc  * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
592395e9cSLionel Sambuc  *
692395e9cSLionel Sambuc  * Permission to use, copy, modify, and distribute this software for any
792395e9cSLionel Sambuc  * purpose with or without fee is hereby granted, provided that the above
892395e9cSLionel Sambuc  * copyright notice and this permission notice appear in all copies.
992395e9cSLionel Sambuc  *
1092395e9cSLionel Sambuc  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1192395e9cSLionel Sambuc  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1292395e9cSLionel Sambuc  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1392395e9cSLionel Sambuc  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1492395e9cSLionel Sambuc  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1592395e9cSLionel Sambuc  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1692395e9cSLionel Sambuc  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1792395e9cSLionel Sambuc  */
1892395e9cSLionel Sambuc #ifdef HAVE_CONFIG_H
1992395e9cSLionel Sambuc #include "config.h"
2092395e9cSLionel Sambuc #endif
2192395e9cSLionel Sambuc 
2292395e9cSLionel Sambuc #ifdef HAVE_MMAP
2392395e9cSLionel Sambuc # include <sys/stat.h>
2492395e9cSLionel Sambuc # include <sys/mman.h>
2592395e9cSLionel Sambuc #endif
2692395e9cSLionel Sambuc 
2792395e9cSLionel Sambuc #include <assert.h>
2892395e9cSLionel Sambuc #include <ctype.h>
29*0a6a1f1dSLionel Sambuc #include <errno.h>
3092395e9cSLionel Sambuc #include <fcntl.h>
3192395e9cSLionel Sambuc #include <stdarg.h>
3292395e9cSLionel Sambuc #include <stdint.h>
3392395e9cSLionel Sambuc #include <stdio.h>
3492395e9cSLionel Sambuc #include <stdlib.h>
3592395e9cSLionel Sambuc #include <string.h>
3692395e9cSLionel Sambuc #include <unistd.h>
3792395e9cSLionel Sambuc 
3892395e9cSLionel Sambuc #include "mandoc.h"
3992395e9cSLionel Sambuc #include "libmandoc.h"
4092395e9cSLionel Sambuc #include "mdoc.h"
4192395e9cSLionel Sambuc #include "man.h"
4292395e9cSLionel Sambuc #include "main.h"
4392395e9cSLionel Sambuc 
4492395e9cSLionel Sambuc #define	REPARSE_LIMIT	1000
4592395e9cSLionel Sambuc 
/*
 * Descriptor for a block of raw input bytes: where the bytes live
 * and how many of them there are.
 */
struct	buf {
	char		*buf;	/* first byte of the binary input */
	size_t		 sz;	/* size of the binary buffer, in bytes */
};
5092395e9cSLionel Sambuc 
5192395e9cSLionel Sambuc struct	mparse {
5292395e9cSLionel Sambuc 	enum mandoclevel  file_status; /* status of current parse */
5392395e9cSLionel Sambuc 	enum mandoclevel  wlevel; /* ignore messages below this */
5492395e9cSLionel Sambuc 	int		  line; /* line number in the file */
5592395e9cSLionel Sambuc 	enum mparset	  inttype; /* which parser to use */
5692395e9cSLionel Sambuc 	struct man	 *pman; /* persistent man parser */
5792395e9cSLionel Sambuc 	struct mdoc	 *pmdoc; /* persistent mdoc parser */
5892395e9cSLionel Sambuc 	struct man	 *man; /* man parser */
5992395e9cSLionel Sambuc 	struct mdoc	 *mdoc; /* mdoc parser */
6092395e9cSLionel Sambuc 	struct roff	 *roff; /* roff parser (!NULL) */
6192395e9cSLionel Sambuc 	int		  reparse_count; /* finite interp. stack */
6292395e9cSLionel Sambuc 	mandocmsg	  mmsg; /* warning/error message handler */
6392395e9cSLionel Sambuc 	void		 *arg; /* argument to mmsg */
6492395e9cSLionel Sambuc 	const char	 *file;
6592395e9cSLionel Sambuc 	struct buf	 *secondary;
66*0a6a1f1dSLionel Sambuc 	char		 *defos; /* default operating system */
6792395e9cSLionel Sambuc };
6892395e9cSLionel Sambuc 
/* Grow a buffer: double it, or give it its initial size. */
static	void	  resize_buf(struct buf *buf, size_t initial);
/* Parse one buffer line by line, recursing for reparses. */
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			int start);
/* Choose and, if needed, allocate the man or mdoc parser. */
static	void	  pset(const char *buf, int pos, struct mparse *curp);
/* Load the whole content of a descriptor into a buffer. */
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
7792395e9cSLionel Sambuc 
7892395e9cSLionel Sambuc static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
7992395e9cSLionel Sambuc 	MANDOCERR_OK,
8092395e9cSLionel Sambuc 	MANDOCERR_WARNING,
8192395e9cSLionel Sambuc 	MANDOCERR_WARNING,
8292395e9cSLionel Sambuc 	MANDOCERR_ERROR,
8392395e9cSLionel Sambuc 	MANDOCERR_FATAL,
8492395e9cSLionel Sambuc 	MANDOCERR_MAX,
8592395e9cSLionel Sambuc 	MANDOCERR_MAX
8692395e9cSLionel Sambuc };
8792395e9cSLionel Sambuc 
8892395e9cSLionel Sambuc static	const char * const	mandocerrs[MANDOCERR_MAX] = {
8992395e9cSLionel Sambuc 	"ok",
9092395e9cSLionel Sambuc 
9192395e9cSLionel Sambuc 	"generic warning",
9292395e9cSLionel Sambuc 
9392395e9cSLionel Sambuc 	/* related to the prologue */
9492395e9cSLionel Sambuc 	"no title in document",
9592395e9cSLionel Sambuc 	"document title should be all caps",
9692395e9cSLionel Sambuc 	"unknown manual section",
97*0a6a1f1dSLionel Sambuc 	"unknown manual volume or arch",
9892395e9cSLionel Sambuc 	"date missing, using today's date",
9992395e9cSLionel Sambuc 	"cannot parse date, using it verbatim",
10092395e9cSLionel Sambuc 	"prologue macros out of order",
10192395e9cSLionel Sambuc 	"duplicate prologue macro",
10292395e9cSLionel Sambuc 	"macro not allowed in prologue",
10392395e9cSLionel Sambuc 	"macro not allowed in body",
10492395e9cSLionel Sambuc 
10592395e9cSLionel Sambuc 	/* related to document structure */
10692395e9cSLionel Sambuc 	".so is fragile, better use ln(1)",
10792395e9cSLionel Sambuc 	"NAME section must come first",
10892395e9cSLionel Sambuc 	"bad NAME section contents",
10992395e9cSLionel Sambuc 	"sections out of conventional order",
11092395e9cSLionel Sambuc 	"duplicate section name",
111*0a6a1f1dSLionel Sambuc 	"section header suited to sections 2, 3, and 9 only",
11292395e9cSLionel Sambuc 
11392395e9cSLionel Sambuc 	/* related to macros and nesting */
11492395e9cSLionel Sambuc 	"skipping obsolete macro",
11592395e9cSLionel Sambuc 	"skipping paragraph macro",
116*0a6a1f1dSLionel Sambuc 	"moving paragraph macro out of list",
11792395e9cSLionel Sambuc 	"skipping no-space macro",
11892395e9cSLionel Sambuc 	"blocks badly nested",
11992395e9cSLionel Sambuc 	"child violates parent syntax",
12092395e9cSLionel Sambuc 	"nested displays are not portable",
12192395e9cSLionel Sambuc 	"already in literal mode",
12292395e9cSLionel Sambuc 	"line scope broken",
12392395e9cSLionel Sambuc 
12492395e9cSLionel Sambuc 	/* related to missing macro arguments */
12592395e9cSLionel Sambuc 	"skipping empty macro",
12692395e9cSLionel Sambuc 	"argument count wrong",
12792395e9cSLionel Sambuc 	"missing display type",
12892395e9cSLionel Sambuc 	"list type must come first",
12992395e9cSLionel Sambuc 	"tag lists require a width argument",
13092395e9cSLionel Sambuc 	"missing font type",
13192395e9cSLionel Sambuc 	"skipping end of block that is not open",
13292395e9cSLionel Sambuc 
13392395e9cSLionel Sambuc 	/* related to bad macro arguments */
13492395e9cSLionel Sambuc 	"skipping argument",
13592395e9cSLionel Sambuc 	"duplicate argument",
13692395e9cSLionel Sambuc 	"duplicate display type",
13792395e9cSLionel Sambuc 	"duplicate list type",
13892395e9cSLionel Sambuc 	"unknown AT&T UNIX version",
13992395e9cSLionel Sambuc 	"bad Boolean value",
14092395e9cSLionel Sambuc 	"unknown font",
14192395e9cSLionel Sambuc 	"unknown standard specifier",
14292395e9cSLionel Sambuc 	"bad width argument",
14392395e9cSLionel Sambuc 
14492395e9cSLionel Sambuc 	/* related to plain text */
14592395e9cSLionel Sambuc 	"blank line in non-literal context",
14692395e9cSLionel Sambuc 	"tab in non-literal context",
14792395e9cSLionel Sambuc 	"end of line whitespace",
14892395e9cSLionel Sambuc 	"bad comment style",
14992395e9cSLionel Sambuc 	"bad escape sequence",
15092395e9cSLionel Sambuc 	"unterminated quoted string",
15192395e9cSLionel Sambuc 
15292395e9cSLionel Sambuc 	/* related to equations */
15392395e9cSLionel Sambuc 	"unexpected literal in equation",
15492395e9cSLionel Sambuc 
15592395e9cSLionel Sambuc 	"generic error",
15692395e9cSLionel Sambuc 
15792395e9cSLionel Sambuc 	/* related to equations */
15892395e9cSLionel Sambuc 	"unexpected equation scope closure",
15992395e9cSLionel Sambuc 	"equation scope open on exit",
16092395e9cSLionel Sambuc 	"overlapping equation scopes",
16192395e9cSLionel Sambuc 	"unexpected end of equation",
16292395e9cSLionel Sambuc 	"equation syntax error",
16392395e9cSLionel Sambuc 
16492395e9cSLionel Sambuc 	/* related to tables */
16592395e9cSLionel Sambuc 	"bad table syntax",
16692395e9cSLionel Sambuc 	"bad table option",
16792395e9cSLionel Sambuc 	"bad table layout",
16892395e9cSLionel Sambuc 	"no table layout cells specified",
16992395e9cSLionel Sambuc 	"no table data cells specified",
17092395e9cSLionel Sambuc 	"ignore data in cell",
17192395e9cSLionel Sambuc 	"data block still open",
17292395e9cSLionel Sambuc 	"ignoring extra data cells",
17392395e9cSLionel Sambuc 
17492395e9cSLionel Sambuc 	"input stack limit exceeded, infinite loop?",
17592395e9cSLionel Sambuc 	"skipping bad character",
17692395e9cSLionel Sambuc 	"escaped character not allowed in a name",
177*0a6a1f1dSLionel Sambuc 	"manual name not yet set",
17892395e9cSLionel Sambuc 	"skipping text before the first section header",
17992395e9cSLionel Sambuc 	"skipping unknown macro",
18092395e9cSLionel Sambuc 	"NOT IMPLEMENTED, please use groff: skipping request",
18192395e9cSLionel Sambuc 	"argument count wrong",
182*0a6a1f1dSLionel Sambuc 	"skipping column outside column list",
18392395e9cSLionel Sambuc 	"skipping end of block that is not open",
18492395e9cSLionel Sambuc 	"missing end of block",
18592395e9cSLionel Sambuc 	"scope open on exit",
18692395e9cSLionel Sambuc 	"uname(3) system call failed",
18792395e9cSLionel Sambuc 	"macro requires line argument(s)",
18892395e9cSLionel Sambuc 	"macro requires body argument(s)",
18992395e9cSLionel Sambuc 	"macro requires argument(s)",
190*0a6a1f1dSLionel Sambuc 	"request requires a numeric argument",
19192395e9cSLionel Sambuc 	"missing list type",
19292395e9cSLionel Sambuc 	"line argument(s) will be lost",
19392395e9cSLionel Sambuc 	"body argument(s) will be lost",
19492395e9cSLionel Sambuc 
19592395e9cSLionel Sambuc 	"generic fatal error",
19692395e9cSLionel Sambuc 
197*0a6a1f1dSLionel Sambuc 	"input too large",
19892395e9cSLionel Sambuc 	"not a manual",
19992395e9cSLionel Sambuc 	"column syntax is inconsistent",
20092395e9cSLionel Sambuc 	"NOT IMPLEMENTED: .Bd -file",
20192395e9cSLionel Sambuc 	"argument count wrong, violates syntax",
20292395e9cSLionel Sambuc 	"child violates parent syntax",
20392395e9cSLionel Sambuc 	"argument count wrong, violates syntax",
20492395e9cSLionel Sambuc 	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
20592395e9cSLionel Sambuc 	"no document body",
20692395e9cSLionel Sambuc 	"no document prologue",
20792395e9cSLionel Sambuc 	"static buffer exhausted",
208*0a6a1f1dSLionel Sambuc 
209*0a6a1f1dSLionel Sambuc 	/* system errors */
210*0a6a1f1dSLionel Sambuc 	"cannot open file",
211*0a6a1f1dSLionel Sambuc 	"cannot stat file",
212*0a6a1f1dSLionel Sambuc 	"cannot read file",
21392395e9cSLionel Sambuc };
21492395e9cSLionel Sambuc 
21592395e9cSLionel Sambuc static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
21692395e9cSLionel Sambuc 	"SUCCESS",
21792395e9cSLionel Sambuc 	"RESERVED",
21892395e9cSLionel Sambuc 	"WARNING",
21992395e9cSLionel Sambuc 	"ERROR",
22092395e9cSLionel Sambuc 	"FATAL",
22192395e9cSLionel Sambuc 	"BADARG",
22292395e9cSLionel Sambuc 	"SYSERR"
22392395e9cSLionel Sambuc };
22492395e9cSLionel Sambuc 
22592395e9cSLionel Sambuc static void
resize_buf(struct buf * buf,size_t initial)22692395e9cSLionel Sambuc resize_buf(struct buf *buf, size_t initial)
22792395e9cSLionel Sambuc {
22892395e9cSLionel Sambuc 
22992395e9cSLionel Sambuc 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
23092395e9cSLionel Sambuc 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
23192395e9cSLionel Sambuc }
23292395e9cSLionel Sambuc 
23392395e9cSLionel Sambuc static void
pset(const char * buf,int pos,struct mparse * curp)23492395e9cSLionel Sambuc pset(const char *buf, int pos, struct mparse *curp)
23592395e9cSLionel Sambuc {
23692395e9cSLionel Sambuc 	int		 i;
23792395e9cSLionel Sambuc 
23892395e9cSLionel Sambuc 	/*
23992395e9cSLionel Sambuc 	 * Try to intuit which kind of manual parser should be used.  If
24092395e9cSLionel Sambuc 	 * passed in by command-line (-man, -mdoc), then use that
24192395e9cSLionel Sambuc 	 * explicitly.  If passed as -mandoc, then try to guess from the
24292395e9cSLionel Sambuc 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
24392395e9cSLionel Sambuc 	 * default to -man, which is more lenient.
24492395e9cSLionel Sambuc 	 *
24592395e9cSLionel Sambuc 	 * Separate out pmdoc/pman from mdoc/man: the first persists
24692395e9cSLionel Sambuc 	 * through all parsers, while the latter is used per-parse.
24792395e9cSLionel Sambuc 	 */
24892395e9cSLionel Sambuc 
24992395e9cSLionel Sambuc 	if ('.' == buf[0] || '\'' == buf[0]) {
25092395e9cSLionel Sambuc 		for (i = 1; buf[i]; i++)
25192395e9cSLionel Sambuc 			if (' ' != buf[i] && '\t' != buf[i])
25292395e9cSLionel Sambuc 				break;
25392395e9cSLionel Sambuc 		if ('\0' == buf[i])
25492395e9cSLionel Sambuc 			return;
25592395e9cSLionel Sambuc 	}
25692395e9cSLionel Sambuc 
25792395e9cSLionel Sambuc 	switch (curp->inttype) {
25892395e9cSLionel Sambuc 	case (MPARSE_MDOC):
25992395e9cSLionel Sambuc 		if (NULL == curp->pmdoc)
260*0a6a1f1dSLionel Sambuc 			curp->pmdoc = mdoc_alloc(curp->roff, curp,
261*0a6a1f1dSLionel Sambuc 					curp->defos);
26292395e9cSLionel Sambuc 		assert(curp->pmdoc);
26392395e9cSLionel Sambuc 		curp->mdoc = curp->pmdoc;
26492395e9cSLionel Sambuc 		return;
26592395e9cSLionel Sambuc 	case (MPARSE_MAN):
26692395e9cSLionel Sambuc 		if (NULL == curp->pman)
26792395e9cSLionel Sambuc 			curp->pman = man_alloc(curp->roff, curp);
26892395e9cSLionel Sambuc 		assert(curp->pman);
26992395e9cSLionel Sambuc 		curp->man = curp->pman;
27092395e9cSLionel Sambuc 		return;
27192395e9cSLionel Sambuc 	default:
27292395e9cSLionel Sambuc 		break;
27392395e9cSLionel Sambuc 	}
27492395e9cSLionel Sambuc 
27592395e9cSLionel Sambuc 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
27692395e9cSLionel Sambuc 		if (NULL == curp->pmdoc)
277*0a6a1f1dSLionel Sambuc 			curp->pmdoc = mdoc_alloc(curp->roff, curp,
278*0a6a1f1dSLionel Sambuc 					curp->defos);
27992395e9cSLionel Sambuc 		assert(curp->pmdoc);
28092395e9cSLionel Sambuc 		curp->mdoc = curp->pmdoc;
28192395e9cSLionel Sambuc 		return;
28292395e9cSLionel Sambuc 	}
28392395e9cSLionel Sambuc 
28492395e9cSLionel Sambuc 	if (NULL == curp->pman)
28592395e9cSLionel Sambuc 		curp->pman = man_alloc(curp->roff, curp);
28692395e9cSLionel Sambuc 	assert(curp->pman);
28792395e9cSLionel Sambuc 	curp->man = curp->pman;
28892395e9cSLionel Sambuc }
28992395e9cSLionel Sambuc 
29092395e9cSLionel Sambuc /*
29192395e9cSLionel Sambuc  * Main parse routine for an opened file.  This is called for each
29292395e9cSLionel Sambuc  * opened file and simply loops around the full input file, possibly
29392395e9cSLionel Sambuc  * nesting (i.e., with `so').
29492395e9cSLionel Sambuc  */
29592395e9cSLionel Sambuc static void
mparse_buf_r(struct mparse * curp,struct buf blk,int start)29692395e9cSLionel Sambuc mparse_buf_r(struct mparse *curp, struct buf blk, int start)
29792395e9cSLionel Sambuc {
29892395e9cSLionel Sambuc 	const struct tbl_span	*span;
29992395e9cSLionel Sambuc 	struct buf	 ln;
30092395e9cSLionel Sambuc 	enum rofferr	 rr;
30192395e9cSLionel Sambuc 	int		 i, of, rc;
30292395e9cSLionel Sambuc 	int		 pos; /* byte number in the ln buffer */
30392395e9cSLionel Sambuc 	int		 lnn; /* line number in the real file */
30492395e9cSLionel Sambuc 	unsigned char	 c;
30592395e9cSLionel Sambuc 
30692395e9cSLionel Sambuc 	memset(&ln, 0, sizeof(struct buf));
30792395e9cSLionel Sambuc 
30892395e9cSLionel Sambuc 	lnn = curp->line;
30992395e9cSLionel Sambuc 	pos = 0;
31092395e9cSLionel Sambuc 
31192395e9cSLionel Sambuc 	for (i = 0; i < (int)blk.sz; ) {
31292395e9cSLionel Sambuc 		if (0 == pos && '\0' == blk.buf[i])
31392395e9cSLionel Sambuc 			break;
31492395e9cSLionel Sambuc 
31592395e9cSLionel Sambuc 		if (start) {
31692395e9cSLionel Sambuc 			curp->line = lnn;
31792395e9cSLionel Sambuc 			curp->reparse_count = 0;
31892395e9cSLionel Sambuc 		}
31992395e9cSLionel Sambuc 
32092395e9cSLionel Sambuc 		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
32192395e9cSLionel Sambuc 
32292395e9cSLionel Sambuc 			/*
32392395e9cSLionel Sambuc 			 * When finding an unescaped newline character,
32492395e9cSLionel Sambuc 			 * leave the character loop to process the line.
32592395e9cSLionel Sambuc 			 * Skip a preceding carriage return, if any.
32692395e9cSLionel Sambuc 			 */
32792395e9cSLionel Sambuc 
32892395e9cSLionel Sambuc 			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
32992395e9cSLionel Sambuc 			    '\n' == blk.buf[i + 1])
33092395e9cSLionel Sambuc 				++i;
33192395e9cSLionel Sambuc 			if ('\n' == blk.buf[i]) {
33292395e9cSLionel Sambuc 				++i;
33392395e9cSLionel Sambuc 				++lnn;
33492395e9cSLionel Sambuc 				break;
33592395e9cSLionel Sambuc 			}
33692395e9cSLionel Sambuc 
33792395e9cSLionel Sambuc 			/*
338*0a6a1f1dSLionel Sambuc 			 * Make sure we have space for at least
339*0a6a1f1dSLionel Sambuc 			 * one backslash and one other character
340*0a6a1f1dSLionel Sambuc 			 * and the trailing NUL byte.
341*0a6a1f1dSLionel Sambuc 			 */
342*0a6a1f1dSLionel Sambuc 
343*0a6a1f1dSLionel Sambuc 			if (pos + 2 >= (int)ln.sz)
344*0a6a1f1dSLionel Sambuc 				resize_buf(&ln, 256);
345*0a6a1f1dSLionel Sambuc 
346*0a6a1f1dSLionel Sambuc 			/*
34792395e9cSLionel Sambuc 			 * Warn about bogus characters.  If you're using
34892395e9cSLionel Sambuc 			 * non-ASCII encoding, you're screwing your
34992395e9cSLionel Sambuc 			 * readers.  Since I'd rather this not happen,
35092395e9cSLionel Sambuc 			 * I'll be helpful and replace these characters
35192395e9cSLionel Sambuc 			 * with "?", so we don't display gibberish.
35292395e9cSLionel Sambuc 			 * Note to manual writers: use special characters.
35392395e9cSLionel Sambuc 			 */
35492395e9cSLionel Sambuc 
35592395e9cSLionel Sambuc 			c = (unsigned char) blk.buf[i];
35692395e9cSLionel Sambuc 
35792395e9cSLionel Sambuc 			if ( ! (isascii(c) &&
35892395e9cSLionel Sambuc 					(isgraph(c) || isblank(c)))) {
35992395e9cSLionel Sambuc 				mandoc_msg(MANDOCERR_BADCHAR, curp,
36092395e9cSLionel Sambuc 						curp->line, pos, NULL);
36192395e9cSLionel Sambuc 				i++;
36292395e9cSLionel Sambuc 				ln.buf[pos++] = '?';
36392395e9cSLionel Sambuc 				continue;
36492395e9cSLionel Sambuc 			}
36592395e9cSLionel Sambuc 
36684d9c625SLionel Sambuc 			/* Trailing backslash = a plain char. */
367*0a6a1f1dSLionel Sambuc 
36892395e9cSLionel Sambuc 			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
36992395e9cSLionel Sambuc 				ln.buf[pos++] = blk.buf[i++];
37092395e9cSLionel Sambuc 				continue;
37192395e9cSLionel Sambuc 			}
372*0a6a1f1dSLionel Sambuc 
37392395e9cSLionel Sambuc 			/*
37492395e9cSLionel Sambuc 			 * Found escape and at least one other character.
37592395e9cSLionel Sambuc 			 * When it's a newline character, skip it.
37692395e9cSLionel Sambuc 			 * When there is a carriage return in between,
37792395e9cSLionel Sambuc 			 * skip that one as well.
37892395e9cSLionel Sambuc 			 */
37992395e9cSLionel Sambuc 
38092395e9cSLionel Sambuc 			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
38192395e9cSLionel Sambuc 			    '\n' == blk.buf[i + 2])
38292395e9cSLionel Sambuc 				++i;
38392395e9cSLionel Sambuc 			if ('\n' == blk.buf[i + 1]) {
38492395e9cSLionel Sambuc 				i += 2;
38592395e9cSLionel Sambuc 				++lnn;
38692395e9cSLionel Sambuc 				continue;
38792395e9cSLionel Sambuc 			}
38892395e9cSLionel Sambuc 
38992395e9cSLionel Sambuc 			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
39092395e9cSLionel Sambuc 				i += 2;
39192395e9cSLionel Sambuc 				/* Comment, skip to end of line */
39292395e9cSLionel Sambuc 				for (; i < (int)blk.sz; ++i) {
39392395e9cSLionel Sambuc 					if ('\n' == blk.buf[i]) {
39492395e9cSLionel Sambuc 						++i;
39592395e9cSLionel Sambuc 						++lnn;
39692395e9cSLionel Sambuc 						break;
39792395e9cSLionel Sambuc 					}
39892395e9cSLionel Sambuc 				}
39992395e9cSLionel Sambuc 
40092395e9cSLionel Sambuc 				/* Backout trailing whitespaces */
40192395e9cSLionel Sambuc 				for (; pos > 0; --pos) {
40292395e9cSLionel Sambuc 					if (ln.buf[pos - 1] != ' ')
40392395e9cSLionel Sambuc 						break;
40492395e9cSLionel Sambuc 					if (pos > 2 && ln.buf[pos - 2] == '\\')
40592395e9cSLionel Sambuc 						break;
40692395e9cSLionel Sambuc 				}
40792395e9cSLionel Sambuc 				break;
40892395e9cSLionel Sambuc 			}
40992395e9cSLionel Sambuc 
410*0a6a1f1dSLionel Sambuc 			/* Catch escaped bogus characters. */
41192395e9cSLionel Sambuc 
412*0a6a1f1dSLionel Sambuc 			c = (unsigned char) blk.buf[i+1];
413*0a6a1f1dSLionel Sambuc 
414*0a6a1f1dSLionel Sambuc 			if ( ! (isascii(c) &&
415*0a6a1f1dSLionel Sambuc 					(isgraph(c) || isblank(c)))) {
416*0a6a1f1dSLionel Sambuc 				mandoc_msg(MANDOCERR_BADCHAR, curp,
417*0a6a1f1dSLionel Sambuc 						curp->line, pos, NULL);
418*0a6a1f1dSLionel Sambuc 				i += 2;
419*0a6a1f1dSLionel Sambuc 				ln.buf[pos++] = '?';
420*0a6a1f1dSLionel Sambuc 				continue;
421*0a6a1f1dSLionel Sambuc 			}
422*0a6a1f1dSLionel Sambuc 
423*0a6a1f1dSLionel Sambuc 			/* Some other escape sequence, copy & cont. */
42492395e9cSLionel Sambuc 
42592395e9cSLionel Sambuc 			ln.buf[pos++] = blk.buf[i++];
42692395e9cSLionel Sambuc 			ln.buf[pos++] = blk.buf[i++];
42792395e9cSLionel Sambuc 		}
42892395e9cSLionel Sambuc 
42992395e9cSLionel Sambuc  		if (pos >= (int)ln.sz)
43092395e9cSLionel Sambuc 			resize_buf(&ln, 256);
43192395e9cSLionel Sambuc 
43292395e9cSLionel Sambuc 		ln.buf[pos] = '\0';
43392395e9cSLionel Sambuc 
43492395e9cSLionel Sambuc 		/*
43592395e9cSLionel Sambuc 		 * A significant amount of complexity is contained by
43692395e9cSLionel Sambuc 		 * the roff preprocessor.  It's line-oriented but can be
43792395e9cSLionel Sambuc 		 * expressed on one line, so we need at times to
43892395e9cSLionel Sambuc 		 * readjust our starting point and re-run it.  The roff
43992395e9cSLionel Sambuc 		 * preprocessor can also readjust the buffers with new
44092395e9cSLionel Sambuc 		 * data, so we pass them in wholesale.
44192395e9cSLionel Sambuc 		 */
44292395e9cSLionel Sambuc 
44392395e9cSLionel Sambuc 		of = 0;
44492395e9cSLionel Sambuc 
44592395e9cSLionel Sambuc 		/*
44692395e9cSLionel Sambuc 		 * Maintain a lookaside buffer of all parsed lines.  We
44792395e9cSLionel Sambuc 		 * only do this if mparse_keep() has been invoked (the
44892395e9cSLionel Sambuc 		 * buffer may be accessed with mparse_getkeep()).
44992395e9cSLionel Sambuc 		 */
45092395e9cSLionel Sambuc 
45192395e9cSLionel Sambuc 		if (curp->secondary) {
45292395e9cSLionel Sambuc 			curp->secondary->buf =
45392395e9cSLionel Sambuc 				mandoc_realloc
45492395e9cSLionel Sambuc 				(curp->secondary->buf,
45592395e9cSLionel Sambuc 				 curp->secondary->sz + pos + 2);
45692395e9cSLionel Sambuc 			memcpy(curp->secondary->buf +
45792395e9cSLionel Sambuc 					curp->secondary->sz,
45892395e9cSLionel Sambuc 					ln.buf, pos);
45992395e9cSLionel Sambuc 			curp->secondary->sz += pos;
46092395e9cSLionel Sambuc 			curp->secondary->buf
46192395e9cSLionel Sambuc 				[curp->secondary->sz] = '\n';
46292395e9cSLionel Sambuc 			curp->secondary->sz++;
46392395e9cSLionel Sambuc 			curp->secondary->buf
46492395e9cSLionel Sambuc 				[curp->secondary->sz] = '\0';
46592395e9cSLionel Sambuc 		}
46692395e9cSLionel Sambuc rerun:
46792395e9cSLionel Sambuc 		rr = roff_parseln
46892395e9cSLionel Sambuc 			(curp->roff, curp->line,
46992395e9cSLionel Sambuc 			 &ln.buf, &ln.sz, of, &of);
47092395e9cSLionel Sambuc 
47192395e9cSLionel Sambuc 		switch (rr) {
47292395e9cSLionel Sambuc 		case (ROFF_REPARSE):
47392395e9cSLionel Sambuc 			if (REPARSE_LIMIT >= ++curp->reparse_count)
47492395e9cSLionel Sambuc 				mparse_buf_r(curp, ln, 0);
47592395e9cSLionel Sambuc 			else
47692395e9cSLionel Sambuc 				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
47792395e9cSLionel Sambuc 					curp->line, pos, NULL);
47892395e9cSLionel Sambuc 			pos = 0;
47992395e9cSLionel Sambuc 			continue;
48092395e9cSLionel Sambuc 		case (ROFF_APPEND):
48192395e9cSLionel Sambuc 			pos = (int)strlen(ln.buf);
48292395e9cSLionel Sambuc 			continue;
48392395e9cSLionel Sambuc 		case (ROFF_RERUN):
48492395e9cSLionel Sambuc 			goto rerun;
48592395e9cSLionel Sambuc 		case (ROFF_IGN):
48692395e9cSLionel Sambuc 			pos = 0;
48792395e9cSLionel Sambuc 			continue;
48892395e9cSLionel Sambuc 		case (ROFF_ERR):
48992395e9cSLionel Sambuc 			assert(MANDOCLEVEL_FATAL <= curp->file_status);
49092395e9cSLionel Sambuc 			break;
49192395e9cSLionel Sambuc 		case (ROFF_SO):
49292395e9cSLionel Sambuc 			/*
49392395e9cSLionel Sambuc 			 * We remove `so' clauses from our lookaside
49492395e9cSLionel Sambuc 			 * buffer because we're going to descend into
49592395e9cSLionel Sambuc 			 * the file recursively.
49692395e9cSLionel Sambuc 			 */
49792395e9cSLionel Sambuc 			if (curp->secondary)
49892395e9cSLionel Sambuc 				curp->secondary->sz -= pos + 1;
499*0a6a1f1dSLionel Sambuc 			mparse_readfd(curp, -1, ln.buf + of);
50092395e9cSLionel Sambuc 			if (MANDOCLEVEL_FATAL <= curp->file_status)
50192395e9cSLionel Sambuc 				break;
50292395e9cSLionel Sambuc 			pos = 0;
50392395e9cSLionel Sambuc 			continue;
50492395e9cSLionel Sambuc 		default:
50592395e9cSLionel Sambuc 			break;
50692395e9cSLionel Sambuc 		}
50792395e9cSLionel Sambuc 
50892395e9cSLionel Sambuc 		/*
50992395e9cSLionel Sambuc 		 * If we encounter errors in the recursive parse, make
51092395e9cSLionel Sambuc 		 * sure we don't continue parsing.
51192395e9cSLionel Sambuc 		 */
51292395e9cSLionel Sambuc 
51392395e9cSLionel Sambuc 		if (MANDOCLEVEL_FATAL <= curp->file_status)
51492395e9cSLionel Sambuc 			break;
51592395e9cSLionel Sambuc 
51692395e9cSLionel Sambuc 		/*
51792395e9cSLionel Sambuc 		 * If input parsers have not been allocated, do so now.
51892395e9cSLionel Sambuc 		 * We keep these instanced between parsers, but set them
51992395e9cSLionel Sambuc 		 * locally per parse routine since we can use different
52092395e9cSLionel Sambuc 		 * parsers with each one.
52192395e9cSLionel Sambuc 		 */
52292395e9cSLionel Sambuc 
52392395e9cSLionel Sambuc 		if ( ! (curp->man || curp->mdoc))
52492395e9cSLionel Sambuc 			pset(ln.buf + of, pos - of, curp);
52592395e9cSLionel Sambuc 
52692395e9cSLionel Sambuc 		/*
52792395e9cSLionel Sambuc 		 * Lastly, push down into the parsers themselves.  One
52892395e9cSLionel Sambuc 		 * of these will have already been set in the pset()
52992395e9cSLionel Sambuc 		 * routine.
53092395e9cSLionel Sambuc 		 * If libroff returns ROFF_TBL, then add it to the
53192395e9cSLionel Sambuc 		 * currently open parse.  Since we only get here if
53292395e9cSLionel Sambuc 		 * there does exist data (see tbl_data.c), we're
53392395e9cSLionel Sambuc 		 * guaranteed that something's been allocated.
53492395e9cSLionel Sambuc 		 * Do the same for ROFF_EQN.
53592395e9cSLionel Sambuc 		 */
53692395e9cSLionel Sambuc 
53792395e9cSLionel Sambuc 		rc = -1;
53892395e9cSLionel Sambuc 
53992395e9cSLionel Sambuc 		if (ROFF_TBL == rr)
54092395e9cSLionel Sambuc 			while (NULL != (span = roff_span(curp->roff))) {
54192395e9cSLionel Sambuc 				rc = curp->man ?
54292395e9cSLionel Sambuc 					man_addspan(curp->man, span) :
54392395e9cSLionel Sambuc 					mdoc_addspan(curp->mdoc, span);
54492395e9cSLionel Sambuc 				if (0 == rc)
54592395e9cSLionel Sambuc 					break;
54692395e9cSLionel Sambuc 			}
54792395e9cSLionel Sambuc 		else if (ROFF_EQN == rr)
54892395e9cSLionel Sambuc 			rc = curp->mdoc ?
54992395e9cSLionel Sambuc 				mdoc_addeqn(curp->mdoc,
55092395e9cSLionel Sambuc 					roff_eqn(curp->roff)) :
55192395e9cSLionel Sambuc 				man_addeqn(curp->man,
55292395e9cSLionel Sambuc 					roff_eqn(curp->roff));
55392395e9cSLionel Sambuc 		else if (curp->man || curp->mdoc)
55492395e9cSLionel Sambuc 			rc = curp->man ?
55592395e9cSLionel Sambuc 				man_parseln(curp->man,
55692395e9cSLionel Sambuc 					curp->line, ln.buf, of) :
55792395e9cSLionel Sambuc 				mdoc_parseln(curp->mdoc,
55892395e9cSLionel Sambuc 					curp->line, ln.buf, of);
55992395e9cSLionel Sambuc 
56092395e9cSLionel Sambuc 		if (0 == rc) {
56192395e9cSLionel Sambuc 			assert(MANDOCLEVEL_FATAL <= curp->file_status);
56292395e9cSLionel Sambuc 			break;
56392395e9cSLionel Sambuc 		}
56492395e9cSLionel Sambuc 
56592395e9cSLionel Sambuc 		/* Temporary buffers typically are not full. */
56692395e9cSLionel Sambuc 
56792395e9cSLionel Sambuc 		if (0 == start && '\0' == blk.buf[i])
56892395e9cSLionel Sambuc 			break;
56992395e9cSLionel Sambuc 
57092395e9cSLionel Sambuc 		/* Start the next input line. */
57192395e9cSLionel Sambuc 
57292395e9cSLionel Sambuc 		pos = 0;
57392395e9cSLionel Sambuc 	}
57492395e9cSLionel Sambuc 
57592395e9cSLionel Sambuc 	free(ln.buf);
57692395e9cSLionel Sambuc }
57792395e9cSLionel Sambuc 
57892395e9cSLionel Sambuc static int
read_whole_file(struct mparse * curp,const char * file,int fd,struct buf * fb,int * with_mmap)579*0a6a1f1dSLionel Sambuc read_whole_file(struct mparse *curp, const char *file, int fd,
580*0a6a1f1dSLionel Sambuc 		struct buf *fb, int *with_mmap)
58192395e9cSLionel Sambuc {
58292395e9cSLionel Sambuc 	size_t		 off;
58392395e9cSLionel Sambuc 	ssize_t		 ssz;
58492395e9cSLionel Sambuc 
58592395e9cSLionel Sambuc #ifdef	HAVE_MMAP
58692395e9cSLionel Sambuc 	struct stat	 st;
58792395e9cSLionel Sambuc 	if (-1 == fstat(fd, &st)) {
588*0a6a1f1dSLionel Sambuc 		curp->file_status = MANDOCLEVEL_SYSERR;
589*0a6a1f1dSLionel Sambuc 		if (curp->mmsg)
590*0a6a1f1dSLionel Sambuc 			(*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
591*0a6a1f1dSLionel Sambuc 			    file, 0, 0, strerror(errno));
59292395e9cSLionel Sambuc 		return(0);
59392395e9cSLionel Sambuc 	}
59492395e9cSLionel Sambuc 
59592395e9cSLionel Sambuc 	/*
59692395e9cSLionel Sambuc 	 * If we're a regular file, try just reading in the whole entry
59792395e9cSLionel Sambuc 	 * via mmap().  This is faster than reading it into blocks, and
59892395e9cSLionel Sambuc 	 * since each file is only a few bytes to begin with, I'm not
59992395e9cSLionel Sambuc 	 * concerned that this is going to tank any machines.
60092395e9cSLionel Sambuc 	 */
60192395e9cSLionel Sambuc 
60292395e9cSLionel Sambuc 	if (S_ISREG(st.st_mode)) {
60392395e9cSLionel Sambuc 		if (st.st_size >= (1U << 31)) {
604*0a6a1f1dSLionel Sambuc 			curp->file_status = MANDOCLEVEL_FATAL;
605*0a6a1f1dSLionel Sambuc 			if (curp->mmsg)
606*0a6a1f1dSLionel Sambuc 				(*curp->mmsg)(MANDOCERR_TOOLARGE,
607*0a6a1f1dSLionel Sambuc 				    curp->file_status, file, 0, 0, NULL);
60892395e9cSLionel Sambuc 			return(0);
60992395e9cSLionel Sambuc 		}
61092395e9cSLionel Sambuc 		*with_mmap = 1;
61192395e9cSLionel Sambuc 		fb->sz = (size_t)st.st_size;
612*0a6a1f1dSLionel Sambuc 		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
61392395e9cSLionel Sambuc 		if (fb->buf != MAP_FAILED)
61492395e9cSLionel Sambuc 			return(1);
61592395e9cSLionel Sambuc 	}
61692395e9cSLionel Sambuc #endif
61792395e9cSLionel Sambuc 
61892395e9cSLionel Sambuc 	/*
61992395e9cSLionel Sambuc 	 * If this isn't a regular file (like, say, stdin), then we must
62092395e9cSLionel Sambuc 	 * go the old way and just read things in bit by bit.
62192395e9cSLionel Sambuc 	 */
62292395e9cSLionel Sambuc 
62392395e9cSLionel Sambuc 	*with_mmap = 0;
62492395e9cSLionel Sambuc 	off = 0;
62592395e9cSLionel Sambuc 	fb->sz = 0;
62692395e9cSLionel Sambuc 	fb->buf = NULL;
62792395e9cSLionel Sambuc 	for (;;) {
62892395e9cSLionel Sambuc 		if (off == fb->sz) {
62992395e9cSLionel Sambuc 			if (fb->sz == (1U << 31)) {
630*0a6a1f1dSLionel Sambuc 				curp->file_status = MANDOCLEVEL_FATAL;
631*0a6a1f1dSLionel Sambuc 				if (curp->mmsg)
632*0a6a1f1dSLionel Sambuc 					(*curp->mmsg)(MANDOCERR_TOOLARGE,
633*0a6a1f1dSLionel Sambuc 					    curp->file_status,
634*0a6a1f1dSLionel Sambuc 					    file, 0, 0, NULL);
63592395e9cSLionel Sambuc 				break;
63692395e9cSLionel Sambuc 			}
63792395e9cSLionel Sambuc 			resize_buf(fb, 65536);
63892395e9cSLionel Sambuc 		}
63992395e9cSLionel Sambuc 		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
64092395e9cSLionel Sambuc 		if (ssz == 0) {
64192395e9cSLionel Sambuc 			fb->sz = off;
64292395e9cSLionel Sambuc 			return(1);
64392395e9cSLionel Sambuc 		}
64492395e9cSLionel Sambuc 		if (ssz == -1) {
645*0a6a1f1dSLionel Sambuc 			curp->file_status = MANDOCLEVEL_SYSERR;
646*0a6a1f1dSLionel Sambuc 			if (curp->mmsg)
647*0a6a1f1dSLionel Sambuc 				(*curp->mmsg)(MANDOCERR_SYSREAD,
648*0a6a1f1dSLionel Sambuc 				    curp->file_status, file, 0, 0,
649*0a6a1f1dSLionel Sambuc 				    strerror(errno));
65092395e9cSLionel Sambuc 			break;
65192395e9cSLionel Sambuc 		}
65292395e9cSLionel Sambuc 		off += (size_t)ssz;
65392395e9cSLionel Sambuc 	}
65492395e9cSLionel Sambuc 
65592395e9cSLionel Sambuc 	free(fb->buf);
65692395e9cSLionel Sambuc 	fb->buf = NULL;
65792395e9cSLionel Sambuc 	return(0);
65892395e9cSLionel Sambuc }
65992395e9cSLionel Sambuc 
66092395e9cSLionel Sambuc static void
mparse_end(struct mparse * curp)66192395e9cSLionel Sambuc mparse_end(struct mparse *curp)
66292395e9cSLionel Sambuc {
66392395e9cSLionel Sambuc 
66492395e9cSLionel Sambuc 	if (MANDOCLEVEL_FATAL <= curp->file_status)
66592395e9cSLionel Sambuc 		return;
66692395e9cSLionel Sambuc 
66792395e9cSLionel Sambuc 	if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
66892395e9cSLionel Sambuc 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
66992395e9cSLionel Sambuc 		return;
67092395e9cSLionel Sambuc 	}
67192395e9cSLionel Sambuc 
67292395e9cSLionel Sambuc 	if (curp->man && ! man_endparse(curp->man)) {
67392395e9cSLionel Sambuc 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
67492395e9cSLionel Sambuc 		return;
67592395e9cSLionel Sambuc 	}
67692395e9cSLionel Sambuc 
67792395e9cSLionel Sambuc 	if ( ! (curp->man || curp->mdoc)) {
67892395e9cSLionel Sambuc 		mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL);
67992395e9cSLionel Sambuc 		curp->file_status = MANDOCLEVEL_FATAL;
68092395e9cSLionel Sambuc 		return;
68192395e9cSLionel Sambuc 	}
68292395e9cSLionel Sambuc 
68392395e9cSLionel Sambuc 	roff_endparse(curp->roff);
68492395e9cSLionel Sambuc }
68592395e9cSLionel Sambuc 
68692395e9cSLionel Sambuc static void
mparse_parse_buffer(struct mparse * curp,struct buf blk,const char * file)687*0a6a1f1dSLionel Sambuc mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
68892395e9cSLionel Sambuc {
68992395e9cSLionel Sambuc 	const char	*svfile;
690*0a6a1f1dSLionel Sambuc 	static int	 recursion_depth;
691*0a6a1f1dSLionel Sambuc 
692*0a6a1f1dSLionel Sambuc 	if (64 < recursion_depth) {
693*0a6a1f1dSLionel Sambuc 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
694*0a6a1f1dSLionel Sambuc 		return;
695*0a6a1f1dSLionel Sambuc 	}
69692395e9cSLionel Sambuc 
69792395e9cSLionel Sambuc 	/* Line number is per-file. */
69892395e9cSLionel Sambuc 	svfile = curp->file;
69992395e9cSLionel Sambuc 	curp->file = file;
70092395e9cSLionel Sambuc 	curp->line = 1;
701*0a6a1f1dSLionel Sambuc 	recursion_depth++;
70292395e9cSLionel Sambuc 
70392395e9cSLionel Sambuc 	mparse_buf_r(curp, blk, 1);
70492395e9cSLionel Sambuc 
705*0a6a1f1dSLionel Sambuc 	if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
70692395e9cSLionel Sambuc 		mparse_end(curp);
70792395e9cSLionel Sambuc 
70892395e9cSLionel Sambuc 	curp->file = svfile;
70992395e9cSLionel Sambuc }
71092395e9cSLionel Sambuc 
71192395e9cSLionel Sambuc enum mandoclevel
mparse_readmem(struct mparse * curp,const void * buf,size_t len,const char * file)71292395e9cSLionel Sambuc mparse_readmem(struct mparse *curp, const void *buf, size_t len,
71392395e9cSLionel Sambuc 		const char *file)
71492395e9cSLionel Sambuc {
71592395e9cSLionel Sambuc 	struct buf blk;
71692395e9cSLionel Sambuc 
71792395e9cSLionel Sambuc 	blk.buf = UNCONST(buf);
71892395e9cSLionel Sambuc 	blk.sz = len;
71992395e9cSLionel Sambuc 
720*0a6a1f1dSLionel Sambuc 	mparse_parse_buffer(curp, blk, file);
72192395e9cSLionel Sambuc 	return(curp->file_status);
72292395e9cSLionel Sambuc }
72392395e9cSLionel Sambuc 
724*0a6a1f1dSLionel Sambuc enum mandoclevel
mparse_readfd(struct mparse * curp,int fd,const char * file)725*0a6a1f1dSLionel Sambuc mparse_readfd(struct mparse *curp, int fd, const char *file)
72692395e9cSLionel Sambuc {
72792395e9cSLionel Sambuc 	struct buf	 blk;
72892395e9cSLionel Sambuc 	int		 with_mmap;
72992395e9cSLionel Sambuc 
730*0a6a1f1dSLionel Sambuc 	if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
73192395e9cSLionel Sambuc 		curp->file_status = MANDOCLEVEL_SYSERR;
732*0a6a1f1dSLionel Sambuc 		if (curp->mmsg)
733*0a6a1f1dSLionel Sambuc 			(*curp->mmsg)(MANDOCERR_SYSOPEN,
734*0a6a1f1dSLionel Sambuc 			    curp->file_status,
735*0a6a1f1dSLionel Sambuc 			    file, 0, 0, strerror(errno));
736*0a6a1f1dSLionel Sambuc 		goto out;
73792395e9cSLionel Sambuc 	}
738*0a6a1f1dSLionel Sambuc 
73992395e9cSLionel Sambuc 	/*
74092395e9cSLionel Sambuc 	 * Run for each opened file; may be called more than once for
74192395e9cSLionel Sambuc 	 * each full parse sequence if the opened file is nested (i.e.,
74292395e9cSLionel Sambuc 	 * from `so').  Simply sucks in the whole file and moves into
74392395e9cSLionel Sambuc 	 * the parse phase for the file.
74492395e9cSLionel Sambuc 	 */
74592395e9cSLionel Sambuc 
746*0a6a1f1dSLionel Sambuc 	if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
747*0a6a1f1dSLionel Sambuc 		goto out;
74892395e9cSLionel Sambuc 
749*0a6a1f1dSLionel Sambuc 	mparse_parse_buffer(curp, blk, file);
75092395e9cSLionel Sambuc 
75192395e9cSLionel Sambuc #ifdef	HAVE_MMAP
75292395e9cSLionel Sambuc 	if (with_mmap)
75392395e9cSLionel Sambuc 		munmap(blk.buf, blk.sz);
75492395e9cSLionel Sambuc 	else
75592395e9cSLionel Sambuc #endif
75692395e9cSLionel Sambuc 		free(blk.buf);
75792395e9cSLionel Sambuc 
75892395e9cSLionel Sambuc 	if (STDIN_FILENO != fd && -1 == close(fd))
75992395e9cSLionel Sambuc 		perror(file);
760*0a6a1f1dSLionel Sambuc out:
76192395e9cSLionel Sambuc 	return(curp->file_status);
76292395e9cSLionel Sambuc }
76392395e9cSLionel Sambuc 
76492395e9cSLionel Sambuc struct mparse *
mparse_alloc(enum mparset inttype,enum mandoclevel wlevel,mandocmsg mmsg,void * arg,char * defos)765*0a6a1f1dSLionel Sambuc mparse_alloc(enum mparset inttype, enum mandoclevel wlevel,
766*0a6a1f1dSLionel Sambuc 		mandocmsg mmsg, void *arg, char *defos)
76792395e9cSLionel Sambuc {
76892395e9cSLionel Sambuc 	struct mparse	*curp;
76992395e9cSLionel Sambuc 
77092395e9cSLionel Sambuc 	assert(wlevel <= MANDOCLEVEL_FATAL);
77192395e9cSLionel Sambuc 
77292395e9cSLionel Sambuc 	curp = mandoc_calloc(1, sizeof(struct mparse));
77392395e9cSLionel Sambuc 
77492395e9cSLionel Sambuc 	curp->wlevel = wlevel;
77592395e9cSLionel Sambuc 	curp->mmsg = mmsg;
77692395e9cSLionel Sambuc 	curp->arg = arg;
77792395e9cSLionel Sambuc 	curp->inttype = inttype;
778*0a6a1f1dSLionel Sambuc 	curp->defos = defos;
77992395e9cSLionel Sambuc 
780*0a6a1f1dSLionel Sambuc 	curp->roff = roff_alloc(inttype, curp);
78192395e9cSLionel Sambuc 	return(curp);
78292395e9cSLionel Sambuc }
78392395e9cSLionel Sambuc 
78492395e9cSLionel Sambuc void
mparse_reset(struct mparse * curp)78592395e9cSLionel Sambuc mparse_reset(struct mparse *curp)
78692395e9cSLionel Sambuc {
78792395e9cSLionel Sambuc 
78892395e9cSLionel Sambuc 	roff_reset(curp->roff);
78992395e9cSLionel Sambuc 
79092395e9cSLionel Sambuc 	if (curp->mdoc)
79192395e9cSLionel Sambuc 		mdoc_reset(curp->mdoc);
79292395e9cSLionel Sambuc 	if (curp->man)
79392395e9cSLionel Sambuc 		man_reset(curp->man);
79492395e9cSLionel Sambuc 	if (curp->secondary)
79592395e9cSLionel Sambuc 		curp->secondary->sz = 0;
79692395e9cSLionel Sambuc 
79792395e9cSLionel Sambuc 	curp->file_status = MANDOCLEVEL_OK;
79892395e9cSLionel Sambuc 	curp->mdoc = NULL;
79992395e9cSLionel Sambuc 	curp->man = NULL;
80092395e9cSLionel Sambuc }
80192395e9cSLionel Sambuc 
80292395e9cSLionel Sambuc void
mparse_free(struct mparse * curp)80392395e9cSLionel Sambuc mparse_free(struct mparse *curp)
80492395e9cSLionel Sambuc {
80592395e9cSLionel Sambuc 
80692395e9cSLionel Sambuc 	if (curp->pmdoc)
80792395e9cSLionel Sambuc 		mdoc_free(curp->pmdoc);
80892395e9cSLionel Sambuc 	if (curp->pman)
80992395e9cSLionel Sambuc 		man_free(curp->pman);
81092395e9cSLionel Sambuc 	if (curp->roff)
81192395e9cSLionel Sambuc 		roff_free(curp->roff);
81292395e9cSLionel Sambuc 	if (curp->secondary)
81392395e9cSLionel Sambuc 		free(curp->secondary->buf);
81492395e9cSLionel Sambuc 
81592395e9cSLionel Sambuc 	free(curp->secondary);
81692395e9cSLionel Sambuc 	free(curp);
81792395e9cSLionel Sambuc }
81892395e9cSLionel Sambuc 
81992395e9cSLionel Sambuc void
mparse_result(struct mparse * curp,struct mdoc ** mdoc,struct man ** man)82092395e9cSLionel Sambuc mparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man)
82192395e9cSLionel Sambuc {
82292395e9cSLionel Sambuc 
82392395e9cSLionel Sambuc 	if (mdoc)
82492395e9cSLionel Sambuc 		*mdoc = curp->mdoc;
82592395e9cSLionel Sambuc 	if (man)
82692395e9cSLionel Sambuc 		*man = curp->man;
82792395e9cSLionel Sambuc }
82892395e9cSLionel Sambuc 
82992395e9cSLionel Sambuc void
mandoc_vmsg(enum mandocerr t,struct mparse * m,int ln,int pos,const char * fmt,...)83092395e9cSLionel Sambuc mandoc_vmsg(enum mandocerr t, struct mparse *m,
83192395e9cSLionel Sambuc 		int ln, int pos, const char *fmt, ...)
83292395e9cSLionel Sambuc {
83392395e9cSLionel Sambuc 	char		 buf[256];
83492395e9cSLionel Sambuc 	va_list		 ap;
83592395e9cSLionel Sambuc 
83692395e9cSLionel Sambuc 	va_start(ap, fmt);
83792395e9cSLionel Sambuc 	vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
83892395e9cSLionel Sambuc 	va_end(ap);
83992395e9cSLionel Sambuc 
84092395e9cSLionel Sambuc 	mandoc_msg(t, m, ln, pos, buf);
84192395e9cSLionel Sambuc }
84292395e9cSLionel Sambuc 
84392395e9cSLionel Sambuc void
mandoc_msg(enum mandocerr er,struct mparse * m,int ln,int col,const char * msg)84492395e9cSLionel Sambuc mandoc_msg(enum mandocerr er, struct mparse *m,
84592395e9cSLionel Sambuc 		int ln, int col, const char *msg)
84692395e9cSLionel Sambuc {
84792395e9cSLionel Sambuc 	enum mandoclevel level;
84892395e9cSLionel Sambuc 
84992395e9cSLionel Sambuc 	level = MANDOCLEVEL_FATAL;
85092395e9cSLionel Sambuc 	while (er < mandoclimits[level])
85192395e9cSLionel Sambuc 		level--;
85292395e9cSLionel Sambuc 
85392395e9cSLionel Sambuc 	if (level < m->wlevel)
85492395e9cSLionel Sambuc 		return;
85592395e9cSLionel Sambuc 
85692395e9cSLionel Sambuc 	if (m->mmsg)
85792395e9cSLionel Sambuc 		(*m->mmsg)(er, level, m->file, ln, col, msg);
85892395e9cSLionel Sambuc 
85992395e9cSLionel Sambuc 	if (m->file_status < level)
86092395e9cSLionel Sambuc 		m->file_status = level;
86192395e9cSLionel Sambuc }
86292395e9cSLionel Sambuc 
86392395e9cSLionel Sambuc const char *
mparse_strerror(enum mandocerr er)86492395e9cSLionel Sambuc mparse_strerror(enum mandocerr er)
86592395e9cSLionel Sambuc {
86692395e9cSLionel Sambuc 
86792395e9cSLionel Sambuc 	return(mandocerrs[er]);
86892395e9cSLionel Sambuc }
86992395e9cSLionel Sambuc 
87092395e9cSLionel Sambuc const char *
mparse_strlevel(enum mandoclevel lvl)87192395e9cSLionel Sambuc mparse_strlevel(enum mandoclevel lvl)
87292395e9cSLionel Sambuc {
87392395e9cSLionel Sambuc 	return(mandoclevels[lvl]);
87492395e9cSLionel Sambuc }
87592395e9cSLionel Sambuc 
87692395e9cSLionel Sambuc void
mparse_keep(struct mparse * p)87792395e9cSLionel Sambuc mparse_keep(struct mparse *p)
87892395e9cSLionel Sambuc {
87992395e9cSLionel Sambuc 
88092395e9cSLionel Sambuc 	assert(NULL == p->secondary);
88192395e9cSLionel Sambuc 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
88292395e9cSLionel Sambuc }
88392395e9cSLionel Sambuc 
88492395e9cSLionel Sambuc const char *
mparse_getkeep(const struct mparse * p)88592395e9cSLionel Sambuc mparse_getkeep(const struct mparse *p)
88692395e9cSLionel Sambuc {
88792395e9cSLionel Sambuc 
88892395e9cSLionel Sambuc 	assert(p->secondary);
88992395e9cSLionel Sambuc 	return(p->secondary->sz ? p->secondary->buf : NULL);
89092395e9cSLionel Sambuc }
891