xref: /minix3/external/bsd/mdocml/dist/read.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1 /*	Id: read.c,v 1.40 2014/01/02 16:29:55 schwarze Exp  */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #ifdef HAVE_MMAP
23 # include <sys/stat.h>
24 # include <sys/mman.h>
25 #endif
26 
27 #include <assert.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 
38 #include "mandoc.h"
39 #include "libmandoc.h"
40 #include "mdoc.h"
41 #include "man.h"
42 #include "main.h"
43 
/*
 * Upper bound on how often a single input line may come back from the
 * roff preprocessor as ROFF_REPARSE; once reparse_count exceeds it,
 * mparse_buf_r() reports MANDOCERR_ROFFLOOP instead of recursing.
 */
44 #define	REPARSE_LIMIT	1000
45 
/*
 * A byte buffer together with its size.  Used in this file for whole
 * input files, for the per-line working buffer of mparse_buf_r() and
 * for the lookaside copy requested through mparse_keep().
 */
46 struct	buf {
47 	char	 	 *buf; /* binary input buffer */
48 	size_t		  sz; /* size of binary buffer */
49 };
50 
/*
 * State of one parse handle: created by mparse_alloc(), cleared
 * between documents by mparse_reset() and released by mparse_free().
 */
51 struct	mparse {
52 	enum mandoclevel  file_status; /* status of current parse */
53 	enum mandoclevel  wlevel; /* ignore messages below this */
54 	int		  line; /* line number in the file */
55 	enum mparset	  inttype; /* which parser to use */
56 	struct man	 *pman; /* persistent man parser */
57 	struct mdoc	 *pmdoc; /* persistent mdoc parser */
58 	struct man	 *man; /* man parser */
59 	struct mdoc	 *mdoc; /* mdoc parser */
60 	struct roff	 *roff; /* roff parser (!NULL) */
61 	int		  reparse_count; /* finite interp. stack */
62 	mandocmsg	  mmsg; /* warning/error message handler */
63 	void		 *arg; /* argument to mmsg */
64 	const char	 *file; /* name of the file being parsed, handed to mmsg */
65 	struct buf	 *secondary; /* copy of parsed lines; NULL unless mparse_keep() was called */
66 	char		 *defos; /* default operating system */
67 };
68 
/* Forward declarations of the file-local helpers defined below. */
69 static	void	  resize_buf(struct buf *, size_t);
70 static	void	  mparse_buf_r(struct mparse *, struct buf, int);
71 static	void	  pset(const char *, int, struct mparse *);
72 static	int	  read_whole_file(struct mparse *, const char *, int,
73 				struct buf *, int *);
74 static	void	  mparse_end(struct mparse *);
75 static	void	  mparse_parse_buffer(struct mparse *, struct buf,
76 			const char *);
77 
/*
 * Indexed by enum mandoclevel.  mandoc_msg() starts at
 * MANDOCLEVEL_FATAL and steps the level down while the error code is
 * smaller than the entry for that level, so each entry acts as the
 * lowest error code belonging to its level.
 */
78 static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
79 	MANDOCERR_OK,
80 	MANDOCERR_WARNING,
81 	MANDOCERR_WARNING,
82 	MANDOCERR_ERROR,
83 	MANDOCERR_FATAL,
84 	MANDOCERR_MAX,
85 	MANDOCERR_MAX
86 };
87 
/*
 * Message text for each enum mandocerr value; mparse_strerror()
 * returns the entry at the index given by the error code.
 * NOTE(review): the order presumably has to match the declaration of
 * enum mandocerr in mandoc.h -- confirm before adding or moving entries.
 */
88 static	const char * const	mandocerrs[MANDOCERR_MAX] = {
89 	"ok",
90 
91 	"generic warning",
92 
93 	/* related to the prologue */
94 	"no title in document",
95 	"document title should be all caps",
96 	"unknown manual section",
97 	"unknown manual volume or arch",
98 	"date missing, using today's date",
99 	"cannot parse date, using it verbatim",
100 	"prologue macros out of order",
101 	"duplicate prologue macro",
102 	"macro not allowed in prologue",
103 	"macro not allowed in body",
104 
105 	/* related to document structure */
106 	".so is fragile, better use ln(1)",
107 	"NAME section must come first",
108 	"bad NAME section contents",
109 	"sections out of conventional order",
110 	"duplicate section name",
111 	"section header suited to sections 2, 3, and 9 only",
112 
113 	/* related to macros and nesting */
114 	"skipping obsolete macro",
115 	"skipping paragraph macro",
116 	"moving paragraph macro out of list",
117 	"skipping no-space macro",
118 	"blocks badly nested",
119 	"child violates parent syntax",
120 	"nested displays are not portable",
121 	"already in literal mode",
122 	"line scope broken",
123 
124 	/* related to missing macro arguments */
125 	"skipping empty macro",
126 	"argument count wrong",
127 	"missing display type",
128 	"list type must come first",
129 	"tag lists require a width argument",
130 	"missing font type",
131 	"skipping end of block that is not open",
132 
133 	/* related to bad macro arguments */
134 	"skipping argument",
135 	"duplicate argument",
136 	"duplicate display type",
137 	"duplicate list type",
138 	"unknown AT&T UNIX version",
139 	"bad Boolean value",
140 	"unknown font",
141 	"unknown standard specifier",
142 	"bad width argument",
143 
144 	/* related to plain text */
145 	"blank line in non-literal context",
146 	"tab in non-literal context",
147 	"end of line whitespace",
148 	"bad comment style",
149 	"bad escape sequence",
150 	"unterminated quoted string",
151 
152 	/* related to equations */
153 	"unexpected literal in equation",
154 
155 	"generic error",
156 
157 	/* related to equations */
158 	"unexpected equation scope closure",
159 	"equation scope open on exit",
160 	"overlapping equation scopes",
161 	"unexpected end of equation",
162 	"equation syntax error",
163 
164 	/* related to tables */
165 	"bad table syntax",
166 	"bad table option",
167 	"bad table layout",
168 	"no table layout cells specified",
169 	"no table data cells specified",
170 	"ignore data in cell",
171 	"data block still open",
172 	"ignoring extra data cells",
173 
174 	"input stack limit exceeded, infinite loop?",
175 	"skipping bad character",
176 	"escaped character not allowed in a name",
177 	"manual name not yet set",
178 	"skipping text before the first section header",
179 	"skipping unknown macro",
180 	"NOT IMPLEMENTED, please use groff: skipping request",
181 	"argument count wrong",
182 	"skipping column outside column list",
183 	"skipping end of block that is not open",
184 	"missing end of block",
185 	"scope open on exit",
186 	"uname(3) system call failed",
187 	"macro requires line argument(s)",
188 	"macro requires body argument(s)",
189 	"macro requires argument(s)",
190 	"request requires a numeric argument",
191 	"missing list type",
192 	"line argument(s) will be lost",
193 	"body argument(s) will be lost",
194 
195 	"generic fatal error",
196 
197 	"input too large",
198 	"not a manual",
199 	"column syntax is inconsistent",
200 	"NOT IMPLEMENTED: .Bd -file",
201 	"argument count wrong, violates syntax",
202 	"child violates parent syntax",
203 	"argument count wrong, violates syntax",
204 	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
205 	"no document body",
206 	"no document prologue",
207 	"static buffer exhausted",
208 
209 	/* system errors */
210 	"cannot open file",
211 	"cannot stat file",
212 	"cannot read file",
213 };
214 
/*
 * Printable name of each enum mandoclevel value; mparse_strlevel()
 * returns the entry at the index given by the level.
 */
215 static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
216 	"SUCCESS",
217 	"RESERVED",
218 	"WARNING",
219 	"ERROR",
220 	"FATAL",
221 	"BADARG",
222 	"SYSERR"
223 };
224 
225 static void
resize_buf(struct buf * buf,size_t initial)226 resize_buf(struct buf *buf, size_t initial)
227 {
228 
229 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
230 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
231 }
232 
233 static void
pset(const char * buf,int pos,struct mparse * curp)234 pset(const char *buf, int pos, struct mparse *curp)
235 {
236 	int		 i;
237 
238 	/*
239 	 * Try to intuit which kind of manual parser should be used.  If
240 	 * passed in by command-line (-man, -mdoc), then use that
241 	 * explicitly.  If passed as -mandoc, then try to guess from the
242 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
243 	 * default to -man, which is more lenient.
244 	 *
245 	 * Separate out pmdoc/pman from mdoc/man: the first persists
246 	 * through all parsers, while the latter is used per-parse.
247 	 */
248 
249 	if ('.' == buf[0] || '\'' == buf[0]) {
250 		for (i = 1; buf[i]; i++)
251 			if (' ' != buf[i] && '\t' != buf[i])
252 				break;
253 		if ('\0' == buf[i])
254 			return;
255 	}
256 
257 	switch (curp->inttype) {
258 	case (MPARSE_MDOC):
259 		if (NULL == curp->pmdoc)
260 			curp->pmdoc = mdoc_alloc(curp->roff, curp,
261 					curp->defos);
262 		assert(curp->pmdoc);
263 		curp->mdoc = curp->pmdoc;
264 		return;
265 	case (MPARSE_MAN):
266 		if (NULL == curp->pman)
267 			curp->pman = man_alloc(curp->roff, curp);
268 		assert(curp->pman);
269 		curp->man = curp->pman;
270 		return;
271 	default:
272 		break;
273 	}
274 
275 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
276 		if (NULL == curp->pmdoc)
277 			curp->pmdoc = mdoc_alloc(curp->roff, curp,
278 					curp->defos);
279 		assert(curp->pmdoc);
280 		curp->mdoc = curp->pmdoc;
281 		return;
282 	}
283 
284 	if (NULL == curp->pman)
285 		curp->pman = man_alloc(curp->roff, curp);
286 	assert(curp->pman);
287 	curp->man = curp->pman;
288 }
289 
290 /*
291  * Main parse routine for an opened file.  This is called for each
292  * opened file and simply loops around the full input file, possibly
293  * nesting (i.e., with `so').
294  */
295 static void
mparse_buf_r(struct mparse * curp,struct buf blk,int start)296 mparse_buf_r(struct mparse *curp, struct buf blk, int start)
297 {
298 	const struct tbl_span	*span;
299 	struct buf	 ln;
300 	enum rofferr	 rr;
301 	int		 i, of, rc;
302 	int		 pos; /* byte number in the ln buffer */
303 	int		 lnn; /* line number in the real file */
304 	unsigned char	 c;
305 
306 	memset(&ln, 0, sizeof(struct buf));
307 
308 	lnn = curp->line;
309 	pos = 0;
310 
311 	for (i = 0; i < (int)blk.sz; ) {
312 		if (0 == pos && '\0' == blk.buf[i])
313 			break;
314 
315 		if (start) {
316 			curp->line = lnn;
317 			curp->reparse_count = 0;
318 		}
319 
320 		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
321 
322 			/*
323 			 * When finding an unescaped newline character,
324 			 * leave the character loop to process the line.
325 			 * Skip a preceding carriage return, if any.
326 			 */
327 
328 			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
329 			    '\n' == blk.buf[i + 1])
330 				++i;
331 			if ('\n' == blk.buf[i]) {
332 				++i;
333 				++lnn;
334 				break;
335 			}
336 
337 			/*
338 			 * Make sure we have space for at least
339 			 * one backslash and one other character
340 			 * and the trailing NUL byte.
341 			 */
342 
343 			if (pos + 2 >= (int)ln.sz)
344 				resize_buf(&ln, 256);
345 
346 			/*
347 			 * Warn about bogus characters.  If you're using
348 			 * non-ASCII encoding, you're screwing your
349 			 * readers.  Since I'd rather this not happen,
350 			 * I'll be helpful and replace these characters
351 			 * with "?", so we don't display gibberish.
352 			 * Note to manual writers: use special characters.
353 			 */
354 
355 			c = (unsigned char) blk.buf[i];
356 
357 			if ( ! (isascii(c) &&
358 					(isgraph(c) || isblank(c)))) {
359 				mandoc_msg(MANDOCERR_BADCHAR, curp,
360 						curp->line, pos, NULL);
361 				i++;
362 				ln.buf[pos++] = '?';
363 				continue;
364 			}
365 
366 			/* Trailing backslash = a plain char. */
367 
368 			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
369 				ln.buf[pos++] = blk.buf[i++];
370 				continue;
371 			}
372 
373 			/*
374 			 * Found escape and at least one other character.
375 			 * When it's a newline character, skip it.
376 			 * When there is a carriage return in between,
377 			 * skip that one as well.
378 			 */
379 
380 			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
381 			    '\n' == blk.buf[i + 2])
382 				++i;
383 			if ('\n' == blk.buf[i + 1]) {
384 				i += 2;
385 				++lnn;
386 				continue;
387 			}
388 
389 			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
390 				i += 2;
391 				/* Comment, skip to end of line */
392 				for (; i < (int)blk.sz; ++i) {
393 					if ('\n' == blk.buf[i]) {
394 						++i;
395 						++lnn;
396 						break;
397 					}
398 				}
399 
400 				/* Backout trailing whitespaces */
401 				for (; pos > 0; --pos) {
402 					if (ln.buf[pos - 1] != ' ')
403 						break;
404 					if (pos > 2 && ln.buf[pos - 2] == '\\')
405 						break;
406 				}
407 				break;
408 			}
409 
410 			/* Catch escaped bogus characters. */
411 
412 			c = (unsigned char) blk.buf[i+1];
413 
414 			if ( ! (isascii(c) &&
415 					(isgraph(c) || isblank(c)))) {
416 				mandoc_msg(MANDOCERR_BADCHAR, curp,
417 						curp->line, pos, NULL);
418 				i += 2;
419 				ln.buf[pos++] = '?';
420 				continue;
421 			}
422 
423 			/* Some other escape sequence, copy & cont. */
424 
425 			ln.buf[pos++] = blk.buf[i++];
426 			ln.buf[pos++] = blk.buf[i++];
427 		}
428 
429  		if (pos >= (int)ln.sz)
430 			resize_buf(&ln, 256);
431 
432 		ln.buf[pos] = '\0';
433 
434 		/*
435 		 * A significant amount of complexity is contained by
436 		 * the roff preprocessor.  It's line-oriented but can be
437 		 * expressed on one line, so we need at times to
438 		 * readjust our starting point and re-run it.  The roff
439 		 * preprocessor can also readjust the buffers with new
440 		 * data, so we pass them in wholesale.
441 		 */
442 
443 		of = 0;
444 
445 		/*
446 		 * Maintain a lookaside buffer of all parsed lines.  We
447 		 * only do this if mparse_keep() has been invoked (the
448 		 * buffer may be accessed with mparse_getkeep()).
449 		 */
450 
451 		if (curp->secondary) {
452 			curp->secondary->buf =
453 				mandoc_realloc
454 				(curp->secondary->buf,
455 				 curp->secondary->sz + pos + 2);
456 			memcpy(curp->secondary->buf +
457 					curp->secondary->sz,
458 					ln.buf, pos);
459 			curp->secondary->sz += pos;
460 			curp->secondary->buf
461 				[curp->secondary->sz] = '\n';
462 			curp->secondary->sz++;
463 			curp->secondary->buf
464 				[curp->secondary->sz] = '\0';
465 		}
466 rerun:
467 		rr = roff_parseln
468 			(curp->roff, curp->line,
469 			 &ln.buf, &ln.sz, of, &of);
470 
471 		switch (rr) {
472 		case (ROFF_REPARSE):
473 			if (REPARSE_LIMIT >= ++curp->reparse_count)
474 				mparse_buf_r(curp, ln, 0);
475 			else
476 				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
477 					curp->line, pos, NULL);
478 			pos = 0;
479 			continue;
480 		case (ROFF_APPEND):
481 			pos = (int)strlen(ln.buf);
482 			continue;
483 		case (ROFF_RERUN):
484 			goto rerun;
485 		case (ROFF_IGN):
486 			pos = 0;
487 			continue;
488 		case (ROFF_ERR):
489 			assert(MANDOCLEVEL_FATAL <= curp->file_status);
490 			break;
491 		case (ROFF_SO):
492 			/*
493 			 * We remove `so' clauses from our lookaside
494 			 * buffer because we're going to descend into
495 			 * the file recursively.
496 			 */
497 			if (curp->secondary)
498 				curp->secondary->sz -= pos + 1;
499 			mparse_readfd(curp, -1, ln.buf + of);
500 			if (MANDOCLEVEL_FATAL <= curp->file_status)
501 				break;
502 			pos = 0;
503 			continue;
504 		default:
505 			break;
506 		}
507 
508 		/*
509 		 * If we encounter errors in the recursive parse, make
510 		 * sure we don't continue parsing.
511 		 */
512 
513 		if (MANDOCLEVEL_FATAL <= curp->file_status)
514 			break;
515 
516 		/*
517 		 * If input parsers have not been allocated, do so now.
518 		 * We keep these instanced between parsers, but set them
519 		 * locally per parse routine since we can use different
520 		 * parsers with each one.
521 		 */
522 
523 		if ( ! (curp->man || curp->mdoc))
524 			pset(ln.buf + of, pos - of, curp);
525 
526 		/*
527 		 * Lastly, push down into the parsers themselves.  One
528 		 * of these will have already been set in the pset()
529 		 * routine.
530 		 * If libroff returns ROFF_TBL, then add it to the
531 		 * currently open parse.  Since we only get here if
532 		 * there does exist data (see tbl_data.c), we're
533 		 * guaranteed that something's been allocated.
534 		 * Do the same for ROFF_EQN.
535 		 */
536 
537 		rc = -1;
538 
539 		if (ROFF_TBL == rr)
540 			while (NULL != (span = roff_span(curp->roff))) {
541 				rc = curp->man ?
542 					man_addspan(curp->man, span) :
543 					mdoc_addspan(curp->mdoc, span);
544 				if (0 == rc)
545 					break;
546 			}
547 		else if (ROFF_EQN == rr)
548 			rc = curp->mdoc ?
549 				mdoc_addeqn(curp->mdoc,
550 					roff_eqn(curp->roff)) :
551 				man_addeqn(curp->man,
552 					roff_eqn(curp->roff));
553 		else if (curp->man || curp->mdoc)
554 			rc = curp->man ?
555 				man_parseln(curp->man,
556 					curp->line, ln.buf, of) :
557 				mdoc_parseln(curp->mdoc,
558 					curp->line, ln.buf, of);
559 
560 		if (0 == rc) {
561 			assert(MANDOCLEVEL_FATAL <= curp->file_status);
562 			break;
563 		}
564 
565 		/* Temporary buffers typically are not full. */
566 
567 		if (0 == start && '\0' == blk.buf[i])
568 			break;
569 
570 		/* Start the next input line. */
571 
572 		pos = 0;
573 	}
574 
575 	free(ln.buf);
576 }
577 
578 static int
read_whole_file(struct mparse * curp,const char * file,int fd,struct buf * fb,int * with_mmap)579 read_whole_file(struct mparse *curp, const char *file, int fd,
580 		struct buf *fb, int *with_mmap)
581 {
582 	size_t		 off;
583 	ssize_t		 ssz;
584 
585 #ifdef	HAVE_MMAP
586 	struct stat	 st;
587 	if (-1 == fstat(fd, &st)) {
588 		curp->file_status = MANDOCLEVEL_SYSERR;
589 		if (curp->mmsg)
590 			(*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
591 			    file, 0, 0, strerror(errno));
592 		return(0);
593 	}
594 
595 	/*
596 	 * If we're a regular file, try just reading in the whole entry
597 	 * via mmap().  This is faster than reading it into blocks, and
598 	 * since each file is only a few bytes to begin with, I'm not
599 	 * concerned that this is going to tank any machines.
600 	 */
601 
602 	if (S_ISREG(st.st_mode)) {
603 		if (st.st_size >= (1U << 31)) {
604 			curp->file_status = MANDOCLEVEL_FATAL;
605 			if (curp->mmsg)
606 				(*curp->mmsg)(MANDOCERR_TOOLARGE,
607 				    curp->file_status, file, 0, 0, NULL);
608 			return(0);
609 		}
610 		*with_mmap = 1;
611 		fb->sz = (size_t)st.st_size;
612 		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
613 		if (fb->buf != MAP_FAILED)
614 			return(1);
615 	}
616 #endif
617 
618 	/*
619 	 * If this isn't a regular file (like, say, stdin), then we must
620 	 * go the old way and just read things in bit by bit.
621 	 */
622 
623 	*with_mmap = 0;
624 	off = 0;
625 	fb->sz = 0;
626 	fb->buf = NULL;
627 	for (;;) {
628 		if (off == fb->sz) {
629 			if (fb->sz == (1U << 31)) {
630 				curp->file_status = MANDOCLEVEL_FATAL;
631 				if (curp->mmsg)
632 					(*curp->mmsg)(MANDOCERR_TOOLARGE,
633 					    curp->file_status,
634 					    file, 0, 0, NULL);
635 				break;
636 			}
637 			resize_buf(fb, 65536);
638 		}
639 		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
640 		if (ssz == 0) {
641 			fb->sz = off;
642 			return(1);
643 		}
644 		if (ssz == -1) {
645 			curp->file_status = MANDOCLEVEL_SYSERR;
646 			if (curp->mmsg)
647 				(*curp->mmsg)(MANDOCERR_SYSREAD,
648 				    curp->file_status, file, 0, 0,
649 				    strerror(errno));
650 			break;
651 		}
652 		off += (size_t)ssz;
653 	}
654 
655 	free(fb->buf);
656 	fb->buf = NULL;
657 	return(0);
658 }
659 
660 static void
mparse_end(struct mparse * curp)661 mparse_end(struct mparse *curp)
662 {
663 
664 	if (MANDOCLEVEL_FATAL <= curp->file_status)
665 		return;
666 
667 	if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
668 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
669 		return;
670 	}
671 
672 	if (curp->man && ! man_endparse(curp->man)) {
673 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
674 		return;
675 	}
676 
677 	if ( ! (curp->man || curp->mdoc)) {
678 		mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL);
679 		curp->file_status = MANDOCLEVEL_FATAL;
680 		return;
681 	}
682 
683 	roff_endparse(curp->roff);
684 }
685 
686 static void
mparse_parse_buffer(struct mparse * curp,struct buf blk,const char * file)687 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
688 {
689 	const char	*svfile;
690 	static int	 recursion_depth;
691 
692 	if (64 < recursion_depth) {
693 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
694 		return;
695 	}
696 
697 	/* Line number is per-file. */
698 	svfile = curp->file;
699 	curp->file = file;
700 	curp->line = 1;
701 	recursion_depth++;
702 
703 	mparse_buf_r(curp, blk, 1);
704 
705 	if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
706 		mparse_end(curp);
707 
708 	curp->file = svfile;
709 }
710 
711 enum mandoclevel
mparse_readmem(struct mparse * curp,const void * buf,size_t len,const char * file)712 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
713 		const char *file)
714 {
715 	struct buf blk;
716 
717 	blk.buf = UNCONST(buf);
718 	blk.sz = len;
719 
720 	mparse_parse_buffer(curp, blk, file);
721 	return(curp->file_status);
722 }
723 
724 enum mandoclevel
mparse_readfd(struct mparse * curp,int fd,const char * file)725 mparse_readfd(struct mparse *curp, int fd, const char *file)
726 {
727 	struct buf	 blk;
728 	int		 with_mmap;
729 
730 	if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
731 		curp->file_status = MANDOCLEVEL_SYSERR;
732 		if (curp->mmsg)
733 			(*curp->mmsg)(MANDOCERR_SYSOPEN,
734 			    curp->file_status,
735 			    file, 0, 0, strerror(errno));
736 		goto out;
737 	}
738 
739 	/*
740 	 * Run for each opened file; may be called more than once for
741 	 * each full parse sequence if the opened file is nested (i.e.,
742 	 * from `so').  Simply sucks in the whole file and moves into
743 	 * the parse phase for the file.
744 	 */
745 
746 	if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
747 		goto out;
748 
749 	mparse_parse_buffer(curp, blk, file);
750 
751 #ifdef	HAVE_MMAP
752 	if (with_mmap)
753 		munmap(blk.buf, blk.sz);
754 	else
755 #endif
756 		free(blk.buf);
757 
758 	if (STDIN_FILENO != fd && -1 == close(fd))
759 		perror(file);
760 out:
761 	return(curp->file_status);
762 }
763 
764 struct mparse *
mparse_alloc(enum mparset inttype,enum mandoclevel wlevel,mandocmsg mmsg,void * arg,char * defos)765 mparse_alloc(enum mparset inttype, enum mandoclevel wlevel,
766 		mandocmsg mmsg, void *arg, char *defos)
767 {
768 	struct mparse	*curp;
769 
770 	assert(wlevel <= MANDOCLEVEL_FATAL);
771 
772 	curp = mandoc_calloc(1, sizeof(struct mparse));
773 
774 	curp->wlevel = wlevel;
775 	curp->mmsg = mmsg;
776 	curp->arg = arg;
777 	curp->inttype = inttype;
778 	curp->defos = defos;
779 
780 	curp->roff = roff_alloc(inttype, curp);
781 	return(curp);
782 }
783 
784 void
mparse_reset(struct mparse * curp)785 mparse_reset(struct mparse *curp)
786 {
787 
788 	roff_reset(curp->roff);
789 
790 	if (curp->mdoc)
791 		mdoc_reset(curp->mdoc);
792 	if (curp->man)
793 		man_reset(curp->man);
794 	if (curp->secondary)
795 		curp->secondary->sz = 0;
796 
797 	curp->file_status = MANDOCLEVEL_OK;
798 	curp->mdoc = NULL;
799 	curp->man = NULL;
800 }
801 
802 void
mparse_free(struct mparse * curp)803 mparse_free(struct mparse *curp)
804 {
805 
806 	if (curp->pmdoc)
807 		mdoc_free(curp->pmdoc);
808 	if (curp->pman)
809 		man_free(curp->pman);
810 	if (curp->roff)
811 		roff_free(curp->roff);
812 	if (curp->secondary)
813 		free(curp->secondary->buf);
814 
815 	free(curp->secondary);
816 	free(curp);
817 }
818 
819 void
mparse_result(struct mparse * curp,struct mdoc ** mdoc,struct man ** man)820 mparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man)
821 {
822 
823 	if (mdoc)
824 		*mdoc = curp->mdoc;
825 	if (man)
826 		*man = curp->man;
827 }
828 
829 void
mandoc_vmsg(enum mandocerr t,struct mparse * m,int ln,int pos,const char * fmt,...)830 mandoc_vmsg(enum mandocerr t, struct mparse *m,
831 		int ln, int pos, const char *fmt, ...)
832 {
833 	char		 buf[256];
834 	va_list		 ap;
835 
836 	va_start(ap, fmt);
837 	vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
838 	va_end(ap);
839 
840 	mandoc_msg(t, m, ln, pos, buf);
841 }
842 
843 void
mandoc_msg(enum mandocerr er,struct mparse * m,int ln,int col,const char * msg)844 mandoc_msg(enum mandocerr er, struct mparse *m,
845 		int ln, int col, const char *msg)
846 {
847 	enum mandoclevel level;
848 
849 	level = MANDOCLEVEL_FATAL;
850 	while (er < mandoclimits[level])
851 		level--;
852 
853 	if (level < m->wlevel)
854 		return;
855 
856 	if (m->mmsg)
857 		(*m->mmsg)(er, level, m->file, ln, col, msg);
858 
859 	if (m->file_status < level)
860 		m->file_status = level;
861 }
862 
863 const char *
mparse_strerror(enum mandocerr er)864 mparse_strerror(enum mandocerr er)
865 {
866 
867 	return(mandocerrs[er]);
868 }
869 
870 const char *
mparse_strlevel(enum mandoclevel lvl)871 mparse_strlevel(enum mandoclevel lvl)
872 {
873 	return(mandoclevels[lvl]);
874 }
875 
876 void
mparse_keep(struct mparse * p)877 mparse_keep(struct mparse *p)
878 {
879 
880 	assert(NULL == p->secondary);
881 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
882 }
883 
884 const char *
mparse_getkeep(const struct mparse * p)885 mparse_getkeep(const struct mparse *p)
886 {
887 
888 	assert(p->secondary);
889 	return(p->secondary->sz ? p->secondary->buf : NULL);
890 }
891