xref: /openbsd-src/usr.bin/mandoc/read.c (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1 /*	$OpenBSD: read.c,v 1.165 2017/11/10 22:48:05 jca Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 #include <sys/mman.h>
21 #include <sys/stat.h>
22 
23 #include <assert.h>
24 #include <ctype.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <zlib.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "libmandoc.h"
40 
/*
 * Upper bound for curp->reparse_count: once more than this many
 * ROFF_REPARSE results have been seen since the counter was last
 * reset, mparse_buf_r() reports MANDOCERR_ROFFLOOP and gives up.
 */
#define	REPARSE_LIMIT	1000
42 
/*
 * State of one parser instance: handles of the roff and macro parsers,
 * bookkeeping for the input that is currently being read, and the
 * settings that control message reporting.
 */
struct	mparse {
	struct roff	 *roff; /* roff parser (!NULL) */
	struct roff_man	 *man; /* man parser */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *os_s; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandocerr	  mmin; /* ignore messages below this */
	int		  options; /* parser options */
	int		  gzip; /* current input file is gzipped */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
60 
/* Forward declarations of the file-local helper functions. */
static	void	  choose_parser(struct mparse *);
static	void	  resize_buf(struct buf *, size_t);
static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
69 
/*
 * Indexed by enum mandoclevel.  mandoc_msg() starts at
 * MANDOCLEVEL_UNSUPP and steps down until the message number is no
 * longer smaller than the entry, so each entry acts as the lowest
 * message number that is classified at that level.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
79 
/*
 * Message texts, indexed by enum mandocerr and returned verbatim by
 * mparse_strerror().  One slot is deliberately NULL.
 * NOTE(review): the order presumably has to match the declaration of
 * enum mandocerr in mandoc.h exactly - confirm when adding entries.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"base system convention",

	"Mdocdate found",
	"Mdocdate missing",
	"unknown architecture",
	"operating system explicitly specified",
	"RCS id missing",
	"referenced manual not found",

	"generic style suggestion",

	"legacy man(7) date format",
	"lower case character in document title",
	"duplicate RCS id",
	"possible typo in section name",
	"unterminated quoted argument",
	"useless macro",
	"consider using OS macro",
	"errnos out of order",
	"duplicate errno",
	"trailing delimiter",
	"no blank before trailing delimiter",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"function name without markup",
	"whitespace at end of input line",
	"bad comment style",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"date in the future, using it anyway",
	"missing Os macro, using \"\"",
	"late prologue macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without Nm before Nd",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing comma before name",
	"missing description line, using \"\"",
	"description line outside NAME section",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"cross reference to self",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"first macro on line",
	"line scope broken",
	"skipping blank line in line scope",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 6n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing argument, using next line",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing section argument",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"unknown library name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"new sentence, new line",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	NULL,
	"duplicate prologue macro",
	"skipping late title macro",
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"argument is not numeric, using 1",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
264 
/*
 * Names of the message levels, indexed by enum mandoclevel and
 * returned verbatim by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"STYLE",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
274 
275 
276 static void
277 resize_buf(struct buf *buf, size_t initial)
278 {
279 
280 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
281 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
282 }
283 
284 static void
285 choose_parser(struct mparse *curp)
286 {
287 	char		*cp, *ep;
288 	int		 format;
289 
290 	/*
291 	 * If neither command line arguments -mdoc or -man select
292 	 * a parser nor the roff parser found a .Dd or .TH macro
293 	 * yet, look ahead in the main input buffer.
294 	 */
295 
296 	if ((format = roff_getformat(curp->roff)) == 0) {
297 		cp = curp->primary->buf;
298 		ep = cp + curp->primary->sz;
299 		while (cp < ep) {
300 			if (*cp == '.' || *cp == '\'') {
301 				cp++;
302 				if (cp[0] == 'D' && cp[1] == 'd') {
303 					format = MPARSE_MDOC;
304 					break;
305 				}
306 				if (cp[0] == 'T' && cp[1] == 'H') {
307 					format = MPARSE_MAN;
308 					break;
309 				}
310 			}
311 			cp = memchr(cp, '\n', ep - cp);
312 			if (cp == NULL)
313 				break;
314 			cp++;
315 		}
316 	}
317 
318 	if (format == MPARSE_MDOC) {
319 		curp->man->macroset = MACROSET_MDOC;
320 		if (curp->man->mdocmac == NULL)
321 			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
322 	} else {
323 		curp->man->macroset = MACROSET_MAN;
324 		if (curp->man->manmac == NULL)
325 			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
326 	}
327 	curp->man->first->tok = TOKEN_NONE;
328 }
329 
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * Arguments:
 *   curp   parser state
 *   blk    input buffer, passed by value
 *   i      byte offset in blk at which to start reading
 *   start  non-zero for the call made by mparse_parse_buffer() on a
 *          whole file; zero for the recursive calls made below on
 *          temporary buffers, where a NUL byte ends the input
 *
 * Returns 0 if the reparse limit was exceeded, or if a nested reparse
 * returned a value other than 1 while start is not 1; returns 1 in all
 * other cases, including the case where a lone .so request was merely
 * recorded in curp->sodest.
 */
static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	struct buf	 ln; /* the input line being assembled */
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	enum rofferr	 rr;
	int		 of; /* offset into ln, set by roff_parseln() */
	int		 lnn; /* line number in the real file */
	int		 fd;
	unsigned char	 c;

	memset(&ln, 0, sizeof(ln));

	lnn = curp->line;
	pos = 0;

	while (i < blk.sz) {
		/* A NUL byte at the start of a line ends the input. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * While both encodings are still possible,
			 * let preconv_cue() inspect the first two lines.
			 */
			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Copy one input line from blk into ln. */
		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 11 bytes: "\\[u10ffff]\0"
			 */

			if (pos + 11 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 * If no encoding is set or the conversion fails,
			 * report the byte and substitute '?'.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 * All of them except the tab are reported;
			 * a carriage return is dropped, the others
			 * are replaced by '?'.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			ln.buf[pos++] = blk.buf[i++];
		}

		/* Room for an optional newline and the terminating NUL. */
		if (pos + 1 >= ln.sz)
			resize_buf(&ln, 256);

		/* At the end of the input, append a newline to the line. */
		if (i == blk.sz || blk.buf[i] == '\0')
			ln.buf[pos++] = '\n';
		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/*
			 * Parse the rewritten line as a temporary buffer,
			 * unless that has happened too often already.
			 */
			if (++curp->reparse_count > REPARSE_LIMIT)
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
			    start == 1) {
				pos = 0;
				continue;
			}
			free(ln.buf);
			return 0;
		case ROFF_APPEND:
			/* Keep the line and append the next one to it. */
			pos = strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_IGN:
			/* Drop the line and start the next one. */
			pos = 0;
			continue;
		case ROFF_SO:
			/*
			 * Without MPARSE_SO, a .so request that is the
			 * last thing in the buffer is not followed:
			 * only its destination is recorded.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return 1;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			/* mparse_open() overwrites curp->file. */
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: report that
				 * and parse a short replacement text
				 * naming the file instead.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/* Decide between mdoc and man before the first macro line. */
		if (curp->man->macroset == MACROSET_NONE)
			choose_parser(curp);

		/* A return value of 2 from the macro parser ends the loop. */
		if ((curp->man->macroset == MACROSET_MDOC ?
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
				break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
	return 1;
}
548 
/*
 * Read the complete content of the open descriptor fd into fb.
 *
 * Uncompressed regular files are mapped with mmap(2); in that case
 * *with_mmap is set to 1 and the caller has to release the buffer
 * with munmap(2).  Everything else (gzipped input, non-regular files,
 * or a failed mapping) is read into a heap buffer; *with_mmap is then
 * set to 0 and the caller has to free(3) the buffer.
 *
 * The descriptor fd itself is never closed here.  The "file" argument
 * is not used by this function.
 *
 * Returns 1 on success and 0 on failure; on failure, a message has
 * been reported and no buffer is handed to the caller.
 */
static int
read_whole_file(struct mparse *curp, const char *file, int fd,
		struct buf *fb, int *with_mmap)
{
	struct stat	 st;
	gzFile		 gz;
	size_t		 off;
	ssize_t		 ssz;
	int		 gzfd, gzrc, retval;

	if (fstat(fd, &st) == -1) {
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
		    "fstat: %s", strerror(errno));
		return 0;
	}

	/*
	 * If we're a regular file, try just reading in the whole entry
	 * via mmap().  This is faster than reading it into blocks, and
	 * since each file is only a few bytes to begin with, I'm not
	 * concerned that this is going to tank any machines.
	 */

	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
		if (st.st_size > 0x7fffffff) {
			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
			return 0;
		}
		*with_mmap = 1;
		fb->sz = (size_t)st.st_size;
		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
		if (fb->buf != MAP_FAILED)
			return 1;
	}

	if (curp->gzip) {
		/*
		 * gzclose(3) has to be called to release the memory
		 * zlib allocates for the stream, but it also closes
		 * the underlying descriptor, which belongs to the
		 * caller.  So hand a duplicate to zlib.
		 */
		if ((gzfd = dup(fd)) == -1) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "dup: %s", strerror(errno));
			return 0;
		}
		if ((gz = gzdopen(gzfd, "rb")) == NULL) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "gzdopen: %s", strerror(errno));
			close(gzfd);
			return 0;
		}
	} else
		gz = NULL;

	/*
	 * If this isn't a regular file (like, say, stdin), then we must
	 * go the old way and just read things in bit by bit.
	 */

	*with_mmap = 0;
	retval = 0;
	off = 0;
	fb->sz = 0;
	fb->buf = NULL;
	for (;;) {
		if (off == fb->sz) {
			if (fb->sz == (1U << 31)) {
				mandoc_msg(MANDOCERR_TOOLARGE, curp,
				    0, 0, NULL);
				break;
			}
			resize_buf(fb, 65536);
		}
		ssz = gz != NULL ?
		    gzread(gz, fb->buf + off, fb->sz - off) :
		    read(fd, fb->buf + off, fb->sz - off);
		if (ssz == 0) {
			/* End of input: trim the size to what was read. */
			fb->sz = off;
			retval = 1;
			break;
		}
		if (ssz == -1) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "read: %s", strerror(errno));
			break;
		}
		off += (size_t)ssz;
	}

	/* Release the zlib stream and the duplicated descriptor. */
	if (gz != NULL && (gzrc = gzclose(gz)) != Z_OK)
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
		    "gzclose: %s", zError(gzrc));

	if (retval == 0) {
		free(fb->buf);
		fb->buf = NULL;
	}
	return retval;
}
629 
630 static void
631 mparse_end(struct mparse *curp)
632 {
633 	if (curp->man->macroset == MACROSET_NONE)
634 		curp->man->macroset = MACROSET_MAN;
635 	if (curp->man->macroset == MACROSET_MDOC)
636 		mdoc_endparse(curp->man);
637 	else
638 		man_endparse(curp->man);
639 	roff_endparse(curp->roff);
640 }
641 
642 static void
643 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
644 {
645 	struct buf	*svprimary;
646 	const char	*svfile;
647 	size_t		 offset;
648 	static int	 recursion_depth;
649 
650 	if (64 < recursion_depth) {
651 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
652 		return;
653 	}
654 
655 	/* Line number is per-file. */
656 	svfile = curp->file;
657 	curp->file = file;
658 	svprimary = curp->primary;
659 	curp->primary = &blk;
660 	curp->line = 1;
661 	recursion_depth++;
662 
663 	/* Skip an UTF-8 byte order mark. */
664 	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
665 	    (unsigned char)blk.buf[0] == 0xef &&
666 	    (unsigned char)blk.buf[1] == 0xbb &&
667 	    (unsigned char)blk.buf[2] == 0xbf) {
668 		offset = 3;
669 		curp->filenc &= ~MPARSE_LATIN1;
670 	} else
671 		offset = 0;
672 
673 	mparse_buf_r(curp, blk, offset, 1);
674 
675 	if (--recursion_depth == 0)
676 		mparse_end(curp);
677 
678 	curp->primary = svprimary;
679 	curp->file = svfile;
680 }
681 
682 /*
683  * Read the whole file into memory and call the parsers.
684  * Called recursively when an .so request is encountered.
685  */
686 enum mandoclevel
687 mparse_readfd(struct mparse *curp, int fd, const char *file)
688 {
689 	struct buf	 blk;
690 	int		 with_mmap;
691 	int		 save_filenc;
692 
693 	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
694 		save_filenc = curp->filenc;
695 		curp->filenc = curp->options &
696 		    (MPARSE_UTF8 | MPARSE_LATIN1);
697 		mparse_parse_buffer(curp, blk, file);
698 		curp->filenc = save_filenc;
699 		if (with_mmap)
700 			munmap(blk.buf, blk.sz);
701 		else
702 			free(blk.buf);
703 	}
704 	return curp->file_status;
705 }
706 
707 int
708 mparse_open(struct mparse *curp, const char *file)
709 {
710 	char		 *cp;
711 	int		  fd;
712 
713 	curp->file = file;
714 	cp = strrchr(file, '.');
715 	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
716 
717 	/* First try to use the filename as it is. */
718 
719 	if ((fd = open(file, O_RDONLY)) != -1)
720 		return fd;
721 
722 	/*
723 	 * If that doesn't work and the filename doesn't
724 	 * already  end in .gz, try appending .gz.
725 	 */
726 
727 	if ( ! curp->gzip) {
728 		mandoc_asprintf(&cp, "%s.gz", file);
729 		fd = open(cp, O_RDONLY);
730 		free(cp);
731 		if (fd != -1) {
732 			curp->gzip = 1;
733 			return fd;
734 		}
735 	}
736 
737 	/* Neither worked, give up. */
738 
739 	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
740 	return -1;
741 }
742 
743 struct mparse *
744 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
745     enum mandoc_os os_e, const char *os_s)
746 {
747 	struct mparse	*curp;
748 
749 	curp = mandoc_calloc(1, sizeof(struct mparse));
750 
751 	curp->options = options;
752 	curp->mmin = mmin;
753 	curp->mmsg = mmsg;
754 	curp->os_s = os_s;
755 
756 	curp->roff = roff_alloc(curp, options);
757 	curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
758 		curp->options & MPARSE_QUICK ? 1 : 0);
759 	if (curp->options & MPARSE_MDOC) {
760 		curp->man->macroset = MACROSET_MDOC;
761 		if (curp->man->mdocmac == NULL)
762 			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
763 	} else if (curp->options & MPARSE_MAN) {
764 		curp->man->macroset = MACROSET_MAN;
765 		if (curp->man->manmac == NULL)
766 			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
767 	}
768 	curp->man->first->tok = TOKEN_NONE;
769 	curp->man->meta.os_e = os_e;
770 	return curp;
771 }
772 
773 void
774 mparse_reset(struct mparse *curp)
775 {
776 	roff_reset(curp->roff);
777 	roff_man_reset(curp->man);
778 
779 	free(curp->sodest);
780 	curp->sodest = NULL;
781 
782 	if (curp->secondary)
783 		curp->secondary->sz = 0;
784 
785 	curp->file_status = MANDOCLEVEL_OK;
786 	curp->gzip = 0;
787 }
788 
789 void
790 mparse_free(struct mparse *curp)
791 {
792 
793 	roffhash_free(curp->man->mdocmac);
794 	roffhash_free(curp->man->manmac);
795 	roff_man_free(curp->man);
796 	roff_free(curp->roff);
797 	if (curp->secondary)
798 		free(curp->secondary->buf);
799 
800 	free(curp->secondary);
801 	free(curp->sodest);
802 	free(curp);
803 }
804 
805 void
806 mparse_result(struct mparse *curp, struct roff_man **man,
807 	char **sodest)
808 {
809 
810 	if (sodest && NULL != (*sodest = curp->sodest)) {
811 		*man = NULL;
812 		return;
813 	}
814 	if (man)
815 		*man = curp->man;
816 }
817 
818 void
819 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
820 {
821 	if (curp->file_status > *rc)
822 		*rc = curp->file_status;
823 }
824 
825 void
826 mandoc_vmsg(enum mandocerr t, struct mparse *m,
827 		int ln, int pos, const char *fmt, ...)
828 {
829 	char		 buf[256];
830 	va_list		 ap;
831 
832 	va_start(ap, fmt);
833 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
834 	va_end(ap);
835 
836 	mandoc_msg(t, m, ln, pos, buf);
837 }
838 
839 void
840 mandoc_msg(enum mandocerr er, struct mparse *m,
841 		int ln, int col, const char *msg)
842 {
843 	enum mandoclevel level;
844 
845 	if (er < m->mmin && er != MANDOCERR_FILE)
846 		return;
847 
848 	level = MANDOCLEVEL_UNSUPP;
849 	while (er < mandoclimits[level])
850 		level--;
851 
852 	if (m->mmsg)
853 		(*m->mmsg)(er, level, m->file, ln, col, msg);
854 
855 	if (m->file_status < level)
856 		m->file_status = level;
857 }
858 
859 const char *
860 mparse_strerror(enum mandocerr er)
861 {
862 
863 	return mandocerrs[er];
864 }
865 
866 const char *
867 mparse_strlevel(enum mandoclevel lvl)
868 {
869 	return mandoclevels[lvl];
870 }
871 
872 void
873 mparse_keep(struct mparse *p)
874 {
875 
876 	assert(NULL == p->secondary);
877 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
878 }
879 
880 const char *
881 mparse_getkeep(const struct mparse *p)
882 {
883 
884 	assert(p->secondary);
885 	return p->secondary->sz ? p->secondary->buf : NULL;
886 }
887